LLVM 19.0.0git
LoopUnrollPass.cpp
Go to the documentation of this file.
1//===- LoopUnroll.cpp - Loop unroller pass --------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass implements a simple loop unroller. It works best when loops have
10// been canonicalized by the -indvars pass, allowing it to determine the trip
11// counts of loops easily.
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/DenseMap.h"
17#include "llvm/ADT/DenseSet.h"
18#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/SetVector.h"
23#include "llvm/ADT/StringRef.h"
36#include "llvm/IR/BasicBlock.h"
37#include "llvm/IR/CFG.h"
38#include "llvm/IR/Constant.h"
39#include "llvm/IR/Constants.h"
41#include "llvm/IR/Dominators.h"
42#include "llvm/IR/Function.h"
43#include "llvm/IR/Instruction.h"
46#include "llvm/IR/Metadata.h"
47#include "llvm/IR/PassManager.h"
49#include "llvm/Pass.h"
52#include "llvm/Support/Debug.h"
63#include <algorithm>
64#include <cassert>
65#include <cstdint>
66#include <limits>
67#include <optional>
68#include <string>
69#include <tuple>
70#include <utility>
71
72using namespace llvm;
73
74#define DEBUG_TYPE "loop-unroll"
75
77 "forget-scev-loop-unroll", cl::init(false), cl::Hidden,
78 cl::desc("Forget everything in SCEV when doing LoopUnroll, instead of just"
79 " the current top-most loop. This is sometimes preferred to reduce"
80 " compile time."));
81
83 UnrollThreshold("unroll-threshold", cl::Hidden,
84 cl::desc("The cost threshold for loop unrolling"));
85
88 "unroll-optsize-threshold", cl::init(0), cl::Hidden,
89 cl::desc("The cost threshold for loop unrolling when optimizing for "
90 "size"));
91
93 "unroll-partial-threshold", cl::Hidden,
94 cl::desc("The cost threshold for partial loop unrolling"));
95
97 "unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden,
98 cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied "
99 "to the threshold when aggressively unrolling a loop due to the "
100 "dynamic cost savings. If completely unrolling a loop will reduce "
101 "the total runtime from X to Y, we boost the loop unroll "
102 "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, "
103 "X/Y). This limit avoids excessive code bloat."));
104
106 "unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden,
107 cl::desc("Don't allow loop unrolling to simulate more than this number of"
108 "iterations when checking full unroll profitability"));
109
111 "unroll-count", cl::Hidden,
112 cl::desc("Use this unroll count for all loops including those with "
113 "unroll_count pragma values, for testing purposes"));
114
116 "unroll-max-count", cl::Hidden,
117 cl::desc("Set the max unroll count for partial and runtime unrolling, for"
118 "testing purposes"));
119
121 "unroll-full-max-count", cl::Hidden,
122 cl::desc(
123 "Set the max unroll count for full unrolling, for testing purposes"));
124
125static cl::opt<bool>
126 UnrollAllowPartial("unroll-allow-partial", cl::Hidden,
127 cl::desc("Allows loops to be partially unrolled until "
128 "-unroll-threshold loop size is reached."));
129
131 "unroll-allow-remainder", cl::Hidden,
132 cl::desc("Allow generation of a loop remainder (extra iterations) "
133 "when unrolling a loop."));
134
135static cl::opt<bool>
136 UnrollRuntime("unroll-runtime", cl::Hidden,
137 cl::desc("Unroll loops with run-time trip counts"));
138
140 "unroll-max-upperbound", cl::init(8), cl::Hidden,
141 cl::desc(
142 "The max of trip count upper bound that is considered in unrolling"));
143
145 "pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden,
146 cl::desc("Unrolled size limit for loops with an unroll(full) or "
147 "unroll_count pragma."));
148
150 "flat-loop-tripcount-threshold", cl::init(5), cl::Hidden,
151 cl::desc("If the runtime tripcount for the loop is lower than the "
152 "threshold, the loop is considered as flat and will be less "
153 "aggressively unrolled."));
154
156 "unroll-remainder", cl::Hidden,
157 cl::desc("Allow the loop remainder to be unrolled."));
158
159// This option isn't ever intended to be enabled, it serves to allow
160// experiments to check the assumptions about when this kind of revisit is
161// necessary.
163 "unroll-revisit-child-loops", cl::Hidden,
164 cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. "
165 "This shouldn't typically be needed as child loops (or their "
166 "clones) were already visited."));
167
169 "unroll-threshold-aggressive", cl::init(300), cl::Hidden,
170 cl::desc("Threshold (max size of unrolled loop) to use in aggressive (O3) "
171 "optimizations"));
173 UnrollThresholdDefault("unroll-threshold-default", cl::init(150),
175 cl::desc("Default threshold (max size of unrolled "
176 "loop), used in all but O3 optimizations"));
177
179 "pragma-unroll-full-max-iterations", cl::init(1'000'000), cl::Hidden,
180 cl::desc("Maximum allowed iterations to unroll under pragma unroll full."));
181
182/// A magic value for use with the Threshold parameter to indicate
183/// that the loop unroll should be performed regardless of how much
184/// code expansion would result.
185static const unsigned NoThreshold = std::numeric_limits<unsigned>::max();
186
187/// Gather the various unrolling parameters based on the defaults, compiler
188/// flags, TTI overrides and user specified parameters.
192 OptimizationRemarkEmitter &ORE, int OptLevel,
193 std::optional<unsigned> UserThreshold, std::optional<unsigned> UserCount,
194 std::optional<bool> UserAllowPartial, std::optional<bool> UserRuntime,
195 std::optional<bool> UserUpperBound,
196 std::optional<unsigned> UserFullUnrollMaxCount) {
198
199 // Set up the defaults
200 UP.Threshold =
204 UP.PartialThreshold = 150;
206 UP.Count = 0;
208 UP.MaxCount = std::numeric_limits<unsigned>::max();
210 UP.FullUnrollMaxCount = std::numeric_limits<unsigned>::max();
211 UP.BEInsns = 2;
212 UP.Partial = false;
213 UP.Runtime = false;
214 UP.AllowRemainder = true;
215 UP.UnrollRemainder = false;
216 UP.AllowExpensiveTripCount = false;
217 UP.Force = false;
218 UP.UpperBound = false;
219 UP.UnrollAndJam = false;
222
223 // Override with any target specific settings
224 TTI.getUnrollingPreferences(L, SE, UP, &ORE);
225
226 // Apply size attributes
227 bool OptForSize = L->getHeader()->getParent()->hasOptSize() ||
228 // Let unroll hints / pragmas take precedence over PGSO.
230 llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI,
231 PGSOQueryType::IRPass));
232 if (OptForSize) {
236 }
237
238 // Apply any user values specified by cl::opt
239 if (UnrollThreshold.getNumOccurrences() > 0)
241 if (UnrollPartialThreshold.getNumOccurrences() > 0)
243 if (UnrollMaxPercentThresholdBoost.getNumOccurrences() > 0)
245 if (UnrollMaxCount.getNumOccurrences() > 0)
247 if (UnrollMaxUpperBound.getNumOccurrences() > 0)
249 if (UnrollFullMaxCount.getNumOccurrences() > 0)
251 if (UnrollAllowPartial.getNumOccurrences() > 0)
253 if (UnrollAllowRemainder.getNumOccurrences() > 0)
255 if (UnrollRuntime.getNumOccurrences() > 0)
258 UP.UpperBound = false;
259 if (UnrollUnrollRemainder.getNumOccurrences() > 0)
261 if (UnrollMaxIterationsCountToAnalyze.getNumOccurrences() > 0)
263
264 // Apply user values provided by argument
265 if (UserThreshold) {
266 UP.Threshold = *UserThreshold;
267 UP.PartialThreshold = *UserThreshold;
268 }
269 if (UserCount)
270 UP.Count = *UserCount;
271 if (UserAllowPartial)
272 UP.Partial = *UserAllowPartial;
273 if (UserRuntime)
274 UP.Runtime = *UserRuntime;
275 if (UserUpperBound)
276 UP.UpperBound = *UserUpperBound;
277 if (UserFullUnrollMaxCount)
278 UP.FullUnrollMaxCount = *UserFullUnrollMaxCount;
279
280 return UP;
281}
282
283namespace {
284
285/// A struct to densely store the state of an instruction after unrolling at
286/// each iteration.
287///
288/// This is designed to work like a tuple of <Instruction *, int> for the
289/// purposes of hashing and lookup, but to be able to associate two boolean
290/// states with each key.
291struct UnrolledInstState {
292 Instruction *I;
293 int Iteration : 30;
294 unsigned IsFree : 1;
295 unsigned IsCounted : 1;
296};
297
298/// Hashing and equality testing for a set of the instruction states.
299struct UnrolledInstStateKeyInfo {
300 using PtrInfo = DenseMapInfo<Instruction *>;
302
303 static inline UnrolledInstState getEmptyKey() {
304 return {PtrInfo::getEmptyKey(), 0, 0, 0};
305 }
306
307 static inline UnrolledInstState getTombstoneKey() {
308 return {PtrInfo::getTombstoneKey(), 0, 0, 0};
309 }
310
311 static inline unsigned getHashValue(const UnrolledInstState &S) {
312 return PairInfo::getHashValue({S.I, S.Iteration});
313 }
314
315 static inline bool isEqual(const UnrolledInstState &LHS,
316 const UnrolledInstState &RHS) {
317 return PairInfo::isEqual({LHS.I, LHS.Iteration}, {RHS.I, RHS.Iteration});
318 }
319};
320
321struct EstimatedUnrollCost {
322 /// The estimated cost after unrolling.
323 unsigned UnrolledCost;
324
325 /// The estimated dynamic cost of executing the instructions in the
326 /// rolled form.
327 unsigned RolledDynamicCost;
328};
329
330struct PragmaInfo {
331 PragmaInfo(bool UUC, bool PFU, unsigned PC, bool PEU)
332 : UserUnrollCount(UUC), PragmaFullUnroll(PFU), PragmaCount(PC),
333 PragmaEnableUnroll(PEU) {}
334 const bool UserUnrollCount;
335 const bool PragmaFullUnroll;
336 const unsigned PragmaCount;
337 const bool PragmaEnableUnroll;
338};
339
340} // end anonymous namespace
341
342/// Figure out if the loop is worth full unrolling.
343///
344/// Complete loop unrolling can make some loads constant, and we need to know
345/// if that would expose any further optimization opportunities. This routine
346/// estimates this optimization. It computes cost of unrolled loop
347/// (UnrolledCost) and dynamic cost of the original loop (RolledDynamicCost). By
348/// dynamic cost we mean that we won't count costs of blocks that are known not
349/// to be executed (i.e. if we have a branch in the loop and we know that at the
350/// given iteration its condition would be resolved to true, we won't add up the
351/// cost of the 'false'-block).
352/// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If
353/// the analysis failed (no benefits expected from the unrolling, or the loop is
354/// too big to analyze), the returned value is std::nullopt.
355static std::optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
356 const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE,
357 const SmallPtrSetImpl<const Value *> &EphValues,
358 const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize,
359 unsigned MaxIterationsCountToAnalyze) {
360 // We want to be able to scale offsets by the trip count and add more offsets
361 // to them without checking for overflows, and we already don't want to
362 // analyze *massive* trip counts, so we force the max to be reasonably small.
363 assert(MaxIterationsCountToAnalyze <
364 (unsigned)(std::numeric_limits<int>::max() / 2) &&
365 "The unroll iterations max is too large!");
366
367 // Only analyze inner loops. We can't properly estimate cost of nested loops
368 // and we won't visit inner loops again anyway.
369 if (!L->isInnermost())
370 return std::nullopt;
371
372 // Don't simulate loops with a big or unknown tripcount
373 if (!TripCount || TripCount > MaxIterationsCountToAnalyze)
374 return std::nullopt;
375
378 DenseMap<Value *, Value *> SimplifiedValues;
379 SmallVector<std::pair<Value *, Value *>, 4> SimplifiedInputValues;
380
381 // The estimated cost of the unrolled form of the loop. We try to estimate
382 // this by simplifying as much as we can while computing the estimate.
383 InstructionCost UnrolledCost = 0;
384
385 // We also track the estimated dynamic (that is, actually executed) cost in
386 // the rolled form. This helps identify cases when the savings from unrolling
387 // aren't just exposing dead control flows, but actual reduced dynamic
388 // instructions due to the simplifications which we expect to occur after
389 // unrolling.
390 InstructionCost RolledDynamicCost = 0;
391
392 // We track the simplification of each instruction in each iteration. We use
393 // this to recursively merge costs into the unrolled cost on-demand so that
394 // we don't count the cost of any dead code. This is essentially a map from
395 // <instruction, int> to <bool, bool>, but stored as a densely packed struct.
397
398 // A small worklist used to accumulate cost of instructions from each
399 // observable and reached root in the loop.
401
402 // PHI-used worklist used between iterations while accumulating cost.
404
405 // Helper function to accumulate cost for instructions in the loop.
406 auto AddCostRecursively = [&](Instruction &RootI, int Iteration) {
407 assert(Iteration >= 0 && "Cannot have a negative iteration!");
408 assert(CostWorklist.empty() && "Must start with an empty cost list");
409 assert(PHIUsedList.empty() && "Must start with an empty phi used list");
410 CostWorklist.push_back(&RootI);
412 RootI.getFunction()->hasMinSize() ?
415 for (;; --Iteration) {
416 do {
417 Instruction *I = CostWorklist.pop_back_val();
418
419 // InstCostMap only uses I and Iteration as a key, the other two values
420 // don't matter here.
421 auto CostIter = InstCostMap.find({I, Iteration, 0, 0});
422 if (CostIter == InstCostMap.end())
423 // If an input to a PHI node comes from a dead path through the loop
424 // we may have no cost data for it here. What that actually means is
425 // that it is free.
426 continue;
427 auto &Cost = *CostIter;
428 if (Cost.IsCounted)
429 // Already counted this instruction.
430 continue;
431
432 // Mark that we are counting the cost of this instruction now.
433 Cost.IsCounted = true;
434
435 // If this is a PHI node in the loop header, just add it to the PHI set.
436 if (auto *PhiI = dyn_cast<PHINode>(I))
437 if (PhiI->getParent() == L->getHeader()) {
438 assert(Cost.IsFree && "Loop PHIs shouldn't be evaluated as they "
439 "inherently simplify during unrolling.");
440 if (Iteration == 0)
441 continue;
442
443 // Push the incoming value from the backedge into the PHI used list
444 // if it is an in-loop instruction. We'll use this to populate the
445 // cost worklist for the next iteration (as we count backwards).
446 if (auto *OpI = dyn_cast<Instruction>(
447 PhiI->getIncomingValueForBlock(L->getLoopLatch())))
448 if (L->contains(OpI))
449 PHIUsedList.push_back(OpI);
450 continue;
451 }
452
453 // First accumulate the cost of this instruction.
454 if (!Cost.IsFree) {
455 // Consider simplified operands in instruction cost.
457 transform(I->operands(), std::back_inserter(Operands),
458 [&](Value *Op) {
459 if (auto Res = SimplifiedValues.lookup(Op))
460 return Res;
461 return Op;
462 });
463 UnrolledCost += TTI.getInstructionCost(I, Operands, CostKind);
464 LLVM_DEBUG(dbgs() << "Adding cost of instruction (iteration "
465 << Iteration << "): ");
466 LLVM_DEBUG(I->dump());
467 }
468
469 // We must count the cost of every operand which is not free,
470 // recursively. If we reach a loop PHI node, simply add it to the set
471 // to be considered on the next iteration (backwards!).
472 for (Value *Op : I->operands()) {
473 // Check whether this operand is free due to being a constant or
474 // outside the loop.
475 auto *OpI = dyn_cast<Instruction>(Op);
476 if (!OpI || !L->contains(OpI))
477 continue;
478
479 // Otherwise accumulate its cost.
480 CostWorklist.push_back(OpI);
481 }
482 } while (!CostWorklist.empty());
483
484 if (PHIUsedList.empty())
485 // We've exhausted the search.
486 break;
487
488 assert(Iteration > 0 &&
489 "Cannot track PHI-used values past the first iteration!");
490 CostWorklist.append(PHIUsedList.begin(), PHIUsedList.end());
491 PHIUsedList.clear();
492 }
493 };
494
495 // Ensure that we don't violate the loop structure invariants relied on by
496 // this analysis.
497 assert(L->isLoopSimplifyForm() && "Must put loop into normal form first.");
498 assert(L->isLCSSAForm(DT) &&
499 "Must have loops in LCSSA form to track live-out values.");
500
501 LLVM_DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");
502
504 L->getHeader()->getParent()->hasMinSize() ?
506 // Simulate execution of each iteration of the loop counting instructions,
507 // which would be simplified.
508 // Since the same load will take different values on different iterations,
509 // we literally have to go through all loop's iterations.
510 for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
511 LLVM_DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");
512
513 // Prepare for the iteration by collecting any simplified entry or backedge
514 // inputs.
515 for (Instruction &I : *L->getHeader()) {
516 auto *PHI = dyn_cast<PHINode>(&I);
517 if (!PHI)
518 break;
519
520 // The loop header PHI nodes must have exactly two input: one from the
521 // loop preheader and one from the loop latch.
522 assert(
523 PHI->getNumIncomingValues() == 2 &&
524 "Must have an incoming value only for the preheader and the latch.");
525
526 Value *V = PHI->getIncomingValueForBlock(
527 Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch());
528 if (Iteration != 0 && SimplifiedValues.count(V))
529 V = SimplifiedValues.lookup(V);
530 SimplifiedInputValues.push_back({PHI, V});
531 }
532
533 // Now clear and re-populate the map for the next iteration.
534 SimplifiedValues.clear();
535 while (!SimplifiedInputValues.empty())
536 SimplifiedValues.insert(SimplifiedInputValues.pop_back_val());
537
538 UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE, L);
539
540 BBWorklist.clear();
541 BBWorklist.insert(L->getHeader());
542 // Note that we *must not* cache the size, this loop grows the worklist.
543 for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
544 BasicBlock *BB = BBWorklist[Idx];
545
546 // Visit all instructions in the given basic block and try to simplify
547 // it. We don't change the actual IR, just count optimization
548 // opportunities.
549 for (Instruction &I : *BB) {
550 // These won't get into the final code - don't even try calculating the
551 // cost for them.
552 if (isa<DbgInfoIntrinsic>(I) || EphValues.count(&I))
553 continue;
554
555 // Track this instruction's expected baseline cost when executing the
556 // rolled loop form.
557 RolledDynamicCost += TTI.getInstructionCost(&I, CostKind);
558
559 // Visit the instruction to analyze its loop cost after unrolling,
560 // and if the visitor returns true, mark the instruction as free after
561 // unrolling and continue.
562 bool IsFree = Analyzer.visit(I);
563 bool Inserted = InstCostMap.insert({&I, (int)Iteration,
564 (unsigned)IsFree,
565 /*IsCounted*/ false}).second;
566 (void)Inserted;
567 assert(Inserted && "Cannot have a state for an unvisited instruction!");
568
569 if (IsFree)
570 continue;
571
572 // Can't properly model a cost of a call.
573 // FIXME: With a proper cost model we should be able to do it.
574 if (auto *CI = dyn_cast<CallInst>(&I)) {
575 const Function *Callee = CI->getCalledFunction();
576 if (!Callee || TTI.isLoweredToCall(Callee)) {
577 LLVM_DEBUG(dbgs() << "Can't analyze cost of loop with call\n");
578 return std::nullopt;
579 }
580 }
581
582 // If the instruction might have a side-effect recursively account for
583 // the cost of it and all the instructions leading up to it.
584 if (I.mayHaveSideEffects())
585 AddCostRecursively(I, Iteration);
586
587 // If unrolled body turns out to be too big, bail out.
588 if (UnrolledCost > MaxUnrolledLoopSize) {
589 LLVM_DEBUG(dbgs() << " Exceeded threshold.. exiting.\n"
590 << " UnrolledCost: " << UnrolledCost
591 << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
592 << "\n");
593 return std::nullopt;
594 }
595 }
596
597 Instruction *TI = BB->getTerminator();
598
599 auto getSimplifiedConstant = [&](Value *V) -> Constant * {
600 if (SimplifiedValues.count(V))
601 V = SimplifiedValues.lookup(V);
602 return dyn_cast<Constant>(V);
603 };
604
605 // Add in the live successors by first checking whether we have terminator
606 // that may be simplified based on the values simplified by this call.
607 BasicBlock *KnownSucc = nullptr;
608 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
609 if (BI->isConditional()) {
610 if (auto *SimpleCond = getSimplifiedConstant(BI->getCondition())) {
611 // Just take the first successor if condition is undef
612 if (isa<UndefValue>(SimpleCond))
613 KnownSucc = BI->getSuccessor(0);
614 else if (ConstantInt *SimpleCondVal =
615 dyn_cast<ConstantInt>(SimpleCond))
616 KnownSucc = BI->getSuccessor(SimpleCondVal->isZero() ? 1 : 0);
617 }
618 }
619 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
620 if (auto *SimpleCond = getSimplifiedConstant(SI->getCondition())) {
621 // Just take the first successor if condition is undef
622 if (isa<UndefValue>(SimpleCond))
623 KnownSucc = SI->getSuccessor(0);
624 else if (ConstantInt *SimpleCondVal =
625 dyn_cast<ConstantInt>(SimpleCond))
626 KnownSucc = SI->findCaseValue(SimpleCondVal)->getCaseSuccessor();
627 }
628 }
629 if (KnownSucc) {
630 if (L->contains(KnownSucc))
631 BBWorklist.insert(KnownSucc);
632 else
633 ExitWorklist.insert({BB, KnownSucc});
634 continue;
635 }
636
637 // Add BB's successors to the worklist.
638 for (BasicBlock *Succ : successors(BB))
639 if (L->contains(Succ))
640 BBWorklist.insert(Succ);
641 else
642 ExitWorklist.insert({BB, Succ});
643 AddCostRecursively(*TI, Iteration);
644 }
645
646 // If we found no optimization opportunities on the first iteration, we
647 // won't find them on later ones too.
648 if (UnrolledCost == RolledDynamicCost) {
649 LLVM_DEBUG(dbgs() << " No opportunities found.. exiting.\n"
650 << " UnrolledCost: " << UnrolledCost << "\n");
651 return std::nullopt;
652 }
653 }
654
655 while (!ExitWorklist.empty()) {
656 BasicBlock *ExitingBB, *ExitBB;
657 std::tie(ExitingBB, ExitBB) = ExitWorklist.pop_back_val();
658
659 for (Instruction &I : *ExitBB) {
660 auto *PN = dyn_cast<PHINode>(&I);
661 if (!PN)
662 break;
663
664 Value *Op = PN->getIncomingValueForBlock(ExitingBB);
665 if (auto *OpI = dyn_cast<Instruction>(Op))
666 if (L->contains(OpI))
667 AddCostRecursively(*OpI, TripCount - 1);
668 }
669 }
670
671 assert(UnrolledCost.isValid() && RolledDynamicCost.isValid() &&
672 "All instructions must have a valid cost, whether the "
673 "loop is rolled or unrolled.");
674
675 LLVM_DEBUG(dbgs() << "Analysis finished:\n"
676 << "UnrolledCost: " << UnrolledCost << ", "
677 << "RolledDynamicCost: " << RolledDynamicCost << "\n");
678 return {{unsigned(*UnrolledCost.getValue()),
679 unsigned(*RolledDynamicCost.getValue())}};
680}
681
683 const Loop *L, const TargetTransformInfo &TTI,
684 const SmallPtrSetImpl<const Value *> &EphValues, unsigned BEInsns) {
686 for (BasicBlock *BB : L->blocks())
687 Metrics.analyzeBasicBlock(BB, TTI, EphValues, /* PrepareForLTO= */ false,
688 L);
689 NumInlineCandidates = Metrics.NumInlineCandidates;
690 NotDuplicatable = Metrics.notDuplicatable;
691 Convergence = Metrics.Convergence;
692 LoopSize = Metrics.NumInsts;
694 Metrics.Convergence != ConvergenceKind::Uncontrolled &&
696
697 // Don't allow an estimate of size zero. This would allows unrolling of loops
698 // with huge iteration counts, which is a compile time problem even if it's
699 // not a problem for code quality. Also, the code using this size may assume
700 // that each loop has at least three instructions (likely a conditional
701 // branch, a comparison feeding that branch, and some kind of loop increment
702 // feeding that comparison instruction).
703 if (LoopSize.isValid() && LoopSize < BEInsns + 1)
704 // This is an open coded max() on InstructionCost
705 LoopSize = BEInsns + 1;
706}
707
709 switch (Convergence) {
711 LLVM_DEBUG(dbgs() << " Convergence prevents unrolling.\n");
712 return false;
713 default:
714 break;
715 }
716 if (!LoopSize.isValid()) {
717 LLVM_DEBUG(dbgs() << " Invalid loop size prevents unrolling.\n");
718 return false;
719 }
720 if (NotDuplicatable) {
721 LLVM_DEBUG(dbgs() << " Non-duplicatable blocks prevent unrolling.\n");
722 return false;
723 }
724 return true;
725}
726
729 unsigned CountOverwrite) const {
730 unsigned LS = *LoopSize.getValue();
731 assert(LS >= UP.BEInsns && "LoopSize should not be less than BEInsns!");
732 if (CountOverwrite)
733 return static_cast<uint64_t>(LS - UP.BEInsns) * CountOverwrite + UP.BEInsns;
734 else
735 return static_cast<uint64_t>(LS - UP.BEInsns) * UP.Count + UP.BEInsns;
736}
737
738// Returns the loop hint metadata node with the given name (for example,
739// "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is
740// returned.
742 if (MDNode *LoopID = L->getLoopID())
743 return GetUnrollMetadata(LoopID, Name);
744 return nullptr;
745}
746
747// Returns true if the loop has an unroll(full) pragma.
748static bool hasUnrollFullPragma(const Loop *L) {
749 return getUnrollMetadataForLoop(L, "llvm.loop.unroll.full");
750}
751
752// Returns true if the loop has an unroll(enable) pragma. This metadata is used
753// for both "#pragma unroll" and "#pragma clang loop unroll(enable)" directives.
754static bool hasUnrollEnablePragma(const Loop *L) {
755 return getUnrollMetadataForLoop(L, "llvm.loop.unroll.enable");
756}
757
758// Returns true if the loop has an runtime unroll(disable) pragma.
759static bool hasRuntimeUnrollDisablePragma(const Loop *L) {
760 return getUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable");
761}
762
763// If loop has an unroll_count pragma return the (necessarily
764// positive) value from the pragma. Otherwise return 0.
765static unsigned unrollCountPragmaValue(const Loop *L) {
766 MDNode *MD = getUnrollMetadataForLoop(L, "llvm.loop.unroll.count");
767 if (MD) {
768 assert(MD->getNumOperands() == 2 &&
769 "Unroll count hint metadata should have two operands.");
770 unsigned Count =
771 mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
772 assert(Count >= 1 && "Unroll count must be positive.");
773 return Count;
774 }
775 return 0;
776}
777
778// Computes the boosting factor for complete unrolling.
779// If fully unrolling the loop would save a lot of RolledDynamicCost, it would
780// be beneficial to fully unroll the loop even if unrolledcost is large. We
781// use (RolledDynamicCost / UnrolledCost) to model the unroll benefits to adjust
782// the unroll threshold.
783static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost,
784 unsigned MaxPercentThresholdBoost) {
785 if (Cost.RolledDynamicCost >= std::numeric_limits<unsigned>::max() / 100)
786 return 100;
787 else if (Cost.UnrolledCost != 0)
788 // The boosting factor is RolledDynamicCost / UnrolledCost
789 return std::min(100 * Cost.RolledDynamicCost / Cost.UnrolledCost,
790 MaxPercentThresholdBoost);
791 else
792 return MaxPercentThresholdBoost;
793}
794
795static std::optional<unsigned>
796shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo,
797 const unsigned TripMultiple, const unsigned TripCount,
798 unsigned MaxTripCount, const UnrollCostEstimator UCE,
800
801 // Using unroll pragma
802 // 1st priority is unroll count set by "unroll-count" option.
803
804 if (PInfo.UserUnrollCount) {
805 if (UP.AllowRemainder &&
806 UCE.getUnrolledLoopSize(UP, (unsigned)UnrollCount) < UP.Threshold)
807 return (unsigned)UnrollCount;
808 }
809
810 // 2nd priority is unroll count set by pragma.
811 if (PInfo.PragmaCount > 0) {
812 if ((UP.AllowRemainder || (TripMultiple % PInfo.PragmaCount == 0)))
813 return PInfo.PragmaCount;
814 }
815
816 if (PInfo.PragmaFullUnroll && TripCount != 0) {
817 // Certain cases with UBSAN can cause trip count to be calculated as
818 // INT_MAX, Block full unrolling at a reasonable limit so that the compiler
819 // doesn't hang trying to unroll the loop. See PR77842
820 if (TripCount > PragmaUnrollFullMaxIterations) {
821 LLVM_DEBUG(dbgs() << "Won't unroll; trip count is too large\n");
822 return std::nullopt;
823 }
824
825 return TripCount;
826 }
827
828 if (PInfo.PragmaEnableUnroll && !TripCount && MaxTripCount &&
829 MaxTripCount <= UP.MaxUpperBound)
830 return MaxTripCount;
831
832 // if didn't return until here, should continue to other priorties
833 return std::nullopt;
834}
835
836static std::optional<unsigned> shouldFullUnroll(
839 const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE,
841 assert(FullUnrollTripCount && "should be non-zero!");
842
843 if (FullUnrollTripCount > UP.FullUnrollMaxCount)
844 return std::nullopt;
845
846 // When computing the unrolled size, note that BEInsns are not replicated
847 // like the rest of the loop body.
848 if (UCE.getUnrolledLoopSize(UP) < UP.Threshold)
849 return FullUnrollTripCount;
850
851 // The loop isn't that small, but we still can fully unroll it if that
852 // helps to remove a significant number of instructions.
853 // To check that, run additional analysis on the loop.
854 if (std::optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
855 L, FullUnrollTripCount, DT, SE, EphValues, TTI,
858 unsigned Boost =
860 if (Cost->UnrolledCost < UP.Threshold * Boost / 100)
861 return FullUnrollTripCount;
862 }
863 return std::nullopt;
864}
865
866static std::optional<unsigned>
867shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount,
868 const UnrollCostEstimator UCE,
870
871 if (!TripCount)
872 return std::nullopt;
873
874 if (!UP.Partial) {
875 LLVM_DEBUG(dbgs() << " will not try to unroll partially because "
876 << "-unroll-allow-partial not given\n");
877 return 0;
878 }
879 unsigned count = UP.Count;
880 if (count == 0)
881 count = TripCount;
882 if (UP.PartialThreshold != NoThreshold) {
883 // Reduce unroll count to be modulo of TripCount for partial unrolling.
885 count = (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
886 (LoopSize - UP.BEInsns);
887 if (count > UP.MaxCount)
888 count = UP.MaxCount;
889 while (count != 0 && TripCount % count != 0)
890 count--;
891 if (UP.AllowRemainder && count <= 1) {
892 // If there is no Count that is modulo of TripCount, set Count to
893 // largest power-of-two factor that satisfies the threshold limit.
894 // As we'll create fixup loop, do the type of unrolling only if
895 // remainder loop is allowed.
897 while (count != 0 &&
899 count >>= 1;
900 }
901 if (count < 2) {
902 count = 0;
903 }
904 } else {
905 count = TripCount;
906 }
907 if (count > UP.MaxCount)
908 count = UP.MaxCount;
909
910 LLVM_DEBUG(dbgs() << " partially unrolling with count: " << count << "\n");
911
912 return count;
913}
914// Returns true if unroll count was set explicitly.
915// Calculates unroll count and writes it to UP.Count.
916// Unless IgnoreUser is true, will also use metadata and command-line options
917// that are specific to to the LoopUnroll pass (which, for instance, are
918// irrelevant for the LoopUnrollAndJam pass).
919// FIXME: This function is used by LoopUnroll and LoopUnrollAndJam, but consumes
920// many LoopUnroll-specific options. The shared functionality should be
921// refactored into it own function.
925 const SmallPtrSetImpl<const Value *> &EphValues,
926 OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount,
927 bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE,
929 TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) {
930
931 unsigned LoopSize = UCE.getRolledLoopSize();
932
933 const bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
934 const bool PragmaFullUnroll = hasUnrollFullPragma(L);
935 const unsigned PragmaCount = unrollCountPragmaValue(L);
936 const bool PragmaEnableUnroll = hasUnrollEnablePragma(L);
937
938 const bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
939 PragmaEnableUnroll || UserUnrollCount;
940
941 PragmaInfo PInfo(UserUnrollCount, PragmaFullUnroll, PragmaCount,
942 PragmaEnableUnroll);
943 // Use an explicit peel count that has been specified for testing. In this
944 // case it's not permitted to also specify an explicit unroll count.
945 if (PP.PeelCount) {
946 if (UnrollCount.getNumOccurrences() > 0) {
947 report_fatal_error("Cannot specify both explicit peel count and "
948 "explicit unroll count", /*GenCrashDiag=*/false);
949 }
950 UP.Count = 1;
951 UP.Runtime = false;
952 return true;
953 }
954 // Check for explicit Count.
955 // 1st priority is unroll count set by "unroll-count" option.
956 // 2nd priority is unroll count set by pragma.
957 if (auto UnrollFactor = shouldPragmaUnroll(L, PInfo, TripMultiple, TripCount,
958 MaxTripCount, UCE, UP)) {
959 UP.Count = *UnrollFactor;
960
961 if (UserUnrollCount || (PragmaCount > 0)) {
962 UP.AllowExpensiveTripCount = true;
963 UP.Force = true;
964 }
965 UP.Runtime |= (PragmaCount > 0);
966 return ExplicitUnroll;
967 } else {
968 if (ExplicitUnroll && TripCount != 0) {
969 // If the loop has an unrolling pragma, we want to be more aggressive with
970 // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold
971 // value which is larger than the default limits.
972 UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
974 std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
975 }
976 }
977
978 // 3rd priority is exact full unrolling. This will eliminate all copies
979 // of some exit test.
980 UP.Count = 0;
981 if (TripCount) {
982 UP.Count = TripCount;
983 if (auto UnrollFactor = shouldFullUnroll(L, TTI, DT, SE, EphValues,
984 TripCount, UCE, UP)) {
985 UP.Count = *UnrollFactor;
986 UseUpperBound = false;
987 return ExplicitUnroll;
988 }
989 }
990
991 // 4th priority is bounded unrolling.
992 // We can unroll by the upper bound amount if it's generally allowed or if
993 // we know that the loop is executed either the upper bound or zero times.
994 // (MaxOrZero unrolling keeps only the first loop test, so the number of
995 // loop tests remains the same compared to the non-unrolled version, whereas
996 // the generic upper bound unrolling keeps all but the last loop test so the
997 // number of loop tests goes up which may end up being worse on targets with
998 // constrained branch predictor resources so is controlled by an option.)
999 // In addition we only unroll small upper bounds.
1000 // Note that the cost of bounded unrolling is always strictly greater than
1001 // cost of exact full unrolling. As such, if we have an exact count and
1002 // found it unprofitable, we'll never chose to bounded unroll.
1003 if (!TripCount && MaxTripCount && (UP.UpperBound || MaxOrZero) &&
1004 MaxTripCount <= UP.MaxUpperBound) {
1005 UP.Count = MaxTripCount;
1006 if (auto UnrollFactor = shouldFullUnroll(L, TTI, DT, SE, EphValues,
1007 MaxTripCount, UCE, UP)) {
1008 UP.Count = *UnrollFactor;
1009 UseUpperBound = true;
1010 return ExplicitUnroll;
1011 }
1012 }
1013
1014 // 5th priority is loop peeling.
1015 computePeelCount(L, LoopSize, PP, TripCount, DT, SE, AC, UP.Threshold);
1016 if (PP.PeelCount) {
1017 UP.Runtime = false;
1018 UP.Count = 1;
1019 return ExplicitUnroll;
1020 }
1021
1022 // Before starting partial unrolling, set up.partial to true,
1023 // if user explicitly asked for unrolling
1024 if (TripCount)
1025 UP.Partial |= ExplicitUnroll;
1026
1027 // 6th priority is partial unrolling.
1028 // Try partial unroll only when TripCount could be statically calculated.
1029 if (auto UnrollFactor = shouldPartialUnroll(LoopSize, TripCount, UCE, UP)) {
1030 UP.Count = *UnrollFactor;
1031
1032 if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
1033 UP.Count != TripCount)
1034 ORE->emit([&]() {
1036 "FullUnrollAsDirectedTooLarge",
1037 L->getStartLoc(), L->getHeader())
1038 << "Unable to fully unroll loop as directed by unroll pragma "
1039 "because "
1040 "unrolled size is too large.";
1041 });
1042
1043 if (UP.PartialThreshold != NoThreshold) {
1044 if (UP.Count == 0) {
1045 if (PragmaEnableUnroll)
1046 ORE->emit([&]() {
1048 "UnrollAsDirectedTooLarge",
1049 L->getStartLoc(), L->getHeader())
1050 << "Unable to unroll loop as directed by unroll(enable) "
1051 "pragma "
1052 "because unrolled size is too large.";
1053 });
1054 }
1055 }
1056 return ExplicitUnroll;
1057 }
1058 assert(TripCount == 0 &&
1059 "All cases when TripCount is constant should be covered here.");
1060 if (PragmaFullUnroll)
1061 ORE->emit([&]() {
1063 DEBUG_TYPE, "CantFullUnrollAsDirectedRuntimeTripCount",
1064 L->getStartLoc(), L->getHeader())
1065 << "Unable to fully unroll loop as directed by unroll(full) "
1066 "pragma "
1067 "because loop has a runtime trip count.";
1068 });
1069
1070 // 7th priority is runtime unrolling.
1071 // Don't unroll a runtime trip count loop when it is disabled.
1073 UP.Count = 0;
1074 return false;
1075 }
1076
1077 // Don't unroll a small upper bound loop unless user or TTI asked to do so.
1078 if (MaxTripCount && !UP.Force && MaxTripCount < UP.MaxUpperBound) {
1079 UP.Count = 0;
1080 return false;
1081 }
1082
1083 // Check if the runtime trip count is too small when profile is available.
1084 if (L->getHeader()->getParent()->hasProfileData()) {
1085 if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
1086 if (*ProfileTripCount < FlatLoopTripCountThreshold)
1087 return false;
1088 else
1089 UP.AllowExpensiveTripCount = true;
1090 }
1091 }
1092 UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
1093 if (!UP.Runtime) {
1094 LLVM_DEBUG(
1095 dbgs() << " will not try to unroll loop with runtime trip count "
1096 << "-unroll-runtime not given\n");
1097 UP.Count = 0;
1098 return false;
1099 }
1100 if (UP.Count == 0)
1102
1103 // Reduce unroll count to be the largest power-of-two factor of
1104 // the original count which satisfies the threshold limit.
1105 while (UP.Count != 0 &&
1107 UP.Count >>= 1;
1108
1109#ifndef NDEBUG
1110 unsigned OrigCount = UP.Count;
1111#endif
1112
1113 if (!UP.AllowRemainder && UP.Count != 0 && (TripMultiple % UP.Count) != 0) {
1114 while (UP.Count != 0 && TripMultiple % UP.Count != 0)
1115 UP.Count >>= 1;
1116 LLVM_DEBUG(
1117 dbgs() << "Remainder loop is restricted (that could architecture "
1118 "specific or because the loop contains a convergent "
1119 "instruction), so unroll count must divide the trip "
1120 "multiple, "
1121 << TripMultiple << ". Reducing unroll count from " << OrigCount
1122 << " to " << UP.Count << ".\n");
1123
1124 using namespace ore;
1125
1126 if (unrollCountPragmaValue(L) > 0 && !UP.AllowRemainder)
1127 ORE->emit([&]() {
1129 "DifferentUnrollCountFromDirected",
1130 L->getStartLoc(), L->getHeader())
1131 << "Unable to unroll loop the number of times directed by "
1132 "unroll_count pragma because remainder loop is restricted "
1133 "(that could architecture specific or because the loop "
1134 "contains a convergent instruction) and so must have an "
1135 "unroll "
1136 "count that divides the loop trip multiple of "
1137 << NV("TripMultiple", TripMultiple) << ". Unrolling instead "
1138 << NV("UnrollCount", UP.Count) << " time(s).";
1139 });
1140 }
1141
1142 if (UP.Count > UP.MaxCount)
1143 UP.Count = UP.MaxCount;
1144
1145 if (MaxTripCount && UP.Count > MaxTripCount)
1146 UP.Count = MaxTripCount;
1147
1148 LLVM_DEBUG(dbgs() << " runtime unrolling with count: " << UP.Count
1149 << "\n");
1150 if (UP.Count < 2)
1151 UP.Count = 0;
1152 return ExplicitUnroll;
1153}
1154
1155static LoopUnrollResult
1159 ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel,
1160 bool OnlyFullUnroll, bool OnlyWhenForced, bool ForgetAllSCEV,
1161 std::optional<unsigned> ProvidedCount,
1162 std::optional<unsigned> ProvidedThreshold,
1163 std::optional<bool> ProvidedAllowPartial,
1164 std::optional<bool> ProvidedRuntime,
1165 std::optional<bool> ProvidedUpperBound,
1166 std::optional<bool> ProvidedAllowPeeling,
1167 std::optional<bool> ProvidedAllowProfileBasedPeeling,
1168 std::optional<unsigned> ProvidedFullUnrollMaxCount,
1169 AAResults *AA = nullptr) {
1170
1171 LLVM_DEBUG(dbgs() << "Loop Unroll: F["
1172 << L->getHeader()->getParent()->getName() << "] Loop %"
1173 << L->getHeader()->getName() << "\n");
1175 if (TM & TM_Disable)
1177
1178 // If this loop isn't forced to be unrolled, avoid unrolling it when the
1179 // parent loop has an explicit unroll-and-jam pragma. This is to prevent
1180 // automatic unrolling from interfering with the user requested
1181 // transformation.
1182 Loop *ParentL = L->getParentLoop();
1183 if (ParentL != nullptr &&
1186 LLVM_DEBUG(dbgs() << "Not unrolling loop since parent loop has"
1187 << " llvm.loop.unroll_and_jam.\n");
1189 }
1190
1191 // If this loop isn't forced to be unrolled, avoid unrolling it when the
1192 // loop has an explicit unroll-and-jam pragma. This is to prevent automatic
1193 // unrolling from interfering with the user requested transformation.
1196 LLVM_DEBUG(
1197 dbgs()
1198 << " Not unrolling loop since it has llvm.loop.unroll_and_jam.\n");
1200 }
1201
1202 if (!L->isLoopSimplifyForm()) {
1203 LLVM_DEBUG(
1204 dbgs() << " Not unrolling loop which is not in loop-simplify form.\n");
1206 }
1207
1208 // When automatic unrolling is disabled, do not unroll unless overridden for
1209 // this loop.
1210 if (OnlyWhenForced && !(TM & TM_Enable))
1212
1213 bool OptForSize = L->getHeader()->getParent()->hasOptSize();
1215 L, SE, TTI, BFI, PSI, ORE, OptLevel, ProvidedThreshold, ProvidedCount,
1216 ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
1217 ProvidedFullUnrollMaxCount);
1219 L, SE, TTI, ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling, true);
1220
1221 // Exit early if unrolling is disabled. For OptForSize, we pick the loop size
1222 // as threshold later on.
1223 if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0) &&
1224 !OptForSize)
1226
1228 CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
1229
1230 UnrollCostEstimator UCE(L, TTI, EphValues, UP.BEInsns);
1231 if (!UCE.canUnroll()) {
1232 LLVM_DEBUG(dbgs() << " Loop not considered unrollable.\n");
1234 }
1235
1236 unsigned LoopSize = UCE.getRolledLoopSize();
1237 LLVM_DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
1238
1239 // When optimizing for size, use LoopSize + 1 as threshold (we use < Threshold
1240 // later), to (fully) unroll loops, if it does not increase code size.
1241 if (OptForSize)
1242 UP.Threshold = std::max(UP.Threshold, LoopSize + 1);
1243
1244 if (UCE.NumInlineCandidates != 0) {
1245 LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
1247 }
1248
1249 // Find the smallest exact trip count for any exit. This is an upper bound
1250 // on the loop trip count, but an exit at an earlier iteration is still
1251 // possible. An unroll by the smallest exact trip count guarantees that all
1252 // branches relating to at least one exit can be eliminated. This is unlike
1253 // the max trip count, which only guarantees that the backedge can be broken.
1254 unsigned TripCount = 0;
1255 unsigned TripMultiple = 1;
1256 SmallVector<BasicBlock *, 8> ExitingBlocks;
1257 L->getExitingBlocks(ExitingBlocks);
1258 for (BasicBlock *ExitingBlock : ExitingBlocks)
1259 if (unsigned TC = SE.getSmallConstantTripCount(L, ExitingBlock))
1260 if (!TripCount || TC < TripCount)
1261 TripCount = TripMultiple = TC;
1262
1263 if (!TripCount) {
1264 // If no exact trip count is known, determine the trip multiple of either
1265 // the loop latch or the single exiting block.
1266 // TODO: Relax for multiple exits.
1267 BasicBlock *ExitingBlock = L->getLoopLatch();
1268 if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
1269 ExitingBlock = L->getExitingBlock();
1270 if (ExitingBlock)
1271 TripMultiple = SE.getSmallConstantTripMultiple(L, ExitingBlock);
1272 }
1273
1274 // If the loop contains a convergent operation, the prelude we'd add
1275 // to do the first few instructions before we hit the unrolled loop
1276 // is unsafe -- it adds a control-flow dependency to the convergent
1277 // operation. Therefore restrict remainder loop (try unrolling without).
1278 //
1279 // TODO: This is somewhat conservative; we could allow the remainder if the
1280 // trip count is uniform.
1282
1283 // Try to find the trip count upper bound if we cannot find the exact trip
1284 // count.
1285 unsigned MaxTripCount = 0;
1286 bool MaxOrZero = false;
1287 if (!TripCount) {
1288 MaxTripCount = SE.getSmallConstantMaxTripCount(L);
1289 MaxOrZero = SE.isBackedgeTakenCountMaxOrZero(L);
1290 }
1291
1292 // computeUnrollCount() decides whether it is beneficial to use upper bound to
1293 // fully unroll the loop.
1294 bool UseUpperBound = false;
1295 bool IsCountSetExplicitly = computeUnrollCount(
1296 L, TTI, DT, LI, &AC, SE, EphValues, &ORE, TripCount, MaxTripCount,
1297 MaxOrZero, TripMultiple, UCE, UP, PP, UseUpperBound);
1298 if (!UP.Count)
1300
1302
1303 if (PP.PeelCount) {
1304 assert(UP.Count == 1 && "Cannot perform peel and unroll in the same step");
1305 LLVM_DEBUG(dbgs() << "PEELING loop %" << L->getHeader()->getName()
1306 << " with iteration count " << PP.PeelCount << "!\n");
1307 ORE.emit([&]() {
1308 return OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(),
1309 L->getHeader())
1310 << " peeled loop by " << ore::NV("PeelCount", PP.PeelCount)
1311 << " iterations";
1312 });
1313
1314 ValueToValueMapTy VMap;
1315 if (peelLoop(L, PP.PeelCount, LI, &SE, DT, &AC, PreserveLCSSA, VMap)) {
1316 simplifyLoopAfterUnroll(L, true, LI, &SE, &DT, &AC, &TTI, nullptr);
1317 // If the loop was peeled, we already "used up" the profile information
1318 // we had, so we don't want to unroll or peel again.
1320 L->setLoopAlreadyUnrolled();
1322 }
1324 }
1325
1326 // Do not attempt partial/runtime unrolling in FullLoopUnrolling
1327 if (OnlyFullUnroll && (UP.Count < TripCount || UP.Count < MaxTripCount)) {
1328 LLVM_DEBUG(
1329 dbgs() << "Not attempting partial/runtime unroll in FullLoopUnroll.\n");
1331 }
1332
1333 // At this point, UP.Runtime indicates that run-time unrolling is allowed.
1334 // However, we only want to actually perform it if we don't know the trip
1335 // count and the unroll count doesn't divide the known trip multiple.
1336 // TODO: This decision should probably be pushed up into
1337 // computeUnrollCount().
1338 UP.Runtime &= TripCount == 0 && TripMultiple % UP.Count != 0;
1339
1340 // Save loop properties before it is transformed.
1341 MDNode *OrigLoopID = L->getLoopID();
1342
1343 // Unroll the loop.
1344 Loop *RemainderLoop = nullptr;
1346 ULO.Count = UP.Count;
1347 ULO.Force = UP.Force;
1350 ULO.Runtime = UP.Runtime;
1351 ULO.ForgetAllSCEV = ForgetAllSCEV;
1353 LoopUnrollResult UnrollResult = UnrollLoop(
1354 L, ULO, LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop, AA);
1355 if (UnrollResult == LoopUnrollResult::Unmodified)
1357
1358 if (RemainderLoop) {
1359 std::optional<MDNode *> RemainderLoopID =
1362 if (RemainderLoopID)
1363 RemainderLoop->setLoopID(*RemainderLoopID);
1364 }
1365
1366 if (UnrollResult != LoopUnrollResult::FullyUnrolled) {
1367 std::optional<MDNode *> NewLoopID =
1370 if (NewLoopID) {
1371 L->setLoopID(*NewLoopID);
1372
1373 // Do not setLoopAlreadyUnrolled if loop attributes have been specified
1374 // explicitly.
1375 return UnrollResult;
1376 }
1377 }
1378
1379 // If loop has an unroll count pragma or unrolled by explicitly set count
1380 // mark loop as unrolled to prevent unrolling beyond that requested.
1381 if (UnrollResult != LoopUnrollResult::FullyUnrolled && IsCountSetExplicitly)
1382 L->setLoopAlreadyUnrolled();
1383
1384 return UnrollResult;
1385}
1386
1387namespace {
1388
1389class LoopUnroll : public LoopPass {
1390public:
1391 static char ID; // Pass ID, replacement for typeid
1392
1393 int OptLevel;
1394
1395 /// If false, use a cost model to determine whether unrolling of a loop is
1396 /// profitable. If true, only loops that explicitly request unrolling via
1397 /// metadata are considered. All other loops are skipped.
1398 bool OnlyWhenForced;
1399
1400 /// If false, when SCEV is invalidated, only forget everything in the
1401 /// top-most loop (call forgetTopMostLoop), of the loop being processed.
1402 /// Otherwise, forgetAllLoops and rebuild when needed next.
1403 bool ForgetAllSCEV;
1404
1405 std::optional<unsigned> ProvidedCount;
1406 std::optional<unsigned> ProvidedThreshold;
1407 std::optional<bool> ProvidedAllowPartial;
1408 std::optional<bool> ProvidedRuntime;
1409 std::optional<bool> ProvidedUpperBound;
1410 std::optional<bool> ProvidedAllowPeeling;
1411 std::optional<bool> ProvidedAllowProfileBasedPeeling;
1412 std::optional<unsigned> ProvidedFullUnrollMaxCount;
1413
1414 LoopUnroll(int OptLevel = 2, bool OnlyWhenForced = false,
1415 bool ForgetAllSCEV = false,
1416 std::optional<unsigned> Threshold = std::nullopt,
1417 std::optional<unsigned> Count = std::nullopt,
1418 std::optional<bool> AllowPartial = std::nullopt,
1419 std::optional<bool> Runtime = std::nullopt,
1420 std::optional<bool> UpperBound = std::nullopt,
1421 std::optional<bool> AllowPeeling = std::nullopt,
1422 std::optional<bool> AllowProfileBasedPeeling = std::nullopt,
1423 std::optional<unsigned> ProvidedFullUnrollMaxCount = std::nullopt)
1424 : LoopPass(ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced),
1425 ForgetAllSCEV(ForgetAllSCEV), ProvidedCount(std::move(Count)),
1426 ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
1427 ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound),
1428 ProvidedAllowPeeling(AllowPeeling),
1429 ProvidedAllowProfileBasedPeeling(AllowProfileBasedPeeling),
1430 ProvidedFullUnrollMaxCount(ProvidedFullUnrollMaxCount) {
1432 }
1433
1434 bool runOnLoop(Loop *L, LPPassManager &LPM) override {
1435 if (skipLoop(L))
1436 return false;
1437
1438 Function &F = *L->getHeader()->getParent();
1439
1440 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1441 LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1442 ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1443 const TargetTransformInfo &TTI =
1444 getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1445 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1446 // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
1447 // pass. Function analyses need to be preserved across loop transformations
1448 // but ORE cannot be preserved (see comment before the pass definition).
1450 bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
1451
1453 L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr, PreserveLCSSA, OptLevel,
1454 /*OnlyFullUnroll*/ false, OnlyWhenForced, ForgetAllSCEV, ProvidedCount,
1455 ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime,
1456 ProvidedUpperBound, ProvidedAllowPeeling,
1457 ProvidedAllowProfileBasedPeeling, ProvidedFullUnrollMaxCount);
1458
1459 if (Result == LoopUnrollResult::FullyUnrolled)
1460 LPM.markLoopAsDeleted(*L);
1461
1462 return Result != LoopUnrollResult::Unmodified;
1463 }
1464
1465 /// This transformation requires natural loop information & requires that
1466 /// loop preheaders be inserted into the CFG...
1467 void getAnalysisUsage(AnalysisUsage &AU) const override {
1470 // FIXME: Loop passes are required to preserve domtree, and for now we just
1471 // recreate dom info if anything gets unrolled.
1473 }
1474};
1475
1476} // end anonymous namespace
1477
1478char LoopUnroll::ID = 0;
1479
1480INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
1484INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
1485
1486Pass *llvm::createLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
1487 bool ForgetAllSCEV, int Threshold, int Count,
1488 int AllowPartial, int Runtime, int UpperBound,
1489 int AllowPeeling) {
1490 // TODO: It would make more sense for this function to take the optionals
1491 // directly, but that's dangerous since it would silently break out of tree
1492 // callers.
1493 return new LoopUnroll(
1494 OptLevel, OnlyWhenForced, ForgetAllSCEV,
1495 Threshold == -1 ? std::nullopt : std::optional<unsigned>(Threshold),
1496 Count == -1 ? std::nullopt : std::optional<unsigned>(Count),
1497 AllowPartial == -1 ? std::nullopt : std::optional<bool>(AllowPartial),
1498 Runtime == -1 ? std::nullopt : std::optional<bool>(Runtime),
1499 UpperBound == -1 ? std::nullopt : std::optional<bool>(UpperBound),
1500 AllowPeeling == -1 ? std::nullopt : std::optional<bool>(AllowPeeling));
1501}
1502
1505 LPMUpdater &Updater) {
1506 // For the new PM, we can't use OptimizationRemarkEmitter as an analysis
1507 // pass. Function analyses need to be preserved across loop transformations
1508 // but ORE cannot be preserved (see comment before the pass definition).
1509 OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
1510
1511 // Keep track of the previous loop structure so we can identify new loops
1512 // created by unrolling.
1513 Loop *ParentL = L.getParentLoop();
1514 SmallPtrSet<Loop *, 4> OldLoops;
1515 if (ParentL)
1516 OldLoops.insert(ParentL->begin(), ParentL->end());
1517 else
1518 OldLoops.insert(AR.LI.begin(), AR.LI.end());
1519
1520 std::string LoopName = std::string(L.getName());
1521
1522 bool Changed =
1523 tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, ORE,
1524 /*BFI*/ nullptr, /*PSI*/ nullptr,
1525 /*PreserveLCSSA*/ true, OptLevel, /*OnlyFullUnroll*/ true,
1526 OnlyWhenForced, ForgetSCEV, /*Count*/ std::nullopt,
1527 /*Threshold*/ std::nullopt, /*AllowPartial*/ false,
1528 /*Runtime*/ false, /*UpperBound*/ false,
1529 /*AllowPeeling*/ true,
1530 /*AllowProfileBasedPeeling*/ false,
1531 /*FullUnrollMaxCount*/ std::nullopt) !=
1533 if (!Changed)
1534 return PreservedAnalyses::all();
1535
1536 // The parent must not be damaged by unrolling!
1537#ifndef NDEBUG
1538 if (ParentL)
1539 ParentL->verifyLoop();
1540#endif
1541
1542 // Unrolling can do several things to introduce new loops into a loop nest:
1543 // - Full unrolling clones child loops within the current loop but then
1544 // removes the current loop making all of the children appear to be new
1545 // sibling loops.
1546 //
1547 // When a new loop appears as a sibling loop after fully unrolling,
1548 // its nesting structure has fundamentally changed and we want to revisit
1549 // it to reflect that.
1550 //
1551 // When unrolling has removed the current loop, we need to tell the
1552 // infrastructure that it is gone.
1553 //
1554 // Finally, we support a debugging/testing mode where we revisit child loops
1555 // as well. These are not expected to require further optimizations as either
1556 // they or the loop they were cloned from have been directly visited already.
1557 // But the debugging mode allows us to check this assumption.
1558 bool IsCurrentLoopValid = false;
1559 SmallVector<Loop *, 4> SibLoops;
1560 if (ParentL)
1561 SibLoops.append(ParentL->begin(), ParentL->end());
1562 else
1563 SibLoops.append(AR.LI.begin(), AR.LI.end());
1564 erase_if(SibLoops, [&](Loop *SibLoop) {
1565 if (SibLoop == &L) {
1566 IsCurrentLoopValid = true;
1567 return true;
1568 }
1569
1570 // Otherwise erase the loop from the list if it was in the old loops.
1571 return OldLoops.contains(SibLoop);
1572 });
1573 Updater.addSiblingLoops(SibLoops);
1574
1575 if (!IsCurrentLoopValid) {
1576 Updater.markLoopAsDeleted(L, LoopName);
1577 } else {
1578 // We can only walk child loops if the current loop remained valid.
1580 // Walk *all* of the child loops.
1581 SmallVector<Loop *, 4> ChildLoops(L.begin(), L.end());
1582 Updater.addChildLoops(ChildLoops);
1583 }
1584 }
1585
1587}
1588
1591 auto &LI = AM.getResult<LoopAnalysis>(F);
1592 // There are no loops in the function. Return before computing other expensive
1593 // analyses.
1594 if (LI.empty())
1595 return PreservedAnalyses::all();
1596 auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
1597 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1598 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1599 auto &AC = AM.getResult<AssumptionAnalysis>(F);
1601 AAResults &AA = AM.getResult<AAManager>(F);
1602
1603 LoopAnalysisManager *LAM = nullptr;
1604 if (auto *LAMProxy = AM.getCachedResult<LoopAnalysisManagerFunctionProxy>(F))
1605 LAM = &LAMProxy->getManager();
1606
1607 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
1608 ProfileSummaryInfo *PSI =
1609 MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
1610 auto *BFI = (PSI && PSI->hasProfileSummary()) ?
1611 &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
1612
1613 bool Changed = false;
1614
1615 // The unroller requires loops to be in simplified form, and also needs LCSSA.
1616 // Since simplification may add new inner loops, it has to run before the
1617 // legality and profitability checks. This means running the loop unroller
1618 // will simplify all loops, regardless of whether anything end up being
1619 // unrolled.
1620 for (const auto &L : LI) {
1621 Changed |=
1622 simplifyLoop(L, &DT, &LI, &SE, &AC, nullptr, false /* PreserveLCSSA */);
1623 Changed |= formLCSSARecursively(*L, DT, &LI, &SE);
1624 }
1625
1626 // Add the loop nests in the reverse order of LoopInfo. See method
1627 // declaration.
1629 appendLoopsToWorklist(LI, Worklist);
1630
1631 while (!Worklist.empty()) {
1632 // Because the LoopInfo stores the loops in RPO, we walk the worklist
1633 // from back to front so that we work forward across the CFG, which
1634 // for unrolling is only needed to get optimization remarks emitted in
1635 // a forward order.
1636 Loop &L = *Worklist.pop_back_val();
1637#ifndef NDEBUG
1638 Loop *ParentL = L.getParentLoop();
1639#endif
1640
1641 // Check if the profile summary indicates that the profiled application
1642 // has a huge working set size, in which case we disable peeling to avoid
1643 // bloating it further.
1644 std::optional<bool> LocalAllowPeeling = UnrollOpts.AllowPeeling;
1645 if (PSI && PSI->hasHugeWorkingSetSize())
1646 LocalAllowPeeling = false;
1647 std::string LoopName = std::string(L.getName());
1648 // The API here is quite complex to call and we allow to select some
1649 // flavors of unrolling during construction time (by setting UnrollOpts).
1651 &L, DT, &LI, SE, TTI, AC, ORE, BFI, PSI,
1652 /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, /*OnlyFullUnroll*/ false,
1653 UnrollOpts.OnlyWhenForced, UnrollOpts.ForgetSCEV,
1654 /*Count*/ std::nullopt,
1655 /*Threshold*/ std::nullopt, UnrollOpts.AllowPartial,
1656 UnrollOpts.AllowRuntime, UnrollOpts.AllowUpperBound, LocalAllowPeeling,
1657 UnrollOpts.AllowProfileBasedPeeling, UnrollOpts.FullUnrollMaxCount,
1658 &AA);
1659 Changed |= Result != LoopUnrollResult::Unmodified;
1660
1661 // The parent must not be damaged by unrolling!
1662#ifndef NDEBUG
1663 if (Result != LoopUnrollResult::Unmodified && ParentL)
1664 ParentL->verifyLoop();
1665#endif
1666
1667 // Clear any cached analysis results for L if we removed it completely.
1668 if (LAM && Result == LoopUnrollResult::FullyUnrolled)
1669 LAM->clear(L, LoopName);
1670 }
1671
1672 if (!Changed)
1673 return PreservedAnalyses::all();
1674
1676}
1677
1679 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1680 static_cast<PassInfoMixin<LoopUnrollPass> *>(this)->printPipeline(
1681 OS, MapClassName2PassName);
1682 OS << '<';
1683 if (UnrollOpts.AllowPartial != std::nullopt)
1684 OS << (*UnrollOpts.AllowPartial ? "" : "no-") << "partial;";
1685 if (UnrollOpts.AllowPeeling != std::nullopt)
1686 OS << (*UnrollOpts.AllowPeeling ? "" : "no-") << "peeling;";
1687 if (UnrollOpts.AllowRuntime != std::nullopt)
1688 OS << (*UnrollOpts.AllowRuntime ? "" : "no-") << "runtime;";
1689 if (UnrollOpts.AllowUpperBound != std::nullopt)
1690 OS << (*UnrollOpts.AllowUpperBound ? "" : "no-") << "upperbound;";
1691 if (UnrollOpts.AllowProfileBasedPeeling != std::nullopt)
1692 OS << (*UnrollOpts.AllowProfileBasedPeeling ? "" : "no-")
1693 << "profile-peeling;";
1694 if (UnrollOpts.FullUnrollMaxCount != std::nullopt)
1695 OS << "full-unroll-max=" << UnrollOpts.FullUnrollMaxCount << ';';
1696 OS << 'O' << UnrollOpts.OptLevel;
1697 OS << '>';
1698}
Rewrite undef for PHI
static bool isEqual(const Function &Caller, const Function &Callee)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines DenseMapInfo traits for DenseMap.
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
std::string Name
#define DEBUG_TYPE
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header provides classes for managing per-loop analyses.
loops
Definition: LoopInfo.cpp:1197
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
static MDNode * getUnrollMetadataForLoop(const Loop *L, StringRef Name)
static cl::opt< unsigned > UnrollMaxCount("unroll-max-count", cl::Hidden, cl::desc("Set the max unroll count for partial and runtime unrolling, for" "testing purposes"))
static cl::opt< unsigned > UnrollCount("unroll-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with " "unroll_count pragma values, for testing purposes"))
static cl::opt< unsigned > UnrollThresholdDefault("unroll-threshold-default", cl::init(150), cl::Hidden, cl::desc("Default threshold (max size of unrolled " "loop), used in all but O3 optimizations"))
static cl::opt< unsigned > FlatLoopTripCountThreshold("flat-loop-tripcount-threshold", cl::init(5), cl::Hidden, cl::desc("If the runtime tripcount for the loop is lower than the " "threshold, the loop is considered as flat and will be less " "aggressively unrolled."))
static cl::opt< unsigned > UnrollMaxIterationsCountToAnalyze("unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden, cl::desc("Don't allow loop unrolling to simulate more than this number of" "iterations when checking full unroll profitability"))
static cl::opt< unsigned > UnrollOptSizeThreshold("unroll-optsize-threshold", cl::init(0), cl::Hidden, cl::desc("The cost threshold for loop unrolling when optimizing for " "size"))
static bool hasUnrollFullPragma(const Loop *L)
static cl::opt< bool > UnrollUnrollRemainder("unroll-remainder", cl::Hidden, cl::desc("Allow the loop remainder to be unrolled."))
static unsigned unrollCountPragmaValue(const Loop *L)
static bool hasUnrollEnablePragma(const Loop *L)
static cl::opt< unsigned > UnrollFullMaxCount("unroll-full-max-count", cl::Hidden, cl::desc("Set the max unroll count for full unrolling, for testing purposes"))
static cl::opt< unsigned > UnrollMaxUpperBound("unroll-max-upperbound", cl::init(8), cl::Hidden, cl::desc("The max of trip count upper bound that is considered in unrolling"))
static std::optional< unsigned > shouldFullUnroll(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE, const TargetTransformInfo::UnrollingPreferences &UP)
static std::optional< EstimatedUnrollCost > analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize, unsigned MaxIterationsCountToAnalyze)
Figure out if the loop is worth full unrolling.
static cl::opt< unsigned > UnrollPartialThreshold("unroll-partial-threshold", cl::Hidden, cl::desc("The cost threshold for partial loop unrolling"))
static cl::opt< bool > UnrollAllowRemainder("unroll-allow-remainder", cl::Hidden, cl::desc("Allow generation of a loop remainder (extra iterations) " "when unrolling a loop."))
static std::optional< unsigned > shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount, const UnrollCostEstimator UCE, const TargetTransformInfo::UnrollingPreferences &UP)
static cl::opt< unsigned > PragmaUnrollFullMaxIterations("pragma-unroll-full-max-iterations", cl::init(1 '000 '000), cl::Hidden, cl::desc("Maximum allowed iterations to unroll under pragma unroll full."))
static const unsigned NoThreshold
A magic value for use with the Threshold parameter to indicate that the loop unroll should be perform...
static std::optional< unsigned > shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo, const unsigned TripMultiple, const unsigned TripCount, unsigned MaxTripCount, const UnrollCostEstimator UCE, const TargetTransformInfo::UnrollingPreferences &UP)
static cl::opt< bool > UnrollRevisitChildLoops("unroll-revisit-child-loops", cl::Hidden, cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. " "This shouldn't typically be needed as child loops (or their " "clones) were already visited."))
static cl::opt< unsigned > UnrollThreshold("unroll-threshold", cl::Hidden, cl::desc("The cost threshold for loop unrolling"))
static cl::opt< bool > UnrollRuntime("unroll-runtime", cl::Hidden, cl::desc("Unroll loops with run-time trip counts"))
static LoopUnrollResult tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel, bool OnlyFullUnroll, bool OnlyWhenForced, bool ForgetAllSCEV, std::optional< unsigned > ProvidedCount, std::optional< unsigned > ProvidedThreshold, std::optional< bool > ProvidedAllowPartial, std::optional< bool > ProvidedRuntime, std::optional< bool > ProvidedUpperBound, std::optional< bool > ProvidedAllowPeeling, std::optional< bool > ProvidedAllowProfileBasedPeeling, std::optional< unsigned > ProvidedFullUnrollMaxCount, AAResults *AA=nullptr)
static bool hasRuntimeUnrollDisablePragma(const Loop *L)
static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost, unsigned MaxPercentThresholdBoost)
static cl::opt< unsigned > UnrollThresholdAggressive("unroll-threshold-aggressive", cl::init(300), cl::Hidden, cl::desc("Threshold (max size of unrolled loop) to use in aggressive (O3) " "optimizations"))
static cl::opt< unsigned > UnrollMaxPercentThresholdBoost("unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden, cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied " "to the threshold when aggressively unrolling a loop due to the " "dynamic cost savings. If completely unrolling a loop will reduce " "the total runtime from X to Y, we boost the loop unroll " "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, " "X/Y). This limit avoids excessive code bloat."))
static cl::opt< unsigned > PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 *1024), cl::Hidden, cl::desc("Unrolled size limit for loops with an unroll(full) or " "unroll_count pragma."))
static cl::opt< bool > UnrollAllowPartial("unroll-allow-partial", cl::Hidden, cl::desc("Allows loops to be partially unrolled until " "-unroll-threshold loop size is reached."))
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
Machine Trace Metrics
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file contains the declarations for metadata subclasses.
if(VerifyEach)
LoopAnalysisManager LAM
const char LLVMTargetMachineRef TM
This header defines various interfaces for pass management in LLVM.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
A manager for alias analyses.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:321
void clear(IRUnitT &IR, llvm::StringRef Name)
Clear any cached analysis results for a single unit of IR.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:492
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:473
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:221
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:202
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:685
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:631
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:87
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
void addChildLoops(ArrayRef< Loop * > NewChildLoops)
Loop passes should use this method to indicate they have added new child loops of the current loop.
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
void addSiblingLoops(ArrayRef< Loop * > NewSibLoops)
Loop passes should use this method to indicate they have added new sibling loops to the current loop.
void markLoopAsDeleted(Loop &L)
Definition: LoopPass.cpp:110
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:566
void verifyLoop() const
Verify loop structure.
iterator end() const
iterator begin() const
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
iterator end() const
iterator begin() const
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:525
Metadata node.
Definition: Metadata.h:1067
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1428
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1434
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:756
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:94
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:115
bool empty() const
Determine if the PriorityWorklist is empty or not.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
Analysis pass that exposes the ScalarEvolution for a function.
The main scalar evolution driver.
unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value,...
unsigned getSmallConstantMaxTripCount(const Loop *L)
Returns the upper bound of the loop trip count as a normal unsigned value.
bool isBackedgeTakenCountMaxOrZero(const Loop *L)
Return true if the backedge taken count is either the value returned by getConstantMaxBackedgeTakenCo...
unsigned getSmallConstantTripCount(const Loop *L)
Returns the exact trip count of the loop if we can compute it, and the result is a small constant.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
void clear()
Completely clear the SetVector.
Definition: SetVector.h:273
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
value_type pop_back_val()
Definition: SetVector.h:285
A version of PriorityWorklist that selects small size optimized data structures for the vector and ma...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:321
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:366
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:94
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Multiway switch.
Analysis pass providing the TargetTransformInfo.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const
Get target-customized preferences for the generic loop unrolling transformation.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
bool isLoweredToCall(const Function *F) const
Test whether calls to a function lower to actual program function calls.
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
Produce an estimate of the unrolled cost of the specified loop.
Definition: UnrollLoop.h:127
ConvergenceKind Convergence
Definition: UnrollLoop.h:133
uint64_t getUnrolledLoopSize(const TargetTransformInfo::UnrollingPreferences &UP, unsigned CountOverwrite=0) const
Returns loop size estimation for unrolled loop, given the unrolling configuration specified by UP.
bool canUnroll() const
Whether it is legal to unroll this loop.
UnrollCostEstimator(const Loop *L, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value * > &EphValues, unsigned BEInsns)
uint64_t getRolledLoopSize() const
Definition: UnrollLoop.h:143
LLVM Value Representation.
Definition: Value.h:74
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
iterator find(const_arg_type_t< ValueT > V)
Definition: DenseSet.h:179
An efficient, type-erasing, non-owning reference to a callable.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
std::optional< unsigned > getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight=nullptr)
Returns a loop's estimated trip count based on branch weight metadata.
Definition: LoopUtils.cpp:849
void simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI, AAResults *AA=nullptr)
Perform some cleanup and simplifications on loops after unrolling.
Definition: LoopUnroll.cpp:344
void initializeLoopUnrollPass(PassRegistry &)
void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::PeelingPreferences &PP, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, AssumptionCache *AC=nullptr, unsigned Threshold=UINT_MAX)
Definition: LoopPeel.cpp:538
auto successors(const MachineBasicBlock *BB)
@ Runtime
Detect stack use after return if not disabled runtime with (ASAN_OPTIONS=detect_stack_use_after_retur...
bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition: LCSSA.cpp:425
std::optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:263
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
char & LCSSAID
Definition: LCSSA.cpp:507
Pass * createLoopUnrollPass(int OptLevel=2, bool OnlyWhenForced=false, bool ForgetAllSCEV=false, int Threshold=-1, int Count=-1, int AllowPartial=-1, int Runtime=-1, int UpperBound=-1, int AllowPeeling=-1)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1928
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
Definition: LoopPeel.cpp:872
CallBase * getLoopConvergenceHeart(const Loop *TheLoop)
Find the convergence heart of the loop.
Definition: LoopInfo.cpp:1108
TransformationMode hasUnrollAndJamTransformation(const Loop *L)
Definition: LoopUtils.cpp:373
cl::opt< bool > ForgetSCEVInLoopUnroll
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
TransformationMode hasUnrollTransformation(const Loop *L)
Definition: LoopUtils.cpp:352
LoopUnrollResult
Represents the result of a UnrollLoop invocation.
Definition: UnrollLoop.h:56
@ PartiallyUnrolled
The loop was partially unrolled – we still have a loop, but with a smaller trip count.
@ Unmodified
The loop was not modified.
@ FullyUnrolled
The loop was fully unrolled into straight-line code.
bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:141
const char *const LLVMLoopUnrollFollowupAll
Definition: UnrollLoop.h:44
TransformationMode
The mode sets how eager a transformation should be applied.
Definition: LoopUtils.h:275
@ TM_ForcedByUser
The transformation was directed by the user, e.g.
Definition: LoopUtils.h:292
@ TM_Disable
The transformation should not be applied.
Definition: LoopUtils.h:284
@ TM_Enable
The transformation should be applied without considering a cost model.
Definition: LoopUtils.h:281
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1914
void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
Definition: LoopUtils.cpp:1669
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1849
const char *const LLVMLoopUnrollFollowupRemainder
Definition: UnrollLoop.h:47
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
const char *const LLVMLoopUnrollFollowupUnrolled
Definition: UnrollLoop.h:45
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2051
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
LoopUnrollResult UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const llvm::TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop=nullptr, AAResults *AA=nullptr)
Unroll the given loop by Count.
Definition: LoopUnroll.cpp:460
bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree &DT, AssumptionCache *AC, bool PreserveLCSSA, ValueToValueMapTy &VMap)
VMap is the value-map that maps instructions from the original loop to instructions in the last peele...
Definition: LoopPeel.cpp:916
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition: CodeMetrics.h:34
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:71
An information struct used to provide DenseMap with the various necessary components for a given valu...
Definition: DenseMapInfo.h:50
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
bool OnlyWhenForced
If false, use a cost model to determine whether unrolling of a loop is profitable.
const bool ForgetSCEV
If true, forget all loops when unrolling.
std::optional< unsigned > FullUnrollMaxCount
std::optional< bool > AllowPartial
std::optional< bool > AllowRuntime
std::optional< bool > AllowProfileBasedPeeling
std::optional< bool > AllowPeeling
std::optional< bool > AllowUpperBound
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition: PassManager.h:74
bool PeelProfiledIterations
Allow peeling basing on profile.
unsigned PeelCount
A forced peeling factor (the number of bodied of the original loop that should be peeled off before t...
Parameters that control the generic loop unrolling transformation.
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
bool UpperBound
Allow using trip count upper bound to unroll loops.
unsigned Threshold
The cost threshold for the unrolled loop.
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
unsigned DefaultUnrollRuntimeCount
Default unroll count for loops with run-time trip count.
unsigned MaxPercentThresholdBoost
If complete unrolling will reduce the cost of the loop, we will boost the Threshold by a certain perc...
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
unsigned MaxIterationsCountToAnalyze
Don't allow loop unrolling to simulate more than this number of iterations when checking full unroll ...
bool AllowRemainder
Allow generation of a loop remainder (extra iterations after unroll).
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
unsigned FullUnrollMaxCount
Set the maximum unrolling factor for full unrolling.
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for...
unsigned MaxUpperBound
Set the maximum upper bound of trip count.
const Instruction * Heart
Definition: UnrollLoop.h:77