LLVM 23.0.0git
LoopUnrollPass.cpp
Go to the documentation of this file.
1//===- LoopUnroll.cpp - Loop unroller pass --------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass implements a simple loop unroller. It works best when loops have
10// been canonicalized by the -indvars pass, allowing it to determine the trip
11// counts of loops easily.
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/DenseMap.h"
17#include "llvm/ADT/DenseSet.h"
18#include "llvm/ADT/STLExtras.h"
19#include "llvm/ADT/SetVector.h"
22#include "llvm/ADT/StringRef.h"
35#include "llvm/IR/BasicBlock.h"
36#include "llvm/IR/CFG.h"
37#include "llvm/IR/Constant.h"
38#include "llvm/IR/Constants.h"
40#include "llvm/IR/Dominators.h"
41#include "llvm/IR/Function.h"
42#include "llvm/IR/Instruction.h"
44#include "llvm/IR/Metadata.h"
45#include "llvm/IR/PassManager.h"
47#include "llvm/Pass.h"
50#include "llvm/Support/Debug.h"
62#include <algorithm>
63#include <cassert>
64#include <cstdint>
65#include <limits>
66#include <optional>
67#include <string>
68#include <tuple>
69#include <utility>
70
71using namespace llvm;
72
73#define DEBUG_TYPE "loop-unroll"
74
76 "forget-scev-loop-unroll", cl::init(false), cl::Hidden,
77 cl::desc("Forget everything in SCEV when doing LoopUnroll, instead of just"
78 " the current top-most loop. This is sometimes preferred to reduce"
79 " compile time."));
80
82 UnrollThreshold("unroll-threshold", cl::Hidden,
83 cl::desc("The cost threshold for loop unrolling"));
84
87 "unroll-optsize-threshold", cl::init(0), cl::Hidden,
88 cl::desc("The cost threshold for loop unrolling when optimizing for "
89 "size"));
90
92 "unroll-partial-threshold", cl::Hidden,
93 cl::desc("The cost threshold for partial loop unrolling"));
94
96 "unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden,
97 cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied "
98 "to the threshold when aggressively unrolling a loop due to the "
99 "dynamic cost savings. If completely unrolling a loop will reduce "
100 "the total runtime from X to Y, we boost the loop unroll "
101 "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, "
102 "X/Y). This limit avoids excessive code bloat."));
103
105 "unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden,
106 cl::desc("Don't allow loop unrolling to simulate more than this number of "
107 "iterations when checking full unroll profitability"));
108
110 "unroll-count", cl::Hidden,
111 cl::desc("Use this unroll count for all loops including those with "
112 "unroll_count pragma values, for testing purposes"));
113
115 "unroll-max-count", cl::Hidden,
116 cl::desc("Set the max unroll count for partial and runtime unrolling, for"
117 "testing purposes"));
118
120 "unroll-full-max-count", cl::Hidden,
121 cl::desc(
122 "Set the max unroll count for full unrolling, for testing purposes"));
123
124static cl::opt<bool>
125 UnrollAllowPartial("unroll-allow-partial", cl::Hidden,
126 cl::desc("Allows loops to be partially unrolled until "
127 "-unroll-threshold loop size is reached."));
128
130 "unroll-allow-remainder", cl::Hidden,
131 cl::desc("Allow generation of a loop remainder (extra iterations) "
132 "when unrolling a loop."));
133
134static cl::opt<bool>
135 UnrollRuntime("unroll-runtime", cl::Hidden,
136 cl::desc("Unroll loops with run-time trip counts"));
137
139 "unroll-max-upperbound", cl::init(8), cl::Hidden,
140 cl::desc(
141 "The max of trip count upper bound that is considered in unrolling"));
142
144 "pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden,
145 cl::desc("Unrolled size limit for loops with unroll metadata "
146 "(full, enable, or count)."));
147
149 "flat-loop-tripcount-threshold", cl::init(5), cl::Hidden,
150 cl::desc("If the runtime tripcount for the loop is lower than the "
151 "threshold, the loop is considered as flat and will be less "
152 "aggressively unrolled."));
153
155 "unroll-remainder", cl::Hidden,
156 cl::desc("Allow the loop remainder to be unrolled."));
157
158// This option isn't ever intended to be enabled, it serves to allow
159// experiments to check the assumptions about when this kind of revisit is
160// necessary.
162 "unroll-revisit-child-loops", cl::Hidden,
163 cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. "
164 "This shouldn't typically be needed as child loops (or their "
165 "clones) were already visited."));
166
168 "unroll-threshold-aggressive", cl::init(300), cl::Hidden,
169 cl::desc("Threshold (max size of unrolled loop) to use in aggressive (O3) "
170 "optimizations"));
172 UnrollThresholdDefault("unroll-threshold-default", cl::init(150),
174 cl::desc("Default threshold (max size of unrolled "
175 "loop), used in all but O3 optimizations"));
176
178 "pragma-unroll-full-max-iterations", cl::init(1'000'000), cl::Hidden,
179 cl::desc("Maximum allowed iterations to unroll under pragma unroll full."));
180
181/// A magic value for use with the Threshold parameter to indicate
182/// that the loop unroll should be performed regardless of how much
183/// code expansion would result.
184static const unsigned NoThreshold = std::numeric_limits<unsigned>::max();
185
186/// Gather the various unrolling parameters based on the defaults, compiler
187/// flags, TTI overrides and user specified parameters.
191 OptimizationRemarkEmitter &ORE, int OptLevel,
192 std::optional<unsigned> UserThreshold, std::optional<unsigned> UserCount,
193 std::optional<bool> UserAllowPartial, std::optional<bool> UserRuntime,
194 std::optional<bool> UserUpperBound,
195 std::optional<unsigned> UserFullUnrollMaxCount) {
197
198 // Set up the defaults
199 UP.Threshold =
203 UP.PartialThreshold = 150;
205 UP.Count = 0;
207 UP.MaxCount = std::numeric_limits<unsigned>::max();
209 UP.FullUnrollMaxCount = std::numeric_limits<unsigned>::max();
210 UP.BEInsns = 2;
211 UP.Partial = false;
212 UP.Runtime = false;
213 UP.AllowRemainder = true;
214 UP.UnrollRemainder = false;
215 UP.AllowExpensiveTripCount = false;
216 UP.Force = false;
217 UP.UpperBound = false;
218 UP.UnrollAndJam = false;
222 UP.RuntimeUnrollMultiExit = false;
223 UP.AddAdditionalAccumulators = false;
224
225 // Override with any target specific settings
226 TTI.getUnrollingPreferences(L, SE, UP, &ORE);
227
228 // Apply size attributes
229 bool OptForSize = L->getHeader()->getParent()->hasOptSize() ||
230 // Let unroll hints / pragmas take precedence over PGSO.
232 llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI,
234 if (OptForSize) {
238 }
239
240 // Apply any user values specified by cl::opt
241 if (UnrollThreshold.getNumOccurrences() > 0)
243 if (UnrollPartialThreshold.getNumOccurrences() > 0)
245 if (UnrollMaxPercentThresholdBoost.getNumOccurrences() > 0)
247 if (UnrollMaxCount.getNumOccurrences() > 0)
249 if (UnrollMaxUpperBound.getNumOccurrences() > 0)
251 if (UnrollFullMaxCount.getNumOccurrences() > 0)
253 if (UnrollAllowPartial.getNumOccurrences() > 0)
255 if (UnrollAllowRemainder.getNumOccurrences() > 0)
257 if (UnrollRuntime.getNumOccurrences() > 0)
259 if (UnrollMaxUpperBound == 0)
260 UP.UpperBound = false;
261 if (UnrollUnrollRemainder.getNumOccurrences() > 0)
263 if (UnrollMaxIterationsCountToAnalyze.getNumOccurrences() > 0)
265
266 // Apply user values provided by argument
267 if (UserThreshold) {
268 UP.Threshold = *UserThreshold;
269 UP.PartialThreshold = *UserThreshold;
270 }
271 if (UserCount)
272 UP.Count = *UserCount;
273 if (UserAllowPartial)
274 UP.Partial = *UserAllowPartial;
275 if (UserRuntime)
276 UP.Runtime = *UserRuntime;
277 if (UserUpperBound)
278 UP.UpperBound = *UserUpperBound;
279 if (UserFullUnrollMaxCount)
280 UP.FullUnrollMaxCount = *UserFullUnrollMaxCount;
281
282 return UP;
283}
284
285namespace {
286
287/// A struct to densely store the state of an instruction after unrolling at
288/// each iteration.
289///
290/// This is designed to work like a tuple of <Instruction *, int> for the
291/// purposes of hashing and lookup, but to be able to associate two boolean
292/// states with each key.
293struct UnrolledInstState {
294 Instruction *I;
295 int Iteration : 30;
296 unsigned IsFree : 1;
297 unsigned IsCounted : 1;
298};
299
300/// Hashing and equality testing for a set of the instruction states.
301struct UnrolledInstStateKeyInfo {
302 using PtrInfo = DenseMapInfo<Instruction *>;
303 using PairInfo = DenseMapInfo<std::pair<Instruction *, int>>;
304
305 static inline UnrolledInstState getEmptyKey() {
306 return {PtrInfo::getEmptyKey(), 0, 0, 0};
307 }
308
309 static inline UnrolledInstState getTombstoneKey() {
310 return {PtrInfo::getTombstoneKey(), 0, 0, 0};
311 }
312
313 static inline unsigned getHashValue(const UnrolledInstState &S) {
314 return PairInfo::getHashValue({S.I, S.Iteration});
315 }
316
317 static inline bool isEqual(const UnrolledInstState &LHS,
318 const UnrolledInstState &RHS) {
319 return PairInfo::isEqual({LHS.I, LHS.Iteration}, {RHS.I, RHS.Iteration});
320 }
321};
322
323struct EstimatedUnrollCost {
324 /// The estimated cost after unrolling.
325 unsigned UnrolledCost;
326
327 /// The estimated dynamic cost of executing the instructions in the
328 /// rolled form.
329 unsigned RolledDynamicCost;
330};
331
332} // end anonymous namespace
333
334/// Figure out if the loop is worth full unrolling.
335///
336/// Complete loop unrolling can make some loads constant, and we need to know
337/// if that would expose any further optimization opportunities. This routine
338/// estimates this optimization. It computes cost of unrolled loop
339/// (UnrolledCost) and dynamic cost of the original loop (RolledDynamicCost). By
340/// dynamic cost we mean that we won't count costs of blocks that are known not
341/// to be executed (i.e. if we have a branch in the loop and we know that at the
342/// given iteration its condition would be resolved to true, we won't add up the
343/// cost of the 'false'-block).
344/// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If
345/// the analysis failed (no benefits expected from the unrolling, or the loop is
346/// too big to analyze), the returned value is std::nullopt.
347static std::optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
348 const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE,
349 const SmallPtrSetImpl<const Value *> &EphValues,
350 const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize,
351 unsigned MaxIterationsCountToAnalyze) {
352 // We want to be able to scale offsets by the trip count and add more offsets
353 // to them without checking for overflows, and we already don't want to
354 // analyze *massive* trip counts, so we force the max to be reasonably small.
355 assert(MaxIterationsCountToAnalyze <
356 (unsigned)(std::numeric_limits<int>::max() / 2) &&
357 "The unroll iterations max is too large!");
358
359 // Only analyze inner loops. We can't properly estimate cost of nested loops
360 // and we won't visit inner loops again anyway.
361 if (!L->isInnermost()) {
363 << "Not analyzing loop cost: not an innermost loop.\n");
364 return std::nullopt;
365 }
366
367 // Don't simulate loops with a big or unknown tripcount
368 if (!TripCount || TripCount > MaxIterationsCountToAnalyze) {
370 << "Not analyzing loop cost: trip count "
371 << (TripCount ? "too large" : "unknown") << ".\n");
372 return std::nullopt;
373 }
374
377 DenseMap<Value *, Value *> SimplifiedValues;
378 SmallVector<std::pair<Value *, Value *>, 4> SimplifiedInputValues;
379
380 // The estimated cost of the unrolled form of the loop. We try to estimate
381 // this by simplifying as much as we can while computing the estimate.
382 InstructionCost UnrolledCost = 0;
383
384 // We also track the estimated dynamic (that is, actually executed) cost in
385 // the rolled form. This helps identify cases when the savings from unrolling
386 // aren't just exposing dead control flows, but actual reduced dynamic
387 // instructions due to the simplifications which we expect to occur after
388 // unrolling.
389 InstructionCost RolledDynamicCost = 0;
390
391 // We track the simplification of each instruction in each iteration. We use
392 // this to recursively merge costs into the unrolled cost on-demand so that
393 // we don't count the cost of any dead code. This is essentially a map from
394 // <instruction, int> to <bool, bool>, but stored as a densely packed struct.
396
397 // A small worklist used to accumulate cost of instructions from each
398 // observable and reached root in the loop.
400
401 // PHI-used worklist used between iterations while accumulating cost.
403
404 // Helper function to accumulate cost for instructions in the loop.
405 auto AddCostRecursively = [&](Instruction &RootI, int Iteration) {
406 assert(Iteration >= 0 && "Cannot have a negative iteration!");
407 assert(CostWorklist.empty() && "Must start with an empty cost list");
408 assert(PHIUsedList.empty() && "Must start with an empty phi used list");
409 CostWorklist.push_back(&RootI);
411 RootI.getFunction()->hasMinSize() ?
414 for (;; --Iteration) {
415 do {
416 Instruction *I = CostWorklist.pop_back_val();
417
418 // InstCostMap only uses I and Iteration as a key, the other two values
419 // don't matter here.
420 auto CostIter = InstCostMap.find({I, Iteration, 0, 0});
421 if (CostIter == InstCostMap.end())
422 // If an input to a PHI node comes from a dead path through the loop
423 // we may have no cost data for it here. What that actually means is
424 // that it is free.
425 continue;
426 auto &Cost = *CostIter;
427 if (Cost.IsCounted)
428 // Already counted this instruction.
429 continue;
430
431 // Mark that we are counting the cost of this instruction now.
432 Cost.IsCounted = true;
433
434 // If this is a PHI node in the loop header, just add it to the PHI set.
435 if (auto *PhiI = dyn_cast<PHINode>(I))
436 if (PhiI->getParent() == L->getHeader()) {
437 assert(Cost.IsFree && "Loop PHIs shouldn't be evaluated as they "
438 "inherently simplify during unrolling.");
439 if (Iteration == 0)
440 continue;
441
442 // Push the incoming value from the backedge into the PHI used list
443 // if it is an in-loop instruction. We'll use this to populate the
444 // cost worklist for the next iteration (as we count backwards).
445 if (auto *OpI = dyn_cast<Instruction>(
446 PhiI->getIncomingValueForBlock(L->getLoopLatch())))
447 if (L->contains(OpI))
448 PHIUsedList.push_back(OpI);
449 continue;
450 }
451
452 // First accumulate the cost of this instruction.
453 if (!Cost.IsFree) {
454 // Consider simplified operands in instruction cost.
456 transform(I->operands(), std::back_inserter(Operands),
457 [&](Value *Op) {
458 if (auto Res = SimplifiedValues.lookup(Op))
459 return Res;
460 return Op;
461 });
462 UnrolledCost += TTI.getInstructionCost(I, Operands, CostKind);
464 << "Adding cost of instruction (iteration " << Iteration
465 << "): ");
466 LLVM_DEBUG(I->dump());
467 }
468
469 // We must count the cost of every operand which is not free,
470 // recursively. If we reach a loop PHI node, simply add it to the set
471 // to be considered on the next iteration (backwards!).
472 for (Value *Op : I->operands()) {
473 // Check whether this operand is free due to being a constant or
474 // outside the loop.
475 auto *OpI = dyn_cast<Instruction>(Op);
476 if (!OpI || !L->contains(OpI))
477 continue;
478
479 // Otherwise accumulate its cost.
480 CostWorklist.push_back(OpI);
481 }
482 } while (!CostWorklist.empty());
483
484 if (PHIUsedList.empty())
485 // We've exhausted the search.
486 break;
487
488 assert(Iteration > 0 &&
489 "Cannot track PHI-used values past the first iteration!");
490 CostWorklist.append(PHIUsedList.begin(), PHIUsedList.end());
491 PHIUsedList.clear();
492 }
493 };
494
495 // Ensure that we don't violate the loop structure invariants relied on by
496 // this analysis.
497 assert(L->isLoopSimplifyForm() && "Must put loop into normal form first.");
498 assert(L->isLCSSAForm(DT) &&
499 "Must have loops in LCSSA form to track live-out values.");
500
502 << "Starting LoopUnroll profitability analysis...\n");
503
505 L->getHeader()->getParent()->hasMinSize() ?
507 // Simulate execution of each iteration of the loop counting instructions,
508 // which would be simplified.
509 // Since the same load will take different values on different iterations,
510 // we literally have to go through all loop's iterations.
511 for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
512 LLVM_DEBUG(dbgs().indent(3) << "Analyzing iteration " << Iteration << "\n");
513
514 // Prepare for the iteration by collecting any simplified entry or backedge
515 // inputs.
516 for (Instruction &I : *L->getHeader()) {
517 auto *PHI = dyn_cast<PHINode>(&I);
518 if (!PHI)
519 break;
520
521 // The loop header PHI nodes must have exactly two input: one from the
522 // loop preheader and one from the loop latch.
523 assert(
524 PHI->getNumIncomingValues() == 2 &&
525 "Must have an incoming value only for the preheader and the latch.");
526
527 Value *V = PHI->getIncomingValueForBlock(
528 Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch());
529 if (Iteration != 0 && SimplifiedValues.count(V))
530 V = SimplifiedValues.lookup(V);
531 SimplifiedInputValues.push_back({PHI, V});
532 }
533
534 // Now clear and re-populate the map for the next iteration.
535 SimplifiedValues.clear();
536 while (!SimplifiedInputValues.empty())
537 SimplifiedValues.insert(SimplifiedInputValues.pop_back_val());
538
539 UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE, L);
540
541 BBWorklist.clear();
542 BBWorklist.insert(L->getHeader());
543 // Note that we *must not* cache the size, this loop grows the worklist.
544 for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
545 BasicBlock *BB = BBWorklist[Idx];
546
547 // Visit all instructions in the given basic block and try to simplify
548 // it. We don't change the actual IR, just count optimization
549 // opportunities.
550 for (Instruction &I : *BB) {
551 // These won't get into the final code - don't even try calculating the
552 // cost for them.
553 if (EphValues.count(&I))
554 continue;
555
556 // Track this instruction's expected baseline cost when executing the
557 // rolled loop form.
558 RolledDynamicCost += TTI.getInstructionCost(&I, CostKind);
559
560 // Visit the instruction to analyze its loop cost after unrolling,
561 // and if the visitor returns true, mark the instruction as free after
562 // unrolling and continue.
563 bool IsFree = Analyzer.visit(I);
564 bool Inserted = InstCostMap.insert({&I, (int)Iteration,
565 (unsigned)IsFree,
566 /*IsCounted*/ false}).second;
567 (void)Inserted;
568 assert(Inserted && "Cannot have a state for an unvisited instruction!");
569
570 if (IsFree)
571 continue;
572
573 // Can't properly model a cost of a call.
574 // FIXME: With a proper cost model we should be able to do it.
575 if (auto *CI = dyn_cast<CallInst>(&I)) {
576 const Function *Callee = CI->getCalledFunction();
577 if (!Callee || TTI.isLoweredToCall(Callee)) {
579 << "Can't analyze cost of loop with call\n");
580 return std::nullopt;
581 }
582 }
583
584 // If the instruction might have a side-effect recursively account for
585 // the cost of it and all the instructions leading up to it.
586 if (I.mayHaveSideEffects())
587 AddCostRecursively(I, Iteration);
588
589 // If unrolled body turns out to be too big, bail out.
590 if (UnrolledCost > MaxUnrolledLoopSize) {
591 LLVM_DEBUG({
592 dbgs().indent(3) << "Exceeded threshold.. exiting.\n";
593 dbgs().indent(3)
594 << "UnrolledCost: " << UnrolledCost
595 << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize << "\n";
596 });
597 return std::nullopt;
598 }
599 }
600
601 Instruction *TI = BB->getTerminator();
602
603 auto getSimplifiedConstant = [&](Value *V) -> Constant * {
604 if (SimplifiedValues.count(V))
605 V = SimplifiedValues.lookup(V);
606 return dyn_cast<Constant>(V);
607 };
608
609 // Add in the live successors by first checking whether we have terminator
610 // that may be simplified based on the values simplified by this call.
611 BasicBlock *KnownSucc = nullptr;
612 if (CondBrInst *BI = dyn_cast<CondBrInst>(TI)) {
613 if (auto *SimpleCond = getSimplifiedConstant(BI->getCondition())) {
614 // Just take the first successor if condition is undef
615 if (isa<UndefValue>(SimpleCond))
616 KnownSucc = BI->getSuccessor(0);
617 else if (ConstantInt *SimpleCondVal =
618 dyn_cast<ConstantInt>(SimpleCond))
619 KnownSucc = BI->getSuccessor(SimpleCondVal->isZero() ? 1 : 0);
620 }
621 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
622 if (auto *SimpleCond = getSimplifiedConstant(SI->getCondition())) {
623 // Just take the first successor if condition is undef
624 if (isa<UndefValue>(SimpleCond))
625 KnownSucc = SI->getSuccessor(0);
626 else if (ConstantInt *SimpleCondVal =
627 dyn_cast<ConstantInt>(SimpleCond))
628 KnownSucc = SI->findCaseValue(SimpleCondVal)->getCaseSuccessor();
629 }
630 }
631 if (KnownSucc) {
632 if (L->contains(KnownSucc))
633 BBWorklist.insert(KnownSucc);
634 else
635 ExitWorklist.insert({BB, KnownSucc});
636 continue;
637 }
638
639 // Add BB's successors to the worklist.
640 for (BasicBlock *Succ : successors(BB))
641 if (L->contains(Succ))
642 BBWorklist.insert(Succ);
643 else
644 ExitWorklist.insert({BB, Succ});
645 AddCostRecursively(*TI, Iteration);
646 }
647
648 // If we found no optimization opportunities on the first iteration, we
649 // won't find them on later ones too.
650 if (UnrolledCost == RolledDynamicCost) {
651 LLVM_DEBUG({
652 dbgs().indent(3) << "No opportunities found.. exiting.\n";
653 dbgs().indent(3) << "UnrolledCost: " << UnrolledCost << "\n";
654 });
655 return std::nullopt;
656 }
657 }
658
659 while (!ExitWorklist.empty()) {
660 BasicBlock *ExitingBB, *ExitBB;
661 std::tie(ExitingBB, ExitBB) = ExitWorklist.pop_back_val();
662
663 for (Instruction &I : *ExitBB) {
664 auto *PN = dyn_cast<PHINode>(&I);
665 if (!PN)
666 break;
667
668 Value *Op = PN->getIncomingValueForBlock(ExitingBB);
669 if (auto *OpI = dyn_cast<Instruction>(Op))
670 if (L->contains(OpI))
671 AddCostRecursively(*OpI, TripCount - 1);
672 }
673 }
674
675 assert(UnrolledCost.isValid() && RolledDynamicCost.isValid() &&
676 "All instructions must have a valid cost, whether the "
677 "loop is rolled or unrolled.");
678
679 LLVM_DEBUG({
680 dbgs().indent(3) << "Analysis finished:\n";
681 dbgs().indent(3) << "UnrolledCost: " << UnrolledCost
682 << ", RolledDynamicCost: " << RolledDynamicCost << "\n";
683 });
684 return {{unsigned(UnrolledCost.getValue()),
685 unsigned(RolledDynamicCost.getValue())}};
686}
687
689 const Loop *L, const TargetTransformInfo &TTI,
690 const SmallPtrSetImpl<const Value *> &EphValues, unsigned BEInsns) {
692 for (BasicBlock *BB : L->blocks())
693 Metrics.analyzeBasicBlock(BB, TTI, EphValues, /* PrepareForLTO= */ false,
694 L);
695 NumInlineCandidates = Metrics.NumInlineCandidates;
696 NotDuplicatable = Metrics.notDuplicatable;
697 Convergence = Metrics.Convergence;
698 LoopSize = Metrics.NumInsts;
700 Metrics.Convergence != ConvergenceKind::Uncontrolled &&
702
703 // Don't allow an estimate of size zero. This would allows unrolling of loops
704 // with huge iteration counts, which is a compile time problem even if it's
705 // not a problem for code quality. Also, the code using this size may assume
706 // that each loop has at least three instructions (likely a conditional
707 // branch, a comparison feeding that branch, and some kind of loop increment
708 // feeding that comparison instruction).
709 if (LoopSize.isValid() && LoopSize < BEInsns + 1)
710 // This is an open coded max() on InstructionCost
711 LoopSize = BEInsns + 1;
712}
713
715 const Loop *L) const {
716 auto ReportCannotUnroll = [&](StringRef Reason) {
717 LLVM_DEBUG(dbgs().indent(1) << "Not unrolling: " << Reason << ".\n");
718 if (ORE && L)
719 ORE->emit([&]() {
720 return OptimizationRemarkMissed(DEBUG_TYPE, "CannotUnrollLoop",
721 L->getStartLoc(), L->getHeader())
722 << "unable to unroll loop: " << Reason;
723 });
724 };
725
727 ReportCannotUnroll("contains convergent operations");
728 return false;
729 }
730 if (!LoopSize.isValid()) {
731 ReportCannotUnroll("loop size could not be computed");
732 return false;
733 }
734 if (NotDuplicatable) {
735 ReportCannotUnroll("contains non-duplicatable instructions");
736 return false;
737 }
738 return true;
739}
740
743 unsigned CountOverwrite) const {
744 unsigned LS = LoopSize.getValue();
745 assert(LS >= UP.BEInsns && "LoopSize should not be less than BEInsns!");
746 if (CountOverwrite)
747 return static_cast<uint64_t>(LS - UP.BEInsns) * CountOverwrite + UP.BEInsns;
748 else
749 return static_cast<uint64_t>(LS - UP.BEInsns) * UP.Count + UP.BEInsns;
750}
751
752// Returns true if the loop has an unroll(full) pragma.
753static bool hasUnrollFullPragma(const Loop *L) {
754 return getUnrollMetadataForLoop(L, "llvm.loop.unroll.full");
755}
756
757// Returns true if the loop has an unroll(enable) pragma. This metadata is used
758// for both "#pragma unroll" and "#pragma clang loop unroll(enable)" directives.
759static bool hasUnrollEnablePragma(const Loop *L) {
760 return getUnrollMetadataForLoop(L, "llvm.loop.unroll.enable");
761}
762
763// Returns true if the loop has a runtime unroll(disable) pragma.
764static bool hasRuntimeUnrollDisablePragma(const Loop *L) {
765 return getUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable");
766}
767
768// If loop has an unroll_count pragma return the (necessarily
769// positive) value from the pragma. Otherwise return 0.
770static unsigned unrollCountPragmaValue(const Loop *L) {
771 MDNode *MD = getUnrollMetadataForLoop(L, "llvm.loop.unroll.count");
772 if (MD) {
773 assert(MD->getNumOperands() == 2 &&
774 "Unroll count hint metadata should have two operands.");
775 unsigned Count =
776 mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
777 assert(Count >= 1 && "Unroll count must be positive.");
778 return Count;
779 }
780 return 0;
781}
782
791
792// Computes the boosting factor for complete unrolling.
793// If fully unrolling the loop would save a lot of RolledDynamicCost, it would
794// be beneficial to fully unroll the loop even if unrolledcost is large. We
795// use (RolledDynamicCost / UnrolledCost) to model the unroll benefits to adjust
796// the unroll threshold.
797static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost,
798 unsigned MaxPercentThresholdBoost) {
799 if (Cost.RolledDynamicCost >= std::numeric_limits<unsigned>::max() / 100)
800 return 100;
801 else if (Cost.UnrolledCost != 0)
802 // The boosting factor is RolledDynamicCost / UnrolledCost
803 return std::min(100 * Cost.RolledDynamicCost / Cost.UnrolledCost,
804 MaxPercentThresholdBoost);
805 else
806 return MaxPercentThresholdBoost;
807}
808
809static std::optional<unsigned>
811 const unsigned TripMultiple, const unsigned TripCount,
812 unsigned MaxTripCount, const UnrollCostEstimator UCE,
815
816 // Using unroll pragma
817 // 1st priority is unroll count set by "unroll-count" option.
818
819 if (PInfo.UserUnrollCount) {
820 if (UP.AllowRemainder &&
821 UCE.getUnrolledLoopSize(UP, (unsigned)UnrollCount) < UP.Threshold) {
822 LLVM_DEBUG(dbgs().indent(2) << "Unrolling with user-specified count: "
823 << UnrollCount << ".\n");
824 return (unsigned)UnrollCount;
825 }
827 << "Not unrolling with user count " << UnrollCount << ": "
828 << (UP.AllowRemainder ? "exceeds threshold"
829 : "remainder not allowed")
830 << ".\n");
831 }
832
833 // 2nd priority is unroll count set by pragma.
834 if (PInfo.PragmaCount > 0) {
835 if ((UP.AllowRemainder || (TripMultiple % PInfo.PragmaCount == 0))) {
836 LLVM_DEBUG(dbgs().indent(2) << "Unrolling with pragma count: "
837 << PInfo.PragmaCount << ".\n");
838 return PInfo.PragmaCount;
839 }
841 << "Not unrolling with pragma count " << PInfo.PragmaCount
842 << ": remainder not allowed, count does not divide trip "
843 << "multiple " << TripMultiple << ".\n");
844 ORE->emit([&]() {
845 return OptimizationRemarkAnalysis(DEBUG_TYPE, "PragmaUnrollCountRejected",
846 L->getStartLoc(), L->getHeader())
847 << "may be unable to unroll loop with count "
848 << ore::NV("PragmaCount", PInfo.PragmaCount)
849 << ": remainder loop is not allowed and count does not divide "
850 "trip multiple "
851 << ore::NV("TripMultiple", TripMultiple);
852 });
853 }
854
855 if (PInfo.PragmaFullUnroll) {
856 if (TripCount != 0) {
857 // Certain cases with UBSAN can cause trip count to be calculated as
858 // INT_MAX, Block full unrolling at a reasonable limit so that the
859 // compiler doesn't hang trying to unroll the loop. See PR77842
860 if (TripCount > PragmaUnrollFullMaxIterations) {
862 << "Won't unroll; trip count is too large.\n");
863 ORE->emit([&]() {
865 "PragmaFullUnrollTripCountTooLarge",
866 L->getStartLoc(), L->getHeader())
867 << "may be unable to fully unroll loop: trip count "
868 << ore::NV("TripCount", TripCount) << " exceeds limit "
870 });
871 return std::nullopt;
872 }
873
875 << "Fully unrolling with trip count: " << TripCount << ".\n");
876 return TripCount;
877 }
879 << "Not fully unrolling: unknown trip count.\n");
880 ORE->emit([&]() {
882 "PragmaFullUnrollUnknownTripCount",
883 L->getStartLoc(), L->getHeader())
884 << "may be unable to fully unroll loop: trip count is unknown";
885 });
886 }
887
888 if (PInfo.PragmaEnableUnroll && !TripCount && MaxTripCount &&
889 MaxTripCount <= UP.MaxUpperBound) {
891 << "Unrolling with max trip count: " << MaxTripCount << ".\n");
892 return MaxTripCount;
893 }
894
895 return std::nullopt;
896}
897
898static std::optional<unsigned> shouldFullUnroll(
901 const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE,
903 assert(FullUnrollTripCount && "should be non-zero!");
904
905 if (FullUnrollTripCount > UP.FullUnrollMaxCount) {
907 << "Not unrolling: trip count " << FullUnrollTripCount
908 << " exceeds max count " << UP.FullUnrollMaxCount << ".\n");
909 return std::nullopt;
910 }
911
912 // When computing the unrolled size, note that BEInsns are not replicated
913 // like the rest of the loop body.
914 uint64_t UnrolledSize = UCE.getUnrolledLoopSize(UP, FullUnrollTripCount);
915 if (UnrolledSize < UP.Threshold) {
916 LLVM_DEBUG(dbgs().indent(2) << "Unrolling: size " << UnrolledSize
917 << " < threshold " << UP.Threshold << ".\n");
918 return FullUnrollTripCount;
919 }
920
922 << "Unrolled size " << UnrolledSize << " exceeds threshold "
923 << UP.Threshold << "; checking for cost benefit.\n");
924
925 // The loop isn't that small, but we still can fully unroll it if that
926 // helps to remove a significant number of instructions.
927 // To check that, run additional analysis on the loop.
928 if (std::optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
929 L, FullUnrollTripCount, DT, SE, EphValues, TTI,
932 unsigned Boost =
934 unsigned BoostedThreshold = UP.Threshold * Boost / 100;
935 if (Cost->UnrolledCost < BoostedThreshold) {
936 LLVM_DEBUG(dbgs().indent(2) << "Profitable after cost analysis.\n");
937 return FullUnrollTripCount;
938 }
940 << "Not unrolling: cost " << Cost->UnrolledCost
941 << " >= boosted threshold " << BoostedThreshold << ".\n");
942 }
943
944 return std::nullopt;
945}
946
947static std::optional<unsigned>
948shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount,
949 const UnrollCostEstimator UCE,
951
952 if (!TripCount)
953 return std::nullopt;
954
955 if (!UP.Partial) {
956 LLVM_DEBUG(dbgs().indent(2) << "Will not try to unroll partially because "
957 << "-unroll-allow-partial not given\n");
958 return 0;
959 }
960 unsigned count = UP.Count;
961 if (count == 0)
962 count = TripCount;
963 if (UP.PartialThreshold != NoThreshold) {
964 // Reduce unroll count to be modulo of TripCount for partial unrolling.
965 if (UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold) {
966 unsigned NewCount =
967 (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
968 (LoopSize - UP.BEInsns);
970 << "Unrolled size exceeds threshold; reducing count "
971 << "from " << count << " to " << NewCount << ".\n");
972 count = NewCount;
973 }
974 if (count > UP.MaxCount)
975 count = UP.MaxCount;
976 while (count != 0 && TripCount % count != 0)
977 count--;
978 if (UP.AllowRemainder && count <= 1) {
979 // If there is no Count that is modulo of TripCount, set Count to
980 // largest power-of-two factor that satisfies the threshold limit.
981 // As we'll create fixup loop, do the type of unrolling only if
982 // remainder loop is allowed.
983 // Note: DefaultUnrollRuntimeCount is used as a reasonable starting point
984 // even though this is partial unrolling (not runtime unrolling).
986 while (count != 0 &&
988 count >>= 1;
989 }
990 if (count < 2) {
992 << "Will not partially unroll: no profitable count.\n");
993 count = 0;
994 }
995 } else {
996 count = TripCount;
997 }
998 if (count > UP.MaxCount)
999 count = UP.MaxCount;
1000
1002 << "Partially unrolling with count: " << count << "\n");
1003
1004 return count;
1005}
1006// Calculates unroll count and writes it to UP.Count.
1007// Unless IgnoreUser is true, will also use metadata and command-line options
1008// that are specific to the LoopUnroll pass (which, for instance, are
1009// irrelevant for the LoopUnrollAndJam pass).
1010// FIXME: This function is used by LoopUnroll and LoopUnrollAndJam, but consumes
1011// many LoopUnroll-specific options. The shared functionality should be
1012// refactored into it own function.
1014 DominatorTree &DT, LoopInfo *LI,
1016 const SmallPtrSetImpl<const Value *> &EphValues,
1018 const unsigned TripCount,
1019 const unsigned MaxTripCount, const bool MaxOrZero,
1020 const unsigned TripMultiple,
1021 const UnrollCostEstimator &UCE,
1024
1025 unsigned LoopSize = UCE.getRolledLoopSize();
1026
1027 LLVM_DEBUG(dbgs().indent(1) << "Computing unroll count: TripCount="
1028 << TripCount << ", MaxTripCount=" << MaxTripCount
1029 << (MaxOrZero ? " (MaxOrZero)" : "")
1030 << ", TripMultiple=" << TripMultiple << "\n");
1031
1032 UnrollPragmaInfo PInfo(L);
1033 LLVM_DEBUG({
1034 if (PInfo.ExplicitUnroll) {
1035 dbgs().indent(1) << "Explicit unroll requested:";
1036 if (PInfo.UserUnrollCount)
1037 dbgs() << " user-count";
1038 if (PInfo.PragmaFullUnroll)
1039 dbgs() << " pragma-full";
1040 if (PInfo.PragmaCount > 0)
1041 dbgs() << " pragma-count(" << PInfo.PragmaCount << ")";
1042 if (PInfo.PragmaEnableUnroll)
1043 dbgs() << " pragma-enable";
1044 dbgs() << "\n";
1045 }
1046 });
1047
1048 // Use an explicit peel count that has been specified for testing. In this
1049 // case it's not permitted to also specify an explicit unroll count.
1050 if (PP.PeelCount) {
1051 if (UnrollCount.getNumOccurrences() > 0) {
1052 reportFatalUsageError("Cannot specify both explicit peel count and "
1053 "explicit unroll count");
1054 }
1056 << "Using explicit peel count: " << PP.PeelCount << ".\n");
1057 UP.Count = 1;
1058 UP.Runtime = false;
1059 return;
1060 }
1061
1062 // If a user provided an explicit unroll pragma (with or without count),
1063 // enable runtime unrolling and override expensive trip count checks.
1064 if (PInfo.PragmaEnableUnroll || PInfo.PragmaCount > 0) {
1065 UP.AllowExpensiveTripCount = true;
1066 UP.Runtime = true;
1067 }
1068
1069 // Check for explicit Count.
1070 // 1st priority is unroll count set by "unroll-count" option.
1071 // 2nd priority is unroll count set by pragma.
1072 LLVM_DEBUG(dbgs().indent(1) << "Trying pragma unroll...\n");
1073 if (auto UnrollFactor = shouldPragmaUnroll(L, PInfo, TripMultiple, TripCount,
1074 MaxTripCount, UCE, UP, ORE)) {
1075 UP.Count = *UnrollFactor;
1076
1077 if (PInfo.UserUnrollCount || (PInfo.PragmaCount > 0)) {
1078 UP.AllowExpensiveTripCount = true;
1079 UP.Force = true;
1080 }
1081 return;
1082 } else {
1083 if (PInfo.ExplicitUnroll && TripCount != 0) {
1084 // If the loop has an unrolling pragma, we want to be more aggressive with
1085 // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold
1086 // value which is larger than the default limits.
1087 UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
1088 UP.PartialThreshold =
1089 std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
1090 }
1091 }
1092
1093 // 3rd priority is exact full unrolling. This will eliminate all copies
1094 // of some exit test.
1095 LLVM_DEBUG(dbgs().indent(1) << "Trying full unroll...\n");
1096 assert(UP.Count == 0);
1097 if (TripCount) {
1098 if (auto UnrollFactor = shouldFullUnroll(L, TTI, DT, SE, EphValues,
1099 TripCount, UCE, UP)) {
1100 UP.Count = *UnrollFactor;
1101 return;
1102 }
1103 }
1104
1105 // 4th priority is bounded unrolling.
1106 // We can unroll by the upper bound amount if it's generally allowed or if
1107 // we know that the loop is executed either the upper bound or zero times.
1108 // (MaxOrZero unrolling keeps only the first loop test, so the number of
1109 // loop tests remains the same compared to the non-unrolled version, whereas
1110 // the generic upper bound unrolling keeps all but the last loop test so the
1111 // number of loop tests goes up which may end up being worse on targets with
1112 // constrained branch predictor resources so is controlled by an option.)
1113 // In addition we only unroll small upper bounds.
1114 // Note that the cost of bounded unrolling is always strictly greater than
1115 // cost of exact full unrolling. As such, if we have an exact count and
1116 // found it unprofitable, we'll never chose to bounded unroll.
1117 LLVM_DEBUG(dbgs().indent(1) << "Trying upper-bound unroll...\n");
1118 if (!TripCount && MaxTripCount && (UP.UpperBound || MaxOrZero) &&
1119 MaxTripCount <= UP.MaxUpperBound) {
1120 if (auto UnrollFactor = shouldFullUnroll(L, TTI, DT, SE, EphValues,
1121 MaxTripCount, UCE, UP)) {
1122 UP.Count = *UnrollFactor;
1123 return;
1124 }
1125 }
1126
1127 // 5th priority is loop peeling.
1128 LLVM_DEBUG(dbgs().indent(1) << "Trying loop peeling...\n");
1129 computePeelCount(L, LoopSize, PP, TripCount, DT, SE, TTI, AC, UP.Threshold);
1130 if (PP.PeelCount) {
1132 << "Peeling with count: " << PP.PeelCount << ".\n");
1133 UP.Runtime = false;
1134 UP.Count = 1;
1135 return;
1136 }
1137
1138 // Before starting partial unrolling, set up.partial to true,
1139 // if user explicitly asked for unrolling
1140 if (TripCount)
1141 UP.Partial |= PInfo.ExplicitUnroll;
1142
1143 // 6th priority is partial unrolling.
1144 // Try partial unroll only when TripCount could be statically calculated.
1145 LLVM_DEBUG(dbgs().indent(1) << "Trying partial unroll...\n");
1146 if (auto UnrollFactor = shouldPartialUnroll(LoopSize, TripCount, UCE, UP)) {
1147 UP.Count = *UnrollFactor;
1148 return;
1149 }
1150 assert(TripCount == 0 &&
1151 "All cases when TripCount is constant should be covered here.");
1152
1153 // 7th priority is runtime unrolling.
1154 LLVM_DEBUG(dbgs().indent(1) << "Trying runtime unroll...\n");
1155 // Don't unroll a runtime trip count loop when it is disabled.
1156 if (PInfo.PragmaRuntimeUnrollDisable) {
1158 << "Not runtime unrolling: disabled by pragma.\n");
1159 return;
1160 }
1161
1162 // Don't unroll a small upper bound loop unless user or TTI asked to do so.
1163 if (MaxTripCount && !UP.Force && MaxTripCount <= UP.MaxUpperBound) {
1164 LLVM_DEBUG(dbgs().indent(2) << "Not runtime unrolling: max trip count "
1165 << MaxTripCount << " is small (<= "
1166 << UP.MaxUpperBound << ") and not forced.\n");
1167 return;
1168 }
1169
1170 // Check if the runtime trip count is too small when profile is available.
1171 if (L->getHeader()->getParent()->hasProfileData()) {
1172 if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
1173 if (*ProfileTripCount < FlatLoopTripCountThreshold)
1174 return;
1175 else
1176 UP.AllowExpensiveTripCount = true;
1177 }
1178 }
1179 if (!UP.Runtime) {
1181 << "Will not try to unroll loop with runtime trip count "
1182 << "because -unroll-runtime not given\n");
1183 return;
1184 }
1185
1186 assert(UP.Count == 0);
1188
1189 // Reduce unroll count to be the largest power-of-two factor of
1190 // the original count which satisfies the threshold limit.
1191 while (UP.Count != 0 &&
1193 UP.Count >>= 1;
1194
1195#ifndef NDEBUG
1196 unsigned OrigCount = UP.Count;
1197#endif
1198
1199 if (!UP.AllowRemainder && UP.Count != 0 && (TripMultiple % UP.Count) != 0) {
1200 while (UP.Count != 0 && TripMultiple % UP.Count != 0)
1201 UP.Count >>= 1;
1203 << "Remainder loop is restricted (that could be architecture "
1204 "specific or because the loop contains a convergent "
1205 "instruction), so unroll count must divide the trip "
1206 "multiple, "
1207 << TripMultiple << ". Reducing unroll count from " << OrigCount
1208 << " to " << UP.Count << ".\n");
1209 }
1210
1211 if (UP.Count > UP.MaxCount)
1212 UP.Count = UP.MaxCount;
1213
1214 if (MaxTripCount && UP.Count > MaxTripCount)
1215 UP.Count = MaxTripCount;
1216
1217 if (UP.Count < 2)
1218 UP.Count = 0;
1219 else
1221 << "Runtime unrolling with count: " << UP.Count << "\n");
1222 return;
1223}
1224
1225static LoopUnrollResult
1229 ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel,
1230 bool OnlyFullUnroll, bool OnlyWhenForced, bool ForgetAllSCEV,
1231 std::optional<unsigned> ProvidedCount,
1232 std::optional<unsigned> ProvidedThreshold,
1233 std::optional<bool> ProvidedAllowPartial,
1234 std::optional<bool> ProvidedRuntime,
1235 std::optional<bool> ProvidedUpperBound,
1236 std::optional<bool> ProvidedAllowPeeling,
1237 std::optional<bool> ProvidedAllowProfileBasedPeeling,
1238 std::optional<unsigned> ProvidedFullUnrollMaxCount,
1239 AAResults *AA = nullptr) {
1240
1241 LLVM_DEBUG(dbgs() << "Loop Unroll: F["
1242 << L->getHeader()->getParent()->getName() << "] Loop %"
1243 << L->getHeader()->getName()
1244 << " (depth=" << L->getLoopDepth() << ")\n");
1246 if (TM & TM_Disable) {
1247 LLVM_DEBUG(dbgs().indent(1) << "Not unrolling: transformation disabled by "
1248 << "metadata.\n");
1250 }
1251
1252 // If this loop isn't forced to be unrolled, avoid unrolling it when the
1253 // parent loop has an explicit unroll-and-jam pragma. This is to prevent
1254 // automatic unrolling from interfering with the user requested
1255 // transformation.
1256 Loop *ParentL = L->getParentLoop();
1257 if (ParentL != nullptr &&
1260 LLVM_DEBUG(dbgs().indent(1) << "Not unrolling loop since parent loop has"
1261 << " llvm.loop.unroll_and_jam.\n");
1263 }
1264
1265 // If this loop isn't forced to be unrolled, avoid unrolling it when the
1266 // loop has an explicit unroll-and-jam pragma. This is to prevent automatic
1267 // unrolling from interfering with the user requested transformation.
1270 LLVM_DEBUG(
1271 dbgs().indent(1)
1272 << "Not unrolling loop since it has llvm.loop.unroll_and_jam.\n");
1274 }
1275
1276 if (!L->isLoopSimplifyForm()) {
1278 << "Not unrolling loop which is not in loop-simplify form.\n");
1279 if (TM & TM_ForcedByUser) {
1280 ORE.emit([&]() {
1281 return OptimizationRemarkMissed(DEBUG_TYPE, "NotInLoopSimplifyForm",
1282 L->getStartLoc(), L->getHeader())
1283 << "unable to unroll loop: not in loop-simplify form";
1284 });
1285 }
1287 }
1288
1289 // When automatic unrolling is disabled, do not unroll unless overridden for
1290 // this loop.
1291 if (OnlyWhenForced && !(TM & TM_Enable)) {
1292 LLVM_DEBUG(dbgs().indent(1) << "Not unrolling: automatic unrolling "
1293 << "disabled and loop not explicitly "
1294 << "enabled.\n");
1296 }
1297
1298 bool OptForSize = L->getHeader()->getParent()->hasOptSize();
1300 L, SE, TTI, BFI, PSI, ORE, OptLevel, ProvidedThreshold, ProvidedCount,
1301 ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
1302 ProvidedFullUnrollMaxCount);
1304 L, SE, TTI, ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling, true);
1305
1306 // Exit early if unrolling is disabled. For OptForSize, we pick the loop size
1307 // as threshold later on.
1308 if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0) &&
1309 !OptForSize) {
1310 LLVM_DEBUG(dbgs().indent(1) << "Not unrolling: all thresholds are zero.\n");
1311 if (TM & TM_ForcedByUser) {
1312 ORE.emit([&]() {
1313 return OptimizationRemarkMissed(DEBUG_TYPE, "UnrollThresholdsZero",
1314 L->getStartLoc(), L->getHeader())
1315 << "unable to unroll loop: unroll threshold is zero";
1316 });
1317 }
1319 }
1320
1322 CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
1323
1324 UnrollCostEstimator UCE(L, TTI, EphValues, UP.BEInsns);
1325 if (!UCE.canUnroll((TM & TM_ForcedByUser) ? &ORE : nullptr, L))
1327
1328 unsigned LoopSize = UCE.getRolledLoopSize();
1329 LLVM_DEBUG(dbgs() << "Loop Size = " << LoopSize << "\n");
1330
1331 // When optimizing for size, use LoopSize + 1 as threshold (we use < Threshold
1332 // later), to (fully) unroll loops, if it does not increase code size.
1333 if (OptForSize)
1334 UP.Threshold = std::max(UP.Threshold, LoopSize + 1);
1335
1336 if (UCE.NumInlineCandidates != 0) {
1338 << "Not unrolling loop with inlinable calls.\n");
1339 if (TM & TM_ForcedByUser) {
1340 ORE.emit([&]() {
1342 "InlineCandidatesPreventUnroll",
1343 L->getStartLoc(), L->getHeader())
1344 << "unable to unroll loop: contains inlinable calls";
1345 });
1346 }
1348 }
1349
1350 // Find the smallest exact trip count for any exit. This is an upper bound
1351 // on the loop trip count, but an exit at an earlier iteration is still
1352 // possible. An unroll by the smallest exact trip count guarantees that all
1353 // branches relating to at least one exit can be eliminated. This is unlike
1354 // the max trip count, which only guarantees that the backedge can be broken.
1355 unsigned TripCount = 0;
1356 unsigned TripMultiple = 1;
1357 SmallVector<BasicBlock *, 8> ExitingBlocks;
1358 L->getExitingBlocks(ExitingBlocks);
1359 for (BasicBlock *ExitingBlock : ExitingBlocks)
1360 if (unsigned TC = SE.getSmallConstantTripCount(L, ExitingBlock))
1361 if (!TripCount || TC < TripCount)
1362 TripCount = TripMultiple = TC;
1363
1364 if (!TripCount) {
1365 // If no exact trip count is known, determine the trip multiple of either
1366 // the loop latch or the single exiting block.
1367 // TODO: Relax for multiple exits.
1368 BasicBlock *ExitingBlock = L->getLoopLatch();
1369 if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
1370 ExitingBlock = L->getExitingBlock();
1371 if (ExitingBlock)
1372 TripMultiple = SE.getSmallConstantTripMultiple(L, ExitingBlock);
1373 }
1374
1375 // If the loop contains a convergent operation, the prelude we'd add
1376 // to do the first few instructions before we hit the unrolled loop
1377 // is unsafe -- it adds a control-flow dependency to the convergent
1378 // operation. Therefore restrict remainder loop (try unrolling without).
1379 //
1380 // TODO: This is somewhat conservative; we could allow the remainder if the
1381 // trip count is uniform.
1383
1384 // Try to find the trip count upper bound if we cannot find the exact trip
1385 // count.
1386 unsigned MaxTripCount = 0;
1387 bool MaxOrZero = false;
1388 if (!TripCount) {
1389 MaxTripCount = SE.getSmallConstantMaxTripCount(L);
1390 MaxOrZero = SE.isBackedgeTakenCountMaxOrZero(L);
1391 }
1392
1393 // computeUnrollCount() decides whether it is beneficial to use upper bound to
1394 // fully unroll the loop.
1395 computeUnrollCount(L, TTI, DT, LI, &AC, SE, EphValues, &ORE, TripCount,
1396 MaxTripCount, MaxOrZero, TripMultiple, UCE, UP, PP);
1397 if (!UP.Count) {
1399 << "Not unrolling: no viable strategy found.\n");
1400 if (TM & TM_ForcedByUser) {
1401 ORE.emit([&]() {
1402 return OptimizationRemarkMissed(DEBUG_TYPE, "NoUnrollStrategy",
1403 L->getStartLoc(), L->getHeader())
1404 << "unable to unroll loop: no viable unroll count found";
1405 });
1406 }
1408 }
1409
1411
1412 if (PP.PeelCount) {
1413 assert(UP.Count == 1 && "Cannot perform peel and unroll in the same step");
1414 LLVM_DEBUG(dbgs() << "PEELING loop %" << L->getHeader()->getName()
1415 << " with iteration count " << PP.PeelCount << "!\n");
1416 ORE.emit([&]() {
1417 return OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(),
1418 L->getHeader())
1419 << "peeled loop by " << ore::NV("PeelCount", PP.PeelCount)
1420 << " iterations";
1421 });
1422
1423 ValueToValueMapTy VMap;
1424 peelLoop(L, PP.PeelCount, PP.PeelLast, LI, &SE, DT, &AC, PreserveLCSSA,
1425 VMap);
1426 simplifyLoopAfterUnroll(L, true, LI, &SE, &DT, &AC, &TTI, L->getBlocks(),
1427 nullptr);
1428 // If the loop was peeled, we already "used up" the profile information
1429 // we had, so we don't want to unroll or peel again.
1431 L->setLoopAlreadyUnrolled();
1433 }
1434
1435 // Do not attempt partial/runtime unrolling in FullLoopUnrolling
1436 if (OnlyFullUnroll && ((!TripCount && !MaxTripCount) ||
1437 UP.Count < TripCount || UP.Count < MaxTripCount)) {
1439 << "Not attempting partial/runtime unroll in FullLoopUnroll.\n");
1441 }
1442
1443 // At this point, UP.Runtime indicates that run-time unrolling is allowed.
1444 // However, we only want to actually perform it if we don't know the trip
1445 // count and the unroll count doesn't divide the known trip multiple.
1446 // TODO: This decision should probably be pushed up into
1447 // computeUnrollCount().
1448 UP.Runtime &= TripCount == 0 && TripMultiple % UP.Count != 0;
1449
1450 // Save loop properties before it is transformed.
1451 MDNode *OrigLoopID = L->getLoopID();
1452 UnrollPragmaInfo PInfo(L);
1453 DebugLoc LoopStartLoc = L->getStartLoc();
1454 BasicBlock *LoopHeader = L->getHeader();
1455
1456 // Unroll the loop.
1457 Loop *RemainderLoop = nullptr;
1459 ULO.Count = UP.Count;
1460 ULO.Force = UP.Force;
1463 ULO.Runtime = UP.Runtime;
1464 ULO.ForgetAllSCEV = ForgetAllSCEV;
1469 LoopUnrollResult UnrollResult = UnrollLoop(
1470 L, ULO, LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop, AA);
1471 if (UnrollResult == LoopUnrollResult::Unmodified) {
1472 if (PInfo.ExplicitUnroll) {
1474 << "Failed to unroll loop as explicitly requested.\n");
1475 ORE.emit([&]() {
1476 return OptimizationRemarkMissed(DEBUG_TYPE, "FailedToUnrollAsRequested",
1477 LoopStartLoc, LoopHeader)
1478 << "failed to unroll loop as explicitly requested";
1479 });
1480 }
1482 }
1483
1484 if (PInfo.PragmaFullUnroll && ULO.Count != TripCount) {
1485 ORE.emit([&]() {
1486 return OptimizationRemarkMissed(DEBUG_TYPE, "FullUnrollAsDirectedFailed",
1487 LoopStartLoc, LoopHeader)
1488 << "unable to fully unroll loop as directed; "
1489 << "unrolled by factor " << ore::NV("UnrollCount", ULO.Count);
1490 });
1491 }
1492 if (PInfo.PragmaCount > 0 && ULO.Count != PInfo.PragmaCount) {
1493 ORE.emit([&]() {
1494 return OptimizationRemarkMissed(DEBUG_TYPE, "UnrollCountDiffers",
1495 LoopStartLoc, LoopHeader)
1496 << "unable to unroll loop with requested count "
1497 << ore::NV("RequestedCount", PInfo.PragmaCount)
1498 << "; unrolled by factor " << ore::NV("UnrollCount", ULO.Count);
1499 });
1500 }
1501
1502 if (RemainderLoop) {
1503 std::optional<MDNode *> RemainderLoopID =
1506 if (RemainderLoopID)
1507 RemainderLoop->setLoopID(*RemainderLoopID);
1508 }
1509
1510 if (UnrollResult != LoopUnrollResult::FullyUnrolled) {
1511 std::optional<MDNode *> NewLoopID =
1514 if (NewLoopID) {
1515 L->setLoopID(*NewLoopID);
1516
1517 // Do not setLoopAlreadyUnrolled if loop attributes have been specified
1518 // explicitly.
1519 return UnrollResult;
1520 }
1521 }
1522
1523 // If loop has an unroll count pragma or unrolled by explicitly set count
1524 // mark loop as unrolled to prevent unrolling beyond that requested.
1525 if (UnrollResult != LoopUnrollResult::FullyUnrolled && PInfo.ExplicitUnroll)
1526 L->setLoopAlreadyUnrolled();
1527
1528 return UnrollResult;
1529}
1530
1531namespace {
1532
1533class LoopUnroll : public LoopPass {
1534public:
1535 static char ID; // Pass ID, replacement for typeid
1536
1537 int OptLevel;
1538
1539 /// If false, use a cost model to determine whether unrolling of a loop is
1540 /// profitable. If true, only loops that explicitly request unrolling via
1541 /// metadata are considered. All other loops are skipped.
1542 bool OnlyWhenForced;
1543
1544 /// If false, when SCEV is invalidated, only forget everything in the
1545 /// top-most loop (call forgetTopMostLoop), of the loop being processed.
1546 /// Otherwise, forgetAllLoops and rebuild when needed next.
1547 bool ForgetAllSCEV;
1548
1549 std::optional<unsigned> ProvidedCount;
1550 std::optional<unsigned> ProvidedThreshold;
1551 std::optional<bool> ProvidedAllowPartial;
1552 std::optional<bool> ProvidedRuntime;
1553 std::optional<bool> ProvidedUpperBound;
1554 std::optional<bool> ProvidedAllowPeeling;
1555 std::optional<bool> ProvidedAllowProfileBasedPeeling;
1556 std::optional<unsigned> ProvidedFullUnrollMaxCount;
1557
1558 LoopUnroll(int OptLevel = 2, bool OnlyWhenForced = false,
1559 bool ForgetAllSCEV = false,
1560 std::optional<unsigned> Threshold = std::nullopt,
1561 std::optional<unsigned> Count = std::nullopt,
1562 std::optional<bool> AllowPartial = std::nullopt,
1563 std::optional<bool> Runtime = std::nullopt,
1564 std::optional<bool> UpperBound = std::nullopt,
1565 std::optional<bool> AllowPeeling = std::nullopt,
1566 std::optional<bool> AllowProfileBasedPeeling = std::nullopt,
1567 std::optional<unsigned> ProvidedFullUnrollMaxCount = std::nullopt)
1568 : LoopPass(ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced),
1569 ForgetAllSCEV(ForgetAllSCEV), ProvidedCount(std::move(Count)),
1570 ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
1571 ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound),
1572 ProvidedAllowPeeling(AllowPeeling),
1573 ProvidedAllowProfileBasedPeeling(AllowProfileBasedPeeling),
1574 ProvidedFullUnrollMaxCount(ProvidedFullUnrollMaxCount) {
1576 }
1577
1578 bool runOnLoop(Loop *L, LPPassManager &LPM) override {
1579 if (skipLoop(L))
1580 return false;
1581
1582 Function &F = *L->getHeader()->getParent();
1583
1584 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1585 LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1586 ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1587 const TargetTransformInfo &TTI =
1588 getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1589 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1590 // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
1591 // pass. Function analyses need to be preserved across loop transformations
1592 // but ORE cannot be preserved (see comment before the pass definition).
1593 OptimizationRemarkEmitter ORE(&F);
1594 bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
1595
1597 L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr, PreserveLCSSA, OptLevel,
1598 /*OnlyFullUnroll*/ false, OnlyWhenForced, ForgetAllSCEV, ProvidedCount,
1599 ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime,
1600 ProvidedUpperBound, ProvidedAllowPeeling,
1601 ProvidedAllowProfileBasedPeeling, ProvidedFullUnrollMaxCount);
1602
1603 if (Result == LoopUnrollResult::FullyUnrolled)
1604 LPM.markLoopAsDeleted(*L);
1605
1606 return Result != LoopUnrollResult::Unmodified;
1607 }
1608
1609 /// This transformation requires natural loop information & requires that
1610 /// loop preheaders be inserted into the CFG...
1611 void getAnalysisUsage(AnalysisUsage &AU) const override {
1612 AU.addRequired<AssumptionCacheTracker>();
1613 AU.addRequired<TargetTransformInfoWrapperPass>();
1614 // FIXME: Loop passes are required to preserve domtree, and for now we just
1615 // recreate dom info if anything gets unrolled.
1617 }
1618};
1619
1620} // end anonymous namespace
1621
1622char LoopUnroll::ID = 0;
1623
1624INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
1628INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
1629
1630Pass *llvm::createLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
1631 bool ForgetAllSCEV, int Threshold, int Count,
1632 int AllowPartial, int Runtime, int UpperBound,
1633 int AllowPeeling) {
1634 // TODO: It would make more sense for this function to take the optionals
1635 // directly, but that's dangerous since it would silently break out of tree
1636 // callers.
1637 return new LoopUnroll(
1638 OptLevel, OnlyWhenForced, ForgetAllSCEV,
1639 Threshold == -1 ? std::nullopt : std::optional<unsigned>(Threshold),
1640 Count == -1 ? std::nullopt : std::optional<unsigned>(Count),
1641 AllowPartial == -1 ? std::nullopt : std::optional<bool>(AllowPartial),
1642 Runtime == -1 ? std::nullopt : std::optional<bool>(Runtime),
1643 UpperBound == -1 ? std::nullopt : std::optional<bool>(UpperBound),
1644 AllowPeeling == -1 ? std::nullopt : std::optional<bool>(AllowPeeling));
1645}
1646
1649 LPMUpdater &Updater) {
1650 // For the new PM, we can't use OptimizationRemarkEmitter as an analysis
1651 // pass. Function analyses need to be preserved across loop transformations
1652 // but ORE cannot be preserved (see comment before the pass definition).
1653 OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
1654
1655 // Keep track of the previous loop structure so we can identify new loops
1656 // created by unrolling.
1657 Loop *ParentL = L.getParentLoop();
1658 SmallPtrSet<Loop *, 4> OldLoops;
1659 if (ParentL)
1660 OldLoops.insert_range(*ParentL);
1661 else
1662 OldLoops.insert_range(AR.LI);
1663
1664 std::string LoopName = std::string(L.getName());
1665
1666 bool Changed =
1667 tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, ORE,
1668 /*BFI*/ nullptr, /*PSI*/ nullptr,
1669 /*PreserveLCSSA*/ true, OptLevel, /*OnlyFullUnroll*/ true,
1670 OnlyWhenForced, ForgetSCEV, /*Count*/ std::nullopt,
1671 /*Threshold*/ std::nullopt, /*AllowPartial*/ false,
1672 /*Runtime*/ false, /*UpperBound*/ false,
1673 /*AllowPeeling*/ true,
1674 /*AllowProfileBasedPeeling*/ false,
1675 /*FullUnrollMaxCount*/ std::nullopt) !=
1677 if (!Changed)
1678 return PreservedAnalyses::all();
1679
1680 // The parent must not be damaged by unrolling!
1681#ifndef NDEBUG
1682 if (ParentL)
1683 ParentL->verifyLoop();
1684#endif
1685
1686 // Unrolling can do several things to introduce new loops into a loop nest:
1687 // - Full unrolling clones child loops within the current loop but then
1688 // removes the current loop making all of the children appear to be new
1689 // sibling loops.
1690 //
1691 // When a new loop appears as a sibling loop after fully unrolling,
1692 // its nesting structure has fundamentally changed and we want to revisit
1693 // it to reflect that.
1694 //
1695 // When unrolling has removed the current loop, we need to tell the
1696 // infrastructure that it is gone.
1697 //
1698 // Finally, we support a debugging/testing mode where we revisit child loops
1699 // as well. These are not expected to require further optimizations as either
1700 // they or the loop they were cloned from have been directly visited already.
1701 // But the debugging mode allows us to check this assumption.
1702 bool IsCurrentLoopValid = false;
1703 SmallVector<Loop *, 4> SibLoops;
1704 if (ParentL)
1705 SibLoops.append(ParentL->begin(), ParentL->end());
1706 else
1707 SibLoops.append(AR.LI.begin(), AR.LI.end());
1708 erase_if(SibLoops, [&](Loop *SibLoop) {
1709 if (SibLoop == &L) {
1710 IsCurrentLoopValid = true;
1711 return true;
1712 }
1713
1714 // Otherwise erase the loop from the list if it was in the old loops.
1715 return OldLoops.contains(SibLoop);
1716 });
1717 Updater.addSiblingLoops(SibLoops);
1718
1719 if (!IsCurrentLoopValid) {
1720 Updater.markLoopAsDeleted(L, LoopName);
1721 } else {
1722 // We can only walk child loops if the current loop remained valid.
1724 // Walk *all* of the child loops.
1725 SmallVector<Loop *, 4> ChildLoops(L.begin(), L.end());
1726 Updater.addChildLoops(ChildLoops);
1727 }
1728 }
1729
1731}
1732
1735 auto &LI = AM.getResult<LoopAnalysis>(F);
1736 // There are no loops in the function. Return before computing other expensive
1737 // analyses.
1738 if (LI.empty())
1739 return PreservedAnalyses::all();
1740 auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
1741 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1742 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1743 auto &AC = AM.getResult<AssumptionAnalysis>(F);
1746
1747 LoopAnalysisManager *LAM = nullptr;
1748 if (auto *LAMProxy = AM.getCachedResult<LoopAnalysisManagerFunctionProxy>(F))
1749 LAM = &LAMProxy->getManager();
1750
1751 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
1752 ProfileSummaryInfo *PSI =
1753 MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
1754 auto *BFI = (PSI && PSI->hasProfileSummary()) ?
1755 &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
1756
1757 bool Changed = false;
1758
1759 // The unroller requires loops to be in simplified form, and also needs LCSSA.
1760 // Since simplification may add new inner loops, it has to run before the
1761 // legality and profitability checks. This means running the loop unroller
1762 // will simplify all loops, regardless of whether anything end up being
1763 // unrolled.
1764 for (const auto &L : LI) {
1765 Changed |=
1766 simplifyLoop(L, &DT, &LI, &SE, &AC, nullptr, false /* PreserveLCSSA */);
1767 Changed |= formLCSSARecursively(*L, DT, &LI, &SE);
1768 }
1769
1770 // Add the loop nests in the reverse order of LoopInfo. See method
1771 // declaration.
1773 appendLoopsToWorklist(LI, Worklist);
1774
1775 while (!Worklist.empty()) {
1776 // Because the LoopInfo stores the loops in RPO, we walk the worklist
1777 // from back to front so that we work forward across the CFG, which
1778 // for unrolling is only needed to get optimization remarks emitted in
1779 // a forward order.
1780 Loop &L = *Worklist.pop_back_val();
1781#ifndef NDEBUG
1782 Loop *ParentL = L.getParentLoop();
1783#endif
1784
1785 // Check if the profile summary indicates that the profiled application
1786 // has a huge working set size, in which case we disable peeling to avoid
1787 // bloating it further.
1788 std::optional<bool> LocalAllowPeeling = UnrollOpts.AllowPeeling;
1789 if (PSI && PSI->hasHugeWorkingSetSize())
1790 LocalAllowPeeling = false;
1791 std::string LoopName = std::string(L.getName());
1792 // The API here is quite complex to call and we allow to select some
1793 // flavors of unrolling during construction time (by setting UnrollOpts).
1795 &L, DT, &LI, SE, TTI, AC, ORE, BFI, PSI,
1796 /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, /*OnlyFullUnroll*/ false,
1797 UnrollOpts.OnlyWhenForced, UnrollOpts.ForgetSCEV,
1798 /*Count*/ std::nullopt,
1799 /*Threshold*/ std::nullopt, UnrollOpts.AllowPartial,
1800 UnrollOpts.AllowRuntime, UnrollOpts.AllowUpperBound, LocalAllowPeeling,
1801 UnrollOpts.AllowProfileBasedPeeling, UnrollOpts.FullUnrollMaxCount,
1802 &AA);
1804
1805 // The parent must not be damaged by unrolling!
1806#ifndef NDEBUG
1807 if (Result != LoopUnrollResult::Unmodified && ParentL)
1808 ParentL->verifyLoop();
1809#endif
1810
1811 // Clear any cached analysis results for L if we removed it completely.
1812 if (LAM && Result == LoopUnrollResult::FullyUnrolled)
1813 LAM->clear(L, LoopName);
1814 }
1815
1816 if (!Changed)
1817 return PreservedAnalyses::all();
1818
1820}
1821
1823 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1824 static_cast<PassInfoMixin<LoopUnrollPass> *>(this)->printPipeline(
1825 OS, MapClassName2PassName);
1826 OS << '<';
1827 if (UnrollOpts.AllowPartial != std::nullopt)
1828 OS << (*UnrollOpts.AllowPartial ? "" : "no-") << "partial;";
1829 if (UnrollOpts.AllowPeeling != std::nullopt)
1830 OS << (*UnrollOpts.AllowPeeling ? "" : "no-") << "peeling;";
1831 if (UnrollOpts.AllowRuntime != std::nullopt)
1832 OS << (*UnrollOpts.AllowRuntime ? "" : "no-") << "runtime;";
1833 if (UnrollOpts.AllowUpperBound != std::nullopt)
1834 OS << (*UnrollOpts.AllowUpperBound ? "" : "no-") << "upperbound;";
1835 if (UnrollOpts.AllowProfileBasedPeeling != std::nullopt)
1836 OS << (*UnrollOpts.AllowProfileBasedPeeling ? "" : "no-")
1837 << "profile-peeling;";
1838 if (UnrollOpts.FullUnrollMaxCount != std::nullopt)
1839 OS << "full-unroll-max=" << UnrollOpts.FullUnrollMaxCount << ';';
1840 OS << 'O' << UnrollOpts.OptLevel;
1841 OS << '>';
1842}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Rewrite undef for PHI
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines DenseMapInfo traits for DenseMap.
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
#define DEBUG_TYPE
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
This header provides classes for managing per-loop analyses.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
static cl::opt< unsigned > UnrollMaxCount("unroll-max-count", cl::Hidden, cl::desc("Set the max unroll count for partial and runtime unrolling, for" "testing purposes"))
static cl::opt< unsigned > UnrollCount("unroll-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with " "unroll_count pragma values, for testing purposes"))
static cl::opt< unsigned > UnrollThresholdDefault("unroll-threshold-default", cl::init(150), cl::Hidden, cl::desc("Default threshold (max size of unrolled " "loop), used in all but O3 optimizations"))
static cl::opt< unsigned > FlatLoopTripCountThreshold("flat-loop-tripcount-threshold", cl::init(5), cl::Hidden, cl::desc("If the runtime tripcount for the loop is lower than the " "threshold, the loop is considered as flat and will be less " "aggressively unrolled."))
static cl::opt< unsigned > UnrollOptSizeThreshold("unroll-optsize-threshold", cl::init(0), cl::Hidden, cl::desc("The cost threshold for loop unrolling when optimizing for " "size"))
static bool hasUnrollFullPragma(const Loop *L)
static cl::opt< bool > UnrollUnrollRemainder("unroll-remainder", cl::Hidden, cl::desc("Allow the loop remainder to be unrolled."))
static unsigned unrollCountPragmaValue(const Loop *L)
static bool hasUnrollEnablePragma(const Loop *L)
static cl::opt< unsigned > PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 *1024), cl::Hidden, cl::desc("Unrolled size limit for loops with unroll metadata " "(full, enable, or count)."))
static cl::opt< unsigned > UnrollFullMaxCount("unroll-full-max-count", cl::Hidden, cl::desc("Set the max unroll count for full unrolling, for testing purposes"))
static cl::opt< unsigned > UnrollMaxUpperBound("unroll-max-upperbound", cl::init(8), cl::Hidden, cl::desc("The max of trip count upper bound that is considered in unrolling"))
static std::optional< unsigned > shouldPragmaUnroll(Loop *L, const UnrollPragmaInfo &PInfo, const unsigned TripMultiple, const unsigned TripCount, unsigned MaxTripCount, const UnrollCostEstimator UCE, const TargetTransformInfo::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
static std::optional< unsigned > shouldFullUnroll(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE, const TargetTransformInfo::UnrollingPreferences &UP)
static std::optional< EstimatedUnrollCost > analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize, unsigned MaxIterationsCountToAnalyze)
Figure out if the loop is worth full unrolling.
static cl::opt< unsigned > UnrollPartialThreshold("unroll-partial-threshold", cl::Hidden, cl::desc("The cost threshold for partial loop unrolling"))
static cl::opt< bool > UnrollAllowRemainder("unroll-allow-remainder", cl::Hidden, cl::desc("Allow generation of a loop remainder (extra iterations) " "when unrolling a loop."))
static std::optional< unsigned > shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount, const UnrollCostEstimator UCE, const TargetTransformInfo::UnrollingPreferences &UP)
static cl::opt< unsigned > PragmaUnrollFullMaxIterations("pragma-unroll-full-max-iterations", cl::init(1 '000 '000), cl::Hidden, cl::desc("Maximum allowed iterations to unroll under pragma unroll full."))
static const unsigned NoThreshold
A magic value for use with the Threshold parameter to indicate that the loop unroll should be perform...
static cl::opt< bool > UnrollRevisitChildLoops("unroll-revisit-child-loops", cl::Hidden, cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. " "This shouldn't typically be needed as child loops (or their " "clones) were already visited."))
static cl::opt< unsigned > UnrollThreshold("unroll-threshold", cl::Hidden, cl::desc("The cost threshold for loop unrolling"))
static cl::opt< bool > UnrollRuntime("unroll-runtime", cl::Hidden, cl::desc("Unroll loops with run-time trip counts"))
static LoopUnrollResult tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel, bool OnlyFullUnroll, bool OnlyWhenForced, bool ForgetAllSCEV, std::optional< unsigned > ProvidedCount, std::optional< unsigned > ProvidedThreshold, std::optional< bool > ProvidedAllowPartial, std::optional< bool > ProvidedRuntime, std::optional< bool > ProvidedUpperBound, std::optional< bool > ProvidedAllowPeeling, std::optional< bool > ProvidedAllowProfileBasedPeeling, std::optional< unsigned > ProvidedFullUnrollMaxCount, AAResults *AA=nullptr)
static bool hasRuntimeUnrollDisablePragma(const Loop *L)
static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost, unsigned MaxPercentThresholdBoost)
static cl::opt< unsigned > UnrollThresholdAggressive("unroll-threshold-aggressive", cl::init(300), cl::Hidden, cl::desc("Threshold (max size of unrolled loop) to use in aggressive (O3) " "optimizations"))
static cl::opt< unsigned > UnrollMaxIterationsCountToAnalyze("unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden, cl::desc("Don't allow loop unrolling to simulate more than this number of " "iterations when checking full unroll profitability"))
static cl::opt< unsigned > UnrollMaxPercentThresholdBoost("unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden, cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied " "to the threshold when aggressively unrolling a loop due to the " "dynamic cost savings. If completely unrolling a loop will reduce " "the total runtime from X to Y, we boost the loop unroll " "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, " "X/Y). This limit avoids excessive code bloat."))
static cl::opt< bool > UnrollAllowPartial("unroll-allow-partial", cl::Hidden, cl::desc("Allows loops to be partially unrolled until " "-unroll-threshold loop size is reached."))
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Trace Metrics
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file contains the declarations for metadata subclasses.
LoopAnalysisManager LAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
A manager for alias analyses.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional Branch instruction.
This is the shared class of boolean and integer constants.
Definition Constants.h:87
This is an important base class in LLVM.
Definition Constant.h:43
A debug info location.
Definition DebugLoc.h:123
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition DenseMap.h:205
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:174
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
Implements a dense probed hash-table based set.
Definition DenseSet.h:279
Analysis pass which computes a DominatorTree.
Definition Dominators.h:278
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:711
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
void addChildLoops(ArrayRef< Loop * > NewChildLoops)
Loop passes should use this method to indicate they have added new child loops of the current loop.
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
void addSiblingLoops(ArrayRef< Loop * > NewSibLoops)
Loop passes should use this method to indicate they have added new sibling loops to the current loop.
void markLoopAsDeleted(Loop &L)
Definition LoopPass.cpp:111
Analysis pass that exposes the LoopInfo for a function.
Definition LoopInfo.h:569
void verifyLoop() const
Verify loop structure.
iterator end() const
iterator begin() const
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
iterator end() const
iterator begin() const
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition LoopInfo.cpp:547
Metadata node.
Definition Metadata.h:1080
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1444
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1450
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
Definition Pass.h:99
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
bool empty() const
Determine if the PriorityWorklist is empty or not.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
Analysis pass that exposes the ScalarEvolution for a function.
The main scalar evolution driver.
LLVM_ABI unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value,...
LLVM_ABI unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns the upper bound of the loop trip count as a normal unsigned value.
LLVM_ABI bool isBackedgeTakenCountMaxOrZero(const Loop *L)
Return true if the backedge taken count is either the value returned by getConstantMaxBackedgeTakenCo...
LLVM_ABI unsigned getSmallConstantTripCount(const Loop *L)
Returns the exact trip count of the loop if we can compute it, and the result is a small constant.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
void clear()
Completely clear the SetVector.
Definition SetVector.h:267
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:100
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
value_type pop_back_val()
Definition SetVector.h:279
A version of PriorityWorklist that selects small size optimized data structures for the vector and ma...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Multiway switch.
Analysis pass providing the TargetTransformInfo.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
Produce an estimate of the unrolled cost of the specified loop.
Definition UnrollLoop.h:151
ConvergenceKind Convergence
Definition UnrollLoop.h:157
LLVM_ABI uint64_t getUnrolledLoopSize(const TargetTransformInfo::UnrollingPreferences &UP, unsigned CountOverwrite=0) const
Returns loop size estimation for unrolled loop, given the unrolling configuration specified by UP.
LLVM_ABI bool canUnroll(OptimizationRemarkEmitter *ORE=nullptr, const Loop *L=nullptr) const
Whether it is legal to unroll this loop.
LLVM_ABI UnrollCostEstimator(const Loop *L, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value * > &EphValues, unsigned BEInsns)
uint64_t getRolledLoopSize() const
Definition UnrollLoop.h:169
void visit(Iterator Start, Iterator End)
Definition InstVisitor.h:87
LLVM Value Representation.
Definition Value.h:75
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
iterator find(const_arg_type_t< ValueT > V)
Definition DenseSet.h:167
An efficient, type-erasing, non-owning reference to a callable.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
raw_ostream & indent(unsigned NumSpaces)
indent - Insert 'NumSpaces' spaces.
Changed
Abstract Attribute helper functions.
Definition Attributor.h:165
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
LLVM_ABI std::optional< unsigned > getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight=nullptr)
Return either:
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto successors(const MachineBasicBlock *BB)
@ Runtime
Detect stack use after return if not disabled runtime with (ASAN_OPTIONS=detect_stack_use_after_retur...
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
LLVM_ABI bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition LCSSA.cpp:449
LLVM_ABI std::optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
LLVM_ABI bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
LLVM_ABI char & LCSSAID
Definition LCSSA.cpp:526
LLVM_ABI void simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI, ArrayRef< BasicBlock * > Blocks, AAResults *AA=nullptr)
Perform some cleanup and simplifications on loops after unrolling.
LLVM_ABI Pass * createLoopUnrollPass(int OptLevel=2, bool OnlyWhenForced=false, bool ForgetAllSCEV=false, int Threshold=-1, int Count=-1, int AllowPartial=-1, int Runtime=-1, int UpperBound=-1, int AllowPeeling=-1)
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:2026
LLVM_ABI void initializeLoopUnrollPass(PassRegistry &)
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
LLVM_ABI CallBase * getLoopConvergenceHeart(const Loop *TheLoop)
Find the convergence heart of the loop.
LLVM_ABI TransformationMode hasUnrollAndJamTransformation(const Loop *L)
cl::opt< bool > ForgetSCEVInLoopUnroll
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::PeelingPreferences &PP, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache *AC=nullptr, unsigned Threshold=UINT_MAX)
Definition LoopPeel.cpp:752
LLVM_TEMPLATE_ABI void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
LLVM_ABI cl::opt< unsigned > SCEVCheapExpansionBudget
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
LLVM_ABI TransformationMode hasUnrollTransformation(const Loop *L)
LoopUnrollResult
Represents the result of a UnrollLoop invocation.
Definition UnrollLoop.h:58
@ PartiallyUnrolled
The loop was partially unrolled – we still have a loop, but with a smaller trip count.
Definition UnrollLoop.h:65
@ Unmodified
The loop was not modified.
Definition UnrollLoop.h:60
@ FullyUnrolled
The loop was fully unrolled into straight-line code.
Definition UnrollLoop.h:69
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
void peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI, ScalarEvolution *SE, DominatorTree &DT, AssumptionCache *AC, bool PreserveLCSSA, ValueToValueMapTy &VMap)
VMap is the value-map that maps instructions from the original loop to instructions in the last peele...
const char *const LLVMLoopUnrollFollowupAll
Definition UnrollLoop.h:45
TargetTransformInfo TTI
TransformationMode
The mode sets how eager a transformation should be applied.
Definition LoopUtils.h:283
@ TM_ForcedByUser
The transformation was directed by the user, e.g.
Definition LoopUtils.h:300
@ TM_Disable
The transformation should not be applied.
Definition LoopUtils.h:292
@ TM_Enable
The transformation should be applied without considering a cost model.
Definition LoopUtils.h:289
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2012
LLVM_ABI MDNode * getUnrollMetadataForLoop(const Loop *L, StringRef Name)
DWARFExpression::Operation Op
LLVM_ABI TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1917
const char *const LLVMLoopUnrollFollowupRemainder
Definition UnrollLoop.h:48
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
const char *const LLVMLoopUnrollFollowupUnrolled
Definition UnrollLoop.h:46
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2192
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI LoopUnrollResult UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const llvm::TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop=nullptr, AAResults *AA=nullptr)
Unroll the given loop by Count.
LLVM_ABI void computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP)
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition CodeMetrics.h:34
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition PassManager.h:70
bool PeelLast
Peel off the last PeelCount loop iterations.
bool PeelProfiledIterations
Allow peeling basing on profile.
unsigned PeelCount
A forced peeling factor (the number of bodied of the original loop that should be peeled off before t...
Parameters that control the generic loop unrolling transformation.
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
bool UpperBound
Allow using trip count upper bound to unroll loops.
unsigned Threshold
The cost threshold for the unrolled loop.
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
unsigned DefaultUnrollRuntimeCount
Default unroll count for loops with run-time trip count.
unsigned MaxPercentThresholdBoost
If complete unrolling will reduce the cost of the loop, we will boost the Threshold by a certain perc...
bool RuntimeUnrollMultiExit
Allow runtime unrolling multi-exit loops.
unsigned SCEVExpansionBudget
Don't allow runtime unrolling if expanding the trip count takes more than SCEVExpansionBudget.
bool AddAdditionalAccumulators
Allow unrolling to add parallel reduction phis.
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
unsigned MaxIterationsCountToAnalyze
Don't allow loop unrolling to simulate more than this number of iterations when checking full unroll ...
bool AllowRemainder
Allow generation of a loop remainder (extra iterations after unroll).
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
unsigned FullUnrollMaxCount
Set the maximum unrolling factor for full unrolling.
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for...
unsigned MaxUpperBound
Set the maximum upper bound of trip count.
const Instruction * Heart
Definition UnrollLoop.h:79
const bool PragmaFullUnroll
Definition UnrollLoop.h:131
UnrollPragmaInfo(const Loop *L)
const unsigned PragmaCount
Definition UnrollLoop.h:132
const bool ExplicitUnroll
Definition UnrollLoop.h:135
const bool PragmaRuntimeUnrollDisable
Definition UnrollLoop.h:134
const bool UserUnrollCount
Definition UnrollLoop.h:130
const bool PragmaEnableUnroll
Definition UnrollLoop.h:133