LLVM  15.0.0git
LoopUnrollPass.cpp
Go to the documentation of this file.
1 //===- LoopUnroll.cpp - Loop unroller pass --------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass implements a simple loop unroller. It works best when loops have
10 // been canonicalized by the -indvars pass, allowing it to determine the trip
11 // counts of loops easily.
12 //===----------------------------------------------------------------------===//
13 
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/DenseMapInfo.h"
17 #include "llvm/ADT/DenseSet.h"
18 #include "llvm/ADT/None.h"
19 #include "llvm/ADT/Optional.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SetVector.h"
22 #include "llvm/ADT/SmallPtrSet.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/StringRef.h"
29 #include "llvm/Analysis/LoopInfo.h"
30 #include "llvm/Analysis/LoopPass.h"
36 #include "llvm/IR/BasicBlock.h"
37 #include "llvm/IR/CFG.h"
38 #include "llvm/IR/Constant.h"
39 #include "llvm/IR/Constants.h"
40 #include "llvm/IR/DiagnosticInfo.h"
41 #include "llvm/IR/Dominators.h"
42 #include "llvm/IR/Function.h"
43 #include "llvm/IR/Instruction.h"
44 #include "llvm/IR/Instructions.h"
45 #include "llvm/IR/IntrinsicInst.h"
46 #include "llvm/IR/Metadata.h"
47 #include "llvm/IR/PassManager.h"
48 #include "llvm/InitializePasses.h"
49 #include "llvm/Pass.h"
50 #include "llvm/Support/Casting.h"
52 #include "llvm/Support/Debug.h"
55 #include "llvm/Transforms/Scalar.h"
57 #include "llvm/Transforms/Utils.h"
63 #include <algorithm>
64 #include <cassert>
65 #include <cstdint>
66 #include <limits>
67 #include <string>
68 #include <tuple>
69 #include <utility>
70 
71 using namespace llvm;
72 
73 #define DEBUG_TYPE "loop-unroll"
74 
76  "forget-scev-loop-unroll", cl::init(false), cl::Hidden,
77  cl::desc("Forget everything in SCEV when doing LoopUnroll, instead of just"
78  " the current top-most loop. This is sometimes preferred to reduce"
79  " compile time."));
80 
81 static cl::opt<unsigned>
82  UnrollThreshold("unroll-threshold", cl::Hidden,
83  cl::desc("The cost threshold for loop unrolling"));
84 
85 static cl::opt<unsigned>
87  "unroll-optsize-threshold", cl::init(0), cl::Hidden,
88  cl::desc("The cost threshold for loop unrolling when optimizing for "
89  "size"));
90 
92  "unroll-partial-threshold", cl::Hidden,
93  cl::desc("The cost threshold for partial loop unrolling"));
94 
96  "unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden,
97  cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied "
98  "to the threshold when aggressively unrolling a loop due to the "
99  "dynamic cost savings. If completely unrolling a loop will reduce "
100  "the total runtime from X to Y, we boost the loop unroll "
101  "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, "
102  "X/Y). This limit avoids excessive code bloat."));
103 
105  "unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden,
106  cl::desc("Don't allow loop unrolling to simulate more than this number of"
107  "iterations when checking full unroll profitability"));
108 
110  "unroll-count", cl::Hidden,
111  cl::desc("Use this unroll count for all loops including those with "
112  "unroll_count pragma values, for testing purposes"));
113 
115  "unroll-max-count", cl::Hidden,
116  cl::desc("Set the max unroll count for partial and runtime unrolling, for"
117  "testing purposes"));
118 
120  "unroll-full-max-count", cl::Hidden,
121  cl::desc(
122  "Set the max unroll count for full unrolling, for testing purposes"));
123 
124 static cl::opt<bool>
125  UnrollAllowPartial("unroll-allow-partial", cl::Hidden,
126  cl::desc("Allows loops to be partially unrolled until "
127  "-unroll-threshold loop size is reached."));
128 
130  "unroll-allow-remainder", cl::Hidden,
131  cl::desc("Allow generation of a loop remainder (extra iterations) "
132  "when unrolling a loop."));
133 
134 static cl::opt<bool>
135  UnrollRuntime("unroll-runtime", cl::Hidden,
136  cl::desc("Unroll loops with run-time trip counts"));
137 
139  "unroll-max-upperbound", cl::init(8), cl::Hidden,
140  cl::desc(
141  "The max of trip count upper bound that is considered in unrolling"));
142 
144  "pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden,
145  cl::desc("Unrolled size limit for loops with an unroll(full) or "
146  "unroll_count pragma."));
147 
149  "flat-loop-tripcount-threshold", cl::init(5), cl::Hidden,
150  cl::desc("If the runtime tripcount for the loop is lower than the "
151  "threshold, the loop is considered as flat and will be less "
152  "aggressively unrolled."));
153 
155  "unroll-remainder", cl::Hidden,
156  cl::desc("Allow the loop remainder to be unrolled."));
157 
158 // This option isn't ever intended to be enabled, it serves to allow
159 // experiments to check the assumptions about when this kind of revisit is
160 // necessary.
162  "unroll-revisit-child-loops", cl::Hidden,
163  cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. "
164  "This shouldn't typically be needed as child loops (or their "
165  "clones) were already visited."));
166 
168  "unroll-threshold-aggressive", cl::init(300), cl::Hidden,
169  cl::desc("Threshold (max size of unrolled loop) to use in aggressive (O3) "
170  "optimizations"));
171 static cl::opt<unsigned>
172  UnrollThresholdDefault("unroll-threshold-default", cl::init(150),
173  cl::Hidden,
174  cl::desc("Default threshold (max size of unrolled "
175  "loop), used in all but O3 optimizations"));
176 
177 /// A magic value for use with the Threshold parameter to indicate
178 /// that the loop unroll should be performed regardless of how much
179 /// code expansion would result.
181 
182 /// Gather the various unrolling parameters based on the defaults, compiler
183 /// flags, TTI overrides and user specified parameters.
187  OptimizationRemarkEmitter &ORE, int OptLevel,
188  Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
189  Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
190  Optional<bool> UserUpperBound, Optional<unsigned> UserFullUnrollMaxCount) {
192 
193  // Set up the defaults
194  UP.Threshold =
196  UP.MaxPercentThresholdBoost = 400;
198  UP.PartialThreshold = 150;
200  UP.Count = 0;
204  UP.BEInsns = 2;
205  UP.Partial = false;
206  UP.Runtime = false;
207  UP.AllowRemainder = true;
208  UP.UnrollRemainder = false;
209  UP.AllowExpensiveTripCount = false;
210  UP.Force = false;
211  UP.UpperBound = false;
212  UP.UnrollAndJam = false;
215 
216  // Override with any target specific settings
217  TTI.getUnrollingPreferences(L, SE, UP, &ORE);
218 
219  // Apply size attributes
220  bool OptForSize = L->getHeader()->getParent()->hasOptSize() ||
221  // Let unroll hints / pragmas take precedence over PGSO.
225  if (OptForSize) {
226  UP.Threshold = UP.OptSizeThreshold;
228  UP.MaxPercentThresholdBoost = 100;
229  }
230 
231  // Apply any user values specified by cl::opt
232  if (UnrollThreshold.getNumOccurrences() > 0)
234  if (UnrollPartialThreshold.getNumOccurrences() > 0)
236  if (UnrollMaxPercentThresholdBoost.getNumOccurrences() > 0)
238  if (UnrollMaxCount.getNumOccurrences() > 0)
240  if (UnrollFullMaxCount.getNumOccurrences() > 0)
247  UP.Runtime = UnrollRuntime;
248  if (UnrollMaxUpperBound == 0)
249  UP.UpperBound = false;
252  if (UnrollMaxIterationsCountToAnalyze.getNumOccurrences() > 0)
254 
255  // Apply user values provided by argument
256  if (UserThreshold) {
257  UP.Threshold = *UserThreshold;
258  UP.PartialThreshold = *UserThreshold;
259  }
260  if (UserCount)
261  UP.Count = *UserCount;
262  if (UserAllowPartial)
263  UP.Partial = *UserAllowPartial;
264  if (UserRuntime)
265  UP.Runtime = *UserRuntime;
266  if (UserUpperBound)
267  UP.UpperBound = *UserUpperBound;
268  if (UserFullUnrollMaxCount)
269  UP.FullUnrollMaxCount = *UserFullUnrollMaxCount;
270 
271  return UP;
272 }
273 
274 namespace {
275 
276 /// A struct to densely store the state of an instruction after unrolling at
277 /// each iteration.
278 ///
279 /// This is designed to work like a tuple of <Instruction *, int> for the
280 /// purposes of hashing and lookup, but to be able to associate two boolean
281 /// states with each key.
282 struct UnrolledInstState {
283  Instruction *I;
284  int Iteration : 30;
285  unsigned IsFree : 1;
286  unsigned IsCounted : 1;
287 };
288 
289 /// Hashing and equality testing for a set of the instruction states.
290 struct UnrolledInstStateKeyInfo {
291  using PtrInfo = DenseMapInfo<Instruction *>;
293 
294  static inline UnrolledInstState getEmptyKey() {
295  return {PtrInfo::getEmptyKey(), 0, 0, 0};
296  }
297 
298  static inline UnrolledInstState getTombstoneKey() {
299  return {PtrInfo::getTombstoneKey(), 0, 0, 0};
300  }
301 
302  static inline unsigned getHashValue(const UnrolledInstState &S) {
303  return PairInfo::getHashValue({S.I, S.Iteration});
304  }
305 
306  static inline bool isEqual(const UnrolledInstState &LHS,
307  const UnrolledInstState &RHS) {
308  return PairInfo::isEqual({LHS.I, LHS.Iteration}, {RHS.I, RHS.Iteration});
309  }
310 };
311 
312 struct EstimatedUnrollCost {
313  /// The estimated cost after unrolling.
314  unsigned UnrolledCost;
315 
316  /// The estimated dynamic cost of executing the instructions in the
317  /// rolled form.
318  unsigned RolledDynamicCost;
319 };
320 
321 struct PragmaInfo {
322  PragmaInfo(bool UUC, bool PFU, unsigned PC, bool PEU)
323  : UserUnrollCount(UUC), PragmaFullUnroll(PFU), PragmaCount(PC),
324  PragmaEnableUnroll(PEU) {}
325  const bool UserUnrollCount;
326  const bool PragmaFullUnroll;
327  const unsigned PragmaCount;
328  const bool PragmaEnableUnroll;
329 };
330 
331 } // end anonymous namespace
332 
333 /// Figure out if the loop is worth full unrolling.
334 ///
335 /// Complete loop unrolling can make some loads constant, and we need to know
336 /// if that would expose any further optimization opportunities. This routine
337 /// estimates this optimization. It computes cost of unrolled loop
338 /// (UnrolledCost) and dynamic cost of the original loop (RolledDynamicCost). By
339 /// dynamic cost we mean that we won't count costs of blocks that are known not
340 /// to be executed (i.e. if we have a branch in the loop and we know that at the
341 /// given iteration its condition would be resolved to true, we won't add up the
342 /// cost of the 'false'-block).
343 /// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If
344 /// the analysis failed (no benefits expected from the unrolling, or the loop is
345 /// too big to analyze), the returned value is None.
347  const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE,
348  const SmallPtrSetImpl<const Value *> &EphValues,
349  const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize,
350  unsigned MaxIterationsCountToAnalyze) {
351  // We want to be able to scale offsets by the trip count and add more offsets
352  // to them without checking for overflows, and we already don't want to
353  // analyze *massive* trip counts, so we force the max to be reasonably small.
354  assert(MaxIterationsCountToAnalyze <
355  (unsigned)(std::numeric_limits<int>::max() / 2) &&
356  "The unroll iterations max is too large!");
357 
358  // Only analyze inner loops. We can't properly estimate cost of nested loops
359  // and we won't visit inner loops again anyway.
360  if (!L->isInnermost())
361  return None;
362 
363  // Don't simulate loops with a big or unknown tripcount
364  if (!TripCount || TripCount > MaxIterationsCountToAnalyze)
365  return None;
366 
369  DenseMap<Value *, Value *> SimplifiedValues;
370  SmallVector<std::pair<Value *, Value *>, 4> SimplifiedInputValues;
371 
372  // The estimated cost of the unrolled form of the loop. We try to estimate
373  // this by simplifying as much as we can while computing the estimate.
374  InstructionCost UnrolledCost = 0;
375 
376  // We also track the estimated dynamic (that is, actually executed) cost in
377  // the rolled form. This helps identify cases when the savings from unrolling
378  // aren't just exposing dead control flows, but actual reduced dynamic
379  // instructions due to the simplifications which we expect to occur after
380  // unrolling.
381  InstructionCost RolledDynamicCost = 0;
382 
383  // We track the simplification of each instruction in each iteration. We use
384  // this to recursively merge costs into the unrolled cost on-demand so that
385  // we don't count the cost of any dead code. This is essentially a map from
386  // <instruction, int> to <bool, bool>, but stored as a densely packed struct.
388 
389  // A small worklist used to accumulate cost of instructions from each
390  // observable and reached root in the loop.
391  SmallVector<Instruction *, 16> CostWorklist;
392 
393  // PHI-used worklist used between iterations while accumulating cost.
394  SmallVector<Instruction *, 4> PHIUsedList;
395 
396  // Helper function to accumulate cost for instructions in the loop.
397  auto AddCostRecursively = [&](Instruction &RootI, int Iteration) {
398  assert(Iteration >= 0 && "Cannot have a negative iteration!");
399  assert(CostWorklist.empty() && "Must start with an empty cost list");
400  assert(PHIUsedList.empty() && "Must start with an empty phi used list");
401  CostWorklist.push_back(&RootI);
403  RootI.getFunction()->hasMinSize() ?
406  for (;; --Iteration) {
407  do {
408  Instruction *I = CostWorklist.pop_back_val();
409 
410  // InstCostMap only uses I and Iteration as a key, the other two values
411  // don't matter here.
412  auto CostIter = InstCostMap.find({I, Iteration, 0, 0});
413  if (CostIter == InstCostMap.end())
414  // If an input to a PHI node comes from a dead path through the loop
415  // we may have no cost data for it here. What that actually means is
416  // that it is free.
417  continue;
418  auto &Cost = *CostIter;
419  if (Cost.IsCounted)
420  // Already counted this instruction.
421  continue;
422 
423  // Mark that we are counting the cost of this instruction now.
424  Cost.IsCounted = true;
425 
426  // If this is a PHI node in the loop header, just add it to the PHI set.
427  if (auto *PhiI = dyn_cast<PHINode>(I))
428  if (PhiI->getParent() == L->getHeader()) {
429  assert(Cost.IsFree && "Loop PHIs shouldn't be evaluated as they "
430  "inherently simplify during unrolling.");
431  if (Iteration == 0)
432  continue;
433 
434  // Push the incoming value from the backedge into the PHI used list
435  // if it is an in-loop instruction. We'll use this to populate the
436  // cost worklist for the next iteration (as we count backwards).
437  if (auto *OpI = dyn_cast<Instruction>(
438  PhiI->getIncomingValueForBlock(L->getLoopLatch())))
439  if (L->contains(OpI))
440  PHIUsedList.push_back(OpI);
441  continue;
442  }
443 
444  // First accumulate the cost of this instruction.
445  if (!Cost.IsFree) {
446  UnrolledCost += TTI.getUserCost(I, CostKind);
447  LLVM_DEBUG(dbgs() << "Adding cost of instruction (iteration "
448  << Iteration << "): ");
449  LLVM_DEBUG(I->dump());
450  }
451 
452  // We must count the cost of every operand which is not free,
453  // recursively. If we reach a loop PHI node, simply add it to the set
454  // to be considered on the next iteration (backwards!).
455  for (Value *Op : I->operands()) {
456  // Check whether this operand is free due to being a constant or
457  // outside the loop.
458  auto *OpI = dyn_cast<Instruction>(Op);
459  if (!OpI || !L->contains(OpI))
460  continue;
461 
462  // Otherwise accumulate its cost.
463  CostWorklist.push_back(OpI);
464  }
465  } while (!CostWorklist.empty());
466 
467  if (PHIUsedList.empty())
468  // We've exhausted the search.
469  break;
470 
471  assert(Iteration > 0 &&
472  "Cannot track PHI-used values past the first iteration!");
473  CostWorklist.append(PHIUsedList.begin(), PHIUsedList.end());
474  PHIUsedList.clear();
475  }
476  };
477 
478  // Ensure that we don't violate the loop structure invariants relied on by
479  // this analysis.
480  assert(L->isLoopSimplifyForm() && "Must put loop into normal form first.");
481  assert(L->isLCSSAForm(DT) &&
482  "Must have loops in LCSSA form to track live-out values.");
483 
484  LLVM_DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");
485 
487  L->getHeader()->getParent()->hasMinSize() ?
489  // Simulate execution of each iteration of the loop counting instructions,
490  // which would be simplified.
491  // Since the same load will take different values on different iterations,
492  // we literally have to go through all loop's iterations.
493  for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
494  LLVM_DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");
495 
496  // Prepare for the iteration by collecting any simplified entry or backedge
497  // inputs.
498  for (Instruction &I : *L->getHeader()) {
499  auto *PHI = dyn_cast<PHINode>(&I);
500  if (!PHI)
501  break;
502 
503  // The loop header PHI nodes must have exactly two input: one from the
504  // loop preheader and one from the loop latch.
505  assert(
506  PHI->getNumIncomingValues() == 2 &&
507  "Must have an incoming value only for the preheader and the latch.");
508 
509  Value *V = PHI->getIncomingValueForBlock(
510  Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch());
511  if (Iteration != 0 && SimplifiedValues.count(V))
512  V = SimplifiedValues.lookup(V);
513  SimplifiedInputValues.push_back({PHI, V});
514  }
515 
516  // Now clear and re-populate the map for the next iteration.
517  SimplifiedValues.clear();
518  while (!SimplifiedInputValues.empty())
519  SimplifiedValues.insert(SimplifiedInputValues.pop_back_val());
520 
521  UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE, L);
522 
523  BBWorklist.clear();
524  BBWorklist.insert(L->getHeader());
525  // Note that we *must not* cache the size, this loop grows the worklist.
526  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
527  BasicBlock *BB = BBWorklist[Idx];
528 
529  // Visit all instructions in the given basic block and try to simplify
530  // it. We don't change the actual IR, just count optimization
531  // opportunities.
532  for (Instruction &I : *BB) {
533  // These won't get into the final code - don't even try calculating the
534  // cost for them.
535  if (isa<DbgInfoIntrinsic>(I) || EphValues.count(&I))
536  continue;
537 
538  // Track this instruction's expected baseline cost when executing the
539  // rolled loop form.
540  RolledDynamicCost += TTI.getUserCost(&I, CostKind);
541 
542  // Visit the instruction to analyze its loop cost after unrolling,
543  // and if the visitor returns true, mark the instruction as free after
544  // unrolling and continue.
545  bool IsFree = Analyzer.visit(I);
546  bool Inserted = InstCostMap.insert({&I, (int)Iteration,
547  (unsigned)IsFree,
548  /*IsCounted*/ false}).second;
549  (void)Inserted;
550  assert(Inserted && "Cannot have a state for an unvisited instruction!");
551 
552  if (IsFree)
553  continue;
554 
555  // Can't properly model a cost of a call.
556  // FIXME: With a proper cost model we should be able to do it.
557  if (auto *CI = dyn_cast<CallInst>(&I)) {
558  const Function *Callee = CI->getCalledFunction();
559  if (!Callee || TTI.isLoweredToCall(Callee)) {
560  LLVM_DEBUG(dbgs() << "Can't analyze cost of loop with call\n");
561  return None;
562  }
563  }
564 
565  // If the instruction might have a side-effect recursively account for
566  // the cost of it and all the instructions leading up to it.
567  if (I.mayHaveSideEffects())
568  AddCostRecursively(I, Iteration);
569 
570  // If unrolled body turns out to be too big, bail out.
571  if (UnrolledCost > MaxUnrolledLoopSize) {
572  LLVM_DEBUG(dbgs() << " Exceeded threshold.. exiting.\n"
573  << " UnrolledCost: " << UnrolledCost
574  << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
575  << "\n");
576  return None;
577  }
578  }
579 
580  Instruction *TI = BB->getTerminator();
581 
582  auto getSimplifiedConstant = [&](Value *V) -> Constant * {
583  if (SimplifiedValues.count(V))
584  V = SimplifiedValues.lookup(V);
585  return dyn_cast<Constant>(V);
586  };
587 
588  // Add in the live successors by first checking whether we have terminator
589  // that may be simplified based on the values simplified by this call.
590  BasicBlock *KnownSucc = nullptr;
591  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
592  if (BI->isConditional()) {
593  if (auto *SimpleCond = getSimplifiedConstant(BI->getCondition())) {
594  // Just take the first successor if condition is undef
595  if (isa<UndefValue>(SimpleCond))
596  KnownSucc = BI->getSuccessor(0);
597  else if (ConstantInt *SimpleCondVal =
598  dyn_cast<ConstantInt>(SimpleCond))
599  KnownSucc = BI->getSuccessor(SimpleCondVal->isZero() ? 1 : 0);
600  }
601  }
602  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
603  if (auto *SimpleCond = getSimplifiedConstant(SI->getCondition())) {
604  // Just take the first successor if condition is undef
605  if (isa<UndefValue>(SimpleCond))
606  KnownSucc = SI->getSuccessor(0);
607  else if (ConstantInt *SimpleCondVal =
608  dyn_cast<ConstantInt>(SimpleCond))
609  KnownSucc = SI->findCaseValue(SimpleCondVal)->getCaseSuccessor();
610  }
611  }
612  if (KnownSucc) {
613  if (L->contains(KnownSucc))
614  BBWorklist.insert(KnownSucc);
615  else
616  ExitWorklist.insert({BB, KnownSucc});
617  continue;
618  }
619 
620  // Add BB's successors to the worklist.
621  for (BasicBlock *Succ : successors(BB))
622  if (L->contains(Succ))
623  BBWorklist.insert(Succ);
624  else
625  ExitWorklist.insert({BB, Succ});
626  AddCostRecursively(*TI, Iteration);
627  }
628 
629  // If we found no optimization opportunities on the first iteration, we
630  // won't find them on later ones too.
631  if (UnrolledCost == RolledDynamicCost) {
632  LLVM_DEBUG(dbgs() << " No opportunities found.. exiting.\n"
633  << " UnrolledCost: " << UnrolledCost << "\n");
634  return None;
635  }
636  }
637 
638  while (!ExitWorklist.empty()) {
639  BasicBlock *ExitingBB, *ExitBB;
640  std::tie(ExitingBB, ExitBB) = ExitWorklist.pop_back_val();
641 
642  for (Instruction &I : *ExitBB) {
643  auto *PN = dyn_cast<PHINode>(&I);
644  if (!PN)
645  break;
646 
647  Value *Op = PN->getIncomingValueForBlock(ExitingBB);
648  if (auto *OpI = dyn_cast<Instruction>(Op))
649  if (L->contains(OpI))
650  AddCostRecursively(*OpI, TripCount - 1);
651  }
652  }
653 
654  assert(UnrolledCost.isValid() && RolledDynamicCost.isValid() &&
655  "All instructions must have a valid cost, whether the "
656  "loop is rolled or unrolled.");
657 
658  LLVM_DEBUG(dbgs() << "Analysis finished:\n"
659  << "UnrolledCost: " << UnrolledCost << ", "
660  << "RolledDynamicCost: " << RolledDynamicCost << "\n");
661  return {{unsigned(*UnrolledCost.getValue()),
662  unsigned(*RolledDynamicCost.getValue())}};
663 }
664 
665 /// ApproximateLoopSize - Approximate the size of the loop.
667  const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent,
668  const TargetTransformInfo &TTI,
669  const SmallPtrSetImpl<const Value *> &EphValues, unsigned BEInsns) {
671  for (BasicBlock *BB : L->blocks())
672  Metrics.analyzeBasicBlock(BB, TTI, EphValues);
673  NumCalls = Metrics.NumInlineCandidates;
674  NotDuplicatable = Metrics.notDuplicatable;
675  Convergent = Metrics.convergent;
676 
677  InstructionCost LoopSize = Metrics.NumInsts;
678 
679  // Don't allow an estimate of size zero. This would allows unrolling of loops
680  // with huge iteration counts, which is a compile time problem even if it's
681  // not a problem for code quality. Also, the code using this size may assume
682  // that each loop has at least three instructions (likely a conditional
683  // branch, a comparison feeding that branch, and some kind of loop increment
684  // feeding that comparison instruction).
685  if (LoopSize.isValid() && *LoopSize.getValue() < BEInsns + 1)
686  // This is an open coded max() on InstructionCost
687  LoopSize = BEInsns + 1;
688 
689  return LoopSize;
690 }
691 
692 // Returns the loop hint metadata node with the given name (for example,
693 // "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is
694 // returned.
696  if (MDNode *LoopID = L->getLoopID())
697  return GetUnrollMetadata(LoopID, Name);
698  return nullptr;
699 }
700 
701 // Returns true if the loop has an unroll(full) pragma.
702 static bool hasUnrollFullPragma(const Loop *L) {
703  return getUnrollMetadataForLoop(L, "llvm.loop.unroll.full");
704 }
705 
706 // Returns true if the loop has an unroll(enable) pragma. This metadata is used
707 // for both "#pragma unroll" and "#pragma clang loop unroll(enable)" directives.
708 static bool hasUnrollEnablePragma(const Loop *L) {
709  return getUnrollMetadataForLoop(L, "llvm.loop.unroll.enable");
710 }
711 
712 // Returns true if the loop has an runtime unroll(disable) pragma.
713 static bool hasRuntimeUnrollDisablePragma(const Loop *L) {
714  return getUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable");
715 }
716 
717 // If loop has an unroll_count pragma return the (necessarily
718 // positive) value from the pragma. Otherwise return 0.
719 static unsigned unrollCountPragmaValue(const Loop *L) {
720  MDNode *MD = getUnrollMetadataForLoop(L, "llvm.loop.unroll.count");
721  if (MD) {
722  assert(MD->getNumOperands() == 2 &&
723  "Unroll count hint metadata should have two operands.");
724  unsigned Count =
725  mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
726  assert(Count >= 1 && "Unroll count must be positive.");
727  return Count;
728  }
729  return 0;
730 }
731 
732 // Computes the boosting factor for complete unrolling.
733 // If fully unrolling the loop would save a lot of RolledDynamicCost, it would
734 // be beneficial to fully unroll the loop even if unrolledcost is large. We
735 // use (RolledDynamicCost / UnrolledCost) to model the unroll benefits to adjust
736 // the unroll threshold.
737 static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost,
738  unsigned MaxPercentThresholdBoost) {
739  if (Cost.RolledDynamicCost >= std::numeric_limits<unsigned>::max() / 100)
740  return 100;
741  else if (Cost.UnrolledCost != 0)
742  // The boosting factor is RolledDynamicCost / UnrolledCost
743  return std::min(100 * Cost.RolledDynamicCost / Cost.UnrolledCost,
744  MaxPercentThresholdBoost);
745  else
746  return MaxPercentThresholdBoost;
747 }
748 
749 // Produce an estimate of the unrolled cost of the specified loop. This
750 // is used to a) produce a cost estimate for partial unrolling and b) to
751 // cheaply estimate cost for full unrolling when we don't want to symbolically
752 // evaluate all iterations.
754  const unsigned LoopSize;
755 
756 public:
757  UnrollCostEstimator(Loop &L, unsigned LoopSize) : LoopSize(LoopSize) {}
758 
759  // Returns loop size estimation for unrolled loop, given the unrolling
760  // configuration specified by UP.
761  uint64_t
763  const unsigned CountOverwrite = 0) const {
764  assert(LoopSize >= UP.BEInsns &&
765  "LoopSize should not be less than BEInsns!");
766  if (CountOverwrite)
767  return static_cast<uint64_t>(LoopSize - UP.BEInsns) * CountOverwrite +
768  UP.BEInsns;
769  else
770  return static_cast<uint64_t>(LoopSize - UP.BEInsns) * UP.Count +
771  UP.BEInsns;
772  }
773 };
774 
775 static Optional<unsigned>
776 shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo,
777  const unsigned TripMultiple, const unsigned TripCount,
778  const UnrollCostEstimator UCE,
780 
781  // Using unroll pragma
782  // 1st priority is unroll count set by "unroll-count" option.
783 
784  if (PInfo.UserUnrollCount) {
785  if (UP.AllowRemainder &&
786  UCE.getUnrolledLoopSize(UP, (unsigned)UnrollCount) < UP.Threshold)
787  return (unsigned)UnrollCount;
788  }
789 
790  // 2nd priority is unroll count set by pragma.
791  if (PInfo.PragmaCount > 0) {
792  if ((UP.AllowRemainder || (TripMultiple % PInfo.PragmaCount == 0)))
793  return PInfo.PragmaCount;
794  }
795 
796  if (PInfo.PragmaFullUnroll && TripCount != 0)
797  return TripCount;
798 
799  // if didn't return until here, should continue to other priorties
800  return None;
801 }
802 
804  Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT,
805  ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
806  const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE,
808  assert(FullUnrollTripCount && "should be non-zero!");
809 
810  if (FullUnrollTripCount > UP.FullUnrollMaxCount)
811  return None;
812 
813  // When computing the unrolled size, note that BEInsns are not replicated
814  // like the rest of the loop body.
815  if (UCE.getUnrolledLoopSize(UP) < UP.Threshold)
816  return FullUnrollTripCount;
817 
818  // The loop isn't that small, but we still can fully unroll it if that
819  // helps to remove a significant number of instructions.
820  // To check that, run additional analysis on the loop.
822  L, FullUnrollTripCount, DT, SE, EphValues, TTI,
823  UP.Threshold * UP.MaxPercentThresholdBoost / 100,
825  unsigned Boost =
827  if (Cost->UnrolledCost < UP.Threshold * Boost / 100)
828  return FullUnrollTripCount;
829  }
830  return None;
831 }
832 
833 static Optional<unsigned>
834 shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount,
835  const UnrollCostEstimator UCE,
837 
838  if (!TripCount)
839  return None;
840 
841  if (!UP.Partial) {
842  LLVM_DEBUG(dbgs() << " will not try to unroll partially because "
843  << "-unroll-allow-partial not given\n");
844  return 0;
845  }
846  unsigned count = UP.Count;
847  if (count == 0)
848  count = TripCount;
849  if (UP.PartialThreshold != NoThreshold) {
850  // Reduce unroll count to be modulo of TripCount for partial unrolling.
851  if (UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold)
852  count = (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
853  (LoopSize - UP.BEInsns);
854  if (count > UP.MaxCount)
855  count = UP.MaxCount;
856  while (count != 0 && TripCount % count != 0)
857  count--;
858  if (UP.AllowRemainder && count <= 1) {
859  // If there is no Count that is modulo of TripCount, set Count to
860  // largest power-of-two factor that satisfies the threshold limit.
861  // As we'll create fixup loop, do the type of unrolling only if
862  // remainder loop is allowed.
864  while (count != 0 &&
866  count >>= 1;
867  }
868  if (count < 2) {
869  count = 0;
870  }
871  } else {
872  count = TripCount;
873  }
874  if (count > UP.MaxCount)
875  count = UP.MaxCount;
876 
877  LLVM_DEBUG(dbgs() << " partially unrolling with count: " << count << "\n");
878 
879  return count;
880 }
881 // Returns true if unroll count was set explicitly.
882 // Calculates unroll count and writes it to UP.Count.
883 // Unless IgnoreUser is true, will also use metadata and command-line options
884 // that are specific to to the LoopUnroll pass (which, for instance, are
885 // irrelevant for the LoopUnrollAndJam pass).
886 // FIXME: This function is used by LoopUnroll and LoopUnrollAndJam, but consumes
887 // many LoopUnroll-specific options. The shared functionality should be
888 // refactored into it own function.
890  Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI,
891  ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
892  OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount,
893  bool MaxOrZero, unsigned TripMultiple, unsigned LoopSize,
895  TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) {
896 
897  UnrollCostEstimator UCE(*L, LoopSize);
898 
899  const bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
900  const bool PragmaFullUnroll = hasUnrollFullPragma(L);
901  const unsigned PragmaCount = unrollCountPragmaValue(L);
902  const bool PragmaEnableUnroll = hasUnrollEnablePragma(L);
903 
904  const bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
905  PragmaEnableUnroll || UserUnrollCount;
906 
907  PragmaInfo PInfo(UserUnrollCount, PragmaFullUnroll, PragmaCount,
908  PragmaEnableUnroll);
909  // Use an explicit peel count that has been specified for testing. In this
910  // case it's not permitted to also specify an explicit unroll count.
911  if (PP.PeelCount) {
912  if (UnrollCount.getNumOccurrences() > 0) {
913  report_fatal_error("Cannot specify both explicit peel count and "
914  "explicit unroll count", /*GenCrashDiag=*/false);
915  }
916  UP.Count = 1;
917  UP.Runtime = false;
918  return true;
919  }
920  // Check for explicit Count.
921  // 1st priority is unroll count set by "unroll-count" option.
922  // 2nd priority is unroll count set by pragma.
923  if (auto UnrollFactor = shouldPragmaUnroll(L, PInfo, TripMultiple, TripCount,
924  UCE, UP)) {
925  UP.Count = *UnrollFactor;
926 
927  if (UserUnrollCount || (PragmaCount > 0)) {
928  UP.AllowExpensiveTripCount = true;
929  UP.Force = true;
930  }
931  UP.Runtime |= (PragmaCount > 0);
932  return ExplicitUnroll;
933  } else {
934  if (ExplicitUnroll && TripCount != 0) {
935  // If the loop has an unrolling pragma, we want to be more aggressive with
936  // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold
937  // value which is larger than the default limits.
938  UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
939  UP.PartialThreshold =
940  std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
941  }
942  }
943 
944  // 3rd priority is exact full unrolling. This will eliminate all copies
945  // of some exit test.
946  UP.Count = 0;
947  if (TripCount) {
948  UP.Count = TripCount;
949  if (auto UnrollFactor = shouldFullUnroll(L, TTI, DT, SE, EphValues,
950  TripCount, UCE, UP)) {
951  UP.Count = *UnrollFactor;
952  UseUpperBound = false;
953  return ExplicitUnroll;
954  }
955  }
956 
957  // 4th priority is bounded unrolling.
958  // We can unroll by the upper bound amount if it's generally allowed or if
959  // we know that the loop is executed either the upper bound or zero times.
960  // (MaxOrZero unrolling keeps only the first loop test, so the number of
961  // loop tests remains the same compared to the non-unrolled version, whereas
962  // the generic upper bound unrolling keeps all but the last loop test so the
963  // number of loop tests goes up which may end up being worse on targets with
964  // constrained branch predictor resources so is controlled by an option.)
965  // In addition we only unroll small upper bounds.
966  // Note that the cost of bounded unrolling is always strictly greater than
967  // cost of exact full unrolling. As such, if we have an exact count and
968  // found it unprofitable, we'll never chose to bounded unroll.
969  if (!TripCount && MaxTripCount && (UP.UpperBound || MaxOrZero) &&
970  MaxTripCount <= UnrollMaxUpperBound) {
971  UP.Count = MaxTripCount;
972  if (auto UnrollFactor = shouldFullUnroll(L, TTI, DT, SE, EphValues,
973  MaxTripCount, UCE, UP)) {
974  UP.Count = *UnrollFactor;
975  UseUpperBound = true;
976  return ExplicitUnroll;
977  }
978  }
979 
980  // 5th priority is loop peeling.
981  computePeelCount(L, LoopSize, PP, TripCount, DT, SE, UP.Threshold);
982  if (PP.PeelCount) {
983  UP.Runtime = false;
984  UP.Count = 1;
985  return ExplicitUnroll;
986  }
987 
988  // Before starting partial unrolling, set up.partial to true,
989  // if user explicitly asked for unrolling
990  if (TripCount)
991  UP.Partial |= ExplicitUnroll;
992 
993  // 6th priority is partial unrolling.
994  // Try partial unroll only when TripCount could be statically calculated.
995  if (auto UnrollFactor = shouldPartialUnroll(LoopSize, TripCount, UCE, UP)) {
996  UP.Count = *UnrollFactor;
997 
998  if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
999  UP.Count != TripCount)
1000  ORE->emit([&]() {
1002  "FullUnrollAsDirectedTooLarge",
1003  L->getStartLoc(), L->getHeader())
1004  << "Unable to fully unroll loop as directed by unroll pragma "
1005  "because "
1006  "unrolled size is too large.";
1007  });
1008 
1009  if (UP.PartialThreshold != NoThreshold) {
1010  if (UP.Count == 0) {
1011  if (PragmaEnableUnroll)
1012  ORE->emit([&]() {
1014  "UnrollAsDirectedTooLarge",
1015  L->getStartLoc(), L->getHeader())
1016  << "Unable to unroll loop as directed by unroll(enable) "
1017  "pragma "
1018  "because unrolled size is too large.";
1019  });
1020  }
1021  }
1022  return ExplicitUnroll;
1023  }
1024  assert(TripCount == 0 &&
1025  "All cases when TripCount is constant should be covered here.");
1026  if (PragmaFullUnroll)
1027  ORE->emit([&]() {
1028  return OptimizationRemarkMissed(
1029  DEBUG_TYPE, "CantFullUnrollAsDirectedRuntimeTripCount",
1030  L->getStartLoc(), L->getHeader())
1031  << "Unable to fully unroll loop as directed by unroll(full) "
1032  "pragma "
1033  "because loop has a runtime trip count.";
1034  });
1035 
1036  // 7th priority is runtime unrolling.
1037  // Don't unroll a runtime trip count loop when it is disabled.
1039  UP.Count = 0;
1040  return false;
1041  }
1042 
1043  // Don't unroll a small upper bound loop unless user or TTI asked to do so.
1044  if (MaxTripCount && !UP.Force && MaxTripCount < UnrollMaxUpperBound) {
1045  UP.Count = 0;
1046  return false;
1047  }
1048 
1049  // Check if the runtime trip count is too small when profile is available.
1050  if (L->getHeader()->getParent()->hasProfileData()) {
1051  if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
1052  if (*ProfileTripCount < FlatLoopTripCountThreshold)
1053  return false;
1054  else
1055  UP.AllowExpensiveTripCount = true;
1056  }
1057  }
1058  UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
1059  if (!UP.Runtime) {
1060  LLVM_DEBUG(
1061  dbgs() << " will not try to unroll loop with runtime trip count "
1062  << "-unroll-runtime not given\n");
1063  UP.Count = 0;
1064  return false;
1065  }
1066  if (UP.Count == 0)
1068 
1069  // Reduce unroll count to be the largest power-of-two factor of
1070  // the original count which satisfies the threshold limit.
1071  while (UP.Count != 0 &&
1073  UP.Count >>= 1;
1074 
1075 #ifndef NDEBUG
1076  unsigned OrigCount = UP.Count;
1077 #endif
1078 
1079  if (!UP.AllowRemainder && UP.Count != 0 && (TripMultiple % UP.Count) != 0) {
1080  while (UP.Count != 0 && TripMultiple % UP.Count != 0)
1081  UP.Count >>= 1;
1082  LLVM_DEBUG(
1083  dbgs() << "Remainder loop is restricted (that could architecture "
1084  "specific or because the loop contains a convergent "
1085  "instruction), so unroll count must divide the trip "
1086  "multiple, "
1087  << TripMultiple << ". Reducing unroll count from " << OrigCount
1088  << " to " << UP.Count << ".\n");
1089 
1090  using namespace ore;
1091 
1092  if (unrollCountPragmaValue(L) > 0 && !UP.AllowRemainder)
1093  ORE->emit([&]() {
1095  "DifferentUnrollCountFromDirected",
1096  L->getStartLoc(), L->getHeader())
1097  << "Unable to unroll loop the number of times directed by "
1098  "unroll_count pragma because remainder loop is restricted "
1099  "(that could architecture specific or because the loop "
1100  "contains a convergent instruction) and so must have an "
1101  "unroll "
1102  "count that divides the loop trip multiple of "
1103  << NV("TripMultiple", TripMultiple) << ". Unrolling instead "
1104  << NV("UnrollCount", UP.Count) << " time(s).";
1105  });
1106  }
1107 
1108  if (UP.Count > UP.MaxCount)
1109  UP.Count = UP.MaxCount;
1110 
1111  if (MaxTripCount && UP.Count > MaxTripCount)
1112  UP.Count = MaxTripCount;
1113 
1114  LLVM_DEBUG(dbgs() << " runtime unrolling with count: " << UP.Count
1115  << "\n");
1116  if (UP.Count < 2)
1117  UP.Count = 0;
1118  return ExplicitUnroll;
1119 }
1120 
1122  Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
1125  ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel,
1126  bool OnlyWhenForced, bool ForgetAllSCEV, Optional<unsigned> ProvidedCount,
1127  Optional<unsigned> ProvidedThreshold, Optional<bool> ProvidedAllowPartial,
1128  Optional<bool> ProvidedRuntime, Optional<bool> ProvidedUpperBound,
1129  Optional<bool> ProvidedAllowPeeling,
1130  Optional<bool> ProvidedAllowProfileBasedPeeling,
1131  Optional<unsigned> ProvidedFullUnrollMaxCount) {
1132  LLVM_DEBUG(dbgs() << "Loop Unroll: F["
1133  << L->getHeader()->getParent()->getName() << "] Loop %"
1134  << L->getHeader()->getName() << "\n");
1136  if (TM & TM_Disable)
1138 
1139  // If this loop isn't forced to be unrolled, avoid unrolling it when the
1140  // parent loop has an explicit unroll-and-jam pragma. This is to prevent
1141  // automatic unrolling from interfering with the user requested
1142  // transformation.
1143  Loop *ParentL = L->getParentLoop();
1144  if (ParentL != nullptr &&
1147  LLVM_DEBUG(dbgs() << "Not unrolling loop since parent loop has"
1148  << " llvm.loop.unroll_and_jam.\n");
1150  }
1151 
1152  // If this loop isn't forced to be unrolled, avoid unrolling it when the
1153  // loop has an explicit unroll-and-jam pragma. This is to prevent automatic
1154  // unrolling from interfering with the user requested transformation.
1157  LLVM_DEBUG(
1158  dbgs()
1159  << " Not unrolling loop since it has llvm.loop.unroll_and_jam.\n");
1161  }
1162 
1163  if (!L->isLoopSimplifyForm()) {
1164  LLVM_DEBUG(
1165  dbgs() << " Not unrolling loop which is not in loop-simplify form.\n");
1167  }
1168 
1169  // When automatic unrolling is disabled, do not unroll unless overridden for
1170  // this loop.
1171  if (OnlyWhenForced && !(TM & TM_Enable))
1173 
1174  bool OptForSize = L->getHeader()->getParent()->hasOptSize();
1175  unsigned NumInlineCandidates;
1176  bool NotDuplicatable;
1177  bool Convergent;
1179  L, SE, TTI, BFI, PSI, ORE, OptLevel, ProvidedThreshold, ProvidedCount,
1180  ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
1181  ProvidedFullUnrollMaxCount);
1183  L, SE, TTI, ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling, true);
1184 
1185  // Exit early if unrolling is disabled. For OptForSize, we pick the loop size
1186  // as threshold later on.
1187  if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0) &&
1188  !OptForSize)
1190 
1192  CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
1193 
1194  InstructionCost LoopSizeIC =
1195  ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
1196  TTI, EphValues, UP.BEInsns);
1197  LLVM_DEBUG(dbgs() << " Loop Size = " << LoopSizeIC << "\n");
1198 
1199  if (!LoopSizeIC.isValid()) {
1200  LLVM_DEBUG(dbgs() << " Not unrolling loop which contains instructions"
1201  << " with invalid cost.\n");
1203  }
1204  unsigned LoopSize = *LoopSizeIC.getValue();
1205 
1206  if (NotDuplicatable) {
1207  LLVM_DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable"
1208  << " instructions.\n");
1210  }
1211 
1212  // When optimizing for size, use LoopSize + 1 as threshold (we use < Threshold
1213  // later), to (fully) unroll loops, if it does not increase code size.
1214  if (OptForSize)
1215  UP.Threshold = std::max(UP.Threshold, LoopSize + 1);
1216 
1217  if (NumInlineCandidates != 0) {
1218  LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
1220  }
1221 
1222  // Find the smallest exact trip count for any exit. This is an upper bound
1223  // on the loop trip count, but an exit at an earlier iteration is still
1224  // possible. An unroll by the smallest exact trip count guarantees that all
1225  // brnaches relating to at least one exit can be eliminated. This is unlike
1226  // the max trip count, which only guarantees that the backedge can be broken.
1227  unsigned TripCount = 0;
1228  unsigned TripMultiple = 1;
1229  SmallVector<BasicBlock *, 8> ExitingBlocks;
1230  L->getExitingBlocks(ExitingBlocks);
1231  for (BasicBlock *ExitingBlock : ExitingBlocks)
1232  if (unsigned TC = SE.getSmallConstantTripCount(L, ExitingBlock))
1233  if (!TripCount || TC < TripCount)
1234  TripCount = TripMultiple = TC;
1235 
1236  if (!TripCount) {
1237  // If no exact trip count is known, determine the trip multiple of either
1238  // the loop latch or the single exiting block.
1239  // TODO: Relax for multiple exits.
1240  BasicBlock *ExitingBlock = L->getLoopLatch();
1241  if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
1242  ExitingBlock = L->getExitingBlock();
1243  if (ExitingBlock)
1244  TripMultiple = SE.getSmallConstantTripMultiple(L, ExitingBlock);
1245  }
1246 
1247  // If the loop contains a convergent operation, the prelude we'd add
1248  // to do the first few instructions before we hit the unrolled loop
1249  // is unsafe -- it adds a control-flow dependency to the convergent
1250  // operation. Therefore restrict remainder loop (try unrolling without).
1251  //
1252  // TODO: This is quite conservative. In practice, convergent_op()
1253  // is likely to be called unconditionally in the loop. In this
1254  // case, the program would be ill-formed (on most architectures)
1255  // unless n were the same on all threads in a thread group.
1256  // Assuming n is the same on all threads, any kind of unrolling is
1257  // safe. But currently llvm's notion of convergence isn't powerful
1258  // enough to express this.
1259  if (Convergent)
1260  UP.AllowRemainder = false;
1261 
1262  // Try to find the trip count upper bound if we cannot find the exact trip
1263  // count.
1264  unsigned MaxTripCount = 0;
1265  bool MaxOrZero = false;
1266  if (!TripCount) {
1267  MaxTripCount = SE.getSmallConstantMaxTripCount(L);
1268  MaxOrZero = SE.isBackedgeTakenCountMaxOrZero(L);
1269  }
1270 
1271  // computeUnrollCount() decides whether it is beneficial to use upper bound to
1272  // fully unroll the loop.
1273  bool UseUpperBound = false;
1274  bool IsCountSetExplicitly = computeUnrollCount(
1275  L, TTI, DT, LI, SE, EphValues, &ORE, TripCount, MaxTripCount, MaxOrZero,
1276  TripMultiple, LoopSize, UP, PP, UseUpperBound);
1277  if (!UP.Count)
1279 
1280  if (PP.PeelCount) {
1281  assert(UP.Count == 1 && "Cannot perform peel and unroll in the same step");
1282  LLVM_DEBUG(dbgs() << "PEELING loop %" << L->getHeader()->getName()
1283  << " with iteration count " << PP.PeelCount << "!\n");
1284  ORE.emit([&]() {
1285  return OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(),
1286  L->getHeader())
1287  << " peeled loop by " << ore::NV("PeelCount", PP.PeelCount)
1288  << " iterations";
1289  });
1290 
1291  if (peelLoop(L, PP.PeelCount, LI, &SE, DT, &AC, PreserveLCSSA)) {
1292  simplifyLoopAfterUnroll(L, true, LI, &SE, &DT, &AC, &TTI);
1293  // If the loop was peeled, we already "used up" the profile information
1294  // we had, so we don't want to unroll or peel again.
1295  if (PP.PeelProfiledIterations)
1298  }
1300  }
1301 
1302  // At this point, UP.Runtime indicates that run-time unrolling is allowed.
1303  // However, we only want to actually perform it if we don't know the trip
1304  // count and the unroll count doesn't divide the known trip multiple.
1305  // TODO: This decision should probably be pushed up into
1306  // computeUnrollCount().
1307  UP.Runtime &= TripCount == 0 && TripMultiple % UP.Count != 0;
1308 
1309  // Save loop properties before it is transformed.
1310  MDNode *OrigLoopID = L->getLoopID();
1311 
1312  // Unroll the loop.
1313  Loop *RemainderLoop = nullptr;
1314  LoopUnrollResult UnrollResult = UnrollLoop(
1315  L,
1316  {UP.Count, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount,
1317  UP.UnrollRemainder, ForgetAllSCEV},
1318  LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop);
1319  if (UnrollResult == LoopUnrollResult::Unmodified)
1321 
1322  if (RemainderLoop) {
1323  Optional<MDNode *> RemainderLoopID =
1326  if (RemainderLoopID)
1327  RemainderLoop->setLoopID(RemainderLoopID.getValue());
1328  }
1329 
1330  if (UnrollResult != LoopUnrollResult::FullyUnrolled) {
1331  Optional<MDNode *> NewLoopID =
1334  if (NewLoopID) {
1335  L->setLoopID(NewLoopID.getValue());
1336 
1337  // Do not setLoopAlreadyUnrolled if loop attributes have been specified
1338  // explicitly.
1339  return UnrollResult;
1340  }
1341  }
1342 
1343  // If loop has an unroll count pragma or unrolled by explicitly set count
1344  // mark loop as unrolled to prevent unrolling beyond that requested.
1345  if (UnrollResult != LoopUnrollResult::FullyUnrolled && IsCountSetExplicitly)
1347 
1348  return UnrollResult;
1349 }
1350 
1351 namespace {
1352 
1353 class LoopUnroll : public LoopPass {
1354 public:
1355  static char ID; // Pass ID, replacement for typeid
1356 
1357  int OptLevel;
1358 
1359  /// If false, use a cost model to determine whether unrolling of a loop is
1360  /// profitable. If true, only loops that explicitly request unrolling via
1361  /// metadata are considered. All other loops are skipped.
1362  bool OnlyWhenForced;
1363 
1364  /// If false, when SCEV is invalidated, only forget everything in the
1365  /// top-most loop (call forgetTopMostLoop), of the loop being processed.
1366  /// Otherwise, forgetAllLoops and rebuild when needed next.
1367  bool ForgetAllSCEV;
1368 
1369  Optional<unsigned> ProvidedCount;
1370  Optional<unsigned> ProvidedThreshold;
1371  Optional<bool> ProvidedAllowPartial;
1372  Optional<bool> ProvidedRuntime;
1373  Optional<bool> ProvidedUpperBound;
1374  Optional<bool> ProvidedAllowPeeling;
1375  Optional<bool> ProvidedAllowProfileBasedPeeling;
1376  Optional<unsigned> ProvidedFullUnrollMaxCount;
1377 
1378  LoopUnroll(int OptLevel = 2, bool OnlyWhenForced = false,
1379  bool ForgetAllSCEV = false, Optional<unsigned> Threshold = None,
1380  Optional<unsigned> Count = None,
1381  Optional<bool> AllowPartial = None, Optional<bool> Runtime = None,
1382  Optional<bool> UpperBound = None,
1383  Optional<bool> AllowPeeling = None,
1384  Optional<bool> AllowProfileBasedPeeling = None,
1385  Optional<unsigned> ProvidedFullUnrollMaxCount = None)
1386  : LoopPass(ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced),
1387  ForgetAllSCEV(ForgetAllSCEV), ProvidedCount(std::move(Count)),
1388  ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
1389  ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound),
1390  ProvidedAllowPeeling(AllowPeeling),
1391  ProvidedAllowProfileBasedPeeling(AllowProfileBasedPeeling),
1392  ProvidedFullUnrollMaxCount(ProvidedFullUnrollMaxCount) {
1394  }
1395 
1396  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
1397  if (skipLoop(L))
1398  return false;
1399 
1400  Function &F = *L->getHeader()->getParent();
1401 
1402  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1403  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1404  ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1405  const TargetTransformInfo &TTI =
1406  getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1407  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1408  // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
1409  // pass. Function analyses need to be preserved across loop transformations
1410  // but ORE cannot be preserved (see comment before the pass definition).
1412  bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
1413 
1415  L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr, PreserveLCSSA, OptLevel,
1416  OnlyWhenForced, ForgetAllSCEV, ProvidedCount, ProvidedThreshold,
1417  ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
1418  ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling,
1419  ProvidedFullUnrollMaxCount);
1420 
1421  if (Result == LoopUnrollResult::FullyUnrolled)
1422  LPM.markLoopAsDeleted(*L);
1423 
1425  }
1426 
1427  /// This transformation requires natural loop information & requires that
1428  /// loop preheaders be inserted into the CFG...
1429  void getAnalysisUsage(AnalysisUsage &AU) const override {
1432  // FIXME: Loop passes are required to preserve domtree, and for now we just
1433  // recreate dom info if anything gets unrolled.
1435  }
1436 };
1437 
1438 } // end anonymous namespace
1439 
1440 char LoopUnroll::ID = 0;
1441 
1442 INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
1446 INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
1447 
1448 Pass *llvm::createLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
1449  bool ForgetAllSCEV, int Threshold, int Count,
1450  int AllowPartial, int Runtime, int UpperBound,
1451  int AllowPeeling) {
1452  // TODO: It would make more sense for this function to take the optionals
1453  // directly, but that's dangerous since it would silently break out of tree
1454  // callers.
1455  return new LoopUnroll(
1456  OptLevel, OnlyWhenForced, ForgetAllSCEV,
1457  Threshold == -1 ? None : Optional<unsigned>(Threshold),
1458  Count == -1 ? None : Optional<unsigned>(Count),
1459  AllowPartial == -1 ? None : Optional<bool>(AllowPartial),
1460  Runtime == -1 ? None : Optional<bool>(Runtime),
1461  UpperBound == -1 ? None : Optional<bool>(UpperBound),
1462  AllowPeeling == -1 ? None : Optional<bool>(AllowPeeling));
1463 }
1464 
1465 Pass *llvm::createSimpleLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
1466  bool ForgetAllSCEV) {
1467  return createLoopUnrollPass(OptLevel, OnlyWhenForced, ForgetAllSCEV, -1, -1,
1468  0, 0, 0, 1);
1469 }
1470 
1473  LPMUpdater &Updater) {
1474  // For the new PM, we can't use OptimizationRemarkEmitter as an analysis
1475  // pass. Function analyses need to be preserved across loop transformations
1476  // but ORE cannot be preserved (see comment before the pass definition).
1478 
1479  // Keep track of the previous loop structure so we can identify new loops
1480  // created by unrolling.
1481  Loop *ParentL = L.getParentLoop();
1482  SmallPtrSet<Loop *, 4> OldLoops;
1483  if (ParentL)
1484  OldLoops.insert(ParentL->begin(), ParentL->end());
1485  else
1486  OldLoops.insert(AR.LI.begin(), AR.LI.end());
1487 
1488  std::string LoopName = std::string(L.getName());
1489 
1490  bool Changed = tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, ORE,
1491  /*BFI*/ nullptr, /*PSI*/ nullptr,
1492  /*PreserveLCSSA*/ true, OptLevel,
1493  OnlyWhenForced, ForgetSCEV, /*Count*/ None,
1494  /*Threshold*/ None, /*AllowPartial*/ false,
1495  /*Runtime*/ false, /*UpperBound*/ false,
1496  /*AllowPeeling*/ true,
1497  /*AllowProfileBasedPeeling*/ false,
1498  /*FullUnrollMaxCount*/ None) !=
1500  if (!Changed)
1501  return PreservedAnalyses::all();
1502 
1503  // The parent must not be damaged by unrolling!
1504 #ifndef NDEBUG
1505  if (ParentL)
1506  ParentL->verifyLoop();
1507 #endif
1508 
1509  // Unrolling can do several things to introduce new loops into a loop nest:
1510  // - Full unrolling clones child loops within the current loop but then
1511  // removes the current loop making all of the children appear to be new
1512  // sibling loops.
1513  //
1514  // When a new loop appears as a sibling loop after fully unrolling,
1515  // its nesting structure has fundamentally changed and we want to revisit
1516  // it to reflect that.
1517  //
1518  // When unrolling has removed the current loop, we need to tell the
1519  // infrastructure that it is gone.
1520  //
1521  // Finally, we support a debugging/testing mode where we revisit child loops
1522  // as well. These are not expected to require further optimizations as either
1523  // they or the loop they were cloned from have been directly visited already.
1524  // But the debugging mode allows us to check this assumption.
1525  bool IsCurrentLoopValid = false;
1526  SmallVector<Loop *, 4> SibLoops;
1527  if (ParentL)
1528  SibLoops.append(ParentL->begin(), ParentL->end());
1529  else
1530  SibLoops.append(AR.LI.begin(), AR.LI.end());
1531  erase_if(SibLoops, [&](Loop *SibLoop) {
1532  if (SibLoop == &L) {
1533  IsCurrentLoopValid = true;
1534  return true;
1535  }
1536 
1537  // Otherwise erase the loop from the list if it was in the old loops.
1538  return OldLoops.contains(SibLoop);
1539  });
1540  Updater.addSiblingLoops(SibLoops);
1541 
1542  if (!IsCurrentLoopValid) {
1543  Updater.markLoopAsDeleted(L, LoopName);
1544  } else {
1545  // We can only walk child loops if the current loop remained valid.
1547  // Walk *all* of the child loops.
1548  SmallVector<Loop *, 4> ChildLoops(L.begin(), L.end());
1549  Updater.addChildLoops(ChildLoops);
1550  }
1551  }
1552 
1554 }
1555 
1558  auto &LI = AM.getResult<LoopAnalysis>(F);
1559  // There are no loops in the function. Return before computing other expensive
1560  // analyses.
1561  if (LI.empty())
1562  return PreservedAnalyses::all();
1563  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
1564  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1565  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1566  auto &AC = AM.getResult<AssumptionAnalysis>(F);
1568 
1569  LoopAnalysisManager *LAM = nullptr;
1570  if (auto *LAMProxy = AM.getCachedResult<LoopAnalysisManagerFunctionProxy>(F))
1571  LAM = &LAMProxy->getManager();
1572 
1573  auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
1574  ProfileSummaryInfo *PSI =
1575  MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
1576  auto *BFI = (PSI && PSI->hasProfileSummary()) ?
1577  &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
1578 
1579  bool Changed = false;
1580 
1581  // The unroller requires loops to be in simplified form, and also needs LCSSA.
1582  // Since simplification may add new inner loops, it has to run before the
1583  // legality and profitability checks. This means running the loop unroller
1584  // will simplify all loops, regardless of whether anything end up being
1585  // unrolled.
1586  for (auto &L : LI) {
1587  Changed |=
1588  simplifyLoop(L, &DT, &LI, &SE, &AC, nullptr, false /* PreserveLCSSA */);
1589  Changed |= formLCSSARecursively(*L, DT, &LI, &SE);
1590  }
1591 
1592  // Add the loop nests in the reverse order of LoopInfo. See method
1593  // declaration.
1595  appendLoopsToWorklist(LI, Worklist);
1596 
1597  while (!Worklist.empty()) {
1598  // Because the LoopInfo stores the loops in RPO, we walk the worklist
1599  // from back to front so that we work forward across the CFG, which
1600  // for unrolling is only needed to get optimization remarks emitted in
1601  // a forward order.
1602  Loop &L = *Worklist.pop_back_val();
1603 #ifndef NDEBUG
1604  Loop *ParentL = L.getParentLoop();
1605 #endif
1606 
1607  // Check if the profile summary indicates that the profiled application
1608  // has a huge working set size, in which case we disable peeling to avoid
1609  // bloating it further.
1610  Optional<bool> LocalAllowPeeling = UnrollOpts.AllowPeeling;
1611  if (PSI && PSI->hasHugeWorkingSetSize())
1612  LocalAllowPeeling = false;
1613  std::string LoopName = std::string(L.getName());
1614  // The API here is quite complex to call and we allow to select some
1615  // flavors of unrolling during construction time (by setting UnrollOpts).
1617  &L, DT, &LI, SE, TTI, AC, ORE, BFI, PSI,
1618  /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, UnrollOpts.OnlyWhenForced,
1619  UnrollOpts.ForgetSCEV, /*Count*/ None,
1620  /*Threshold*/ None, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime,
1621  UnrollOpts.AllowUpperBound, LocalAllowPeeling,
1622  UnrollOpts.AllowProfileBasedPeeling, UnrollOpts.FullUnrollMaxCount);
1623  Changed |= Result != LoopUnrollResult::Unmodified;
1624 
1625  // The parent must not be damaged by unrolling!
1626 #ifndef NDEBUG
1627  if (Result != LoopUnrollResult::Unmodified && ParentL)
1628  ParentL->verifyLoop();
1629 #endif
1630 
1631  // Clear any cached analysis results for L if we removed it completely.
1632  if (LAM && Result == LoopUnrollResult::FullyUnrolled)
1633  LAM->clear(L, LoopName);
1634  }
1635 
1636  if (!Changed)
1637  return PreservedAnalyses::all();
1638 
1640 }
1641 
1643  raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1644  static_cast<PassInfoMixin<LoopUnrollPass> *>(this)->printPipeline(
1645  OS, MapClassName2PassName);
1646  OS << "<";
1647  if (UnrollOpts.AllowPartial != None)
1648  OS << (UnrollOpts.AllowPartial.getValue() ? "" : "no-") << "partial;";
1649  if (UnrollOpts.AllowPeeling != None)
1650  OS << (UnrollOpts.AllowPeeling.getValue() ? "" : "no-") << "peeling;";
1651  if (UnrollOpts.AllowRuntime != None)
1652  OS << (UnrollOpts.AllowRuntime.getValue() ? "" : "no-") << "runtime;";
1653  if (UnrollOpts.AllowUpperBound != None)
1654  OS << (UnrollOpts.AllowUpperBound.getValue() ? "" : "no-") << "upperbound;";
1655  if (UnrollOpts.AllowProfileBasedPeeling != None)
1656  OS << (UnrollOpts.AllowProfileBasedPeeling.getValue() ? "" : "no-")
1657  << "profile-peeling;";
1658  if (UnrollOpts.FullUnrollMaxCount != None)
1659  OS << "full-unroll-max=" << UnrollOpts.FullUnrollMaxCount << ";";
1660  OS << "O" << UnrollOpts.OptLevel;
1661  OS << ">";
1662 }
llvm::InstructionCost
Definition: InstructionCost.h:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
shouldFullUnroll
static Optional< unsigned > shouldFullUnroll(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE, const TargetTransformInfo::UnrollingPreferences &UP)
Definition: LoopUnrollPass.cpp:803
FlatLoopTripCountThreshold
static cl::opt< unsigned > FlatLoopTripCountThreshold("flat-loop-tripcount-threshold", cl::init(5), cl::Hidden, cl::desc("If the runtime tripcount for the loop is lower than the " "threshold, the loop is considered as flat and will be less " "aggressively unrolled."))
llvm::OuterAnalysisManagerProxy
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:1062
llvm::TargetTransformInfo::UnrollingPreferences::BEInsns
unsigned BEInsns
Definition: TargetTransformInfo.h:481
AssumptionCache.h
llvm::TargetTransformInfo::UnrollingPreferences::PartialOptSizeThreshold
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
Definition: TargetTransformInfo.h:460
llvm::hasUnrollTransformation
TransformationMode hasUnrollTransformation(const Loop *L)
Definition: LoopUtils.cpp:355
unrollCountPragmaValue
static unsigned unrollCountPragmaValue(const Loop *L)
Definition: LoopUnrollPass.cpp:719
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2479
llvm::TargetTransformInfo::UnrollingPreferences::Runtime
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
Definition: TargetTransformInfo.h:488
llvm::OptimizationRemarkMissed
Diagnostic information for missed-optimization remarks.
Definition: DiagnosticInfo.h:735
llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition: TargetTransformInfo.h:211
llvm::ScalarEvolutionAnalysis
Analysis pass that exposes the ScalarEvolution for a function.
Definition: ScalarEvolution.h:2115
llvm::TargetTransformInfo::UnrollingPreferences::PartialThreshold
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
Definition: TargetTransformInfo.h:456
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
LoopSimplify.h
llvm::LoopStandardAnalysisResults::AC
AssumptionCache & AC
Definition: LoopAnalysisManager.h:53
analyzeLoopUnrollCost
static Optional< EstimatedUnrollCost > analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize, unsigned MaxIterationsCountToAnalyze)
Figure out if the loop is worth full unrolling.
Definition: LoopUnrollPass.cpp:346
llvm::UnrolledInstAnalyzer
Definition: LoopUnrollAnalyzer.h:42
Optional.h
llvm::InstructionCost::getValue
Optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
Definition: InstructionCost.h:87
Metadata.h
UnrollFullMaxCount
static cl::opt< unsigned > UnrollFullMaxCount("unroll-full-max-count", cl::Hidden, cl::desc("Set the max unroll count for full unrolling, for testing purposes"))
UnrollPartialThreshold
static cl::opt< unsigned > UnrollPartialThreshold("unroll-partial-threshold", cl::Hidden, cl::desc("The cost threshold for partial loop unrolling"))
llvm::TargetTransformInfo::UnrollingPreferences::MaxCount
unsigned MaxCount
Definition: TargetTransformInfo.h:472
getUnrollMetadataForLoop
static MDNode * getUnrollMetadataForLoop(const Loop *L, StringRef Name)
Definition: LoopUnrollPass.cpp:695
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
IntrinsicInst.h
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:780
Scalar.h
llvm::PassInfoMixin< LoopUnrollPass >
llvm::ScalarEvolution::getSmallConstantTripMultiple
unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value,...
Definition: ScalarEvolution.cpp:7831
llvm::Function
Definition: Function.h:60
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:546
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::lookup
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:199
StringRef.h
llvm::LoopUnrollPass::printPipeline
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Definition: LoopUnrollPass.cpp:1642
Pass.h
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:138
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::size
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
llvm::CodeMetrics
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition: CodeMetrics.h:31
UnrollRevisitChildLoops
static cl::opt< bool > UnrollRevisitChildLoops("unroll-revisit-child-loops", cl::Hidden, cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. " "This shouldn't typically be needed as child loops (or their " "clones) were already visited."))
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::find
iterator find(const_arg_type_t< ValueT > V)
Definition: DenseSet.h:179
UnrollCostEstimator::getUnrolledLoopSize
uint64_t getUnrolledLoopSize(const TargetTransformInfo::UnrollingPreferences &UP, const unsigned CountOverwrite=0) const
Definition: LoopUnrollPass.cpp:762
SizeOpts.h
llvm::LLVMLoopUnrollFollowupRemainder
const char *const LLVMLoopUnrollFollowupRemainder
Definition: UnrollLoop.h:45
ErrorHandling.h
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:630
llvm::erase_if
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:1807
shouldPartialUnroll
static Optional< unsigned > shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount, const UnrollCostEstimator UCE, const TargetTransformInfo::UnrollingPreferences &UP)
Definition: LoopUnrollPass.cpp:834
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
hasUnrollFullPragma
static bool hasUnrollFullPragma(const Loop *L)
Definition: LoopUnrollPass.cpp:702
llvm::makeFollowupLoopID
Optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:266
llvm::TargetTransformInfo::UnrollingPreferences::UnrollAndJamInnerLoopThreshold
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
Definition: TargetTransformInfo.h:507
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::TargetTransformInfo::UnrollingPreferences::UnrollRemainder
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
Definition: TargetTransformInfo.h:500
llvm::getLoopAnalysisUsage
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:144
llvm::TargetTransformInfo::UnrollingPreferences::Count
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
Definition: TargetTransformInfo.h:465
llvm::TargetTransformInfo::TCK_CodeSize
@ TCK_CodeSize
Instruction code size.
Definition: TargetTransformInfo.h:214
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
llvm::initializeLoopUnrollPass
void initializeLoopUnrollPass(PassRegistry &)
ScalarEvolution.h
llvm::TargetTransformInfo::UnrollingPreferences::Partial
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
Definition: TargetTransformInfo.h:484
llvm::AsanDetectStackUseAfterReturnMode::Runtime
@ Runtime
Detect stack use after return if not disabled runtime with (ASAN_OPTIONS=detect_stack_use_after_retur...
llvm::LoopUnrollOptions::ForgetSCEV
const bool ForgetSCEV
If true, forget all loops when unrolling.
Definition: LoopUnrollPass.h:78
DenseMap.h
llvm::TargetTransformInfo::PeelingPreferences
Definition: TargetTransformInfo.h:538
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:51
llvm::TargetTransformInfo::UnrollingPreferences::FullUnrollMaxCount
unsigned FullUnrollMaxCount
Set the maximum unrolling factor for full unrolling.
Definition: TargetTransformInfo.h:476
llvm::Optional< unsigned >
llvm::formLCSSARecursively
bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition: LCSSA.cpp:404
llvm::LoopBase::begin
iterator begin() const
Definition: LoopInfo.h:170
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:147
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::MCID::Convergent
@ Convergent
Definition: MCInstrDesc.h:184
llvm::successors
auto successors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:29
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::computeUnrollCount
bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
Definition: LoopUnrollPass.cpp:889
STLExtras.h
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:654
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::LoopStandardAnalysisResults::DT
DominatorTree & DT
Definition: LoopAnalysisManager.h:54
UnrollMaxPercentThresholdBoost
static cl::opt< unsigned > UnrollMaxPercentThresholdBoost("unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden, cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied " "to the threshold when aggressively unrolling a loop due to the " "dynamic cost savings. If completely unrolling a loop will reduce " "the total runtime from X to Y, we boost the loop unroll " "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, " "X/Y). This limit avoids excessive code bloat."))
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::insert
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
UnrollThresholdDefault
static cl::opt< unsigned > UnrollThresholdDefault("unroll-threshold-default", cl::init(150), cl::Hidden, cl::desc("Default threshold (max size of unrolled " "loop), used in all but O3 optimizations"))
loops
loops
Definition: LoopInfo.cpp:1176
llvm::TargetTransformInfo::UnrollingPreferences::AllowExpensiveTripCount
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for...
Definition: TargetTransformInfo.h:493
llvm::isEqual
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
Definition: GCNRegPressure.cpp:55
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:55
loop
Analysis the ScalarEvolution expression for r is< loop > Outside the loop
Definition: README.txt:8
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::MDNode::getNumOperands
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1284
llvm::gatherUnrollingPreferences
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, Optional< unsigned > UserThreshold, Optional< unsigned > UserCount, Optional< bool > UserAllowPartial, Optional< bool > UserRuntime, Optional< bool > UserUpperBound, Optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
Definition: LoopUnrollPass.cpp:184
LoopAnalysisManager.h
UnrollCount
static cl::opt< unsigned > UnrollCount("unroll-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with " "unroll_count pragma values, for testing purposes"))
llvm::TargetTransformInfo::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const
Get target-customized preferences for the generic loop unrolling transformation.
Definition: TargetTransformInfo.cpp:328
UnrollThreshold
static cl::opt< unsigned > UnrollThreshold("unroll-threshold", cl::Hidden, cl::desc("The cost threshold for loop unrolling"))
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::LoopUnrollOptions::OnlyWhenForced
bool OnlyWhenForced
If false, use a cost model to determine whether unrolling of a loop is profitable.
Definition: LoopUnrollPass.h:73
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::end
iterator end()
Definition: DenseSet.h:174
Instruction.h
CommandLine.h
CodeMetrics.h
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::LoopBase::getParentLoop
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Definition: LoopInfo.h:113
llvm::TM_Enable
@ TM_Enable
The transformation should be applied without considering a cost model.
Definition: LoopUtils.h:274
llvm::ApproximateLoopSize
InstructionCost ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value * > &EphValues, unsigned BEInsns)
ApproximateLoopSize - Approximate the size of the loop.
Definition: LoopUnrollPass.cpp:666
llvm::DenseMapInfo< Instruction * >
llvm::TargetTransformInfo::PeelingPreferences::PeelProfiledIterations
bool PeelProfiledIterations
Allow peeling basing on profile.
Definition: TargetTransformInfo.h:551
llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Definition: BlockFrequencyInfo.h:37
llvm::shouldOptimizeForSize
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
Definition: MachineSizeOpts.cpp:183
llvm::Loop::setLoopAlreadyUnrolled
void setLoopAlreadyUnrolled()
Add llvm.loop.unroll.disable to this loop's loop id metadata.
Definition: LoopInfo.cpp:536
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::GetUnrollMetadata
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Definition: LoopUnroll.cpp:848
Constants.h
UnrollMaxUpperBound
static cl::opt< unsigned > UnrollMaxUpperBound("unroll-max-upperbound", cl::init(8), cl::Hidden, cl::desc("The max of trip count upper bound that is considered in unrolling"))
llvm::SmallVectorImpl::append
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:667
llvm::TargetTransformInfo::UnrollingPreferences::Force
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
Definition: TargetTransformInfo.h:496
int
Clang compiles this i1 i64 store i64 i64 store i64 i64 store i64 i64 store i64 align Which gets codegen d xmm0 movaps rbp movaps rbp movaps rbp movaps rbp rbp rbp rbp rbp It would be better to have movq s of instead of the movaps s LLVM produces ret int
Definition: README.txt:536
llvm::AnalysisManager::clear
void clear(IRUnitT &IR, llvm::StringRef Name)
Clear any cached analysis results for a single unit of IR.
Definition: PassManagerImpl.h:36
llvm::LLVMLoopUnrollFollowupUnrolled
const char *const LLVMLoopUnrollFollowupUnrolled
Definition: UnrollLoop.h:43
UnrollAllowRemainder
static cl::opt< bool > UnrollAllowRemainder("unroll-allow-remainder", cl::Hidden, cl::desc("Allow generation of a loop remainder (extra iterations) " "when unrolling a loop."))
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::PriorityWorklist< T, SmallVector< T, N >, SmallDenseMap< T, ptrdiff_t > >::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: PriorityWorklist.h:153
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::LoopBase::end
iterator end() const
Definition: LoopInfo.h:171
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:194
DenseSet.h
llvm::TargetTransformInfo::UnrollingPreferences::MaxIterationsCountToAnalyze
unsigned MaxIterationsCountToAnalyze
Don't allow loop unrolling to simulate more than this number of iterations when checking full unroll ...
Definition: TargetTransformInfo.h:510
false
Definition: StackSlotColoring.cpp:141
llvm::simplifyLoopAfterUnroll
void simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI)
Perform some cleanup and simplifications on loops after unrolling.
Definition: LoopUnroll.cpp:215
llvm::TargetTransformInfo::UnrollingPreferences::UnrollAndJam
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
Definition: TargetTransformInfo.h:502
llvm::LoopUnrollOptions::FullUnrollMaxCount
Optional< unsigned > FullUnrollMaxCount
Definition: LoopUnrollPass.h:67
llvm::Instruction
Definition: Instruction.h:42
llvm::computePeelCount
void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::PeelingPreferences &PP, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, unsigned Threshold=UINT_MAX)
Definition: LoopPeel.cpp:357
llvm::LoopBase::verifyLoop
void verifyLoop() const
Verify loop structure.
Definition: LoopInfoImpl.h:285
llvm::PGSOQueryType::IRPass
@ IRPass
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:143
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:54
llvm::LoopBase::getExitingBlocks
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:33
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:395
LoopUtils.h
llvm::CodeMetrics::collectEphemeralValues
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:70
llvm::LPPassManager
Definition: LoopPass.h:76
SmallPtrSet.h
llvm::LoopUnrollOptions::AllowPeeling
Optional< bool > AllowPeeling
Definition: LoopUnrollPass.h:63
llvm::LoopBase::getExitingBlock
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:48
Utils.h
llvm::BlockFrequencyAnalysis
Analysis pass which computes BlockFrequencyInfo.
Definition: BlockFrequencyInfo.h:112
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::empty
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
llvm::Loop::getName
StringRef getName() const
Definition: LoopInfo.h:885
llvm::None
const NoneType None
Definition: None.h:24
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::LoopUnrollResult::FullyUnrolled
@ FullyUnrolled
The loop was fully unrolled into straight-line code.
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::createSimpleLoopUnrollPass
Pass * createSimpleLoopUnrollPass(int OptLevel=2, bool OnlyWhenForced=false, bool ForgetAllSCEV=false)
Definition: LoopUnrollPass.cpp:1465
CFG.h
LoopInfo.h
llvm::LoopUnrollOptions::AllowUpperBound
Optional< bool > AllowUpperBound
Definition: LoopUnrollPass.h:65
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:39
llvm::LLVMLoopUnrollFollowupAll
const char *const LLVMLoopUnrollFollowupAll
Definition: UnrollLoop.h:42
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLFunctionalExtras.h:36
UnrollMaxCount
static cl::opt< unsigned > UnrollMaxCount("unroll-max-count", cl::Hidden, cl::desc("Set the max unroll count for partial and runtime unrolling, for" "testing purposes"))
llvm::ScalarEvolution::getSmallConstantTripCount
unsigned getSmallConstantTripCount(const Loop *L)
Returns the exact trip count of the loop if we can compute it, and the result is a small constant.
Definition: ScalarEvolution.cpp:7675
llvm::MDNode::getOperand
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1278
llvm::DenseSet
Implements a dense probed hash-table based set.
Definition: DenseSet.h:268
llvm::getLoopPassPreservedAnalyses
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Definition: LoopAnalysisManager.cpp:137
llvm::Loop::isLoopSimplifyForm
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to,...
Definition: LoopInfo.cpp:475
BasicBlock.h
llvm::cl::opt< bool >
UnrollRuntime
static cl::opt< bool > UnrollRuntime("unroll-runtime", cl::Hidden, cl::desc("Unroll loops with run-time trip counts"))
hasUnrollEnablePragma
static bool hasUnrollEnablePragma(const Loop *L)
Definition: LoopUnrollPass.cpp:708
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::UnrollLoop
LoopUnrollResult UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const llvm::TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop=nullptr)
Unroll the given loop by Count.
Definition: LoopUnroll.cpp:269
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::clear
void clear()
Definition: DenseMap.h:112
llvm::count
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1709
uint64_t
ProfileSummaryInfo.h
tryToUnrollLoop
static LoopUnrollResult tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel, bool OnlyWhenForced, bool ForgetAllSCEV, Optional< unsigned > ProvidedCount, Optional< unsigned > ProvidedThreshold, Optional< bool > ProvidedAllowPartial, Optional< bool > ProvidedRuntime, Optional< bool > ProvidedUpperBound, Optional< bool > ProvidedAllowPeeling, Optional< bool > ProvidedAllowProfileBasedPeeling, Optional< unsigned > ProvidedFullUnrollMaxCount)
Definition: LoopUnrollPass.cpp:1121
llvm::LoopPass
Definition: LoopPass.h:28
llvm::hasUnrollAndJamTransformation
TransformationMode hasUnrollAndJamTransformation(const Loop *L)
Definition: LoopUtils.cpp:376
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2535
llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition: LoopPassManager.h:262
llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:173
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::SmallPriorityWorklist
A version of PriorityWorklist that selects small size optimized data structures for the vector and ma...
Definition: PriorityWorklist.h:255
llvm::DenseMap
Definition: DenseMap.h:716
llvm::ScalarEvolution::getSmallConstantMaxTripCount
unsigned getSmallConstantMaxTripCount(const Loop *L)
Returns the upper bound of the loop trip count as a normal unsigned value.
Definition: ScalarEvolution.cpp:7691
llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition: TargetTransformInfo.h:431
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::LoopUnrollResult::PartiallyUnrolled
@ PartiallyUnrolled
The loop was partially unrolled – we still have a loop, but with a smaller trip count.
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
Metrics
Machine Trace Metrics
Definition: MachineTraceMetrics.cpp:53
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:166
llvm::LoopUnrollResult
LoopUnrollResult
Represents the result of a UnrollLoop invocation.
Definition: UnrollLoop.h:54
llvm::LoopUnrollOptions::OptLevel
int OptLevel
Definition: LoopUnrollPass.h:68
llvm::PriorityWorklist< T, SmallVector< T, N >, SmallDenseMap< T, ptrdiff_t > >::empty
bool empty() const
Determine if the PriorityWorklist is empty or not.
Definition: PriorityWorklist.h:67
UnrollThresholdAggressive
static cl::opt< unsigned > UnrollThresholdAggressive("unroll-threshold-aggressive", cl::init(300), cl::Hidden, cl::desc("Threshold (max size of unrolled loop) to use in aggressive (O3) " "optimizations"))
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:215
llvm::TM_Disable
@ TM_Disable
The transformation should not be applied.
Definition: LoopUtils.h:277
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
UnrollCostEstimator::UnrollCostEstimator
UnrollCostEstimator(Loop &L, unsigned LoopSize)
Definition: LoopUnrollPass.cpp:757
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1675
llvm::Optional::getValue
constexpr const T & getValue() const &
Definition: Optional.h:306
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
NoThreshold
static const unsigned NoThreshold
A magic value for use with the Threshold parameter to indicate that the loop unroll should be perform...
Definition: LoopUnrollPass.cpp:180
llvm::TTI
TargetTransformInfo TTI
Definition: TargetTransformInfo.h:163
llvm::peelLoop
bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree &DT, AssumptionCache *AC, bool PreserveLCSSA)
Peel off the first PeelCount iterations of loop L.
Definition: LoopPeel.cpp:739
llvm::TargetTransformInfo::isLoweredToCall
bool isLoweredToCall(const Function *F) const
Test whether calls to a function lower to actual program function calls.
Definition: TargetTransformInfo.cpp:284
llvm::LoopInfoBase::begin
iterator begin() const
Definition: LoopInfo.h:961
llvm::LPMUpdater::markLoopAsDeleted
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
Definition: LoopPassManager.h:282
llvm::MDNode
Metadata node.
Definition: Metadata.h:944
LoopPassManager.h
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
UnrollAllowPartial
static cl::opt< bool > UnrollAllowPartial("unroll-allow-partial", cl::Hidden, cl::desc("Allows loops to be partially unrolled until " "-unroll-threshold loop size is reached."))
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
PragmaUnrollThreshold
static cl::opt< unsigned > PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 *1024), cl::Hidden, cl::desc("Unrolled size limit for loops with an unroll(full) or " "unroll_count pragma."))
llvm::ProfileSummaryAnalysis
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Definition: ProfileSummaryInfo.h:211
llvm::TargetTransformInfo::PeelingPreferences::PeelCount
unsigned PeelCount
A forced peeling factor (the number of bodied of the original loop that should be peeled off before t...
Definition: TargetTransformInfo.h:542
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:202
llvm::LoopInfo
Definition: LoopInfo.h:1102
None.h
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::TargetTransformInfo::TCK_SizeAndLatency
@ TCK_SizeAndLatency
The weighted sum of size and latency.
Definition: TargetTransformInfo.h:215
llvm::TM_ForcedByUser
@ TM_ForcedByUser
The transformation was directed by the user, e.g.
Definition: LoopUtils.h:285
LoopPass.h
llvm::appendLoopsToWorklist
void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
Definition: LoopUtils.cpp:1467
if
if(llvm_vc STREQUAL "") set(fake_version_inc "$
Definition: CMakeLists.txt:14
llvm::TargetTransformInfo::getUserCost
InstructionCost getUserCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
Definition: TargetTransformInfo.cpp:222
llvm::Instruction::getFunction
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:69
CostKind
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
llvm::LoopStandardAnalysisResults::LI
LoopInfo & LI
Definition: LoopAnalysisManager.h:55
llvm::TargetTransformInfo::UnrollingPreferences::DefaultUnrollRuntimeCount
unsigned DefaultUnrollRuntimeCount
Default unroll count for loops with run-time trip count.
Definition: TargetTransformInfo.h:467
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
UnrollOptSizeThreshold
static cl::opt< unsigned > UnrollOptSizeThreshold("unroll-optsize-threshold", cl::init(0), cl::Hidden, cl::desc("The cost threshold for loop unrolling when optimizing for " "size"))
llvm::InstructionCost::isValid
bool isValid() const
Definition: InstructionCost.h:79
BlockFrequencyInfo.h
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:429
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:305
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:209
llvm::LoopUnrollOptions::AllowProfileBasedPeeling
Optional< bool > AllowProfileBasedPeeling
Definition: LoopUnrollPass.h:66
UnrollUnrollRemainder
static cl::opt< bool > UnrollUnrollRemainder("unroll-remainder", cl::Hidden, cl::desc("Allow the loop remainder to be unrolled."))
LoopUnrollAnalyzer.h
LAM
LoopAnalysisManager LAM
Definition: PassBuilderBindings.cpp:58
llvm::Function::hasOptSize
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:664
Callee
amdgpu Simplify well known AMD library false FunctionCallee Callee
Definition: AMDGPULibCalls.cpp:186
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::clear
void clear()
Completely clear the SetVector.
Definition: SetVector.h:220
UnrollCostEstimator
Definition: LoopUnrollPass.cpp:753
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:181
llvm::LCSSAID
char & LCSSAID
Definition: LCSSA.cpp:486
llvm::ForgetSCEVInLoopUnroll
cl::opt< bool > ForgetSCEVInLoopUnroll
Constant.h
llvm::LoopUnrollPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: LoopUnrollPass.cpp:1556
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:50
std
Definition: BitVector.h:851
llvm::Loop::setLoopID
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:524
llvm::Loop::isLCSSAForm
bool isLCSSAForm(const DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:458
llvm::LPPassManager::markLoopAsDeleted
void markLoopAsDeleted(Loop &L)
Definition: LoopPass.cpp:110
llvm::LoopStandardAnalysisResults::TTI
TargetTransformInfo & TTI
Definition: LoopAnalysisManager.h:58
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:345
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::TargetTransformInfo::UnrollingPreferences::UpperBound
bool UpperBound
Allow using trip count upper bound to unroll loops.
Definition: TargetTransformInfo.h:498
LoopUnrollPass.h
llvm::Loop::getLoopID
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:500
llvm::ScalarEvolution::isBackedgeTakenCountMaxOrZero
bool isBackedgeTakenCountMaxOrZero(const Loop *L)
Return true if the backedge taken count is either the value returned by getConstantMaxBackedgeTakenCo...
Definition: ScalarEvolution.cpp:7913
llvm::AnalysisManager::getCachedResult
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:799
llvm::TargetTransformInfo::UnrollingPreferences::AllowRemainder
bool AllowRemainder
Allow generation of a loop remainder (extra iterations after unroll).
Definition: TargetTransformInfo.h:490
Casting.h
DiagnosticInfo.h
Function.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
shouldPragmaUnroll
static Optional< unsigned > shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo, const unsigned TripMultiple, const unsigned TripCount, const UnrollCostEstimator UCE, const TargetTransformInfo::UnrollingPreferences &UP)
Definition: LoopUnrollPass.cpp:776
PassManager.h
UnrollMaxIterationsCountToAnalyze
static cl::opt< unsigned > UnrollMaxIterationsCountToAnalyze("unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden, cl::desc("Don't allow loop unrolling to simulate more than this number of" "iterations when checking full unroll profitability"))
llvm::simplifyLoop
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
Definition: LoopSimplify.cpp:713
llvm::Function::hasProfileData
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
Definition: Function.h:290
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:591
isEqual
static bool isEqual(const Function &Caller, const Function &Callee)
Definition: Attributes.cpp:1869
llvm::LPMUpdater::addChildLoops
void addChildLoops(ArrayRef< Loop * > NewChildLoops)
Loop passes should use this method to indicate they have added new child loops of the current loop.
Definition: LoopPassManager.h:303
llvm::LoopStandardAnalysisResults::SE
ScalarEvolution & SE
Definition: LoopAnalysisManager.h:56
llvm::LoopUnrollOptions::AllowPartial
Optional< bool > AllowPartial
Definition: LoopUnrollPass.h:62
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:267
llvm::LoopInfoBase::end
iterator end() const
Definition: LoopInfo.h:962
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:690
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
Instructions.h
INITIALIZE_PASS_BEGIN
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:51
SmallVector.h
hasRuntimeUnrollDisablePragma
static bool hasRuntimeUnrollDisablePragma(const Loop *L)
Definition: LoopUnrollPass.cpp:713
DEBUG_TYPE
#define DEBUG_TYPE
Definition: LoopUnrollPass.cpp:73
Dominators.h
llvm::TargetTransformInfo::UnrollingPreferences::Threshold
unsigned Threshold
The cost threshold for the unrolled loop.
Definition: TargetTransformInfo.h:439
UnrollLoop.h
TargetTransformInfo.h
llvm::LoopInfoBase::empty
bool empty() const
Definition: LoopInfo.h:965
DenseMapInfo.h
llvm::Function::hasMinSize
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:661
llvm::SmallPtrSetImpl< const Value * >
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:307
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::InnerAnalysisManagerProxy
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:937
llvm::TargetTransformInfo::UnrollingPreferences::OptSizeThreshold
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
Definition: TargetTransformInfo.h:453
llvm::LoopBase::isLoopExiting
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
Definition: LoopInfo.h:241
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3230
llvm::cl::desc
Definition: CommandLine.h:405
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3086
llvm::LoopUnrollResult::Unmodified
@ Unmodified
The loop was not modified.
raw_ostream.h
llvm::SmallPtrSetImpl::contains
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:389
llvm::LPMUpdater::addSiblingLoops
void addSiblingLoops(ArrayRef< Loop * > NewSibLoops)
Loop passes should use this method to indicate they have added new sibling loops to the current loop.
Definition: LoopPassManager.h:330
LoopPeel.h
llvm::getLoopEstimatedTripCount
Optional< unsigned > getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight=nullptr)
Returns a loop's estimated trip count based on branch weight metadata.
Definition: LoopUtils.cpp:813
InitializePasses.h
llvm::OptimizationRemarkEmitterAnalysis
Definition: OptimizationRemarkEmitter.h:164
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SetVector.h:232
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
getFullUnrollBoostingFactor
static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost, unsigned MaxPercentThresholdBoost)
Definition: LoopUnrollPass.cpp:737
llvm::TransformationMode
TransformationMode
The mode sets how eager a transformation should be applied.
Definition: LoopUtils.h:268
Debug.h
llvm::gatherPeelingPreferences
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, Optional< bool > UserAllowPeeling, Optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
Definition: LoopPeel.cpp:696
llvm::LoopAnalysis
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1262
SetVector.h
llvm::TargetTransformInfo::UnrollingPreferences::MaxPercentThresholdBoost
unsigned MaxPercentThresholdBoost
If complete unrolling will reduce the cost of the loop, we will boost the Threshold by a certain perc...
Definition: TargetTransformInfo.h:450
llvm::createLoopUnrollPass
Pass * createLoopUnrollPass(int OptLevel=2, bool OnlyWhenForced=false, bool ForgetAllSCEV=false, int Threshold=-1, int Count=-1, int AllowPartial=-1, int Runtime=-1, int UpperBound=-1, int AllowPeeling=-1)
Definition: LoopUnrollPass.cpp:1448
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
llvm::LoopUnrollOptions::AllowRuntime
Optional< bool > AllowRuntime
Definition: LoopUnrollPass.h:64
llvm::LoopFullUnrollPass::run
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LoopUnrollPass.cpp:1471