LLVM  14.0.0git
LoopUnrollPass.cpp
Go to the documentation of this file.
1 //===- LoopUnroll.cpp - Loop unroller pass --------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass implements a simple loop unroller. It works best when loops have
10 // been canonicalized by the -indvars pass, allowing it to determine the trip
11 // counts of loops easily.
12 //===----------------------------------------------------------------------===//
13 
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/DenseMapInfo.h"
17 #include "llvm/ADT/DenseSet.h"
18 #include "llvm/ADT/None.h"
19 #include "llvm/ADT/Optional.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SetVector.h"
22 #include "llvm/ADT/SmallPtrSet.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/StringRef.h"
30 #include "llvm/Analysis/LoopInfo.h"
31 #include "llvm/Analysis/LoopPass.h"
37 #include "llvm/IR/BasicBlock.h"
38 #include "llvm/IR/CFG.h"
39 #include "llvm/IR/Constant.h"
40 #include "llvm/IR/Constants.h"
41 #include "llvm/IR/DiagnosticInfo.h"
42 #include "llvm/IR/Dominators.h"
43 #include "llvm/IR/Function.h"
44 #include "llvm/IR/Instruction.h"
45 #include "llvm/IR/Instructions.h"
46 #include "llvm/IR/IntrinsicInst.h"
47 #include "llvm/IR/Metadata.h"
48 #include "llvm/IR/PassManager.h"
49 #include "llvm/InitializePasses.h"
50 #include "llvm/Pass.h"
51 #include "llvm/Support/Casting.h"
53 #include "llvm/Support/Debug.h"
56 #include "llvm/Transforms/Scalar.h"
58 #include "llvm/Transforms/Utils.h"
64 #include <algorithm>
65 #include <cassert>
66 #include <cstdint>
67 #include <limits>
68 #include <string>
69 #include <tuple>
70 #include <utility>
71 
72 using namespace llvm;
73 
74 #define DEBUG_TYPE "loop-unroll"
75 
77  "forget-scev-loop-unroll", cl::init(false), cl::Hidden,
78  cl::desc("Forget everything in SCEV when doing LoopUnroll, instead of just"
79  " the current top-most loop. This is sometimes preferred to reduce"
80  " compile time."));
81 
82 static cl::opt<unsigned>
83  UnrollThreshold("unroll-threshold", cl::Hidden,
84  cl::desc("The cost threshold for loop unrolling"));
85 
86 static cl::opt<unsigned>
88  "unroll-optsize-threshold", cl::init(0), cl::Hidden,
89  cl::desc("The cost threshold for loop unrolling when optimizing for "
90  "size"));
91 
93  "unroll-partial-threshold", cl::Hidden,
94  cl::desc("The cost threshold for partial loop unrolling"));
95 
97  "unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden,
98  cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied "
99  "to the threshold when aggressively unrolling a loop due to the "
100  "dynamic cost savings. If completely unrolling a loop will reduce "
101  "the total runtime from X to Y, we boost the loop unroll "
102  "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, "
103  "X/Y). This limit avoids excessive code bloat."));
104 
106  "unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden,
107  cl::desc("Don't allow loop unrolling to simulate more than this number of"
108  "iterations when checking full unroll profitability"));
109 
111  "unroll-count", cl::Hidden,
112  cl::desc("Use this unroll count for all loops including those with "
113  "unroll_count pragma values, for testing purposes"));
114 
116  "unroll-max-count", cl::Hidden,
117  cl::desc("Set the max unroll count for partial and runtime unrolling, for"
118  "testing purposes"));
119 
121  "unroll-full-max-count", cl::Hidden,
122  cl::desc(
123  "Set the max unroll count for full unrolling, for testing purposes"));
124 
125 static cl::opt<bool>
126  UnrollAllowPartial("unroll-allow-partial", cl::Hidden,
127  cl::desc("Allows loops to be partially unrolled until "
128  "-unroll-threshold loop size is reached."));
129 
131  "unroll-allow-remainder", cl::Hidden,
132  cl::desc("Allow generation of a loop remainder (extra iterations) "
133  "when unrolling a loop."));
134 
135 static cl::opt<bool>
136  UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden,
137  cl::desc("Unroll loops with run-time trip counts"));
138 
140  "unroll-max-upperbound", cl::init(8), cl::Hidden,
141  cl::desc(
142  "The max of trip count upper bound that is considered in unrolling"));
143 
145  "pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden,
146  cl::desc("Unrolled size limit for loops with an unroll(full) or "
147  "unroll_count pragma."));
148 
150  "flat-loop-tripcount-threshold", cl::init(5), cl::Hidden,
151  cl::desc("If the runtime tripcount for the loop is lower than the "
152  "threshold, the loop is considered as flat and will be less "
153  "aggressively unrolled."));
154 
156  "unroll-remainder", cl::Hidden,
157  cl::desc("Allow the loop remainder to be unrolled."));
158 
159 // This option isn't ever intended to be enabled, it serves to allow
160 // experiments to check the assumptions about when this kind of revisit is
161 // necessary.
163  "unroll-revisit-child-loops", cl::Hidden,
164  cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. "
165  "This shouldn't typically be needed as child loops (or their "
166  "clones) were already visited."));
167 
169  "unroll-threshold-aggressive", cl::init(300), cl::Hidden,
170  cl::desc("Threshold (max size of unrolled loop) to use in aggressive (O3) "
171  "optimizations"));
172 static cl::opt<unsigned>
173  UnrollThresholdDefault("unroll-threshold-default", cl::init(150),
174  cl::Hidden,
175  cl::desc("Default threshold (max size of unrolled "
176  "loop), used in all but O3 optimizations"));
177 
178 /// A magic value for use with the Threshold parameter to indicate
179 /// that the loop unroll should be performed regardless of how much
180 /// code expansion would result.
182 
183 /// Gather the various unrolling parameters based on the defaults, compiler
184 /// flags, TTI overrides and user specified parameters.
188  OptimizationRemarkEmitter &ORE, int OptLevel,
189  Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
190  Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
191  Optional<bool> UserUpperBound, Optional<unsigned> UserFullUnrollMaxCount) {
193 
194  // Set up the defaults
195  UP.Threshold =
197  UP.MaxPercentThresholdBoost = 400;
199  UP.PartialThreshold = 150;
201  UP.Count = 0;
205  UP.BEInsns = 2;
206  UP.Partial = false;
207  UP.Runtime = false;
208  UP.AllowRemainder = true;
209  UP.UnrollRemainder = false;
210  UP.AllowExpensiveTripCount = false;
211  UP.Force = false;
212  UP.UpperBound = false;
213  UP.UnrollAndJam = false;
216 
217  // Override with any target specific settings
218  TTI.getUnrollingPreferences(L, SE, UP, &ORE);
219 
220  // Apply size attributes
221  bool OptForSize = L->getHeader()->getParent()->hasOptSize() ||
222  // Let unroll hints / pragmas take precedence over PGSO.
226  if (OptForSize) {
227  UP.Threshold = UP.OptSizeThreshold;
229  UP.MaxPercentThresholdBoost = 100;
230  }
231 
232  // Apply any user values specified by cl::opt
233  if (UnrollThreshold.getNumOccurrences() > 0)
235  if (UnrollPartialThreshold.getNumOccurrences() > 0)
237  if (UnrollMaxPercentThresholdBoost.getNumOccurrences() > 0)
239  if (UnrollMaxCount.getNumOccurrences() > 0)
241  if (UnrollFullMaxCount.getNumOccurrences() > 0)
248  UP.Runtime = UnrollRuntime;
249  if (UnrollMaxUpperBound == 0)
250  UP.UpperBound = false;
253  if (UnrollMaxIterationsCountToAnalyze.getNumOccurrences() > 0)
255 
256  // Apply user values provided by argument
257  if (UserThreshold.hasValue()) {
258  UP.Threshold = *UserThreshold;
259  UP.PartialThreshold = *UserThreshold;
260  }
261  if (UserCount.hasValue())
262  UP.Count = *UserCount;
263  if (UserAllowPartial.hasValue())
264  UP.Partial = *UserAllowPartial;
265  if (UserRuntime.hasValue())
266  UP.Runtime = *UserRuntime;
267  if (UserUpperBound.hasValue())
268  UP.UpperBound = *UserUpperBound;
269  if (UserFullUnrollMaxCount.hasValue())
270  UP.FullUnrollMaxCount = *UserFullUnrollMaxCount;
271 
272  return UP;
273 }
274 
275 namespace {
276 
277 /// A struct to densely store the state of an instruction after unrolling at
278 /// each iteration.
279 ///
280 /// This is designed to work like a tuple of <Instruction *, int> for the
281 /// purposes of hashing and lookup, but to be able to associate two boolean
282 /// states with each key.
283 struct UnrolledInstState {
284  Instruction *I;
285  int Iteration : 30;
286  unsigned IsFree : 1;
287  unsigned IsCounted : 1;
288 };
289 
290 /// Hashing and equality testing for a set of the instruction states.
291 struct UnrolledInstStateKeyInfo {
292  using PtrInfo = DenseMapInfo<Instruction *>;
294 
295  static inline UnrolledInstState getEmptyKey() {
296  return {PtrInfo::getEmptyKey(), 0, 0, 0};
297  }
298 
299  static inline UnrolledInstState getTombstoneKey() {
300  return {PtrInfo::getTombstoneKey(), 0, 0, 0};
301  }
302 
303  static inline unsigned getHashValue(const UnrolledInstState &S) {
304  return PairInfo::getHashValue({S.I, S.Iteration});
305  }
306 
307  static inline bool isEqual(const UnrolledInstState &LHS,
308  const UnrolledInstState &RHS) {
309  return PairInfo::isEqual({LHS.I, LHS.Iteration}, {RHS.I, RHS.Iteration});
310  }
311 };
312 
313 struct EstimatedUnrollCost {
314  /// The estimated cost after unrolling.
315  unsigned UnrolledCost;
316 
317  /// The estimated dynamic cost of executing the instructions in the
318  /// rolled form.
319  unsigned RolledDynamicCost;
320 };
321 
322 struct PragmaInfo {
323  PragmaInfo(bool UUC, bool PFU, unsigned PC, bool PEU)
324  : UserUnrollCount(UUC), PragmaFullUnroll(PFU), PragmaCount(PC),
325  PragmaEnableUnroll(PEU) {}
326  const bool UserUnrollCount;
327  const bool PragmaFullUnroll;
328  const unsigned PragmaCount;
329  const bool PragmaEnableUnroll;
330 };
331 
332 } // end anonymous namespace
333 
334 /// Figure out if the loop is worth full unrolling.
335 ///
336 /// Complete loop unrolling can make some loads constant, and we need to know
337 /// if that would expose any further optimization opportunities. This routine
338 /// estimates this optimization. It computes cost of unrolled loop
339 /// (UnrolledCost) and dynamic cost of the original loop (RolledDynamicCost). By
340 /// dynamic cost we mean that we won't count costs of blocks that are known not
341 /// to be executed (i.e. if we have a branch in the loop and we know that at the
342 /// given iteration its condition would be resolved to true, we won't add up the
343 /// cost of the 'false'-block).
344 /// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If
345 /// the analysis failed (no benefits expected from the unrolling, or the loop is
346 /// too big to analyze), the returned value is None.
348  const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE,
349  const SmallPtrSetImpl<const Value *> &EphValues,
350  const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize,
351  unsigned MaxIterationsCountToAnalyze) {
352  // We want to be able to scale offsets by the trip count and add more offsets
353  // to them without checking for overflows, and we already don't want to
354  // analyze *massive* trip counts, so we force the max to be reasonably small.
355  assert(MaxIterationsCountToAnalyze <
356  (unsigned)(std::numeric_limits<int>::max() / 2) &&
357  "The unroll iterations max is too large!");
358 
359  // Only analyze inner loops. We can't properly estimate cost of nested loops
360  // and we won't visit inner loops again anyway.
361  if (!L->isInnermost())
362  return None;
363 
364  // Don't simulate loops with a big or unknown tripcount
365  if (!TripCount || TripCount > MaxIterationsCountToAnalyze)
366  return None;
367 
370  DenseMap<Value *, Value *> SimplifiedValues;
371  SmallVector<std::pair<Value *, Value *>, 4> SimplifiedInputValues;
372 
373  // The estimated cost of the unrolled form of the loop. We try to estimate
374  // this by simplifying as much as we can while computing the estimate.
375  InstructionCost UnrolledCost = 0;
376 
377  // We also track the estimated dynamic (that is, actually executed) cost in
378  // the rolled form. This helps identify cases when the savings from unrolling
379  // aren't just exposing dead control flows, but actual reduced dynamic
380  // instructions due to the simplifications which we expect to occur after
381  // unrolling.
382  InstructionCost RolledDynamicCost = 0;
383 
384  // We track the simplification of each instruction in each iteration. We use
385  // this to recursively merge costs into the unrolled cost on-demand so that
386  // we don't count the cost of any dead code. This is essentially a map from
387  // <instruction, int> to <bool, bool>, but stored as a densely packed struct.
389 
390  // A small worklist used to accumulate cost of instructions from each
391  // observable and reached root in the loop.
392  SmallVector<Instruction *, 16> CostWorklist;
393 
394  // PHI-used worklist used between iterations while accumulating cost.
395  SmallVector<Instruction *, 4> PHIUsedList;
396 
397  // Helper function to accumulate cost for instructions in the loop.
398  auto AddCostRecursively = [&](Instruction &RootI, int Iteration) {
399  assert(Iteration >= 0 && "Cannot have a negative iteration!");
400  assert(CostWorklist.empty() && "Must start with an empty cost list");
401  assert(PHIUsedList.empty() && "Must start with an empty phi used list");
402  CostWorklist.push_back(&RootI);
404  RootI.getFunction()->hasMinSize() ?
407  for (;; --Iteration) {
408  do {
409  Instruction *I = CostWorklist.pop_back_val();
410 
411  // InstCostMap only uses I and Iteration as a key, the other two values
412  // don't matter here.
413  auto CostIter = InstCostMap.find({I, Iteration, 0, 0});
414  if (CostIter == InstCostMap.end())
415  // If an input to a PHI node comes from a dead path through the loop
416  // we may have no cost data for it here. What that actually means is
417  // that it is free.
418  continue;
419  auto &Cost = *CostIter;
420  if (Cost.IsCounted)
421  // Already counted this instruction.
422  continue;
423 
424  // Mark that we are counting the cost of this instruction now.
425  Cost.IsCounted = true;
426 
427  // If this is a PHI node in the loop header, just add it to the PHI set.
428  if (auto *PhiI = dyn_cast<PHINode>(I))
429  if (PhiI->getParent() == L->getHeader()) {
430  assert(Cost.IsFree && "Loop PHIs shouldn't be evaluated as they "
431  "inherently simplify during unrolling.");
432  if (Iteration == 0)
433  continue;
434 
435  // Push the incoming value from the backedge into the PHI used list
436  // if it is an in-loop instruction. We'll use this to populate the
437  // cost worklist for the next iteration (as we count backwards).
438  if (auto *OpI = dyn_cast<Instruction>(
439  PhiI->getIncomingValueForBlock(L->getLoopLatch())))
440  if (L->contains(OpI))
441  PHIUsedList.push_back(OpI);
442  continue;
443  }
444 
445  // First accumulate the cost of this instruction.
446  if (!Cost.IsFree) {
447  UnrolledCost += TTI.getUserCost(I, CostKind);
448  LLVM_DEBUG(dbgs() << "Adding cost of instruction (iteration "
449  << Iteration << "): ");
450  LLVM_DEBUG(I->dump());
451  }
452 
453  // We must count the cost of every operand which is not free,
454  // recursively. If we reach a loop PHI node, simply add it to the set
455  // to be considered on the next iteration (backwards!).
456  for (Value *Op : I->operands()) {
457  // Check whether this operand is free due to being a constant or
458  // outside the loop.
459  auto *OpI = dyn_cast<Instruction>(Op);
460  if (!OpI || !L->contains(OpI))
461  continue;
462 
463  // Otherwise accumulate its cost.
464  CostWorklist.push_back(OpI);
465  }
466  } while (!CostWorklist.empty());
467 
468  if (PHIUsedList.empty())
469  // We've exhausted the search.
470  break;
471 
472  assert(Iteration > 0 &&
473  "Cannot track PHI-used values past the first iteration!");
474  CostWorklist.append(PHIUsedList.begin(), PHIUsedList.end());
475  PHIUsedList.clear();
476  }
477  };
478 
479  // Ensure that we don't violate the loop structure invariants relied on by
480  // this analysis.
481  assert(L->isLoopSimplifyForm() && "Must put loop into normal form first.");
482  assert(L->isLCSSAForm(DT) &&
483  "Must have loops in LCSSA form to track live-out values.");
484 
485  LLVM_DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");
486 
488  L->getHeader()->getParent()->hasMinSize() ?
490  // Simulate execution of each iteration of the loop counting instructions,
491  // which would be simplified.
492  // Since the same load will take different values on different iterations,
493  // we literally have to go through all loop's iterations.
494  for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
495  LLVM_DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");
496 
497  // Prepare for the iteration by collecting any simplified entry or backedge
498  // inputs.
499  for (Instruction &I : *L->getHeader()) {
500  auto *PHI = dyn_cast<PHINode>(&I);
501  if (!PHI)
502  break;
503 
504  // The loop header PHI nodes must have exactly two input: one from the
505  // loop preheader and one from the loop latch.
506  assert(
507  PHI->getNumIncomingValues() == 2 &&
508  "Must have an incoming value only for the preheader and the latch.");
509 
510  Value *V = PHI->getIncomingValueForBlock(
511  Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch());
512  if (Iteration != 0 && SimplifiedValues.count(V))
513  V = SimplifiedValues.lookup(V);
514  SimplifiedInputValues.push_back({PHI, V});
515  }
516 
517  // Now clear and re-populate the map for the next iteration.
518  SimplifiedValues.clear();
519  while (!SimplifiedInputValues.empty())
520  SimplifiedValues.insert(SimplifiedInputValues.pop_back_val());
521 
522  UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE, L);
523 
524  BBWorklist.clear();
525  BBWorklist.insert(L->getHeader());
526  // Note that we *must not* cache the size, this loop grows the worklist.
527  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
528  BasicBlock *BB = BBWorklist[Idx];
529 
530  // Visit all instructions in the given basic block and try to simplify
531  // it. We don't change the actual IR, just count optimization
532  // opportunities.
533  for (Instruction &I : *BB) {
534  // These won't get into the final code - don't even try calculating the
535  // cost for them.
536  if (isa<DbgInfoIntrinsic>(I) || EphValues.count(&I))
537  continue;
538 
539  // Track this instruction's expected baseline cost when executing the
540  // rolled loop form.
541  RolledDynamicCost += TTI.getUserCost(&I, CostKind);
542 
543  // Visit the instruction to analyze its loop cost after unrolling,
544  // and if the visitor returns true, mark the instruction as free after
545  // unrolling and continue.
546  bool IsFree = Analyzer.visit(I);
547  bool Inserted = InstCostMap.insert({&I, (int)Iteration,
548  (unsigned)IsFree,
549  /*IsCounted*/ false}).second;
550  (void)Inserted;
551  assert(Inserted && "Cannot have a state for an unvisited instruction!");
552 
553  if (IsFree)
554  continue;
555 
556  // Can't properly model a cost of a call.
557  // FIXME: With a proper cost model we should be able to do it.
558  if (auto *CI = dyn_cast<CallInst>(&I)) {
559  const Function *Callee = CI->getCalledFunction();
560  if (!Callee || TTI.isLoweredToCall(Callee)) {
561  LLVM_DEBUG(dbgs() << "Can't analyze cost of loop with call\n");
562  return None;
563  }
564  }
565 
566  // If the instruction might have a side-effect recursively account for
567  // the cost of it and all the instructions leading up to it.
568  if (I.mayHaveSideEffects())
569  AddCostRecursively(I, Iteration);
570 
571  // If unrolled body turns out to be too big, bail out.
572  if (UnrolledCost > MaxUnrolledLoopSize) {
573  LLVM_DEBUG(dbgs() << " Exceeded threshold.. exiting.\n"
574  << " UnrolledCost: " << UnrolledCost
575  << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
576  << "\n");
577  return None;
578  }
579  }
580 
581  Instruction *TI = BB->getTerminator();
582 
583  auto getSimplifiedConstant = [&](Value *V) -> Constant * {
584  if (SimplifiedValues.count(V))
585  V = SimplifiedValues.lookup(V);
586  return dyn_cast<Constant>(V);
587  };
588 
589  // Add in the live successors by first checking whether we have terminator
590  // that may be simplified based on the values simplified by this call.
591  BasicBlock *KnownSucc = nullptr;
592  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
593  if (BI->isConditional()) {
594  if (auto *SimpleCond = getSimplifiedConstant(BI->getCondition())) {
595  // Just take the first successor if condition is undef
596  if (isa<UndefValue>(SimpleCond))
597  KnownSucc = BI->getSuccessor(0);
598  else if (ConstantInt *SimpleCondVal =
599  dyn_cast<ConstantInt>(SimpleCond))
600  KnownSucc = BI->getSuccessor(SimpleCondVal->isZero() ? 1 : 0);
601  }
602  }
603  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
604  if (auto *SimpleCond = getSimplifiedConstant(SI->getCondition())) {
605  // Just take the first successor if condition is undef
606  if (isa<UndefValue>(SimpleCond))
607  KnownSucc = SI->getSuccessor(0);
608  else if (ConstantInt *SimpleCondVal =
609  dyn_cast<ConstantInt>(SimpleCond))
610  KnownSucc = SI->findCaseValue(SimpleCondVal)->getCaseSuccessor();
611  }
612  }
613  if (KnownSucc) {
614  if (L->contains(KnownSucc))
615  BBWorklist.insert(KnownSucc);
616  else
617  ExitWorklist.insert({BB, KnownSucc});
618  continue;
619  }
620 
621  // Add BB's successors to the worklist.
622  for (BasicBlock *Succ : successors(BB))
623  if (L->contains(Succ))
624  BBWorklist.insert(Succ);
625  else
626  ExitWorklist.insert({BB, Succ});
627  AddCostRecursively(*TI, Iteration);
628  }
629 
630  // If we found no optimization opportunities on the first iteration, we
631  // won't find them on later ones too.
632  if (UnrolledCost == RolledDynamicCost) {
633  LLVM_DEBUG(dbgs() << " No opportunities found.. exiting.\n"
634  << " UnrolledCost: " << UnrolledCost << "\n");
635  return None;
636  }
637  }
638 
639  while (!ExitWorklist.empty()) {
640  BasicBlock *ExitingBB, *ExitBB;
641  std::tie(ExitingBB, ExitBB) = ExitWorklist.pop_back_val();
642 
643  for (Instruction &I : *ExitBB) {
644  auto *PN = dyn_cast<PHINode>(&I);
645  if (!PN)
646  break;
647 
648  Value *Op = PN->getIncomingValueForBlock(ExitingBB);
649  if (auto *OpI = dyn_cast<Instruction>(Op))
650  if (L->contains(OpI))
651  AddCostRecursively(*OpI, TripCount - 1);
652  }
653  }
654 
655  assert(UnrolledCost.isValid() && RolledDynamicCost.isValid() &&
656  "All instructions must have a valid cost, whether the "
657  "loop is rolled or unrolled.");
658 
659  LLVM_DEBUG(dbgs() << "Analysis finished:\n"
660  << "UnrolledCost: " << UnrolledCost << ", "
661  << "RolledDynamicCost: " << RolledDynamicCost << "\n");
662  return {{unsigned(*UnrolledCost.getValue()),
663  unsigned(*RolledDynamicCost.getValue())}};
664 }
665 
666 /// ApproximateLoopSize - Approximate the size of the loop.
668  const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent,
669  const TargetTransformInfo &TTI,
670  const SmallPtrSetImpl<const Value *> &EphValues, unsigned BEInsns) {
672  for (BasicBlock *BB : L->blocks())
673  Metrics.analyzeBasicBlock(BB, TTI, EphValues);
674  NumCalls = Metrics.NumInlineCandidates;
675  NotDuplicatable = Metrics.notDuplicatable;
676  Convergent = Metrics.convergent;
677 
678  unsigned LoopSize = Metrics.NumInsts;
679 
680  // Don't allow an estimate of size zero. This would allows unrolling of loops
681  // with huge iteration counts, which is a compile time problem even if it's
682  // not a problem for code quality. Also, the code using this size may assume
683  // that each loop has at least three instructions (likely a conditional
684  // branch, a comparison feeding that branch, and some kind of loop increment
685  // feeding that comparison instruction).
686  LoopSize = std::max(LoopSize, BEInsns + 1);
687 
688  return LoopSize;
689 }
690 
691 // Returns the loop hint metadata node with the given name (for example,
692 // "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is
693 // returned.
695  if (MDNode *LoopID = L->getLoopID())
696  return GetUnrollMetadata(LoopID, Name);
697  return nullptr;
698 }
699 
700 // Returns true if the loop has an unroll(full) pragma.
701 static bool hasUnrollFullPragma(const Loop *L) {
702  return getUnrollMetadataForLoop(L, "llvm.loop.unroll.full");
703 }
704 
705 // Returns true if the loop has an unroll(enable) pragma. This metadata is used
706 // for both "#pragma unroll" and "#pragma clang loop unroll(enable)" directives.
707 static bool hasUnrollEnablePragma(const Loop *L) {
708  return getUnrollMetadataForLoop(L, "llvm.loop.unroll.enable");
709 }
710 
711 // Returns true if the loop has an runtime unroll(disable) pragma.
712 static bool hasRuntimeUnrollDisablePragma(const Loop *L) {
713  return getUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable");
714 }
715 
716 // If loop has an unroll_count pragma return the (necessarily
717 // positive) value from the pragma. Otherwise return 0.
718 static unsigned unrollCountPragmaValue(const Loop *L) {
719  MDNode *MD = getUnrollMetadataForLoop(L, "llvm.loop.unroll.count");
720  if (MD) {
721  assert(MD->getNumOperands() == 2 &&
722  "Unroll count hint metadata should have two operands.");
723  unsigned Count =
724  mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
725  assert(Count >= 1 && "Unroll count must be positive.");
726  return Count;
727  }
728  return 0;
729 }
730 
731 // Computes the boosting factor for complete unrolling.
732 // If fully unrolling the loop would save a lot of RolledDynamicCost, it would
733 // be beneficial to fully unroll the loop even if unrolledcost is large. We
734 // use (RolledDynamicCost / UnrolledCost) to model the unroll benefits to adjust
735 // the unroll threshold.
736 static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost,
737  unsigned MaxPercentThresholdBoost) {
738  if (Cost.RolledDynamicCost >= std::numeric_limits<unsigned>::max() / 100)
739  return 100;
740  else if (Cost.UnrolledCost != 0)
741  // The boosting factor is RolledDynamicCost / UnrolledCost
742  return std::min(100 * Cost.RolledDynamicCost / Cost.UnrolledCost,
743  MaxPercentThresholdBoost);
744  else
745  return MaxPercentThresholdBoost;
746 }
747 
748 // Produce an estimate of the unrolled cost of the specified loop. This
749 // is used to a) produce a cost estimate for partial unrolling and b) to
750 // cheaply estimate cost for full unrolling when we don't want to symbolically
751 // evaluate all iterations.
753  const unsigned LoopSize;
754 
755 public:
756  UnrollCostEstimator(Loop &L, unsigned LoopSize) : LoopSize(LoopSize) {}
757 
758  // Returns loop size estimation for unrolled loop, given the unrolling
759  // configuration specified by UP.
760  uint64_t
762  const unsigned CountOverwrite = 0) const {
763  assert(LoopSize >= UP.BEInsns &&
764  "LoopSize should not be less than BEInsns!");
765  if (CountOverwrite)
766  return static_cast<uint64_t>(LoopSize - UP.BEInsns) * CountOverwrite +
767  UP.BEInsns;
768  else
769  return static_cast<uint64_t>(LoopSize - UP.BEInsns) * UP.Count +
770  UP.BEInsns;
771  }
772 };
773 
774 static Optional<unsigned>
775 shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo,
776  const unsigned TripMultiple, const unsigned TripCount,
777  const UnrollCostEstimator UCE,
779 
780  // Using unroll pragma
781  // 1st priority is unroll count set by "unroll-count" option.
782 
783  if (PInfo.UserUnrollCount) {
784  if (UP.AllowRemainder &&
785  UCE.getUnrolledLoopSize(UP, (unsigned)UnrollCount) < UP.Threshold)
786  return (unsigned)UnrollCount;
787  }
788 
789  // 2nd priority is unroll count set by pragma.
790  if (PInfo.PragmaCount > 0) {
791  if ((UP.AllowRemainder || (TripMultiple % PInfo.PragmaCount == 0)) &&
792  UCE.getUnrolledLoopSize(UP, PInfo.PragmaCount) < PragmaUnrollThreshold)
793  return PInfo.PragmaCount;
794  }
795 
796  if (PInfo.PragmaFullUnroll && TripCount != 0) {
797  if (UCE.getUnrolledLoopSize(UP, TripCount) < PragmaUnrollThreshold)
798  return TripCount;
799  }
800  // if didn't return until here, should continue to other priorties
801  return None;
802 }
803 
805  Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT,
806  ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
807  const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE,
809 
810  if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
811  // When computing the unrolled size, note that BEInsns are not replicated
812  // like the rest of the loop body.
813  if (UCE.getUnrolledLoopSize(UP) < UP.Threshold) {
814  return FullUnrollTripCount;
815 
816  } else {
817  // The loop isn't that small, but we still can fully unroll it if that
818  // helps to remove a significant number of instructions.
819  // To check that, run additional analysis on the loop.
821  L, FullUnrollTripCount, DT, SE, EphValues, TTI,
822  UP.Threshold * UP.MaxPercentThresholdBoost / 100,
824  unsigned Boost =
826  if (Cost->UnrolledCost < UP.Threshold * Boost / 100) {
827  return FullUnrollTripCount;
828  }
829  }
830  }
831  }
832  return None;
833 }
834 
835 static Optional<unsigned>
836 shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount,
837  const UnrollCostEstimator UCE,
839 
840  unsigned count = UP.Count;
841  if (TripCount) {
842  if (!UP.Partial) {
843  LLVM_DEBUG(dbgs() << " will not try to unroll partially because "
844  << "-unroll-allow-partial not given\n");
845  count = 0;
846  return count;
847  }
848  if (count == 0)
849  count = TripCount;
850  if (UP.PartialThreshold != NoThreshold) {
851  // Reduce unroll count to be modulo of TripCount for partial unrolling.
852  if (UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold)
853  count = (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
854  (LoopSize - UP.BEInsns);
855  if (count > UP.MaxCount)
856  count = UP.MaxCount;
857  while (count != 0 && TripCount % count != 0)
858  count--;
859  if (UP.AllowRemainder && count <= 1) {
860  // If there is no Count that is modulo of TripCount, set Count to
861  // largest power-of-two factor that satisfies the threshold limit.
862  // As we'll create fixup loop, do the type of unrolling only if
863  // remainder loop is allowed.
865  while (count != 0 &&
867  count >>= 1;
868  }
869  if (count < 2) {
870  count = 0;
871  }
872  } else {
873  count = TripCount;
874  }
875  if (count > UP.MaxCount)
876  count = UP.MaxCount;
877 
878  LLVM_DEBUG(dbgs() << " partially unrolling with count: " << count << "\n");
879 
880  return count;
881  }
882 
883  // if didn't return until here, should continue to other priorties
884  return None;
885 }
886 // Returns true if unroll count was set explicitly.
887 // Calculates unroll count and writes it to UP.Count.
888 // Unless IgnoreUser is true, will also use metadata and command-line options
889 // that are specific to to the LoopUnroll pass (which, for instance, are
890 // irrelevant for the LoopUnrollAndJam pass).
891 // FIXME: This function is used by LoopUnroll and LoopUnrollAndJam, but consumes
892 // many LoopUnroll-specific options. The shared functionality should be
893 // refactored into it own function.
895  Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI,
896  ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
897  OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount,
898  bool MaxOrZero, unsigned TripMultiple, unsigned LoopSize,
900  TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) {
901 
902  UnrollCostEstimator UCE(*L, LoopSize);
903  Optional<unsigned> UnrollFactor;
904 
905  const bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
906  const bool PragmaFullUnroll = hasUnrollFullPragma(L);
907  const unsigned PragmaCount = unrollCountPragmaValue(L);
908  const bool PragmaEnableUnroll = hasUnrollEnablePragma(L);
909 
910  const bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
911  PragmaEnableUnroll || UserUnrollCount;
912 
913  PragmaInfo PInfo(UserUnrollCount, PragmaFullUnroll, PragmaCount,
914  PragmaEnableUnroll);
915  // Use an explicit peel count that has been specified for testing. In this
916  // case it's not permitted to also specify an explicit unroll count.
917  if (PP.PeelCount) {
918  if (UnrollCount.getNumOccurrences() > 0) {
919  report_fatal_error("Cannot specify both explicit peel count and "
920  "explicit unroll count");
921  }
922  UP.Count = 1;
923  UP.Runtime = false;
924  return true;
925  }
926  // Check for explicit Count.
927  // 1st priority is unroll count set by "unroll-count" option.
928  // 2nd priority is unroll count set by pragma.
929  UnrollFactor = shouldPragmaUnroll(L, PInfo, TripMultiple, TripCount, UCE, UP);
930 
931  if (UnrollFactor) {
932  UP.Count = *UnrollFactor;
933 
934  if (UserUnrollCount || (PragmaCount > 0)) {
935  UP.AllowExpensiveTripCount = true;
936  UP.Force = true;
937  }
938  UP.Runtime |= (PragmaCount > 0);
939  return ExplicitUnroll;
940  } else {
941  if (ExplicitUnroll && TripCount != 0) {
942  // If the loop has an unrolling pragma, we want to be more aggressive with
943  // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold
944  // value which is larger than the default limits.
945  UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
946  UP.PartialThreshold =
947  std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
948  }
949  }
950 
951  // 3rd priority is full unroll count.
952  // Full unroll makes sense only when TripCount or its upper bound could be
953  // statically calculated.
954  // Also we need to check if we exceed FullUnrollMaxCount.
955 
956  // We can unroll by the upper bound amount if it's generally allowed or if
957  // we know that the loop is executed either the upper bound or zero times.
958  // (MaxOrZero unrolling keeps only the first loop test, so the number of
959  // loop tests remains the same compared to the non-unrolled version, whereas
960  // the generic upper bound unrolling keeps all but the last loop test so the
961  // number of loop tests goes up which may end up being worse on targets with
962  // constrained branch predictor resources so is controlled by an option.)
963  // In addition we only unroll small upper bounds.
964  unsigned FullUnrollMaxTripCount = MaxTripCount;
965  if (!(UP.UpperBound || MaxOrZero) ||
966  FullUnrollMaxTripCount > UnrollMaxUpperBound)
967  FullUnrollMaxTripCount = 0;
968 
969  // UnrollByMaxCount and ExactTripCount cannot both be non zero since we only
970  // compute the former when the latter is zero.
971  unsigned ExactTripCount = TripCount;
972  assert((ExactTripCount == 0 || FullUnrollMaxTripCount == 0) &&
973  "ExtractTripCount and UnrollByMaxCount cannot both be non zero.");
974 
975  unsigned FullUnrollTripCount =
976  ExactTripCount ? ExactTripCount : FullUnrollMaxTripCount;
977  UP.Count = FullUnrollTripCount;
978 
979  UnrollFactor =
980  shouldFullUnroll(L, TTI, DT, SE, EphValues, FullUnrollTripCount, UCE, UP);
981 
982  // if shouldFullUnroll can do the unrolling, some side parameteres should be
983  // set
984  if (UnrollFactor) {
985  UP.Count = *UnrollFactor;
986  UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
987  TripCount = FullUnrollTripCount;
988  TripMultiple = UP.UpperBound ? 1 : TripMultiple;
989  return ExplicitUnroll;
990  } else {
991  UP.Count = FullUnrollTripCount;
992  }
993 
994  // 4th priority is loop peeling.
995  computePeelCount(L, LoopSize, PP, TripCount, SE, UP.Threshold);
996  if (PP.PeelCount) {
997  UP.Runtime = false;
998  UP.Count = 1;
999  return ExplicitUnroll;
1000  }
1001 
1002  // Before starting partial unrolling, set up.partial to true,
1003  // if user explicitly asked for unrolling
1004  if (TripCount)
1005  UP.Partial |= ExplicitUnroll;
1006 
1007  // 5th priority is partial unrolling.
1008  // Try partial unroll only when TripCount could be statically calculated.
1009  UnrollFactor = shouldPartialUnroll(LoopSize, TripCount, UCE, UP);
1010 
1011  if (UnrollFactor) {
1012  UP.Count = *UnrollFactor;
1013 
1014  if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
1015  UP.Count != TripCount)
1016  ORE->emit([&]() {
1018  "FullUnrollAsDirectedTooLarge",
1019  L->getStartLoc(), L->getHeader())
1020  << "Unable to fully unroll loop as directed by unroll pragma "
1021  "because "
1022  "unrolled size is too large.";
1023  });
1024 
1025  if (UP.PartialThreshold != NoThreshold) {
1026  if (UP.Count == 0) {
1027  if (PragmaEnableUnroll)
1028  ORE->emit([&]() {
1030  "UnrollAsDirectedTooLarge",
1031  L->getStartLoc(), L->getHeader())
1032  << "Unable to unroll loop as directed by unroll(enable) "
1033  "pragma "
1034  "because unrolled size is too large.";
1035  });
1036  }
1037  }
1038  return ExplicitUnroll;
1039  }
1040  assert(TripCount == 0 &&
1041  "All cases when TripCount is constant should be covered here.");
1042  if (PragmaFullUnroll)
1043  ORE->emit([&]() {
1044  return OptimizationRemarkMissed(
1045  DEBUG_TYPE, "CantFullUnrollAsDirectedRuntimeTripCount",
1046  L->getStartLoc(), L->getHeader())
1047  << "Unable to fully unroll loop as directed by unroll(full) "
1048  "pragma "
1049  "because loop has a runtime trip count.";
1050  });
1051 
1052  // 6th priority is runtime unrolling.
1053  // Don't unroll a runtime trip count loop when it is disabled.
1055  UP.Count = 0;
1056  return false;
1057  }
1058 
1059  // Don't unroll a small upper bound loop unless user or TTI asked to do so.
1060  if (MaxTripCount && !UP.Force && MaxTripCount < UnrollMaxUpperBound) {
1061  UP.Count = 0;
1062  return false;
1063  }
1064 
1065  // Check if the runtime trip count is too small when profile is available.
1066  if (L->getHeader()->getParent()->hasProfileData()) {
1067  if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
1068  if (*ProfileTripCount < FlatLoopTripCountThreshold)
1069  return false;
1070  else
1071  UP.AllowExpensiveTripCount = true;
1072  }
1073  }
1074  UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
1075  if (!UP.Runtime) {
1076  LLVM_DEBUG(
1077  dbgs() << " will not try to unroll loop with runtime trip count "
1078  << "-unroll-runtime not given\n");
1079  UP.Count = 0;
1080  return false;
1081  }
1082  if (UP.Count == 0)
1084 
1085  // Reduce unroll count to be the largest power-of-two factor of
1086  // the original count which satisfies the threshold limit.
1087  while (UP.Count != 0 &&
1089  UP.Count >>= 1;
1090 
1091 #ifndef NDEBUG
1092  unsigned OrigCount = UP.Count;
1093 #endif
1094 
1095  if (!UP.AllowRemainder && UP.Count != 0 && (TripMultiple % UP.Count) != 0) {
1096  while (UP.Count != 0 && TripMultiple % UP.Count != 0)
1097  UP.Count >>= 1;
1098  LLVM_DEBUG(
1099  dbgs() << "Remainder loop is restricted (that could architecture "
1100  "specific or because the loop contains a convergent "
1101  "instruction), so unroll count must divide the trip "
1102  "multiple, "
1103  << TripMultiple << ". Reducing unroll count from " << OrigCount
1104  << " to " << UP.Count << ".\n");
1105 
1106  using namespace ore;
1107 
1108  if (unrollCountPragmaValue(L) > 0 && !UP.AllowRemainder)
1109  ORE->emit([&]() {
1111  "DifferentUnrollCountFromDirected",
1112  L->getStartLoc(), L->getHeader())
1113  << "Unable to unroll loop the number of times directed by "
1114  "unroll_count pragma because remainder loop is restricted "
1115  "(that could architecture specific or because the loop "
1116  "contains a convergent instruction) and so must have an "
1117  "unroll "
1118  "count that divides the loop trip multiple of "
1119  << NV("TripMultiple", TripMultiple) << ". Unrolling instead "
1120  << NV("UnrollCount", UP.Count) << " time(s).";
1121  });
1122  }
1123 
1124  if (UP.Count > UP.MaxCount)
1125  UP.Count = UP.MaxCount;
1126 
1127  if (MaxTripCount && UP.Count > MaxTripCount)
1128  UP.Count = MaxTripCount;
1129 
1130  LLVM_DEBUG(dbgs() << " runtime unrolling with count: " << UP.Count
1131  << "\n");
1132  if (UP.Count < 2)
1133  UP.Count = 0;
1134  return ExplicitUnroll;
1135 }
1136 
1138  Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
1141  ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel,
1142  bool OnlyWhenForced, bool ForgetAllSCEV, Optional<unsigned> ProvidedCount,
1143  Optional<unsigned> ProvidedThreshold, Optional<bool> ProvidedAllowPartial,
1144  Optional<bool> ProvidedRuntime, Optional<bool> ProvidedUpperBound,
1145  Optional<bool> ProvidedAllowPeeling,
1146  Optional<bool> ProvidedAllowProfileBasedPeeling,
1147  Optional<unsigned> ProvidedFullUnrollMaxCount) {
1148  LLVM_DEBUG(dbgs() << "Loop Unroll: F["
1149  << L->getHeader()->getParent()->getName() << "] Loop %"
1150  << L->getHeader()->getName() << "\n");
1152  if (TM & TM_Disable)
1154  if (!L->isLoopSimplifyForm()) {
1155  LLVM_DEBUG(
1156  dbgs() << " Not unrolling loop which is not in loop-simplify form.\n");
1158  }
1159 
1160  // When automatic unrolling is disabled, do not unroll unless overridden for
1161  // this loop.
1162  if (OnlyWhenForced && !(TM & TM_Enable))
1164 
1165  bool OptForSize = L->getHeader()->getParent()->hasOptSize();
1166  unsigned NumInlineCandidates;
1167  bool NotDuplicatable;
1168  bool Convergent;
1170  L, SE, TTI, BFI, PSI, ORE, OptLevel, ProvidedThreshold, ProvidedCount,
1171  ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
1172  ProvidedFullUnrollMaxCount);
1174  L, SE, TTI, ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling, true);
1175 
1176  // Exit early if unrolling is disabled. For OptForSize, we pick the loop size
1177  // as threshold later on.
1178  if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0) &&
1179  !OptForSize)
1181 
1183  CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
1184 
1185  unsigned LoopSize =
1186  ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
1187  TTI, EphValues, UP.BEInsns);
1188  LLVM_DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
1189  if (NotDuplicatable) {
1190  LLVM_DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable"
1191  << " instructions.\n");
1193  }
1194 
1195  // When optimizing for size, use LoopSize + 1 as threshold (we use < Threshold
1196  // later), to (fully) unroll loops, if it does not increase code size.
1197  if (OptForSize)
1198  UP.Threshold = std::max(UP.Threshold, LoopSize + 1);
1199 
1200  if (NumInlineCandidates != 0) {
1201  LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
1203  }
1204 
1205  // Find the smallest exact trip count for any exit. This is an upper bound
1206  // on the loop trip count, but an exit at an earlier iteration is still
1207  // possible. An unroll by the smallest exact trip count guarantees that all
1208  // brnaches relating to at least one exit can be eliminated. This is unlike
1209  // the max trip count, which only guarantees that the backedge can be broken.
1210  unsigned TripCount = 0;
1211  unsigned TripMultiple = 1;
1212  SmallVector<BasicBlock *, 8> ExitingBlocks;
1213  L->getExitingBlocks(ExitingBlocks);
1214  for (BasicBlock *ExitingBlock : ExitingBlocks)
1215  if (unsigned TC = SE.getSmallConstantTripCount(L, ExitingBlock))
1216  if (!TripCount || TC < TripCount)
1217  TripCount = TripMultiple = TC;
1218 
1219  if (!TripCount) {
1220  // If no exact trip count is known, determine the trip multiple of either
1221  // the loop latch or the single exiting block.
1222  // TODO: Relax for multiple exits.
1223  BasicBlock *ExitingBlock = L->getLoopLatch();
1224  if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
1225  ExitingBlock = L->getExitingBlock();
1226  if (ExitingBlock)
1227  TripMultiple = SE.getSmallConstantTripMultiple(L, ExitingBlock);
1228  }
1229 
1230  // If the loop contains a convergent operation, the prelude we'd add
1231  // to do the first few instructions before we hit the unrolled loop
1232  // is unsafe -- it adds a control-flow dependency to the convergent
1233  // operation. Therefore restrict remainder loop (try unrolling without).
1234  //
1235  // TODO: This is quite conservative. In practice, convergent_op()
1236  // is likely to be called unconditionally in the loop. In this
1237  // case, the program would be ill-formed (on most architectures)
1238  // unless n were the same on all threads in a thread group.
1239  // Assuming n is the same on all threads, any kind of unrolling is
1240  // safe. But currently llvm's notion of convergence isn't powerful
1241  // enough to express this.
1242  if (Convergent)
1243  UP.AllowRemainder = false;
1244 
1245  // Try to find the trip count upper bound if we cannot find the exact trip
1246  // count.
1247  unsigned MaxTripCount = 0;
1248  bool MaxOrZero = false;
1249  if (!TripCount) {
1250  MaxTripCount = SE.getSmallConstantMaxTripCount(L);
1251  MaxOrZero = SE.isBackedgeTakenCountMaxOrZero(L);
1252  }
1253 
1254  // computeUnrollCount() decides whether it is beneficial to use upper bound to
1255  // fully unroll the loop.
1256  bool UseUpperBound = false;
1257  bool IsCountSetExplicitly = computeUnrollCount(
1258  L, TTI, DT, LI, SE, EphValues, &ORE, TripCount, MaxTripCount, MaxOrZero,
1259  TripMultiple, LoopSize, UP, PP, UseUpperBound);
1260  if (!UP.Count)
1262 
1263  if (PP.PeelCount) {
1264  assert(UP.Count == 1 && "Cannot perform peel and unroll in the same step");
1265  LLVM_DEBUG(dbgs() << "PEELING loop %" << L->getHeader()->getName()
1266  << " with iteration count " << PP.PeelCount << "!\n");
1267  ORE.emit([&]() {
1268  return OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(),
1269  L->getHeader())
1270  << " peeled loop by " << ore::NV("PeelCount", PP.PeelCount)
1271  << " iterations";
1272  });
1273 
1274  if (peelLoop(L, PP.PeelCount, LI, &SE, &DT, &AC, PreserveLCSSA)) {
1275  simplifyLoopAfterUnroll(L, true, LI, &SE, &DT, &AC, &TTI);
1276  // If the loop was peeled, we already "used up" the profile information
1277  // we had, so we don't want to unroll or peel again.
1278  if (PP.PeelProfiledIterations)
1281  }
1283  }
1284 
1285  // At this point, UP.Runtime indicates that run-time unrolling is allowed.
1286  // However, we only want to actually perform it if we don't know the trip
1287  // count and the unroll count doesn't divide the known trip multiple.
1288  // TODO: This decision should probably be pushed up into
1289  // computeUnrollCount().
1290  UP.Runtime &= TripCount == 0 && TripMultiple % UP.Count != 0;
1291 
1292  // Save loop properties before it is transformed.
1293  MDNode *OrigLoopID = L->getLoopID();
1294 
1295  // Unroll the loop.
1296  Loop *RemainderLoop = nullptr;
1297  LoopUnrollResult UnrollResult = UnrollLoop(
1298  L,
1299  {UP.Count, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount,
1300  UP.UnrollRemainder, ForgetAllSCEV},
1301  LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop);
1302  if (UnrollResult == LoopUnrollResult::Unmodified)
1304 
1305  if (RemainderLoop) {
1306  Optional<MDNode *> RemainderLoopID =
1309  if (RemainderLoopID.hasValue())
1310  RemainderLoop->setLoopID(RemainderLoopID.getValue());
1311  }
1312 
1313  if (UnrollResult != LoopUnrollResult::FullyUnrolled) {
1314  Optional<MDNode *> NewLoopID =
1317  if (NewLoopID.hasValue()) {
1318  L->setLoopID(NewLoopID.getValue());
1319 
1320  // Do not setLoopAlreadyUnrolled if loop attributes have been specified
1321  // explicitly.
1322  return UnrollResult;
1323  }
1324  }
1325 
1326  // If loop has an unroll count pragma or unrolled by explicitly set count
1327  // mark loop as unrolled to prevent unrolling beyond that requested.
1328  if (UnrollResult != LoopUnrollResult::FullyUnrolled && IsCountSetExplicitly)
1330 
1331  return UnrollResult;
1332 }
1333 
1334 namespace {
1335 
1336 class LoopUnroll : public LoopPass {
1337 public:
1338  static char ID; // Pass ID, replacement for typeid
1339 
1340  int OptLevel;
1341 
1342  /// If false, use a cost model to determine whether unrolling of a loop is
1343  /// profitable. If true, only loops that explicitly request unrolling via
1344  /// metadata are considered. All other loops are skipped.
1345  bool OnlyWhenForced;
1346 
1347  /// If false, when SCEV is invalidated, only forget everything in the
1348  /// top-most loop (call forgetTopMostLoop), of the loop being processed.
1349  /// Otherwise, forgetAllLoops and rebuild when needed next.
1350  bool ForgetAllSCEV;
1351 
1352  Optional<unsigned> ProvidedCount;
1353  Optional<unsigned> ProvidedThreshold;
1354  Optional<bool> ProvidedAllowPartial;
1355  Optional<bool> ProvidedRuntime;
1356  Optional<bool> ProvidedUpperBound;
1357  Optional<bool> ProvidedAllowPeeling;
1358  Optional<bool> ProvidedAllowProfileBasedPeeling;
1359  Optional<unsigned> ProvidedFullUnrollMaxCount;
1360 
1361  LoopUnroll(int OptLevel = 2, bool OnlyWhenForced = false,
1362  bool ForgetAllSCEV = false, Optional<unsigned> Threshold = None,
1363  Optional<unsigned> Count = None,
1364  Optional<bool> AllowPartial = None, Optional<bool> Runtime = None,
1365  Optional<bool> UpperBound = None,
1366  Optional<bool> AllowPeeling = None,
1367  Optional<bool> AllowProfileBasedPeeling = None,
1368  Optional<unsigned> ProvidedFullUnrollMaxCount = None)
1369  : LoopPass(ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced),
1370  ForgetAllSCEV(ForgetAllSCEV), ProvidedCount(std::move(Count)),
1371  ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
1372  ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound),
1373  ProvidedAllowPeeling(AllowPeeling),
1374  ProvidedAllowProfileBasedPeeling(AllowProfileBasedPeeling),
1375  ProvidedFullUnrollMaxCount(ProvidedFullUnrollMaxCount) {
1377  }
1378 
1379  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
1380  if (skipLoop(L))
1381  return false;
1382 
1383  Function &F = *L->getHeader()->getParent();
1384 
1385  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1386  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1387  ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1388  const TargetTransformInfo &TTI =
1389  getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1390  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1391  // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
1392  // pass. Function analyses need to be preserved across loop transformations
1393  // but ORE cannot be preserved (see comment before the pass definition).
1395  bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
1396 
1398  L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr, PreserveLCSSA, OptLevel,
1399  OnlyWhenForced, ForgetAllSCEV, ProvidedCount, ProvidedThreshold,
1400  ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
1401  ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling,
1402  ProvidedFullUnrollMaxCount);
1403 
1404  if (Result == LoopUnrollResult::FullyUnrolled)
1405  LPM.markLoopAsDeleted(*L);
1406 
1408  }
1409 
1410  /// This transformation requires natural loop information & requires that
1411  /// loop preheaders be inserted into the CFG...
1412  void getAnalysisUsage(AnalysisUsage &AU) const override {
1415  // FIXME: Loop passes are required to preserve domtree, and for now we just
1416  // recreate dom info if anything gets unrolled.
1418  }
1419 };
1420 
1421 } // end anonymous namespace
1422 
1423 char LoopUnroll::ID = 0;
1424 
1425 INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
1429 INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
1430 
1431 Pass *llvm::createLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
1432  bool ForgetAllSCEV, int Threshold, int Count,
1433  int AllowPartial, int Runtime, int UpperBound,
1434  int AllowPeeling) {
1435  // TODO: It would make more sense for this function to take the optionals
1436  // directly, but that's dangerous since it would silently break out of tree
1437  // callers.
1438  return new LoopUnroll(
1439  OptLevel, OnlyWhenForced, ForgetAllSCEV,
1441  Count == -1 ? None : Optional<unsigned>(Count),
1442  AllowPartial == -1 ? None : Optional<bool>(AllowPartial),
1443  Runtime == -1 ? None : Optional<bool>(Runtime),
1444  UpperBound == -1 ? None : Optional<bool>(UpperBound),
1445  AllowPeeling == -1 ? None : Optional<bool>(AllowPeeling));
1446 }
1447 
1448 Pass *llvm::createSimpleLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
1449  bool ForgetAllSCEV) {
1450  return createLoopUnrollPass(OptLevel, OnlyWhenForced, ForgetAllSCEV, -1, -1,
1451  0, 0, 0, 1);
1452 }
1453 
1456  LPMUpdater &Updater) {
1457  // For the new PM, we can't use OptimizationRemarkEmitter as an analysis
1458  // pass. Function analyses need to be preserved across loop transformations
1459  // but ORE cannot be preserved (see comment before the pass definition).
1461 
1462  // Keep track of the previous loop structure so we can identify new loops
1463  // created by unrolling.
1464  Loop *ParentL = L.getParentLoop();
1465  SmallPtrSet<Loop *, 4> OldLoops;
1466  if (ParentL)
1467  OldLoops.insert(ParentL->begin(), ParentL->end());
1468  else
1469  OldLoops.insert(AR.LI.begin(), AR.LI.end());
1470 
1471  std::string LoopName = std::string(L.getName());
1472 
1473  bool Changed = tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, ORE,
1474  /*BFI*/ nullptr, /*PSI*/ nullptr,
1475  /*PreserveLCSSA*/ true, OptLevel,
1476  OnlyWhenForced, ForgetSCEV, /*Count*/ None,
1477  /*Threshold*/ None, /*AllowPartial*/ false,
1478  /*Runtime*/ false, /*UpperBound*/ false,
1479  /*AllowPeeling*/ true,
1480  /*AllowProfileBasedPeeling*/ false,
1481  /*FullUnrollMaxCount*/ None) !=
1483  if (!Changed)
1484  return PreservedAnalyses::all();
1485 
1486  // The parent must not be damaged by unrolling!
1487 #ifndef NDEBUG
1488  if (ParentL)
1489  ParentL->verifyLoop();
1490 #endif
1491 
1492  // Unrolling can do several things to introduce new loops into a loop nest:
1493  // - Full unrolling clones child loops within the current loop but then
1494  // removes the current loop making all of the children appear to be new
1495  // sibling loops.
1496  //
1497  // When a new loop appears as a sibling loop after fully unrolling,
1498  // its nesting structure has fundamentally changed and we want to revisit
1499  // it to reflect that.
1500  //
1501  // When unrolling has removed the current loop, we need to tell the
1502  // infrastructure that it is gone.
1503  //
1504  // Finally, we support a debugging/testing mode where we revisit child loops
1505  // as well. These are not expected to require further optimizations as either
1506  // they or the loop they were cloned from have been directly visited already.
1507  // But the debugging mode allows us to check this assumption.
1508  bool IsCurrentLoopValid = false;
1509  SmallVector<Loop *, 4> SibLoops;
1510  if (ParentL)
1511  SibLoops.append(ParentL->begin(), ParentL->end());
1512  else
1513  SibLoops.append(AR.LI.begin(), AR.LI.end());
1514  erase_if(SibLoops, [&](Loop *SibLoop) {
1515  if (SibLoop == &L) {
1516  IsCurrentLoopValid = true;
1517  return true;
1518  }
1519 
1520  // Otherwise erase the loop from the list if it was in the old loops.
1521  return OldLoops.contains(SibLoop);
1522  });
1523  Updater.addSiblingLoops(SibLoops);
1524 
1525  if (!IsCurrentLoopValid) {
1526  Updater.markLoopAsDeleted(L, LoopName);
1527  } else {
1528  // We can only walk child loops if the current loop remained valid.
1530  // Walk *all* of the child loops.
1531  SmallVector<Loop *, 4> ChildLoops(L.begin(), L.end());
1532  Updater.addChildLoops(ChildLoops);
1533  }
1534  }
1535 
1537 }
1538 
1541  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
1542  auto &LI = AM.getResult<LoopAnalysis>(F);
1543  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1544  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1545  auto &AC = AM.getResult<AssumptionAnalysis>(F);
1547 
1548  LoopAnalysisManager *LAM = nullptr;
1549  if (auto *LAMProxy = AM.getCachedResult<LoopAnalysisManagerFunctionProxy>(F))
1550  LAM = &LAMProxy->getManager();
1551 
1552  auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
1553  ProfileSummaryInfo *PSI =
1554  MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
1555  auto *BFI = (PSI && PSI->hasProfileSummary()) ?
1556  &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
1557 
1558  bool Changed = false;
1559 
1560  // The unroller requires loops to be in simplified form, and also needs LCSSA.
1561  // Since simplification may add new inner loops, it has to run before the
1562  // legality and profitability checks. This means running the loop unroller
1563  // will simplify all loops, regardless of whether anything end up being
1564  // unrolled.
1565  for (auto &L : LI) {
1566  Changed |=
1567  simplifyLoop(L, &DT, &LI, &SE, &AC, nullptr, false /* PreserveLCSSA */);
1568  Changed |= formLCSSARecursively(*L, DT, &LI, &SE);
1569  }
1570 
1571  // Add the loop nests in the reverse order of LoopInfo. See method
1572  // declaration.
1574  appendLoopsToWorklist(LI, Worklist);
1575 
1576  while (!Worklist.empty()) {
1577  // Because the LoopInfo stores the loops in RPO, we walk the worklist
1578  // from back to front so that we work forward across the CFG, which
1579  // for unrolling is only needed to get optimization remarks emitted in
1580  // a forward order.
1581  Loop &L = *Worklist.pop_back_val();
1582 #ifndef NDEBUG
1583  Loop *ParentL = L.getParentLoop();
1584 #endif
1585 
1586  // Check if the profile summary indicates that the profiled application
1587  // has a huge working set size, in which case we disable peeling to avoid
1588  // bloating it further.
1589  Optional<bool> LocalAllowPeeling = UnrollOpts.AllowPeeling;
1590  if (PSI && PSI->hasHugeWorkingSetSize())
1591  LocalAllowPeeling = false;
1592  std::string LoopName = std::string(L.getName());
1593  // The API here is quite complex to call and we allow to select some
1594  // flavors of unrolling during construction time (by setting UnrollOpts).
1596  &L, DT, &LI, SE, TTI, AC, ORE, BFI, PSI,
1597  /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, UnrollOpts.OnlyWhenForced,
1598  UnrollOpts.ForgetSCEV, /*Count*/ None,
1599  /*Threshold*/ None, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime,
1600  UnrollOpts.AllowUpperBound, LocalAllowPeeling,
1601  UnrollOpts.AllowProfileBasedPeeling, UnrollOpts.FullUnrollMaxCount);
1602  Changed |= Result != LoopUnrollResult::Unmodified;
1603 
1604  // The parent must not be damaged by unrolling!
1605 #ifndef NDEBUG
1606  if (Result != LoopUnrollResult::Unmodified && ParentL)
1607  ParentL->verifyLoop();
1608 #endif
1609 
1610  // Clear any cached analysis results for L if we removed it completely.
1611  if (LAM && Result == LoopUnrollResult::FullyUnrolled)
1612  LAM->clear(L, LoopName);
1613  }
1614 
1615  if (!Changed)
1616  return PreservedAnalyses::all();
1617 
1619 }
1620 
1622  raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1623  static_cast<PassInfoMixin<LoopUnrollPass> *>(this)->printPipeline(
1624  OS, MapClassName2PassName);
1625  OS << "<";
1626  if (UnrollOpts.AllowPartial != None)
1627  OS << (UnrollOpts.AllowPartial.getValue() ? "" : "no-") << "partial;";
1628  if (UnrollOpts.AllowPeeling != None)
1629  OS << (UnrollOpts.AllowPeeling.getValue() ? "" : "no-") << "peeling;";
1630  if (UnrollOpts.AllowRuntime != None)
1631  OS << (UnrollOpts.AllowRuntime.getValue() ? "" : "no-") << "runtime;";
1632  if (UnrollOpts.AllowUpperBound != None)
1633  OS << (UnrollOpts.AllowUpperBound.getValue() ? "" : "no-") << "upperbound;";
1634  if (UnrollOpts.AllowProfileBasedPeeling != None)
1635  OS << (UnrollOpts.AllowProfileBasedPeeling.getValue() ? "" : "no-")
1636  << "profile-peeling;";
1637  if (UnrollOpts.FullUnrollMaxCount != None)
1638  OS << "full-unroll-max=" << UnrollOpts.FullUnrollMaxCount << ";";
1639  OS << "O" << UnrollOpts.OptLevel;
1640  OS << ">";
1641 }
llvm::InstructionCost
Definition: InstructionCost.h:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
shouldFullUnroll
static Optional< unsigned > shouldFullUnroll(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE, const TargetTransformInfo::UnrollingPreferences &UP)
Definition: LoopUnrollPass.cpp:804
FlatLoopTripCountThreshold
static cl::opt< unsigned > FlatLoopTripCountThreshold("flat-loop-tripcount-threshold", cl::init(5), cl::Hidden, cl::desc("If the runtime tripcount for the loop is lower than the " "threshold, the loop is considered as flat and will be less " "aggressively unrolled."))
llvm::OuterAnalysisManagerProxy
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:1061
llvm::TargetTransformInfo::UnrollingPreferences::BEInsns
unsigned BEInsns
Definition: TargetTransformInfo.h:478
AssumptionCache.h
llvm::TargetTransformInfo::UnrollingPreferences::PartialOptSizeThreshold
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
Definition: TargetTransformInfo.h:457
llvm::hasUnrollTransformation
TransformationMode hasUnrollTransformation(const Loop *L)
Definition: LoopUtils.cpp:360
unrollCountPragmaValue
static unsigned unrollCountPragmaValue(const Loop *L)
Definition: LoopUnrollPass.cpp:718
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2331
llvm::TargetTransformInfo::UnrollingPreferences::Runtime
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
Definition: TargetTransformInfo.h:485
llvm::OptimizationRemarkMissed
Diagnostic information for missed-optimization remarks.
Definition: DiagnosticInfo.h:729
llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition: TargetTransformInfo.h:211
llvm::ScalarEvolutionAnalysis
Analysis pass that exposes the ScalarEvolution for a function.
Definition: ScalarEvolution.h:2036
llvm::TargetTransformInfo::UnrollingPreferences::PartialThreshold
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
Definition: TargetTransformInfo.h:453
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
LoopSimplify.h
llvm::LoopStandardAnalysisResults::AC
AssumptionCache & AC
Definition: LoopAnalysisManager.h:54
analyzeLoopUnrollCost
static Optional< EstimatedUnrollCost > analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize, unsigned MaxIterationsCountToAnalyze)
Figure out if the loop is worth full unrolling.
Definition: LoopUnrollPass.cpp:347
llvm::UnrolledInstAnalyzer
Definition: LoopUnrollAnalyzer.h:39
Optional.h
llvm::InstructionCost::getValue
Optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
Definition: InstructionCost.h:87
llvm::peelLoop
bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA)
Peel off the first PeelCount iterations of loop L.
Definition: LoopPeel.cpp:655
Metadata.h
UnrollFullMaxCount
static cl::opt< unsigned > UnrollFullMaxCount("unroll-full-max-count", cl::Hidden, cl::desc("Set the max unroll count for full unrolling, for testing purposes"))
UnrollPartialThreshold
static cl::opt< unsigned > UnrollPartialThreshold("unroll-partial-threshold", cl::Hidden, cl::desc("The cost threshold for partial loop unrolling"))
llvm::TargetTransformInfo::UnrollingPreferences::MaxCount
unsigned MaxCount
Definition: TargetTransformInfo.h:469
getUnrollMetadataForLoop
static MDNode * getUnrollMetadataForLoop(const Loop *L, StringRef Name)
Definition: LoopUnrollPass.cpp:694
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
IntrinsicInst.h
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:779
Scalar.h
llvm::PassInfoMixin< LoopUnrollPass >
llvm::ScalarEvolution::getSmallConstantTripMultiple
unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value,...
Definition: ScalarEvolution.cpp:7215
llvm::Function
Definition: Function.h:61
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::lookup
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:197
StringRef.h
llvm::LoopUnrollPass::printPipeline
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Definition: LoopUnrollPass.cpp:1621
Pass.h
llvm::computePeelCount
void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::PeelingPreferences &PP, unsigned &TripCount, ScalarEvolution &SE, unsigned Threshold=UINT_MAX)
Definition: LoopPeel.cpp:281
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::size
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::CodeMetrics
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition: CodeMetrics.h:30
UnrollRevisitChildLoops
static cl::opt< bool > UnrollRevisitChildLoops("unroll-revisit-child-loops", cl::Hidden, cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. " "This shouldn't typically be needed as child loops (or their " "clones) were already visited."))
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::find
iterator find(const_arg_type_t< ValueT > V)
Definition: DenseSet.h:179
UnrollCostEstimator::getUnrolledLoopSize
uint64_t getUnrolledLoopSize(const TargetTransformInfo::UnrollingPreferences &UP, const unsigned CountOverwrite=0) const
Definition: LoopUnrollPass.cpp:761
SizeOpts.h
llvm::LLVMLoopUnrollFollowupRemainder
const char *const LLVMLoopUnrollFollowupRemainder
Definition: UnrollLoop.h:44
ErrorHandling.h
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
LazyBlockFrequencyInfo.h
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:633
llvm::erase_if
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:1732
shouldPartialUnroll
static Optional< unsigned > shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount, const UnrollCostEstimator UCE, const TargetTransformInfo::UnrollingPreferences &UP)
Definition: LoopUnrollPass.cpp:836
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:461
hasUnrollFullPragma
static bool hasUnrollFullPragma(const Loop *L)
Definition: LoopUnrollPass.cpp:701
llvm::makeFollowupLoopID
Optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:271
llvm::TargetTransformInfo::UnrollingPreferences::UnrollAndJamInnerLoopThreshold
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
Definition: TargetTransformInfo.h:504
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::TargetTransformInfo::UnrollingPreferences::UnrollRemainder
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
Definition: TargetTransformInfo.h:497
llvm::getLoopAnalysisUsage
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:149
UnrollRuntime
static cl::opt< bool > UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden, cl::desc("Unroll loops with run-time trip counts"))
llvm::TargetTransformInfo::UnrollingPreferences::Count
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
Definition: TargetTransformInfo.h:462
llvm::TargetTransformInfo::TCK_CodeSize
@ TCK_CodeSize
Instruction code size.
Definition: TargetTransformInfo.h:214
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::initializeLoopUnrollPass
void initializeLoopUnrollPass(PassRegistry &)
ScalarEvolution.h
llvm::TargetTransformInfo::UnrollingPreferences::Partial
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
Definition: TargetTransformInfo.h:481
llvm::LoopUnrollOptions::ForgetSCEV
const bool ForgetSCEV
If true, forget all loops when unrolling.
Definition: LoopUnrollPass.h:78
DenseMap.h
llvm::TargetTransformInfo::PeelingPreferences
Definition: TargetTransformInfo.h:535
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:52
llvm::TargetTransformInfo::UnrollingPreferences::FullUnrollMaxCount
unsigned FullUnrollMaxCount
Set the maximum unrolling factor for full unrolling.
Definition: TargetTransformInfo.h:473
llvm::ApproximateLoopSize
unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value * > &EphValues, unsigned BEInsns)
ApproximateLoopSize - Approximate the size of the loop.
Definition: LoopUnrollPass.cpp:667
llvm::Optional< unsigned >
llvm::formLCSSARecursively
bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition: LCSSA.cpp:403
llvm::LoopBase::begin
iterator begin() const
Definition: LoopInfo.h:154
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::MCID::Convergent
@ Convergent
Definition: MCInstrDesc.h:182
llvm::computeUnrollCount
bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
Definition: LoopUnrollPass.cpp:894
STLExtras.h
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:635
llvm::successors
succ_range successors(Instruction *I)
Definition: CFG.h:262
llvm::LoopStandardAnalysisResults::DT
DominatorTree & DT
Definition: LoopAnalysisManager.h:55
UnrollMaxPercentThresholdBoost
static cl::opt< unsigned > UnrollMaxPercentThresholdBoost("unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden, cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied " "to the threshold when aggressively unrolling a loop due to the " "dynamic cost savings. If completely unrolling a loop will reduce " "the total runtime from X to Y, we boost the loop unroll " "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, " "X/Y). This limit avoids excessive code bloat."))
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::insert
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
UnrollThresholdDefault
static cl::opt< unsigned > UnrollThresholdDefault("unroll-threshold-default", cl::init(150), cl::Hidden, cl::desc("Default threshold (max size of unrolled " "loop), used in all but O3 optimizations"))
loops
loops
Definition: LoopInfo.cpp:1175
llvm::TargetTransformInfo::UnrollingPreferences::AllowExpensiveTripCount
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for...
Definition: TargetTransformInfo.h:490
llvm::isEqual
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
Definition: GCNRegPressure.cpp:55
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:56
loop
Analysis the ScalarEvolution expression for r is< loop > Outside the loop
Definition: README.txt:8
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::MDNode::getNumOperands
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1118
llvm::gatherUnrollingPreferences
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, Optional< unsigned > UserThreshold, Optional< unsigned > UserCount, Optional< bool > UserAllowPartial, Optional< bool > UserRuntime, Optional< bool > UserUpperBound, Optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
Definition: LoopUnrollPass.cpp:185
LoopAnalysisManager.h
UnrollCount
static cl::opt< unsigned > UnrollCount("unroll-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with " "unroll_count pragma values, for testing purposes"))
llvm::Optional::hasValue
constexpr bool hasValue() const
Definition: Optional.h:288
llvm::TargetTransformInfo::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const
Get target-customized preferences for the generic loop unrolling transformation.
Definition: TargetTransformInfo.cpp:320
UnrollThreshold
static cl::opt< unsigned > UnrollThreshold("unroll-threshold", cl::Hidden, cl::desc("The cost threshold for loop unrolling"))
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::LoopUnrollOptions::OnlyWhenForced
bool OnlyWhenForced
If false, use a cost model to determine whether unrolling of a loop is profitable.
Definition: LoopUnrollPass.h:73
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::end
iterator end()
Definition: DenseSet.h:174
Instruction.h
CommandLine.h
CodeMetrics.h
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::LoopBase::getParentLoop
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Definition: LoopInfo.h:113
llvm::TM_Enable
@ TM_Enable
The transformation should be applied without considering a cost model.
Definition: LoopUtils.h:272
llvm::DenseMapInfo< Instruction * >
llvm::TargetTransformInfo::PeelingPreferences::PeelProfiledIterations
bool PeelProfiledIterations
Allow peeling basing on profile.
Definition: TargetTransformInfo.h:548
llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Definition: BlockFrequencyInfo.h:37
llvm::shouldOptimizeForSize
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
Definition: MachineSizeOpts.cpp:183
llvm::Loop::setLoopAlreadyUnrolled
void setLoopAlreadyUnrolled()
Add llvm.loop.unroll.disable to this loop's loop id metadata.
Definition: LoopInfo.cpp:539
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::GetUnrollMetadata
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Definition: LoopUnroll.cpp:826
Constants.h
UnrollMaxUpperBound
static cl::opt< unsigned > UnrollMaxUpperBound("unroll-max-upperbound", cl::init(8), cl::Hidden, cl::desc("The max of trip count upper bound that is considered in unrolling"))
llvm::SmallVectorImpl::append
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:648
llvm::TargetTransformInfo::UnrollingPreferences::Force
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
Definition: TargetTransformInfo.h:493
int
Clang compiles this i1 i64 store i64 i64 store i64 i64 store i64 i64 store i64 align Which gets codegen d xmm0 movaps rbp movaps rbp movaps rbp movaps rbp rbp rbp rbp rbp It would be better to have movq s of instead of the movaps s LLVM produces ret int
Definition: README.txt:536
llvm::AnalysisManager::clear
void clear(IRUnitT &IR, llvm::StringRef Name)
Clear any cached analysis results for a single unit of IR.
Definition: PassManagerImpl.h:36
llvm::LLVMLoopUnrollFollowupUnrolled
const char *const LLVMLoopUnrollFollowupUnrolled
Definition: UnrollLoop.h:42
UnrollAllowRemainder
static cl::opt< bool > UnrollAllowRemainder("unroll-allow-remainder", cl::Hidden, cl::desc("Allow generation of a loop remainder (extra iterations) " "when unrolling a loop."))
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::PriorityWorklist< T, SmallVector< T, N >, SmallDenseMap< T, ptrdiff_t > >::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: PriorityWorklist.h:154
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::LoopBase::end
iterator end() const
Definition: LoopInfo.h:155
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
DenseSet.h
llvm::TargetTransformInfo::UnrollingPreferences::MaxIterationsCountToAnalyze
unsigned MaxIterationsCountToAnalyze
Don't allow loop unrolling to simulate more than this number of iterations when checking full unroll ...
Definition: TargetTransformInfo.h:507
false
Definition: StackSlotColoring.cpp:142
llvm::simplifyLoopAfterUnroll
void simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI)
Perform some cleanup and simplifications on loops after unrolling.
Definition: LoopUnroll.cpp:204
llvm::TargetTransformInfo::UnrollingPreferences::UnrollAndJam
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
Definition: TargetTransformInfo.h:499
llvm::LoopUnrollOptions::FullUnrollMaxCount
Optional< unsigned > FullUnrollMaxCount
Definition: LoopUnrollPass.h:67
llvm::Instruction
Definition: Instruction.h:45
llvm::LoopBase::verifyLoop
void verifyLoop() const
Verify loop structure.
Definition: LoopInfoImpl.h:286
llvm::PGSOQueryType::IRPass
@ IRPass
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
llvm::LoopBase::getExitingBlocks
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:34
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:404
LoopUtils.h
llvm::CodeMetrics::collectEphemeralValues
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:70
llvm::LPPassManager
Definition: LoopPass.h:75
SmallPtrSet.h
llvm::LoopUnrollOptions::AllowPeeling
Optional< bool > AllowPeeling
Definition: LoopUnrollPass.h:63
llvm::LoopBase::getExitingBlock
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:49
Utils.h
llvm::BlockFrequencyAnalysis
Analysis pass which computes BlockFrequencyInfo.
Definition: BlockFrequencyInfo.h:112
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::empty
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
llvm::Loop::getName
StringRef getName() const
Definition: LoopInfo.h:866
llvm::None
const NoneType None
Definition: None.h:23
llvm::LoopUnrollResult::FullyUnrolled
@ FullyUnrolled
The loop was fully unrolled into straight-line code.
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::createSimpleLoopUnrollPass
Pass * createSimpleLoopUnrollPass(int OptLevel=2, bool OnlyWhenForced=false, bool ForgetAllSCEV=false)
Definition: LoopUnrollPass.cpp:1448
CFG.h
LoopInfo.h
llvm::LoopUnrollOptions::AllowUpperBound
Optional< bool > AllowUpperBound
Definition: LoopUnrollPass.h:65
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:39
llvm::cl::ZeroOrMore
@ ZeroOrMore
Definition: CommandLine.h:120
llvm::LLVMLoopUnrollFollowupAll
const char *const LLVMLoopUnrollFollowupAll
Definition: UnrollLoop.h:41
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:168
UnrollMaxCount
static cl::opt< unsigned > UnrollMaxCount("unroll-max-count", cl::Hidden, cl::desc("Set the max unroll count for partial and runtime unrolling, for" "testing purposes"))
llvm::ScalarEvolution::getSmallConstantTripCount
unsigned getSmallConstantTripCount(const Loop *L)
Returns the exact trip count of the loop if we can compute it, and the result is a small constant.
Definition: ScalarEvolution.cpp:7179
llvm::MDNode::getOperand
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1112
llvm::DenseSet
Implements a dense probed hash-table based set.
Definition: DenseSet.h:268
llvm::getLoopPassPreservedAnalyses
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Definition: LoopAnalysisManager.cpp:140
llvm::Loop::isLoopSimplifyForm
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to,...
Definition: LoopInfo.cpp:478
BasicBlock.h
llvm::cl::opt< bool >
hasUnrollEnablePragma
static bool hasUnrollEnablePragma(const Loop *L)
Definition: LoopUnrollPass.cpp:707
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::UnrollLoop
LoopUnrollResult UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const llvm::TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop=nullptr)
Unroll the given loop by Count.
Definition: LoopUnroll.cpp:258
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::clear
void clear()
Definition: DenseMap.h:111
llvm::count
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1634
uint64_t
ProfileSummaryInfo.h
tryToUnrollLoop
static LoopUnrollResult tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel, bool OnlyWhenForced, bool ForgetAllSCEV, Optional< unsigned > ProvidedCount, Optional< unsigned > ProvidedThreshold, Optional< bool > ProvidedAllowPartial, Optional< bool > ProvidedRuntime, Optional< bool > ProvidedUpperBound, Optional< bool > ProvidedAllowPeeling, Optional< bool > ProvidedAllowProfileBasedPeeling, Optional< unsigned > ProvidedFullUnrollMaxCount)
Definition: LoopUnrollPass.cpp:1137
llvm::LoopPass
Definition: LoopPass.h:27
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2387
llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition: LoopPassManager.h:249
llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:169
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::SmallPriorityWorklist
A version of PriorityWorklist that selects small size optimized data structures for the vector and ma...
Definition: PriorityWorklist.h:256
llvm::DenseMap
Definition: DenseMap.h:714
llvm::ScalarEvolution::getSmallConstantMaxTripCount
unsigned getSmallConstantMaxTripCount(const Loop *L)
Returns the upper bound of the loop trip count as a normal unsigned value.
Definition: ScalarEvolution.cpp:7195
llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition: TargetTransformInfo.h:428
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::LoopUnrollResult::PartiallyUnrolled
@ PartiallyUnrolled
The loop was partially unrolled – we still have a loop, but with a smaller trip count.
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
Metrics
Machine Trace Metrics
Definition: MachineTraceMetrics.cpp:53
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:167
llvm::LoopUnrollResult
LoopUnrollResult
Represents the result of a UnrollLoop invocation.
Definition: UnrollLoop.h:53
llvm::LoopUnrollOptions::OptLevel
int OptLevel
Definition: LoopUnrollPass.h:68
llvm::PriorityWorklist< T, SmallVector< T, N >, SmallDenseMap< T, ptrdiff_t > >::empty
bool empty() const
Determine if the PriorityWorklist is empty or not.
Definition: PriorityWorklist.h:68
UnrollThresholdAggressive
static cl::opt< unsigned > UnrollThresholdAggressive("unroll-threshold-aggressive", cl::init(300), cl::Hidden, cl::desc("Threshold (max size of unrolled loop) to use in aggressive (O3) " "optimizations"))
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:216
llvm::TM_Disable
@ TM_Disable
The transformation should not be applied.
Definition: LoopUtils.h:275
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
UnrollCostEstimator::UnrollCostEstimator
UnrollCostEstimator(Loop &L, unsigned LoopSize)
Definition: LoopUnrollPass.cpp:756
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1609
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
NoThreshold
static const unsigned NoThreshold
A magic value for use with the Threshold parameter to indicate that the loop unroll should be perform...
Definition: LoopUnrollPass.cpp:181
llvm::TTI
TargetTransformInfo TTI
Definition: TargetTransformInfo.h:163
llvm::TargetTransformInfo::isLoweredToCall
bool isLoweredToCall(const Function *F) const
Test whether calls to a function lower to actual program function calls.
Definition: TargetTransformInfo.cpp:276
llvm::LoopInfoBase::begin
iterator begin() const
Definition: LoopInfo.h:942
llvm::LPMUpdater::markLoopAsDeleted
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
Definition: LoopPassManager.h:269
llvm::MDNode
Metadata node.
Definition: Metadata.h:906
LoopPassManager.h
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
UnrollAllowPartial
static cl::opt< bool > UnrollAllowPartial("unroll-allow-partial", cl::Hidden, cl::desc("Allows loops to be partially unrolled until " "-unroll-threshold loop size is reached."))
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
PragmaUnrollThreshold
static cl::opt< unsigned > PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 *1024), cl::Hidden, cl::desc("Unrolled size limit for loops with an unroll(full) or " "unroll_count pragma."))
llvm::ProfileSummaryAnalysis
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Definition: ProfileSummaryInfo.h:203
llvm::TargetTransformInfo::PeelingPreferences::PeelCount
unsigned PeelCount
A forced peeling factor (the number of bodied of the original loop that should be peeled off before t...
Definition: TargetTransformInfo.h:539
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:200
llvm::LoopInfo
Definition: LoopInfo.h:1083
None.h
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:41
llvm::TargetTransformInfo::TCK_SizeAndLatency
@ TCK_SizeAndLatency
The weighted sum of size and latency.
Definition: TargetTransformInfo.h:215
llvm::TM_ForcedByUser
@ TM_ForcedByUser
The transformation was directed by the user, e.g.
Definition: LoopUtils.h:283
LoopPass.h
llvm::appendLoopsToWorklist
void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
Definition: LoopUtils.cpp:1429
if
if(llvm_vc STREQUAL "") set(fake_version_inc "$
Definition: CMakeLists.txt:14
llvm::TargetTransformInfo::getUserCost
InstructionCost getUserCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
Definition: TargetTransformInfo.cpp:219
llvm::Instruction::getFunction
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:70
CostKind
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
llvm::LoopStandardAnalysisResults::LI
LoopInfo & LI
Definition: LoopAnalysisManager.h:56
llvm::TargetTransformInfo::UnrollingPreferences::DefaultUnrollRuntimeCount
unsigned DefaultUnrollRuntimeCount
Default unroll count for loops with run-time trip count.
Definition: TargetTransformInfo.h:464
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
UnrollOptSizeThreshold
static cl::opt< unsigned > UnrollOptSizeThreshold("unroll-optsize-threshold", cl::init(0), cl::Hidden, cl::desc("The cost threshold for loop unrolling when optimizing for " "size"))
llvm::InstructionCost::isValid
bool isValid() const
Definition: InstructionCost.h:79
BlockFrequencyInfo.h
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:421
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
llvm::LoopUnrollOptions::AllowProfileBasedPeeling
Optional< bool > AllowProfileBasedPeeling
Definition: LoopUnrollPass.h:66
UnrollUnrollRemainder
static cl::opt< bool > UnrollUnrollRemainder("unroll-remainder", cl::Hidden, cl::desc("Allow the loop remainder to be unrolled."))
LoopUnrollAnalyzer.h
LAM
LoopAnalysisManager LAM
Definition: PassBuilderBindings.cpp:58
llvm::Function::hasOptSize
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:671
Callee
amdgpu Simplify well known AMD library false FunctionCallee Callee
Definition: AMDGPULibCalls.cpp:206
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::clear
void clear()
Completely clear the SetVector.
Definition: SetVector.h:220
UnrollCostEstimator
Definition: LoopUnrollPass.cpp:752
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:165
llvm::LCSSAID
char & LCSSAID
Definition: LCSSA.cpp:485
llvm::ForgetSCEVInLoopUnroll
cl::opt< bool > ForgetSCEVInLoopUnroll
Constant.h
llvm::LoopUnrollPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: LoopUnrollPass.cpp:1539
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:52
std
Definition: BitVector.h:838
llvm::Loop::setLoopID
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:527
llvm::Loop::isLCSSAForm
bool isLCSSAForm(const DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:461
llvm::LPPassManager::markLoopAsDeleted
void markLoopAsDeleted(Loop &L)
Definition: LoopPass.cpp:113
llvm::LoopStandardAnalysisResults::TTI
TargetTransformInfo & TTI
Definition: LoopAnalysisManager.h:59
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:321
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::TargetTransformInfo::UnrollingPreferences::UpperBound
bool UpperBound
Allow using trip count upper bound to unroll loops.
Definition: TargetTransformInfo.h:495
LoopUnrollPass.h
llvm::Loop::getLoopID
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:503
llvm::ScalarEvolution::isBackedgeTakenCountMaxOrZero
bool isBackedgeTakenCountMaxOrZero(const Loop *L)
Return true if the backedge taken count is either the value returned by getConstantMaxBackedgeTakenCo...
Definition: ScalarEvolution.cpp:7297
llvm::AnalysisManager::getCachedResult
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:798
llvm::TargetTransformInfo::UnrollingPreferences::AllowRemainder
bool AllowRemainder
Allow generation of a loop remainder (extra iterations after unroll).
Definition: TargetTransformInfo.h:487
Casting.h
DiagnosticInfo.h
Function.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
shouldPragmaUnroll
static Optional< unsigned > shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo, const unsigned TripMultiple, const unsigned TripCount, const UnrollCostEstimator UCE, const TargetTransformInfo::UnrollingPreferences &UP)
Definition: LoopUnrollPass.cpp:775
PassManager.h
UnrollMaxIterationsCountToAnalyze
static cl::opt< unsigned > UnrollMaxIterationsCountToAnalyze("unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden, cl::desc("Don't allow loop unrolling to simulate more than this number of" "iterations when checking full unroll profitability"))
llvm::simplifyLoop
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
Definition: LoopSimplify.cpp:719
llvm::Function::hasProfileData
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
Definition: Function.h:302
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
isEqual
static bool isEqual(const Function &Caller, const Function &Callee)
Definition: Attributes.cpp:1935
llvm::LPMUpdater::addChildLoops
void addChildLoops(ArrayRef< Loop * > NewChildLoops)
Loop passes should use this method to indicate they have added new child loops of the current loop.
Definition: LoopPassManager.h:292
llvm::LoopStandardAnalysisResults::SE
ScalarEvolution & SE
Definition: LoopAnalysisManager.h:57
llvm::LoopUnrollOptions::AllowPartial
Optional< bool > AllowPartial
Definition: LoopUnrollPass.h:62
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:252
llvm::LoopInfoBase::end
iterator end() const
Definition: LoopInfo.h:943
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:684
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
Instructions.h
INITIALIZE_PASS_BEGIN
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:51
SmallVector.h
hasRuntimeUnrollDisablePragma
static bool hasRuntimeUnrollDisablePragma(const Loop *L)
Definition: LoopUnrollPass.cpp:712
DEBUG_TYPE
#define DEBUG_TYPE
Definition: LoopUnrollPass.cpp:74
Dominators.h
llvm::TargetTransformInfo::UnrollingPreferences::Threshold
unsigned Threshold
The cost threshold for the unrolled loop.
Definition: TargetTransformInfo.h:436
UnrollLoop.h
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
TargetTransformInfo.h
Threshold
static cl::opt< unsigned > Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), cl::init(100), cl::Hidden)
DenseMapInfo.h
llvm::Function::hasMinSize
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:668
llvm::SmallPtrSetImpl< const Value * >
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:307
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::InnerAnalysisManagerProxy
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:936
llvm::TargetTransformInfo::UnrollingPreferences::OptSizeThreshold
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
Definition: TargetTransformInfo.h:450
llvm::LoopBase::isLoopExiting
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
Definition: LoopInfo.h:225
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3212
llvm::cl::desc
Definition: CommandLine.h:414
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3068
llvm::LoopUnrollResult::Unmodified
@ Unmodified
The loop was not modified.
raw_ostream.h
llvm::SmallPtrSetImpl::contains
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:388
llvm::LPMUpdater::addSiblingLoops
void addSiblingLoops(ArrayRef< Loop * > NewSibLoops)
Loop passes should use this method to indicate they have added new sibling loops to the current loop.
Definition: LoopPassManager.h:319
LoopPeel.h
llvm::getLoopEstimatedTripCount
Optional< unsigned > getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight=nullptr)
Returns a loop's estimated trip count based on branch weight metadata.
Definition: LoopUtils.cpp:807
InitializePasses.h
llvm::OptimizationRemarkEmitterAnalysis
Definition: OptimizationRemarkEmitter.h:164
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SetVector.h:232
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
getFullUnrollBoostingFactor
static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost, unsigned MaxPercentThresholdBoost)
Definition: LoopUnrollPass.cpp:736
llvm::TransformationMode
TransformationMode
The mode sets how eager a transformation should be applied.
Definition: LoopUtils.h:266
Debug.h
llvm::gatherPeelingPreferences
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, Optional< bool > UserAllowPeeling, Optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
Definition: LoopPeel.cpp:612
llvm::Optional::getValue
constexpr const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:282
llvm::LoopAnalysis
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1243
SetVector.h
llvm::TargetTransformInfo::UnrollingPreferences::MaxPercentThresholdBoost
unsigned MaxPercentThresholdBoost
If complete unrolling will reduce the cost of the loop, we will boost the Threshold by a certain perc...
Definition: TargetTransformInfo.h:447
llvm::createLoopUnrollPass
Pass * createLoopUnrollPass(int OptLevel=2, bool OnlyWhenForced=false, bool ForgetAllSCEV=false, int Threshold=-1, int Count=-1, int AllowPartial=-1, int Runtime=-1, int UpperBound=-1, int AllowPeeling=-1)
Definition: LoopUnrollPass.cpp:1431
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37
llvm::LoopUnrollOptions::AllowRuntime
Optional< bool > AllowRuntime
Definition: LoopUnrollPass.h:64
llvm::LoopFullUnrollPass::run
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LoopUnrollPass.cpp:1454
llvm::omp::OMPScheduleType::Runtime
@ Runtime