LLVM  10.0.0svn
LoopUnrollPass.cpp
Go to the documentation of this file.
1 //===- LoopUnroll.cpp - Loop unroller pass --------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass implements a simple loop unroller. It works best when loops have
10 // been canonicalized by the -indvars pass, allowing it to determine the trip
11 // counts of loops easily.
12 //===----------------------------------------------------------------------===//
13 
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/DenseMapInfo.h"
17 #include "llvm/ADT/DenseSet.h"
18 #include "llvm/ADT/None.h"
19 #include "llvm/ADT/Optional.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SetVector.h"
22 #include "llvm/ADT/SmallPtrSet.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/StringRef.h"
30 #include "llvm/Analysis/LoopInfo.h"
31 #include "llvm/Analysis/LoopPass.h"
37 #include "llvm/IR/BasicBlock.h"
38 #include "llvm/IR/CFG.h"
39 #include "llvm/IR/Constant.h"
40 #include "llvm/IR/Constants.h"
41 #include "llvm/IR/DiagnosticInfo.h"
42 #include "llvm/IR/Dominators.h"
43 #include "llvm/IR/Function.h"
44 #include "llvm/IR/Instruction.h"
45 #include "llvm/IR/Instructions.h"
46 #include "llvm/IR/IntrinsicInst.h"
47 #include "llvm/IR/Metadata.h"
48 #include "llvm/IR/PassManager.h"
49 #include "llvm/Pass.h"
50 #include "llvm/Support/Casting.h"
52 #include "llvm/Support/Debug.h"
55 #include "llvm/Transforms/Scalar.h"
57 #include "llvm/Transforms/Utils.h"
62 #include <algorithm>
63 #include <cassert>
64 #include <cstdint>
65 #include <limits>
66 #include <string>
67 #include <tuple>
68 #include <utility>
69 
70 using namespace llvm;
71 
72 #define DEBUG_TYPE "loop-unroll"
73 
75  "forget-scev-loop-unroll", cl::init(false), cl::Hidden,
76  cl::desc("Forget everything in SCEV when doing LoopUnroll, instead of just"
77  " the current top-most loop. This is somtimes preferred to reduce"
78  " compile time."));
79 
80 static cl::opt<unsigned>
81  UnrollThreshold("unroll-threshold", cl::Hidden,
82  cl::desc("The cost threshold for loop unrolling"));
83 
85  "unroll-partial-threshold", cl::Hidden,
86  cl::desc("The cost threshold for partial loop unrolling"));
87 
89  "unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden,
90  cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied "
91  "to the threshold when aggressively unrolling a loop due to the "
92  "dynamic cost savings. If completely unrolling a loop will reduce "
93  "the total runtime from X to Y, we boost the loop unroll "
94  "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, "
95  "X/Y). This limit avoids excessive code bloat."));
96 
98  "unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden,
99  cl::desc("Don't allow loop unrolling to simulate more than this number of"
100  "iterations when checking full unroll profitability"));
101 
103  "unroll-count", cl::Hidden,
104  cl::desc("Use this unroll count for all loops including those with "
105  "unroll_count pragma values, for testing purposes"));
106 
108  "unroll-max-count", cl::Hidden,
109  cl::desc("Set the max unroll count for partial and runtime unrolling, for"
110  "testing purposes"));
111 
113  "unroll-full-max-count", cl::Hidden,
114  cl::desc(
115  "Set the max unroll count for full unrolling, for testing purposes"));
116 
118  "unroll-peel-count", cl::Hidden,
119  cl::desc("Set the unroll peeling count, for testing purposes"));
120 
121 static cl::opt<bool>
122  UnrollAllowPartial("unroll-allow-partial", cl::Hidden,
123  cl::desc("Allows loops to be partially unrolled until "
124  "-unroll-threshold loop size is reached."));
125 
127  "unroll-allow-remainder", cl::Hidden,
128  cl::desc("Allow generation of a loop remainder (extra iterations) "
129  "when unrolling a loop."));
130 
131 static cl::opt<bool>
132  UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden,
133  cl::desc("Unroll loops with run-time trip counts"));
134 
136  "unroll-max-upperbound", cl::init(8), cl::Hidden,
137  cl::desc(
138  "The max of trip count upper bound that is considered in unrolling"));
139 
141  "pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden,
142  cl::desc("Unrolled size limit for loops with an unroll(full) or "
143  "unroll_count pragma."));
144 
146  "flat-loop-tripcount-threshold", cl::init(5), cl::Hidden,
147  cl::desc("If the runtime tripcount for the loop is lower than the "
148  "threshold, the loop is considered as flat and will be less "
149  "aggressively unrolled."));
150 
151 static cl::opt<bool>
152  UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden,
153  cl::desc("Allows loops to be peeled when the dynamic "
154  "trip count is known to be low."));
155 
157  "unroll-remainder", cl::Hidden,
158  cl::desc("Allow the loop remainder to be unrolled."));
159 
160 // This option isn't ever intended to be enabled, it serves to allow
161 // experiments to check the assumptions about when this kind of revisit is
162 // necessary.
164  "unroll-revisit-child-loops", cl::Hidden,
165  cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. "
166  "This shouldn't typically be needed as child loops (or their "
167  "clones) were already visited."));
168 
169 /// A magic value for use with the Threshold parameter to indicate
170 /// that the loop unroll should be performed regardless of how much
171 /// code expansion would result.
173 
174 /// Gather the various unrolling parameters based on the defaults, compiler
175 /// flags, TTI overrides and user specified parameters.
177  Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
178  BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel,
179  Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
180  Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
181  Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling,
182  Optional<bool> UserAllowProfileBasedPeeling,
183  Optional<unsigned> UserFullUnrollMaxCount) {
185 
186  // Set up the defaults
187  UP.Threshold = OptLevel > 2 ? 300 : 150;
188  UP.MaxPercentThresholdBoost = 400;
189  UP.OptSizeThreshold = 0;
190  UP.PartialThreshold = 150;
192  UP.Count = 0;
193  UP.PeelCount = 0;
197  UP.BEInsns = 2;
198  UP.Partial = false;
199  UP.Runtime = false;
200  UP.AllowRemainder = true;
201  UP.UnrollRemainder = false;
202  UP.AllowExpensiveTripCount = false;
203  UP.Force = false;
204  UP.UpperBound = false;
205  UP.AllowPeeling = true;
206  UP.UnrollAndJam = false;
207  UP.PeelProfiledIterations = true;
209 
210  // Override with any target specific settings
211  TTI.getUnrollingPreferences(L, SE, UP);
212 
213  // Apply size attributes
214  bool OptForSize = L->getHeader()->getParent()->hasOptSize() ||
216  if (OptForSize) {
217  UP.Threshold = UP.OptSizeThreshold;
219  UP.MaxPercentThresholdBoost = 100;
220  }
221 
222  // Apply any user values specified by cl::opt
223  if (UnrollThreshold.getNumOccurrences() > 0)
225  if (UnrollPartialThreshold.getNumOccurrences() > 0)
227  if (UnrollMaxPercentThresholdBoost.getNumOccurrences() > 0)
229  if (UnrollMaxCount.getNumOccurrences() > 0)
231  if (UnrollFullMaxCount.getNumOccurrences() > 0)
233  if (UnrollPeelCount.getNumOccurrences() > 0)
235  if (UnrollAllowPartial.getNumOccurrences() > 0)
237  if (UnrollAllowRemainder.getNumOccurrences() > 0)
239  if (UnrollRuntime.getNumOccurrences() > 0)
240  UP.Runtime = UnrollRuntime;
241  if (UnrollMaxUpperBound == 0)
242  UP.UpperBound = false;
243  if (UnrollAllowPeeling.getNumOccurrences() > 0)
245  if (UnrollUnrollRemainder.getNumOccurrences() > 0)
247 
248  // Apply user values provided by argument
249  if (UserThreshold.hasValue()) {
250  UP.Threshold = *UserThreshold;
251  UP.PartialThreshold = *UserThreshold;
252  }
253  if (UserCount.hasValue())
254  UP.Count = *UserCount;
255  if (UserAllowPartial.hasValue())
256  UP.Partial = *UserAllowPartial;
257  if (UserRuntime.hasValue())
258  UP.Runtime = *UserRuntime;
259  if (UserUpperBound.hasValue())
260  UP.UpperBound = *UserUpperBound;
261  if (UserAllowPeeling.hasValue())
262  UP.AllowPeeling = *UserAllowPeeling;
263  if (UserAllowProfileBasedPeeling.hasValue())
264  UP.PeelProfiledIterations = *UserAllowProfileBasedPeeling;
265  if (UserFullUnrollMaxCount.hasValue())
266  UP.FullUnrollMaxCount = *UserFullUnrollMaxCount;
267 
268  return UP;
269 }
270 
271 namespace {
272 
273 /// A struct to densely store the state of an instruction after unrolling at
274 /// each iteration.
275 ///
276 /// This is designed to work like a tuple of <Instruction *, int> for the
277 /// purposes of hashing and lookup, but to be able to associate two boolean
278 /// states with each key.
279 struct UnrolledInstState {
280  Instruction *I;
281  int Iteration : 30;
282  unsigned IsFree : 1;
283  unsigned IsCounted : 1;
284 };
285 
286 /// Hashing and equality testing for a set of the instruction states.
287 struct UnrolledInstStateKeyInfo {
288  using PtrInfo = DenseMapInfo<Instruction *>;
290 
291  static inline UnrolledInstState getEmptyKey() {
292  return {PtrInfo::getEmptyKey(), 0, 0, 0};
293  }
294 
295  static inline UnrolledInstState getTombstoneKey() {
296  return {PtrInfo::getTombstoneKey(), 0, 0, 0};
297  }
298 
299  static inline unsigned getHashValue(const UnrolledInstState &S) {
300  return PairInfo::getHashValue({S.I, S.Iteration});
301  }
302 
303  static inline bool isEqual(const UnrolledInstState &LHS,
304  const UnrolledInstState &RHS) {
305  return PairInfo::isEqual({LHS.I, LHS.Iteration}, {RHS.I, RHS.Iteration});
306  }
307 };
308 
309 struct EstimatedUnrollCost {
310  /// The estimated cost after unrolling.
311  unsigned UnrolledCost;
312 
313  /// The estimated dynamic cost of executing the instructions in the
314  /// rolled form.
315  unsigned RolledDynamicCost;
316 };
317 
318 } // end anonymous namespace
319 
320 /// Figure out if the loop is worth full unrolling.
321 ///
322 /// Complete loop unrolling can make some loads constant, and we need to know
323 /// if that would expose any further optimization opportunities. This routine
324 /// estimates this optimization. It computes cost of unrolled loop
325 /// (UnrolledCost) and dynamic cost of the original loop (RolledDynamicCost). By
326 /// dynamic cost we mean that we won't count costs of blocks that are known not
327 /// to be executed (i.e. if we have a branch in the loop and we know that at the
328 /// given iteration its condition would be resolved to true, we won't add up the
329 /// cost of the 'false'-block).
330 /// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If
331 /// the analysis failed (no benefits expected from the unrolling, or the loop is
332 /// too big to analyze), the returned value is None.
334  const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE,
335  const SmallPtrSetImpl<const Value *> &EphValues,
336  const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize) {
337  // We want to be able to scale offsets by the trip count and add more offsets
338  // to them without checking for overflows, and we already don't want to
339  // analyze *massive* trip counts, so we force the max to be reasonably small.
341  (unsigned)(std::numeric_limits<int>::max() / 2) &&
342  "The unroll iterations max is too large!");
343 
344  // Only analyze inner loops. We can't properly estimate cost of nested loops
345  // and we won't visit inner loops again anyway.
346  if (!L->empty())
347  return None;
348 
349  // Don't simulate loops with a big or unknown tripcount
350  if (!UnrollMaxIterationsCountToAnalyze || !TripCount ||
352  return None;
353 
356  DenseMap<Value *, Constant *> SimplifiedValues;
357  SmallVector<std::pair<Value *, Constant *>, 4> SimplifiedInputValues;
358 
359  // The estimated cost of the unrolled form of the loop. We try to estimate
360  // this by simplifying as much as we can while computing the estimate.
361  unsigned UnrolledCost = 0;
362 
363  // We also track the estimated dynamic (that is, actually executed) cost in
364  // the rolled form. This helps identify cases when the savings from unrolling
365  // aren't just exposing dead control flows, but actual reduced dynamic
366  // instructions due to the simplifications which we expect to occur after
367  // unrolling.
368  unsigned RolledDynamicCost = 0;
369 
370  // We track the simplification of each instruction in each iteration. We use
371  // this to recursively merge costs into the unrolled cost on-demand so that
372  // we don't count the cost of any dead code. This is essentially a map from
373  // <instruction, int> to <bool, bool>, but stored as a densely packed struct.
375 
376  // A small worklist used to accumulate cost of instructions from each
377  // observable and reached root in the loop.
378  SmallVector<Instruction *, 16> CostWorklist;
379 
380  // PHI-used worklist used between iterations while accumulating cost.
381  SmallVector<Instruction *, 4> PHIUsedList;
382 
383  // Helper function to accumulate cost for instructions in the loop.
384  auto AddCostRecursively = [&](Instruction &RootI, int Iteration) {
385  assert(Iteration >= 0 && "Cannot have a negative iteration!");
386  assert(CostWorklist.empty() && "Must start with an empty cost list");
387  assert(PHIUsedList.empty() && "Must start with an empty phi used list");
388  CostWorklist.push_back(&RootI);
389  for (;; --Iteration) {
390  do {
391  Instruction *I = CostWorklist.pop_back_val();
392 
393  // InstCostMap only uses I and Iteration as a key, the other two values
394  // don't matter here.
395  auto CostIter = InstCostMap.find({I, Iteration, 0, 0});
396  if (CostIter == InstCostMap.end())
397  // If an input to a PHI node comes from a dead path through the loop
398  // we may have no cost data for it here. What that actually means is
399  // that it is free.
400  continue;
401  auto &Cost = *CostIter;
402  if (Cost.IsCounted)
403  // Already counted this instruction.
404  continue;
405 
406  // Mark that we are counting the cost of this instruction now.
407  Cost.IsCounted = true;
408 
409  // If this is a PHI node in the loop header, just add it to the PHI set.
410  if (auto *PhiI = dyn_cast<PHINode>(I))
411  if (PhiI->getParent() == L->getHeader()) {
412  assert(Cost.IsFree && "Loop PHIs shouldn't be evaluated as they "
413  "inherently simplify during unrolling.");
414  if (Iteration == 0)
415  continue;
416 
417  // Push the incoming value from the backedge into the PHI used list
418  // if it is an in-loop instruction. We'll use this to populate the
419  // cost worklist for the next iteration (as we count backwards).
420  if (auto *OpI = dyn_cast<Instruction>(
421  PhiI->getIncomingValueForBlock(L->getLoopLatch())))
422  if (L->contains(OpI))
423  PHIUsedList.push_back(OpI);
424  continue;
425  }
426 
427  // First accumulate the cost of this instruction.
428  if (!Cost.IsFree) {
429  UnrolledCost += TTI.getUserCost(I);
430  LLVM_DEBUG(dbgs() << "Adding cost of instruction (iteration "
431  << Iteration << "): ");
432  LLVM_DEBUG(I->dump());
433  }
434 
435  // We must count the cost of every operand which is not free,
436  // recursively. If we reach a loop PHI node, simply add it to the set
437  // to be considered on the next iteration (backwards!).
438  for (Value *Op : I->operands()) {
439  // Check whether this operand is free due to being a constant or
440  // outside the loop.
441  auto *OpI = dyn_cast<Instruction>(Op);
442  if (!OpI || !L->contains(OpI))
443  continue;
444 
445  // Otherwise accumulate its cost.
446  CostWorklist.push_back(OpI);
447  }
448  } while (!CostWorklist.empty());
449 
450  if (PHIUsedList.empty())
451  // We've exhausted the search.
452  break;
453 
454  assert(Iteration > 0 &&
455  "Cannot track PHI-used values past the first iteration!");
456  CostWorklist.append(PHIUsedList.begin(), PHIUsedList.end());
457  PHIUsedList.clear();
458  }
459  };
460 
461  // Ensure that we don't violate the loop structure invariants relied on by
462  // this analysis.
463  assert(L->isLoopSimplifyForm() && "Must put loop into normal form first.");
464  assert(L->isLCSSAForm(DT) &&
465  "Must have loops in LCSSA form to track live-out values.");
466 
467  LLVM_DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");
468 
469  // Simulate execution of each iteration of the loop counting instructions,
470  // which would be simplified.
471  // Since the same load will take different values on different iterations,
472  // we literally have to go through all loop's iterations.
473  for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
474  LLVM_DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");
475 
476  // Prepare for the iteration by collecting any simplified entry or backedge
477  // inputs.
478  for (Instruction &I : *L->getHeader()) {
479  auto *PHI = dyn_cast<PHINode>(&I);
480  if (!PHI)
481  break;
482 
483  // The loop header PHI nodes must have exactly two input: one from the
484  // loop preheader and one from the loop latch.
485  assert(
486  PHI->getNumIncomingValues() == 2 &&
487  "Must have an incoming value only for the preheader and the latch.");
488 
489  Value *V = PHI->getIncomingValueForBlock(
490  Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch());
491  Constant *C = dyn_cast<Constant>(V);
492  if (Iteration != 0 && !C)
493  C = SimplifiedValues.lookup(V);
494  if (C)
495  SimplifiedInputValues.push_back({PHI, C});
496  }
497 
498  // Now clear and re-populate the map for the next iteration.
499  SimplifiedValues.clear();
500  while (!SimplifiedInputValues.empty())
501  SimplifiedValues.insert(SimplifiedInputValues.pop_back_val());
502 
503  UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE, L);
504 
505  BBWorklist.clear();
506  BBWorklist.insert(L->getHeader());
507  // Note that we *must not* cache the size, this loop grows the worklist.
508  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
509  BasicBlock *BB = BBWorklist[Idx];
510 
511  // Visit all instructions in the given basic block and try to simplify
512  // it. We don't change the actual IR, just count optimization
513  // opportunities.
514  for (Instruction &I : *BB) {
515  // These won't get into the final code - don't even try calculating the
516  // cost for them.
517  if (isa<DbgInfoIntrinsic>(I) || EphValues.count(&I))
518  continue;
519 
520  // Track this instruction's expected baseline cost when executing the
521  // rolled loop form.
522  RolledDynamicCost += TTI.getUserCost(&I);
523 
524  // Visit the instruction to analyze its loop cost after unrolling,
525  // and if the visitor returns true, mark the instruction as free after
526  // unrolling and continue.
527  bool IsFree = Analyzer.visit(I);
528  bool Inserted = InstCostMap.insert({&I, (int)Iteration,
529  (unsigned)IsFree,
530  /*IsCounted*/ false}).second;
531  (void)Inserted;
532  assert(Inserted && "Cannot have a state for an unvisited instruction!");
533 
534  if (IsFree)
535  continue;
536 
537  // Can't properly model a cost of a call.
538  // FIXME: With a proper cost model we should be able to do it.
539  if (auto *CI = dyn_cast<CallInst>(&I)) {
540  const Function *Callee = CI->getCalledFunction();
541  if (!Callee || TTI.isLoweredToCall(Callee)) {
542  LLVM_DEBUG(dbgs() << "Can't analyze cost of loop with call\n");
543  return None;
544  }
545  }
546 
547  // If the instruction might have a side-effect recursively account for
548  // the cost of it and all the instructions leading up to it.
549  if (I.mayHaveSideEffects())
550  AddCostRecursively(I, Iteration);
551 
552  // If unrolled body turns out to be too big, bail out.
553  if (UnrolledCost > MaxUnrolledLoopSize) {
554  LLVM_DEBUG(dbgs() << " Exceeded threshold.. exiting.\n"
555  << " UnrolledCost: " << UnrolledCost
556  << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
557  << "\n");
558  return None;
559  }
560  }
561 
562  Instruction *TI = BB->getTerminator();
563 
564  // Add in the live successors by first checking whether we have terminator
565  // that may be simplified based on the values simplified by this call.
566  BasicBlock *KnownSucc = nullptr;
567  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
568  if (BI->isConditional()) {
569  if (Constant *SimpleCond =
570  SimplifiedValues.lookup(BI->getCondition())) {
571  // Just take the first successor if condition is undef
572  if (isa<UndefValue>(SimpleCond))
573  KnownSucc = BI->getSuccessor(0);
574  else if (ConstantInt *SimpleCondVal =
575  dyn_cast<ConstantInt>(SimpleCond))
576  KnownSucc = BI->getSuccessor(SimpleCondVal->isZero() ? 1 : 0);
577  }
578  }
579  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
580  if (Constant *SimpleCond =
581  SimplifiedValues.lookup(SI->getCondition())) {
582  // Just take the first successor if condition is undef
583  if (isa<UndefValue>(SimpleCond))
584  KnownSucc = SI->getSuccessor(0);
585  else if (ConstantInt *SimpleCondVal =
586  dyn_cast<ConstantInt>(SimpleCond))
587  KnownSucc = SI->findCaseValue(SimpleCondVal)->getCaseSuccessor();
588  }
589  }
590  if (KnownSucc) {
591  if (L->contains(KnownSucc))
592  BBWorklist.insert(KnownSucc);
593  else
594  ExitWorklist.insert({BB, KnownSucc});
595  continue;
596  }
597 
598  // Add BB's successors to the worklist.
599  for (BasicBlock *Succ : successors(BB))
600  if (L->contains(Succ))
601  BBWorklist.insert(Succ);
602  else
603  ExitWorklist.insert({BB, Succ});
604  AddCostRecursively(*TI, Iteration);
605  }
606 
607  // If we found no optimization opportunities on the first iteration, we
608  // won't find them on later ones too.
609  if (UnrolledCost == RolledDynamicCost) {
610  LLVM_DEBUG(dbgs() << " No opportunities found.. exiting.\n"
611  << " UnrolledCost: " << UnrolledCost << "\n");
612  return None;
613  }
614  }
615 
616  while (!ExitWorklist.empty()) {
617  BasicBlock *ExitingBB, *ExitBB;
618  std::tie(ExitingBB, ExitBB) = ExitWorklist.pop_back_val();
619 
620  for (Instruction &I : *ExitBB) {
621  auto *PN = dyn_cast<PHINode>(&I);
622  if (!PN)
623  break;
624 
625  Value *Op = PN->getIncomingValueForBlock(ExitingBB);
626  if (auto *OpI = dyn_cast<Instruction>(Op))
627  if (L->contains(OpI))
628  AddCostRecursively(*OpI, TripCount - 1);
629  }
630  }
631 
632  LLVM_DEBUG(dbgs() << "Analysis finished:\n"
633  << "UnrolledCost: " << UnrolledCost << ", "
634  << "RolledDynamicCost: " << RolledDynamicCost << "\n");
635  return {{UnrolledCost, RolledDynamicCost}};
636 }
637 
638 /// ApproximateLoopSize - Approximate the size of the loop.
640  const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent,
641  const TargetTransformInfo &TTI,
642  const SmallPtrSetImpl<const Value *> &EphValues, unsigned BEInsns) {
644  for (BasicBlock *BB : L->blocks())
645  Metrics.analyzeBasicBlock(BB, TTI, EphValues);
646  NumCalls = Metrics.NumInlineCandidates;
647  NotDuplicatable = Metrics.notDuplicatable;
648  Convergent = Metrics.convergent;
649 
650  unsigned LoopSize = Metrics.NumInsts;
651 
652  // Don't allow an estimate of size zero. This would allows unrolling of loops
653  // with huge iteration counts, which is a compile time problem even if it's
654  // not a problem for code quality. Also, the code using this size may assume
655  // that each loop has at least three instructions (likely a conditional
656  // branch, a comparison feeding that branch, and some kind of loop increment
657  // feeding that comparison instruction).
658  LoopSize = std::max(LoopSize, BEInsns + 1);
659 
660  return LoopSize;
661 }
662 
663 // Returns the loop hint metadata node with the given name (for example,
664 // "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is
665 // returned.
667  if (MDNode *LoopID = L->getLoopID())
668  return GetUnrollMetadata(LoopID, Name);
669  return nullptr;
670 }
671 
672 // Returns true if the loop has an unroll(full) pragma.
673 static bool HasUnrollFullPragma(const Loop *L) {
674  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.full");
675 }
676 
677 // Returns true if the loop has an unroll(enable) pragma. This metadata is used
678 // for both "#pragma unroll" and "#pragma clang loop unroll(enable)" directives.
679 static bool HasUnrollEnablePragma(const Loop *L) {
680  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.enable");
681 }
682 
683 // Returns true if the loop has an runtime unroll(disable) pragma.
684 static bool HasRuntimeUnrollDisablePragma(const Loop *L) {
685  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable");
686 }
687 
688 // If loop has an unroll_count pragma return the (necessarily
689 // positive) value from the pragma. Otherwise return 0.
690 static unsigned UnrollCountPragmaValue(const Loop *L) {
691  MDNode *MD = GetUnrollMetadataForLoop(L, "llvm.loop.unroll.count");
692  if (MD) {
693  assert(MD->getNumOperands() == 2 &&
694  "Unroll count hint metadata should have two operands.");
695  unsigned Count =
696  mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
697  assert(Count >= 1 && "Unroll count must be positive.");
698  return Count;
699  }
700  return 0;
701 }
702 
703 // Computes the boosting factor for complete unrolling.
704 // If fully unrolling the loop would save a lot of RolledDynamicCost, it would
705 // be beneficial to fully unroll the loop even if unrolledcost is large. We
706 // use (RolledDynamicCost / UnrolledCost) to model the unroll benefits to adjust
707 // the unroll threshold.
708 static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost,
709  unsigned MaxPercentThresholdBoost) {
710  if (Cost.RolledDynamicCost >= std::numeric_limits<unsigned>::max() / 100)
711  return 100;
712  else if (Cost.UnrolledCost != 0)
713  // The boosting factor is RolledDynamicCost / UnrolledCost
714  return std::min(100 * Cost.RolledDynamicCost / Cost.UnrolledCost,
715  MaxPercentThresholdBoost);
716  else
717  return MaxPercentThresholdBoost;
718 }
719 
720 // Returns loop size estimation for unrolled loop.
721 static uint64_t getUnrolledLoopSize(
722  unsigned LoopSize,
724  assert(LoopSize >= UP.BEInsns && "LoopSize should not be less than BEInsns!");
725  return (uint64_t)(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns;
726 }
727 
728 // Returns true if unroll count was set explicitly.
729 // Calculates unroll count and writes it to UP.Count.
730 // Unless IgnoreUser is true, will also use metadata and command-line options
731 // that are specific to to the LoopUnroll pass (which, for instance, are
732 // irrelevant for the LoopUnrollAndJam pass).
733 // FIXME: This function is used by LoopUnroll and LoopUnrollAndJam, but consumes
734 // many LoopUnroll-specific options. The shared functionality should be
735 // refactored into it own function.
737  Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI,
738  ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
739  OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount,
740  unsigned &TripMultiple, unsigned LoopSize,
741  TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound) {
742 
743  // Check for explicit Count.
744  // 1st priority is unroll count set by "unroll-count" option.
745  bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
746  if (UserUnrollCount) {
747  UP.Count = UnrollCount;
748  UP.AllowExpensiveTripCount = true;
749  UP.Force = true;
750  if (UP.AllowRemainder && getUnrolledLoopSize(LoopSize, UP) < UP.Threshold)
751  return true;
752  }
753 
754  // 2nd priority is unroll count set by pragma.
755  unsigned PragmaCount = UnrollCountPragmaValue(L);
756  if (PragmaCount > 0) {
757  UP.Count = PragmaCount;
758  UP.Runtime = true;
759  UP.AllowExpensiveTripCount = true;
760  UP.Force = true;
761  if ((UP.AllowRemainder || (TripMultiple % PragmaCount == 0)) &&
763  return true;
764  }
765  bool PragmaFullUnroll = HasUnrollFullPragma(L);
766  if (PragmaFullUnroll && TripCount != 0) {
767  UP.Count = TripCount;
768  if (getUnrolledLoopSize(LoopSize, UP) < PragmaUnrollThreshold)
769  return false;
770  }
771 
772  bool PragmaEnableUnroll = HasUnrollEnablePragma(L);
773  bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
774  PragmaEnableUnroll || UserUnrollCount;
775 
776  if (ExplicitUnroll && TripCount != 0) {
777  // If the loop has an unrolling pragma, we want to be more aggressive with
778  // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold
779  // value which is larger than the default limits.
780  UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
781  UP.PartialThreshold =
782  std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
783  }
784 
785  // 3rd priority is full unroll count.
786  // Full unroll makes sense only when TripCount or its upper bound could be
787  // statically calculated.
788  // Also we need to check if we exceed FullUnrollMaxCount.
789  // If using the upper bound to unroll, TripMultiple should be set to 1 because
790  // we do not know when loop may exit.
791  // MaxTripCount and ExactTripCount cannot both be non zero since we only
792  // compute the former when the latter is zero.
793  unsigned ExactTripCount = TripCount;
794  assert((ExactTripCount == 0 || MaxTripCount == 0) &&
795  "ExtractTripCount and MaxTripCount cannot both be non zero.");
796  unsigned FullUnrollTripCount = ExactTripCount ? ExactTripCount : MaxTripCount;
797  UP.Count = FullUnrollTripCount;
798  if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
799  // When computing the unrolled size, note that BEInsns are not replicated
800  // like the rest of the loop body.
801  if (getUnrolledLoopSize(LoopSize, UP) < UP.Threshold) {
802  UseUpperBound = (MaxTripCount == FullUnrollTripCount);
803  TripCount = FullUnrollTripCount;
804  TripMultiple = UP.UpperBound ? 1 : TripMultiple;
805  return ExplicitUnroll;
806  } else {
807  // The loop isn't that small, but we still can fully unroll it if that
808  // helps to remove a significant number of instructions.
809  // To check that, run additional analysis on the loop.
811  L, FullUnrollTripCount, DT, SE, EphValues, TTI,
812  UP.Threshold * UP.MaxPercentThresholdBoost / 100)) {
813  unsigned Boost =
815  if (Cost->UnrolledCost < UP.Threshold * Boost / 100) {
816  UseUpperBound = (MaxTripCount == FullUnrollTripCount);
817  TripCount = FullUnrollTripCount;
818  TripMultiple = UP.UpperBound ? 1 : TripMultiple;
819  return ExplicitUnroll;
820  }
821  }
822  }
823  }
824 
825  // 4th priority is loop peeling.
826  computePeelCount(L, LoopSize, UP, TripCount, SE);
827  if (UP.PeelCount) {
828  UP.Runtime = false;
829  UP.Count = 1;
830  return ExplicitUnroll;
831  }
832 
833  // 5th priority is partial unrolling.
834  // Try partial unroll only when TripCount could be statically calculated.
835  if (TripCount) {
836  UP.Partial |= ExplicitUnroll;
837  if (!UP.Partial) {
838  LLVM_DEBUG(dbgs() << " will not try to unroll partially because "
839  << "-unroll-allow-partial not given\n");
840  UP.Count = 0;
841  return false;
842  }
843  if (UP.Count == 0)
844  UP.Count = TripCount;
845  if (UP.PartialThreshold != NoThreshold) {
846  // Reduce unroll count to be modulo of TripCount for partial unrolling.
847  if (getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
848  UP.Count =
849  (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
850  (LoopSize - UP.BEInsns);
851  if (UP.Count > UP.MaxCount)
852  UP.Count = UP.MaxCount;
853  while (UP.Count != 0 && TripCount % UP.Count != 0)
854  UP.Count--;
855  if (UP.AllowRemainder && UP.Count <= 1) {
856  // If there is no Count that is modulo of TripCount, set Count to
857  // largest power-of-two factor that satisfies the threshold limit.
858  // As we'll create fixup loop, do the type of unrolling only if
859  // remainder loop is allowed.
861  while (UP.Count != 0 &&
862  getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
863  UP.Count >>= 1;
864  }
865  if (UP.Count < 2) {
866  if (PragmaEnableUnroll)
867  ORE->emit([&]() {
869  "UnrollAsDirectedTooLarge",
870  L->getStartLoc(), L->getHeader())
871  << "Unable to unroll loop as directed by unroll(enable) "
872  "pragma "
873  "because unrolled size is too large.";
874  });
875  UP.Count = 0;
876  }
877  } else {
878  UP.Count = TripCount;
879  }
880  if (UP.Count > UP.MaxCount)
881  UP.Count = UP.MaxCount;
882  if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
883  UP.Count != TripCount)
884  ORE->emit([&]() {
885  return OptimizationRemarkMissed(DEBUG_TYPE,
886  "FullUnrollAsDirectedTooLarge",
887  L->getStartLoc(), L->getHeader())
888  << "Unable to fully unroll loop as directed by unroll pragma "
889  "because "
890  "unrolled size is too large.";
891  });
892  return ExplicitUnroll;
893  }
894  assert(TripCount == 0 &&
895  "All cases when TripCount is constant should be covered here.");
896  if (PragmaFullUnroll)
897  ORE->emit([&]() {
899  DEBUG_TYPE, "CantFullUnrollAsDirectedRuntimeTripCount",
900  L->getStartLoc(), L->getHeader())
901  << "Unable to fully unroll loop as directed by unroll(full) "
902  "pragma "
903  "because loop has a runtime trip count.";
904  });
905 
906  // 6th priority is runtime unrolling.
907  // Don't unroll a runtime trip count loop when it is disabled.
909  UP.Count = 0;
910  return false;
911  }
912 
913  // Check if the runtime trip count is too small when profile is available.
914  if (L->getHeader()->getParent()->hasProfileData()) {
915  if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
916  if (*ProfileTripCount < FlatLoopTripCountThreshold)
917  return false;
918  else
919  UP.AllowExpensiveTripCount = true;
920  }
921  }
922 
923  // Reduce count based on the type of unrolling and the threshold values.
924  UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
925  if (!UP.Runtime) {
926  LLVM_DEBUG(
927  dbgs() << " will not try to unroll loop with runtime trip count "
928  << "-unroll-runtime not given\n");
929  UP.Count = 0;
930  return false;
931  }
932  if (UP.Count == 0)
934 
935  // Reduce unroll count to be the largest power-of-two factor of
936  // the original count which satisfies the threshold limit.
937  while (UP.Count != 0 &&
938  getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
939  UP.Count >>= 1;
940 
941 #ifndef NDEBUG
942  unsigned OrigCount = UP.Count;
943 #endif
944 
945  if (!UP.AllowRemainder && UP.Count != 0 && (TripMultiple % UP.Count) != 0) {
946  while (UP.Count != 0 && TripMultiple % UP.Count != 0)
947  UP.Count >>= 1;
948  LLVM_DEBUG(
949  dbgs() << "Remainder loop is restricted (that could architecture "
950  "specific or because the loop contains a convergent "
951  "instruction), so unroll count must divide the trip "
952  "multiple, "
953  << TripMultiple << ". Reducing unroll count from " << OrigCount
954  << " to " << UP.Count << ".\n");
955 
956  using namespace ore;
957 
958  if (PragmaCount > 0 && !UP.AllowRemainder)
959  ORE->emit([&]() {
961  "DifferentUnrollCountFromDirected",
962  L->getStartLoc(), L->getHeader())
963  << "Unable to unroll loop the number of times directed by "
964  "unroll_count pragma because remainder loop is restricted "
965  "(that could architecture specific or because the loop "
966  "contains a convergent instruction) and so must have an "
967  "unroll "
968  "count that divides the loop trip multiple of "
969  << NV("TripMultiple", TripMultiple) << ". Unrolling instead "
970  << NV("UnrollCount", UP.Count) << " time(s).";
971  });
972  }
973 
974  if (UP.Count > UP.MaxCount)
975  UP.Count = UP.MaxCount;
976  LLVM_DEBUG(dbgs() << " partially unrolling with count: " << UP.Count
977  << "\n");
978  if (UP.Count < 2)
979  UP.Count = 0;
980  return ExplicitUnroll;
981 }
982 
984  Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
985  const TargetTransformInfo &TTI, AssumptionCache &AC,
987  ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel,
988  bool OnlyWhenForced, bool ForgetAllSCEV, Optional<unsigned> ProvidedCount,
989  Optional<unsigned> ProvidedThreshold, Optional<bool> ProvidedAllowPartial,
990  Optional<bool> ProvidedRuntime, Optional<bool> ProvidedUpperBound,
991  Optional<bool> ProvidedAllowPeeling,
992  Optional<bool> ProvidedAllowProfileBasedPeeling,
993  Optional<unsigned> ProvidedFullUnrollMaxCount) {
994  LLVM_DEBUG(dbgs() << "Loop Unroll: F["
995  << L->getHeader()->getParent()->getName() << "] Loop %"
996  << L->getHeader()->getName() << "\n");
998  if (TM & TM_Disable)
1000  if (!L->isLoopSimplifyForm()) {
1001  LLVM_DEBUG(
1002  dbgs() << " Not unrolling loop which is not in loop-simplify form.\n");
1004  }
1005 
1006  // When automtatic unrolling is disabled, do not unroll unless overridden for
1007  // this loop.
1008  if (OnlyWhenForced && !(TM & TM_Enable))
1010 
1011  bool OptForSize = L->getHeader()->getParent()->hasOptSize();
1012  unsigned NumInlineCandidates;
1013  bool NotDuplicatable;
1014  bool Convergent;
1016  L, SE, TTI, BFI, PSI, OptLevel, ProvidedThreshold, ProvidedCount,
1017  ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
1018  ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling,
1019  ProvidedFullUnrollMaxCount);
1020 
1021  // Exit early if unrolling is disabled. For OptForSize, we pick the loop size
1022  // as threshold later on.
1023  if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0) &&
1024  !OptForSize)
1026 
1028  CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
1029 
1030  unsigned LoopSize =
1031  ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
1032  TTI, EphValues, UP.BEInsns);
1033  LLVM_DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
1034  if (NotDuplicatable) {
1035  LLVM_DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable"
1036  << " instructions.\n");
1038  }
1039 
1040  // When optimizing for size, use LoopSize + 1 as threshold (we use < Threshold
1041  // later), to (fully) unroll loops, if it does not increase code size.
1042  if (OptForSize)
1043  UP.Threshold = std::max(UP.Threshold, LoopSize + 1);
1044 
1045  if (NumInlineCandidates != 0) {
1046  LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
1048  }
1049 
1050  // Find trip count and trip multiple if count is not available
1051  unsigned TripCount = 0;
1052  unsigned MaxTripCount = 0;
1053  unsigned TripMultiple = 1;
1054  // If there are multiple exiting blocks but one of them is the latch, use the
1055  // latch for the trip count estimation. Otherwise insist on a single exiting
1056  // block for the trip count estimation.
1057  BasicBlock *ExitingBlock = L->getLoopLatch();
1058  if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
1059  ExitingBlock = L->getExitingBlock();
1060  if (ExitingBlock) {
1061  TripCount = SE.getSmallConstantTripCount(L, ExitingBlock);
1062  TripMultiple = SE.getSmallConstantTripMultiple(L, ExitingBlock);
1063  }
1064 
1065  // If the loop contains a convergent operation, the prelude we'd add
1066  // to do the first few instructions before we hit the unrolled loop
1067  // is unsafe -- it adds a control-flow dependency to the convergent
1068  // operation. Therefore restrict remainder loop (try unrollig without).
1069  //
1070  // TODO: This is quite conservative. In practice, convergent_op()
1071  // is likely to be called unconditionally in the loop. In this
1072  // case, the program would be ill-formed (on most architectures)
1073  // unless n were the same on all threads in a thread group.
1074  // Assuming n is the same on all threads, any kind of unrolling is
1075  // safe. But currently llvm's notion of convergence isn't powerful
1076  // enough to express this.
1077  if (Convergent)
1078  UP.AllowRemainder = false;
1079 
1080  // Try to find the trip count upper bound if we cannot find the exact trip
1081  // count.
1082  bool MaxOrZero = false;
1083  if (!TripCount) {
1084  MaxTripCount = SE.getSmallConstantMaxTripCount(L);
1085  MaxOrZero = SE.isBackedgeTakenCountMaxOrZero(L);
1086  // We can unroll by the upper bound amount if it's generally allowed or if
1087  // we know that the loop is executed either the upper bound or zero times.
1088  // (MaxOrZero unrolling keeps only the first loop test, so the number of
1089  // loop tests remains the same compared to the non-unrolled version, whereas
1090  // the generic upper bound unrolling keeps all but the last loop test so the
1091  // number of loop tests goes up which may end up being worse on targets with
1092  // constrained branch predictor resources so is controlled by an option.)
1093  // In addition we only unroll small upper bounds.
1094  if (!(UP.UpperBound || MaxOrZero) || MaxTripCount > UnrollMaxUpperBound) {
1095  MaxTripCount = 0;
1096  }
1097  }
1098 
1099  // computeUnrollCount() decides whether it is beneficial to use upper bound to
1100  // fully unroll the loop.
1101  bool UseUpperBound = false;
1102  bool IsCountSetExplicitly = computeUnrollCount(
1103  L, TTI, DT, LI, SE, EphValues, &ORE, TripCount, MaxTripCount,
1104  TripMultiple, LoopSize, UP, UseUpperBound);
1105  if (!UP.Count)
1107  // Unroll factor (Count) must be less or equal to TripCount.
1108  if (TripCount && UP.Count > TripCount)
1109  UP.Count = TripCount;
1110 
1111  // Save loop properties before it is transformed.
1112  MDNode *OrigLoopID = L->getLoopID();
1113 
1114  // Unroll the loop.
1115  Loop *RemainderLoop = nullptr;
1116  LoopUnrollResult UnrollResult = UnrollLoop(
1117  L,
1118  {UP.Count, TripCount, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount,
1119  UseUpperBound, MaxOrZero, TripMultiple, UP.PeelCount, UP.UnrollRemainder,
1120  ForgetAllSCEV},
1121  LI, &SE, &DT, &AC, &ORE, PreserveLCSSA, &RemainderLoop);
1122  if (UnrollResult == LoopUnrollResult::Unmodified)
1124 
1125  if (RemainderLoop) {
1126  Optional<MDNode *> RemainderLoopID =
1129  if (RemainderLoopID.hasValue())
1130  RemainderLoop->setLoopID(RemainderLoopID.getValue());
1131  }
1132 
1133  if (UnrollResult != LoopUnrollResult::FullyUnrolled) {
1134  Optional<MDNode *> NewLoopID =
1137  if (NewLoopID.hasValue()) {
1138  L->setLoopID(NewLoopID.getValue());
1139 
1140  // Do not setLoopAlreadyUnrolled if loop attributes have been specified
1141  // explicitly.
1142  return UnrollResult;
1143  }
1144  }
1145 
1146  // If loop has an unroll count pragma or unrolled by explicitly set count
1147  // mark loop as unrolled to prevent unrolling beyond that requested.
1148  // If the loop was peeled, we already "used up" the profile information
1149  // we had, so we don't want to unroll or peel again.
1150  if (UnrollResult != LoopUnrollResult::FullyUnrolled &&
1151  (IsCountSetExplicitly || (UP.PeelProfiledIterations && UP.PeelCount)))
1153 
1154  return UnrollResult;
1155 }
1156 
1157 namespace {
1158 
1159 class LoopUnroll : public LoopPass {
1160 public:
1161  static char ID; // Pass ID, replacement for typeid
1162 
1163  int OptLevel;
1164 
1165  /// If false, use a cost model to determine whether unrolling of a loop is
1166  /// profitable. If true, only loops that explicitly request unrolling via
1167  /// metadata are considered. All other loops are skipped.
1168  bool OnlyWhenForced;
1169 
1170  /// If false, when SCEV is invalidated, only forget everything in the
1171  /// top-most loop (call forgetTopMostLoop), of the loop being processed.
1172  /// Otherwise, forgetAllLoops and rebuild when needed next.
1173  bool ForgetAllSCEV;
1174 
1175  Optional<unsigned> ProvidedCount;
1176  Optional<unsigned> ProvidedThreshold;
1177  Optional<bool> ProvidedAllowPartial;
1178  Optional<bool> ProvidedRuntime;
1179  Optional<bool> ProvidedUpperBound;
1180  Optional<bool> ProvidedAllowPeeling;
1181  Optional<bool> ProvidedAllowProfileBasedPeeling;
1182  Optional<unsigned> ProvidedFullUnrollMaxCount;
1183 
1184  LoopUnroll(int OptLevel = 2, bool OnlyWhenForced = false,
1185  bool ForgetAllSCEV = false, Optional<unsigned> Threshold = None,
1186  Optional<unsigned> Count = None,
1187  Optional<bool> AllowPartial = None, Optional<bool> Runtime = None,
1188  Optional<bool> UpperBound = None,
1189  Optional<bool> AllowPeeling = None,
1190  Optional<bool> AllowProfileBasedPeeling = None,
1191  Optional<unsigned> ProvidedFullUnrollMaxCount = None)
1192  : LoopPass(ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced),
1193  ForgetAllSCEV(ForgetAllSCEV), ProvidedCount(std::move(Count)),
1194  ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
1195  ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound),
1196  ProvidedAllowPeeling(AllowPeeling),
1197  ProvidedAllowProfileBasedPeeling(AllowProfileBasedPeeling),
1198  ProvidedFullUnrollMaxCount(ProvidedFullUnrollMaxCount) {
1200  }
1201 
1202  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
1203  if (skipLoop(L))
1204  return false;
1205 
1206  Function &F = *L->getHeader()->getParent();
1207 
1208  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1209  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1210  ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1211  const TargetTransformInfo &TTI =
1212  getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1213  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1214  // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
1215  // pass. Function analyses need to be preserved across loop transformations
1216  // but ORE cannot be preserved (see comment before the pass definition).
1217  OptimizationRemarkEmitter ORE(&F);
1218  bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
1219 
1221  L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr, PreserveLCSSA, OptLevel,
1222  OnlyWhenForced, ForgetAllSCEV, ProvidedCount, ProvidedThreshold,
1223  ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
1224  ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling,
1225  ProvidedFullUnrollMaxCount);
1226 
1227  if (Result == LoopUnrollResult::FullyUnrolled)
1228  LPM.markLoopAsDeleted(*L);
1229 
1230  return Result != LoopUnrollResult::Unmodified;
1231  }
1232 
1233  /// This transformation requires natural loop information & requires that
1234  /// loop preheaders be inserted into the CFG...
1235  void getAnalysisUsage(AnalysisUsage &AU) const override {
1238  // FIXME: Loop passes are required to preserve domtree, and for now we just
1239  // recreate dom info if anything gets unrolled.
1241  }
1242 };
1243 
1244 } // end anonymous namespace
1245 
1246 char LoopUnroll::ID = 0;
1247 
1248 INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
1252 INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
1253 
1254 Pass *llvm::createLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
1255  bool ForgetAllSCEV, int Threshold, int Count,
1256  int AllowPartial, int Runtime, int UpperBound,
1257  int AllowPeeling) {
1258  // TODO: It would make more sense for this function to take the optionals
1259  // directly, but that's dangerous since it would silently break out of tree
1260  // callers.
1261  return new LoopUnroll(
1262  OptLevel, OnlyWhenForced, ForgetAllSCEV,
1263  Threshold == -1 ? None : Optional<unsigned>(Threshold),
1264  Count == -1 ? None : Optional<unsigned>(Count),
1265  AllowPartial == -1 ? None : Optional<bool>(AllowPartial),
1266  Runtime == -1 ? None : Optional<bool>(Runtime),
1267  UpperBound == -1 ? None : Optional<bool>(UpperBound),
1268  AllowPeeling == -1 ? None : Optional<bool>(AllowPeeling));
1269 }
1270 
1271 Pass *llvm::createSimpleLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
1272  bool ForgetAllSCEV) {
1273  return createLoopUnrollPass(OptLevel, OnlyWhenForced, ForgetAllSCEV, -1, -1,
1274  0, 0, 0, 0);
1275 }
1276 
1279  LPMUpdater &Updater) {
1280  const auto &FAM =
1281  AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
1282  Function *F = L.getHeader()->getParent();
1283 
1284  auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
1285  // FIXME: This should probably be optional rather than required.
1286  if (!ORE)
1288  "LoopFullUnrollPass: OptimizationRemarkEmitterAnalysis not "
1289  "cached at a higher level");
1290 
1291  // Keep track of the previous loop structure so we can identify new loops
1292  // created by unrolling.
1293  Loop *ParentL = L.getParentLoop();
1294  SmallPtrSet<Loop *, 4> OldLoops;
1295  if (ParentL)
1296  OldLoops.insert(ParentL->begin(), ParentL->end());
1297  else
1298  OldLoops.insert(AR.LI.begin(), AR.LI.end());
1299 
1300  std::string LoopName = L.getName();
1301 
1302  bool Changed = tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE,
1303  /*BFI*/ nullptr, /*PSI*/ nullptr,
1304  /*PreserveLCSSA*/ true, OptLevel,
1305  OnlyWhenForced, ForgetSCEV, /*Count*/ None,
1306  /*Threshold*/ None, /*AllowPartial*/ false,
1307  /*Runtime*/ false, /*UpperBound*/ false,
1308  /*AllowPeeling*/ false,
1309  /*AllowProfileBasedPeeling*/ false,
1310  /*FullUnrollMaxCount*/ None) !=
1312  if (!Changed)
1313  return PreservedAnalyses::all();
1314 
1315  // The parent must not be damaged by unrolling!
1316 #ifndef NDEBUG
1317  if (ParentL)
1318  ParentL->verifyLoop();
1319 #endif
1320 
1321  // Unrolling can do several things to introduce new loops into a loop nest:
1322  // - Full unrolling clones child loops within the current loop but then
1323  // removes the current loop making all of the children appear to be new
1324  // sibling loops.
1325  //
1326  // When a new loop appears as a sibling loop after fully unrolling,
1327  // its nesting structure has fundamentally changed and we want to revisit
1328  // it to reflect that.
1329  //
1330  // When unrolling has removed the current loop, we need to tell the
1331  // infrastructure that it is gone.
1332  //
1333  // Finally, we support a debugging/testing mode where we revisit child loops
1334  // as well. These are not expected to require further optimizations as either
1335  // they or the loop they were cloned from have been directly visited already.
1336  // But the debugging mode allows us to check this assumption.
1337  bool IsCurrentLoopValid = false;
1338  SmallVector<Loop *, 4> SibLoops;
1339  if (ParentL)
1340  SibLoops.append(ParentL->begin(), ParentL->end());
1341  else
1342  SibLoops.append(AR.LI.begin(), AR.LI.end());
1343  erase_if(SibLoops, [&](Loop *SibLoop) {
1344  if (SibLoop == &L) {
1345  IsCurrentLoopValid = true;
1346  return true;
1347  }
1348 
1349  // Otherwise erase the loop from the list if it was in the old loops.
1350  return OldLoops.count(SibLoop) != 0;
1351  });
1352  Updater.addSiblingLoops(SibLoops);
1353 
1354  if (!IsCurrentLoopValid) {
1355  Updater.markLoopAsDeleted(L, LoopName);
1356  } else {
1357  // We can only walk child loops if the current loop remained valid.
1359  // Walk *all* of the child loops.
1360  SmallVector<Loop *, 4> ChildLoops(L.begin(), L.end());
1361  Updater.addChildLoops(ChildLoops);
1362  }
1363  }
1364 
1366 }
1367 
1368 template <typename RangeT>
1370  SmallVector<Loop *, 8> Worklist;
1371  // We use an internal worklist to build up the preorder traversal without
1372  // recursion.
1373  SmallVector<Loop *, 4> PreOrderLoops, PreOrderWorklist;
1374 
1375  for (Loop *RootL : Loops) {
1376  assert(PreOrderLoops.empty() && "Must start with an empty preorder walk.");
1377  assert(PreOrderWorklist.empty() &&
1378  "Must start with an empty preorder walk worklist.");
1379  PreOrderWorklist.push_back(RootL);
1380  do {
1381  Loop *L = PreOrderWorklist.pop_back_val();
1382  PreOrderWorklist.append(L->begin(), L->end());
1383  PreOrderLoops.push_back(L);
1384  } while (!PreOrderWorklist.empty());
1385 
1386  Worklist.append(PreOrderLoops.begin(), PreOrderLoops.end());
1387  PreOrderLoops.clear();
1388  }
1389  return Worklist;
1390 }
1391 
1394  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
1395  auto &LI = AM.getResult<LoopAnalysis>(F);
1396  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1397  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1398  auto &AC = AM.getResult<AssumptionAnalysis>(F);
1400 
1401  LoopAnalysisManager *LAM = nullptr;
1402  if (auto *LAMProxy = AM.getCachedResult<LoopAnalysisManagerFunctionProxy>(F))
1403  LAM = &LAMProxy->getManager();
1404 
1405  const ModuleAnalysisManager &MAM =
1407  ProfileSummaryInfo *PSI =
1409  auto *BFI = (PSI && PSI->hasProfileSummary()) ?
1410  &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
1411 
1412  bool Changed = false;
1413 
1414  // The unroller requires loops to be in simplified form, and also needs LCSSA.
1415  // Since simplification may add new inner loops, it has to run before the
1416  // legality and profitability checks. This means running the loop unroller
1417  // will simplify all loops, regardless of whether anything end up being
1418  // unrolled.
1419  for (auto &L : LI) {
1420  Changed |=
1421  simplifyLoop(L, &DT, &LI, &SE, &AC, nullptr, false /* PreserveLCSSA */);
1422  Changed |= formLCSSARecursively(*L, DT, &LI, &SE);
1423  }
1424 
1426 
1427  while (!Worklist.empty()) {
1428  // Because the LoopInfo stores the loops in RPO, we walk the worklist
1429  // from back to front so that we work forward across the CFG, which
1430  // for unrolling is only needed to get optimization remarks emitted in
1431  // a forward order.
1432  Loop &L = *Worklist.pop_back_val();
1433 #ifndef NDEBUG
1434  Loop *ParentL = L.getParentLoop();
1435 #endif
1436 
1437  // Check if the profile summary indicates that the profiled application
1438  // has a huge working set size, in which case we disable peeling to avoid
1439  // bloating it further.
1440  Optional<bool> LocalAllowPeeling = UnrollOpts.AllowPeeling;
1441  if (PSI && PSI->hasHugeWorkingSetSize())
1442  LocalAllowPeeling = false;
1443  std::string LoopName = L.getName();
1444  // The API here is quite complex to call and we allow to select some
1445  // flavors of unrolling during construction time (by setting UnrollOpts).
1447  &L, DT, &LI, SE, TTI, AC, ORE, BFI, PSI,
1448  /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, UnrollOpts.OnlyWhenForced,
1449  UnrollOpts.ForgetSCEV, /*Count*/ None,
1450  /*Threshold*/ None, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime,
1451  UnrollOpts.AllowUpperBound, LocalAllowPeeling,
1452  UnrollOpts.AllowProfileBasedPeeling, UnrollOpts.FullUnrollMaxCount);
1453  Changed |= Result != LoopUnrollResult::Unmodified;
1454 
1455  // The parent must not be damaged by unrolling!
1456 #ifndef NDEBUG
1457  if (Result != LoopUnrollResult::Unmodified && ParentL)
1458  ParentL->verifyLoop();
1459 #endif
1460 
1461  // Clear any cached analysis results for L if we removed it completely.
1462  if (LAM && Result == LoopUnrollResult::FullyUnrolled)
1463  LAM->clear(L, LoopName);
1464  }
1465 
1466  if (!Changed)
1467  return PreservedAnalyses::all();
1468 
1470 }
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:80
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value *> &EphValues)
Collect a loop&#39;s ephemeral values (those used only by an assume or similar intrinsics in the loop)...
Definition: CodeMetrics.cpp:70
uint64_t CallInst * C
bool PeelProfiledIterations
Allow peeling basing on profile.
unsigned getSmallConstantTripCount(const Loop *L)
Returns the maximum trip count of the loop if it is a single-exit loop and we can compute a small max...
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
Pass * createSimpleLoopUnrollPass(int OptLevel=2, bool OnlyWhenForced=false, bool ForgetAllSCEV=false)
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
Diagnostic information for missed-optimization remarks.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:209
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
unsigned getSmallConstantTripMultiple(const Loop *L)
Returns the largest constant divisor of the trip count of the loop if it is a single-exit loop and we...
DiagnosticInfoOptimizationBase::Argument NV
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:776
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:139
This class represents lattice values for constants.
Definition: AllocatorList.h:23
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
This header provides classes for managing a pipeline of passes over loops in LLVM IR...
cl::opt< bool > ForgetSCEVInLoopUnroll
bool convergent
True if this function contains a call to a convergent function.
Definition: CodeMetrics.h:47
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:622
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
static bool HasUnrollEnablePragma(const Loop *L)
bool isLCSSAForm(DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:444
Implements a dense probed hash-table based set.
Definition: DenseSet.h:249
Analysis providing profile information.
The main scalar evolution driver.
#define DEBUG_TYPE
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold, but used for partial/runtime unrolling (set to UINT_MAX to disable).
static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost, unsigned MaxPercentThresholdBoost)
This file contains the declarations for metadata subclasses.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:160
An immutable pass that tracks lazily created AssumptionCache objects.
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
A cache of @llvm.assume calls within a function.
Analysis pass providing the TargetTransformInfo.
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
unsigned second
unsigned NumInlineCandidates
The number of calls to internal functions with a single caller.
Definition: CodeMetrics.h:68
Metadata node.
Definition: Metadata.h:863
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:230
F(f)
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1068
bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const SmallPtrSetImpl< const Value *> &EphValues, OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount, unsigned &TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound)
bool notDuplicatable
True if this function cannot be duplicated.
Definition: CodeMetrics.h:44
static cl::opt< unsigned > UnrollFullMaxCount("unroll-full-max-count", cl::Hidden, cl::desc("Set the max unroll count for full unrolling, for testing purposes"))
bool shouldOptimizeForSize(Function *F, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
Returns true if function F is suggested to be size-optimized base on the profile. ...
Definition: SizeOpts.cpp:23
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
unsigned getSmallConstantMaxTripCount(const Loop *L)
Returns the upper bound of the loop trip count as a normal unsigned value.
const char *const LLVMLoopUnrollFollowupUnrolled
Definition: UnrollLoop.h:42
void addChildLoops(ArrayRef< Loop *> NewChildLoops)
Loop passes should use this method to indicate they have added new child loops of the current loop...
static cl::opt< unsigned > UnrollPartialThreshold("unroll-partial-threshold", cl::Hidden, cl::desc("The cost threshold for partial loop unrolling"))
bool formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition: LCSSA.cpp:385
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:195
void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:4428
static cl::opt< bool > UnrollRevisitChildLoops("unroll-revisit-child-loops", cl::Hidden, cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. " "This shouldn't typically be needed as child loops (or their " "clones) were already visited."))
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
Hexagon Hardware Loops
static cl::opt< unsigned > FlatLoopTripCountThreshold("flat-loop-tripcount-threshold", cl::init(5), cl::Hidden, cl::desc("If the runtime tripcount for the loop is lower than the " "threshold, the loop is considered as flat and will be less " "aggressively unrolled."))
static bool HasUnrollFullPragma(const Loop *L)
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel, Optional< unsigned > UserThreshold, Optional< unsigned > UserCount, Optional< bool > UserAllowPartial, Optional< bool > UserRuntime, Optional< bool > UserUpperBound, Optional< bool > UserAllowPeeling, Optional< bool > UserAllowProfileBasedPeeling, Optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
bool AllowPeeling
Allow peeling off loop iterations.
static cl::opt< unsigned > UnrollCount("unroll-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with " "unroll_count pragma values, for testing purposes"))
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for...
unsigned FullUnrollMaxCount
Set the maximum unrolling factor for full unrolling.
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1183
static cl::opt< bool > UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden, cl::desc("Unroll loops with run-time trip counts"))
static Optional< EstimatedUnrollCost > analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, const SmallPtrSetImpl< const Value *> &EphValues, const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize)
Figure out if the loop is worth full unrolling.
BlockT * getHeader() const
Definition: LoopInfo.h:105
static MDNode * GetUnrollMetadataForLoop(const Loop *L, StringRef Name)
The transformation should be applied without considering a cost model.
Definition: LoopUtils.h:228
void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, unsigned &TripCount, ScalarEvolution &SE)
void visit(Iterator Start, Iterator End)
Definition: InstVisitor.h:89
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
static cl::opt< unsigned > UnrollMaxUpperBound("unroll-max-upperbound", cl::init(8), cl::Hidden, cl::desc("The max of trip count upper bound that is considered in unrolling"))
static bool isEqual(const Function &Caller, const Function &Callee)
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:509
const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:255
const char *const LLVMLoopUnrollFollowupRemainder
Definition: UnrollLoop.h:44
This header provides classes for managing per-loop analyses.
static cl::opt< bool > UnrollAllowRemainder("unroll-allow-remainder", cl::Hidden, cl::desc("Allow generation of a loop remainder (extra iterations) " "when unrolling a loop."))
static cl::opt< unsigned > UnrollMaxPercentThresholdBoost("unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden, cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied " "to the threshold when aggressively unrolling a loop due to the " "dynamic cost savings. If completely unrolling a loop will reduce " "the total runtime from X to Y, we boost the loop unroll " "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, " "X/Y). This limit avoids excessive code bloat."))
void initializeLoopUnrollPass(PassRegistry &)
static LoopUnrollResult tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel, bool OnlyWhenForced, bool ForgetAllSCEV, Optional< unsigned > ProvidedCount, Optional< unsigned > ProvidedThreshold, Optional< bool > ProvidedAllowPartial, Optional< bool > ProvidedRuntime, Optional< bool > ProvidedUpperBound, Optional< bool > ProvidedAllowPeeling, Optional< bool > ProvidedAllowProfileBasedPeeling, Optional< unsigned > ProvidedFullUnrollMaxCount)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
bool AllowRemainder
Allow generation of a loop remainder (extra iterations after unroll).
bool hasProfileSummary()
Returns true if profile summary is available.
The loop was fully unrolled into straight-line code.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
static cl::opt< bool > UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden, cl::desc("Allows loops to be peeled when the dynamic " "trip count is known to be low."))
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop...
Definition: LoopInfo.h:208
Conditional or Unconditional Branch instruction.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
This is an important base class in LLVM.
Definition: Constant.h:41
static cl::opt< unsigned > UnrollMaxCount("unroll-max-count", cl::Hidden, cl::desc("Set the max unroll count for partial and runtime unrolling, for" "testing purposes"))
This file contains the declarations for the subclasses of Constant, which represent the different fla...
iterator end() const
Definition: LoopInfo.h:904
void setLoopAlreadyUnrolled()
Add llvm.loop.unroll.disable to this loop&#39;s loop id metadata.
Definition: LoopInfo.cpp:521
Machine Trace Metrics
char & LCSSAID
Definition: LCSSA.cpp:467
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
Represent the analysis usage information of a pass.
void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value *> &EphValues)
Add information about a block to the current state.
static cl::opt< unsigned > UnrollThreshold("unroll-threshold", cl::Hidden, cl::desc("The cost threshold for loop unrolling"))
op_range operands()
Definition: User.h:237
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
Optional< unsigned > getLoopEstimatedTripCount(Loop *L)
Get a loop&#39;s estimated trip count based on branch weight metadata.
Definition: LoopUtils.cpp:679
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
Definition: Function.h:308
static cl::opt< unsigned > UnrollPeelCount("unroll-peel-count", cl::Hidden, cl::desc("Set the unroll peeling count, for testing purposes"))
void addSiblingLoops(ArrayRef< Loop *> NewSibLoops)
Loop passes should use this method to indicate they have added new sibling loops to the current loop...
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:607
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
const char *const LLVMLoopUnrollFollowupAll
Definition: UnrollLoop.h:41
The transformation should not be applied.
Definition: LoopUtils.h:231
A function analysis which provides an AssumptionCache.
unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value *> &EphValues, unsigned BEInsns)
ApproximateLoopSize - Approximate the size of the loop.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:115
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:297
Analysis pass which computes BlockFrequencyInfo.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file. ...
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
static cl::opt< bool > UnrollAllowPartial("unroll-allow-partial", cl::Hidden, cl::desc("Allows loops to be partially unrolled until " "-unroll-threshold loop size is reached."))
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:1160
static cl::opt< unsigned > PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 *1024), cl::Hidden, cl::desc("Unrolled size limit for loops with an unroll(full) or " "unroll_count pragma."))
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
iterator begin() const
Definition: LoopInfo.h:147
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition: CodeMetrics.h:32
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:47
static unsigned UnrollCountPragmaValue(const Loop *L)
unsigned DefaultUnrollRuntimeCount
Default unroll count for loops with run-time trip count.
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:374
void markLoopAsDeleted(Loop &L)
Definition: LoopPass.cpp:143
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
static uint64_t getUnrolledLoopSize(unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2&#39;s erase_if which is equivalent t...
Definition: STLExtras.h:1332
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
Optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:302
iterator begin() const
Definition: LoopInfo.h:903
amdgpu Simplify well known AMD library false FunctionCallee Callee
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
static cl::opt< bool > UnrollUnrollRemainder("unroll-remainder", cl::Hidden, cl::desc("Allow the loop remainder to be unrolled."))
static cl::opt< unsigned > Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), cl::init(100), cl::Hidden)
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:387
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Definition: LoopUnroll.cpp:962
Analysis pass that exposes the ScalarEvolution for a function.
static const unsigned NoThreshold
A magic value for use with the Threshold parameter to indicate that the loop unroll should be perform...
LoopT * getParentLoop() const
Definition: LoopInfo.h:106
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
bool hasValue() const
Definition: Optional.h:259
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:460
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:485
unsigned PeelCount
A forced peeling factor (the number of bodied of the original loop that should be peeled off before t...
unsigned Threshold
The cost threshold for the unrolled loop.
int getUserCost(const User *U, ArrayRef< const Value *> Operands) const
Estimate the cost of a given IR user when lowered.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
StringRef getName() const
Definition: LoopInfo.h:827
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:509
Parameters that control the generic loop unrolling transformation.
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable)...
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
LoopUnrollResult UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop=nullptr)
Unroll the given loop by Count.
Definition: LoopUnroll.cpp:276
TransformationMode hasUnrollTransformation(Loop *L)
Definition: LoopUtils.cpp:391
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass&#39;s AnalysisUsage.
Definition: LoopUtils.cpp:138
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:795
iterator end() const
Definition: LoopInfo.h:148
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
TransformationMode
The mode sets how eager a transformation should be applied.
Definition: LoopUtils.h:222
static SmallVector< Loop *, 8 > appendLoopsToWorklist(RangeT &&Loops)
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:185
Pass * createLoopUnrollPass(int OptLevel=2, bool OnlyWhenForced=false, bool ForgetAllSCEV=false, int Threshold=-1, int Count=-1, int AllowPartial=-1, int Runtime=-1, int UpperBound=-1, int AllowPeeling=-1)
bool empty() const
Definition: LoopInfo.h:151
Multiway switch.
static bool HasRuntimeUnrollDisablePragma(const Loop *L)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool hasHugeWorkingSetSize()
Returns true if the working set size of the code is considered huge.
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:575
LLVM Value Representation.
Definition: Value.h:73
succ_range successors(Instruction *I)
Definition: CFG.h:259
unsigned MaxPercentThresholdBoost
If complete unrolling will reduce the cost of the loop, we will boost the Threshold by a certain perc...
bool isLoweredToCall(const Function *F) const
Test whether calls to a function lower to actual program function calls.
The loop was not modified.
static cl::opt< unsigned > UnrollMaxIterationsCountToAnalyze("unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden, cl::desc("Don't allow loop unrolling to simulate more than this number of" "iterations when checking full unroll profitability"))
bool isBackedgeTakenCountMaxOrZero(const Loop *L)
Return true if the backedge taken count is either the value returned by getConstantMaxBackedgeTakenCo...
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
bool UpperBound
Allow using trip count upper bound to unroll loops.
void verifyLoop() const
Verify loop structure.
Definition: LoopInfoImpl.h:279
void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP) const
Get target-customized preferences for the generic loop unrolling transformation.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
A container for analyses that lazily runs them and caches their results.
This pass exposes codegen information to IR-level passes.
This header defines various interfaces for pass management in LLVM.
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1074
#define LLVM_DEBUG(X)
Definition: Debug.h:122
unsigned NumInsts
Number of instructions in the analyzed blocks.
Definition: CodeMetrics.h:53
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:161
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:49
The optimization diagnostic interface.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:1044
LoopUnrollResult
Represents the result of a UnrollLoop invocation.
Definition: UnrollLoop.h:53