LLVM  9.0.0svn
LoopUnrollPass.cpp
Go to the documentation of this file.
1 //===- LoopUnroll.cpp - Loop unroller pass --------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass implements a simple loop unroller. It works best when loops have
10 // been canonicalized by the -indvars pass, allowing it to determine the trip
11 // counts of loops easily.
12 //===----------------------------------------------------------------------===//
13 
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/DenseMapInfo.h"
17 #include "llvm/ADT/DenseSet.h"
18 #include "llvm/ADT/None.h"
19 #include "llvm/ADT/Optional.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SetVector.h"
22 #include "llvm/ADT/SmallPtrSet.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/StringRef.h"
30 #include "llvm/Analysis/LoopInfo.h"
31 #include "llvm/Analysis/LoopPass.h"
37 #include "llvm/IR/BasicBlock.h"
38 #include "llvm/IR/CFG.h"
39 #include "llvm/IR/Constant.h"
40 #include "llvm/IR/Constants.h"
41 #include "llvm/IR/DiagnosticInfo.h"
42 #include "llvm/IR/Dominators.h"
43 #include "llvm/IR/Function.h"
44 #include "llvm/IR/Instruction.h"
45 #include "llvm/IR/Instructions.h"
46 #include "llvm/IR/IntrinsicInst.h"
47 #include "llvm/IR/Metadata.h"
48 #include "llvm/IR/PassManager.h"
49 #include "llvm/Pass.h"
50 #include "llvm/Support/Casting.h"
52 #include "llvm/Support/Debug.h"
55 #include "llvm/Transforms/Scalar.h"
57 #include "llvm/Transforms/Utils.h"
62 #include <algorithm>
63 #include <cassert>
64 #include <cstdint>
65 #include <limits>
66 #include <string>
67 #include <tuple>
68 #include <utility>
69 
70 using namespace llvm;
71 
72 #define DEBUG_TYPE "loop-unroll"
73 
74 static cl::opt<unsigned>
75  UnrollThreshold("unroll-threshold", cl::Hidden,
76  cl::desc("The cost threshold for loop unrolling"));
77 
79  "unroll-partial-threshold", cl::Hidden,
80  cl::desc("The cost threshold for partial loop unrolling"));
81 
83  "unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden,
84  cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied "
85  "to the threshold when aggressively unrolling a loop due to the "
86  "dynamic cost savings. If completely unrolling a loop will reduce "
87  "the total runtime from X to Y, we boost the loop unroll "
88  "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, "
89  "X/Y). This limit avoids excessive code bloat."));
90 
92  "unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden,
93  cl::desc("Don't allow loop unrolling to simulate more than this number of"
94  "iterations when checking full unroll profitability"));
95 
97  "unroll-count", cl::Hidden,
98  cl::desc("Use this unroll count for all loops including those with "
99  "unroll_count pragma values, for testing purposes"));
100 
102  "unroll-max-count", cl::Hidden,
103  cl::desc("Set the max unroll count for partial and runtime unrolling, for"
104  "testing purposes"));
105 
107  "unroll-full-max-count", cl::Hidden,
108  cl::desc(
109  "Set the max unroll count for full unrolling, for testing purposes"));
110 
112  "unroll-peel-count", cl::Hidden,
113  cl::desc("Set the unroll peeling count, for testing purposes"));
114 
115 static cl::opt<bool>
116  UnrollAllowPartial("unroll-allow-partial", cl::Hidden,
117  cl::desc("Allows loops to be partially unrolled until "
118  "-unroll-threshold loop size is reached."));
119 
121  "unroll-allow-remainder", cl::Hidden,
122  cl::desc("Allow generation of a loop remainder (extra iterations) "
123  "when unrolling a loop."));
124 
125 static cl::opt<bool>
126  UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden,
127  cl::desc("Unroll loops with run-time trip counts"));
128 
130  "unroll-max-upperbound", cl::init(8), cl::Hidden,
131  cl::desc(
132  "The max of trip count upper bound that is considered in unrolling"));
133 
135  "pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden,
136  cl::desc("Unrolled size limit for loops with an unroll(full) or "
137  "unroll_count pragma."));
138 
140  "flat-loop-tripcount-threshold", cl::init(5), cl::Hidden,
141  cl::desc("If the runtime tripcount for the loop is lower than the "
142  "threshold, the loop is considered as flat and will be less "
143  "aggressively unrolled."));
144 
145 static cl::opt<bool>
146  UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden,
147  cl::desc("Allows loops to be peeled when the dynamic "
148  "trip count is known to be low."));
149 
151  "unroll-remainder", cl::Hidden,
152  cl::desc("Allow the loop remainder to be unrolled."));
153 
154 // This option isn't ever intended to be enabled, it serves to allow
155 // experiments to check the assumptions about when this kind of revisit is
156 // necessary.
158  "unroll-revisit-child-loops", cl::Hidden,
159  cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. "
160  "This shouldn't typically be needed as child loops (or their "
161  "clones) were already visited."));
162 
163 /// A magic value for use with the Threshold parameter to indicate
164 /// that the loop unroll should be performed regardless of how much
165 /// code expansion would result.
167 
168 /// Gather the various unrolling parameters based on the defaults, compiler
169 /// flags, TTI overrides and user specified parameters.
171  Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
172  BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel,
173  Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
174  Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
175  Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling) {
177 
178  // Set up the defaults
179  UP.Threshold = OptLevel > 2 ? 300 : 150;
180  UP.MaxPercentThresholdBoost = 400;
181  UP.OptSizeThreshold = 0;
182  UP.PartialThreshold = 150;
184  UP.Count = 0;
185  UP.PeelCount = 0;
189  UP.BEInsns = 2;
190  UP.Partial = false;
191  UP.Runtime = false;
192  UP.AllowRemainder = true;
193  UP.UnrollRemainder = false;
194  UP.AllowExpensiveTripCount = false;
195  UP.Force = false;
196  UP.UpperBound = false;
197  UP.AllowPeeling = true;
198  UP.UnrollAndJam = false;
200 
201  // Override with any target specific settings
202  TTI.getUnrollingPreferences(L, SE, UP);
203 
204  // Apply size attributes
205  bool OptForSize = L->getHeader()->getParent()->hasOptSize() ||
207  if (OptForSize) {
208  UP.Threshold = UP.OptSizeThreshold;
210  UP.MaxPercentThresholdBoost = 100;
211  }
212 
213  // Apply any user values specified by cl::opt
214  if (UnrollThreshold.getNumOccurrences() > 0)
216  if (UnrollPartialThreshold.getNumOccurrences() > 0)
218  if (UnrollMaxPercentThresholdBoost.getNumOccurrences() > 0)
220  if (UnrollMaxCount.getNumOccurrences() > 0)
222  if (UnrollFullMaxCount.getNumOccurrences() > 0)
224  if (UnrollPeelCount.getNumOccurrences() > 0)
226  if (UnrollAllowPartial.getNumOccurrences() > 0)
228  if (UnrollAllowRemainder.getNumOccurrences() > 0)
230  if (UnrollRuntime.getNumOccurrences() > 0)
231  UP.Runtime = UnrollRuntime;
232  if (UnrollMaxUpperBound == 0)
233  UP.UpperBound = false;
234  if (UnrollAllowPeeling.getNumOccurrences() > 0)
236  if (UnrollUnrollRemainder.getNumOccurrences() > 0)
238 
239  // Apply user values provided by argument
240  if (UserThreshold.hasValue()) {
241  UP.Threshold = *UserThreshold;
242  UP.PartialThreshold = *UserThreshold;
243  }
244  if (UserCount.hasValue())
245  UP.Count = *UserCount;
246  if (UserAllowPartial.hasValue())
247  UP.Partial = *UserAllowPartial;
248  if (UserRuntime.hasValue())
249  UP.Runtime = *UserRuntime;
250  if (UserUpperBound.hasValue())
251  UP.UpperBound = *UserUpperBound;
252  if (UserAllowPeeling.hasValue())
253  UP.AllowPeeling = *UserAllowPeeling;
254 
255  return UP;
256 }
257 
258 namespace {
259 
260 /// A struct to densely store the state of an instruction after unrolling at
261 /// each iteration.
262 ///
263 /// This is designed to work like a tuple of <Instruction *, int> for the
264 /// purposes of hashing and lookup, but to be able to associate two boolean
265 /// states with each key.
266 struct UnrolledInstState {
267  Instruction *I;
268  int Iteration : 30;
269  unsigned IsFree : 1;
270  unsigned IsCounted : 1;
271 };
272 
273 /// Hashing and equality testing for a set of the instruction states.
274 struct UnrolledInstStateKeyInfo {
275  using PtrInfo = DenseMapInfo<Instruction *>;
277 
278  static inline UnrolledInstState getEmptyKey() {
279  return {PtrInfo::getEmptyKey(), 0, 0, 0};
280  }
281 
282  static inline UnrolledInstState getTombstoneKey() {
283  return {PtrInfo::getTombstoneKey(), 0, 0, 0};
284  }
285 
286  static inline unsigned getHashValue(const UnrolledInstState &S) {
287  return PairInfo::getHashValue({S.I, S.Iteration});
288  }
289 
290  static inline bool isEqual(const UnrolledInstState &LHS,
291  const UnrolledInstState &RHS) {
292  return PairInfo::isEqual({LHS.I, LHS.Iteration}, {RHS.I, RHS.Iteration});
293  }
294 };
295 
296 struct EstimatedUnrollCost {
297  /// The estimated cost after unrolling.
298  unsigned UnrolledCost;
299 
300  /// The estimated dynamic cost of executing the instructions in the
301  /// rolled form.
302  unsigned RolledDynamicCost;
303 };
304 
305 } // end anonymous namespace
306 
307 /// Figure out if the loop is worth full unrolling.
308 ///
309 /// Complete loop unrolling can make some loads constant, and we need to know
310 /// if that would expose any further optimization opportunities. This routine
311 /// estimates this optimization. It computes cost of unrolled loop
312 /// (UnrolledCost) and dynamic cost of the original loop (RolledDynamicCost). By
313 /// dynamic cost we mean that we won't count costs of blocks that are known not
314 /// to be executed (i.e. if we have a branch in the loop and we know that at the
315 /// given iteration its condition would be resolved to true, we won't add up the
316 /// cost of the 'false'-block).
317 /// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If
318 /// the analysis failed (no benefits expected from the unrolling, or the loop is
319 /// too big to analyze), the returned value is None.
321  const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE,
322  const SmallPtrSetImpl<const Value *> &EphValues,
323  const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize) {
324  // We want to be able to scale offsets by the trip count and add more offsets
325  // to them without checking for overflows, and we already don't want to
326  // analyze *massive* trip counts, so we force the max to be reasonably small.
328  (unsigned)(std::numeric_limits<int>::max() / 2) &&
329  "The unroll iterations max is too large!");
330 
331  // Only analyze inner loops. We can't properly estimate cost of nested loops
332  // and we won't visit inner loops again anyway.
333  if (!L->empty())
334  return None;
335 
336  // Don't simulate loops with a big or unknown tripcount
337  if (!UnrollMaxIterationsCountToAnalyze || !TripCount ||
339  return None;
340 
343  DenseMap<Value *, Constant *> SimplifiedValues;
344  SmallVector<std::pair<Value *, Constant *>, 4> SimplifiedInputValues;
345 
346  // The estimated cost of the unrolled form of the loop. We try to estimate
347  // this by simplifying as much as we can while computing the estimate.
348  unsigned UnrolledCost = 0;
349 
350  // We also track the estimated dynamic (that is, actually executed) cost in
351  // the rolled form. This helps identify cases when the savings from unrolling
352  // aren't just exposing dead control flows, but actual reduced dynamic
353  // instructions due to the simplifications which we expect to occur after
354  // unrolling.
355  unsigned RolledDynamicCost = 0;
356 
357  // We track the simplification of each instruction in each iteration. We use
358  // this to recursively merge costs into the unrolled cost on-demand so that
359  // we don't count the cost of any dead code. This is essentially a map from
360  // <instruction, int> to <bool, bool>, but stored as a densely packed struct.
362 
363  // A small worklist used to accumulate cost of instructions from each
364  // observable and reached root in the loop.
365  SmallVector<Instruction *, 16> CostWorklist;
366 
367  // PHI-used worklist used between iterations while accumulating cost.
368  SmallVector<Instruction *, 4> PHIUsedList;
369 
370  // Helper function to accumulate cost for instructions in the loop.
371  auto AddCostRecursively = [&](Instruction &RootI, int Iteration) {
372  assert(Iteration >= 0 && "Cannot have a negative iteration!");
373  assert(CostWorklist.empty() && "Must start with an empty cost list");
374  assert(PHIUsedList.empty() && "Must start with an empty phi used list");
375  CostWorklist.push_back(&RootI);
376  for (;; --Iteration) {
377  do {
378  Instruction *I = CostWorklist.pop_back_val();
379 
380  // InstCostMap only uses I and Iteration as a key, the other two values
381  // don't matter here.
382  auto CostIter = InstCostMap.find({I, Iteration, 0, 0});
383  if (CostIter == InstCostMap.end())
384  // If an input to a PHI node comes from a dead path through the loop
385  // we may have no cost data for it here. What that actually means is
386  // that it is free.
387  continue;
388  auto &Cost = *CostIter;
389  if (Cost.IsCounted)
390  // Already counted this instruction.
391  continue;
392 
393  // Mark that we are counting the cost of this instruction now.
394  Cost.IsCounted = true;
395 
396  // If this is a PHI node in the loop header, just add it to the PHI set.
397  if (auto *PhiI = dyn_cast<PHINode>(I))
398  if (PhiI->getParent() == L->getHeader()) {
399  assert(Cost.IsFree && "Loop PHIs shouldn't be evaluated as they "
400  "inherently simplify during unrolling.");
401  if (Iteration == 0)
402  continue;
403 
404  // Push the incoming value from the backedge into the PHI used list
405  // if it is an in-loop instruction. We'll use this to populate the
406  // cost worklist for the next iteration (as we count backwards).
407  if (auto *OpI = dyn_cast<Instruction>(
408  PhiI->getIncomingValueForBlock(L->getLoopLatch())))
409  if (L->contains(OpI))
410  PHIUsedList.push_back(OpI);
411  continue;
412  }
413 
414  // First accumulate the cost of this instruction.
415  if (!Cost.IsFree) {
416  UnrolledCost += TTI.getUserCost(I);
417  LLVM_DEBUG(dbgs() << "Adding cost of instruction (iteration "
418  << Iteration << "): ");
419  LLVM_DEBUG(I->dump());
420  }
421 
422  // We must count the cost of every operand which is not free,
423  // recursively. If we reach a loop PHI node, simply add it to the set
424  // to be considered on the next iteration (backwards!).
425  for (Value *Op : I->operands()) {
426  // Check whether this operand is free due to being a constant or
427  // outside the loop.
428  auto *OpI = dyn_cast<Instruction>(Op);
429  if (!OpI || !L->contains(OpI))
430  continue;
431 
432  // Otherwise accumulate its cost.
433  CostWorklist.push_back(OpI);
434  }
435  } while (!CostWorklist.empty());
436 
437  if (PHIUsedList.empty())
438  // We've exhausted the search.
439  break;
440 
441  assert(Iteration > 0 &&
442  "Cannot track PHI-used values past the first iteration!");
443  CostWorklist.append(PHIUsedList.begin(), PHIUsedList.end());
444  PHIUsedList.clear();
445  }
446  };
447 
448  // Ensure that we don't violate the loop structure invariants relied on by
449  // this analysis.
450  assert(L->isLoopSimplifyForm() && "Must put loop into normal form first.");
451  assert(L->isLCSSAForm(DT) &&
452  "Must have loops in LCSSA form to track live-out values.");
453 
454  LLVM_DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");
455 
456  // Simulate execution of each iteration of the loop counting instructions,
457  // which would be simplified.
458  // Since the same load will take different values on different iterations,
459  // we literally have to go through all loop's iterations.
460  for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
461  LLVM_DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");
462 
463  // Prepare for the iteration by collecting any simplified entry or backedge
464  // inputs.
465  for (Instruction &I : *L->getHeader()) {
466  auto *PHI = dyn_cast<PHINode>(&I);
467  if (!PHI)
468  break;
469 
470  // The loop header PHI nodes must have exactly two input: one from the
471  // loop preheader and one from the loop latch.
472  assert(
473  PHI->getNumIncomingValues() == 2 &&
474  "Must have an incoming value only for the preheader and the latch.");
475 
476  Value *V = PHI->getIncomingValueForBlock(
477  Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch());
478  Constant *C = dyn_cast<Constant>(V);
479  if (Iteration != 0 && !C)
480  C = SimplifiedValues.lookup(V);
481  if (C)
482  SimplifiedInputValues.push_back({PHI, C});
483  }
484 
485  // Now clear and re-populate the map for the next iteration.
486  SimplifiedValues.clear();
487  while (!SimplifiedInputValues.empty())
488  SimplifiedValues.insert(SimplifiedInputValues.pop_back_val());
489 
490  UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE, L);
491 
492  BBWorklist.clear();
493  BBWorklist.insert(L->getHeader());
494  // Note that we *must not* cache the size, this loop grows the worklist.
495  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
496  BasicBlock *BB = BBWorklist[Idx];
497 
498  // Visit all instructions in the given basic block and try to simplify
499  // it. We don't change the actual IR, just count optimization
500  // opportunities.
501  for (Instruction &I : *BB) {
502  // These won't get into the final code - don't even try calculating the
503  // cost for them.
504  if (isa<DbgInfoIntrinsic>(I) || EphValues.count(&I))
505  continue;
506 
507  // Track this instruction's expected baseline cost when executing the
508  // rolled loop form.
509  RolledDynamicCost += TTI.getUserCost(&I);
510 
511  // Visit the instruction to analyze its loop cost after unrolling,
512  // and if the visitor returns true, mark the instruction as free after
513  // unrolling and continue.
514  bool IsFree = Analyzer.visit(I);
515  bool Inserted = InstCostMap.insert({&I, (int)Iteration,
516  (unsigned)IsFree,
517  /*IsCounted*/ false}).second;
518  (void)Inserted;
519  assert(Inserted && "Cannot have a state for an unvisited instruction!");
520 
521  if (IsFree)
522  continue;
523 
524  // Can't properly model a cost of a call.
525  // FIXME: With a proper cost model we should be able to do it.
526  if (auto *CI = dyn_cast<CallInst>(&I)) {
527  const Function *Callee = CI->getCalledFunction();
528  if (!Callee || TTI.isLoweredToCall(Callee)) {
529  LLVM_DEBUG(dbgs() << "Can't analyze cost of loop with call\n");
530  return None;
531  }
532  }
533 
534  // If the instruction might have a side-effect recursively account for
535  // the cost of it and all the instructions leading up to it.
536  if (I.mayHaveSideEffects())
537  AddCostRecursively(I, Iteration);
538 
539  // If unrolled body turns out to be too big, bail out.
540  if (UnrolledCost > MaxUnrolledLoopSize) {
541  LLVM_DEBUG(dbgs() << " Exceeded threshold.. exiting.\n"
542  << " UnrolledCost: " << UnrolledCost
543  << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
544  << "\n");
545  return None;
546  }
547  }
548 
549  Instruction *TI = BB->getTerminator();
550 
551  // Add in the live successors by first checking whether we have terminator
552  // that may be simplified based on the values simplified by this call.
553  BasicBlock *KnownSucc = nullptr;
554  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
555  if (BI->isConditional()) {
556  if (Constant *SimpleCond =
557  SimplifiedValues.lookup(BI->getCondition())) {
558  // Just take the first successor if condition is undef
559  if (isa<UndefValue>(SimpleCond))
560  KnownSucc = BI->getSuccessor(0);
561  else if (ConstantInt *SimpleCondVal =
562  dyn_cast<ConstantInt>(SimpleCond))
563  KnownSucc = BI->getSuccessor(SimpleCondVal->isZero() ? 1 : 0);
564  }
565  }
566  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
567  if (Constant *SimpleCond =
568  SimplifiedValues.lookup(SI->getCondition())) {
569  // Just take the first successor if condition is undef
570  if (isa<UndefValue>(SimpleCond))
571  KnownSucc = SI->getSuccessor(0);
572  else if (ConstantInt *SimpleCondVal =
573  dyn_cast<ConstantInt>(SimpleCond))
574  KnownSucc = SI->findCaseValue(SimpleCondVal)->getCaseSuccessor();
575  }
576  }
577  if (KnownSucc) {
578  if (L->contains(KnownSucc))
579  BBWorklist.insert(KnownSucc);
580  else
581  ExitWorklist.insert({BB, KnownSucc});
582  continue;
583  }
584 
585  // Add BB's successors to the worklist.
586  for (BasicBlock *Succ : successors(BB))
587  if (L->contains(Succ))
588  BBWorklist.insert(Succ);
589  else
590  ExitWorklist.insert({BB, Succ});
591  AddCostRecursively(*TI, Iteration);
592  }
593 
594  // If we found no optimization opportunities on the first iteration, we
595  // won't find them on later ones too.
596  if (UnrolledCost == RolledDynamicCost) {
597  LLVM_DEBUG(dbgs() << " No opportunities found.. exiting.\n"
598  << " UnrolledCost: " << UnrolledCost << "\n");
599  return None;
600  }
601  }
602 
603  while (!ExitWorklist.empty()) {
604  BasicBlock *ExitingBB, *ExitBB;
605  std::tie(ExitingBB, ExitBB) = ExitWorklist.pop_back_val();
606 
607  for (Instruction &I : *ExitBB) {
608  auto *PN = dyn_cast<PHINode>(&I);
609  if (!PN)
610  break;
611 
612  Value *Op = PN->getIncomingValueForBlock(ExitingBB);
613  if (auto *OpI = dyn_cast<Instruction>(Op))
614  if (L->contains(OpI))
615  AddCostRecursively(*OpI, TripCount - 1);
616  }
617  }
618 
619  LLVM_DEBUG(dbgs() << "Analysis finished:\n"
620  << "UnrolledCost: " << UnrolledCost << ", "
621  << "RolledDynamicCost: " << RolledDynamicCost << "\n");
622  return {{UnrolledCost, RolledDynamicCost}};
623 }
624 
625 /// ApproximateLoopSize - Approximate the size of the loop.
627  const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent,
628  const TargetTransformInfo &TTI,
629  const SmallPtrSetImpl<const Value *> &EphValues, unsigned BEInsns) {
631  for (BasicBlock *BB : L->blocks())
632  Metrics.analyzeBasicBlock(BB, TTI, EphValues);
633  NumCalls = Metrics.NumInlineCandidates;
634  NotDuplicatable = Metrics.notDuplicatable;
635  Convergent = Metrics.convergent;
636 
637  unsigned LoopSize = Metrics.NumInsts;
638 
639  // Don't allow an estimate of size zero. This would allows unrolling of loops
640  // with huge iteration counts, which is a compile time problem even if it's
641  // not a problem for code quality. Also, the code using this size may assume
642  // that each loop has at least three instructions (likely a conditional
643  // branch, a comparison feeding that branch, and some kind of loop increment
644  // feeding that comparison instruction).
645  LoopSize = std::max(LoopSize, BEInsns + 1);
646 
647  return LoopSize;
648 }
649 
650 // Returns the loop hint metadata node with the given name (for example,
651 // "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is
652 // returned.
654  if (MDNode *LoopID = L->getLoopID())
655  return GetUnrollMetadata(LoopID, Name);
656  return nullptr;
657 }
658 
659 // Returns true if the loop has an unroll(full) pragma.
660 static bool HasUnrollFullPragma(const Loop *L) {
661  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.full");
662 }
663 
664 // Returns true if the loop has an unroll(enable) pragma. This metadata is used
665 // for both "#pragma unroll" and "#pragma clang loop unroll(enable)" directives.
666 static bool HasUnrollEnablePragma(const Loop *L) {
667  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.enable");
668 }
669 
670 // Returns true if the loop has an runtime unroll(disable) pragma.
671 static bool HasRuntimeUnrollDisablePragma(const Loop *L) {
672  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable");
673 }
674 
675 // If loop has an unroll_count pragma return the (necessarily
676 // positive) value from the pragma. Otherwise return 0.
677 static unsigned UnrollCountPragmaValue(const Loop *L) {
678  MDNode *MD = GetUnrollMetadataForLoop(L, "llvm.loop.unroll.count");
679  if (MD) {
680  assert(MD->getNumOperands() == 2 &&
681  "Unroll count hint metadata should have two operands.");
682  unsigned Count =
683  mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
684  assert(Count >= 1 && "Unroll count must be positive.");
685  return Count;
686  }
687  return 0;
688 }
689 
690 // Computes the boosting factor for complete unrolling.
691 // If fully unrolling the loop would save a lot of RolledDynamicCost, it would
692 // be beneficial to fully unroll the loop even if unrolledcost is large. We
693 // use (RolledDynamicCost / UnrolledCost) to model the unroll benefits to adjust
694 // the unroll threshold.
695 static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost,
696  unsigned MaxPercentThresholdBoost) {
697  if (Cost.RolledDynamicCost >= std::numeric_limits<unsigned>::max() / 100)
698  return 100;
699  else if (Cost.UnrolledCost != 0)
700  // The boosting factor is RolledDynamicCost / UnrolledCost
701  return std::min(100 * Cost.RolledDynamicCost / Cost.UnrolledCost,
702  MaxPercentThresholdBoost);
703  else
704  return MaxPercentThresholdBoost;
705 }
706 
707 // Returns loop size estimation for unrolled loop.
708 static uint64_t getUnrolledLoopSize(
709  unsigned LoopSize,
711  assert(LoopSize >= UP.BEInsns && "LoopSize should not be less than BEInsns!");
712  return (uint64_t)(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns;
713 }
714 
715 // Returns true if unroll count was set explicitly.
716 // Calculates unroll count and writes it to UP.Count.
717 // Unless IgnoreUser is true, will also use metadata and command-line options
718 // that are specific to to the LoopUnroll pass (which, for instance, are
719 // irrelevant for the LoopUnrollAndJam pass).
720 // FIXME: This function is used by LoopUnroll and LoopUnrollAndJam, but consumes
721 // many LoopUnroll-specific options. The shared functionality should be
722 // refactored into it own function.
724  Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI,
725  ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
726  OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount,
727  unsigned &TripMultiple, unsigned LoopSize,
728  TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound) {
729 
730  // Check for explicit Count.
731  // 1st priority is unroll count set by "unroll-count" option.
732  bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
733  if (UserUnrollCount) {
734  UP.Count = UnrollCount;
735  UP.AllowExpensiveTripCount = true;
736  UP.Force = true;
737  if (UP.AllowRemainder && getUnrolledLoopSize(LoopSize, UP) < UP.Threshold)
738  return true;
739  }
740 
741  // 2nd priority is unroll count set by pragma.
742  unsigned PragmaCount = UnrollCountPragmaValue(L);
743  if (PragmaCount > 0) {
744  UP.Count = PragmaCount;
745  UP.Runtime = true;
746  UP.AllowExpensiveTripCount = true;
747  UP.Force = true;
748  if ((UP.AllowRemainder || (TripMultiple % PragmaCount == 0)) &&
750  return true;
751  }
752  bool PragmaFullUnroll = HasUnrollFullPragma(L);
753  if (PragmaFullUnroll && TripCount != 0) {
754  UP.Count = TripCount;
755  if (getUnrolledLoopSize(LoopSize, UP) < PragmaUnrollThreshold)
756  return false;
757  }
758 
759  bool PragmaEnableUnroll = HasUnrollEnablePragma(L);
760  bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
761  PragmaEnableUnroll || UserUnrollCount;
762 
763  if (ExplicitUnroll && TripCount != 0) {
764  // If the loop has an unrolling pragma, we want to be more aggressive with
765  // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold
766  // value which is larger than the default limits.
767  UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
768  UP.PartialThreshold =
769  std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
770  }
771 
772  // 3rd priority is full unroll count.
773  // Full unroll makes sense only when TripCount or its upper bound could be
774  // statically calculated.
775  // Also we need to check if we exceed FullUnrollMaxCount.
776  // If using the upper bound to unroll, TripMultiple should be set to 1 because
777  // we do not know when loop may exit.
778  // MaxTripCount and ExactTripCount cannot both be non zero since we only
779  // compute the former when the latter is zero.
780  unsigned ExactTripCount = TripCount;
781  assert((ExactTripCount == 0 || MaxTripCount == 0) &&
782  "ExtractTripCount and MaxTripCount cannot both be non zero.");
783  unsigned FullUnrollTripCount = ExactTripCount ? ExactTripCount : MaxTripCount;
784  UP.Count = FullUnrollTripCount;
785  if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
786  // When computing the unrolled size, note that BEInsns are not replicated
787  // like the rest of the loop body.
788  if (getUnrolledLoopSize(LoopSize, UP) < UP.Threshold) {
789  UseUpperBound = (MaxTripCount == FullUnrollTripCount);
790  TripCount = FullUnrollTripCount;
791  TripMultiple = UP.UpperBound ? 1 : TripMultiple;
792  return ExplicitUnroll;
793  } else {
794  // The loop isn't that small, but we still can fully unroll it if that
795  // helps to remove a significant number of instructions.
796  // To check that, run additional analysis on the loop.
798  L, FullUnrollTripCount, DT, SE, EphValues, TTI,
799  UP.Threshold * UP.MaxPercentThresholdBoost / 100)) {
800  unsigned Boost =
802  if (Cost->UnrolledCost < UP.Threshold * Boost / 100) {
803  UseUpperBound = (MaxTripCount == FullUnrollTripCount);
804  TripCount = FullUnrollTripCount;
805  TripMultiple = UP.UpperBound ? 1 : TripMultiple;
806  return ExplicitUnroll;
807  }
808  }
809  }
810  }
811 
812  // 4th priority is loop peeling.
813  computePeelCount(L, LoopSize, UP, TripCount, SE);
814  if (UP.PeelCount) {
815  UP.Runtime = false;
816  UP.Count = 1;
817  return ExplicitUnroll;
818  }
819 
820  // 5th priority is partial unrolling.
821  // Try partial unroll only when TripCount could be statically calculated.
822  if (TripCount) {
823  UP.Partial |= ExplicitUnroll;
824  if (!UP.Partial) {
825  LLVM_DEBUG(dbgs() << " will not try to unroll partially because "
826  << "-unroll-allow-partial not given\n");
827  UP.Count = 0;
828  return false;
829  }
830  if (UP.Count == 0)
831  UP.Count = TripCount;
832  if (UP.PartialThreshold != NoThreshold) {
833  // Reduce unroll count to be modulo of TripCount for partial unrolling.
834  if (getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
835  UP.Count =
836  (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
837  (LoopSize - UP.BEInsns);
838  if (UP.Count > UP.MaxCount)
839  UP.Count = UP.MaxCount;
840  while (UP.Count != 0 && TripCount % UP.Count != 0)
841  UP.Count--;
842  if (UP.AllowRemainder && UP.Count <= 1) {
843  // If there is no Count that is modulo of TripCount, set Count to
844  // largest power-of-two factor that satisfies the threshold limit.
845  // As we'll create fixup loop, do the type of unrolling only if
846  // remainder loop is allowed.
848  while (UP.Count != 0 &&
849  getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
850  UP.Count >>= 1;
851  }
852  if (UP.Count < 2) {
853  if (PragmaEnableUnroll)
854  ORE->emit([&]() {
856  "UnrollAsDirectedTooLarge",
857  L->getStartLoc(), L->getHeader())
858  << "Unable to unroll loop as directed by unroll(enable) "
859  "pragma "
860  "because unrolled size is too large.";
861  });
862  UP.Count = 0;
863  }
864  } else {
865  UP.Count = TripCount;
866  }
867  if (UP.Count > UP.MaxCount)
868  UP.Count = UP.MaxCount;
869  if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
870  UP.Count != TripCount)
871  ORE->emit([&]() {
872  return OptimizationRemarkMissed(DEBUG_TYPE,
873  "FullUnrollAsDirectedTooLarge",
874  L->getStartLoc(), L->getHeader())
875  << "Unable to fully unroll loop as directed by unroll pragma "
876  "because "
877  "unrolled size is too large.";
878  });
879  return ExplicitUnroll;
880  }
881  assert(TripCount == 0 &&
882  "All cases when TripCount is constant should be covered here.");
883  if (PragmaFullUnroll)
884  ORE->emit([&]() {
886  DEBUG_TYPE, "CantFullUnrollAsDirectedRuntimeTripCount",
887  L->getStartLoc(), L->getHeader())
888  << "Unable to fully unroll loop as directed by unroll(full) "
889  "pragma "
890  "because loop has a runtime trip count.";
891  });
892 
893  // 6th priority is runtime unrolling.
894  // Don't unroll a runtime trip count loop when it is disabled.
896  UP.Count = 0;
897  return false;
898  }
899 
900  // Check if the runtime trip count is too small when profile is available.
901  if (L->getHeader()->getParent()->hasProfileData()) {
902  if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
903  if (*ProfileTripCount < FlatLoopTripCountThreshold)
904  return false;
905  else
906  UP.AllowExpensiveTripCount = true;
907  }
908  }
909 
910  // Reduce count based on the type of unrolling and the threshold values.
911  UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
912  if (!UP.Runtime) {
913  LLVM_DEBUG(
914  dbgs() << " will not try to unroll loop with runtime trip count "
915  << "-unroll-runtime not given\n");
916  UP.Count = 0;
917  return false;
918  }
919  if (UP.Count == 0)
921 
922  // Reduce unroll count to be the largest power-of-two factor of
923  // the original count which satisfies the threshold limit.
924  while (UP.Count != 0 &&
925  getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
926  UP.Count >>= 1;
927 
928 #ifndef NDEBUG
929  unsigned OrigCount = UP.Count;
930 #endif
931 
932  if (!UP.AllowRemainder && UP.Count != 0 && (TripMultiple % UP.Count) != 0) {
933  while (UP.Count != 0 && TripMultiple % UP.Count != 0)
934  UP.Count >>= 1;
935  LLVM_DEBUG(
936  dbgs() << "Remainder loop is restricted (that could architecture "
937  "specific or because the loop contains a convergent "
938  "instruction), so unroll count must divide the trip "
939  "multiple, "
940  << TripMultiple << ". Reducing unroll count from " << OrigCount
941  << " to " << UP.Count << ".\n");
942 
943  using namespace ore;
944 
945  if (PragmaCount > 0 && !UP.AllowRemainder)
946  ORE->emit([&]() {
948  "DifferentUnrollCountFromDirected",
949  L->getStartLoc(), L->getHeader())
950  << "Unable to unroll loop the number of times directed by "
951  "unroll_count pragma because remainder loop is restricted "
952  "(that could architecture specific or because the loop "
953  "contains a convergent instruction) and so must have an "
954  "unroll "
955  "count that divides the loop trip multiple of "
956  << NV("TripMultiple", TripMultiple) << ". Unrolling instead "
957  << NV("UnrollCount", UP.Count) << " time(s).";
958  });
959  }
960 
961  if (UP.Count > UP.MaxCount)
962  UP.Count = UP.MaxCount;
963  LLVM_DEBUG(dbgs() << " partially unrolling with count: " << UP.Count
964  << "\n");
965  if (UP.Count < 2)
966  UP.Count = 0;
967  return ExplicitUnroll;
968 }
969 
971  Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
972  const TargetTransformInfo &TTI, AssumptionCache &AC,
975  bool PreserveLCSSA, int OptLevel,
976  bool OnlyWhenForced, bool ForgetAllSCEV, Optional<unsigned> ProvidedCount,
977  Optional<unsigned> ProvidedThreshold, Optional<bool> ProvidedAllowPartial,
978  Optional<bool> ProvidedRuntime, Optional<bool> ProvidedUpperBound,
979  Optional<bool> ProvidedAllowPeeling) {
980  LLVM_DEBUG(dbgs() << "Loop Unroll: F["
981  << L->getHeader()->getParent()->getName() << "] Loop %"
982  << L->getHeader()->getName() << "\n");
984  if (TM & TM_Disable)
986  if (!L->isLoopSimplifyForm()) {
987  LLVM_DEBUG(
988  dbgs() << " Not unrolling loop which is not in loop-simplify form.\n");
990  }
991 
992  // When automtatic unrolling is disabled, do not unroll unless overridden for
993  // this loop.
994  if (OnlyWhenForced && !(TM & TM_Enable))
996 
997  bool OptForSize = L->getHeader()->getParent()->hasOptSize();
998  unsigned NumInlineCandidates;
999  bool NotDuplicatable;
1000  bool Convergent;
1002  L, SE, TTI, BFI, PSI, OptLevel, ProvidedThreshold, ProvidedCount,
1003  ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
1004  ProvidedAllowPeeling);
1005 
1006  // Exit early if unrolling is disabled. For OptForSize, we pick the loop size
1007  // as threshold later on.
1008  if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0) &&
1009  !OptForSize)
1011 
1013  CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
1014 
1015  unsigned LoopSize =
1016  ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
1017  TTI, EphValues, UP.BEInsns);
1018  LLVM_DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
1019  if (NotDuplicatable) {
1020  LLVM_DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable"
1021  << " instructions.\n");
1023  }
1024 
1025  // When optimizing for size, use LoopSize as threshold, to (fully) unroll
1026  // loops, if it does not increase code size.
1027  if (OptForSize)
1028  UP.Threshold = std::max(UP.Threshold, LoopSize);
1029 
1030  if (NumInlineCandidates != 0) {
1031  LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
1033  }
1034 
1035  // Find trip count and trip multiple if count is not available
1036  unsigned TripCount = 0;
1037  unsigned MaxTripCount = 0;
1038  unsigned TripMultiple = 1;
1039  // If there are multiple exiting blocks but one of them is the latch, use the
1040  // latch for the trip count estimation. Otherwise insist on a single exiting
1041  // block for the trip count estimation.
1042  BasicBlock *ExitingBlock = L->getLoopLatch();
1043  if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
1044  ExitingBlock = L->getExitingBlock();
1045  if (ExitingBlock) {
1046  TripCount = SE.getSmallConstantTripCount(L, ExitingBlock);
1047  TripMultiple = SE.getSmallConstantTripMultiple(L, ExitingBlock);
1048  }
1049 
1050  // If the loop contains a convergent operation, the prelude we'd add
1051  // to do the first few instructions before we hit the unrolled loop
1052  // is unsafe -- it adds a control-flow dependency to the convergent
1053  // operation. Therefore restrict remainder loop (try unrollig without).
1054  //
1055  // TODO: This is quite conservative. In practice, convergent_op()
1056  // is likely to be called unconditionally in the loop. In this
1057  // case, the program would be ill-formed (on most architectures)
1058  // unless n were the same on all threads in a thread group.
1059  // Assuming n is the same on all threads, any kind of unrolling is
1060  // safe. But currently llvm's notion of convergence isn't powerful
1061  // enough to express this.
1062  if (Convergent)
1063  UP.AllowRemainder = false;
1064 
1065  // Try to find the trip count upper bound if we cannot find the exact trip
1066  // count.
1067  bool MaxOrZero = false;
1068  if (!TripCount) {
1069  MaxTripCount = SE.getSmallConstantMaxTripCount(L);
1070  MaxOrZero = SE.isBackedgeTakenCountMaxOrZero(L);
1071  // We can unroll by the upper bound amount if it's generally allowed or if
1072  // we know that the loop is executed either the upper bound or zero times.
1073  // (MaxOrZero unrolling keeps only the first loop test, so the number of
1074  // loop tests remains the same compared to the non-unrolled version, whereas
1075  // the generic upper bound unrolling keeps all but the last loop test so the
1076  // number of loop tests goes up which may end up being worse on targets with
1077  // constrained branch predictor resources so is controlled by an option.)
1078  // In addition we only unroll small upper bounds.
1079  if (!(UP.UpperBound || MaxOrZero) || MaxTripCount > UnrollMaxUpperBound) {
1080  MaxTripCount = 0;
1081  }
1082  }
1083 
1084  // computeUnrollCount() decides whether it is beneficial to use upper bound to
1085  // fully unroll the loop.
1086  bool UseUpperBound = false;
1087  bool IsCountSetExplicitly = computeUnrollCount(
1088  L, TTI, DT, LI, SE, EphValues, &ORE, TripCount, MaxTripCount,
1089  TripMultiple, LoopSize, UP, UseUpperBound);
1090  if (!UP.Count)
1092  // Unroll factor (Count) must be less or equal to TripCount.
1093  if (TripCount && UP.Count > TripCount)
1094  UP.Count = TripCount;
1095 
1096  // Save loop properties before it is transformed.
1097  MDNode *OrigLoopID = L->getLoopID();
1098 
1099  // Unroll the loop.
1100  Loop *RemainderLoop = nullptr;
1101  LoopUnrollResult UnrollResult = UnrollLoop(
1102  L,
1103  {UP.Count, TripCount, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount,
1104  UseUpperBound, MaxOrZero, TripMultiple, UP.PeelCount, UP.UnrollRemainder,
1105  ForgetAllSCEV},
1106  LI, &SE, &DT, &AC, &ORE, PreserveLCSSA, &RemainderLoop);
1107  if (UnrollResult == LoopUnrollResult::Unmodified)
1109 
1110  if (RemainderLoop) {
1111  Optional<MDNode *> RemainderLoopID =
1114  if (RemainderLoopID.hasValue())
1115  RemainderLoop->setLoopID(RemainderLoopID.getValue());
1116  }
1117 
1118  if (UnrollResult != LoopUnrollResult::FullyUnrolled) {
1119  Optional<MDNode *> NewLoopID =
1122  if (NewLoopID.hasValue()) {
1123  L->setLoopID(NewLoopID.getValue());
1124 
1125  // Do not setLoopAlreadyUnrolled if loop attributes have been specified
1126  // explicitly.
1127  return UnrollResult;
1128  }
1129  }
1130 
1131  // If loop has an unroll count pragma or unrolled by explicitly set count
1132  // mark loop as unrolled to prevent unrolling beyond that requested.
1133  // If the loop was peeled, we already "used up" the profile information
1134  // we had, so we don't want to unroll or peel again.
1135  if (UnrollResult != LoopUnrollResult::FullyUnrolled &&
1136  (IsCountSetExplicitly || UP.PeelCount))
1138 
1139  return UnrollResult;
1140 }
1141 
1142 namespace {
1143 
1144 class LoopUnroll : public LoopPass {
1145 public:
1146  static char ID; // Pass ID, replacement for typeid
1147 
1148  int OptLevel;
1149 
1150  /// If false, use a cost model to determine whether unrolling of a loop is
1151  /// profitable. If true, only loops that explicitly request unrolling via
1152  /// metadata are considered. All other loops are skipped.
1153  bool OnlyWhenForced;
1154 
1155  /// If false, when SCEV is invalidated, only forget everything in the
1156  /// top-most loop (call forgetTopMostLoop), of the loop being processed.
1157  /// Otherwise, forgetAllLoops and rebuild when needed next.
1158  bool ForgetAllSCEV;
1159 
1160  Optional<unsigned> ProvidedCount;
1161  Optional<unsigned> ProvidedThreshold;
1162  Optional<bool> ProvidedAllowPartial;
1163  Optional<bool> ProvidedRuntime;
1164  Optional<bool> ProvidedUpperBound;
1165  Optional<bool> ProvidedAllowPeeling;
1166 
1167  LoopUnroll(int OptLevel = 2, bool OnlyWhenForced = false,
1168  bool ForgetAllSCEV = false, Optional<unsigned> Threshold = None,
1169  Optional<unsigned> Count = None,
1170  Optional<bool> AllowPartial = None, Optional<bool> Runtime = None,
1171  Optional<bool> UpperBound = None,
1172  Optional<bool> AllowPeeling = None)
1173  : LoopPass(ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced),
1174  ForgetAllSCEV(ForgetAllSCEV), ProvidedCount(std::move(Count)),
1175  ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
1176  ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound),
1177  ProvidedAllowPeeling(AllowPeeling) {
1179  }
1180 
1181  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
1182  if (skipLoop(L))
1183  return false;
1184 
1185  Function &F = *L->getHeader()->getParent();
1186 
1187  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1188  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1189  ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1190  const TargetTransformInfo &TTI =
1191  getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1192  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1193  // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
1194  // pass. Function analyses need to be preserved across loop transformations
1195  // but ORE cannot be preserved (see comment before the pass definition).
1196  OptimizationRemarkEmitter ORE(&F);
1197  bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
1198 
1200  L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr,
1201  PreserveLCSSA, OptLevel, OnlyWhenForced,
1202  ForgetAllSCEV, ProvidedCount, ProvidedThreshold, ProvidedAllowPartial,
1203  ProvidedRuntime, ProvidedUpperBound, ProvidedAllowPeeling);
1204 
1205  if (Result == LoopUnrollResult::FullyUnrolled)
1206  LPM.markLoopAsDeleted(*L);
1207 
1208  return Result != LoopUnrollResult::Unmodified;
1209  }
1210 
1211  /// This transformation requires natural loop information & requires that
1212  /// loop preheaders be inserted into the CFG...
1213  void getAnalysisUsage(AnalysisUsage &AU) const override {
1216  // FIXME: Loop passes are required to preserve domtree, and for now we just
1217  // recreate dom info if anything gets unrolled.
1219  }
1220 };
1221 
1222 } // end anonymous namespace
1223 
1224 char LoopUnroll::ID = 0;
1225 
1226 INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
1230 INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
1231 
1232 Pass *llvm::createLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
1233  bool ForgetAllSCEV, int Threshold, int Count,
1234  int AllowPartial, int Runtime, int UpperBound,
1235  int AllowPeeling) {
1236  // TODO: It would make more sense for this function to take the optionals
1237  // directly, but that's dangerous since it would silently break out of tree
1238  // callers.
1239  return new LoopUnroll(
1240  OptLevel, OnlyWhenForced, ForgetAllSCEV,
1241  Threshold == -1 ? None : Optional<unsigned>(Threshold),
1242  Count == -1 ? None : Optional<unsigned>(Count),
1243  AllowPartial == -1 ? None : Optional<bool>(AllowPartial),
1244  Runtime == -1 ? None : Optional<bool>(Runtime),
1245  UpperBound == -1 ? None : Optional<bool>(UpperBound),
1246  AllowPeeling == -1 ? None : Optional<bool>(AllowPeeling));
1247 }
1248 
1249 Pass *llvm::createSimpleLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
1250  bool ForgetAllSCEV) {
1251  return createLoopUnrollPass(OptLevel, OnlyWhenForced, ForgetAllSCEV, -1, -1,
1252  0, 0, 0, 0);
1253 }
1254 
1257  LPMUpdater &Updater) {
1258  const auto &FAM =
1259  AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
1260  Function *F = L.getHeader()->getParent();
1261 
1262  auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
1263  // FIXME: This should probably be optional rather than required.
1264  if (!ORE)
1266  "LoopFullUnrollPass: OptimizationRemarkEmitterAnalysis not "
1267  "cached at a higher level");
1268 
1269  // Keep track of the previous loop structure so we can identify new loops
1270  // created by unrolling.
1271  Loop *ParentL = L.getParentLoop();
1272  SmallPtrSet<Loop *, 4> OldLoops;
1273  if (ParentL)
1274  OldLoops.insert(ParentL->begin(), ParentL->end());
1275  else
1276  OldLoops.insert(AR.LI.begin(), AR.LI.end());
1277 
1278  std::string LoopName = L.getName();
1279 
1280  bool Changed =
1281  tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE,
1282  /*BFI*/ nullptr, /*PSI*/ nullptr,
1283  /*PreserveLCSSA*/ true, OptLevel, OnlyWhenForced,
1284  /*ForgetAllSCEV*/ false, /*Count*/ None,
1285  /*Threshold*/ None, /*AllowPartial*/ false,
1286  /*Runtime*/ false, /*UpperBound*/ false,
1287  /*AllowPeeling*/ false) != LoopUnrollResult::Unmodified;
1288  if (!Changed)
1289  return PreservedAnalyses::all();
1290 
1291  // The parent must not be damaged by unrolling!
1292 #ifndef NDEBUG
1293  if (ParentL)
1294  ParentL->verifyLoop();
1295 #endif
1296 
1297  // Unrolling can do several things to introduce new loops into a loop nest:
1298  // - Full unrolling clones child loops within the current loop but then
1299  // removes the current loop making all of the children appear to be new
1300  // sibling loops.
1301  //
1302  // When a new loop appears as a sibling loop after fully unrolling,
1303  // its nesting structure has fundamentally changed and we want to revisit
1304  // it to reflect that.
1305  //
1306  // When unrolling has removed the current loop, we need to tell the
1307  // infrastructure that it is gone.
1308  //
1309  // Finally, we support a debugging/testing mode where we revisit child loops
1310  // as well. These are not expected to require further optimizations as either
1311  // they or the loop they were cloned from have been directly visited already.
1312  // But the debugging mode allows us to check this assumption.
1313  bool IsCurrentLoopValid = false;
1314  SmallVector<Loop *, 4> SibLoops;
1315  if (ParentL)
1316  SibLoops.append(ParentL->begin(), ParentL->end());
1317  else
1318  SibLoops.append(AR.LI.begin(), AR.LI.end());
1319  erase_if(SibLoops, [&](Loop *SibLoop) {
1320  if (SibLoop == &L) {
1321  IsCurrentLoopValid = true;
1322  return true;
1323  }
1324 
1325  // Otherwise erase the loop from the list if it was in the old loops.
1326  return OldLoops.count(SibLoop) != 0;
1327  });
1328  Updater.addSiblingLoops(SibLoops);
1329 
1330  if (!IsCurrentLoopValid) {
1331  Updater.markLoopAsDeleted(L, LoopName);
1332  } else {
1333  // We can only walk child loops if the current loop remained valid.
1335  // Walk *all* of the child loops.
1336  SmallVector<Loop *, 4> ChildLoops(L.begin(), L.end());
1337  Updater.addChildLoops(ChildLoops);
1338  }
1339  }
1340 
1342 }
1343 
1344 template <typename RangeT>
1346  SmallVector<Loop *, 8> Worklist;
1347  // We use an internal worklist to build up the preorder traversal without
1348  // recursion.
1349  SmallVector<Loop *, 4> PreOrderLoops, PreOrderWorklist;
1350 
1351  for (Loop *RootL : Loops) {
1352  assert(PreOrderLoops.empty() && "Must start with an empty preorder walk.");
1353  assert(PreOrderWorklist.empty() &&
1354  "Must start with an empty preorder walk worklist.");
1355  PreOrderWorklist.push_back(RootL);
1356  do {
1357  Loop *L = PreOrderWorklist.pop_back_val();
1358  PreOrderWorklist.append(L->begin(), L->end());
1359  PreOrderLoops.push_back(L);
1360  } while (!PreOrderWorklist.empty());
1361 
1362  Worklist.append(PreOrderLoops.begin(), PreOrderLoops.end());
1363  PreOrderLoops.clear();
1364  }
1365  return Worklist;
1366 }
1367 
1370  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
1371  auto &LI = AM.getResult<LoopAnalysis>(F);
1372  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1373  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1374  auto &AC = AM.getResult<AssumptionAnalysis>(F);
1376 
1377  LoopAnalysisManager *LAM = nullptr;
1378  if (auto *LAMProxy = AM.getCachedResult<LoopAnalysisManagerFunctionProxy>(F))
1379  LAM = &LAMProxy->getManager();
1380 
1381  const ModuleAnalysisManager &MAM =
1383  ProfileSummaryInfo *PSI =
1385  auto *BFI = (PSI && PSI->hasProfileSummary()) ?
1386  &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
1387 
1388  bool Changed = false;
1389 
1390  // The unroller requires loops to be in simplified form, and also needs LCSSA.
1391  // Since simplification may add new inner loops, it has to run before the
1392  // legality and profitability checks. This means running the loop unroller
1393  // will simplify all loops, regardless of whether anything end up being
1394  // unrolled.
1395  for (auto &L : LI) {
1396  Changed |=
1397  simplifyLoop(L, &DT, &LI, &SE, &AC, nullptr, false /* PreserveLCSSA */);
1398  Changed |= formLCSSARecursively(*L, DT, &LI, &SE);
1399  }
1400 
1402 
1403  while (!Worklist.empty()) {
1404  // Because the LoopInfo stores the loops in RPO, we walk the worklist
1405  // from back to front so that we work forward across the CFG, which
1406  // for unrolling is only needed to get optimization remarks emitted in
1407  // a forward order.
1408  Loop &L = *Worklist.pop_back_val();
1409 #ifndef NDEBUG
1410  Loop *ParentL = L.getParentLoop();
1411 #endif
1412 
1413  // Check if the profile summary indicates that the profiled application
1414  // has a huge working set size, in which case we disable peeling to avoid
1415  // bloating it further.
1416  Optional<bool> LocalAllowPeeling = UnrollOpts.AllowPeeling;
1417  if (PSI && PSI->hasHugeWorkingSetSize())
1418  LocalAllowPeeling = false;
1419  std::string LoopName = L.getName();
1420  // The API here is quite complex to call and we allow to select some
1421  // flavors of unrolling during construction time (by setting UnrollOpts).
1423  &L, DT, &LI, SE, TTI, AC, ORE, BFI, PSI,
1424  /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, UnrollOpts.OnlyWhenForced,
1425  /*ForgetAllSCEV*/ false, /*Count*/ None,
1426  /*Threshold*/ None, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime,
1427  UnrollOpts.AllowUpperBound, LocalAllowPeeling);
1428  Changed |= Result != LoopUnrollResult::Unmodified;
1429 
1430  // The parent must not be damaged by unrolling!
1431 #ifndef NDEBUG
1432  if (Result != LoopUnrollResult::Unmodified && ParentL)
1433  ParentL->verifyLoop();
1434 #endif
1435 
1436  // Clear any cached analysis results for L if we removed it completely.
1437  if (LAM && Result == LoopUnrollResult::FullyUnrolled)
1438  LAM->clear(L, LoopName);
1439  }
1440 
1441  if (!Changed)
1442  return PreservedAnalyses::all();
1443 
1445 }
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:80
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value *> &EphValues)
Collect a loop&#39;s ephemeral values (those used only by an assume or similar intrinsics in the loop)...
Definition: CodeMetrics.cpp:70
uint64_t CallInst * C
unsigned getSmallConstantTripCount(const Loop *L)
Returns the maximum trip count of the loop if it is a single-exit loop and we can compute a small max...
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
Pass * createSimpleLoopUnrollPass(int OptLevel=2, bool OnlyWhenForced=false, bool ForgetAllSCEV=false)
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
Diagnostic information for missed-optimization remarks.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:224
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
unsigned getSmallConstantTripMultiple(const Loop *L)
Returns the largest constant divisor of the trip count of the loop if it is a single-exit loop and we...
DiagnosticInfoOptimizationBase::Argument NV
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:776
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:139
This class represents lattice values for constants.
Definition: AllocatorList.h:23
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
This header provides classes for managing a pipeline of passes over loops in LLVM IR...
bool convergent
True if this function contains a call to a convergent function.
Definition: CodeMetrics.h:47
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:603
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
static bool HasUnrollEnablePragma(const Loop *L)
bool isLCSSAForm(DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:195
Implements a dense probed hash-table based set.
Definition: DenseSet.h:249
void push_back(const T &Elt)
Definition: SmallVector.h:211
Analysis providing profile information.
The main scalar evolution driver.
#define DEBUG_TYPE
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold, but used for partial/runtime unrolling (set to UINT_MAX to disable).
static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost, unsigned MaxPercentThresholdBoost)
This file contains the declarations for metadata subclasses.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:173
An immutable pass that tracks lazily created AssumptionCache objects.
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
A cache of @llvm.assume calls within a function.
Analysis pass providing the TargetTransformInfo.
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
unsigned second
unsigned NumInlineCandidates
The number of calls to internal functions with a single caller.
Definition: CodeMetrics.h:68
Metadata node.
Definition: Metadata.h:863
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:230
F(f)
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1068
bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const SmallPtrSetImpl< const Value *> &EphValues, OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount, unsigned &TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound)
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel, Optional< unsigned > UserThreshold, Optional< unsigned > UserCount, Optional< bool > UserAllowPartial, Optional< bool > UserRuntime, Optional< bool > UserUpperBound, Optional< bool > UserAllowPeeling)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
bool notDuplicatable
True if this function cannot be duplicated.
Definition: CodeMetrics.h:44
static cl::opt< unsigned > UnrollFullMaxCount("unroll-full-max-count", cl::Hidden, cl::desc("Set the max unroll count for full unrolling, for testing purposes"))
bool shouldOptimizeForSize(Function *F, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
Returns true if function F is suggested to be size-optimized base on the profile. ...
Definition: SizeOpts.cpp:23
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
static LoopUnrollResult tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel, bool OnlyWhenForced, bool ForgetAllSCEV, Optional< unsigned > ProvidedCount, Optional< unsigned > ProvidedThreshold, Optional< bool > ProvidedAllowPartial, Optional< bool > ProvidedRuntime, Optional< bool > ProvidedUpperBound, Optional< bool > ProvidedAllowPeeling)
unsigned getSmallConstantMaxTripCount(const Loop *L)
Returns the upper bound of the loop trip count as a normal unsigned value.
const char *const LLVMLoopUnrollFollowupUnrolled
Definition: UnrollLoop.h:42
void addChildLoops(ArrayRef< Loop *> NewChildLoops)
Loop passes should use this method to indicate they have added new child loops of the current loop...
static cl::opt< unsigned > UnrollPartialThreshold("unroll-partial-threshold", cl::Hidden, cl::desc("The cost threshold for partial loop unrolling"))
bool formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition: LCSSA.cpp:385
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:221
void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:4362
static cl::opt< bool > UnrollRevisitChildLoops("unroll-revisit-child-loops", cl::Hidden, cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. " "This shouldn't typically be needed as child loops (or their " "clones) were already visited."))
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
Hexagon Hardware Loops
static cl::opt< unsigned > FlatLoopTripCountThreshold("flat-loop-tripcount-threshold", cl::init(5), cl::Hidden, cl::desc("If the runtime tripcount for the loop is lower than the " "threshold, the loop is considered as flat and will be less " "aggressively unrolled."))
static bool HasUnrollFullPragma(const Loop *L)
bool AllowPeeling
Allow peeling off loop iterations for loops with low dynamic tripcount.
static cl::opt< unsigned > UnrollCount("unroll-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with " "unroll_count pragma values, for testing purposes"))
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for...
unsigned FullUnrollMaxCount
Set the maximum unrolling factor for full unrolling.
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:952
static cl::opt< bool > UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden, cl::desc("Unroll loops with run-time trip counts"))
static Optional< EstimatedUnrollCost > analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, const SmallPtrSetImpl< const Value *> &EphValues, const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize)
Figure out if the loop is worth full unrolling.
BlockT * getHeader() const
Definition: LoopInfo.h:100
static MDNode * GetUnrollMetadataForLoop(const Loop *L, StringRef Name)
The transformation should be applied without considering a cost model.
Definition: LoopUtils.h:224
void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, unsigned &TripCount, ScalarEvolution &SE)
void visit(Iterator Start, Iterator End)
Definition: InstVisitor.h:89
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
static cl::opt< unsigned > UnrollMaxUpperBound("unroll-max-upperbound", cl::init(8), cl::Hidden, cl::desc("The max of trip count upper bound that is considered in unrolling"))
static bool isEqual(const Function &Caller, const Function &Callee)
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:257
const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:255
const char *const LLVMLoopUnrollFollowupRemainder
Definition: UnrollLoop.h:44
This header provides classes for managing per-loop analyses.
static cl::opt< bool > UnrollAllowRemainder("unroll-allow-remainder", cl::Hidden, cl::desc("Allow generation of a loop remainder (extra iterations) " "when unrolling a loop."))
static cl::opt< unsigned > UnrollMaxPercentThresholdBoost("unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden, cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied " "to the threshold when aggressively unrolling a loop due to the " "dynamic cost savings. If completely unrolling a loop will reduce " "the total runtime from X to Y, we boost the loop unroll " "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, " "X/Y). This limit avoids excessive code bloat."))
void initializeLoopUnrollPass(PassRegistry &)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
bool AllowRemainder
Allow generation of a loop remainder (extra iterations after unroll).
bool hasProfileSummary()
Returns true if profile summary is available.
The loop was fully unrolled into straight-line code.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
static cl::opt< bool > UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden, cl::desc("Allows loops to be peeled when the dynamic " "trip count is known to be low."))
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop...
Definition: LoopInfo.h:203
Conditional or Unconditional Branch instruction.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
This is an important base class in LLVM.
Definition: Constant.h:41
static cl::opt< unsigned > UnrollMaxCount("unroll-max-count", cl::Hidden, cl::desc("Set the max unroll count for partial and runtime unrolling, for" "testing purposes"))
This file contains the declarations for the subclasses of Constant, which represent the different fla...
iterator end() const
Definition: LoopInfo.h:673
void setLoopAlreadyUnrolled()
Add llvm.loop.unroll.disable to this loop&#39;s loop id metadata.
Definition: LoopInfo.cpp:269
Machine Trace Metrics
char & LCSSAID
Definition: LCSSA.cpp:467
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
Represent the analysis usage information of a pass.
void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value *> &EphValues)
Add information about a block to the current state.
static cl::opt< unsigned > UnrollThreshold("unroll-threshold", cl::Hidden, cl::desc("The cost threshold for loop unrolling"))
op_range operands()
Definition: User.h:237
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
Optional< unsigned > getLoopEstimatedTripCount(Loop *L)
Get a loop&#39;s estimated trip count based on branch weight metadata.
Definition: LoopUtils.cpp:623
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
Definition: Function.h:308
static cl::opt< unsigned > UnrollPeelCount("unroll-peel-count", cl::Hidden, cl::desc("Set the unroll peeling count, for testing purposes"))
void addSiblingLoops(ArrayRef< Loop *> NewSibLoops)
Loop passes should use this method to indicate they have added new sibling loops to the current loop...
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:355
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
const char *const LLVMLoopUnrollFollowupAll
Definition: UnrollLoop.h:41
The transformation should not be applied.
Definition: LoopUtils.h:227
A function analysis which provides an AssumptionCache.
unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value *> &EphValues, unsigned BEInsns)
ApproximateLoopSize - Approximate the size of the loop.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:110
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:297
Analysis pass which computes BlockFrequencyInfo.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file. ...
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
static cl::opt< bool > UnrollAllowPartial("unroll-allow-partial", cl::Hidden, cl::desc("Allows loops to be partially unrolled until " "-unroll-threshold loop size is reached."))
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:1160
static cl::opt< unsigned > PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 *1024), cl::Hidden, cl::desc("Unrolled size limit for loops with an unroll(full) or " "unroll_count pragma."))
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:841
iterator begin() const
Definition: LoopInfo.h:142
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition: CodeMetrics.h:32
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:47
static unsigned UnrollCountPragmaValue(const Loop *L)
unsigned DefaultUnrollRuntimeCount
Default unroll count for loops with run-time trip count.
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:374
void markLoopAsDeleted(Loop &L)
Definition: LoopPass.cpp:142
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
static uint64_t getUnrolledLoopSize(unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2&#39;s erase_if which is equivalent t...
Definition: STLExtras.h:1384
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
Optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:250
iterator begin() const
Definition: LoopInfo.h:672
amdgpu Simplify well known AMD library false FunctionCallee Callee
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
static cl::opt< bool > UnrollUnrollRemainder("unroll-remainder", cl::Hidden, cl::desc("Allow the loop remainder to be unrolled."))
static cl::opt< unsigned > Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), cl::init(100), cl::Hidden)
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:387
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Definition: LoopUnroll.cpp:902
Analysis pass that exposes the ScalarEvolution for a function.
static const unsigned NoThreshold
A magic value for use with the Threshold parameter to indicate that the loop unroll should be perform...
LoopT * getParentLoop() const
Definition: LoopInfo.h:101
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
bool hasValue() const
Definition: Optional.h:259
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:211
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:233
unsigned PeelCount
A forced peeling factor (the number of bodied of the original loop that should be peeled off before t...
unsigned Threshold
The cost threshold for the unrolled loop.
int getUserCost(const User *U, ArrayRef< const Value *> Operands) const
Estimate the cost of a given IR user when lowered.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
StringRef getName() const
Definition: LoopInfo.h:596
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:465
Parameters that control the generic loop unrolling transformation.
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable)...
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
LoopUnrollResult UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop=nullptr)
Unroll the given loop by Count.
Definition: LoopUnroll.cpp:334
TransformationMode hasUnrollTransformation(Loop *L)
Definition: LoopUtils.cpp:335
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass&#39;s AnalysisUsage.
Definition: LoopUtils.cpp:136
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:795
iterator end() const
Definition: LoopInfo.h:143
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
TransformationMode
The mode sets how eager a transformation should be applied.
Definition: LoopUtils.h:218
static SmallVector< Loop *, 8 > appendLoopsToWorklist(RangeT &&Loops)
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:211
Pass * createLoopUnrollPass(int OptLevel=2, bool OnlyWhenForced=false, bool ForgetAllSCEV=false, int Threshold=-1, int Count=-1, int AllowPartial=-1, int Runtime=-1, int UpperBound=-1, int AllowPeeling=-1)
bool empty() const
Definition: LoopInfo.h:146
Multiway switch.
static bool HasRuntimeUnrollDisablePragma(const Loop *L)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool hasHugeWorkingSetSize()
Returns true if the working set size of the code is considered huge.
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:565
LLVM Value Representation.
Definition: Value.h:72
succ_range successors(Instruction *I)
Definition: CFG.h:259
unsigned MaxPercentThresholdBoost
If complete unrolling will reduce the cost of the loop, we will boost the Threshold by a certain perc...
bool isLoweredToCall(const Function *F) const
Test whether calls to a function lower to actual program function calls.
The loop was not modified.
static cl::opt< unsigned > UnrollMaxIterationsCountToAnalyze("unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden, cl::desc("Don't allow loop unrolling to simulate more than this number of" "iterations when checking full unroll profitability"))
bool isBackedgeTakenCountMaxOrZero(const Loop *L)
Return true if the backedge taken count is either the value returned by getMaxBackedgeTakenCount or z...
bool UpperBound
Allow using trip count upper bound to unroll loops.
void verifyLoop() const
Verify loop structure.
Definition: LoopInfoImpl.h:294
void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP) const
Get target-customized preferences for the generic loop unrolling transformation.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
A container for analyses that lazily runs them and caches their results.
This pass exposes codegen information to IR-level passes.
This header defines various interfaces for pass management in LLVM.
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1074
#define LLVM_DEBUG(X)
Definition: Debug.h:122
unsigned NumInsts
Number of instructions in the analyzed blocks.
Definition: CodeMetrics.h:53
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:156
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:49
The optimization diagnostic interface.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:1044
LoopUnrollResult
Represents the result of a UnrollLoop invocation.
Definition: UnrollLoop.h:53