LLVM  13.0.0git
LoopUnrollPass.cpp
Go to the documentation of this file.
1 //===- LoopUnroll.cpp - Loop unroller pass --------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass implements a simple loop unroller. It works best when loops have
10 // been canonicalized by the -indvars pass, allowing it to determine the trip
11 // counts of loops easily.
12 //===----------------------------------------------------------------------===//
13 
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/DenseMapInfo.h"
17 #include "llvm/ADT/DenseSet.h"
18 #include "llvm/ADT/None.h"
19 #include "llvm/ADT/Optional.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SetVector.h"
22 #include "llvm/ADT/SmallPtrSet.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/StringRef.h"
30 #include "llvm/Analysis/LoopInfo.h"
31 #include "llvm/Analysis/LoopPass.h"
37 #include "llvm/IR/BasicBlock.h"
38 #include "llvm/IR/CFG.h"
39 #include "llvm/IR/Constant.h"
40 #include "llvm/IR/Constants.h"
41 #include "llvm/IR/DiagnosticInfo.h"
42 #include "llvm/IR/Dominators.h"
43 #include "llvm/IR/Function.h"
44 #include "llvm/IR/Instruction.h"
45 #include "llvm/IR/Instructions.h"
46 #include "llvm/IR/IntrinsicInst.h"
47 #include "llvm/IR/Metadata.h"
48 #include "llvm/IR/PassManager.h"
49 #include "llvm/InitializePasses.h"
50 #include "llvm/Pass.h"
51 #include "llvm/Support/Casting.h"
53 #include "llvm/Support/Debug.h"
56 #include "llvm/Transforms/Scalar.h"
58 #include "llvm/Transforms/Utils.h"
64 #include <algorithm>
65 #include <cassert>
66 #include <cstdint>
67 #include <limits>
68 #include <string>
69 #include <tuple>
70 #include <utility>
71 
72 using namespace llvm;
73 
74 #define DEBUG_TYPE "loop-unroll"
75 
77  "forget-scev-loop-unroll", cl::init(false), cl::Hidden,
78  cl::desc("Forget everything in SCEV when doing LoopUnroll, instead of just"
79  " the current top-most loop. This is sometimes preferred to reduce"
80  " compile time."));
81 
82 static cl::opt<unsigned>
83  UnrollThreshold("unroll-threshold", cl::Hidden,
84  cl::desc("The cost threshold for loop unrolling"));
85 
86 static cl::opt<unsigned>
88  "unroll-optsize-threshold", cl::init(0), cl::Hidden,
89  cl::desc("The cost threshold for loop unrolling when optimizing for "
90  "size"));
91 
93  "unroll-partial-threshold", cl::Hidden,
94  cl::desc("The cost threshold for partial loop unrolling"));
95 
97  "unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden,
98  cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied "
99  "to the threshold when aggressively unrolling a loop due to the "
100  "dynamic cost savings. If completely unrolling a loop will reduce "
101  "the total runtime from X to Y, we boost the loop unroll "
102  "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, "
103  "X/Y). This limit avoids excessive code bloat."));
104 
106  "unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden,
107  cl::desc("Don't allow loop unrolling to simulate more than this number of"
108  "iterations when checking full unroll profitability"));
109 
111  "unroll-count", cl::Hidden,
112  cl::desc("Use this unroll count for all loops including those with "
113  "unroll_count pragma values, for testing purposes"));
114 
116  "unroll-max-count", cl::Hidden,
117  cl::desc("Set the max unroll count for partial and runtime unrolling, for"
118  "testing purposes"));
119 
121  "unroll-full-max-count", cl::Hidden,
122  cl::desc(
123  "Set the max unroll count for full unrolling, for testing purposes"));
124 
125 static cl::opt<bool>
126  UnrollAllowPartial("unroll-allow-partial", cl::Hidden,
127  cl::desc("Allows loops to be partially unrolled until "
128  "-unroll-threshold loop size is reached."));
129 
131  "unroll-allow-remainder", cl::Hidden,
132  cl::desc("Allow generation of a loop remainder (extra iterations) "
133  "when unrolling a loop."));
134 
135 static cl::opt<bool>
136  UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden,
137  cl::desc("Unroll loops with run-time trip counts"));
138 
140  "unroll-max-upperbound", cl::init(8), cl::Hidden,
141  cl::desc(
142  "The max of trip count upper bound that is considered in unrolling"));
143 
145  "pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden,
146  cl::desc("Unrolled size limit for loops with an unroll(full) or "
147  "unroll_count pragma."));
148 
150  "flat-loop-tripcount-threshold", cl::init(5), cl::Hidden,
151  cl::desc("If the runtime tripcount for the loop is lower than the "
152  "threshold, the loop is considered as flat and will be less "
153  "aggressively unrolled."));
154 
156  "unroll-remainder", cl::Hidden,
157  cl::desc("Allow the loop remainder to be unrolled."));
158 
159 // This option isn't ever intended to be enabled, it serves to allow
160 // experiments to check the assumptions about when this kind of revisit is
161 // necessary.
163  "unroll-revisit-child-loops", cl::Hidden,
164  cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. "
165  "This shouldn't typically be needed as child loops (or their "
166  "clones) were already visited."));
167 
169  "unroll-threshold-aggressive", cl::init(300), cl::Hidden,
170  cl::desc("Threshold (max size of unrolled loop) to use in aggressive (O3) "
171  "optimizations"));
172 static cl::opt<unsigned>
173  UnrollThresholdDefault("unroll-threshold-default", cl::init(150),
174  cl::Hidden,
175  cl::desc("Default threshold (max size of unrolled "
176  "loop), used in all but O3 optimizations"));
177 
178 /// A magic value for use with the Threshold parameter to indicate
179 /// that the loop unroll should be performed regardless of how much
180 /// code expansion would result.
182 
183 /// Gather the various unrolling parameters based on the defaults, compiler
184 /// flags, TTI overrides and user specified parameters.
187  BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel,
188  Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
189  Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
190  Optional<bool> UserUpperBound, Optional<unsigned> UserFullUnrollMaxCount) {
192 
193  // Set up the defaults
194  UP.Threshold =
196  UP.MaxPercentThresholdBoost = 400;
198  UP.PartialThreshold = 150;
200  UP.Count = 0;
204  UP.BEInsns = 2;
205  UP.Partial = false;
206  UP.Runtime = false;
207  UP.AllowRemainder = true;
208  UP.UnrollRemainder = false;
209  UP.AllowExpensiveTripCount = false;
210  UP.Force = false;
211  UP.UpperBound = false;
212  UP.UnrollAndJam = false;
215 
216  // Override with any target specific settings
217  TTI.getUnrollingPreferences(L, SE, UP);
218 
219  // Apply size attributes
220  bool OptForSize = L->getHeader()->getParent()->hasOptSize() ||
221  // Let unroll hints / pragmas take precedence over PGSO.
225  if (OptForSize) {
226  UP.Threshold = UP.OptSizeThreshold;
228  UP.MaxPercentThresholdBoost = 100;
229  }
230 
231  // Apply any user values specified by cl::opt
232  if (UnrollThreshold.getNumOccurrences() > 0)
234  if (UnrollPartialThreshold.getNumOccurrences() > 0)
236  if (UnrollMaxPercentThresholdBoost.getNumOccurrences() > 0)
238  if (UnrollMaxCount.getNumOccurrences() > 0)
240  if (UnrollFullMaxCount.getNumOccurrences() > 0)
247  UP.Runtime = UnrollRuntime;
248  if (UnrollMaxUpperBound == 0)
249  UP.UpperBound = false;
252  if (UnrollMaxIterationsCountToAnalyze.getNumOccurrences() > 0)
254 
255  // Apply user values provided by argument
256  if (UserThreshold.hasValue()) {
257  UP.Threshold = *UserThreshold;
258  UP.PartialThreshold = *UserThreshold;
259  }
260  if (UserCount.hasValue())
261  UP.Count = *UserCount;
262  if (UserAllowPartial.hasValue())
263  UP.Partial = *UserAllowPartial;
264  if (UserRuntime.hasValue())
265  UP.Runtime = *UserRuntime;
266  if (UserUpperBound.hasValue())
267  UP.UpperBound = *UserUpperBound;
268  if (UserFullUnrollMaxCount.hasValue())
269  UP.FullUnrollMaxCount = *UserFullUnrollMaxCount;
270 
271  return UP;
272 }
273 
274 namespace {
275 
276 /// A struct to densely store the state of an instruction after unrolling at
277 /// each iteration.
278 ///
279 /// This is designed to work like a tuple of <Instruction *, int> for the
280 /// purposes of hashing and lookup, but to be able to associate two boolean
281 /// states with each key.
282 struct UnrolledInstState {
283  Instruction *I;
284  int Iteration : 30;
285  unsigned IsFree : 1;
286  unsigned IsCounted : 1;
287 };
288 
289 /// Hashing and equality testing for a set of the instruction states.
290 struct UnrolledInstStateKeyInfo {
291  using PtrInfo = DenseMapInfo<Instruction *>;
293 
294  static inline UnrolledInstState getEmptyKey() {
295  return {PtrInfo::getEmptyKey(), 0, 0, 0};
296  }
297 
298  static inline UnrolledInstState getTombstoneKey() {
299  return {PtrInfo::getTombstoneKey(), 0, 0, 0};
300  }
301 
302  static inline unsigned getHashValue(const UnrolledInstState &S) {
303  return PairInfo::getHashValue({S.I, S.Iteration});
304  }
305 
306  static inline bool isEqual(const UnrolledInstState &LHS,
307  const UnrolledInstState &RHS) {
308  return PairInfo::isEqual({LHS.I, LHS.Iteration}, {RHS.I, RHS.Iteration});
309  }
310 };
311 
312 struct EstimatedUnrollCost {
313  /// The estimated cost after unrolling.
314  unsigned UnrolledCost;
315 
316  /// The estimated dynamic cost of executing the instructions in the
317  /// rolled form.
318  unsigned RolledDynamicCost;
319 };
320 
321 } // end anonymous namespace
322 
323 /// Figure out if the loop is worth full unrolling.
324 ///
325 /// Complete loop unrolling can make some loads constant, and we need to know
326 /// if that would expose any further optimization opportunities. This routine
327 /// estimates this optimization. It computes cost of unrolled loop
328 /// (UnrolledCost) and dynamic cost of the original loop (RolledDynamicCost). By
329 /// dynamic cost we mean that we won't count costs of blocks that are known not
330 /// to be executed (i.e. if we have a branch in the loop and we know that at the
331 /// given iteration its condition would be resolved to true, we won't add up the
332 /// cost of the 'false'-block).
333 /// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If
334 /// the analysis failed (no benefits expected from the unrolling, or the loop is
335 /// too big to analyze), the returned value is None.
337  const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE,
338  const SmallPtrSetImpl<const Value *> &EphValues,
339  const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize,
340  unsigned MaxIterationsCountToAnalyze) {
341  // We want to be able to scale offsets by the trip count and add more offsets
342  // to them without checking for overflows, and we already don't want to
343  // analyze *massive* trip counts, so we force the max to be reasonably small.
344  assert(MaxIterationsCountToAnalyze <
345  (unsigned)(std::numeric_limits<int>::max() / 2) &&
346  "The unroll iterations max is too large!");
347 
348  // Only analyze inner loops. We can't properly estimate cost of nested loops
349  // and we won't visit inner loops again anyway.
350  if (!L->isInnermost())
351  return None;
352 
353  // Don't simulate loops with a big or unknown tripcount
354  if (!TripCount || TripCount > MaxIterationsCountToAnalyze)
355  return None;
356 
359  DenseMap<Value *, Value *> SimplifiedValues;
360  SmallVector<std::pair<Value *, Value *>, 4> SimplifiedInputValues;
361 
362  // The estimated cost of the unrolled form of the loop. We try to estimate
363  // this by simplifying as much as we can while computing the estimate.
364  InstructionCost UnrolledCost = 0;
365 
366  // We also track the estimated dynamic (that is, actually executed) cost in
367  // the rolled form. This helps identify cases when the savings from unrolling
368  // aren't just exposing dead control flows, but actual reduced dynamic
369  // instructions due to the simplifications which we expect to occur after
370  // unrolling.
371  InstructionCost RolledDynamicCost = 0;
372 
373  // We track the simplification of each instruction in each iteration. We use
374  // this to recursively merge costs into the unrolled cost on-demand so that
375  // we don't count the cost of any dead code. This is essentially a map from
376  // <instruction, int> to <bool, bool>, but stored as a densely packed struct.
378 
379  // A small worklist used to accumulate cost of instructions from each
380  // observable and reached root in the loop.
381  SmallVector<Instruction *, 16> CostWorklist;
382 
383  // PHI-used worklist used between iterations while accumulating cost.
384  SmallVector<Instruction *, 4> PHIUsedList;
385 
386  // Helper function to accumulate cost for instructions in the loop.
387  auto AddCostRecursively = [&](Instruction &RootI, int Iteration) {
388  assert(Iteration >= 0 && "Cannot have a negative iteration!");
389  assert(CostWorklist.empty() && "Must start with an empty cost list");
390  assert(PHIUsedList.empty() && "Must start with an empty phi used list");
391  CostWorklist.push_back(&RootI);
393  RootI.getFunction()->hasMinSize() ?
396  for (;; --Iteration) {
397  do {
398  Instruction *I = CostWorklist.pop_back_val();
399 
400  // InstCostMap only uses I and Iteration as a key, the other two values
401  // don't matter here.
402  auto CostIter = InstCostMap.find({I, Iteration, 0, 0});
403  if (CostIter == InstCostMap.end())
404  // If an input to a PHI node comes from a dead path through the loop
405  // we may have no cost data for it here. What that actually means is
406  // that it is free.
407  continue;
408  auto &Cost = *CostIter;
409  if (Cost.IsCounted)
410  // Already counted this instruction.
411  continue;
412 
413  // Mark that we are counting the cost of this instruction now.
414  Cost.IsCounted = true;
415 
416  // If this is a PHI node in the loop header, just add it to the PHI set.
417  if (auto *PhiI = dyn_cast<PHINode>(I))
418  if (PhiI->getParent() == L->getHeader()) {
419  assert(Cost.IsFree && "Loop PHIs shouldn't be evaluated as they "
420  "inherently simplify during unrolling.");
421  if (Iteration == 0)
422  continue;
423 
424  // Push the incoming value from the backedge into the PHI used list
425  // if it is an in-loop instruction. We'll use this to populate the
426  // cost worklist for the next iteration (as we count backwards).
427  if (auto *OpI = dyn_cast<Instruction>(
428  PhiI->getIncomingValueForBlock(L->getLoopLatch())))
429  if (L->contains(OpI))
430  PHIUsedList.push_back(OpI);
431  continue;
432  }
433 
434  // First accumulate the cost of this instruction.
435  if (!Cost.IsFree) {
436  UnrolledCost += TTI.getUserCost(I, CostKind);
437  LLVM_DEBUG(dbgs() << "Adding cost of instruction (iteration "
438  << Iteration << "): ");
439  LLVM_DEBUG(I->dump());
440  }
441 
442  // We must count the cost of every operand which is not free,
443  // recursively. If we reach a loop PHI node, simply add it to the set
444  // to be considered on the next iteration (backwards!).
445  for (Value *Op : I->operands()) {
446  // Check whether this operand is free due to being a constant or
447  // outside the loop.
448  auto *OpI = dyn_cast<Instruction>(Op);
449  if (!OpI || !L->contains(OpI))
450  continue;
451 
452  // Otherwise accumulate its cost.
453  CostWorklist.push_back(OpI);
454  }
455  } while (!CostWorklist.empty());
456 
457  if (PHIUsedList.empty())
458  // We've exhausted the search.
459  break;
460 
461  assert(Iteration > 0 &&
462  "Cannot track PHI-used values past the first iteration!");
463  CostWorklist.append(PHIUsedList.begin(), PHIUsedList.end());
464  PHIUsedList.clear();
465  }
466  };
467 
468  // Ensure that we don't violate the loop structure invariants relied on by
469  // this analysis.
470  assert(L->isLoopSimplifyForm() && "Must put loop into normal form first.");
471  assert(L->isLCSSAForm(DT) &&
472  "Must have loops in LCSSA form to track live-out values.");
473 
474  LLVM_DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");
475 
477  L->getHeader()->getParent()->hasMinSize() ?
479  // Simulate execution of each iteration of the loop counting instructions,
480  // which would be simplified.
481  // Since the same load will take different values on different iterations,
482  // we literally have to go through all loop's iterations.
483  for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
484  LLVM_DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");
485 
486  // Prepare for the iteration by collecting any simplified entry or backedge
487  // inputs.
488  for (Instruction &I : *L->getHeader()) {
489  auto *PHI = dyn_cast<PHINode>(&I);
490  if (!PHI)
491  break;
492 
493  // The loop header PHI nodes must have exactly two input: one from the
494  // loop preheader and one from the loop latch.
495  assert(
496  PHI->getNumIncomingValues() == 2 &&
497  "Must have an incoming value only for the preheader and the latch.");
498 
499  Value *V = PHI->getIncomingValueForBlock(
500  Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch());
501  if (Iteration != 0 && SimplifiedValues.count(V))
502  V = SimplifiedValues.lookup(V);
503  SimplifiedInputValues.push_back({PHI, V});
504  }
505 
506  // Now clear and re-populate the map for the next iteration.
507  SimplifiedValues.clear();
508  while (!SimplifiedInputValues.empty())
509  SimplifiedValues.insert(SimplifiedInputValues.pop_back_val());
510 
511  UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE, L);
512 
513  BBWorklist.clear();
514  BBWorklist.insert(L->getHeader());
515  // Note that we *must not* cache the size, this loop grows the worklist.
516  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
517  BasicBlock *BB = BBWorklist[Idx];
518 
519  // Visit all instructions in the given basic block and try to simplify
520  // it. We don't change the actual IR, just count optimization
521  // opportunities.
522  for (Instruction &I : *BB) {
523  // These won't get into the final code - don't even try calculating the
524  // cost for them.
525  if (isa<DbgInfoIntrinsic>(I) || EphValues.count(&I))
526  continue;
527 
528  // Track this instruction's expected baseline cost when executing the
529  // rolled loop form.
530  RolledDynamicCost += TTI.getUserCost(&I, CostKind);
531 
532  // Visit the instruction to analyze its loop cost after unrolling,
533  // and if the visitor returns true, mark the instruction as free after
534  // unrolling and continue.
535  bool IsFree = Analyzer.visit(I);
536  bool Inserted = InstCostMap.insert({&I, (int)Iteration,
537  (unsigned)IsFree,
538  /*IsCounted*/ false}).second;
539  (void)Inserted;
540  assert(Inserted && "Cannot have a state for an unvisited instruction!");
541 
542  if (IsFree)
543  continue;
544 
545  // Can't properly model a cost of a call.
546  // FIXME: With a proper cost model we should be able to do it.
547  if (auto *CI = dyn_cast<CallInst>(&I)) {
548  const Function *Callee = CI->getCalledFunction();
549  if (!Callee || TTI.isLoweredToCall(Callee)) {
550  LLVM_DEBUG(dbgs() << "Can't analyze cost of loop with call\n");
551  return None;
552  }
553  }
554 
555  // If the instruction might have a side-effect recursively account for
556  // the cost of it and all the instructions leading up to it.
557  if (I.mayHaveSideEffects())
558  AddCostRecursively(I, Iteration);
559 
560  // If unrolled body turns out to be too big, bail out.
561  if (UnrolledCost > MaxUnrolledLoopSize) {
562  LLVM_DEBUG(dbgs() << " Exceeded threshold.. exiting.\n"
563  << " UnrolledCost: " << UnrolledCost
564  << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
565  << "\n");
566  return None;
567  }
568  }
569 
570  Instruction *TI = BB->getTerminator();
571 
572  auto getSimplifiedConstant = [&](Value *V) -> Constant * {
573  if (SimplifiedValues.count(V))
574  V = SimplifiedValues.lookup(V);
575  return dyn_cast<Constant>(V);
576  };
577 
578  // Add in the live successors by first checking whether we have terminator
579  // that may be simplified based on the values simplified by this call.
580  BasicBlock *KnownSucc = nullptr;
581  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
582  if (BI->isConditional()) {
583  if (auto *SimpleCond = getSimplifiedConstant(BI->getCondition())) {
584  // Just take the first successor if condition is undef
585  if (isa<UndefValue>(SimpleCond))
586  KnownSucc = BI->getSuccessor(0);
587  else if (ConstantInt *SimpleCondVal =
588  dyn_cast<ConstantInt>(SimpleCond))
589  KnownSucc = BI->getSuccessor(SimpleCondVal->isZero() ? 1 : 0);
590  }
591  }
592  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
593  if (auto *SimpleCond = getSimplifiedConstant(SI->getCondition())) {
594  // Just take the first successor if condition is undef
595  if (isa<UndefValue>(SimpleCond))
596  KnownSucc = SI->getSuccessor(0);
597  else if (ConstantInt *SimpleCondVal =
598  dyn_cast<ConstantInt>(SimpleCond))
599  KnownSucc = SI->findCaseValue(SimpleCondVal)->getCaseSuccessor();
600  }
601  }
602  if (KnownSucc) {
603  if (L->contains(KnownSucc))
604  BBWorklist.insert(KnownSucc);
605  else
606  ExitWorklist.insert({BB, KnownSucc});
607  continue;
608  }
609 
610  // Add BB's successors to the worklist.
611  for (BasicBlock *Succ : successors(BB))
612  if (L->contains(Succ))
613  BBWorklist.insert(Succ);
614  else
615  ExitWorklist.insert({BB, Succ});
616  AddCostRecursively(*TI, Iteration);
617  }
618 
619  // If we found no optimization opportunities on the first iteration, we
620  // won't find them on later ones too.
621  if (UnrolledCost == RolledDynamicCost) {
622  LLVM_DEBUG(dbgs() << " No opportunities found.. exiting.\n"
623  << " UnrolledCost: " << UnrolledCost << "\n");
624  return None;
625  }
626  }
627 
628  while (!ExitWorklist.empty()) {
629  BasicBlock *ExitingBB, *ExitBB;
630  std::tie(ExitingBB, ExitBB) = ExitWorklist.pop_back_val();
631 
632  for (Instruction &I : *ExitBB) {
633  auto *PN = dyn_cast<PHINode>(&I);
634  if (!PN)
635  break;
636 
637  Value *Op = PN->getIncomingValueForBlock(ExitingBB);
638  if (auto *OpI = dyn_cast<Instruction>(Op))
639  if (L->contains(OpI))
640  AddCostRecursively(*OpI, TripCount - 1);
641  }
642  }
643 
644  assert(UnrolledCost.isValid() && RolledDynamicCost.isValid() &&
645  "All instructions must have a valid cost, whether the "
646  "loop is rolled or unrolled.");
647 
648  LLVM_DEBUG(dbgs() << "Analysis finished:\n"
649  << "UnrolledCost: " << UnrolledCost << ", "
650  << "RolledDynamicCost: " << RolledDynamicCost << "\n");
651  return {{unsigned(*UnrolledCost.getValue()),
652  unsigned(*RolledDynamicCost.getValue())}};
653 }
654 
655 /// ApproximateLoopSize - Approximate the size of the loop.
657  const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent,
658  const TargetTransformInfo &TTI,
659  const SmallPtrSetImpl<const Value *> &EphValues, unsigned BEInsns) {
661  for (BasicBlock *BB : L->blocks())
662  Metrics.analyzeBasicBlock(BB, TTI, EphValues);
663  NumCalls = Metrics.NumInlineCandidates;
664  NotDuplicatable = Metrics.notDuplicatable;
665  Convergent = Metrics.convergent;
666 
667  unsigned LoopSize = Metrics.NumInsts;
668 
669  // Don't allow an estimate of size zero. This would allows unrolling of loops
670  // with huge iteration counts, which is a compile time problem even if it's
671  // not a problem for code quality. Also, the code using this size may assume
672  // that each loop has at least three instructions (likely a conditional
673  // branch, a comparison feeding that branch, and some kind of loop increment
674  // feeding that comparison instruction).
675  LoopSize = std::max(LoopSize, BEInsns + 1);
676 
677  return LoopSize;
678 }
679 
680 // Returns the loop hint metadata node with the given name (for example,
681 // "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is
682 // returned.
684  if (MDNode *LoopID = L->getLoopID())
685  return GetUnrollMetadata(LoopID, Name);
686  return nullptr;
687 }
688 
689 // Returns true if the loop has an unroll(full) pragma.
690 static bool hasUnrollFullPragma(const Loop *L) {
691  return getUnrollMetadataForLoop(L, "llvm.loop.unroll.full");
692 }
693 
694 // Returns true if the loop has an unroll(enable) pragma. This metadata is used
695 // for both "#pragma unroll" and "#pragma clang loop unroll(enable)" directives.
696 static bool hasUnrollEnablePragma(const Loop *L) {
697  return getUnrollMetadataForLoop(L, "llvm.loop.unroll.enable");
698 }
699 
700 // Returns true if the loop has an runtime unroll(disable) pragma.
701 static bool hasRuntimeUnrollDisablePragma(const Loop *L) {
702  return getUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable");
703 }
704 
705 // If loop has an unroll_count pragma return the (necessarily
706 // positive) value from the pragma. Otherwise return 0.
707 static unsigned unrollCountPragmaValue(const Loop *L) {
708  MDNode *MD = getUnrollMetadataForLoop(L, "llvm.loop.unroll.count");
709  if (MD) {
710  assert(MD->getNumOperands() == 2 &&
711  "Unroll count hint metadata should have two operands.");
712  unsigned Count =
713  mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
714  assert(Count >= 1 && "Unroll count must be positive.");
715  return Count;
716  }
717  return 0;
718 }
719 
720 // Computes the boosting factor for complete unrolling.
721 // If fully unrolling the loop would save a lot of RolledDynamicCost, it would
722 // be beneficial to fully unroll the loop even if unrolledcost is large. We
723 // use (RolledDynamicCost / UnrolledCost) to model the unroll benefits to adjust
724 // the unroll threshold.
725 static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost,
726  unsigned MaxPercentThresholdBoost) {
727  if (Cost.RolledDynamicCost >= std::numeric_limits<unsigned>::max() / 100)
728  return 100;
729  else if (Cost.UnrolledCost != 0)
730  // The boosting factor is RolledDynamicCost / UnrolledCost
731  return std::min(100 * Cost.RolledDynamicCost / Cost.UnrolledCost,
732  MaxPercentThresholdBoost);
733  else
734  return MaxPercentThresholdBoost;
735 }
736 
737 // Produce an estimate of the unrolled cost of the specified loop. This
738 // is used to a) produce a cost estimate for partial unrolling and b) to
739 // cheaply estimate cost for full unrolling when we don't want to symbolically
740 // evaluate all iterations.
742  const unsigned LoopSize;
743 
744 public:
745  UnrollCostEstimator(Loop &L, unsigned LoopSize) : LoopSize(LoopSize) {}
746 
747  // Returns loop size estimation for unrolled loop, given the unrolling
748  // configuration specified by UP.
750  assert(LoopSize >= UP.BEInsns &&
751  "LoopSize should not be less than BEInsns!");
752  return (uint64_t)(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns;
753  }
754 };
755 
756 // Returns true if unroll count was set explicitly.
757 // Calculates unroll count and writes it to UP.Count.
758 // Unless IgnoreUser is true, will also use metadata and command-line options
759 // that are specific to to the LoopUnroll pass (which, for instance, are
760 // irrelevant for the LoopUnrollAndJam pass).
761 // FIXME: This function is used by LoopUnroll and LoopUnrollAndJam, but consumes
762 // many LoopUnroll-specific options. The shared functionality should be
763 // refactored into it own function.
765  Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI,
766  ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
767  OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount,
768  bool MaxOrZero, unsigned &TripMultiple, unsigned LoopSize,
770  TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) {
771 
772  UnrollCostEstimator UCE(*L, LoopSize);
773 
774  // Use an explicit peel count that has been specified for testing. In this
775  // case it's not permitted to also specify an explicit unroll count.
776  if (PP.PeelCount) {
777  if (UnrollCount.getNumOccurrences() > 0) {
778  report_fatal_error("Cannot specify both explicit peel count and "
779  "explicit unroll count");
780  }
781  UP.Count = 1;
782  UP.Runtime = false;
783  return true;
784  }
785 
786  // Check for explicit Count.
787  // 1st priority is unroll count set by "unroll-count" option.
788  bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
789  if (UserUnrollCount) {
790  UP.Count = UnrollCount;
791  UP.AllowExpensiveTripCount = true;
792  UP.Force = true;
793  if (UP.AllowRemainder && UCE.getUnrolledLoopSize(UP) < UP.Threshold)
794  return true;
795  }
796 
797  // 2nd priority is unroll count set by pragma.
798  unsigned PragmaCount = unrollCountPragmaValue(L);
799  if (PragmaCount > 0) {
800  UP.Count = PragmaCount;
801  UP.Runtime = true;
802  UP.AllowExpensiveTripCount = true;
803  UP.Force = true;
804  if ((UP.AllowRemainder || (TripMultiple % PragmaCount == 0)) &&
806  return true;
807  }
808  bool PragmaFullUnroll = hasUnrollFullPragma(L);
809  if (PragmaFullUnroll && TripCount != 0) {
810  UP.Count = TripCount;
812  return false;
813  }
814 
815  bool PragmaEnableUnroll = hasUnrollEnablePragma(L);
816  bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
817  PragmaEnableUnroll || UserUnrollCount;
818 
819  if (ExplicitUnroll && TripCount != 0) {
820  // If the loop has an unrolling pragma, we want to be more aggressive with
821  // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold
822  // value which is larger than the default limits.
823  UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
824  UP.PartialThreshold =
825  std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
826  }
827 
828  // 3rd priority is full unroll count.
829  // Full unroll makes sense only when TripCount or its upper bound could be
830  // statically calculated.
831  // Also we need to check if we exceed FullUnrollMaxCount.
832  // If using the upper bound to unroll, TripMultiple should be set to 1 because
833  // we do not know when loop may exit.
834 
835  // We can unroll by the upper bound amount if it's generally allowed or if
836  // we know that the loop is executed either the upper bound or zero times.
837  // (MaxOrZero unrolling keeps only the first loop test, so the number of
838  // loop tests remains the same compared to the non-unrolled version, whereas
839  // the generic upper bound unrolling keeps all but the last loop test so the
840  // number of loop tests goes up which may end up being worse on targets with
841  // constrained branch predictor resources so is controlled by an option.)
842  // In addition we only unroll small upper bounds.
843  unsigned FullUnrollMaxTripCount = MaxTripCount;
844  if (!(UP.UpperBound || MaxOrZero) ||
845  FullUnrollMaxTripCount > UnrollMaxUpperBound)
846  FullUnrollMaxTripCount = 0;
847 
848  // UnrollByMaxCount and ExactTripCount cannot both be non zero since we only
849  // compute the former when the latter is zero.
850  unsigned ExactTripCount = TripCount;
851  assert((ExactTripCount == 0 || FullUnrollMaxTripCount == 0) &&
852  "ExtractTripCount and UnrollByMaxCount cannot both be non zero.");
853 
854  unsigned FullUnrollTripCount =
855  ExactTripCount ? ExactTripCount : FullUnrollMaxTripCount;
856  UP.Count = FullUnrollTripCount;
857  if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
858  // When computing the unrolled size, note that BEInsns are not replicated
859  // like the rest of the loop body.
860  if (UCE.getUnrolledLoopSize(UP) < UP.Threshold) {
861  UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
862  TripCount = FullUnrollTripCount;
863  TripMultiple = UP.UpperBound ? 1 : TripMultiple;
864  return ExplicitUnroll;
865  } else {
866  // The loop isn't that small, but we still can fully unroll it if that
867  // helps to remove a significant number of instructions.
868  // To check that, run additional analysis on the loop.
870  L, FullUnrollTripCount, DT, SE, EphValues, TTI,
871  UP.Threshold * UP.MaxPercentThresholdBoost / 100,
873  unsigned Boost =
875  if (Cost->UnrolledCost < UP.Threshold * Boost / 100) {
876  UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
877  TripCount = FullUnrollTripCount;
878  TripMultiple = UP.UpperBound ? 1 : TripMultiple;
879  return ExplicitUnroll;
880  }
881  }
882  }
883  }
884 
885  // 4th priority is loop peeling.
886  computePeelCount(L, LoopSize, PP, TripCount, SE, UP.Threshold);
887  if (PP.PeelCount) {
888  UP.Runtime = false;
889  UP.Count = 1;
890  return ExplicitUnroll;
891  }
892 
893  // 5th priority is partial unrolling.
894  // Try partial unroll only when TripCount could be statically calculated.
895  if (TripCount) {
896  UP.Partial |= ExplicitUnroll;
897  if (!UP.Partial) {
898  LLVM_DEBUG(dbgs() << " will not try to unroll partially because "
899  << "-unroll-allow-partial not given\n");
900  UP.Count = 0;
901  return false;
902  }
903  if (UP.Count == 0)
904  UP.Count = TripCount;
905  if (UP.PartialThreshold != NoThreshold) {
906  // Reduce unroll count to be modulo of TripCount for partial unrolling.
907  if (UCE.getUnrolledLoopSize(UP) > UP.PartialThreshold)
908  UP.Count =
909  (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
910  (LoopSize - UP.BEInsns);
911  if (UP.Count > UP.MaxCount)
912  UP.Count = UP.MaxCount;
913  while (UP.Count != 0 && TripCount % UP.Count != 0)
914  UP.Count--;
915  if (UP.AllowRemainder && UP.Count <= 1) {
916  // If there is no Count that is modulo of TripCount, set Count to
917  // largest power-of-two factor that satisfies the threshold limit.
918  // As we'll create fixup loop, do the type of unrolling only if
919  // remainder loop is allowed.
921  while (UP.Count != 0 &&
923  UP.Count >>= 1;
924  }
925  if (UP.Count < 2) {
926  if (PragmaEnableUnroll)
927  ORE->emit([&]() {
929  "UnrollAsDirectedTooLarge",
930  L->getStartLoc(), L->getHeader())
931  << "Unable to unroll loop as directed by unroll(enable) "
932  "pragma "
933  "because unrolled size is too large.";
934  });
935  UP.Count = 0;
936  }
937  } else {
938  UP.Count = TripCount;
939  }
940  if (UP.Count > UP.MaxCount)
941  UP.Count = UP.MaxCount;
942  if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
943  UP.Count != TripCount)
944  ORE->emit([&]() {
946  "FullUnrollAsDirectedTooLarge",
947  L->getStartLoc(), L->getHeader())
948  << "Unable to fully unroll loop as directed by unroll pragma "
949  "because "
950  "unrolled size is too large.";
951  });
952  LLVM_DEBUG(dbgs() << " partially unrolling with count: " << UP.Count
953  << "\n");
954  return ExplicitUnroll;
955  }
956  assert(TripCount == 0 &&
957  "All cases when TripCount is constant should be covered here.");
958  if (PragmaFullUnroll)
959  ORE->emit([&]() {
961  DEBUG_TYPE, "CantFullUnrollAsDirectedRuntimeTripCount",
962  L->getStartLoc(), L->getHeader())
963  << "Unable to fully unroll loop as directed by unroll(full) "
964  "pragma "
965  "because loop has a runtime trip count.";
966  });
967 
968  // 6th priority is runtime unrolling.
969  // Don't unroll a runtime trip count loop when it is disabled.
971  UP.Count = 0;
972  return false;
973  }
974 
975  // Don't unroll a small upper bound loop unless user or TTI asked to do so.
976  if (MaxTripCount && !UP.Force && MaxTripCount < UnrollMaxUpperBound) {
977  UP.Count = 0;
978  return false;
979  }
980 
981  // Check if the runtime trip count is too small when profile is available.
982  if (L->getHeader()->getParent()->hasProfileData()) {
983  if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
984  if (*ProfileTripCount < FlatLoopTripCountThreshold)
985  return false;
986  else
987  UP.AllowExpensiveTripCount = true;
988  }
989  }
990 
991  // Reduce count based on the type of unrolling and the threshold values.
992  UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
993  if (!UP.Runtime) {
994  LLVM_DEBUG(
995  dbgs() << " will not try to unroll loop with runtime trip count "
996  << "-unroll-runtime not given\n");
997  UP.Count = 0;
998  return false;
999  }
1000  if (UP.Count == 0)
1002 
1003  // Reduce unroll count to be the largest power-of-two factor of
1004  // the original count which satisfies the threshold limit.
1005  while (UP.Count != 0 &&
1007  UP.Count >>= 1;
1008 
1009 #ifndef NDEBUG
1010  unsigned OrigCount = UP.Count;
1011 #endif
1012 
1013  if (!UP.AllowRemainder && UP.Count != 0 && (TripMultiple % UP.Count) != 0) {
1014  while (UP.Count != 0 && TripMultiple % UP.Count != 0)
1015  UP.Count >>= 1;
1016  LLVM_DEBUG(
1017  dbgs() << "Remainder loop is restricted (that could architecture "
1018  "specific or because the loop contains a convergent "
1019  "instruction), so unroll count must divide the trip "
1020  "multiple, "
1021  << TripMultiple << ". Reducing unroll count from " << OrigCount
1022  << " to " << UP.Count << ".\n");
1023 
1024  using namespace ore;
1025 
1026  if (PragmaCount > 0 && !UP.AllowRemainder)
1027  ORE->emit([&]() {
1029  "DifferentUnrollCountFromDirected",
1030  L->getStartLoc(), L->getHeader())
1031  << "Unable to unroll loop the number of times directed by "
1032  "unroll_count pragma because remainder loop is restricted "
1033  "(that could architecture specific or because the loop "
1034  "contains a convergent instruction) and so must have an "
1035  "unroll "
1036  "count that divides the loop trip multiple of "
1037  << NV("TripMultiple", TripMultiple) << ". Unrolling instead "
1038  << NV("UnrollCount", UP.Count) << " time(s).";
1039  });
1040  }
1041 
1042  if (UP.Count > UP.MaxCount)
1043  UP.Count = UP.MaxCount;
1044 
1045  if (MaxTripCount && UP.Count > MaxTripCount)
1046  UP.Count = MaxTripCount;
1047 
1048  LLVM_DEBUG(dbgs() << " runtime unrolling with count: " << UP.Count
1049  << "\n");
1050  if (UP.Count < 2)
1051  UP.Count = 0;
1052  return ExplicitUnroll;
1053 }
1054 
1056  Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
1059  ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel,
1060  bool OnlyWhenForced, bool ForgetAllSCEV, Optional<unsigned> ProvidedCount,
1061  Optional<unsigned> ProvidedThreshold, Optional<bool> ProvidedAllowPartial,
1062  Optional<bool> ProvidedRuntime, Optional<bool> ProvidedUpperBound,
1063  Optional<bool> ProvidedAllowPeeling,
1064  Optional<bool> ProvidedAllowProfileBasedPeeling,
1065  Optional<unsigned> ProvidedFullUnrollMaxCount) {
1066  LLVM_DEBUG(dbgs() << "Loop Unroll: F["
1067  << L->getHeader()->getParent()->getName() << "] Loop %"
1068  << L->getHeader()->getName() << "\n");
1070  if (TM & TM_Disable)
1072  if (!L->isLoopSimplifyForm()) {
1073  LLVM_DEBUG(
1074  dbgs() << " Not unrolling loop which is not in loop-simplify form.\n");
1076  }
1077 
1078  // When automatic unrolling is disabled, do not unroll unless overridden for
1079  // this loop.
1080  if (OnlyWhenForced && !(TM & TM_Enable))
1082 
1083  bool OptForSize = L->getHeader()->getParent()->hasOptSize();
1084  unsigned NumInlineCandidates;
1085  bool NotDuplicatable;
1086  bool Convergent;
1088  L, SE, TTI, BFI, PSI, OptLevel, ProvidedThreshold, ProvidedCount,
1089  ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
1090  ProvidedFullUnrollMaxCount);
1092  L, SE, TTI, ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling, true);
1093 
1094  // Exit early if unrolling is disabled. For OptForSize, we pick the loop size
1095  // as threshold later on.
1096  if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0) &&
1097  !OptForSize)
1099 
1101  CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
1102 
1103  unsigned LoopSize =
1104  ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
1105  TTI, EphValues, UP.BEInsns);
1106  LLVM_DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
1107  if (NotDuplicatable) {
1108  LLVM_DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable"
1109  << " instructions.\n");
1111  }
1112 
1113  // When optimizing for size, use LoopSize + 1 as threshold (we use < Threshold
1114  // later), to (fully) unroll loops, if it does not increase code size.
1115  if (OptForSize)
1116  UP.Threshold = std::max(UP.Threshold, LoopSize + 1);
1117 
1118  if (NumInlineCandidates != 0) {
1119  LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
1121  }
1122 
1123  // Find trip count and trip multiple if count is not available
1124  unsigned TripCount = 0;
1125  unsigned TripMultiple = 1;
1126  // If there are multiple exiting blocks but one of them is the latch, use the
1127  // latch for the trip count estimation. Otherwise insist on a single exiting
1128  // block for the trip count estimation.
1129  BasicBlock *ExitingBlock = L->getLoopLatch();
1130  if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
1131  ExitingBlock = L->getExitingBlock();
1132  if (ExitingBlock) {
1133  TripCount = SE.getSmallConstantTripCount(L, ExitingBlock);
1134  TripMultiple = SE.getSmallConstantTripMultiple(L, ExitingBlock);
1135  }
1136 
1137  // If the loop contains a convergent operation, the prelude we'd add
1138  // to do the first few instructions before we hit the unrolled loop
1139  // is unsafe -- it adds a control-flow dependency to the convergent
1140  // operation. Therefore restrict remainder loop (try unrolling without).
1141  //
1142  // TODO: This is quite conservative. In practice, convergent_op()
1143  // is likely to be called unconditionally in the loop. In this
1144  // case, the program would be ill-formed (on most architectures)
1145  // unless n were the same on all threads in a thread group.
1146  // Assuming n is the same on all threads, any kind of unrolling is
1147  // safe. But currently llvm's notion of convergence isn't powerful
1148  // enough to express this.
1149  if (Convergent)
1150  UP.AllowRemainder = false;
1151 
1152  // Try to find the trip count upper bound if we cannot find the exact trip
1153  // count.
1154  unsigned MaxTripCount = 0;
1155  bool MaxOrZero = false;
1156  if (!TripCount) {
1157  MaxTripCount = SE.getSmallConstantMaxTripCount(L);
1158  MaxOrZero = SE.isBackedgeTakenCountMaxOrZero(L);
1159  }
1160 
1161  // computeUnrollCount() decides whether it is beneficial to use upper bound to
1162  // fully unroll the loop.
1163  bool UseUpperBound = false;
1164  bool IsCountSetExplicitly = computeUnrollCount(
1165  L, TTI, DT, LI, SE, EphValues, &ORE, TripCount, MaxTripCount, MaxOrZero,
1166  TripMultiple, LoopSize, UP, PP, UseUpperBound);
1167  if (!UP.Count)
1169  // Unroll factor (Count) must be less or equal to TripCount.
1170  if (TripCount && UP.Count > TripCount)
1171  UP.Count = TripCount;
1172 
1173  if (PP.PeelCount) {
1174  assert(UP.Count == 1 && "Cannot perform peel and unroll in the same step");
1175  LLVM_DEBUG(dbgs() << "PEELING loop %" << L->getHeader()->getName()
1176  << " with iteration count " << PP.PeelCount << "!\n");
1177  ORE.emit([&]() {
1178  return OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(),
1179  L->getHeader())
1180  << " peeled loop by " << ore::NV("PeelCount", PP.PeelCount)
1181  << " iterations";
1182  });
1183 
1184  if (peelLoop(L, PP.PeelCount, LI, &SE, &DT, &AC, PreserveLCSSA)) {
1185  simplifyLoopAfterUnroll(L, true, LI, &SE, &DT, &AC, &TTI);
1186  // If the loop was peeled, we already "used up" the profile information
1187  // we had, so we don't want to unroll or peel again.
1188  if (PP.PeelProfiledIterations)
1191  }
1193  }
1194 
1195  // Save loop properties before it is transformed.
1196  MDNode *OrigLoopID = L->getLoopID();
1197 
1198  // Unroll the loop.
1199  Loop *RemainderLoop = nullptr;
1200  LoopUnrollResult UnrollResult = UnrollLoop(
1201  L,
1202  {UP.Count, TripCount, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount,
1203  TripMultiple, UP.UnrollRemainder, ForgetAllSCEV},
1204  LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop);
1205  if (UnrollResult == LoopUnrollResult::Unmodified)
1207 
1208  if (RemainderLoop) {
1209  Optional<MDNode *> RemainderLoopID =
1212  if (RemainderLoopID.hasValue())
1213  RemainderLoop->setLoopID(RemainderLoopID.getValue());
1214  }
1215 
1216  if (UnrollResult != LoopUnrollResult::FullyUnrolled) {
1217  Optional<MDNode *> NewLoopID =
1220  if (NewLoopID.hasValue()) {
1221  L->setLoopID(NewLoopID.getValue());
1222 
1223  // Do not setLoopAlreadyUnrolled if loop attributes have been specified
1224  // explicitly.
1225  return UnrollResult;
1226  }
1227  }
1228 
1229  // If loop has an unroll count pragma or unrolled by explicitly set count
1230  // mark loop as unrolled to prevent unrolling beyond that requested.
1231  if (UnrollResult != LoopUnrollResult::FullyUnrolled && IsCountSetExplicitly)
1233 
1234  return UnrollResult;
1235 }
1236 
1237 namespace {
1238 
1239 class LoopUnroll : public LoopPass {
1240 public:
1241  static char ID; // Pass ID, replacement for typeid
1242 
1243  int OptLevel;
1244 
1245  /// If false, use a cost model to determine whether unrolling of a loop is
1246  /// profitable. If true, only loops that explicitly request unrolling via
1247  /// metadata are considered. All other loops are skipped.
1248  bool OnlyWhenForced;
1249 
1250  /// If false, when SCEV is invalidated, only forget everything in the
1251  /// top-most loop (call forgetTopMostLoop), of the loop being processed.
1252  /// Otherwise, forgetAllLoops and rebuild when needed next.
1253  bool ForgetAllSCEV;
1254 
1255  Optional<unsigned> ProvidedCount;
1256  Optional<unsigned> ProvidedThreshold;
1257  Optional<bool> ProvidedAllowPartial;
1258  Optional<bool> ProvidedRuntime;
1259  Optional<bool> ProvidedUpperBound;
1260  Optional<bool> ProvidedAllowPeeling;
1261  Optional<bool> ProvidedAllowProfileBasedPeeling;
1262  Optional<unsigned> ProvidedFullUnrollMaxCount;
1263 
1264  LoopUnroll(int OptLevel = 2, bool OnlyWhenForced = false,
1265  bool ForgetAllSCEV = false, Optional<unsigned> Threshold = None,
1266  Optional<unsigned> Count = None,
1267  Optional<bool> AllowPartial = None, Optional<bool> Runtime = None,
1268  Optional<bool> UpperBound = None,
1269  Optional<bool> AllowPeeling = None,
1270  Optional<bool> AllowProfileBasedPeeling = None,
1271  Optional<unsigned> ProvidedFullUnrollMaxCount = None)
1272  : LoopPass(ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced),
1273  ForgetAllSCEV(ForgetAllSCEV), ProvidedCount(std::move(Count)),
1274  ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
1275  ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound),
1276  ProvidedAllowPeeling(AllowPeeling),
1277  ProvidedAllowProfileBasedPeeling(AllowProfileBasedPeeling),
1278  ProvidedFullUnrollMaxCount(ProvidedFullUnrollMaxCount) {
1280  }
1281 
1282  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
1283  if (skipLoop(L))
1284  return false;
1285 
1286  Function &F = *L->getHeader()->getParent();
1287 
1288  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1289  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1290  ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1291  const TargetTransformInfo &TTI =
1292  getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1293  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1294  // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
1295  // pass. Function analyses need to be preserved across loop transformations
1296  // but ORE cannot be preserved (see comment before the pass definition).
1298  bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
1299 
1301  L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr, PreserveLCSSA, OptLevel,
1302  OnlyWhenForced, ForgetAllSCEV, ProvidedCount, ProvidedThreshold,
1303  ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
1304  ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling,
1305  ProvidedFullUnrollMaxCount);
1306 
1307  if (Result == LoopUnrollResult::FullyUnrolled)
1308  LPM.markLoopAsDeleted(*L);
1309 
1311  }
1312 
1313  /// This transformation requires natural loop information & requires that
1314  /// loop preheaders be inserted into the CFG...
1315  void getAnalysisUsage(AnalysisUsage &AU) const override {
1318  // FIXME: Loop passes are required to preserve domtree, and for now we just
1319  // recreate dom info if anything gets unrolled.
1321  }
1322 };
1323 
1324 } // end anonymous namespace
1325 
1326 char LoopUnroll::ID = 0;
1327 
1328 INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
1332 INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
1333 
1334 Pass *llvm::createLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
1335  bool ForgetAllSCEV, int Threshold, int Count,
1336  int AllowPartial, int Runtime, int UpperBound,
1337  int AllowPeeling) {
1338  // TODO: It would make more sense for this function to take the optionals
1339  // directly, but that's dangerous since it would silently break out of tree
1340  // callers.
1341  return new LoopUnroll(
1342  OptLevel, OnlyWhenForced, ForgetAllSCEV,
1344  Count == -1 ? None : Optional<unsigned>(Count),
1345  AllowPartial == -1 ? None : Optional<bool>(AllowPartial),
1346  Runtime == -1 ? None : Optional<bool>(Runtime),
1347  UpperBound == -1 ? None : Optional<bool>(UpperBound),
1348  AllowPeeling == -1 ? None : Optional<bool>(AllowPeeling));
1349 }
1350 
1351 Pass *llvm::createSimpleLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
1352  bool ForgetAllSCEV) {
1353  return createLoopUnrollPass(OptLevel, OnlyWhenForced, ForgetAllSCEV, -1, -1,
1354  0, 0, 0, 1);
1355 }
1356 
1359  LPMUpdater &Updater) {
1360  // For the new PM, we can't use OptimizationRemarkEmitter as an analysis
1361  // pass. Function analyses need to be preserved across loop transformations
1362  // but ORE cannot be preserved (see comment before the pass definition).
1364 
1365  // Keep track of the previous loop structure so we can identify new loops
1366  // created by unrolling.
1367  Loop *ParentL = L.getParentLoop();
1368  SmallPtrSet<Loop *, 4> OldLoops;
1369  if (ParentL)
1370  OldLoops.insert(ParentL->begin(), ParentL->end());
1371  else
1372  OldLoops.insert(AR.LI.begin(), AR.LI.end());
1373 
1374  std::string LoopName = std::string(L.getName());
1375 
1376  bool Changed = tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, ORE,
1377  /*BFI*/ nullptr, /*PSI*/ nullptr,
1378  /*PreserveLCSSA*/ true, OptLevel,
1379  OnlyWhenForced, ForgetSCEV, /*Count*/ None,
1380  /*Threshold*/ None, /*AllowPartial*/ false,
1381  /*Runtime*/ false, /*UpperBound*/ false,
1382  /*AllowPeeling*/ true,
1383  /*AllowProfileBasedPeeling*/ false,
1384  /*FullUnrollMaxCount*/ None) !=
1386  if (!Changed)
1387  return PreservedAnalyses::all();
1388 
1389  // The parent must not be damaged by unrolling!
1390 #ifndef NDEBUG
1391  if (ParentL)
1392  ParentL->verifyLoop();
1393 #endif
1394 
1395  // Unrolling can do several things to introduce new loops into a loop nest:
1396  // - Full unrolling clones child loops within the current loop but then
1397  // removes the current loop making all of the children appear to be new
1398  // sibling loops.
1399  //
1400  // When a new loop appears as a sibling loop after fully unrolling,
1401  // its nesting structure has fundamentally changed and we want to revisit
1402  // it to reflect that.
1403  //
1404  // When unrolling has removed the current loop, we need to tell the
1405  // infrastructure that it is gone.
1406  //
1407  // Finally, we support a debugging/testing mode where we revisit child loops
1408  // as well. These are not expected to require further optimizations as either
1409  // they or the loop they were cloned from have been directly visited already.
1410  // But the debugging mode allows us to check this assumption.
1411  bool IsCurrentLoopValid = false;
1412  SmallVector<Loop *, 4> SibLoops;
1413  if (ParentL)
1414  SibLoops.append(ParentL->begin(), ParentL->end());
1415  else
1416  SibLoops.append(AR.LI.begin(), AR.LI.end());
1417  erase_if(SibLoops, [&](Loop *SibLoop) {
1418  if (SibLoop == &L) {
1419  IsCurrentLoopValid = true;
1420  return true;
1421  }
1422 
1423  // Otherwise erase the loop from the list if it was in the old loops.
1424  return OldLoops.contains(SibLoop);
1425  });
1426  Updater.addSiblingLoops(SibLoops);
1427 
1428  if (!IsCurrentLoopValid) {
1429  Updater.markLoopAsDeleted(L, LoopName);
1430  } else {
1431  // We can only walk child loops if the current loop remained valid.
1433  // Walk *all* of the child loops.
1434  SmallVector<Loop *, 4> ChildLoops(L.begin(), L.end());
1435  Updater.addChildLoops(ChildLoops);
1436  }
1437  }
1438 
1440 }
1441 
1444  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
1445  auto &LI = AM.getResult<LoopAnalysis>(F);
1446  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1447  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1448  auto &AC = AM.getResult<AssumptionAnalysis>(F);
1450 
1451  LoopAnalysisManager *LAM = nullptr;
1452  if (auto *LAMProxy = AM.getCachedResult<LoopAnalysisManagerFunctionProxy>(F))
1453  LAM = &LAMProxy->getManager();
1454 
1455  auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
1456  ProfileSummaryInfo *PSI =
1457  MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
1458  auto *BFI = (PSI && PSI->hasProfileSummary()) ?
1459  &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
1460 
1461  bool Changed = false;
1462 
1463  // The unroller requires loops to be in simplified form, and also needs LCSSA.
1464  // Since simplification may add new inner loops, it has to run before the
1465  // legality and profitability checks. This means running the loop unroller
1466  // will simplify all loops, regardless of whether anything end up being
1467  // unrolled.
1468  for (auto &L : LI) {
1469  Changed |=
1470  simplifyLoop(L, &DT, &LI, &SE, &AC, nullptr, false /* PreserveLCSSA */);
1471  Changed |= formLCSSARecursively(*L, DT, &LI, &SE);
1472  }
1473 
1474  // Add the loop nests in the reverse order of LoopInfo. See method
1475  // declaration.
1477  appendLoopsToWorklist(LI, Worklist);
1478 
1479  while (!Worklist.empty()) {
1480  // Because the LoopInfo stores the loops in RPO, we walk the worklist
1481  // from back to front so that we work forward across the CFG, which
1482  // for unrolling is only needed to get optimization remarks emitted in
1483  // a forward order.
1484  Loop &L = *Worklist.pop_back_val();
1485 #ifndef NDEBUG
1486  Loop *ParentL = L.getParentLoop();
1487 #endif
1488 
1489  // Check if the profile summary indicates that the profiled application
1490  // has a huge working set size, in which case we disable peeling to avoid
1491  // bloating it further.
1492  Optional<bool> LocalAllowPeeling = UnrollOpts.AllowPeeling;
1493  if (PSI && PSI->hasHugeWorkingSetSize())
1494  LocalAllowPeeling = false;
1495  std::string LoopName = std::string(L.getName());
1496  // The API here is quite complex to call and we allow to select some
1497  // flavors of unrolling during construction time (by setting UnrollOpts).
1499  &L, DT, &LI, SE, TTI, AC, ORE, BFI, PSI,
1500  /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, UnrollOpts.OnlyWhenForced,
1501  UnrollOpts.ForgetSCEV, /*Count*/ None,
1502  /*Threshold*/ None, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime,
1503  UnrollOpts.AllowUpperBound, LocalAllowPeeling,
1504  UnrollOpts.AllowProfileBasedPeeling, UnrollOpts.FullUnrollMaxCount);
1505  Changed |= Result != LoopUnrollResult::Unmodified;
1506 
1507  // The parent must not be damaged by unrolling!
1508 #ifndef NDEBUG
1509  if (Result != LoopUnrollResult::Unmodified && ParentL)
1510  ParentL->verifyLoop();
1511 #endif
1512 
1513  // Clear any cached analysis results for L if we removed it completely.
1514  if (LAM && Result == LoopUnrollResult::FullyUnrolled)
1515  LAM->clear(L, LoopName);
1516  }
1517 
1518  if (!Changed)
1519  return PreservedAnalyses::all();
1520 
1522 }
llvm::InstructionCost
Definition: InstructionCost.h:26
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
FlatLoopTripCountThreshold
static cl::opt< unsigned > FlatLoopTripCountThreshold("flat-loop-tripcount-threshold", cl::init(5), cl::Hidden, cl::desc("If the runtime tripcount for the loop is lower than the " "threshold, the loop is considered as flat and will be less " "aggressively unrolled."))
llvm::OuterAnalysisManagerProxy
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:1051
llvm::TargetTransformInfo::UnrollingPreferences::BEInsns
unsigned BEInsns
Definition: TargetTransformInfo.h:473
AssumptionCache.h
llvm::TargetTransformInfo::UnrollingPreferences::PartialOptSizeThreshold
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
Definition: TargetTransformInfo.h:452
llvm::hasUnrollTransformation
TransformationMode hasUnrollTransformation(const Loop *L)
Definition: LoopUtils.cpp:421
unrollCountPragmaValue
static unsigned unrollCountPragmaValue(const Loop *L)
Definition: LoopUnrollPass.cpp:707
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2320
llvm::TargetTransformInfo::UnrollingPreferences::Runtime
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
Definition: TargetTransformInfo.h:480
llvm::OptimizationRemarkMissed
Diagnostic information for missed-optimization remarks.
Definition: DiagnosticInfo.h:729
llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition: TargetTransformInfo.h:210
llvm::ScalarEvolutionAnalysis
Analysis pass that exposes the ScalarEvolution for a function.
Definition: ScalarEvolution.h:2105
llvm::TargetTransformInfo::UnrollingPreferences::PartialThreshold
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
Definition: TargetTransformInfo.h:448
llvm
Definition: AllocatorList.h:23
LoopSimplify.h
llvm::LoopStandardAnalysisResults::AC
AssumptionCache & AC
Definition: LoopAnalysisManager.h:54
analyzeLoopUnrollCost
static Optional< EstimatedUnrollCost > analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize, unsigned MaxIterationsCountToAnalyze)
Figure out if the loop is worth full unrolling.
Definition: LoopUnrollPass.cpp:336
llvm::UnrolledInstAnalyzer
Definition: LoopUnrollAnalyzer.h:39
Optional.h
llvm::InstructionCost::getValue
Optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
Definition: InstructionCost.h:68
llvm::peelLoop
bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA)
Peel off the first PeelCount iterations of loop L.
Definition: LoopPeel.cpp:667
Metadata.h
UnrollFullMaxCount
static cl::opt< unsigned > UnrollFullMaxCount("unroll-full-max-count", cl::Hidden, cl::desc("Set the max unroll count for full unrolling, for testing purposes"))
UnrollPartialThreshold
static cl::opt< unsigned > UnrollPartialThreshold("unroll-partial-threshold", cl::Hidden, cl::desc("The cost threshold for partial loop unrolling"))
llvm::TargetTransformInfo::UnrollingPreferences::MaxCount
unsigned MaxCount
Definition: TargetTransformInfo.h:464
getUnrollMetadataForLoop
static MDNode * getUnrollMetadataForLoop(const Loop *L, StringRef Name)
Definition: LoopUnrollPass.cpp:683
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
IntrinsicInst.h
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:769
Scalar.h
llvm::ScalarEvolution::getSmallConstantTripMultiple
unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value,...
Definition: ScalarEvolution.cpp:7079
llvm::Function
Definition: Function.h:61
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::lookup
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:197
StringRef.h
Pass.h
llvm::computePeelCount
void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::PeelingPreferences &PP, unsigned &TripCount, ScalarEvolution &SE, unsigned Threshold=UINT_MAX)
Definition: LoopPeel.cpp:293
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::size
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1167
llvm::CodeMetrics
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition: CodeMetrics.h:30
UnrollRevisitChildLoops
static cl::opt< bool > UnrollRevisitChildLoops("unroll-revisit-child-loops", cl::Hidden, cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. " "This shouldn't typically be needed as child loops (or their " "clones) were already visited."))
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::find
iterator find(const_arg_type_t< ValueT > V)
Definition: DenseSet.h:179
SizeOpts.h
llvm::LLVMLoopUnrollFollowupRemainder
const char *const LLVMLoopUnrollFollowupRemainder
Definition: UnrollLoop.h:44
ErrorHandling.h
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:167
LazyBlockFrequencyInfo.h
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:633
llvm::erase_if
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:1656
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:443
hasUnrollFullPragma
static bool hasUnrollFullPragma(const Loop *L)
Definition: LoopUnrollPass.cpp:690
llvm::makeFollowupLoopID
Optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:328
llvm::TargetTransformInfo::UnrollingPreferences::UnrollAndJamInnerLoopThreshold
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
Definition: TargetTransformInfo.h:499
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::TargetTransformInfo::UnrollingPreferences::UnrollRemainder
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
Definition: TargetTransformInfo.h:492
llvm::getLoopAnalysisUsage
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:150
UnrollRuntime
static cl::opt< bool > UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden, cl::desc("Unroll loops with run-time trip counts"))
llvm::TargetTransformInfo::UnrollingPreferences::Count
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
Definition: TargetTransformInfo.h:457
llvm::TargetTransformInfo::TCK_CodeSize
@ TCK_CodeSize
Instruction code size.
Definition: TargetTransformInfo.h:213
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::initializeLoopUnrollPass
void initializeLoopUnrollPass(PassRegistry &)
ScalarEvolution.h
llvm::TargetTransformInfo::UnrollingPreferences::Partial
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
Definition: TargetTransformInfo.h:476
llvm::LoopUnrollOptions::ForgetSCEV
const bool ForgetSCEV
If true, forget all loops when unrolling.
Definition: LoopUnrollPass.h:78
DenseMap.h
llvm::TargetTransformInfo::PeelingPreferences
Definition: TargetTransformInfo.h:529
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:52
llvm::TargetTransformInfo::UnrollingPreferences::FullUnrollMaxCount
unsigned FullUnrollMaxCount
Set the maximum unrolling factor for full unrolling.
Definition: TargetTransformInfo.h:468
llvm::ApproximateLoopSize
unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value * > &EphValues, unsigned BEInsns)
ApproximateLoopSize - Approximate the size of the loop.
Definition: LoopUnrollPass.cpp:656
llvm::Optional< unsigned >
llvm::formLCSSARecursively
bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition: LCSSA.cpp:403
llvm::LoopBase::begin
iterator begin() const
Definition: LoopInfo.h:154
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::MCID::Convergent
@ Convergent
Definition: MCInstrDesc.h:182
STLExtras.h
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:634
llvm::successors
succ_range successors(Instruction *I)
Definition: CFG.h:262
llvm::LoopStandardAnalysisResults::DT
DominatorTree & DT
Definition: LoopAnalysisManager.h:55
UnrollMaxPercentThresholdBoost
static cl::opt< unsigned > UnrollMaxPercentThresholdBoost("unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden, cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied " "to the threshold when aggressively unrolling a loop due to the " "dynamic cost savings. If completely unrolling a loop will reduce " "the total runtime from X to Y, we boost the loop unroll " "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, " "X/Y). This limit avoids excessive code bloat."))
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::insert
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
UnrollThresholdDefault
static cl::opt< unsigned > UnrollThresholdDefault("unroll-threshold-default", cl::init(150), cl::Hidden, cl::desc("Default threshold (max size of unrolled " "loop), used in all but O3 optimizations"))
loops
loops
Definition: LoopInfo.cpp:1104
llvm::TargetTransformInfo::UnrollingPreferences::AllowExpensiveTripCount
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for...
Definition: TargetTransformInfo.h:485
llvm::isEqual
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
Definition: GCNRegPressure.cpp:55
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
F
#define F(x, y, z)
Definition: MD5.cpp:56
loop
Analysis the ScalarEvolution expression for r is< loop > Outside the loop
Definition: README.txt:8
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::MDNode::getNumOperands
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1108
LoopAnalysisManager.h
UnrollCount
static cl::opt< unsigned > UnrollCount("unroll-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with " "unroll_count pragma values, for testing purposes"))
llvm::Optional::hasValue
constexpr bool hasValue() const
Definition: Optional.h:288
UnrollThreshold
static cl::opt< unsigned > UnrollThreshold("unroll-threshold", cl::Hidden, cl::desc("The cost threshold for loop unrolling"))
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
llvm::LoopUnrollOptions::OnlyWhenForced
bool OnlyWhenForced
If false, use a cost model to determine whether unrolling of a loop is profitable.
Definition: LoopUnrollPass.h:73
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::end
iterator end()
Definition: DenseSet.h:174
Instruction.h
CommandLine.h
CodeMetrics.h
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:77
llvm::LoopBase::getParentLoop
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Definition: LoopInfo.h:113
llvm::TM_Enable
@ TM_Enable
The transformation should be applied without considering a cost model.
Definition: LoopUtils.h:277
llvm::DenseMapInfo
Definition: APInt.h:34
llvm::TargetTransformInfo::PeelingPreferences::PeelProfiledIterations
bool PeelProfiledIterations
Allow peeling basing on profile.
Definition: TargetTransformInfo.h:542
llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Definition: BlockFrequencyInfo.h:37
llvm::shouldOptimizeForSize
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
Definition: MachineSizeOpts.cpp:183
llvm::Loop::setLoopAlreadyUnrolled
void setLoopAlreadyUnrolled()
Add llvm.loop.unroll.disable to this loop's loop id metadata.
Definition: LoopInfo.cpp:539
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::GetUnrollMetadata
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Definition: LoopUnroll.cpp:857
Constants.h
UnrollMaxUpperBound
static cl::opt< unsigned > UnrollMaxUpperBound("unroll-max-upperbound", cl::init(8), cl::Hidden, cl::desc("The max of trip count upper bound that is considered in unrolling"))
llvm::SmallVectorImpl::append
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:647
llvm::TargetTransformInfo::UnrollingPreferences::Force
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
Definition: TargetTransformInfo.h:488
int
Clang compiles this i1 i64 store i64 i64 store i64 i64 store i64 i64 store i64 align Which gets codegen d xmm0 movaps rbp movaps rbp movaps rbp movaps rbp rbp rbp rbp rbp It would be better to have movq s of instead of the movaps s LLVM produces ret int
Definition: README.txt:536
llvm::AnalysisManager::clear
void clear(IRUnitT &IR, llvm::StringRef Name)
Clear any cached analysis results for a single unit of IR.
Definition: PassManagerImpl.h:36
llvm::LLVMLoopUnrollFollowupUnrolled
const char *const LLVMLoopUnrollFollowupUnrolled
Definition: UnrollLoop.h:42
UnrollAllowRemainder
static cl::opt< bool > UnrollAllowRemainder("unroll-allow-remainder", cl::Hidden, cl::desc("Allow generation of a loop remainder (extra iterations) " "when unrolling a loop."))
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::PriorityWorklist< T, SmallVector< T, N >, SmallDenseMap< T, ptrdiff_t > >::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: PriorityWorklist.h:154
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::LoopBase::end
iterator end() const
Definition: LoopInfo.h:155
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
DenseSet.h
llvm::TargetTransformInfo::UnrollingPreferences::MaxIterationsCountToAnalyze
unsigned MaxIterationsCountToAnalyze
Don't allow loop unrolling to simulate more than this number of iterations when checking full unroll ...
Definition: TargetTransformInfo.h:502
false
Definition: StackSlotColoring.cpp:142
llvm::simplifyLoopAfterUnroll
void simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI)
Perform some cleanup and simplifications on loops after unrolling.
Definition: LoopUnroll.cpp:204
llvm::TargetTransformInfo::UnrollingPreferences::UnrollAndJam
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
Definition: TargetTransformInfo.h:494
llvm::LoopUnrollOptions::FullUnrollMaxCount
Optional< unsigned > FullUnrollMaxCount
Definition: LoopUnrollPass.h:67
llvm::Instruction
Definition: Instruction.h:45
llvm::LoopBase::verifyLoop
void verifyLoop() const
Verify loop structure.
Definition: LoopInfoImpl.h:286
llvm::PGSOQueryType::IRPass
@ IRPass
llvm::report_fatal_error
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
UnrollCostEstimator::getUnrolledLoopSize
uint64_t getUnrolledLoopSize(TargetTransformInfo::UnrollingPreferences &UP)
Definition: LoopUnrollPass.cpp:749
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:404
LoopUtils.h
llvm::CodeMetrics::collectEphemeralValues
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:70
llvm::TargetTransformInfo::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP) const
Get target-customized preferences for the generic loop unrolling transformation.
Definition: TargetTransformInfo.cpp:319
llvm::LPPassManager
Definition: LoopPass.h:75
SmallPtrSet.h
llvm::LoopUnrollOptions::AllowPeeling
Optional< bool > AllowPeeling
Definition: LoopUnrollPass.h:63
llvm::LoopBase::getExitingBlock
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:49
Utils.h
llvm::BlockFrequencyAnalysis
Analysis pass which computes BlockFrequencyInfo.
Definition: BlockFrequencyInfo.h:112
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::empty
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
llvm::Loop::getName
StringRef getName() const
Definition: LoopInfo.h:863
llvm::None
const NoneType None
Definition: None.h:23
llvm::LoopUnrollResult::FullyUnrolled
@ FullyUnrolled
The loop was fully unrolled into straight-line code.
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::createSimpleLoopUnrollPass
Pass * createSimpleLoopUnrollPass(int OptLevel=2, bool OnlyWhenForced=false, bool ForgetAllSCEV=false)
Definition: LoopUnrollPass.cpp:1351
CFG.h
LoopInfo.h
llvm::LoopUnrollOptions::AllowUpperBound
Optional< bool > AllowUpperBound
Definition: LoopUnrollPass.h:65
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:39
llvm::cl::ZeroOrMore
@ ZeroOrMore
Definition: CommandLine.h:120
llvm::LLVMLoopUnrollFollowupAll
const char *const LLVMLoopUnrollFollowupAll
Definition: UnrollLoop.h:41
UnrollMaxCount
static cl::opt< unsigned > UnrollMaxCount("unroll-max-count", cl::Hidden, cl::desc("Set the max unroll count for partial and runtime unrolling, for" "testing purposes"))
llvm::ScalarEvolution::getSmallConstantTripCount
unsigned getSmallConstantTripCount(const Loop *L)
Returns the exact trip count of the loop if we can compute it, and the result is a small constant.
Definition: ScalarEvolution.cpp:7043
llvm::MDNode::getOperand
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1102
llvm::DenseSet
Implements a dense probed hash-table based set.
Definition: DenseSet.h:268
llvm::getLoopPassPreservedAnalyses
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Definition: LoopAnalysisManager.cpp:140
llvm::Loop::isLoopSimplifyForm
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to,...
Definition: LoopInfo.cpp:478
BasicBlock.h
llvm::cl::opt< bool >
hasUnrollEnablePragma
static bool hasUnrollEnablePragma(const Loop *L)
Definition: LoopUnrollPass.cpp:696
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::UnrollLoop
LoopUnrollResult UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const llvm::TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop=nullptr)
Unroll the given loop by Count.
Definition: LoopUnroll.cpp:268
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::clear
void clear()
Definition: DenseMap.h:111
ProfileSummaryInfo.h
tryToUnrollLoop
static LoopUnrollResult tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel, bool OnlyWhenForced, bool ForgetAllSCEV, Optional< unsigned > ProvidedCount, Optional< unsigned > ProvidedThreshold, Optional< bool > ProvidedAllowPartial, Optional< bool > ProvidedRuntime, Optional< bool > ProvidedUpperBound, Optional< bool > ProvidedAllowPeeling, Optional< bool > ProvidedAllowProfileBasedPeeling, Optional< unsigned > ProvidedFullUnrollMaxCount)
Definition: LoopUnrollPass.cpp:1055
llvm::LoopPass
Definition: LoopPass.h:27
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2376
llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition: LoopPassManager.h:240
llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:169
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::SmallPriorityWorklist
A version of PriorityWorklist that selects small size optimized data structures for the vector and ma...
Definition: PriorityWorklist.h:256
llvm::DenseMap
Definition: DenseMap.h:714
llvm::ScalarEvolution::getSmallConstantMaxTripCount
unsigned getSmallConstantMaxTripCount(const Loop *L)
Returns the upper bound of the loop trip count as a normal unsigned value.
Definition: ScalarEvolution.cpp:7059
llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition: TargetTransformInfo.h:423
llvm::gatherUnrollingPreferences
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel, Optional< unsigned > UserThreshold, Optional< unsigned > UserCount, Optional< bool > UserAllowPartial, Optional< bool > UserRuntime, Optional< bool > UserUpperBound, Optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
Definition: LoopUnrollPass.cpp:185
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::LoopUnrollResult::PartiallyUnrolled
@ PartiallyUnrolled
The loop was partially unrolled – we still have a loop, but with a smaller trip count.
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
Metrics
Machine Trace Metrics
Definition: MachineTraceMetrics.cpp:53
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:167
llvm::LoopUnrollResult
LoopUnrollResult
Represents the result of a UnrollLoop invocation.
Definition: UnrollLoop.h:53
llvm::LoopUnrollOptions::OptLevel
int OptLevel
Definition: LoopUnrollPass.h:68
llvm::PriorityWorklist< T, SmallVector< T, N >, SmallDenseMap< T, ptrdiff_t > >::empty
bool empty() const
Determine if the PriorityWorklist is empty or not.
Definition: PriorityWorklist.h:68
UnrollThresholdAggressive
static cl::opt< unsigned > UnrollThresholdAggressive("unroll-threshold-aggressive", cl::init(300), cl::Hidden, cl::desc("Threshold (max size of unrolled loop) to use in aggressive (O3) " "optimizations"))
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:216
llvm::TM_Disable
@ TM_Disable
The transformation should not be applied.
Definition: LoopUtils.h:280
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
UnrollCostEstimator::UnrollCostEstimator
UnrollCostEstimator(Loop &L, unsigned LoopSize)
Definition: LoopUnrollPass.cpp:745
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1540
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
NoThreshold
static const unsigned NoThreshold
A magic value for use with the Threshold parameter to indicate that the loop unroll should be perform...
Definition: LoopUnrollPass.cpp:181
llvm::TTI
TargetTransformInfo TTI
Definition: TargetTransformInfo.h:162
llvm::TargetTransformInfo::isLoweredToCall
bool isLoweredToCall(const Function *F) const
Test whether calls to a function lower to actual program function calls.
Definition: TargetTransformInfo.cpp:275
llvm::LoopInfoBase::begin
iterator begin() const
Definition: LoopInfo.h:939
llvm::LPMUpdater::markLoopAsDeleted
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
Definition: LoopPassManager.h:260
llvm::MDNode
Metadata node.
Definition: Metadata.h:897
LoopPassManager.h
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
UnrollAllowPartial
static cl::opt< bool > UnrollAllowPartial("unroll-allow-partial", cl::Hidden, cl::desc("Allows loops to be partially unrolled until " "-unroll-threshold loop size is reached."))
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
PragmaUnrollThreshold
static cl::opt< unsigned > PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 *1024), cl::Hidden, cl::desc("Unrolled size limit for loops with an unroll(full) or " "unroll_count pragma."))
llvm::ProfileSummaryAnalysis
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Definition: ProfileSummaryInfo.h:203
llvm::TargetTransformInfo::PeelingPreferences::PeelCount
unsigned PeelCount
A forced peeling factor (the number of bodied of the original loop that should be peeled off before t...
Definition: TargetTransformInfo.h:533
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:200
llvm::LoopInfo
Definition: LoopInfo.h:1080
None.h
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:41
llvm::TargetTransformInfo::TCK_SizeAndLatency
@ TCK_SizeAndLatency
The weighted sum of size and latency.
Definition: TargetTransformInfo.h:214
llvm::TM_ForcedByUser
@ TM_ForcedByUser
The transformation was directed by the user, e.g.
Definition: LoopUtils.h:288
LoopPass.h
llvm::appendLoopsToWorklist
void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
Definition: LoopUtils.cpp:1532
if
if(llvm_vc STREQUAL "") set(fake_version_inc "$
Definition: CMakeLists.txt:14
llvm::TargetTransformInfo::getUserCost
InstructionCost getUserCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
Definition: TargetTransformInfo.cpp:223
llvm::Instruction::getFunction
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:70
CostKind
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
llvm::LoopStandardAnalysisResults::LI
LoopInfo & LI
Definition: LoopAnalysisManager.h:56
llvm::TargetTransformInfo::UnrollingPreferences::DefaultUnrollRuntimeCount
unsigned DefaultUnrollRuntimeCount
Default unroll count for loops with run-time trip count.
Definition: TargetTransformInfo.h:459
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
UnrollOptSizeThreshold
static cl::opt< unsigned > UnrollOptSizeThreshold("unroll-optsize-threshold", cl::init(0), cl::Hidden, cl::desc("The cost threshold for loop unrolling when optimizing for " "size"))
llvm::InstructionCost::isValid
bool isValid() const
Definition: InstructionCost.h:60
BlockFrequencyInfo.h
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:418
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:294
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
llvm::LoopUnrollOptions::AllowProfileBasedPeeling
Optional< bool > AllowProfileBasedPeeling
Definition: LoopUnrollPass.h:66
UnrollUnrollRemainder
static cl::opt< bool > UnrollUnrollRemainder("unroll-remainder", cl::Hidden, cl::desc("Allow the loop remainder to be unrolled."))
LoopUnrollAnalyzer.h
LAM
LoopAnalysisManager LAM
Definition: PassBuilderBindings.cpp:58
llvm::Function::hasOptSize
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:716
llvm::computeUnrollCount
bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned &TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
Definition: LoopUnrollPass.cpp:764
Callee
amdgpu Simplify well known AMD library false FunctionCallee Callee
Definition: AMDGPULibCalls.cpp:206
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::clear
void clear()
Completely clear the SetVector.
Definition: SetVector.h:220
UnrollCostEstimator
Definition: LoopUnrollPass.cpp:741
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:165
llvm::LCSSAID
char & LCSSAID
Definition: LCSSA.cpp:485
llvm::ForgetSCEVInLoopUnroll
cl::opt< bool > ForgetSCEVInLoopUnroll
Constant.h
llvm::LoopUnrollPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: LoopUnrollPass.cpp:1442
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:52
std
Definition: BitVector.h:838
llvm::Loop::setLoopID
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:527
llvm::Loop::isLCSSAForm
bool isLCSSAForm(const DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:461
llvm::LPPassManager::markLoopAsDeleted
void markLoopAsDeleted(Loop &L)
Definition: LoopPass.cpp:113
llvm::LoopStandardAnalysisResults::TTI
TargetTransformInfo & TTI
Definition: LoopAnalysisManager.h:59
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:314
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::TargetTransformInfo::UnrollingPreferences::UpperBound
bool UpperBound
Allow using trip count upper bound to unroll loops.
Definition: TargetTransformInfo.h:490
LoopUnrollPass.h
llvm::Loop::getLoopID
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:503
llvm::ScalarEvolution::isBackedgeTakenCountMaxOrZero
bool isBackedgeTakenCountMaxOrZero(const Loop *L)
Return true if the backedge taken count is either the value returned by getConstantMaxBackedgeTakenCo...
Definition: ScalarEvolution.cpp:7161
llvm::AnalysisManager::getCachedResult
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:788
llvm::TargetTransformInfo::UnrollingPreferences::AllowRemainder
bool AllowRemainder
Allow generation of a loop remainder (extra iterations after unroll).
Definition: TargetTransformInfo.h:482
Casting.h
DiagnosticInfo.h
Function.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
PassManager.h
UnrollMaxIterationsCountToAnalyze
static cl::opt< unsigned > UnrollMaxIterationsCountToAnalyze("unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden, cl::desc("Don't allow loop unrolling to simulate more than this number of" "iterations when checking full unroll profitability"))
llvm::simplifyLoop
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
Definition: LoopSimplify.cpp:719
llvm::Function::hasProfileData
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
Definition: Function.h:340
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:584
isEqual
static bool isEqual(const Function &Caller, const Function &Callee)
Definition: Attributes.cpp:2130
llvm::LPMUpdater::addChildLoops
void addChildLoops(ArrayRef< Loop * > NewChildLoops)
Loop passes should use this method to indicate they have added new child loops of the current loop.
Definition: LoopPassManager.h:283
llvm::LoopStandardAnalysisResults::SE
ScalarEvolution & SE
Definition: LoopAnalysisManager.h:57
llvm::LoopUnrollOptions::AllowPartial
Optional< bool > AllowPartial
Definition: LoopUnrollPass.h:62
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:252
llvm::LoopInfoBase::end
iterator end() const
Definition: LoopInfo.h:940
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:684
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
Instructions.h
INITIALIZE_PASS_BEGIN
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:51
SmallVector.h
hasRuntimeUnrollDisablePragma
static bool hasRuntimeUnrollDisablePragma(const Loop *L)
Definition: LoopUnrollPass.cpp:701
DEBUG_TYPE
#define DEBUG_TYPE
Definition: LoopUnrollPass.cpp:74
Dominators.h
llvm::TargetTransformInfo::UnrollingPreferences::Threshold
unsigned Threshold
The cost threshold for the unrolled loop.
Definition: TargetTransformInfo.h:431
UnrollLoop.h
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
TargetTransformInfo.h
Threshold
static cl::opt< unsigned > Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), cl::init(100), cl::Hidden)
DenseMapInfo.h
llvm::Function::hasMinSize
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:713
llvm::SmallPtrSetImpl< const Value * >
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:307
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::InnerAnalysisManagerProxy
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:926
llvm::TargetTransformInfo::UnrollingPreferences::OptSizeThreshold
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
Definition: TargetTransformInfo.h:445
llvm::LoopBase::isLoopExiting
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
Definition: LoopInfo.h:225
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3179
llvm::cl::desc
Definition: CommandLine.h:414
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3035
llvm::LoopUnrollResult::Unmodified
@ Unmodified
The loop was not modified.
raw_ostream.h
llvm::SmallPtrSetImpl::contains
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:388
llvm::LPMUpdater::addSiblingLoops
void addSiblingLoops(ArrayRef< Loop * > NewSibLoops)
Loop passes should use this method to indicate they have added new sibling loops to the current loop.
Definition: LoopPassManager.h:310
LoopPeel.h
llvm::getLoopEstimatedTripCount
Optional< unsigned > getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight=nullptr)
Returns a loop's estimated trip count based on branch weight metadata.
Definition: LoopUtils.cpp:831
InitializePasses.h
llvm::OptimizationRemarkEmitterAnalysis
Definition: OptimizationRemarkEmitter.h:164
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SetVector.h:232
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
getFullUnrollBoostingFactor
static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost, unsigned MaxPercentThresholdBoost)
Definition: LoopUnrollPass.cpp:725
llvm::TransformationMode
TransformationMode
The mode sets how eager a transformation should be applied.
Definition: LoopUtils.h:271
Debug.h
llvm::gatherPeelingPreferences
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, Optional< bool > UserAllowPeeling, Optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
Definition: LoopPeel.cpp:624
llvm::Optional::getValue
constexpr const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:282
llvm::LoopAnalysis
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1233
SetVector.h
llvm::TargetTransformInfo::UnrollingPreferences::MaxPercentThresholdBoost
unsigned MaxPercentThresholdBoost
If complete unrolling will reduce the cost of the loop, we will boost the Threshold by a certain perc...
Definition: TargetTransformInfo.h:442
llvm::createLoopUnrollPass
Pass * createLoopUnrollPass(int OptLevel=2, bool OnlyWhenForced=false, bool ForgetAllSCEV=false, int Threshold=-1, int Count=-1, int AllowPartial=-1, int Runtime=-1, int UpperBound=-1, int AllowPeeling=-1)
Definition: LoopUnrollPass.cpp:1334
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
llvm::LoopUnrollOptions::AllowRuntime
Optional< bool > AllowRuntime
Definition: LoopUnrollPass.h:64
llvm::LoopFullUnrollPass::run
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LoopUnrollPass.cpp:1357
llvm::omp::OMPScheduleType::Runtime
@ Runtime