LLVM  13.0.0git
LoopRerollPass.cpp
Go to the documentation of this file.
1 //===- LoopReroll.cpp - Loop rerolling pass -------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass implements a simple loop reroller.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/ADT/APInt.h"
14 #include "llvm/ADT/BitVector.h"
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/DenseSet.h"
17 #include "llvm/ADT/MapVector.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/SmallPtrSet.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Statistic.h"
24 #include "llvm/Analysis/LoopInfo.h"
25 #include "llvm/Analysis/LoopPass.h"
30 #include "llvm/IR/BasicBlock.h"
31 #include "llvm/IR/Constants.h"
32 #include "llvm/IR/DataLayout.h"
33 #include "llvm/IR/DerivedTypes.h"
34 #include "llvm/IR/Dominators.h"
35 #include "llvm/IR/IRBuilder.h"
36 #include "llvm/IR/InstrTypes.h"
37 #include "llvm/IR/Instruction.h"
38 #include "llvm/IR/Instructions.h"
39 #include "llvm/IR/IntrinsicInst.h"
40 #include "llvm/IR/Intrinsics.h"
41 #include "llvm/IR/Module.h"
42 #include "llvm/IR/Type.h"
43 #include "llvm/IR/Use.h"
44 #include "llvm/IR/User.h"
45 #include "llvm/IR/Value.h"
46 #include "llvm/InitializePasses.h"
47 #include "llvm/Pass.h"
48 #include "llvm/Support/Casting.h"
50 #include "llvm/Support/Debug.h"
52 #include "llvm/Transforms/Scalar.h"
54 #include "llvm/Transforms/Utils.h"
59 #include <cassert>
60 #include <cstddef>
61 #include <cstdint>
62 #include <cstdlib>
63 #include <iterator>
64 #include <map>
65 #include <utility>
66 
67 using namespace llvm;
68 
69 #define DEBUG_TYPE "loop-reroll"
70 
71 STATISTIC(NumRerolledLoops, "Number of rerolled loops");
72 
73 static cl::opt<unsigned>
74 NumToleratedFailedMatches("reroll-num-tolerated-failed-matches", cl::init(400),
75  cl::Hidden,
76  cl::desc("The maximum number of failures to tolerate"
77  " during fuzzy matching. (default: 400)"));
78 
79 // This loop re-rolling transformation aims to transform loops like this:
80 //
81 // int foo(int a);
82 // void bar(int *x) {
83 // for (int i = 0; i < 500; i += 3) {
84 // foo(i);
85 // foo(i+1);
86 // foo(i+2);
87 // }
88 // }
89 //
90 // into a loop like this:
91 //
92 // void bar(int *x) {
93 // for (int i = 0; i < 500; ++i)
94 // foo(i);
95 // }
96 //
97 // It does this by looking for loops that, besides the latch code, are composed
98 // of isomorphic DAGs of instructions, with each DAG rooted at some increment
99 // to the induction variable, and where each DAG is isomorphic to the DAG
100 // rooted at the induction variable (excepting the sub-DAGs which root the
101 // other induction-variable increments). In other words, we're looking for loop
102 // bodies of the form:
103 //
104 // %iv = phi [ (preheader, ...), (body, %iv.next) ]
105 // f(%iv)
106 // %iv.1 = add %iv, 1 <-- a root increment
107 // f(%iv.1)
108 // %iv.2 = add %iv, 2 <-- a root increment
109 // f(%iv.2)
110 // %iv.scale_m_1 = add %iv, scale-1 <-- a root increment
111 // f(%iv.scale_m_1)
112 // ...
113 // %iv.next = add %iv, scale
114 // %cmp = icmp(%iv, ...)
115 // br %cmp, header, exit
116 //
117 // where each f(i) is a set of instructions that, collectively, are a function
118 // only of i (and other loop-invariant values).
119 //
120 // As a special case, we can also reroll loops like this:
121 //
122 // int foo(int);
123 // void bar(int *x) {
124 // for (int i = 0; i < 500; ++i) {
125 // x[3*i] = foo(0);
126 // x[3*i+1] = foo(0);
127 // x[3*i+2] = foo(0);
128 // }
129 // }
130 //
131 // into this:
132 //
133 // void bar(int *x) {
134 // for (int i = 0; i < 1500; ++i)
135 // x[i] = foo(0);
136 // }
137 //
138 // in which case, we're looking for inputs like this:
139 //
140 // %iv = phi [ (preheader, ...), (body, %iv.next) ]
141 // %scaled.iv = mul %iv, scale
142 // f(%scaled.iv)
143 // %scaled.iv.1 = add %scaled.iv, 1
144 // f(%scaled.iv.1)
145 // %scaled.iv.2 = add %scaled.iv, 2
146 // f(%scaled.iv.2)
147 // %scaled.iv.scale_m_1 = add %scaled.iv, scale-1
148 // f(%scaled.iv.scale_m_1)
149 // ...
150 // %iv.next = add %iv, 1
151 // %cmp = icmp(%iv, ...)
152 // br %cmp, header, exit
153 
154 namespace {
155 
156  enum IterationLimits {
157  /// The maximum number of iterations that we'll try and reroll.
158  IL_MaxRerollIterations = 32,
159  /// The bitvector index used by loop induction variables and other
160  /// instructions that belong to all iterations.
161  IL_All,
162  IL_End
163  };
164 
165  class LoopRerollLegacyPass : public LoopPass {
166  public:
167  static char ID; // Pass ID, replacement for typeid
168 
169  LoopRerollLegacyPass() : LoopPass(ID) {
171  }
172 
173  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
174 
175  void getAnalysisUsage(AnalysisUsage &AU) const override {
178  }
179  };
180 
181  class LoopReroll {
182  public:
183  LoopReroll(AliasAnalysis *AA, LoopInfo *LI, ScalarEvolution *SE,
184  TargetLibraryInfo *TLI, DominatorTree *DT, bool PreserveLCSSA)
185  : AA(AA), LI(LI), SE(SE), TLI(TLI), DT(DT),
186  PreserveLCSSA(PreserveLCSSA) {}
187  bool runOnLoop(Loop *L);
188 
189  protected:
190  AliasAnalysis *AA;
191  LoopInfo *LI;
192  ScalarEvolution *SE;
193  TargetLibraryInfo *TLI;
194  DominatorTree *DT;
195  bool PreserveLCSSA;
196 
197  using SmallInstructionVector = SmallVector<Instruction *, 16>;
198  using SmallInstructionSet = SmallPtrSet<Instruction *, 16>;
199 
200  // Map between induction variable and its increment
202 
203  // For loop with multiple induction variable, remember the one used only to
204  // control the loop.
205  Instruction *LoopControlIV;
206 
207  // A chain of isomorphic instructions, identified by a single-use PHI
208  // representing a reduction. Only the last value may be used outside the
209  // loop.
210  struct SimpleLoopReduction {
211  SimpleLoopReduction(Instruction *P, Loop *L) : Instructions(1, P) {
212  assert(isa<PHINode>(P) && "First reduction instruction must be a PHI");
213  add(L);
214  }
215 
216  bool valid() const {
217  return Valid;
218  }
219 
220  Instruction *getPHI() const {
221  assert(Valid && "Using invalid reduction");
222  return Instructions.front();
223  }
224 
225  Instruction *getReducedValue() const {
226  assert(Valid && "Using invalid reduction");
227  return Instructions.back();
228  }
229 
230  Instruction *get(size_t i) const {
231  assert(Valid && "Using invalid reduction");
232  return Instructions[i+1];
233  }
234 
235  Instruction *operator [] (size_t i) const { return get(i); }
236 
237  // The size, ignoring the initial PHI.
238  size_t size() const {
239  assert(Valid && "Using invalid reduction");
240  return Instructions.size()-1;
241  }
242 
243  using iterator = SmallInstructionVector::iterator;
244  using const_iterator = SmallInstructionVector::const_iterator;
245 
246  iterator begin() {
247  assert(Valid && "Using invalid reduction");
248  return std::next(Instructions.begin());
249  }
250 
251  const_iterator begin() const {
252  assert(Valid && "Using invalid reduction");
253  return std::next(Instructions.begin());
254  }
255 
256  iterator end() { return Instructions.end(); }
257  const_iterator end() const { return Instructions.end(); }
258 
259  protected:
260  bool Valid = false;
261  SmallInstructionVector Instructions;
262 
263  void add(Loop *L);
264  };
265 
266  // The set of all reductions, and state tracking of possible reductions
267  // during loop instruction processing.
268  struct ReductionTracker {
269  using SmallReductionVector = SmallVector<SimpleLoopReduction, 16>;
270 
271  // Add a new possible reduction.
272  void addSLR(SimpleLoopReduction &SLR) { PossibleReds.push_back(SLR); }
273 
274  // Setup to track possible reductions corresponding to the provided
275  // rerolling scale. Only reductions with a number of non-PHI instructions
276  // that is divisible by the scale are considered. Three instructions sets
277  // are filled in:
278  // - A set of all possible instructions in eligible reductions.
279  // - A set of all PHIs in eligible reductions
280  // - A set of all reduced values (last instructions) in eligible
281  // reductions.
282  void restrictToScale(uint64_t Scale,
283  SmallInstructionSet &PossibleRedSet,
284  SmallInstructionSet &PossibleRedPHISet,
285  SmallInstructionSet &PossibleRedLastSet) {
286  PossibleRedIdx.clear();
287  PossibleRedIter.clear();
288  Reds.clear();
289 
290  for (unsigned i = 0, e = PossibleReds.size(); i != e; ++i)
291  if (PossibleReds[i].size() % Scale == 0) {
292  PossibleRedLastSet.insert(PossibleReds[i].getReducedValue());
293  PossibleRedPHISet.insert(PossibleReds[i].getPHI());
294 
295  PossibleRedSet.insert(PossibleReds[i].getPHI());
296  PossibleRedIdx[PossibleReds[i].getPHI()] = i;
297  for (Instruction *J : PossibleReds[i]) {
298  PossibleRedSet.insert(J);
299  PossibleRedIdx[J] = i;
300  }
301  }
302  }
303 
304  // The functions below are used while processing the loop instructions.
305 
306  // Are the two instructions both from reductions, and furthermore, from
307  // the same reduction?
308  bool isPairInSame(Instruction *J1, Instruction *J2) {
309  DenseMap<Instruction *, int>::iterator J1I = PossibleRedIdx.find(J1);
310  if (J1I != PossibleRedIdx.end()) {
311  DenseMap<Instruction *, int>::iterator J2I = PossibleRedIdx.find(J2);
312  if (J2I != PossibleRedIdx.end() && J1I->second == J2I->second)
313  return true;
314  }
315 
316  return false;
317  }
318 
319  // The two provided instructions, the first from the base iteration, and
320  // the second from iteration i, form a matched pair. If these are part of
321  // a reduction, record that fact.
322  void recordPair(Instruction *J1, Instruction *J2, unsigned i) {
323  if (PossibleRedIdx.count(J1)) {
324  assert(PossibleRedIdx.count(J2) &&
325  "Recording reduction vs. non-reduction instruction?");
326 
327  PossibleRedIter[J1] = 0;
328  PossibleRedIter[J2] = i;
329 
330  int Idx = PossibleRedIdx[J1];
331  assert(Idx == PossibleRedIdx[J2] &&
332  "Recording pair from different reductions?");
333  Reds.insert(Idx);
334  }
335  }
336 
337  // The functions below can be called after we've finished processing all
338  // instructions in the loop, and we know which reductions were selected.
339 
340  bool validateSelected();
341  void replaceSelected();
342 
343  protected:
344  // The vector of all possible reductions (for any scale).
345  SmallReductionVector PossibleReds;
346 
347  DenseMap<Instruction *, int> PossibleRedIdx;
348  DenseMap<Instruction *, int> PossibleRedIter;
349  DenseSet<int> Reds;
350  };
351 
352  // A DAGRootSet models an induction variable being used in a rerollable
353  // loop. For example,
354  //
355  // x[i*3+0] = y1
356  // x[i*3+1] = y2
357  // x[i*3+2] = y3
358  //
359  // Base instruction -> i*3
360  // +---+----+
361  // / | \
362  // ST[y1] +1 +2 <-- Roots
363  // | |
364  // ST[y2] ST[y3]
365  //
366  // There may be multiple DAGRoots, for example:
367  //
368  // x[i*2+0] = ... (1)
369  // x[i*2+1] = ... (1)
370  // x[i*2+4] = ... (2)
371  // x[i*2+5] = ... (2)
372  // x[(i+1234)*2+5678] = ... (3)
373  // x[(i+1234)*2+5679] = ... (3)
374  //
375  // The loop will be rerolled by adding a new loop induction variable,
376  // one for the Base instruction in each DAGRootSet.
377  //
378  struct DAGRootSet {
379  Instruction *BaseInst;
380  SmallInstructionVector Roots;
381 
382  // The instructions between IV and BaseInst (but not including BaseInst).
383  SmallInstructionSet SubsumedInsts;
384  };
385 
386  // The set of all DAG roots, and state tracking of all roots
387  // for a particular induction variable.
388  struct DAGRootTracker {
389  DAGRootTracker(LoopReroll *Parent, Loop *L, Instruction *IV,
392  bool PreserveLCSSA,
394  Instruction *LoopCtrlIV)
395  : Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), DT(DT), LI(LI),
396  PreserveLCSSA(PreserveLCSSA), IV(IV), IVToIncMap(IncrMap),
397  LoopControlIV(LoopCtrlIV) {}
398 
399  /// Stage 1: Find all the DAG roots for the induction variable.
400  bool findRoots();
401 
402  /// Stage 2: Validate if the found roots are valid.
403  bool validate(ReductionTracker &Reductions);
404 
405  /// Stage 3: Assuming validate() returned true, perform the
406  /// replacement.
407  /// @param BackedgeTakenCount The backedge-taken count of L.
408  void replace(const SCEV *BackedgeTakenCount);
409 
410  protected:
412 
413  void findRootsRecursive(Instruction *IVU,
414  SmallInstructionSet SubsumedInsts);
415  bool findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts);
416  bool collectPossibleRoots(Instruction *Base,
417  std::map<int64_t,Instruction*> &Roots);
418  bool validateRootSet(DAGRootSet &DRS);
419 
420  bool collectUsedInstructions(SmallInstructionSet &PossibleRedSet);
421  void collectInLoopUserSet(const SmallInstructionVector &Roots,
422  const SmallInstructionSet &Exclude,
423  const SmallInstructionSet &Final,
425  void collectInLoopUserSet(Instruction *Root,
426  const SmallInstructionSet &Exclude,
427  const SmallInstructionSet &Final,
429 
430  UsesTy::iterator nextInstr(int Val, UsesTy &In,
431  const SmallInstructionSet &Exclude,
432  UsesTy::iterator *StartI=nullptr);
433  bool isBaseInst(Instruction *I);
434  bool isRootInst(Instruction *I);
435  bool instrDependsOn(Instruction *I,
436  UsesTy::iterator Start,
437  UsesTy::iterator End);
438  void replaceIV(DAGRootSet &DRS, const SCEV *Start, const SCEV *IncrExpr);
439 
440  LoopReroll *Parent;
441 
442  // Members of Parent, replicated here for brevity.
443  Loop *L;
444  ScalarEvolution *SE;
445  AliasAnalysis *AA;
446  TargetLibraryInfo *TLI;
447  DominatorTree *DT;
448  LoopInfo *LI;
449  bool PreserveLCSSA;
450 
451  // The loop induction variable.
452  Instruction *IV;
453 
454  // Loop step amount.
455  int64_t Inc;
456 
457  // Loop reroll count; if Inc == 1, this records the scaling applied
458  // to the indvar: a[i*2+0] = ...; a[i*2+1] = ... ;
459  // If Inc is not 1, Scale = Inc.
460  uint64_t Scale;
461 
462  // The roots themselves.
464 
465  // All increment instructions for IV.
466  SmallInstructionVector LoopIncs;
467 
468  // Map of all instructions in the loop (in order) to the iterations
469  // they are used in (or specially, IL_All for instructions
470  // used in the loop increment mechanism).
471  UsesTy Uses;
472 
473  // Map between induction variable and its increment
475 
476  Instruction *LoopControlIV;
477  };
478 
479  // Check if it is a compare-like instruction whose user is a branch
480  bool isCompareUsedByBranch(Instruction *I) {
481  auto *TI = I->getParent()->getTerminator();
482  if (!isa<BranchInst>(TI) || !isa<CmpInst>(I))
483  return false;
484  return I->hasOneUse() && TI->getOperand(0) == I;
485  };
486 
487  bool isLoopControlIV(Loop *L, Instruction *IV);
488  void collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs);
489  void collectPossibleReductions(Loop *L,
490  ReductionTracker &Reductions);
491  bool reroll(Instruction *IV, Loop *L, BasicBlock *Header,
492  const SCEV *BackedgeTakenCount, ReductionTracker &Reductions);
493  };
494 
495 } // end anonymous namespace
496 
497 char LoopRerollLegacyPass::ID = 0;
498 
499 INITIALIZE_PASS_BEGIN(LoopRerollLegacyPass, "loop-reroll", "Reroll loops",
500  false, false)
503 INITIALIZE_PASS_END(LoopRerollLegacyPass, "loop-reroll", "Reroll loops", false,
504  false)
505 
506 Pass *llvm::createLoopRerollPass() { return new LoopRerollLegacyPass; }
507 
508 // Returns true if the provided instruction is used outside the given loop.
509 // This operates like Instruction::isUsedOutsideOfBlock, but considers PHIs in
510 // non-loop blocks to be outside the loop.
512  for (User *U : I->users()) {
513  if (!L->contains(cast<Instruction>(U)))
514  return true;
515  }
516  return false;
517 }
518 
519 // Check if an IV is only used to control the loop. There are two cases:
520 // 1. It only has one use which is loop increment, and the increment is only
521 // used by comparison and the PHI (could has sext with nsw in between), and the
522 // comparison is only used by branch.
523 // 2. It is used by loop increment and the comparison, the loop increment is
524 // only used by the PHI, and the comparison is used only by the branch.
525 bool LoopReroll::isLoopControlIV(Loop *L, Instruction *IV) {
526  unsigned IVUses = IV->getNumUses();
527  if (IVUses != 2 && IVUses != 1)
528  return false;
529 
530  for (auto *User : IV->users()) {
531  int32_t IncOrCmpUses = User->getNumUses();
532  bool IsCompInst = isCompareUsedByBranch(cast<Instruction>(User));
533 
534  // User can only have one or two uses.
535  if (IncOrCmpUses != 2 && IncOrCmpUses != 1)
536  return false;
537 
538  // Case 1
539  if (IVUses == 1) {
540  // The only user must be the loop increment.
541  // The loop increment must have two uses.
542  if (IsCompInst || IncOrCmpUses != 2)
543  return false;
544  }
545 
546  // Case 2
547  if (IVUses == 2 && IncOrCmpUses != 1)
548  return false;
549 
550  // The users of the IV must be a binary operation or a comparison
551  if (auto *BO = dyn_cast<BinaryOperator>(User)) {
552  if (BO->getOpcode() == Instruction::Add) {
553  // Loop Increment
554  // User of Loop Increment should be either PHI or CMP
555  for (auto *UU : User->users()) {
556  if (PHINode *PN = dyn_cast<PHINode>(UU)) {
557  if (PN != IV)
558  return false;
559  }
560  // Must be a CMP or an ext (of a value with nsw) then CMP
561  else {
562  Instruction *UUser = dyn_cast<Instruction>(UU);
563  // Skip SExt if we are extending an nsw value
564  // TODO: Allow ZExt too
565  if (BO->hasNoSignedWrap() && UUser && UUser->hasOneUse() &&
566  isa<SExtInst>(UUser))
567  UUser = dyn_cast<Instruction>(*(UUser->user_begin()));
568  if (!isCompareUsedByBranch(UUser))
569  return false;
570  }
571  }
572  } else
573  return false;
574  // Compare : can only have one use, and must be branch
575  } else if (!IsCompInst)
576  return false;
577  }
578  return true;
579 }
580 
581 // Collect the list of loop induction variables with respect to which it might
582 // be possible to reroll the loop.
583 void LoopReroll::collectPossibleIVs(Loop *L,
584  SmallInstructionVector &PossibleIVs) {
585  BasicBlock *Header = L->getHeader();
586  for (BasicBlock::iterator I = Header->begin(),
587  IE = Header->getFirstInsertionPt(); I != IE; ++I) {
588  if (!isa<PHINode>(I))
589  continue;
590  if (!I->getType()->isIntegerTy() && !I->getType()->isPointerTy())
591  continue;
592 
593  if (const SCEVAddRecExpr *PHISCEV =
594  dyn_cast<SCEVAddRecExpr>(SE->getSCEV(&*I))) {
595  if (PHISCEV->getLoop() != L)
596  continue;
597  if (!PHISCEV->isAffine())
598  continue;
599  auto IncSCEV = dyn_cast<SCEVConstant>(PHISCEV->getStepRecurrence(*SE));
600  if (IncSCEV) {
601  IVToIncMap[&*I] = IncSCEV->getValue()->getSExtValue();
602  LLVM_DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " << *PHISCEV
603  << "\n");
604 
605  if (isLoopControlIV(L, &*I)) {
606  assert(!LoopControlIV && "Found two loop control only IV");
607  LoopControlIV = &(*I);
608  LLVM_DEBUG(dbgs() << "LRR: Possible loop control only IV: " << *I
609  << " = " << *PHISCEV << "\n");
610  } else
611  PossibleIVs.push_back(&*I);
612  }
613  }
614  }
615 }
616 
617 // Add the remainder of the reduction-variable chain to the instruction vector
618 // (the initial PHINode has already been added). If successful, the object is
619 // marked as valid.
621  assert(!Valid && "Cannot add to an already-valid chain");
622 
623  // The reduction variable must be a chain of single-use instructions
624  // (including the PHI), except for the last value (which is used by the PHI
625  // and also outside the loop).
626  Instruction *C = Instructions.front();
627  if (C->user_empty())
628  return;
629 
630  do {
631  C = cast<Instruction>(*C->user_begin());
632  if (C->hasOneUse()) {
633  if (!C->isBinaryOp())
634  return;
635 
636  if (!(isa<PHINode>(Instructions.back()) ||
637  C->isSameOperationAs(Instructions.back())))
638  return;
639 
640  Instructions.push_back(C);
641  }
642  } while (C->hasOneUse());
643 
644  if (Instructions.size() < 2 ||
645  !C->isSameOperationAs(Instructions.back()) ||
646  C->use_empty())
647  return;
648 
649  // C is now the (potential) last instruction in the reduction chain.
650  for (User *U : C->users()) {
651  // The only in-loop user can be the initial PHI.
652  if (L->contains(cast<Instruction>(U)))
653  if (cast<Instruction>(U) != Instructions.front())
654  return;
655  }
656 
657  Instructions.push_back(C);
658  Valid = true;
659 }
660 
661 // Collect the vector of possible reduction variables.
662 void LoopReroll::collectPossibleReductions(Loop *L,
663  ReductionTracker &Reductions) {
664  BasicBlock *Header = L->getHeader();
665  for (BasicBlock::iterator I = Header->begin(),
666  IE = Header->getFirstInsertionPt(); I != IE; ++I) {
667  if (!isa<PHINode>(I))
668  continue;
669  if (!I->getType()->isSingleValueType())
670  continue;
671 
672  SimpleLoopReduction SLR(&*I, L);
673  if (!SLR.valid())
674  continue;
675 
676  LLVM_DEBUG(dbgs() << "LRR: Possible reduction: " << *I << " (with "
677  << SLR.size() << " chained instructions)\n");
678  Reductions.addSLR(SLR);
679  }
680 }
681 
682 // Collect the set of all users of the provided root instruction. This set of
683 // users contains not only the direct users of the root instruction, but also
684 // all users of those users, and so on. There are two exceptions:
685 //
686 // 1. Instructions in the set of excluded instructions are never added to the
687 // use set (even if they are users). This is used, for example, to exclude
688 // including root increments in the use set of the primary IV.
689 //
690 // 2. Instructions in the set of final instructions are added to the use set
691 // if they are users, but their users are not added. This is used, for
692 // example, to prevent a reduction update from forcing all later reduction
693 // updates into the use set.
694 void LoopReroll::DAGRootTracker::collectInLoopUserSet(
695  Instruction *Root, const SmallInstructionSet &Exclude,
696  const SmallInstructionSet &Final,
698  SmallInstructionVector Queue(1, Root);
699  while (!Queue.empty()) {
700  Instruction *I = Queue.pop_back_val();
701  if (!Users.insert(I).second)
702  continue;
703 
704  if (!Final.count(I))
705  for (Use &U : I->uses()) {
706  Instruction *User = cast<Instruction>(U.getUser());
707  if (PHINode *PN = dyn_cast<PHINode>(User)) {
708  // Ignore "wrap-around" uses to PHIs of this loop's header.
709  if (PN->getIncomingBlock(U) == L->getHeader())
710  continue;
711  }
712 
713  if (L->contains(User) && !Exclude.count(User)) {
714  Queue.push_back(User);
715  }
716  }
717 
718  // We also want to collect single-user "feeder" values.
719  for (Use &U : I->operands()) {
720  if (Instruction *Op = dyn_cast<Instruction>(U))
721  if (Op->hasOneUse() && L->contains(Op) && !Exclude.count(Op) &&
722  !Final.count(Op))
723  Queue.push_back(Op);
724  }
725  }
726 }
727 
728 // Collect all of the users of all of the provided root instructions (combined
729 // into a single set).
730 void LoopReroll::DAGRootTracker::collectInLoopUserSet(
731  const SmallInstructionVector &Roots,
732  const SmallInstructionSet &Exclude,
733  const SmallInstructionSet &Final,
735  for (Instruction *Root : Roots)
736  collectInLoopUserSet(Root, Exclude, Final, Users);
737 }
738 
740  if (LoadInst *LI = dyn_cast<LoadInst>(I))
741  return LI->isUnordered();
742  if (StoreInst *SI = dyn_cast<StoreInst>(I))
743  return SI->isUnordered();
744  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
745  return !MI->isVolatile();
746  return false;
747 }
748 
749 /// Return true if IVU is a "simple" arithmetic operation.
750 /// This is used for narrowing the search space for DAGRoots; only arithmetic
751 /// and GEPs can be part of a DAGRoot.
752 static bool isSimpleArithmeticOp(User *IVU) {
753  if (Instruction *I = dyn_cast<Instruction>(IVU)) {
754  switch (I->getOpcode()) {
755  default: return false;
756  case Instruction::Add:
757  case Instruction::Sub:
758  case Instruction::Mul:
759  case Instruction::Shl:
760  case Instruction::AShr:
761  case Instruction::LShr:
762  case Instruction::GetElementPtr:
763  case Instruction::Trunc:
764  case Instruction::ZExt:
765  case Instruction::SExt:
766  return true;
767  }
768  }
769  return false;
770 }
771 
772 static bool isLoopIncrement(User *U, Instruction *IV) {
773  BinaryOperator *BO = dyn_cast<BinaryOperator>(U);
774 
775  if ((BO && BO->getOpcode() != Instruction::Add) ||
776  (!BO && !isa<GetElementPtrInst>(U)))
777  return false;
778 
779  for (auto *UU : U->users()) {
780  PHINode *PN = dyn_cast<PHINode>(UU);
781  if (PN && PN == IV)
782  return true;
783  }
784  return false;
785 }
786 
787 bool LoopReroll::DAGRootTracker::
788 collectPossibleRoots(Instruction *Base, std::map<int64_t,Instruction*> &Roots) {
789  SmallInstructionVector BaseUsers;
790 
791  for (auto *I : Base->users()) {
792  ConstantInt *CI = nullptr;
793 
794  if (isLoopIncrement(I, IV)) {
795  LoopIncs.push_back(cast<Instruction>(I));
796  continue;
797  }
798 
799  // The root nodes must be either GEPs, ORs or ADDs.
800  if (auto *BO = dyn_cast<BinaryOperator>(I)) {
801  if (BO->getOpcode() == Instruction::Add ||
802  BO->getOpcode() == Instruction::Or)
803  CI = dyn_cast<ConstantInt>(BO->getOperand(1));
804  } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
805  Value *LastOperand = GEP->getOperand(GEP->getNumOperands()-1);
806  CI = dyn_cast<ConstantInt>(LastOperand);
807  }
808 
809  if (!CI) {
810  if (Instruction *II = dyn_cast<Instruction>(I)) {
811  BaseUsers.push_back(II);
812  continue;
813  } else {
814  LLVM_DEBUG(dbgs() << "LRR: Aborting due to non-instruction: " << *I
815  << "\n");
816  return false;
817  }
818  }
819 
820  int64_t V = std::abs(CI->getValue().getSExtValue());
821  if (Roots.find(V) != Roots.end())
822  // No duplicates, please.
823  return false;
824 
825  Roots[V] = cast<Instruction>(I);
826  }
827 
828  // Make sure we have at least two roots.
829  if (Roots.empty() || (Roots.size() == 1 && BaseUsers.empty()))
830  return false;
831 
832  // If we found non-loop-inc, non-root users of Base, assume they are
833  // for the zeroth root index. This is because "add %a, 0" gets optimized
834  // away.
835  if (BaseUsers.size()) {
836  if (Roots.find(0) != Roots.end()) {
837  LLVM_DEBUG(dbgs() << "LRR: Multiple roots found for base - aborting!\n");
838  return false;
839  }
840  Roots[0] = Base;
841  }
842 
843  // Calculate the number of users of the base, or lowest indexed, iteration.
844  unsigned NumBaseUses = BaseUsers.size();
845  if (NumBaseUses == 0)
846  NumBaseUses = Roots.begin()->second->getNumUses();
847 
848  // Check that every node has the same number of users.
849  for (auto &KV : Roots) {
850  if (KV.first == 0)
851  continue;
852  if (!KV.second->hasNUses(NumBaseUses)) {
853  LLVM_DEBUG(dbgs() << "LRR: Aborting - Root and Base #users not the same: "
854  << "#Base=" << NumBaseUses
855  << ", #Root=" << KV.second->getNumUses() << "\n");
856  return false;
857  }
858  }
859 
860  return true;
861 }
862 
863 void LoopReroll::DAGRootTracker::
864 findRootsRecursive(Instruction *I, SmallInstructionSet SubsumedInsts) {
865  // Does the user look like it could be part of a root set?
866  // All its users must be simple arithmetic ops.
867  if (I->hasNUsesOrMore(IL_MaxRerollIterations + 1))
868  return;
869 
870  if (I != IV && findRootsBase(I, SubsumedInsts))
871  return;
872 
873  SubsumedInsts.insert(I);
874 
875  for (User *V : I->users()) {
876  Instruction *I = cast<Instruction>(V);
877  if (is_contained(LoopIncs, I))
878  continue;
879 
880  if (!isSimpleArithmeticOp(I))
881  continue;
882 
883  // The recursive call makes a copy of SubsumedInsts.
884  findRootsRecursive(I, SubsumedInsts);
885  }
886 }
887 
888 bool LoopReroll::DAGRootTracker::validateRootSet(DAGRootSet &DRS) {
889  if (DRS.Roots.empty())
890  return false;
891 
892  // If the value of the base instruction is used outside the loop, we cannot
893  // reroll the loop. Check for other root instructions is unnecessary because
894  // they don't match any base instructions if their values are used outside.
895  if (hasUsesOutsideLoop(DRS.BaseInst, L))
896  return false;
897 
898  // Consider a DAGRootSet with N-1 roots (so N different values including
899  // BaseInst).
900  // Define d = Roots[0] - BaseInst, which should be the same as
901  // Roots[I] - Roots[I-1] for all I in [1..N).
902  // Define D = BaseInst@J - BaseInst@J-1, where "@J" means the value at the
903  // loop iteration J.
904  //
905  // Now, For the loop iterations to be consecutive:
906  // D = d * N
907  const auto *ADR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(DRS.BaseInst));
908  if (!ADR)
909  return false;
910 
911  // Check that the first root is evenly spaced.
912  unsigned N = DRS.Roots.size() + 1;
913  const SCEV *StepSCEV = SE->getMinusSCEV(SE->getSCEV(DRS.Roots[0]), ADR);
914  const SCEV *ScaleSCEV = SE->getConstant(StepSCEV->getType(), N);
915  if (ADR->getStepRecurrence(*SE) != SE->getMulExpr(StepSCEV, ScaleSCEV))
916  return false;
917 
918  // Check that the remainling roots are evenly spaced.
919  for (unsigned i = 1; i < N - 1; ++i) {
920  const SCEV *NewStepSCEV = SE->getMinusSCEV(SE->getSCEV(DRS.Roots[i]),
921  SE->getSCEV(DRS.Roots[i-1]));
922  if (NewStepSCEV != StepSCEV)
923  return false;
924  }
925 
926  return true;
927 }
928 
929 bool LoopReroll::DAGRootTracker::
930 findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts) {
931  // The base of a RootSet must be an AddRec, so it can be erased.
932  const auto *IVU_ADR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IVU));
933  if (!IVU_ADR || IVU_ADR->getLoop() != L)
934  return false;
935 
936  std::map<int64_t, Instruction*> V;
937  if (!collectPossibleRoots(IVU, V))
938  return false;
939 
940  // If we didn't get a root for index zero, then IVU must be
941  // subsumed.
942  if (V.find(0) == V.end())
943  SubsumedInsts.insert(IVU);
944 
945  // Partition the vector into monotonically increasing indexes.
946  DAGRootSet DRS;
947  DRS.BaseInst = nullptr;
948 
949  SmallVector<DAGRootSet, 16> PotentialRootSets;
950 
951  for (auto &KV : V) {
952  if (!DRS.BaseInst) {
953  DRS.BaseInst = KV.second;
954  DRS.SubsumedInsts = SubsumedInsts;
955  } else if (DRS.Roots.empty()) {
956  DRS.Roots.push_back(KV.second);
957  } else if (V.find(KV.first - 1) != V.end()) {
958  DRS.Roots.push_back(KV.second);
959  } else {
960  // Linear sequence terminated.
961  if (!validateRootSet(DRS))
962  return false;
963 
964  // Construct a new DAGRootSet with the next sequence.
965  PotentialRootSets.push_back(DRS);
966  DRS.BaseInst = KV.second;
967  DRS.Roots.clear();
968  }
969  }
970 
971  if (!validateRootSet(DRS))
972  return false;
973 
974  PotentialRootSets.push_back(DRS);
975 
976  RootSets.append(PotentialRootSets.begin(), PotentialRootSets.end());
977 
978  return true;
979 }
980 
981 bool LoopReroll::DAGRootTracker::findRoots() {
982  Inc = IVToIncMap[IV];
983 
984  assert(RootSets.empty() && "Unclean state!");
985  if (std::abs(Inc) == 1) {
986  for (auto *IVU : IV->users()) {
987  if (isLoopIncrement(IVU, IV))
988  LoopIncs.push_back(cast<Instruction>(IVU));
989  }
990  findRootsRecursive(IV, SmallInstructionSet());
991  LoopIncs.push_back(IV);
992  } else {
993  if (!findRootsBase(IV, SmallInstructionSet()))
994  return false;
995  }
996 
997  // Ensure all sets have the same size.
998  if (RootSets.empty()) {
999  LLVM_DEBUG(dbgs() << "LRR: Aborting because no root sets found!\n");
1000  return false;
1001  }
1002  for (auto &V : RootSets) {
1003  if (V.Roots.empty() || V.Roots.size() != RootSets[0].Roots.size()) {
1004  LLVM_DEBUG(
1005  dbgs()
1006  << "LRR: Aborting because not all root sets have the same size\n");
1007  return false;
1008  }
1009  }
1010 
1011  Scale = RootSets[0].Roots.size() + 1;
1012 
1013  if (Scale > IL_MaxRerollIterations) {
1014  LLVM_DEBUG(dbgs() << "LRR: Aborting - too many iterations found. "
1015  << "#Found=" << Scale
1016  << ", #Max=" << IL_MaxRerollIterations << "\n");
1017  return false;
1018  }
1019 
1020  LLVM_DEBUG(dbgs() << "LRR: Successfully found roots: Scale=" << Scale
1021  << "\n");
1022 
1023  return true;
1024 }
1025 
1026 bool LoopReroll::DAGRootTracker::collectUsedInstructions(SmallInstructionSet &PossibleRedSet) {
1027  // Populate the MapVector with all instructions in the block, in order first,
1028  // so we can iterate over the contents later in perfect order.
1029  for (auto &I : *L->getHeader()) {
1030  Uses[&I].resize(IL_End);
1031  }
1032 
1033  SmallInstructionSet Exclude;
1034  for (auto &DRS : RootSets) {
1035  Exclude.insert(DRS.Roots.begin(), DRS.Roots.end());
1036  Exclude.insert(DRS.SubsumedInsts.begin(), DRS.SubsumedInsts.end());
1037  Exclude.insert(DRS.BaseInst);
1038  }
1039  Exclude.insert(LoopIncs.begin(), LoopIncs.end());
1040 
1041  for (auto &DRS : RootSets) {
1042  DenseSet<Instruction*> VBase;
1043  collectInLoopUserSet(DRS.BaseInst, Exclude, PossibleRedSet, VBase);
1044  for (auto *I : VBase) {
1045  Uses[I].set(0);
1046  }
1047 
1048  unsigned Idx = 1;
1049  for (auto *Root : DRS.Roots) {
1051  collectInLoopUserSet(Root, Exclude, PossibleRedSet, V);
1052 
1053  // While we're here, check the use sets are the same size.
1054  if (V.size() != VBase.size()) {
1055  LLVM_DEBUG(dbgs() << "LRR: Aborting - use sets are different sizes\n");
1056  return false;
1057  }
1058 
1059  for (auto *I : V) {
1060  Uses[I].set(Idx);
1061  }
1062  ++Idx;
1063  }
1064 
1065  // Make sure our subsumed instructions are remembered too.
1066  for (auto *I : DRS.SubsumedInsts) {
1067  Uses[I].set(IL_All);
1068  }
1069  }
1070 
1071  // Make sure the loop increments are also accounted for.
1072 
1073  Exclude.clear();
1074  for (auto &DRS : RootSets) {
1075  Exclude.insert(DRS.Roots.begin(), DRS.Roots.end());
1076  Exclude.insert(DRS.SubsumedInsts.begin(), DRS.SubsumedInsts.end());
1077  Exclude.insert(DRS.BaseInst);
1078  }
1079 
1081  collectInLoopUserSet(LoopIncs, Exclude, PossibleRedSet, V);
1082  for (auto *I : V) {
1083  if (I->mayHaveSideEffects()) {
1084  LLVM_DEBUG(dbgs() << "LRR: Aborting - "
1085  << "An instruction which does not belong to any root "
1086  << "sets must not have side effects: " << *I);
1087  return false;
1088  }
1089  Uses[I].set(IL_All);
1090  }
1091 
1092  return true;
1093 }
1094 
1095 /// Get the next instruction in "In" that is a member of set Val.
1096 /// Start searching from StartI, and do not return anything in Exclude.
1097 /// If StartI is not given, start from In.begin().
1099 LoopReroll::DAGRootTracker::nextInstr(int Val, UsesTy &In,
1100  const SmallInstructionSet &Exclude,
1101  UsesTy::iterator *StartI) {
1102  UsesTy::iterator I = StartI ? *StartI : In.begin();
1103  while (I != In.end() && (I->second.test(Val) == 0 ||
1104  Exclude.contains(I->first)))
1105  ++I;
1106  return I;
1107 }
1108 
1109 bool LoopReroll::DAGRootTracker::isBaseInst(Instruction *I) {
1110  for (auto &DRS : RootSets) {
1111  if (DRS.BaseInst == I)
1112  return true;
1113  }
1114  return false;
1115 }
1116 
1117 bool LoopReroll::DAGRootTracker::isRootInst(Instruction *I) {
1118  for (auto &DRS : RootSets) {
1119  if (is_contained(DRS.Roots, I))
1120  return true;
1121  }
1122  return false;
1123 }
1124 
1125 /// Return true if instruction I depends on any instruction between
1126 /// Start and End.
1127 bool LoopReroll::DAGRootTracker::instrDependsOn(Instruction *I,
1128  UsesTy::iterator Start,
1129  UsesTy::iterator End) {
1130  for (auto *U : I->users()) {
1131  for (auto It = Start; It != End; ++It)
1132  if (U == It->first)
1133  return true;
1134  }
1135  return false;
1136 }
1137 
1138 static bool isIgnorableInst(const Instruction *I) {
1139  if (isa<DbgInfoIntrinsic>(I))
1140  return true;
1141  const IntrinsicInst* II = dyn_cast<IntrinsicInst>(I);
1142  if (!II)
1143  return false;
1144  switch (II->getIntrinsicID()) {
1145  default:
1146  return false;
1147  case Intrinsic::annotation:
1148  case Intrinsic::ptr_annotation:
1149  case Intrinsic::var_annotation:
1150  // TODO: the following intrinsics may also be allowed:
1151  // lifetime_start, lifetime_end, invariant_start, invariant_end
1152  return true;
1153  }
1154  return false;
1155 }
1156 
1157 bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
1158  // We now need to check for equivalence of the use graph of each root with
1159  // that of the primary induction variable (excluding the roots). Our goal
1160  // here is not to solve the full graph isomorphism problem, but rather to
1161  // catch common cases without a lot of work. As a result, we will assume
1162  // that the relative order of the instructions in each unrolled iteration
1163  // is the same (although we will not make an assumption about how the
1164  // different iterations are intermixed). Note that while the order must be
1165  // the same, the instructions may not be in the same basic block.
1166 
1167  // An array of just the possible reductions for this scale factor. When we
1168  // collect the set of all users of some root instructions, these reduction
1169  // instructions are treated as 'final' (their uses are not considered).
1170  // This is important because we don't want the root use set to search down
1171  // the reduction chain.
1172  SmallInstructionSet PossibleRedSet;
1173  SmallInstructionSet PossibleRedLastSet;
1174  SmallInstructionSet PossibleRedPHISet;
1175  Reductions.restrictToScale(Scale, PossibleRedSet,
1176  PossibleRedPHISet, PossibleRedLastSet);
1177 
1178  // Populate "Uses" with where each instruction is used.
1179  if (!collectUsedInstructions(PossibleRedSet))
1180  return false;
1181 
1182  // Make sure we mark the reduction PHIs as used in all iterations.
1183  for (auto *I : PossibleRedPHISet) {
1184  Uses[I].set(IL_All);
1185  }
1186 
1187  // Make sure we mark loop-control-only PHIs as used in all iterations. See
1188  // comment above LoopReroll::isLoopControlIV for more information.
1189  BasicBlock *Header = L->getHeader();
1190  if (LoopControlIV && LoopControlIV != IV) {
1191  for (auto *U : LoopControlIV->users()) {
1192  Instruction *IVUser = dyn_cast<Instruction>(U);
1193  // IVUser could be loop increment or compare
1194  Uses[IVUser].set(IL_All);
1195  for (auto *UU : IVUser->users()) {
1196  Instruction *UUser = dyn_cast<Instruction>(UU);
1197  // UUser could be compare, PHI or branch
1198  Uses[UUser].set(IL_All);
1199  // Skip SExt
1200  if (isa<SExtInst>(UUser)) {
1201  UUser = dyn_cast<Instruction>(*(UUser->user_begin()));
1202  Uses[UUser].set(IL_All);
1203  }
1204  // Is UUser a compare instruction?
1205  if (UU->hasOneUse()) {
1206  Instruction *BI = dyn_cast<BranchInst>(*UUser->user_begin());
1207  if (BI == cast<BranchInst>(Header->getTerminator()))
1208  Uses[BI].set(IL_All);
1209  }
1210  }
1211  }
1212  }
1213 
1214  // Make sure all instructions in the loop are in one and only one
1215  // set.
1216  for (auto &KV : Uses) {
1217  if (KV.second.count() != 1 && !isIgnorableInst(KV.first)) {
1218  LLVM_DEBUG(
1219  dbgs() << "LRR: Aborting - instruction is not used in 1 iteration: "
1220  << *KV.first << " (#uses=" << KV.second.count() << ")\n");
1221  return false;
1222  }
1223  }
1224 
1225  LLVM_DEBUG(for (auto &KV
1226  : Uses) {
1227  dbgs() << "LRR: " << KV.second.find_first() << "\t" << *KV.first << "\n";
1228  });
1229 
1230  for (unsigned Iter = 1; Iter < Scale; ++Iter) {
1231  // In addition to regular aliasing information, we need to look for
1232  // instructions from later (future) iterations that have side effects
1233  // preventing us from reordering them past other instructions with side
1234  // effects.
1235  bool FutureSideEffects = false;
1236  AliasSetTracker AST(*AA);
1237  // The map between instructions in f(%iv.(i+1)) and f(%iv).
1239 
1240  // Compare iteration Iter to the base.
1241  SmallInstructionSet Visited;
1242  auto BaseIt = nextInstr(0, Uses, Visited);
1243  auto RootIt = nextInstr(Iter, Uses, Visited);
1244  auto LastRootIt = Uses.begin();
1245 
1246  while (BaseIt != Uses.end() && RootIt != Uses.end()) {
1247  Instruction *BaseInst = BaseIt->first;
1248  Instruction *RootInst = RootIt->first;
1249 
1250  // Skip over the IV or root instructions; only match their users.
1251  bool Continue = false;
1252  if (isBaseInst(BaseInst)) {
1253  Visited.insert(BaseInst);
1254  BaseIt = nextInstr(0, Uses, Visited);
1255  Continue = true;
1256  }
1257  if (isRootInst(RootInst)) {
1258  LastRootIt = RootIt;
1259  Visited.insert(RootInst);
1260  RootIt = nextInstr(Iter, Uses, Visited);
1261  Continue = true;
1262  }
1263  if (Continue) continue;
1264 
1265  if (!BaseInst->isSameOperationAs(RootInst)) {
1266  // Last chance saloon. We don't try and solve the full isomorphism
1267  // problem, but try and at least catch the case where two instructions
1268  // *of different types* are round the wrong way. We won't be able to
1269  // efficiently tell, given two ADD instructions, which way around we
1270  // should match them, but given an ADD and a SUB, we can at least infer
1271  // which one is which.
1272  //
1273  // This should allow us to deal with a greater subset of the isomorphism
1274  // problem. It does however change a linear algorithm into a quadratic
1275  // one, so limit the number of probes we do.
1276  auto TryIt = RootIt;
1277  unsigned N = NumToleratedFailedMatches;
1278  while (TryIt != Uses.end() &&
1279  !BaseInst->isSameOperationAs(TryIt->first) &&
1280  N--) {
1281  ++TryIt;
1282  TryIt = nextInstr(Iter, Uses, Visited, &TryIt);
1283  }
1284 
1285  if (TryIt == Uses.end() || TryIt == RootIt ||
1286  instrDependsOn(TryIt->first, RootIt, TryIt)) {
1287  LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at "
1288  << *BaseInst << " vs. " << *RootInst << "\n");
1289  return false;
1290  }
1291 
1292  RootIt = TryIt;
1293  RootInst = TryIt->first;
1294  }
1295 
1296  // All instructions between the last root and this root
1297  // may belong to some other iteration. If they belong to a
1298  // future iteration, then they're dangerous to alias with.
1299  //
1300  // Note that because we allow a limited amount of flexibility in the order
1301  // that we visit nodes, LastRootIt might be *before* RootIt, in which
1302  // case we've already checked this set of instructions so we shouldn't
1303  // do anything.
1304  for (; LastRootIt < RootIt; ++LastRootIt) {
1305  Instruction *I = LastRootIt->first;
1306  if (LastRootIt->second.find_first() < (int)Iter)
1307  continue;
1308  if (I->mayWriteToMemory())
1309  AST.add(I);
1310  // Note: This is specifically guarded by a check on isa<PHINode>,
1311  // which while a valid (somewhat arbitrary) micro-optimization, is
1312  // needed because otherwise isSafeToSpeculativelyExecute returns
1313  // false on PHI nodes.
1314  if (!isa<PHINode>(I) && !isUnorderedLoadStore(I) &&
1316  // Intervening instructions cause side effects.
1317  FutureSideEffects = true;
1318  }
1319 
1320  // Make sure that this instruction, which is in the use set of this
1321  // root instruction, does not also belong to the base set or the set of
1322  // some other root instruction.
1323  if (RootIt->second.count() > 1) {
1324  LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst
1325  << " vs. " << *RootInst << " (prev. case overlap)\n");
1326  return false;
1327  }
1328 
1329  // Make sure that we don't alias with any instruction in the alias set
1330  // tracker. If we do, then we depend on a future iteration, and we
1331  // can't reroll.
1332  if (RootInst->mayReadFromMemory())
1333  for (auto &K : AST) {
1334  if (K.aliasesUnknownInst(RootInst, *AA)) {
1335  LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at "
1336  << *BaseInst << " vs. " << *RootInst
1337  << " (depends on future store)\n");
1338  return false;
1339  }
1340  }
1341 
1342  // If we've past an instruction from a future iteration that may have
1343  // side effects, and this instruction might also, then we can't reorder
1344  // them, and this matching fails. As an exception, we allow the alias
1345  // set tracker to handle regular (unordered) load/store dependencies.
1346  if (FutureSideEffects && ((!isUnorderedLoadStore(BaseInst) &&
1347  !isSafeToSpeculativelyExecute(BaseInst)) ||
1348  (!isUnorderedLoadStore(RootInst) &&
1349  !isSafeToSpeculativelyExecute(RootInst)))) {
1350  LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst
1351  << " vs. " << *RootInst
1352  << " (side effects prevent reordering)\n");
1353  return false;
1354  }
1355 
1356  // For instructions that are part of a reduction, if the operation is
1357  // associative, then don't bother matching the operands (because we
1358  // already know that the instructions are isomorphic, and the order
1359  // within the iteration does not matter). For non-associative reductions,
1360  // we do need to match the operands, because we need to reject
1361  // out-of-order instructions within an iteration!
1362  // For example (assume floating-point addition), we need to reject this:
1363  // x += a[i]; x += b[i];
1364  // x += a[i+1]; x += b[i+1];
1365  // x += b[i+2]; x += a[i+2];
1366  bool InReduction = Reductions.isPairInSame(BaseInst, RootInst);
1367 
1368  if (!(InReduction && BaseInst->isAssociative())) {
1369  bool Swapped = false, SomeOpMatched = false;
1370  for (unsigned j = 0; j < BaseInst->getNumOperands(); ++j) {
1371  Value *Op2 = RootInst->getOperand(j);
1372 
1373  // If this is part of a reduction (and the operation is not
1374  // associatve), then we match all operands, but not those that are
1375  // part of the reduction.
1376  if (InReduction)
1377  if (Instruction *Op2I = dyn_cast<Instruction>(Op2))
1378  if (Reductions.isPairInSame(RootInst, Op2I))
1379  continue;
1380 
1381  DenseMap<Value *, Value *>::iterator BMI = BaseMap.find(Op2);
1382  if (BMI != BaseMap.end()) {
1383  Op2 = BMI->second;
1384  } else {
1385  for (auto &DRS : RootSets) {
1386  if (DRS.Roots[Iter-1] == (Instruction*) Op2) {
1387  Op2 = DRS.BaseInst;
1388  break;
1389  }
1390  }
1391  }
1392 
1393  if (BaseInst->getOperand(Swapped ? unsigned(!j) : j) != Op2) {
1394  // If we've not already decided to swap the matched operands, and
1395  // we've not already matched our first operand (note that we could
1396  // have skipped matching the first operand because it is part of a
1397  // reduction above), and the instruction is commutative, then try
1398  // the swapped match.
1399  if (!Swapped && BaseInst->isCommutative() && !SomeOpMatched &&
1400  BaseInst->getOperand(!j) == Op2) {
1401  Swapped = true;
1402  } else {
1403  LLVM_DEBUG(dbgs()
1404  << "LRR: iteration root match failed at " << *BaseInst
1405  << " vs. " << *RootInst << " (operand " << j << ")\n");
1406  return false;
1407  }
1408  }
1409 
1410  SomeOpMatched = true;
1411  }
1412  }
1413 
1414  if ((!PossibleRedLastSet.count(BaseInst) &&
1415  hasUsesOutsideLoop(BaseInst, L)) ||
1416  (!PossibleRedLastSet.count(RootInst) &&
1417  hasUsesOutsideLoop(RootInst, L))) {
1418  LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst
1419  << " vs. " << *RootInst << " (uses outside loop)\n");
1420  return false;
1421  }
1422 
1423  Reductions.recordPair(BaseInst, RootInst, Iter);
1424  BaseMap.insert(std::make_pair(RootInst, BaseInst));
1425 
1426  LastRootIt = RootIt;
1427  Visited.insert(BaseInst);
1428  Visited.insert(RootInst);
1429  BaseIt = nextInstr(0, Uses, Visited);
1430  RootIt = nextInstr(Iter, Uses, Visited);
1431  }
1432  assert(BaseIt == Uses.end() && RootIt == Uses.end() &&
1433  "Mismatched set sizes!");
1434  }
1435 
1436  LLVM_DEBUG(dbgs() << "LRR: Matched all iteration increments for " << *IV
1437  << "\n");
1438 
1439  return true;
1440 }
1441 
1442 void LoopReroll::DAGRootTracker::replace(const SCEV *BackedgeTakenCount) {
1443  BasicBlock *Header = L->getHeader();
1444 
1445  // Compute the start and increment for each BaseInst before we start erasing
1446  // instructions.
1447  SmallVector<const SCEV *, 8> StartExprs;
1448  SmallVector<const SCEV *, 8> IncrExprs;
1449  for (auto &DRS : RootSets) {
1450  const SCEVAddRecExpr *IVSCEV =
1451  cast<SCEVAddRecExpr>(SE->getSCEV(DRS.BaseInst));
1452  StartExprs.push_back(IVSCEV->getStart());
1453  IncrExprs.push_back(SE->getMinusSCEV(SE->getSCEV(DRS.Roots[0]), IVSCEV));
1454  }
1455 
1456  // Remove instructions associated with non-base iterations.
1457  for (BasicBlock::reverse_iterator J = Header->rbegin(), JE = Header->rend();
1458  J != JE;) {
1459  unsigned I = Uses[&*J].find_first();
1460  if (I > 0 && I < IL_All) {
1461  LLVM_DEBUG(dbgs() << "LRR: removing: " << *J << "\n");
1462  J++->eraseFromParent();
1463  continue;
1464  }
1465 
1466  ++J;
1467  }
1468 
1469  // Rewrite each BaseInst using SCEV.
1470  for (size_t i = 0, e = RootSets.size(); i != e; ++i)
1471  // Insert the new induction variable.
1472  replaceIV(RootSets[i], StartExprs[i], IncrExprs[i]);
1473 
1474  { // Limit the lifetime of SCEVExpander.
1475  BranchInst *BI = cast<BranchInst>(Header->getTerminator());
1476  const DataLayout &DL = Header->getModule()->getDataLayout();
1477  SCEVExpander Expander(*SE, DL, "reroll");
1478  auto Zero = SE->getZero(BackedgeTakenCount->getType());
1479  auto One = SE->getOne(BackedgeTakenCount->getType());
1480  auto NewIVSCEV = SE->getAddRecExpr(Zero, One, L, SCEV::FlagAnyWrap);
1481  Value *NewIV =
1482  Expander.expandCodeFor(NewIVSCEV, BackedgeTakenCount->getType(),
1483  Header->getFirstNonPHIOrDbg());
1484  // FIXME: This arithmetic can overflow.
1485  auto TripCount = SE->getAddExpr(BackedgeTakenCount, One);
1486  auto ScaledTripCount = SE->getMulExpr(
1487  TripCount, SE->getConstant(BackedgeTakenCount->getType(), Scale));
1488  auto ScaledBECount = SE->getMinusSCEV(ScaledTripCount, One);
1489  Value *TakenCount =
1490  Expander.expandCodeFor(ScaledBECount, BackedgeTakenCount->getType(),
1491  Header->getFirstNonPHIOrDbg());
1492  Value *Cond =
1493  new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, TakenCount, "exitcond");
1494  BI->setCondition(Cond);
1495 
1496  if (BI->getSuccessor(1) != Header)
1497  BI->swapSuccessors();
1498  }
1499 
1500  SimplifyInstructionsInBlock(Header, TLI);
1501  DeleteDeadPHIs(Header, TLI);
1502 }
1503 
1504 void LoopReroll::DAGRootTracker::replaceIV(DAGRootSet &DRS,
1505  const SCEV *Start,
1506  const SCEV *IncrExpr) {
1507  BasicBlock *Header = L->getHeader();
1508  Instruction *Inst = DRS.BaseInst;
1509 
1510  const SCEV *NewIVSCEV =
1511  SE->getAddRecExpr(Start, IncrExpr, L, SCEV::FlagAnyWrap);
1512 
1513  { // Limit the lifetime of SCEVExpander.
1514  const DataLayout &DL = Header->getModule()->getDataLayout();
1515  SCEVExpander Expander(*SE, DL, "reroll");
1516  Value *NewIV = Expander.expandCodeFor(NewIVSCEV, Inst->getType(),
1517  Header->getFirstNonPHIOrDbg());
1518 
1519  for (auto &KV : Uses)
1520  if (KV.second.find_first() == 0)
1521  KV.first->replaceUsesOfWith(Inst, NewIV);
1522  }
1523 }
1524 
1525 // Validate the selected reductions. All iterations must have an isomorphic
1526 // part of the reduction chain and, for non-associative reductions, the chain
1527 // entries must appear in order.
1528 bool LoopReroll::ReductionTracker::validateSelected() {
1529  // For a non-associative reduction, the chain entries must appear in order.
1530  for (int i : Reds) {
1531  int PrevIter = 0, BaseCount = 0, Count = 0;
1532  for (Instruction *J : PossibleReds[i]) {
1533  // Note that all instructions in the chain must have been found because
1534  // all instructions in the function must have been assigned to some
1535  // iteration.
1536  int Iter = PossibleRedIter[J];
1537  if (Iter != PrevIter && Iter != PrevIter + 1 &&
1538  !PossibleReds[i].getReducedValue()->isAssociative()) {
1539  LLVM_DEBUG(dbgs() << "LRR: Out-of-order non-associative reduction: "
1540  << J << "\n");
1541  return false;
1542  }
1543 
1544  if (Iter != PrevIter) {
1545  if (Count != BaseCount) {
1546  LLVM_DEBUG(dbgs()
1547  << "LRR: Iteration " << PrevIter << " reduction use count "
1548  << Count << " is not equal to the base use count "
1549  << BaseCount << "\n");
1550  return false;
1551  }
1552 
1553  Count = 0;
1554  }
1555 
1556  ++Count;
1557  if (Iter == 0)
1558  ++BaseCount;
1559 
1560  PrevIter = Iter;
1561  }
1562  }
1563 
1564  return true;
1565 }
1566 
1567 // For all selected reductions, remove all parts except those in the first
1568 // iteration (and the PHI). Replace outside uses of the reduced value with uses
1569 // of the first-iteration reduced value (in other words, reroll the selected
1570 // reductions).
1571 void LoopReroll::ReductionTracker::replaceSelected() {
1572  // Fixup reductions to refer to the last instruction associated with the
1573  // first iteration (not the last).
1574  for (int i : Reds) {
1575  int j = 0;
1576  for (int e = PossibleReds[i].size(); j != e; ++j)
1577  if (PossibleRedIter[PossibleReds[i][j]] != 0) {
1578  --j;
1579  break;
1580  }
1581 
1582  // Replace users with the new end-of-chain value.
1583  SmallInstructionVector Users;
1584  for (User *U : PossibleReds[i].getReducedValue()->users()) {
1585  Users.push_back(cast<Instruction>(U));
1586  }
1587 
1588  for (Instruction *User : Users)
1589  User->replaceUsesOfWith(PossibleReds[i].getReducedValue(),
1590  PossibleReds[i][j]);
1591  }
1592 }
1593 
1594 // Reroll the provided loop with respect to the provided induction variable.
1595 // Generally, we're looking for a loop like this:
1596 //
1597 // %iv = phi [ (preheader, ...), (body, %iv.next) ]
1598 // f(%iv)
1599 // %iv.1 = add %iv, 1 <-- a root increment
1600 // f(%iv.1)
1601 // %iv.2 = add %iv, 2 <-- a root increment
1602 // f(%iv.2)
1603 // %iv.scale_m_1 = add %iv, scale-1 <-- a root increment
1604 // f(%iv.scale_m_1)
1605 // ...
1606 // %iv.next = add %iv, scale
1607 // %cmp = icmp(%iv, ...)
1608 // br %cmp, header, exit
1609 //
1610 // Notably, we do not require that f(%iv), f(%iv.1), etc. be isolated groups of
1611 // instructions. In other words, the instructions in f(%iv), f(%iv.1), etc. can
1612 // be intermixed with eachother. The restriction imposed by this algorithm is
1613 // that the relative order of the isomorphic instructions in f(%iv), f(%iv.1),
1614 // etc. be the same.
1615 //
1616 // First, we collect the use set of %iv, excluding the other increment roots.
1617 // This gives us f(%iv). Then we iterate over the loop instructions (scale-1)
1618 // times, having collected the use set of f(%iv.(i+1)), during which we:
1619 // - Ensure that the next unmatched instruction in f(%iv) is isomorphic to
1620 // the next unmatched instruction in f(%iv.(i+1)).
1621 // - Ensure that both matched instructions don't have any external users
1622 // (with the exception of last-in-chain reduction instructions).
1623 // - Track the (aliasing) write set, and other side effects, of all
1624 // instructions that belong to future iterations that come before the matched
1625 // instructions. If the matched instructions read from that write set, then
1626 // f(%iv) or f(%iv.(i+1)) has some dependency on instructions in
1627 // f(%iv.(j+1)) for some j > i, and we cannot reroll the loop. Similarly,
1628 // if any of these future instructions had side effects (could not be
1629 // speculatively executed), and so do the matched instructions, when we
1630 // cannot reorder those side-effect-producing instructions, and rerolling
1631 // fails.
1632 //
1633 // Finally, we make sure that all loop instructions are either loop increment
1634 // roots, belong to simple latch code, parts of validated reductions, part of
1635 // f(%iv) or part of some f(%iv.i). If all of that is true (and all reductions
1636 // have been validated), then we reroll the loop.
1637 bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
1638  const SCEV *BackedgeTakenCount,
1639  ReductionTracker &Reductions) {
1640  DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI, DT, LI, PreserveLCSSA,
1641  IVToIncMap, LoopControlIV);
1642 
1643  if (!DAGRoots.findRoots())
1644  return false;
1645  LLVM_DEBUG(dbgs() << "LRR: Found all root induction increments for: " << *IV
1646  << "\n");
1647 
1648  if (!DAGRoots.validate(Reductions))
1649  return false;
1650  if (!Reductions.validateSelected())
1651  return false;
1652  // At this point, we've validated the rerolling, and we're committed to
1653  // making changes!
1654 
1655  Reductions.replaceSelected();
1656  DAGRoots.replace(BackedgeTakenCount);
1657 
1658  ++NumRerolledLoops;
1659  return true;
1660 }
1661 
1662 bool LoopReroll::runOnLoop(Loop *L) {
1663  BasicBlock *Header = L->getHeader();
1664  LLVM_DEBUG(dbgs() << "LRR: F[" << Header->getParent()->getName() << "] Loop %"
1665  << Header->getName() << " (" << L->getNumBlocks()
1666  << " block(s))\n");
1667 
1668  // For now, we'll handle only single BB loops.
1669  if (L->getNumBlocks() > 1)
1670  return false;
1671 
1673  return false;
1674 
1675  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
1676  LLVM_DEBUG(dbgs() << "\n Before Reroll:\n" << *(L->getHeader()) << "\n");
1677  LLVM_DEBUG(dbgs() << "LRR: backedge-taken count = " << *BackedgeTakenCount
1678  << "\n");
1679 
1680  // First, we need to find the induction variable with respect to which we can
1681  // reroll (there may be several possible options).
1682  SmallInstructionVector PossibleIVs;
1683  IVToIncMap.clear();
1684  LoopControlIV = nullptr;
1685  collectPossibleIVs(L, PossibleIVs);
1686 
1687  if (PossibleIVs.empty()) {
1688  LLVM_DEBUG(dbgs() << "LRR: No possible IVs found\n");
1689  return false;
1690  }
1691 
1692  ReductionTracker Reductions;
1693  collectPossibleReductions(L, Reductions);
1694  bool Changed = false;
1695 
1696  // For each possible IV, collect the associated possible set of 'root' nodes
1697  // (i+1, i+2, etc.).
1698  for (Instruction *PossibleIV : PossibleIVs)
1699  if (reroll(PossibleIV, L, Header, BackedgeTakenCount, Reductions)) {
1700  Changed = true;
1701  break;
1702  }
1703  LLVM_DEBUG(dbgs() << "\n After Reroll:\n" << *(L->getHeader()) << "\n");
1704 
1705  // Trip count of L has changed so SE must be re-evaluated.
1706  if (Changed)
1707  SE->forgetLoop(L);
1708 
1709  return Changed;
1710 }
1711 
1712 bool LoopRerollLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
1713  if (skipLoop(L))
1714  return false;
1715 
1716  auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
1717  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1718  auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1719  auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
1720  *L->getHeader()->getParent());
1721  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1722  bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
1723 
1724  return LoopReroll(AA, LI, SE, TLI, DT, PreserveLCSSA).runOnLoop(L);
1725 }
1726 
1729  LPMUpdater &U) {
1730  return LoopReroll(&AR.AA, &AR.LI, &AR.SE, &AR.TLI, &AR.DT, true).runOnLoop(&L)
1733 }
llvm::MapVector::iterator
typename VectorType::iterator iterator
Definition: MapVector.h:49
i
i
Definition: README.txt:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::ScalarEvolution::hasLoopInvariantBackedgeTakenCount
bool hasLoopInvariantBackedgeTakenCount(const Loop *L)
Return true if the specified loop has an analyzable loop-invariant backedge-taken count.
Definition: ScalarEvolution.cpp:12384
llvm::Instruction::isAssociative
bool isAssociative() const LLVM_READONLY
Return true if the instruction is associative:
Definition: Instruction.cpp:718
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:102
llvm
Definition: AllocatorList.h:23
llvm::CmpInst::ICMP_EQ
@ ICMP_EQ
equal
Definition: InstrTypes.h:743
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:112
llvm::BasicBlock::getFirstNonPHIOrDbg
const Instruction * getFirstNonPHIOrDbg(bool SkipPseudoOp=false) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
Definition: BasicBlock.cpp:219
llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:435
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::isSafeToSpeculativelyExecute
bool isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
Definition: ValueTracking.cpp:4541
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
IntrinsicInst.h
ScalarEvolutionExpander.h
Scalar.h
llvm::SCEVAddRecExpr::getStart
const SCEV * getStart() const
Definition: ScalarEvolutionExpressions.h:362
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Pass.h
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
llvm::IntrinsicInst::getIntrinsicID
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:52
llvm::SCEVExpander
This class uses information about analyze scalars to rewrite expressions in canonical form.
Definition: ScalarEvolutionExpander.h:63
llvm::ConstantInt::getValue
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:131
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1167
Statistic.h
llvm::APInt::getSExtValue
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1643
llvm::ScalarEvolution::getAddRecExpr
const SCEV * getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
Definition: ScalarEvolution.cpp:3428
MapVector.h
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:443
ValueTracking.h
Local.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::getLoopAnalysisUsage
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:150
llvm::AliasSetTracker
Definition: AliasSetTracker.h:329
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::LoopRerollPass::run
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LoopRerollPass.cpp:1727
APInt.h
ScalarEvolution.h
DenseMap.h
Instructions
Code Generation Notes for reduce the size of the ISel and reduce repetition in the implementation In a small number of this can cause even when no optimisation has taken place Instructions
Definition: MSA.txt:11
Module.h
isSimpleArithmeticOp
static bool isSimpleArithmeticOp(User *IVU)
Return true if IVU is a "simple" arithmetic operation.
Definition: LoopRerollPass.cpp:752
reroll
loop reroll
Definition: LoopRerollPass.cpp:503
llvm::sys::path::end
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:233
llvm::sys::path::begin
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:224
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:52
llvm::MemIntrinsic
This is the common base class for memset/memcpy/memmove.
Definition: IntrinsicInst.h:834
llvm::MapVector
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:37
llvm::SmallPtrSet< Instruction *, 16 >
llvm::Value::user_begin
user_iterator user_begin()
Definition: Value.h:398
STLExtras.h
llvm::RISCVFeatures::validate
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
Definition: RISCVBaseInfo.cpp:90
llvm::LoopStandardAnalysisResults::DT
DominatorTree & DT
Definition: LoopAnalysisManager.h:55
replace
static void replace(Module &M, GlobalVariable *Old, GlobalVariable *New)
Definition: ConstantMerge.cpp:116
llvm::BasicBlock::rend
reverse_iterator rend()
Definition: BasicBlock.h:303
Use.h
llvm::LoopBase::getNumBlocks
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:185
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
llvm::BranchInst::swapSuccessors
void swapSuccessors()
Swap the successors of this branch instruction.
Definition: Instructions.cpp:1284
llvm::AMDGPU::HSAMD::ValueKind::Queue
@ Queue
Uses
SmallPtrSet< MachineInstr *, 2 > Uses
Definition: ARMLowOverheadLoops.cpp:583
loop
Analysis the ScalarEvolution expression for r is< loop > Outside the loop
Definition: README.txt:8
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
AliasAnalysis.h
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
llvm::ScalarEvolution::getMulExpr
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:2911
Instruction.h
CommandLine.h
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(LoopRerollLegacyPass, "loop-reroll", "Reroll loops", false, false) INITIALIZE_PASS_END(LoopRerollLegacyPass
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:77
llvm::SimplifyInstructionsInBlock
bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...
Definition: Local.cpp:686
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::ScalarEvolution::getOne
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
Definition: ScalarEvolution.h:607
Constants.h
llvm::AAResults
Definition: AliasAnalysis.h:456
llvm::SmallVectorImpl::append
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:647
llvm::User
Definition: User.h:44
Intrinsics.h
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
InstrTypes.h
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:296
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::replace
void replace(Container &Cont, typename Container::iterator ContIt, typename Container::iterator ContEnd, RandomAccessIterator ValIt, RandomAccessIterator ValEnd)
Given a sequence container Cont, replace the range [ContIt, ContEnd) with the range [ValIt,...
Definition: STLExtras.h:1679
TargetLibraryInfo.h
DenseSet.h
llvm::BasicBlock::getFirstInsertionPt
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:249
false
Definition: StackSlotColoring.cpp:142
llvm::BinaryOperator::getOpcode
BinaryOps getOpcode() const
Definition: InstrTypes.h:395
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::size
size_type size() const
Definition: DenseSet.h:81
llvm::initializeLoopRerollLegacyPassPass
void initializeLoopRerollLegacyPassPass(PassRegistry &)
llvm::Instruction
Definition: Instruction.h:45
llvm::BranchInst::setCondition
void setCondition(Value *V)
Definition: Instructions.h:3121
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
NumToleratedFailedMatches
static cl::opt< unsigned > NumToleratedFailedMatches("reroll-num-tolerated-failed-matches", cl::init(400), cl::Hidden, cl::desc("The maximum number of failures to tolerate" " during fuzzy matching. (default: 400)"))
BitVector.h
LoopUtils.h
llvm::BasicBlock::rbegin
reverse_iterator rbegin()
Definition: BasicBlock.h:301
llvm::LPPassManager
Definition: LoopPass.h:75
SmallPtrSet.h
llvm::BasicBlock::getModule
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:144
isLoopIncrement
static bool isLoopIncrement(User *U, Instruction *IV)
Definition: LoopRerollPass.cpp:772
Utils.h
Type.h
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::Instruction::isCommutative
bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
Definition: Instruction.cpp:733
LoopInfo.h
llvm::ARM_PROC::IE
@ IE
Definition: ARMBaseInfo.h:27
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:3975
llvm::DenseSet
Implements a dense probed hash-table based set.
Definition: DenseSet.h:268
llvm::tgtok::In
@ In
Definition: TGLexer.h:51
llvm::getLoopPassPreservedAnalyses
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Definition: LoopAnalysisManager.cpp:140
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1422
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:78
llvm::createLoopRerollPass
Pass * createLoopRerollPass()
Definition: LoopRerollPass.cpp:506
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:303
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::clear
void clear()
Definition: DenseMap.h:111
llvm::ICmpInst
This instruction compares its operands according to the predicate given to the constructor.
Definition: Instructions.h:1206
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:463
llvm::LoopPass
Definition: LoopPass.h:27
llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition: LoopPassManager.h:240
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::ARM_AM::add
@ add
Definition: ARMAddressingModes.h:39
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::DenseMap
Definition: DenseMap.h:714
llvm::SCEV::FlagAnyWrap
@ FlagAnyWrap
Definition: ScalarEvolution.h:116
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1547
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::User::replaceUsesOfWith
void replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1463
isAssociative
static bool isAssociative(const COFFSection &Section)
Definition: WinCOFFObjectWriter.cpp:930
llvm::LoopInfo
Definition: LoopInfo.h:1080
llvm::BinaryOperator
Definition: InstrTypes.h:190
DataLayout.h
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:167
llvm::ScalarEvolution::getConstant
const SCEV * getConstant(ConstantInt *V)
Definition: ScalarEvolution.cpp:444
LoopPass.h
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
llvm::Value::getNumUses
unsigned getNumUses() const
This method computes the number of uses of this Value.
Definition: Value.cpp:240
llvm::AArch64ISD::ADR
@ ADR
Definition: AArch64ISelLowering.h:62
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::LoopStandardAnalysisResults::LI
LoopInfo & LI
Definition: LoopAnalysisManager.h:56
llvm::Instruction::mayReadFromMemory
bool mayReadFromMemory() const
Return true if this instruction may read memory.
Definition: Instruction.cpp:547
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:294
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:174
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:148
users
iv users
Definition: IVUsers.cpp:52
isUnorderedLoadStore
static bool isUnorderedLoadStore(Instruction *I)
Definition: LoopRerollPass.cpp:739
loops
loop Reroll loops
Definition: LoopRerollPass.cpp:503
j
return j(j<< 16)
llvm::LCSSAID
char & LCSSAID
Definition: LCSSA.cpp:485
llvm::ScalarEvolution::forgetLoop
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
Definition: ScalarEvolution.cpp:7316
isIgnorableInst
static bool isIgnorableInst(const Instruction *I)
Definition: LoopRerollPass.cpp:1138
llvm::ScalarEvolution::getMinusSCEV
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
Definition: ScalarEvolution.cpp:4076
get
Should compile to something r4 addze r3 instead we get
Definition: README.txt:24
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:83
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:314
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:352
Casting.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:219
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:584
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:183
llvm::LoopStandardAnalysisResults::SE
ScalarEvolution & SE
Definition: LoopAnalysisManager.h:57
llvm::DeleteDeadPHIs
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
Definition: BasicBlockUtils.cpp:157
llvm::LoopStandardAnalysisResults::AA
AAResults & AA
Definition: LoopAnalysisManager.h:53
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:45
ScalarEvolutionExpressions.h
LoopReroll.h
llvm::BasicBlock::reverse_iterator
InstListType::reverse_iterator reverse_iterator
Definition: BasicBlock.h:92
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
Instructions.h
llvm::User::getNumOperands
unsigned getNumOperands() const
Definition: User.h:191
SmallVector.h
User.h
Dominators.h
N
#define N
llvm::ScalarEvolution::getZero
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
Definition: ScalarEvolution.h:604
Users
iv Induction Variable Users
Definition: IVUsers.cpp:52
llvm::PHINode
Definition: Instructions.h:2600
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:397
DerivedTypes.h
llvm::SCEV::getType
Type * getType() const
Return the LLVM type of this SCEV expression.
Definition: ScalarEvolution.cpp:379
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::ScalarEvolution::getAddExpr
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:2395
AliasSetTracker.h
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:171
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:414
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3035
raw_ostream.h
llvm::Instruction::isSameOperationAs
bool isSameOperationAs(const Instruction *I, unsigned flags=0) const
This function determines if the specified instruction executes the same operation as the current one.
Definition: Instruction.cpp:505
llvm::ScalarEvolution::getBackedgeTakenCount
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
Definition: ScalarEvolution.cpp:7148
BasicBlockUtils.h
hasUsesOutsideLoop
static bool hasUsesOutsideLoop(Instruction *I, Loop *L)
Definition: LoopRerollPass.cpp:511
Value.h
llvm::abs
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1284
llvm::LoopStandardAnalysisResults::TLI
TargetLibraryInfo & TLI
Definition: LoopAnalysisManager.h:58
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
Debug.h
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:422
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3128
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38