LLVM  14.0.0git
LoopRerollPass.cpp
Go to the documentation of this file.
1 //===- LoopReroll.cpp - Loop rerolling pass -------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass implements a simple loop reroller.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/ADT/APInt.h"
14 #include "llvm/ADT/BitVector.h"
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/DenseSet.h"
17 #include "llvm/ADT/MapVector.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/SmallPtrSet.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Statistic.h"
24 #include "llvm/Analysis/LoopInfo.h"
25 #include "llvm/Analysis/LoopPass.h"
30 #include "llvm/IR/BasicBlock.h"
31 #include "llvm/IR/Constants.h"
32 #include "llvm/IR/DataLayout.h"
33 #include "llvm/IR/DerivedTypes.h"
34 #include "llvm/IR/Dominators.h"
35 #include "llvm/IR/IRBuilder.h"
36 #include "llvm/IR/InstrTypes.h"
37 #include "llvm/IR/Instruction.h"
38 #include "llvm/IR/Instructions.h"
39 #include "llvm/IR/IntrinsicInst.h"
40 #include "llvm/IR/Intrinsics.h"
41 #include "llvm/IR/Module.h"
42 #include "llvm/IR/Type.h"
43 #include "llvm/IR/Use.h"
44 #include "llvm/IR/User.h"
45 #include "llvm/IR/Value.h"
46 #include "llvm/InitializePasses.h"
47 #include "llvm/Pass.h"
48 #include "llvm/Support/Casting.h"
50 #include "llvm/Support/Debug.h"
52 #include "llvm/Transforms/Scalar.h"
54 #include "llvm/Transforms/Utils.h"
59 #include <cassert>
60 #include <cstddef>
61 #include <cstdint>
62 #include <cstdlib>
63 #include <iterator>
64 #include <map>
65 #include <utility>
66 
67 using namespace llvm;
68 
69 #define DEBUG_TYPE "loop-reroll"
70 
71 STATISTIC(NumRerolledLoops, "Number of rerolled loops");
72 
73 static cl::opt<unsigned>
74 NumToleratedFailedMatches("reroll-num-tolerated-failed-matches", cl::init(400),
75  cl::Hidden,
76  cl::desc("The maximum number of failures to tolerate"
77  " during fuzzy matching. (default: 400)"));
78 
79 // This loop re-rolling transformation aims to transform loops like this:
80 //
81 // int foo(int a);
82 // void bar(int *x) {
83 // for (int i = 0; i < 500; i += 3) {
84 // foo(i);
85 // foo(i+1);
86 // foo(i+2);
87 // }
88 // }
89 //
90 // into a loop like this:
91 //
92 // void bar(int *x) {
93 // for (int i = 0; i < 500; ++i)
94 // foo(i);
95 // }
96 //
97 // It does this by looking for loops that, besides the latch code, are composed
98 // of isomorphic DAGs of instructions, with each DAG rooted at some increment
99 // to the induction variable, and where each DAG is isomorphic to the DAG
100 // rooted at the induction variable (excepting the sub-DAGs which root the
101 // other induction-variable increments). In other words, we're looking for loop
102 // bodies of the form:
103 //
104 // %iv = phi [ (preheader, ...), (body, %iv.next) ]
105 // f(%iv)
106 // %iv.1 = add %iv, 1 <-- a root increment
107 // f(%iv.1)
108 // %iv.2 = add %iv, 2 <-- a root increment
109 // f(%iv.2)
110 // %iv.scale_m_1 = add %iv, scale-1 <-- a root increment
111 // f(%iv.scale_m_1)
112 // ...
113 // %iv.next = add %iv, scale
114 // %cmp = icmp(%iv, ...)
115 // br %cmp, header, exit
116 //
117 // where each f(i) is a set of instructions that, collectively, are a function
118 // only of i (and other loop-invariant values).
119 //
120 // As a special case, we can also reroll loops like this:
121 //
122 // int foo(int);
123 // void bar(int *x) {
124 // for (int i = 0; i < 500; ++i) {
125 // x[3*i] = foo(0);
126 // x[3*i+1] = foo(0);
127 // x[3*i+2] = foo(0);
128 // }
129 // }
130 //
131 // into this:
132 //
133 // void bar(int *x) {
134 // for (int i = 0; i < 1500; ++i)
135 // x[i] = foo(0);
136 // }
137 //
138 // in which case, we're looking for inputs like this:
139 //
140 // %iv = phi [ (preheader, ...), (body, %iv.next) ]
141 // %scaled.iv = mul %iv, scale
142 // f(%scaled.iv)
143 // %scaled.iv.1 = add %scaled.iv, 1
144 // f(%scaled.iv.1)
145 // %scaled.iv.2 = add %scaled.iv, 2
146 // f(%scaled.iv.2)
147 // %scaled.iv.scale_m_1 = add %scaled.iv, scale-1
148 // f(%scaled.iv.scale_m_1)
149 // ...
150 // %iv.next = add %iv, 1
151 // %cmp = icmp(%iv, ...)
152 // br %cmp, header, exit
153 
154 namespace {
155 
156  enum IterationLimits {
157  /// The maximum number of iterations that we'll try and reroll.
158  IL_MaxRerollIterations = 32,
159  /// The bitvector index used by loop induction variables and other
160  /// instructions that belong to all iterations.
161  IL_All,
162  IL_End
163  };
164 
165  class LoopRerollLegacyPass : public LoopPass {
166  public:
167  static char ID; // Pass ID, replacement for typeid
168 
169  LoopRerollLegacyPass() : LoopPass(ID) {
171  }
172 
173  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
174 
175  void getAnalysisUsage(AnalysisUsage &AU) const override {
178  }
179  };
180 
181  class LoopReroll {
182  public:
183  LoopReroll(AliasAnalysis *AA, LoopInfo *LI, ScalarEvolution *SE,
184  TargetLibraryInfo *TLI, DominatorTree *DT, bool PreserveLCSSA)
185  : AA(AA), LI(LI), SE(SE), TLI(TLI), DT(DT),
186  PreserveLCSSA(PreserveLCSSA) {}
187  bool runOnLoop(Loop *L);
188 
189  protected:
190  AliasAnalysis *AA;
191  LoopInfo *LI;
192  ScalarEvolution *SE;
193  TargetLibraryInfo *TLI;
194  DominatorTree *DT;
195  bool PreserveLCSSA;
196 
197  using SmallInstructionVector = SmallVector<Instruction *, 16>;
198  using SmallInstructionSet = SmallPtrSet<Instruction *, 16>;
199 
200  // Map between induction variable and its increment
202 
203  // For loop with multiple induction variable, remember the one used only to
204  // control the loop.
205  Instruction *LoopControlIV;
206 
207  // A chain of isomorphic instructions, identified by a single-use PHI
208  // representing a reduction. Only the last value may be used outside the
209  // loop.
210  struct SimpleLoopReduction {
211  SimpleLoopReduction(Instruction *P, Loop *L) : Instructions(1, P) {
212  assert(isa<PHINode>(P) && "First reduction instruction must be a PHI");
213  add(L);
214  }
215 
216  bool valid() const {
217  return Valid;
218  }
219 
220  Instruction *getPHI() const {
221  assert(Valid && "Using invalid reduction");
222  return Instructions.front();
223  }
224 
225  Instruction *getReducedValue() const {
226  assert(Valid && "Using invalid reduction");
227  return Instructions.back();
228  }
229 
230  Instruction *get(size_t i) const {
231  assert(Valid && "Using invalid reduction");
232  return Instructions[i+1];
233  }
234 
235  Instruction *operator [] (size_t i) const { return get(i); }
236 
237  // The size, ignoring the initial PHI.
238  size_t size() const {
239  assert(Valid && "Using invalid reduction");
240  return Instructions.size()-1;
241  }
242 
243  using iterator = SmallInstructionVector::iterator;
244  using const_iterator = SmallInstructionVector::const_iterator;
245 
246  iterator begin() {
247  assert(Valid && "Using invalid reduction");
248  return std::next(Instructions.begin());
249  }
250 
251  const_iterator begin() const {
252  assert(Valid && "Using invalid reduction");
253  return std::next(Instructions.begin());
254  }
255 
256  iterator end() { return Instructions.end(); }
257  const_iterator end() const { return Instructions.end(); }
258 
259  protected:
260  bool Valid = false;
261  SmallInstructionVector Instructions;
262 
263  void add(Loop *L);
264  };
265 
266  // The set of all reductions, and state tracking of possible reductions
267  // during loop instruction processing.
268  struct ReductionTracker {
269  using SmallReductionVector = SmallVector<SimpleLoopReduction, 16>;
270 
271  // Add a new possible reduction.
272  void addSLR(SimpleLoopReduction &SLR) { PossibleReds.push_back(SLR); }
273 
274  // Setup to track possible reductions corresponding to the provided
275  // rerolling scale. Only reductions with a number of non-PHI instructions
276  // that is divisible by the scale are considered. Three instructions sets
277  // are filled in:
278  // - A set of all possible instructions in eligible reductions.
279  // - A set of all PHIs in eligible reductions
280  // - A set of all reduced values (last instructions) in eligible
281  // reductions.
282  void restrictToScale(uint64_t Scale,
283  SmallInstructionSet &PossibleRedSet,
284  SmallInstructionSet &PossibleRedPHISet,
285  SmallInstructionSet &PossibleRedLastSet) {
286  PossibleRedIdx.clear();
287  PossibleRedIter.clear();
288  Reds.clear();
289 
290  for (unsigned i = 0, e = PossibleReds.size(); i != e; ++i)
291  if (PossibleReds[i].size() % Scale == 0) {
292  PossibleRedLastSet.insert(PossibleReds[i].getReducedValue());
293  PossibleRedPHISet.insert(PossibleReds[i].getPHI());
294 
295  PossibleRedSet.insert(PossibleReds[i].getPHI());
296  PossibleRedIdx[PossibleReds[i].getPHI()] = i;
297  for (Instruction *J : PossibleReds[i]) {
298  PossibleRedSet.insert(J);
299  PossibleRedIdx[J] = i;
300  }
301  }
302  }
303 
304  // The functions below are used while processing the loop instructions.
305 
306  // Are the two instructions both from reductions, and furthermore, from
307  // the same reduction?
308  bool isPairInSame(Instruction *J1, Instruction *J2) {
309  DenseMap<Instruction *, int>::iterator J1I = PossibleRedIdx.find(J1);
310  if (J1I != PossibleRedIdx.end()) {
311  DenseMap<Instruction *, int>::iterator J2I = PossibleRedIdx.find(J2);
312  if (J2I != PossibleRedIdx.end() && J1I->second == J2I->second)
313  return true;
314  }
315 
316  return false;
317  }
318 
319  // The two provided instructions, the first from the base iteration, and
320  // the second from iteration i, form a matched pair. If these are part of
321  // a reduction, record that fact.
322  void recordPair(Instruction *J1, Instruction *J2, unsigned i) {
323  if (PossibleRedIdx.count(J1)) {
324  assert(PossibleRedIdx.count(J2) &&
325  "Recording reduction vs. non-reduction instruction?");
326 
327  PossibleRedIter[J1] = 0;
328  PossibleRedIter[J2] = i;
329 
330  int Idx = PossibleRedIdx[J1];
331  assert(Idx == PossibleRedIdx[J2] &&
332  "Recording pair from different reductions?");
333  Reds.insert(Idx);
334  }
335  }
336 
337  // The functions below can be called after we've finished processing all
338  // instructions in the loop, and we know which reductions were selected.
339 
340  bool validateSelected();
341  void replaceSelected();
342 
343  protected:
344  // The vector of all possible reductions (for any scale).
345  SmallReductionVector PossibleReds;
346 
347  DenseMap<Instruction *, int> PossibleRedIdx;
348  DenseMap<Instruction *, int> PossibleRedIter;
349  DenseSet<int> Reds;
350  };
351 
352  // A DAGRootSet models an induction variable being used in a rerollable
353  // loop. For example,
354  //
355  // x[i*3+0] = y1
356  // x[i*3+1] = y2
357  // x[i*3+2] = y3
358  //
359  // Base instruction -> i*3
360  // +---+----+
361  // / | \
362  // ST[y1] +1 +2 <-- Roots
363  // | |
364  // ST[y2] ST[y3]
365  //
366  // There may be multiple DAGRoots, for example:
367  //
368  // x[i*2+0] = ... (1)
369  // x[i*2+1] = ... (1)
370  // x[i*2+4] = ... (2)
371  // x[i*2+5] = ... (2)
372  // x[(i+1234)*2+5678] = ... (3)
373  // x[(i+1234)*2+5679] = ... (3)
374  //
375  // The loop will be rerolled by adding a new loop induction variable,
376  // one for the Base instruction in each DAGRootSet.
377  //
378  struct DAGRootSet {
379  Instruction *BaseInst;
380  SmallInstructionVector Roots;
381 
382  // The instructions between IV and BaseInst (but not including BaseInst).
383  SmallInstructionSet SubsumedInsts;
384  };
385 
386  // The set of all DAG roots, and state tracking of all roots
387  // for a particular induction variable.
388  struct DAGRootTracker {
389  DAGRootTracker(LoopReroll *Parent, Loop *L, Instruction *IV,
392  bool PreserveLCSSA,
394  Instruction *LoopCtrlIV)
395  : Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), DT(DT), LI(LI),
396  PreserveLCSSA(PreserveLCSSA), IV(IV), IVToIncMap(IncrMap),
397  LoopControlIV(LoopCtrlIV) {}
398 
399  /// Stage 1: Find all the DAG roots for the induction variable.
400  bool findRoots();
401 
402  /// Stage 2: Validate if the found roots are valid.
403  bool validate(ReductionTracker &Reductions);
404 
405  /// Stage 3: Assuming validate() returned true, perform the
406  /// replacement.
407  /// @param BackedgeTakenCount The backedge-taken count of L.
408  void replace(const SCEV *BackedgeTakenCount);
409 
410  protected:
412 
413  void findRootsRecursive(Instruction *IVU,
414  SmallInstructionSet SubsumedInsts);
415  bool findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts);
416  bool collectPossibleRoots(Instruction *Base,
417  std::map<int64_t,Instruction*> &Roots);
418  bool validateRootSet(DAGRootSet &DRS);
419 
420  bool collectUsedInstructions(SmallInstructionSet &PossibleRedSet);
421  void collectInLoopUserSet(const SmallInstructionVector &Roots,
422  const SmallInstructionSet &Exclude,
423  const SmallInstructionSet &Final,
425  void collectInLoopUserSet(Instruction *Root,
426  const SmallInstructionSet &Exclude,
427  const SmallInstructionSet &Final,
429 
430  UsesTy::iterator nextInstr(int Val, UsesTy &In,
431  const SmallInstructionSet &Exclude,
432  UsesTy::iterator *StartI=nullptr);
433  bool isBaseInst(Instruction *I);
434  bool isRootInst(Instruction *I);
435  bool instrDependsOn(Instruction *I,
436  UsesTy::iterator Start,
437  UsesTy::iterator End);
438  void replaceIV(DAGRootSet &DRS, const SCEV *Start, const SCEV *IncrExpr);
439 
440  LoopReroll *Parent;
441 
442  // Members of Parent, replicated here for brevity.
443  Loop *L;
444  ScalarEvolution *SE;
445  AliasAnalysis *AA;
446  TargetLibraryInfo *TLI;
447  DominatorTree *DT;
448  LoopInfo *LI;
449  bool PreserveLCSSA;
450 
451  // The loop induction variable.
452  Instruction *IV;
453 
454  // Loop step amount.
455  int64_t Inc;
456 
457  // Loop reroll count; if Inc == 1, this records the scaling applied
458  // to the indvar: a[i*2+0] = ...; a[i*2+1] = ... ;
459  // If Inc is not 1, Scale = Inc.
460  uint64_t Scale;
461 
462  // The roots themselves.
464 
465  // All increment instructions for IV.
466  SmallInstructionVector LoopIncs;
467 
468  // Map of all instructions in the loop (in order) to the iterations
469  // they are used in (or specially, IL_All for instructions
470  // used in the loop increment mechanism).
471  UsesTy Uses;
472 
473  // Map between induction variable and its increment
475 
476  Instruction *LoopControlIV;
477  };
478 
479  // Check if it is a compare-like instruction whose user is a branch
480  bool isCompareUsedByBranch(Instruction *I) {
481  auto *TI = I->getParent()->getTerminator();
482  if (!isa<BranchInst>(TI) || !isa<CmpInst>(I))
483  return false;
484  return I->hasOneUse() && TI->getOperand(0) == I;
485  };
486 
487  bool isLoopControlIV(Loop *L, Instruction *IV);
488  void collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs);
489  void collectPossibleReductions(Loop *L,
490  ReductionTracker &Reductions);
491  bool reroll(Instruction *IV, Loop *L, BasicBlock *Header,
492  const SCEV *BackedgeTakenCount, ReductionTracker &Reductions);
493  };
494 
495 } // end anonymous namespace
496 
497 char LoopRerollLegacyPass::ID = 0;
498 
499 INITIALIZE_PASS_BEGIN(LoopRerollLegacyPass, "loop-reroll", "Reroll loops",
500  false, false)
503 INITIALIZE_PASS_END(LoopRerollLegacyPass, "loop-reroll", "Reroll loops", false,
504  false)
505 
506 Pass *llvm::createLoopRerollPass() { return new LoopRerollLegacyPass; }
507 
508 // Returns true if the provided instruction is used outside the given loop.
509 // This operates like Instruction::isUsedOutsideOfBlock, but considers PHIs in
510 // non-loop blocks to be outside the loop.
512  for (User *U : I->users()) {
513  if (!L->contains(cast<Instruction>(U)))
514  return true;
515  }
516  return false;
517 }
518 
519 // Check if an IV is only used to control the loop. There are two cases:
520 // 1. It only has one use which is loop increment, and the increment is only
521 // used by comparison and the PHI (could has sext with nsw in between), and the
522 // comparison is only used by branch.
523 // 2. It is used by loop increment and the comparison, the loop increment is
524 // only used by the PHI, and the comparison is used only by the branch.
525 bool LoopReroll::isLoopControlIV(Loop *L, Instruction *IV) {
526  unsigned IVUses = IV->getNumUses();
527  if (IVUses != 2 && IVUses != 1)
528  return false;
529 
530  for (auto *User : IV->users()) {
531  int32_t IncOrCmpUses = User->getNumUses();
532  bool IsCompInst = isCompareUsedByBranch(cast<Instruction>(User));
533 
534  // User can only have one or two uses.
535  if (IncOrCmpUses != 2 && IncOrCmpUses != 1)
536  return false;
537 
538  // Case 1
539  if (IVUses == 1) {
540  // The only user must be the loop increment.
541  // The loop increment must have two uses.
542  if (IsCompInst || IncOrCmpUses != 2)
543  return false;
544  }
545 
546  // Case 2
547  if (IVUses == 2 && IncOrCmpUses != 1)
548  return false;
549 
550  // The users of the IV must be a binary operation or a comparison
551  if (auto *BO = dyn_cast<BinaryOperator>(User)) {
552  if (BO->getOpcode() == Instruction::Add) {
553  // Loop Increment
554  // User of Loop Increment should be either PHI or CMP
555  for (auto *UU : User->users()) {
556  if (PHINode *PN = dyn_cast<PHINode>(UU)) {
557  if (PN != IV)
558  return false;
559  }
560  // Must be a CMP or an ext (of a value with nsw) then CMP
561  else {
562  Instruction *UUser = dyn_cast<Instruction>(UU);
563  // Skip SExt if we are extending an nsw value
564  // TODO: Allow ZExt too
565  if (BO->hasNoSignedWrap() && UUser && UUser->hasOneUse() &&
566  isa<SExtInst>(UUser))
567  UUser = dyn_cast<Instruction>(*(UUser->user_begin()));
568  if (!isCompareUsedByBranch(UUser))
569  return false;
570  }
571  }
572  } else
573  return false;
574  // Compare : can only have one use, and must be branch
575  } else if (!IsCompInst)
576  return false;
577  }
578  return true;
579 }
580 
581 // Collect the list of loop induction variables with respect to which it might
582 // be possible to reroll the loop.
583 void LoopReroll::collectPossibleIVs(Loop *L,
584  SmallInstructionVector &PossibleIVs) {
585  BasicBlock *Header = L->getHeader();
586  for (BasicBlock::iterator I = Header->begin(),
587  IE = Header->getFirstInsertionPt(); I != IE; ++I) {
588  if (!isa<PHINode>(I))
589  continue;
590  if (!I->getType()->isIntegerTy() && !I->getType()->isPointerTy())
591  continue;
592 
593  if (const SCEVAddRecExpr *PHISCEV =
594  dyn_cast<SCEVAddRecExpr>(SE->getSCEV(&*I))) {
595  if (PHISCEV->getLoop() != L)
596  continue;
597  if (!PHISCEV->isAffine())
598  continue;
599  auto IncSCEV = dyn_cast<SCEVConstant>(PHISCEV->getStepRecurrence(*SE));
600  if (IncSCEV) {
601  IVToIncMap[&*I] = IncSCEV->getValue()->getSExtValue();
602  LLVM_DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " << *PHISCEV
603  << "\n");
604 
605  if (isLoopControlIV(L, &*I)) {
606  assert(!LoopControlIV && "Found two loop control only IV");
607  LoopControlIV = &(*I);
608  LLVM_DEBUG(dbgs() << "LRR: Possible loop control only IV: " << *I
609  << " = " << *PHISCEV << "\n");
610  } else
611  PossibleIVs.push_back(&*I);
612  }
613  }
614  }
615 }
616 
617 // Add the remainder of the reduction-variable chain to the instruction vector
618 // (the initial PHINode has already been added). If successful, the object is
619 // marked as valid.
621  assert(!Valid && "Cannot add to an already-valid chain");
622 
623  // The reduction variable must be a chain of single-use instructions
624  // (including the PHI), except for the last value (which is used by the PHI
625  // and also outside the loop).
626  Instruction *C = Instructions.front();
627  if (C->user_empty())
628  return;
629 
630  do {
631  C = cast<Instruction>(*C->user_begin());
632  if (C->hasOneUse()) {
633  if (!C->isBinaryOp())
634  return;
635 
636  if (!(isa<PHINode>(Instructions.back()) ||
637  C->isSameOperationAs(Instructions.back())))
638  return;
639 
640  Instructions.push_back(C);
641  }
642  } while (C->hasOneUse());
643 
644  if (Instructions.size() < 2 ||
645  !C->isSameOperationAs(Instructions.back()) ||
646  C->use_empty())
647  return;
648 
649  // C is now the (potential) last instruction in the reduction chain.
650  for (User *U : C->users()) {
651  // The only in-loop user can be the initial PHI.
652  if (L->contains(cast<Instruction>(U)))
653  if (cast<Instruction>(U) != Instructions.front())
654  return;
655  }
656 
657  Instructions.push_back(C);
658  Valid = true;
659 }
660 
661 // Collect the vector of possible reduction variables.
662 void LoopReroll::collectPossibleReductions(Loop *L,
663  ReductionTracker &Reductions) {
664  BasicBlock *Header = L->getHeader();
665  for (BasicBlock::iterator I = Header->begin(),
666  IE = Header->getFirstInsertionPt(); I != IE; ++I) {
667  if (!isa<PHINode>(I))
668  continue;
669  if (!I->getType()->isSingleValueType())
670  continue;
671 
672  SimpleLoopReduction SLR(&*I, L);
673  if (!SLR.valid())
674  continue;
675 
676  LLVM_DEBUG(dbgs() << "LRR: Possible reduction: " << *I << " (with "
677  << SLR.size() << " chained instructions)\n");
678  Reductions.addSLR(SLR);
679  }
680 }
681 
682 // Collect the set of all users of the provided root instruction. This set of
683 // users contains not only the direct users of the root instruction, but also
684 // all users of those users, and so on. There are two exceptions:
685 //
686 // 1. Instructions in the set of excluded instructions are never added to the
687 // use set (even if they are users). This is used, for example, to exclude
688 // including root increments in the use set of the primary IV.
689 //
690 // 2. Instructions in the set of final instructions are added to the use set
691 // if they are users, but their users are not added. This is used, for
692 // example, to prevent a reduction update from forcing all later reduction
693 // updates into the use set.
694 void LoopReroll::DAGRootTracker::collectInLoopUserSet(
695  Instruction *Root, const SmallInstructionSet &Exclude,
696  const SmallInstructionSet &Final,
698  SmallInstructionVector Queue(1, Root);
699  while (!Queue.empty()) {
700  Instruction *I = Queue.pop_back_val();
701  if (!Users.insert(I).second)
702  continue;
703 
704  if (!Final.count(I))
705  for (Use &U : I->uses()) {
706  Instruction *User = cast<Instruction>(U.getUser());
707  if (PHINode *PN = dyn_cast<PHINode>(User)) {
708  // Ignore "wrap-around" uses to PHIs of this loop's header.
709  if (PN->getIncomingBlock(U) == L->getHeader())
710  continue;
711  }
712 
713  if (L->contains(User) && !Exclude.count(User)) {
714  Queue.push_back(User);
715  }
716  }
717 
718  // We also want to collect single-user "feeder" values.
719  for (Use &U : I->operands()) {
720  if (Instruction *Op = dyn_cast<Instruction>(U))
721  if (Op->hasOneUse() && L->contains(Op) && !Exclude.count(Op) &&
722  !Final.count(Op))
723  Queue.push_back(Op);
724  }
725  }
726 }
727 
728 // Collect all of the users of all of the provided root instructions (combined
729 // into a single set).
730 void LoopReroll::DAGRootTracker::collectInLoopUserSet(
731  const SmallInstructionVector &Roots,
732  const SmallInstructionSet &Exclude,
733  const SmallInstructionSet &Final,
735  for (Instruction *Root : Roots)
736  collectInLoopUserSet(Root, Exclude, Final, Users);
737 }
738 
740  if (LoadInst *LI = dyn_cast<LoadInst>(I))
741  return LI->isUnordered();
742  if (StoreInst *SI = dyn_cast<StoreInst>(I))
743  return SI->isUnordered();
744  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
745  return !MI->isVolatile();
746  return false;
747 }
748 
749 /// Return true if IVU is a "simple" arithmetic operation.
750 /// This is used for narrowing the search space for DAGRoots; only arithmetic
751 /// and GEPs can be part of a DAGRoot.
752 static bool isSimpleArithmeticOp(User *IVU) {
753  if (Instruction *I = dyn_cast<Instruction>(IVU)) {
754  switch (I->getOpcode()) {
755  default: return false;
756  case Instruction::Add:
757  case Instruction::Sub:
758  case Instruction::Mul:
759  case Instruction::Shl:
760  case Instruction::AShr:
761  case Instruction::LShr:
762  case Instruction::GetElementPtr:
763  case Instruction::Trunc:
764  case Instruction::ZExt:
765  case Instruction::SExt:
766  return true;
767  }
768  }
769  return false;
770 }
771 
772 static bool isLoopIncrement(User *U, Instruction *IV) {
773  BinaryOperator *BO = dyn_cast<BinaryOperator>(U);
774 
775  if ((BO && BO->getOpcode() != Instruction::Add) ||
776  (!BO && !isa<GetElementPtrInst>(U)))
777  return false;
778 
779  for (auto *UU : U->users()) {
780  PHINode *PN = dyn_cast<PHINode>(UU);
781  if (PN && PN == IV)
782  return true;
783  }
784  return false;
785 }
786 
787 bool LoopReroll::DAGRootTracker::
788 collectPossibleRoots(Instruction *Base, std::map<int64_t,Instruction*> &Roots) {
789  SmallInstructionVector BaseUsers;
790 
791  for (auto *I : Base->users()) {
792  ConstantInt *CI = nullptr;
793 
794  if (isLoopIncrement(I, IV)) {
795  LoopIncs.push_back(cast<Instruction>(I));
796  continue;
797  }
798 
799  // The root nodes must be either GEPs, ORs or ADDs.
800  if (auto *BO = dyn_cast<BinaryOperator>(I)) {
801  if (BO->getOpcode() == Instruction::Add ||
802  BO->getOpcode() == Instruction::Or)
803  CI = dyn_cast<ConstantInt>(BO->getOperand(1));
804  } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
805  Value *LastOperand = GEP->getOperand(GEP->getNumOperands()-1);
806  CI = dyn_cast<ConstantInt>(LastOperand);
807  }
808 
809  if (!CI) {
810  if (Instruction *II = dyn_cast<Instruction>(I)) {
811  BaseUsers.push_back(II);
812  continue;
813  } else {
814  LLVM_DEBUG(dbgs() << "LRR: Aborting due to non-instruction: " << *I
815  << "\n");
816  return false;
817  }
818  }
819 
820  int64_t V = std::abs(CI->getValue().getSExtValue());
821  if (Roots.find(V) != Roots.end())
822  // No duplicates, please.
823  return false;
824 
825  Roots[V] = cast<Instruction>(I);
826  }
827 
828  // Make sure we have at least two roots.
829  if (Roots.empty() || (Roots.size() == 1 && BaseUsers.empty()))
830  return false;
831 
832  // If we found non-loop-inc, non-root users of Base, assume they are
833  // for the zeroth root index. This is because "add %a, 0" gets optimized
834  // away.
835  if (BaseUsers.size()) {
836  if (Roots.find(0) != Roots.end()) {
837  LLVM_DEBUG(dbgs() << "LRR: Multiple roots found for base - aborting!\n");
838  return false;
839  }
840  Roots[0] = Base;
841  }
842 
843  // Calculate the number of users of the base, or lowest indexed, iteration.
844  unsigned NumBaseUses = BaseUsers.size();
845  if (NumBaseUses == 0)
846  NumBaseUses = Roots.begin()->second->getNumUses();
847 
848  // Check that every node has the same number of users.
849  for (auto &KV : Roots) {
850  if (KV.first == 0)
851  continue;
852  if (!KV.second->hasNUses(NumBaseUses)) {
853  LLVM_DEBUG(dbgs() << "LRR: Aborting - Root and Base #users not the same: "
854  << "#Base=" << NumBaseUses
855  << ", #Root=" << KV.second->getNumUses() << "\n");
856  return false;
857  }
858  }
859 
860  return true;
861 }
862 
863 void LoopReroll::DAGRootTracker::
864 findRootsRecursive(Instruction *I, SmallInstructionSet SubsumedInsts) {
865  // Does the user look like it could be part of a root set?
866  // All its users must be simple arithmetic ops.
867  if (I->hasNUsesOrMore(IL_MaxRerollIterations + 1))
868  return;
869 
870  if (I != IV && findRootsBase(I, SubsumedInsts))
871  return;
872 
873  SubsumedInsts.insert(I);
874 
875  for (User *V : I->users()) {
876  Instruction *I = cast<Instruction>(V);
877  if (is_contained(LoopIncs, I))
878  continue;
879 
880  if (!isSimpleArithmeticOp(I))
881  continue;
882 
883  // The recursive call makes a copy of SubsumedInsts.
884  findRootsRecursive(I, SubsumedInsts);
885  }
886 }
887 
888 bool LoopReroll::DAGRootTracker::validateRootSet(DAGRootSet &DRS) {
889  if (DRS.Roots.empty())
890  return false;
891 
892  // If the value of the base instruction is used outside the loop, we cannot
893  // reroll the loop. Check for other root instructions is unnecessary because
894  // they don't match any base instructions if their values are used outside.
895  if (hasUsesOutsideLoop(DRS.BaseInst, L))
896  return false;
897 
898  // Consider a DAGRootSet with N-1 roots (so N different values including
899  // BaseInst).
900  // Define d = Roots[0] - BaseInst, which should be the same as
901  // Roots[I] - Roots[I-1] for all I in [1..N).
902  // Define D = BaseInst@J - BaseInst@J-1, where "@J" means the value at the
903  // loop iteration J.
904  //
905  // Now, For the loop iterations to be consecutive:
906  // D = d * N
907  const auto *ADR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(DRS.BaseInst));
908  if (!ADR)
909  return false;
910 
911  // Check that the first root is evenly spaced.
912  unsigned N = DRS.Roots.size() + 1;
913  const SCEV *StepSCEV = SE->getMinusSCEV(SE->getSCEV(DRS.Roots[0]), ADR);
914  if (isa<SCEVCouldNotCompute>(StepSCEV) || StepSCEV->getType()->isPointerTy())
915  return false;
916  const SCEV *ScaleSCEV = SE->getConstant(StepSCEV->getType(), N);
917  if (ADR->getStepRecurrence(*SE) != SE->getMulExpr(StepSCEV, ScaleSCEV))
918  return false;
919 
920  // Check that the remainling roots are evenly spaced.
921  for (unsigned i = 1; i < N - 1; ++i) {
922  const SCEV *NewStepSCEV = SE->getMinusSCEV(SE->getSCEV(DRS.Roots[i]),
923  SE->getSCEV(DRS.Roots[i-1]));
924  if (NewStepSCEV != StepSCEV)
925  return false;
926  }
927 
928  return true;
929 }
930 
931 bool LoopReroll::DAGRootTracker::
932 findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts) {
933  // The base of a RootSet must be an AddRec, so it can be erased.
934  const auto *IVU_ADR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IVU));
935  if (!IVU_ADR || IVU_ADR->getLoop() != L)
936  return false;
937 
938  std::map<int64_t, Instruction*> V;
939  if (!collectPossibleRoots(IVU, V))
940  return false;
941 
942  // If we didn't get a root for index zero, then IVU must be
943  // subsumed.
944  if (V.find(0) == V.end())
945  SubsumedInsts.insert(IVU);
946 
947  // Partition the vector into monotonically increasing indexes.
948  DAGRootSet DRS;
949  DRS.BaseInst = nullptr;
950 
951  SmallVector<DAGRootSet, 16> PotentialRootSets;
952 
953  for (auto &KV : V) {
954  if (!DRS.BaseInst) {
955  DRS.BaseInst = KV.second;
956  DRS.SubsumedInsts = SubsumedInsts;
957  } else if (DRS.Roots.empty()) {
958  DRS.Roots.push_back(KV.second);
959  } else if (V.find(KV.first - 1) != V.end()) {
960  DRS.Roots.push_back(KV.second);
961  } else {
962  // Linear sequence terminated.
963  if (!validateRootSet(DRS))
964  return false;
965 
966  // Construct a new DAGRootSet with the next sequence.
967  PotentialRootSets.push_back(DRS);
968  DRS.BaseInst = KV.second;
969  DRS.Roots.clear();
970  }
971  }
972 
973  if (!validateRootSet(DRS))
974  return false;
975 
976  PotentialRootSets.push_back(DRS);
977 
978  RootSets.append(PotentialRootSets.begin(), PotentialRootSets.end());
979 
980  return true;
981 }
982 
983 bool LoopReroll::DAGRootTracker::findRoots() {
984  Inc = IVToIncMap[IV];
985 
986  assert(RootSets.empty() && "Unclean state!");
987  if (std::abs(Inc) == 1) {
988  for (auto *IVU : IV->users()) {
989  if (isLoopIncrement(IVU, IV))
990  LoopIncs.push_back(cast<Instruction>(IVU));
991  }
992  findRootsRecursive(IV, SmallInstructionSet());
993  LoopIncs.push_back(IV);
994  } else {
995  if (!findRootsBase(IV, SmallInstructionSet()))
996  return false;
997  }
998 
999  // Ensure all sets have the same size.
1000  if (RootSets.empty()) {
1001  LLVM_DEBUG(dbgs() << "LRR: Aborting because no root sets found!\n");
1002  return false;
1003  }
1004  for (auto &V : RootSets) {
1005  if (V.Roots.empty() || V.Roots.size() != RootSets[0].Roots.size()) {
1006  LLVM_DEBUG(
1007  dbgs()
1008  << "LRR: Aborting because not all root sets have the same size\n");
1009  return false;
1010  }
1011  }
1012 
1013  Scale = RootSets[0].Roots.size() + 1;
1014 
1015  if (Scale > IL_MaxRerollIterations) {
1016  LLVM_DEBUG(dbgs() << "LRR: Aborting - too many iterations found. "
1017  << "#Found=" << Scale
1018  << ", #Max=" << IL_MaxRerollIterations << "\n");
1019  return false;
1020  }
1021 
1022  LLVM_DEBUG(dbgs() << "LRR: Successfully found roots: Scale=" << Scale
1023  << "\n");
1024 
1025  return true;
1026 }
1027 
1028 bool LoopReroll::DAGRootTracker::collectUsedInstructions(SmallInstructionSet &PossibleRedSet) {
1029  // Populate the MapVector with all instructions in the block, in order first,
1030  // so we can iterate over the contents later in perfect order.
1031  for (auto &I : *L->getHeader()) {
1032  Uses[&I].resize(IL_End);
1033  }
1034 
1035  SmallInstructionSet Exclude;
1036  for (auto &DRS : RootSets) {
1037  Exclude.insert(DRS.Roots.begin(), DRS.Roots.end());
1038  Exclude.insert(DRS.SubsumedInsts.begin(), DRS.SubsumedInsts.end());
1039  Exclude.insert(DRS.BaseInst);
1040  }
1041  Exclude.insert(LoopIncs.begin(), LoopIncs.end());
1042 
1043  for (auto &DRS : RootSets) {
1044  DenseSet<Instruction*> VBase;
1045  collectInLoopUserSet(DRS.BaseInst, Exclude, PossibleRedSet, VBase);
1046  for (auto *I : VBase) {
1047  Uses[I].set(0);
1048  }
1049 
1050  unsigned Idx = 1;
1051  for (auto *Root : DRS.Roots) {
1053  collectInLoopUserSet(Root, Exclude, PossibleRedSet, V);
1054 
1055  // While we're here, check the use sets are the same size.
1056  if (V.size() != VBase.size()) {
1057  LLVM_DEBUG(dbgs() << "LRR: Aborting - use sets are different sizes\n");
1058  return false;
1059  }
1060 
1061  for (auto *I : V) {
1062  Uses[I].set(Idx);
1063  }
1064  ++Idx;
1065  }
1066 
1067  // Make sure our subsumed instructions are remembered too.
1068  for (auto *I : DRS.SubsumedInsts) {
1069  Uses[I].set(IL_All);
1070  }
1071  }
1072 
1073  // Make sure the loop increments are also accounted for.
1074 
1075  Exclude.clear();
1076  for (auto &DRS : RootSets) {
1077  Exclude.insert(DRS.Roots.begin(), DRS.Roots.end());
1078  Exclude.insert(DRS.SubsumedInsts.begin(), DRS.SubsumedInsts.end());
1079  Exclude.insert(DRS.BaseInst);
1080  }
1081 
1083  collectInLoopUserSet(LoopIncs, Exclude, PossibleRedSet, V);
1084  for (auto *I : V) {
1085  if (I->mayHaveSideEffects()) {
1086  LLVM_DEBUG(dbgs() << "LRR: Aborting - "
1087  << "An instruction which does not belong to any root "
1088  << "sets must not have side effects: " << *I);
1089  return false;
1090  }
1091  Uses[I].set(IL_All);
1092  }
1093 
1094  return true;
1095 }
1096 
1097 /// Get the next instruction in "In" that is a member of set Val.
1098 /// Start searching from StartI, and do not return anything in Exclude.
1099 /// If StartI is not given, start from In.begin().
1101 LoopReroll::DAGRootTracker::nextInstr(int Val, UsesTy &In,
1102  const SmallInstructionSet &Exclude,
1103  UsesTy::iterator *StartI) {
1104  UsesTy::iterator I = StartI ? *StartI : In.begin();
1105  while (I != In.end() && (I->second.test(Val) == 0 ||
1106  Exclude.contains(I->first)))
1107  ++I;
1108  return I;
1109 }
1110 
1111 bool LoopReroll::DAGRootTracker::isBaseInst(Instruction *I) {
1112  for (auto &DRS : RootSets) {
1113  if (DRS.BaseInst == I)
1114  return true;
1115  }
1116  return false;
1117 }
1118 
1119 bool LoopReroll::DAGRootTracker::isRootInst(Instruction *I) {
1120  for (auto &DRS : RootSets) {
1121  if (is_contained(DRS.Roots, I))
1122  return true;
1123  }
1124  return false;
1125 }
1126 
1127 /// Return true if instruction I depends on any instruction between
1128 /// Start and End.
1129 bool LoopReroll::DAGRootTracker::instrDependsOn(Instruction *I,
1130  UsesTy::iterator Start,
1131  UsesTy::iterator End) {
1132  for (auto *U : I->users()) {
1133  for (auto It = Start; It != End; ++It)
1134  if (U == It->first)
1135  return true;
1136  }
1137  return false;
1138 }
1139 
1140 static bool isIgnorableInst(const Instruction *I) {
1141  if (isa<DbgInfoIntrinsic>(I))
1142  return true;
1143  const IntrinsicInst* II = dyn_cast<IntrinsicInst>(I);
1144  if (!II)
1145  return false;
1146  switch (II->getIntrinsicID()) {
1147  default:
1148  return false;
1149  case Intrinsic::annotation:
1150  case Intrinsic::ptr_annotation:
1151  case Intrinsic::var_annotation:
1152  // TODO: the following intrinsics may also be allowed:
1153  // lifetime_start, lifetime_end, invariant_start, invariant_end
1154  return true;
1155  }
1156  return false;
1157 }
1158 
1159 bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
1160  // We now need to check for equivalence of the use graph of each root with
1161  // that of the primary induction variable (excluding the roots). Our goal
1162  // here is not to solve the full graph isomorphism problem, but rather to
1163  // catch common cases without a lot of work. As a result, we will assume
1164  // that the relative order of the instructions in each unrolled iteration
1165  // is the same (although we will not make an assumption about how the
1166  // different iterations are intermixed). Note that while the order must be
1167  // the same, the instructions may not be in the same basic block.
1168 
1169  // An array of just the possible reductions for this scale factor. When we
1170  // collect the set of all users of some root instructions, these reduction
1171  // instructions are treated as 'final' (their uses are not considered).
1172  // This is important because we don't want the root use set to search down
1173  // the reduction chain.
1174  SmallInstructionSet PossibleRedSet;
1175  SmallInstructionSet PossibleRedLastSet;
1176  SmallInstructionSet PossibleRedPHISet;
1177  Reductions.restrictToScale(Scale, PossibleRedSet,
1178  PossibleRedPHISet, PossibleRedLastSet);
1179 
1180  // Populate "Uses" with where each instruction is used.
1181  if (!collectUsedInstructions(PossibleRedSet))
1182  return false;
1183 
1184  // Make sure we mark the reduction PHIs as used in all iterations.
1185  for (auto *I : PossibleRedPHISet) {
1186  Uses[I].set(IL_All);
1187  }
1188 
1189  // Make sure we mark loop-control-only PHIs as used in all iterations. See
1190  // comment above LoopReroll::isLoopControlIV for more information.
1191  BasicBlock *Header = L->getHeader();
1192  if (LoopControlIV && LoopControlIV != IV) {
1193  for (auto *U : LoopControlIV->users()) {
1194  Instruction *IVUser = dyn_cast<Instruction>(U);
1195  // IVUser could be loop increment or compare
1196  Uses[IVUser].set(IL_All);
1197  for (auto *UU : IVUser->users()) {
1198  Instruction *UUser = dyn_cast<Instruction>(UU);
1199  // UUser could be compare, PHI or branch
1200  Uses[UUser].set(IL_All);
1201  // Skip SExt
1202  if (isa<SExtInst>(UUser)) {
1203  UUser = dyn_cast<Instruction>(*(UUser->user_begin()));
1204  Uses[UUser].set(IL_All);
1205  }
1206  // Is UUser a compare instruction?
1207  if (UU->hasOneUse()) {
1208  Instruction *BI = dyn_cast<BranchInst>(*UUser->user_begin());
1209  if (BI == cast<BranchInst>(Header->getTerminator()))
1210  Uses[BI].set(IL_All);
1211  }
1212  }
1213  }
1214  }
1215 
1216  // Make sure all instructions in the loop are in one and only one
1217  // set.
1218  for (auto &KV : Uses) {
1219  if (KV.second.count() != 1 && !isIgnorableInst(KV.first)) {
1220  LLVM_DEBUG(
1221  dbgs() << "LRR: Aborting - instruction is not used in 1 iteration: "
1222  << *KV.first << " (#uses=" << KV.second.count() << ")\n");
1223  return false;
1224  }
1225  }
1226 
1227  LLVM_DEBUG(for (auto &KV
1228  : Uses) {
1229  dbgs() << "LRR: " << KV.second.find_first() << "\t" << *KV.first << "\n";
1230  });
1231 
1232  for (unsigned Iter = 1; Iter < Scale; ++Iter) {
1233  // In addition to regular aliasing information, we need to look for
1234  // instructions from later (future) iterations that have side effects
1235  // preventing us from reordering them past other instructions with side
1236  // effects.
1237  bool FutureSideEffects = false;
1238  AliasSetTracker AST(*AA);
1239  // The map between instructions in f(%iv.(i+1)) and f(%iv).
1241 
1242  // Compare iteration Iter to the base.
1243  SmallInstructionSet Visited;
1244  auto BaseIt = nextInstr(0, Uses, Visited);
1245  auto RootIt = nextInstr(Iter, Uses, Visited);
1246  auto LastRootIt = Uses.begin();
1247 
1248  while (BaseIt != Uses.end() && RootIt != Uses.end()) {
1249  Instruction *BaseInst = BaseIt->first;
1250  Instruction *RootInst = RootIt->first;
1251 
1252  // Skip over the IV or root instructions; only match their users.
1253  bool Continue = false;
1254  if (isBaseInst(BaseInst)) {
1255  Visited.insert(BaseInst);
1256  BaseIt = nextInstr(0, Uses, Visited);
1257  Continue = true;
1258  }
1259  if (isRootInst(RootInst)) {
1260  LastRootIt = RootIt;
1261  Visited.insert(RootInst);
1262  RootIt = nextInstr(Iter, Uses, Visited);
1263  Continue = true;
1264  }
1265  if (Continue) continue;
1266 
1267  if (!BaseInst->isSameOperationAs(RootInst)) {
1268  // Last chance saloon. We don't try and solve the full isomorphism
1269  // problem, but try and at least catch the case where two instructions
1270  // *of different types* are round the wrong way. We won't be able to
1271  // efficiently tell, given two ADD instructions, which way around we
1272  // should match them, but given an ADD and a SUB, we can at least infer
1273  // which one is which.
1274  //
1275  // This should allow us to deal with a greater subset of the isomorphism
1276  // problem. It does however change a linear algorithm into a quadratic
1277  // one, so limit the number of probes we do.
1278  auto TryIt = RootIt;
1279  unsigned N = NumToleratedFailedMatches;
1280  while (TryIt != Uses.end() &&
1281  !BaseInst->isSameOperationAs(TryIt->first) &&
1282  N--) {
1283  ++TryIt;
1284  TryIt = nextInstr(Iter, Uses, Visited, &TryIt);
1285  }
1286 
1287  if (TryIt == Uses.end() || TryIt == RootIt ||
1288  instrDependsOn(TryIt->first, RootIt, TryIt)) {
1289  LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at "
1290  << *BaseInst << " vs. " << *RootInst << "\n");
1291  return false;
1292  }
1293 
1294  RootIt = TryIt;
1295  RootInst = TryIt->first;
1296  }
1297 
1298  // All instructions between the last root and this root
1299  // may belong to some other iteration. If they belong to a
1300  // future iteration, then they're dangerous to alias with.
1301  //
1302  // Note that because we allow a limited amount of flexibility in the order
1303  // that we visit nodes, LastRootIt might be *before* RootIt, in which
1304  // case we've already checked this set of instructions so we shouldn't
1305  // do anything.
1306  for (; LastRootIt < RootIt; ++LastRootIt) {
1307  Instruction *I = LastRootIt->first;
1308  if (LastRootIt->second.find_first() < (int)Iter)
1309  continue;
1310  if (I->mayWriteToMemory())
1311  AST.add(I);
1312  // Note: This is specifically guarded by a check on isa<PHINode>,
1313  // which while a valid (somewhat arbitrary) micro-optimization, is
1314  // needed because otherwise isSafeToSpeculativelyExecute returns
1315  // false on PHI nodes.
1316  if (!isa<PHINode>(I) && !isUnorderedLoadStore(I) &&
1318  // Intervening instructions cause side effects.
1319  FutureSideEffects = true;
1320  }
1321 
1322  // Make sure that this instruction, which is in the use set of this
1323  // root instruction, does not also belong to the base set or the set of
1324  // some other root instruction.
1325  if (RootIt->second.count() > 1) {
1326  LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst
1327  << " vs. " << *RootInst << " (prev. case overlap)\n");
1328  return false;
1329  }
1330 
1331  // Make sure that we don't alias with any instruction in the alias set
1332  // tracker. If we do, then we depend on a future iteration, and we
1333  // can't reroll.
1334  if (RootInst->mayReadFromMemory())
1335  for (auto &K : AST) {
1336  if (K.aliasesUnknownInst(RootInst, *AA)) {
1337  LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at "
1338  << *BaseInst << " vs. " << *RootInst
1339  << " (depends on future store)\n");
1340  return false;
1341  }
1342  }
1343 
1344  // If we've past an instruction from a future iteration that may have
1345  // side effects, and this instruction might also, then we can't reorder
1346  // them, and this matching fails. As an exception, we allow the alias
1347  // set tracker to handle regular (unordered) load/store dependencies.
1348  if (FutureSideEffects && ((!isUnorderedLoadStore(BaseInst) &&
1349  !isSafeToSpeculativelyExecute(BaseInst)) ||
1350  (!isUnorderedLoadStore(RootInst) &&
1351  !isSafeToSpeculativelyExecute(RootInst)))) {
1352  LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst
1353  << " vs. " << *RootInst
1354  << " (side effects prevent reordering)\n");
1355  return false;
1356  }
1357 
1358  // For instructions that are part of a reduction, if the operation is
1359  // associative, then don't bother matching the operands (because we
1360  // already know that the instructions are isomorphic, and the order
1361  // within the iteration does not matter). For non-associative reductions,
1362  // we do need to match the operands, because we need to reject
1363  // out-of-order instructions within an iteration!
1364  // For example (assume floating-point addition), we need to reject this:
1365  // x += a[i]; x += b[i];
1366  // x += a[i+1]; x += b[i+1];
1367  // x += b[i+2]; x += a[i+2];
1368  bool InReduction = Reductions.isPairInSame(BaseInst, RootInst);
1369 
1370  if (!(InReduction && BaseInst->isAssociative())) {
1371  bool Swapped = false, SomeOpMatched = false;
1372  for (unsigned j = 0; j < BaseInst->getNumOperands(); ++j) {
1373  Value *Op2 = RootInst->getOperand(j);
1374 
1375  // If this is part of a reduction (and the operation is not
1376  // associatve), then we match all operands, but not those that are
1377  // part of the reduction.
1378  if (InReduction)
1379  if (Instruction *Op2I = dyn_cast<Instruction>(Op2))
1380  if (Reductions.isPairInSame(RootInst, Op2I))
1381  continue;
1382 
1383  DenseMap<Value *, Value *>::iterator BMI = BaseMap.find(Op2);
1384  if (BMI != BaseMap.end()) {
1385  Op2 = BMI->second;
1386  } else {
1387  for (auto &DRS : RootSets) {
1388  if (DRS.Roots[Iter-1] == (Instruction*) Op2) {
1389  Op2 = DRS.BaseInst;
1390  break;
1391  }
1392  }
1393  }
1394 
1395  if (BaseInst->getOperand(Swapped ? unsigned(!j) : j) != Op2) {
1396  // If we've not already decided to swap the matched operands, and
1397  // we've not already matched our first operand (note that we could
1398  // have skipped matching the first operand because it is part of a
1399  // reduction above), and the instruction is commutative, then try
1400  // the swapped match.
1401  if (!Swapped && BaseInst->isCommutative() && !SomeOpMatched &&
1402  BaseInst->getOperand(!j) == Op2) {
1403  Swapped = true;
1404  } else {
1405  LLVM_DEBUG(dbgs()
1406  << "LRR: iteration root match failed at " << *BaseInst
1407  << " vs. " << *RootInst << " (operand " << j << ")\n");
1408  return false;
1409  }
1410  }
1411 
1412  SomeOpMatched = true;
1413  }
1414  }
1415 
1416  if ((!PossibleRedLastSet.count(BaseInst) &&
1417  hasUsesOutsideLoop(BaseInst, L)) ||
1418  (!PossibleRedLastSet.count(RootInst) &&
1419  hasUsesOutsideLoop(RootInst, L))) {
1420  LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst
1421  << " vs. " << *RootInst << " (uses outside loop)\n");
1422  return false;
1423  }
1424 
1425  Reductions.recordPair(BaseInst, RootInst, Iter);
1426  BaseMap.insert(std::make_pair(RootInst, BaseInst));
1427 
1428  LastRootIt = RootIt;
1429  Visited.insert(BaseInst);
1430  Visited.insert(RootInst);
1431  BaseIt = nextInstr(0, Uses, Visited);
1432  RootIt = nextInstr(Iter, Uses, Visited);
1433  }
1434  assert(BaseIt == Uses.end() && RootIt == Uses.end() &&
1435  "Mismatched set sizes!");
1436  }
1437 
1438  LLVM_DEBUG(dbgs() << "LRR: Matched all iteration increments for " << *IV
1439  << "\n");
1440 
1441  return true;
1442 }
1443 
1444 void LoopReroll::DAGRootTracker::replace(const SCEV *BackedgeTakenCount) {
1445  BasicBlock *Header = L->getHeader();
1446 
1447  // Compute the start and increment for each BaseInst before we start erasing
1448  // instructions.
1449  SmallVector<const SCEV *, 8> StartExprs;
1450  SmallVector<const SCEV *, 8> IncrExprs;
1451  for (auto &DRS : RootSets) {
1452  const SCEVAddRecExpr *IVSCEV =
1453  cast<SCEVAddRecExpr>(SE->getSCEV(DRS.BaseInst));
1454  StartExprs.push_back(IVSCEV->getStart());
1455  IncrExprs.push_back(SE->getMinusSCEV(SE->getSCEV(DRS.Roots[0]), IVSCEV));
1456  }
1457 
1458  // Remove instructions associated with non-base iterations.
1459  for (BasicBlock::reverse_iterator J = Header->rbegin(), JE = Header->rend();
1460  J != JE;) {
1461  unsigned I = Uses[&*J].find_first();
1462  if (I > 0 && I < IL_All) {
1463  LLVM_DEBUG(dbgs() << "LRR: removing: " << *J << "\n");
1464  J++->eraseFromParent();
1465  continue;
1466  }
1467 
1468  ++J;
1469  }
1470 
1471  // Rewrite each BaseInst using SCEV.
1472  for (size_t i = 0, e = RootSets.size(); i != e; ++i)
1473  // Insert the new induction variable.
1474  replaceIV(RootSets[i], StartExprs[i], IncrExprs[i]);
1475 
1476  { // Limit the lifetime of SCEVExpander.
1477  BranchInst *BI = cast<BranchInst>(Header->getTerminator());
1478  const DataLayout &DL = Header->getModule()->getDataLayout();
1479  SCEVExpander Expander(*SE, DL, "reroll");
1480  auto Zero = SE->getZero(BackedgeTakenCount->getType());
1481  auto One = SE->getOne(BackedgeTakenCount->getType());
1482  auto NewIVSCEV = SE->getAddRecExpr(Zero, One, L, SCEV::FlagAnyWrap);
1483  Value *NewIV =
1484  Expander.expandCodeFor(NewIVSCEV, BackedgeTakenCount->getType(),
1485  Header->getFirstNonPHIOrDbg());
1486  // FIXME: This arithmetic can overflow.
1487  auto TripCount = SE->getAddExpr(BackedgeTakenCount, One);
1488  auto ScaledTripCount = SE->getMulExpr(
1489  TripCount, SE->getConstant(BackedgeTakenCount->getType(), Scale));
1490  auto ScaledBECount = SE->getMinusSCEV(ScaledTripCount, One);
1491  Value *TakenCount =
1492  Expander.expandCodeFor(ScaledBECount, BackedgeTakenCount->getType(),
1493  Header->getFirstNonPHIOrDbg());
1494  Value *Cond =
1495  new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, TakenCount, "exitcond");
1496  BI->setCondition(Cond);
1497 
1498  if (BI->getSuccessor(1) != Header)
1499  BI->swapSuccessors();
1500  }
1501 
1502  SimplifyInstructionsInBlock(Header, TLI);
1503  DeleteDeadPHIs(Header, TLI);
1504 }
1505 
1506 void LoopReroll::DAGRootTracker::replaceIV(DAGRootSet &DRS,
1507  const SCEV *Start,
1508  const SCEV *IncrExpr) {
1509  BasicBlock *Header = L->getHeader();
1510  Instruction *Inst = DRS.BaseInst;
1511 
1512  const SCEV *NewIVSCEV =
1513  SE->getAddRecExpr(Start, IncrExpr, L, SCEV::FlagAnyWrap);
1514 
1515  { // Limit the lifetime of SCEVExpander.
1516  const DataLayout &DL = Header->getModule()->getDataLayout();
1517  SCEVExpander Expander(*SE, DL, "reroll");
1518  Value *NewIV = Expander.expandCodeFor(NewIVSCEV, Inst->getType(),
1519  Header->getFirstNonPHIOrDbg());
1520 
1521  for (auto &KV : Uses)
1522  if (KV.second.find_first() == 0)
1523  KV.first->replaceUsesOfWith(Inst, NewIV);
1524  }
1525 }
1526 
1527 // Validate the selected reductions. All iterations must have an isomorphic
1528 // part of the reduction chain and, for non-associative reductions, the chain
1529 // entries must appear in order.
1530 bool LoopReroll::ReductionTracker::validateSelected() {
1531  // For a non-associative reduction, the chain entries must appear in order.
1532  for (int i : Reds) {
1533  int PrevIter = 0, BaseCount = 0, Count = 0;
1534  for (Instruction *J : PossibleReds[i]) {
1535  // Note that all instructions in the chain must have been found because
1536  // all instructions in the function must have been assigned to some
1537  // iteration.
1538  int Iter = PossibleRedIter[J];
1539  if (Iter != PrevIter && Iter != PrevIter + 1 &&
1540  !PossibleReds[i].getReducedValue()->isAssociative()) {
1541  LLVM_DEBUG(dbgs() << "LRR: Out-of-order non-associative reduction: "
1542  << J << "\n");
1543  return false;
1544  }
1545 
1546  if (Iter != PrevIter) {
1547  if (Count != BaseCount) {
1548  LLVM_DEBUG(dbgs()
1549  << "LRR: Iteration " << PrevIter << " reduction use count "
1550  << Count << " is not equal to the base use count "
1551  << BaseCount << "\n");
1552  return false;
1553  }
1554 
1555  Count = 0;
1556  }
1557 
1558  ++Count;
1559  if (Iter == 0)
1560  ++BaseCount;
1561 
1562  PrevIter = Iter;
1563  }
1564  }
1565 
1566  return true;
1567 }
1568 
1569 // For all selected reductions, remove all parts except those in the first
1570 // iteration (and the PHI). Replace outside uses of the reduced value with uses
1571 // of the first-iteration reduced value (in other words, reroll the selected
1572 // reductions).
1573 void LoopReroll::ReductionTracker::replaceSelected() {
1574  // Fixup reductions to refer to the last instruction associated with the
1575  // first iteration (not the last).
1576  for (int i : Reds) {
1577  int j = 0;
1578  for (int e = PossibleReds[i].size(); j != e; ++j)
1579  if (PossibleRedIter[PossibleReds[i][j]] != 0) {
1580  --j;
1581  break;
1582  }
1583 
1584  // Replace users with the new end-of-chain value.
1585  SmallInstructionVector Users;
1586  for (User *U : PossibleReds[i].getReducedValue()->users()) {
1587  Users.push_back(cast<Instruction>(U));
1588  }
1589 
1590  for (Instruction *User : Users)
1591  User->replaceUsesOfWith(PossibleReds[i].getReducedValue(),
1592  PossibleReds[i][j]);
1593  }
1594 }
1595 
1596 // Reroll the provided loop with respect to the provided induction variable.
1597 // Generally, we're looking for a loop like this:
1598 //
1599 // %iv = phi [ (preheader, ...), (body, %iv.next) ]
1600 // f(%iv)
1601 // %iv.1 = add %iv, 1 <-- a root increment
1602 // f(%iv.1)
1603 // %iv.2 = add %iv, 2 <-- a root increment
1604 // f(%iv.2)
1605 // %iv.scale_m_1 = add %iv, scale-1 <-- a root increment
1606 // f(%iv.scale_m_1)
1607 // ...
1608 // %iv.next = add %iv, scale
1609 // %cmp = icmp(%iv, ...)
1610 // br %cmp, header, exit
1611 //
1612 // Notably, we do not require that f(%iv), f(%iv.1), etc. be isolated groups of
1613 // instructions. In other words, the instructions in f(%iv), f(%iv.1), etc. can
1614 // be intermixed with eachother. The restriction imposed by this algorithm is
1615 // that the relative order of the isomorphic instructions in f(%iv), f(%iv.1),
1616 // etc. be the same.
1617 //
1618 // First, we collect the use set of %iv, excluding the other increment roots.
1619 // This gives us f(%iv). Then we iterate over the loop instructions (scale-1)
1620 // times, having collected the use set of f(%iv.(i+1)), during which we:
1621 // - Ensure that the next unmatched instruction in f(%iv) is isomorphic to
1622 // the next unmatched instruction in f(%iv.(i+1)).
1623 // - Ensure that both matched instructions don't have any external users
1624 // (with the exception of last-in-chain reduction instructions).
1625 // - Track the (aliasing) write set, and other side effects, of all
1626 // instructions that belong to future iterations that come before the matched
1627 // instructions. If the matched instructions read from that write set, then
1628 // f(%iv) or f(%iv.(i+1)) has some dependency on instructions in
1629 // f(%iv.(j+1)) for some j > i, and we cannot reroll the loop. Similarly,
1630 // if any of these future instructions had side effects (could not be
1631 // speculatively executed), and so do the matched instructions, when we
1632 // cannot reorder those side-effect-producing instructions, and rerolling
1633 // fails.
1634 //
1635 // Finally, we make sure that all loop instructions are either loop increment
1636 // roots, belong to simple latch code, parts of validated reductions, part of
1637 // f(%iv) or part of some f(%iv.i). If all of that is true (and all reductions
1638 // have been validated), then we reroll the loop.
1639 bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
1640  const SCEV *BackedgeTakenCount,
1641  ReductionTracker &Reductions) {
1642  DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI, DT, LI, PreserveLCSSA,
1643  IVToIncMap, LoopControlIV);
1644 
1645  if (!DAGRoots.findRoots())
1646  return false;
1647  LLVM_DEBUG(dbgs() << "LRR: Found all root induction increments for: " << *IV
1648  << "\n");
1649 
1650  if (!DAGRoots.validate(Reductions))
1651  return false;
1652  if (!Reductions.validateSelected())
1653  return false;
1654  // At this point, we've validated the rerolling, and we're committed to
1655  // making changes!
1656 
1657  Reductions.replaceSelected();
1658  DAGRoots.replace(BackedgeTakenCount);
1659 
1660  ++NumRerolledLoops;
1661  return true;
1662 }
1663 
1664 bool LoopReroll::runOnLoop(Loop *L) {
1665  BasicBlock *Header = L->getHeader();
1666  LLVM_DEBUG(dbgs() << "LRR: F[" << Header->getParent()->getName() << "] Loop %"
1667  << Header->getName() << " (" << L->getNumBlocks()
1668  << " block(s))\n");
1669 
1670  // For now, we'll handle only single BB loops.
1671  if (L->getNumBlocks() > 1)
1672  return false;
1673 
1675  return false;
1676 
1677  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
1678  LLVM_DEBUG(dbgs() << "\n Before Reroll:\n" << *(L->getHeader()) << "\n");
1679  LLVM_DEBUG(dbgs() << "LRR: backedge-taken count = " << *BackedgeTakenCount
1680  << "\n");
1681 
1682  // First, we need to find the induction variable with respect to which we can
1683  // reroll (there may be several possible options).
1684  SmallInstructionVector PossibleIVs;
1685  IVToIncMap.clear();
1686  LoopControlIV = nullptr;
1687  collectPossibleIVs(L, PossibleIVs);
1688 
1689  if (PossibleIVs.empty()) {
1690  LLVM_DEBUG(dbgs() << "LRR: No possible IVs found\n");
1691  return false;
1692  }
1693 
1694  ReductionTracker Reductions;
1695  collectPossibleReductions(L, Reductions);
1696  bool Changed = false;
1697 
1698  // For each possible IV, collect the associated possible set of 'root' nodes
1699  // (i+1, i+2, etc.).
1700  for (Instruction *PossibleIV : PossibleIVs)
1701  if (reroll(PossibleIV, L, Header, BackedgeTakenCount, Reductions)) {
1702  Changed = true;
1703  break;
1704  }
1705  LLVM_DEBUG(dbgs() << "\n After Reroll:\n" << *(L->getHeader()) << "\n");
1706 
1707  // Trip count of L has changed so SE must be re-evaluated.
1708  if (Changed)
1709  SE->forgetLoop(L);
1710 
1711  return Changed;
1712 }
1713 
1714 bool LoopRerollLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
1715  if (skipLoop(L))
1716  return false;
1717 
1718  auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
1719  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1720  auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1721  auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
1722  *L->getHeader()->getParent());
1723  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1724  bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
1725 
1726  return LoopReroll(AA, LI, SE, TLI, DT, PreserveLCSSA).runOnLoop(L);
1727 }
1728 
1731  LPMUpdater &U) {
1732  return LoopReroll(&AR.AA, &AR.LI, &AR.SE, &AR.TLI, &AR.DT, true).runOnLoop(&L)
1735 }
llvm::MapVector::iterator
typename VectorType::iterator iterator
Definition: MapVector.h:50
i
i
Definition: README.txt:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::ScalarEvolution::hasLoopInvariantBackedgeTakenCount
bool hasLoopInvariantBackedgeTakenCount(const Loop *L)
Return true if the specified loop has an analyzable loop-invariant backedge-taken count.
Definition: ScalarEvolution.cpp:12494
llvm::Instruction::isAssociative
bool isAssociative() const LLVM_READONLY
Return true if the instruction is associative:
Definition: Instruction.cpp:749
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:105
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
llvm::CmpInst::ICMP_EQ
@ ICMP_EQ
equal
Definition: InstrTypes.h:742
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::isSafeToSpeculativelyExecute
bool isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
Definition: ValueTracking.cpp:4630
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
IntrinsicInst.h
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:217
ScalarEvolutionExpander.h
Scalar.h
llvm::SCEVAddRecExpr::getStart
const SCEV * getStart() const
Definition: ScalarEvolutionExpressions.h:363
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Pass.h
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
llvm::IntrinsicInst::getIntrinsicID
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:52
llvm::SCEVExpander
This class uses information about analyze scalars to rewrite expressions in canonical form.
Definition: ScalarEvolutionExpander.h:65
llvm::ConstantInt::getValue
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:133
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
llvm::APInt::getSExtValue
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1472
llvm::ScalarEvolution::getAddRecExpr
const SCEV * getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
Definition: ScalarEvolution.cpp:3538
MapVector.h
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:460
ValueTracking.h
Local.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::getLoopAnalysisUsage
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:149
llvm::AliasSetTracker
Definition: AliasSetTracker.h:329
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::LoopRerollPass::run
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LoopRerollPass.cpp:1729
APInt.h
ScalarEvolution.h
DenseMap.h
Instructions
Code Generation Notes for reduce the size of the ISel and reduce repetition in the implementation In a small number of this can cause even when no optimisation has taken place Instructions
Definition: MSA.txt:11
Module.h
isSimpleArithmeticOp
static bool isSimpleArithmeticOp(User *IVU)
Return true if IVU is a "simple" arithmetic operation.
Definition: LoopRerollPass.cpp:752
reroll
loop reroll
Definition: LoopRerollPass.cpp:503
llvm::sys::path::end
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:235
llvm::sys::path::begin
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:226
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:52
llvm::MemIntrinsic
This is the common base class for memset/memcpy/memmove.
Definition: IntrinsicInst.h:874
llvm::MapVector
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:37
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::Value::user_begin
user_iterator user_begin()
Definition: Value.h:397
STLExtras.h
llvm::RISCVFeatures::validate
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
Definition: RISCVBaseInfo.cpp:95
llvm::LoopStandardAnalysisResults::DT
DominatorTree & DT
Definition: LoopAnalysisManager.h:55
replace
static void replace(Module &M, GlobalVariable *Old, GlobalVariable *New)
Definition: ConstantMerge.cpp:116
llvm::BasicBlock::rend
reverse_iterator rend()
Definition: BasicBlock.h:303
Use.h
llvm::LoopBase::getNumBlocks
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:185
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::BranchInst::swapSuccessors
void swapSuccessors()
Swap the successors of this branch instruction.
Definition: Instructions.cpp:1307
llvm::AMDGPU::HSAMD::ValueKind::Queue
@ Queue
Uses
SmallPtrSet< MachineInstr *, 2 > Uses
Definition: ARMLowOverheadLoops.cpp:589
loop
Analysis the ScalarEvolution expression for r is< loop > Outside the loop
Definition: README.txt:8
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
AliasAnalysis.h
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::ScalarEvolution::getMulExpr
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:3014
Instruction.h
CommandLine.h
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(LoopRerollLegacyPass, "loop-reroll", "Reroll loops", false, false) INITIALIZE_PASS_END(LoopRerollLegacyPass
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::SimplifyInstructionsInBlock
bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...
Definition: Local.cpp:701
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::ScalarEvolution::getOne
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
Definition: ScalarEvolution.h:647
Constants.h
llvm::AAResults
Definition: AliasAnalysis.h:508
llvm::SmallVectorImpl::append
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:648
llvm::User
Definition: User.h:44
Intrinsics.h
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
InstrTypes.h
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:296
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::replace
void replace(Container &Cont, typename Container::iterator ContIt, typename Container::iterator ContEnd, RandomAccessIterator ValIt, RandomAccessIterator ValEnd)
Given a sequence container Cont, replace the range [ContIt, ContEnd) with the range [ValIt,...
Definition: STLExtras.h:1804
TargetLibraryInfo.h
DenseSet.h
llvm::BasicBlock::getFirstInsertionPt
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:253
false
Definition: StackSlotColoring.cpp:142
llvm::BinaryOperator::getOpcode
BinaryOps getOpcode() const
Definition: InstrTypes.h:394
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::size
size_type size() const
Definition: DenseSet.h:81
llvm::initializeLoopRerollLegacyPassPass
void initializeLoopRerollLegacyPassPass(PassRegistry &)
llvm::Instruction
Definition: Instruction.h:45
llvm::BranchInst::setCondition
void setCondition(Value *V)
Definition: Instructions.h:3169
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
NumToleratedFailedMatches
static cl::opt< unsigned > NumToleratedFailedMatches("reroll-num-tolerated-failed-matches", cl::init(400), cl::Hidden, cl::desc("The maximum number of failures to tolerate" " during fuzzy matching. (default: 400)"))
BitVector.h
LoopUtils.h
llvm::BasicBlock::rbegin
reverse_iterator rbegin()
Definition: BasicBlock.h:301
llvm::LPPassManager
Definition: LoopPass.h:75
SmallPtrSet.h
llvm::BasicBlock::getModule
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:148
isLoopIncrement
static bool isLoopIncrement(User *U, Instruction *IV)
Definition: LoopRerollPass.cpp:772
Utils.h
Type.h
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::Instruction::isCommutative
bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
Definition: Instruction.cpp:764
LoopInfo.h
llvm::ARM_PROC::IE
@ IE
Definition: ARMBaseInfo.h:27
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:4110
llvm::DenseSet
Implements a dense probed hash-table based set.
Definition: DenseSet.h:268
llvm::tgtok::In
@ In
Definition: TGLexer.h:51
llvm::getLoopPassPreservedAnalyses
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Definition: LoopAnalysisManager.cpp:140
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1432
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:77
llvm::createLoopRerollPass
Pass * createLoopRerollPass()
Definition: LoopRerollPass.cpp:506
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:304
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::clear
void clear()
Definition: DenseMap.h:111
llvm::ICmpInst
This instruction compares its operands according to the predicate given to the constructor.
Definition: Instructions.h:1185
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:465
uint64_t
llvm::LoopPass
Definition: LoopPass.h:27
llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition: LoopPassManager.h:263
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::ARM_AM::add
@ add
Definition: ARMAddressingModes.h:39
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::DenseMap
Definition: DenseMap.h:714
llvm::SCEV::FlagAnyWrap
@ FlagAnyWrap
Definition: ScalarEvolution.h:133
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1665
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::User::replaceUsesOfWith
void replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1581
isAssociative
static bool isAssociative(const COFFSection &Section)
Definition: WinCOFFObjectWriter.cpp:970
llvm::LoopInfo
Definition: LoopInfo.h:1083
llvm::BinaryOperator
Definition: InstrTypes.h:190
DataLayout.h
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:179
llvm::ScalarEvolution::getConstant
const SCEV * getConstant(ConstantInt *V)
Definition: ScalarEvolution.cpp:444
LoopPass.h
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::Value::getNumUses
unsigned getNumUses() const
This method computes the number of uses of this Value.
Definition: Value.cpp:255
llvm::AArch64ISD::ADR
@ ADR
Definition: AArch64ISelLowering.h:62
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::LoopStandardAnalysisResults::LI
LoopInfo & LI
Definition: LoopAnalysisManager.h:56
llvm::Instruction::mayReadFromMemory
bool mayReadFromMemory() const
Return true if this instruction may read memory.
Definition: Instruction.cpp:570
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:175
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:152
users
iv users
Definition: IVUsers.cpp:52
isUnorderedLoadStore
static bool isUnorderedLoadStore(Instruction *I)
Definition: LoopRerollPass.cpp:739
loops
loop Reroll loops
Definition: LoopRerollPass.cpp:503
j
return j(j<< 16)
llvm::LCSSAID
char & LCSSAID
Definition: LCSSA.cpp:485
llvm::ScalarEvolution::forgetLoop
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
Definition: ScalarEvolution.cpp:7622
isIgnorableInst
static bool isIgnorableInst(const Instruction *I)
Definition: LoopRerollPass.cpp:1140
llvm::ScalarEvolution::getMinusSCEV
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
Definition: ScalarEvolution.cpp:4242
get
Should compile to something r4 addze r3 instead we get
Definition: README.txt:24
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:83
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:325
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:352
Casting.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:221
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:183
llvm::LoopStandardAnalysisResults::SE
ScalarEvolution & SE
Definition: LoopAnalysisManager.h:57
llvm::DeleteDeadPHIs
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
Definition: BasicBlockUtils.cpp:164
llvm::LoopStandardAnalysisResults::AA
AAResults & AA
Definition: LoopAnalysisManager.h:53
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:45
ScalarEvolutionExpressions.h
LoopReroll.h
llvm::BasicBlock::reverse_iterator
InstListType::reverse_iterator reverse_iterator
Definition: BasicBlock.h:92
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
Instructions.h
llvm::User::getNumOperands
unsigned getNumOperands() const
Definition: User.h:191
SmallVector.h
User.h
Dominators.h
N
#define N
llvm::ScalarEvolution::getZero
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
Definition: ScalarEvolution.h:644
Users
iv Induction Variable Users
Definition: IVUsers.cpp:52
llvm::BasicBlock::getFirstNonPHIOrDbg
const Instruction * getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
Definition: BasicBlock.cpp:223
llvm::PHINode
Definition: Instructions.h:2648
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:401
DerivedTypes.h
llvm::SCEV::getType
Type * getType() const
Return the LLVM type of this SCEV expression.
Definition: ScalarEvolution.cpp:377
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::ScalarEvolution::getAddExpr
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:2417
AliasSetTracker.h
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:172
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:412
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3083
raw_ostream.h
llvm::Instruction::isSameOperationAs
bool isSameOperationAs(const Instruction *I, unsigned flags=0) const
This function determines if the specified instruction executes the same operation as the current one.
Definition: Instruction.cpp:528
llvm::ScalarEvolution::getBackedgeTakenCount
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
Definition: ScalarEvolution.cpp:7492
BasicBlockUtils.h
hasUsesOutsideLoop
static bool hasUsesOutsideLoop(Instruction *I, Loop *L)
Definition: LoopRerollPass.cpp:511
Value.h
llvm::abs
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1282
llvm::LoopStandardAnalysisResults::TLI
TargetLibraryInfo & TLI
Definition: LoopAnalysisManager.h:58
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3176
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38