LLVM  16.0.0git
LoopRerollPass.cpp
Go to the documentation of this file.
1 //===- LoopReroll.cpp - Loop rerolling pass -------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass implements a simple loop reroller.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/ADT/APInt.h"
14 #include "llvm/ADT/BitVector.h"
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/DenseSet.h"
17 #include "llvm/ADT/MapVector.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/SmallPtrSet.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Statistic.h"
24 #include "llvm/Analysis/LoopInfo.h"
25 #include "llvm/Analysis/LoopPass.h"
30 #include "llvm/IR/BasicBlock.h"
31 #include "llvm/IR/Constants.h"
32 #include "llvm/IR/Dominators.h"
33 #include "llvm/IR/InstrTypes.h"
34 #include "llvm/IR/Instruction.h"
35 #include "llvm/IR/Instructions.h"
36 #include "llvm/IR/IntrinsicInst.h"
37 #include "llvm/IR/Module.h"
38 #include "llvm/IR/Type.h"
39 #include "llvm/IR/Use.h"
40 #include "llvm/IR/User.h"
41 #include "llvm/IR/Value.h"
42 #include "llvm/InitializePasses.h"
43 #include "llvm/Pass.h"
44 #include "llvm/Support/Casting.h"
46 #include "llvm/Support/Debug.h"
48 #include "llvm/Transforms/Scalar.h"
50 #include "llvm/Transforms/Utils.h"
55 #include <cassert>
56 #include <cstddef>
57 #include <cstdint>
58 #include <iterator>
59 #include <map>
60 #include <utility>
61 
62 using namespace llvm;
63 
64 #define DEBUG_TYPE "loop-reroll"
65 
66 STATISTIC(NumRerolledLoops, "Number of rerolled loops");
67 
68 static cl::opt<unsigned>
69 NumToleratedFailedMatches("reroll-num-tolerated-failed-matches", cl::init(400),
70  cl::Hidden,
71  cl::desc("The maximum number of failures to tolerate"
72  " during fuzzy matching. (default: 400)"));
73 
74 // This loop re-rolling transformation aims to transform loops like this:
75 //
76 // int foo(int a);
77 // void bar(int *x) {
78 // for (int i = 0; i < 500; i += 3) {
79 // foo(i);
80 // foo(i+1);
81 // foo(i+2);
82 // }
83 // }
84 //
85 // into a loop like this:
86 //
87 // void bar(int *x) {
88 // for (int i = 0; i < 500; ++i)
89 // foo(i);
90 // }
91 //
92 // It does this by looking for loops that, besides the latch code, are composed
93 // of isomorphic DAGs of instructions, with each DAG rooted at some increment
94 // to the induction variable, and where each DAG is isomorphic to the DAG
95 // rooted at the induction variable (excepting the sub-DAGs which root the
96 // other induction-variable increments). In other words, we're looking for loop
97 // bodies of the form:
98 //
99 // %iv = phi [ (preheader, ...), (body, %iv.next) ]
100 // f(%iv)
101 // %iv.1 = add %iv, 1 <-- a root increment
102 // f(%iv.1)
103 // %iv.2 = add %iv, 2 <-- a root increment
104 // f(%iv.2)
105 // %iv.scale_m_1 = add %iv, scale-1 <-- a root increment
106 // f(%iv.scale_m_1)
107 // ...
108 // %iv.next = add %iv, scale
109 // %cmp = icmp(%iv, ...)
110 // br %cmp, header, exit
111 //
112 // where each f(i) is a set of instructions that, collectively, are a function
113 // only of i (and other loop-invariant values).
114 //
115 // As a special case, we can also reroll loops like this:
116 //
117 // int foo(int);
118 // void bar(int *x) {
119 // for (int i = 0; i < 500; ++i) {
120 // x[3*i] = foo(0);
121 // x[3*i+1] = foo(0);
122 // x[3*i+2] = foo(0);
123 // }
124 // }
125 //
126 // into this:
127 //
128 // void bar(int *x) {
129 // for (int i = 0; i < 1500; ++i)
130 // x[i] = foo(0);
131 // }
132 //
133 // in which case, we're looking for inputs like this:
134 //
135 // %iv = phi [ (preheader, ...), (body, %iv.next) ]
136 // %scaled.iv = mul %iv, scale
137 // f(%scaled.iv)
138 // %scaled.iv.1 = add %scaled.iv, 1
139 // f(%scaled.iv.1)
140 // %scaled.iv.2 = add %scaled.iv, 2
141 // f(%scaled.iv.2)
142 // %scaled.iv.scale_m_1 = add %scaled.iv, scale-1
143 // f(%scaled.iv.scale_m_1)
144 // ...
145 // %iv.next = add %iv, 1
146 // %cmp = icmp(%iv, ...)
147 // br %cmp, header, exit
148 
149 namespace {
150 
151  enum IterationLimits {
152  /// The maximum number of iterations that we'll try and reroll.
153  IL_MaxRerollIterations = 32,
154  /// The bitvector index used by loop induction variables and other
155  /// instructions that belong to all iterations.
156  IL_All,
157  IL_End
158  };
159 
160  class LoopRerollLegacyPass : public LoopPass {
161  public:
162  static char ID; // Pass ID, replacement for typeid
163 
164  LoopRerollLegacyPass() : LoopPass(ID) {
166  }
167 
168  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
169 
170  void getAnalysisUsage(AnalysisUsage &AU) const override {
173  }
174  };
175 
176  class LoopReroll {
177  public:
178  LoopReroll(AliasAnalysis *AA, LoopInfo *LI, ScalarEvolution *SE,
179  TargetLibraryInfo *TLI, DominatorTree *DT, bool PreserveLCSSA)
180  : AA(AA), LI(LI), SE(SE), TLI(TLI), DT(DT),
181  PreserveLCSSA(PreserveLCSSA) {}
182  bool runOnLoop(Loop *L);
183 
184  protected:
185  AliasAnalysis *AA;
186  LoopInfo *LI;
187  ScalarEvolution *SE;
188  TargetLibraryInfo *TLI;
189  DominatorTree *DT;
190  bool PreserveLCSSA;
191 
192  using SmallInstructionVector = SmallVector<Instruction *, 16>;
193  using SmallInstructionSet = SmallPtrSet<Instruction *, 16>;
194 
195  // Map between induction variable and its increment
197 
198  // For loop with multiple induction variable, remember the one used only to
199  // control the loop.
200  Instruction *LoopControlIV;
201 
202  // A chain of isomorphic instructions, identified by a single-use PHI
203  // representing a reduction. Only the last value may be used outside the
204  // loop.
205  struct SimpleLoopReduction {
206  SimpleLoopReduction(Instruction *P, Loop *L) : Instructions(1, P) {
207  assert(isa<PHINode>(P) && "First reduction instruction must be a PHI");
208  add(L);
209  }
210 
211  bool valid() const {
212  return Valid;
213  }
214 
215  Instruction *getPHI() const {
216  assert(Valid && "Using invalid reduction");
217  return Instructions.front();
218  }
219 
220  Instruction *getReducedValue() const {
221  assert(Valid && "Using invalid reduction");
222  return Instructions.back();
223  }
224 
225  Instruction *get(size_t i) const {
226  assert(Valid && "Using invalid reduction");
227  return Instructions[i+1];
228  }
229 
230  Instruction *operator [] (size_t i) const { return get(i); }
231 
232  // The size, ignoring the initial PHI.
233  size_t size() const {
234  assert(Valid && "Using invalid reduction");
235  return Instructions.size()-1;
236  }
237 
238  using iterator = SmallInstructionVector::iterator;
239  using const_iterator = SmallInstructionVector::const_iterator;
240 
241  iterator begin() {
242  assert(Valid && "Using invalid reduction");
243  return std::next(Instructions.begin());
244  }
245 
246  const_iterator begin() const {
247  assert(Valid && "Using invalid reduction");
248  return std::next(Instructions.begin());
249  }
250 
251  iterator end() { return Instructions.end(); }
252  const_iterator end() const { return Instructions.end(); }
253 
254  protected:
255  bool Valid = false;
256  SmallInstructionVector Instructions;
257 
258  void add(Loop *L);
259  };
260 
261  // The set of all reductions, and state tracking of possible reductions
262  // during loop instruction processing.
263  struct ReductionTracker {
264  using SmallReductionVector = SmallVector<SimpleLoopReduction, 16>;
265 
266  // Add a new possible reduction.
267  void addSLR(SimpleLoopReduction &SLR) { PossibleReds.push_back(SLR); }
268 
269  // Setup to track possible reductions corresponding to the provided
270  // rerolling scale. Only reductions with a number of non-PHI instructions
271  // that is divisible by the scale are considered. Three instructions sets
272  // are filled in:
273  // - A set of all possible instructions in eligible reductions.
274  // - A set of all PHIs in eligible reductions
275  // - A set of all reduced values (last instructions) in eligible
276  // reductions.
277  void restrictToScale(uint64_t Scale,
278  SmallInstructionSet &PossibleRedSet,
279  SmallInstructionSet &PossibleRedPHISet,
280  SmallInstructionSet &PossibleRedLastSet) {
281  PossibleRedIdx.clear();
282  PossibleRedIter.clear();
283  Reds.clear();
284 
285  for (unsigned i = 0, e = PossibleReds.size(); i != e; ++i)
286  if (PossibleReds[i].size() % Scale == 0) {
287  PossibleRedLastSet.insert(PossibleReds[i].getReducedValue());
288  PossibleRedPHISet.insert(PossibleReds[i].getPHI());
289 
290  PossibleRedSet.insert(PossibleReds[i].getPHI());
291  PossibleRedIdx[PossibleReds[i].getPHI()] = i;
292  for (Instruction *J : PossibleReds[i]) {
293  PossibleRedSet.insert(J);
294  PossibleRedIdx[J] = i;
295  }
296  }
297  }
298 
299  // The functions below are used while processing the loop instructions.
300 
301  // Are the two instructions both from reductions, and furthermore, from
302  // the same reduction?
303  bool isPairInSame(Instruction *J1, Instruction *J2) {
304  DenseMap<Instruction *, int>::iterator J1I = PossibleRedIdx.find(J1);
305  if (J1I != PossibleRedIdx.end()) {
306  DenseMap<Instruction *, int>::iterator J2I = PossibleRedIdx.find(J2);
307  if (J2I != PossibleRedIdx.end() && J1I->second == J2I->second)
308  return true;
309  }
310 
311  return false;
312  }
313 
314  // The two provided instructions, the first from the base iteration, and
315  // the second from iteration i, form a matched pair. If these are part of
316  // a reduction, record that fact.
317  void recordPair(Instruction *J1, Instruction *J2, unsigned i) {
318  if (PossibleRedIdx.count(J1)) {
319  assert(PossibleRedIdx.count(J2) &&
320  "Recording reduction vs. non-reduction instruction?");
321 
322  PossibleRedIter[J1] = 0;
323  PossibleRedIter[J2] = i;
324 
325  int Idx = PossibleRedIdx[J1];
326  assert(Idx == PossibleRedIdx[J2] &&
327  "Recording pair from different reductions?");
328  Reds.insert(Idx);
329  }
330  }
331 
332  // The functions below can be called after we've finished processing all
333  // instructions in the loop, and we know which reductions were selected.
334 
335  bool validateSelected();
336  void replaceSelected();
337 
338  protected:
339  // The vector of all possible reductions (for any scale).
340  SmallReductionVector PossibleReds;
341 
342  DenseMap<Instruction *, int> PossibleRedIdx;
343  DenseMap<Instruction *, int> PossibleRedIter;
344  DenseSet<int> Reds;
345  };
346 
347  // A DAGRootSet models an induction variable being used in a rerollable
348  // loop. For example,
349  //
350  // x[i*3+0] = y1
351  // x[i*3+1] = y2
352  // x[i*3+2] = y3
353  //
354  // Base instruction -> i*3
355  // +---+----+
356  // / | \
357  // ST[y1] +1 +2 <-- Roots
358  // | |
359  // ST[y2] ST[y3]
360  //
361  // There may be multiple DAGRoots, for example:
362  //
363  // x[i*2+0] = ... (1)
364  // x[i*2+1] = ... (1)
365  // x[i*2+4] = ... (2)
366  // x[i*2+5] = ... (2)
367  // x[(i+1234)*2+5678] = ... (3)
368  // x[(i+1234)*2+5679] = ... (3)
369  //
370  // The loop will be rerolled by adding a new loop induction variable,
371  // one for the Base instruction in each DAGRootSet.
372  //
373  struct DAGRootSet {
374  Instruction *BaseInst;
375  SmallInstructionVector Roots;
376 
377  // The instructions between IV and BaseInst (but not including BaseInst).
378  SmallInstructionSet SubsumedInsts;
379  };
380 
381  // The set of all DAG roots, and state tracking of all roots
382  // for a particular induction variable.
383  struct DAGRootTracker {
384  DAGRootTracker(LoopReroll *Parent, Loop *L, Instruction *IV,
387  bool PreserveLCSSA,
389  Instruction *LoopCtrlIV)
390  : Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), DT(DT), LI(LI),
391  PreserveLCSSA(PreserveLCSSA), IV(IV), IVToIncMap(IncrMap),
392  LoopControlIV(LoopCtrlIV) {}
393 
394  /// Stage 1: Find all the DAG roots for the induction variable.
395  bool findRoots();
396 
397  /// Stage 2: Validate if the found roots are valid.
398  bool validate(ReductionTracker &Reductions);
399 
400  /// Stage 3: Assuming validate() returned true, perform the
401  /// replacement.
402  /// @param BackedgeTakenCount The backedge-taken count of L.
403  void replace(const SCEV *BackedgeTakenCount);
404 
405  protected:
407 
408  void findRootsRecursive(Instruction *IVU,
409  SmallInstructionSet SubsumedInsts);
410  bool findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts);
411  bool collectPossibleRoots(Instruction *Base,
412  std::map<int64_t,Instruction*> &Roots);
413  bool validateRootSet(DAGRootSet &DRS);
414 
415  bool collectUsedInstructions(SmallInstructionSet &PossibleRedSet);
416  void collectInLoopUserSet(const SmallInstructionVector &Roots,
417  const SmallInstructionSet &Exclude,
418  const SmallInstructionSet &Final,
420  void collectInLoopUserSet(Instruction *Root,
421  const SmallInstructionSet &Exclude,
422  const SmallInstructionSet &Final,
424 
425  UsesTy::iterator nextInstr(int Val, UsesTy &In,
426  const SmallInstructionSet &Exclude,
427  UsesTy::iterator *StartI=nullptr);
428  bool isBaseInst(Instruction *I);
429  bool isRootInst(Instruction *I);
430  bool instrDependsOn(Instruction *I,
431  UsesTy::iterator Start,
432  UsesTy::iterator End);
433  void replaceIV(DAGRootSet &DRS, const SCEV *Start, const SCEV *IncrExpr);
434 
435  LoopReroll *Parent;
436 
437  // Members of Parent, replicated here for brevity.
438  Loop *L;
439  ScalarEvolution *SE;
440  AliasAnalysis *AA;
441  TargetLibraryInfo *TLI;
442  DominatorTree *DT;
443  LoopInfo *LI;
444  bool PreserveLCSSA;
445 
446  // The loop induction variable.
447  Instruction *IV;
448 
449  // Loop step amount.
450  int64_t Inc;
451 
452  // Loop reroll count; if Inc == 1, this records the scaling applied
453  // to the indvar: a[i*2+0] = ...; a[i*2+1] = ... ;
454  // If Inc is not 1, Scale = Inc.
455  uint64_t Scale;
456 
457  // The roots themselves.
459 
460  // All increment instructions for IV.
461  SmallInstructionVector LoopIncs;
462 
463  // Map of all instructions in the loop (in order) to the iterations
464  // they are used in (or specially, IL_All for instructions
465  // used in the loop increment mechanism).
466  UsesTy Uses;
467 
468  // Map between induction variable and its increment
470 
471  Instruction *LoopControlIV;
472  };
473 
474  // Check if it is a compare-like instruction whose user is a branch
475  bool isCompareUsedByBranch(Instruction *I) {
476  auto *TI = I->getParent()->getTerminator();
477  if (!isa<BranchInst>(TI) || !isa<CmpInst>(I))
478  return false;
479  return I->hasOneUse() && TI->getOperand(0) == I;
480  };
481 
482  bool isLoopControlIV(Loop *L, Instruction *IV);
483  void collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs);
484  void collectPossibleReductions(Loop *L,
485  ReductionTracker &Reductions);
486  bool reroll(Instruction *IV, Loop *L, BasicBlock *Header,
487  const SCEV *BackedgeTakenCount, ReductionTracker &Reductions);
488  };
489 
490 } // end anonymous namespace
491 
492 char LoopRerollLegacyPass::ID = 0;
493 
494 INITIALIZE_PASS_BEGIN(LoopRerollLegacyPass, "loop-reroll", "Reroll loops",
495  false, false)
498 INITIALIZE_PASS_END(LoopRerollLegacyPass, "loop-reroll", "Reroll loops", false,
499  false)
500 
501 Pass *llvm::createLoopRerollPass() { return new LoopRerollLegacyPass; }
502 
503 // Returns true if the provided instruction is used outside the given loop.
504 // This operates like Instruction::isUsedOutsideOfBlock, but considers PHIs in
505 // non-loop blocks to be outside the loop.
507  for (User *U : I->users()) {
508  if (!L->contains(cast<Instruction>(U)))
509  return true;
510  }
511  return false;
512 }
513 
514 // Check if an IV is only used to control the loop. There are two cases:
515 // 1. It only has one use which is loop increment, and the increment is only
516 // used by comparison and the PHI (could has sext with nsw in between), and the
517 // comparison is only used by branch.
518 // 2. It is used by loop increment and the comparison, the loop increment is
519 // only used by the PHI, and the comparison is used only by the branch.
520 bool LoopReroll::isLoopControlIV(Loop *L, Instruction *IV) {
521  unsigned IVUses = IV->getNumUses();
522  if (IVUses != 2 && IVUses != 1)
523  return false;
524 
525  for (auto *User : IV->users()) {
526  int32_t IncOrCmpUses = User->getNumUses();
527  bool IsCompInst = isCompareUsedByBranch(cast<Instruction>(User));
528 
529  // User can only have one or two uses.
530  if (IncOrCmpUses != 2 && IncOrCmpUses != 1)
531  return false;
532 
533  // Case 1
534  if (IVUses == 1) {
535  // The only user must be the loop increment.
536  // The loop increment must have two uses.
537  if (IsCompInst || IncOrCmpUses != 2)
538  return false;
539  }
540 
541  // Case 2
542  if (IVUses == 2 && IncOrCmpUses != 1)
543  return false;
544 
545  // The users of the IV must be a binary operation or a comparison
546  if (auto *BO = dyn_cast<BinaryOperator>(User)) {
547  if (BO->getOpcode() == Instruction::Add) {
548  // Loop Increment
549  // User of Loop Increment should be either PHI or CMP
550  for (auto *UU : User->users()) {
551  if (PHINode *PN = dyn_cast<PHINode>(UU)) {
552  if (PN != IV)
553  return false;
554  }
555  // Must be a CMP or an ext (of a value with nsw) then CMP
556  else {
557  auto *UUser = cast<Instruction>(UU);
558  // Skip SExt if we are extending an nsw value
559  // TODO: Allow ZExt too
560  if (BO->hasNoSignedWrap() && UUser->hasOneUse() &&
561  isa<SExtInst>(UUser))
562  UUser = cast<Instruction>(*(UUser->user_begin()));
563  if (!isCompareUsedByBranch(UUser))
564  return false;
565  }
566  }
567  } else
568  return false;
569  // Compare : can only have one use, and must be branch
570  } else if (!IsCompInst)
571  return false;
572  }
573  return true;
574 }
575 
576 // Collect the list of loop induction variables with respect to which it might
577 // be possible to reroll the loop.
578 void LoopReroll::collectPossibleIVs(Loop *L,
579  SmallInstructionVector &PossibleIVs) {
580  BasicBlock *Header = L->getHeader();
581  for (BasicBlock::iterator I = Header->begin(),
582  IE = Header->getFirstInsertionPt(); I != IE; ++I) {
583  if (!isa<PHINode>(I))
584  continue;
585  if (!I->getType()->isIntegerTy() && !I->getType()->isPointerTy())
586  continue;
587 
588  if (const SCEVAddRecExpr *PHISCEV =
589  dyn_cast<SCEVAddRecExpr>(SE->getSCEV(&*I))) {
590  if (PHISCEV->getLoop() != L)
591  continue;
592  if (!PHISCEV->isAffine())
593  continue;
594  auto IncSCEV = dyn_cast<SCEVConstant>(PHISCEV->getStepRecurrence(*SE));
595  if (IncSCEV) {
596  IVToIncMap[&*I] = IncSCEV->getValue()->getSExtValue();
597  LLVM_DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " << *PHISCEV
598  << "\n");
599 
600  if (isLoopControlIV(L, &*I)) {
601  assert(!LoopControlIV && "Found two loop control only IV");
602  LoopControlIV = &(*I);
603  LLVM_DEBUG(dbgs() << "LRR: Possible loop control only IV: " << *I
604  << " = " << *PHISCEV << "\n");
605  } else
606  PossibleIVs.push_back(&*I);
607  }
608  }
609  }
610 }
611 
612 // Add the remainder of the reduction-variable chain to the instruction vector
613 // (the initial PHINode has already been added). If successful, the object is
614 // marked as valid.
616  assert(!Valid && "Cannot add to an already-valid chain");
617 
618  // The reduction variable must be a chain of single-use instructions
619  // (including the PHI), except for the last value (which is used by the PHI
620  // and also outside the loop).
621  Instruction *C = Instructions.front();
622  if (C->user_empty())
623  return;
624 
625  do {
626  C = cast<Instruction>(*C->user_begin());
627  if (C->hasOneUse()) {
628  if (!C->isBinaryOp())
629  return;
630 
631  if (!(isa<PHINode>(Instructions.back()) ||
632  C->isSameOperationAs(Instructions.back())))
633  return;
634 
635  Instructions.push_back(C);
636  }
637  } while (C->hasOneUse());
638 
639  if (Instructions.size() < 2 ||
640  !C->isSameOperationAs(Instructions.back()) ||
641  C->use_empty())
642  return;
643 
644  // C is now the (potential) last instruction in the reduction chain.
645  for (User *U : C->users()) {
646  // The only in-loop user can be the initial PHI.
647  if (L->contains(cast<Instruction>(U)))
648  if (cast<Instruction>(U) != Instructions.front())
649  return;
650  }
651 
652  Instructions.push_back(C);
653  Valid = true;
654 }
655 
656 // Collect the vector of possible reduction variables.
657 void LoopReroll::collectPossibleReductions(Loop *L,
658  ReductionTracker &Reductions) {
659  BasicBlock *Header = L->getHeader();
660  for (BasicBlock::iterator I = Header->begin(),
661  IE = Header->getFirstInsertionPt(); I != IE; ++I) {
662  if (!isa<PHINode>(I))
663  continue;
664  if (!I->getType()->isSingleValueType())
665  continue;
666 
667  SimpleLoopReduction SLR(&*I, L);
668  if (!SLR.valid())
669  continue;
670 
671  LLVM_DEBUG(dbgs() << "LRR: Possible reduction: " << *I << " (with "
672  << SLR.size() << " chained instructions)\n");
673  Reductions.addSLR(SLR);
674  }
675 }
676 
677 // Collect the set of all users of the provided root instruction. This set of
678 // users contains not only the direct users of the root instruction, but also
679 // all users of those users, and so on. There are two exceptions:
680 //
681 // 1. Instructions in the set of excluded instructions are never added to the
682 // use set (even if they are users). This is used, for example, to exclude
683 // including root increments in the use set of the primary IV.
684 //
685 // 2. Instructions in the set of final instructions are added to the use set
686 // if they are users, but their users are not added. This is used, for
687 // example, to prevent a reduction update from forcing all later reduction
688 // updates into the use set.
689 void LoopReroll::DAGRootTracker::collectInLoopUserSet(
690  Instruction *Root, const SmallInstructionSet &Exclude,
691  const SmallInstructionSet &Final,
693  SmallInstructionVector Queue(1, Root);
694  while (!Queue.empty()) {
695  Instruction *I = Queue.pop_back_val();
696  if (!Users.insert(I).second)
697  continue;
698 
699  if (!Final.count(I))
700  for (Use &U : I->uses()) {
701  Instruction *User = cast<Instruction>(U.getUser());
702  if (PHINode *PN = dyn_cast<PHINode>(User)) {
703  // Ignore "wrap-around" uses to PHIs of this loop's header.
704  if (PN->getIncomingBlock(U) == L->getHeader())
705  continue;
706  }
707 
708  if (L->contains(User) && !Exclude.count(User)) {
709  Queue.push_back(User);
710  }
711  }
712 
713  // We also want to collect single-user "feeder" values.
714  for (Use &U : I->operands()) {
715  if (Instruction *Op = dyn_cast<Instruction>(U))
716  if (Op->hasOneUse() && L->contains(Op) && !Exclude.count(Op) &&
717  !Final.count(Op))
718  Queue.push_back(Op);
719  }
720  }
721 }
722 
723 // Collect all of the users of all of the provided root instructions (combined
724 // into a single set).
725 void LoopReroll::DAGRootTracker::collectInLoopUserSet(
726  const SmallInstructionVector &Roots,
727  const SmallInstructionSet &Exclude,
728  const SmallInstructionSet &Final,
730  for (Instruction *Root : Roots)
731  collectInLoopUserSet(Root, Exclude, Final, Users);
732 }
733 
735  if (LoadInst *LI = dyn_cast<LoadInst>(I))
736  return LI->isUnordered();
737  if (StoreInst *SI = dyn_cast<StoreInst>(I))
738  return SI->isUnordered();
739  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
740  return !MI->isVolatile();
741  return false;
742 }
743 
744 /// Return true if IVU is a "simple" arithmetic operation.
745 /// This is used for narrowing the search space for DAGRoots; only arithmetic
746 /// and GEPs can be part of a DAGRoot.
747 static bool isSimpleArithmeticOp(User *IVU) {
748  if (Instruction *I = dyn_cast<Instruction>(IVU)) {
749  switch (I->getOpcode()) {
750  default: return false;
751  case Instruction::Add:
752  case Instruction::Sub:
753  case Instruction::Mul:
754  case Instruction::Shl:
755  case Instruction::AShr:
756  case Instruction::LShr:
757  case Instruction::GetElementPtr:
758  case Instruction::Trunc:
759  case Instruction::ZExt:
760  case Instruction::SExt:
761  return true;
762  }
763  }
764  return false;
765 }
766 
767 static bool isLoopIncrement(User *U, Instruction *IV) {
768  BinaryOperator *BO = dyn_cast<BinaryOperator>(U);
769 
770  if ((BO && BO->getOpcode() != Instruction::Add) ||
771  (!BO && !isa<GetElementPtrInst>(U)))
772  return false;
773 
774  for (auto *UU : U->users()) {
775  PHINode *PN = dyn_cast<PHINode>(UU);
776  if (PN && PN == IV)
777  return true;
778  }
779  return false;
780 }
781 
782 bool LoopReroll::DAGRootTracker::
783 collectPossibleRoots(Instruction *Base, std::map<int64_t,Instruction*> &Roots) {
784  SmallInstructionVector BaseUsers;
785 
786  for (auto *I : Base->users()) {
787  ConstantInt *CI = nullptr;
788 
789  if (isLoopIncrement(I, IV)) {
790  LoopIncs.push_back(cast<Instruction>(I));
791  continue;
792  }
793 
794  // The root nodes must be either GEPs, ORs or ADDs.
795  if (auto *BO = dyn_cast<BinaryOperator>(I)) {
796  if (BO->getOpcode() == Instruction::Add ||
797  BO->getOpcode() == Instruction::Or)
798  CI = dyn_cast<ConstantInt>(BO->getOperand(1));
799  } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
800  Value *LastOperand = GEP->getOperand(GEP->getNumOperands()-1);
801  CI = dyn_cast<ConstantInt>(LastOperand);
802  }
803 
804  if (!CI) {
805  if (Instruction *II = dyn_cast<Instruction>(I)) {
806  BaseUsers.push_back(II);
807  continue;
808  } else {
809  LLVM_DEBUG(dbgs() << "LRR: Aborting due to non-instruction: " << *I
810  << "\n");
811  return false;
812  }
813  }
814 
815  int64_t V = std::abs(CI->getValue().getSExtValue());
816  if (Roots.find(V) != Roots.end())
817  // No duplicates, please.
818  return false;
819 
820  Roots[V] = cast<Instruction>(I);
821  }
822 
823  // Make sure we have at least two roots.
824  if (Roots.empty() || (Roots.size() == 1 && BaseUsers.empty()))
825  return false;
826 
827  // If we found non-loop-inc, non-root users of Base, assume they are
828  // for the zeroth root index. This is because "add %a, 0" gets optimized
829  // away.
830  if (BaseUsers.size()) {
831  if (Roots.find(0) != Roots.end()) {
832  LLVM_DEBUG(dbgs() << "LRR: Multiple roots found for base - aborting!\n");
833  return false;
834  }
835  Roots[0] = Base;
836  }
837 
838  // Calculate the number of users of the base, or lowest indexed, iteration.
839  unsigned NumBaseUses = BaseUsers.size();
840  if (NumBaseUses == 0)
841  NumBaseUses = Roots.begin()->second->getNumUses();
842 
843  // Check that every node has the same number of users.
844  for (auto &KV : Roots) {
845  if (KV.first == 0)
846  continue;
847  if (!KV.second->hasNUses(NumBaseUses)) {
848  LLVM_DEBUG(dbgs() << "LRR: Aborting - Root and Base #users not the same: "
849  << "#Base=" << NumBaseUses
850  << ", #Root=" << KV.second->getNumUses() << "\n");
851  return false;
852  }
853  }
854 
855  return true;
856 }
857 
858 void LoopReroll::DAGRootTracker::
859 findRootsRecursive(Instruction *I, SmallInstructionSet SubsumedInsts) {
860  // Does the user look like it could be part of a root set?
861  // All its users must be simple arithmetic ops.
862  if (I->hasNUsesOrMore(IL_MaxRerollIterations + 1))
863  return;
864 
865  if (I != IV && findRootsBase(I, SubsumedInsts))
866  return;
867 
868  SubsumedInsts.insert(I);
869 
870  for (User *V : I->users()) {
871  Instruction *I = cast<Instruction>(V);
872  if (is_contained(LoopIncs, I))
873  continue;
874 
875  if (!isSimpleArithmeticOp(I))
876  continue;
877 
878  // The recursive call makes a copy of SubsumedInsts.
879  findRootsRecursive(I, SubsumedInsts);
880  }
881 }
882 
883 bool LoopReroll::DAGRootTracker::validateRootSet(DAGRootSet &DRS) {
884  if (DRS.Roots.empty())
885  return false;
886 
887  // If the value of the base instruction is used outside the loop, we cannot
888  // reroll the loop. Check for other root instructions is unnecessary because
889  // they don't match any base instructions if their values are used outside.
890  if (hasUsesOutsideLoop(DRS.BaseInst, L))
891  return false;
892 
893  // Consider a DAGRootSet with N-1 roots (so N different values including
894  // BaseInst).
895  // Define d = Roots[0] - BaseInst, which should be the same as
896  // Roots[I] - Roots[I-1] for all I in [1..N).
897  // Define D = BaseInst@J - BaseInst@J-1, where "@J" means the value at the
898  // loop iteration J.
899  //
900  // Now, For the loop iterations to be consecutive:
901  // D = d * N
902  const auto *ADR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(DRS.BaseInst));
903  if (!ADR)
904  return false;
905 
906  // Check that the first root is evenly spaced.
907  unsigned N = DRS.Roots.size() + 1;
908  const SCEV *StepSCEV = SE->getMinusSCEV(SE->getSCEV(DRS.Roots[0]), ADR);
909  if (isa<SCEVCouldNotCompute>(StepSCEV) || StepSCEV->getType()->isPointerTy())
910  return false;
911  const SCEV *ScaleSCEV = SE->getConstant(StepSCEV->getType(), N);
912  if (ADR->getStepRecurrence(*SE) != SE->getMulExpr(StepSCEV, ScaleSCEV))
913  return false;
914 
915  // Check that the remainling roots are evenly spaced.
916  for (unsigned i = 1; i < N - 1; ++i) {
917  const SCEV *NewStepSCEV = SE->getMinusSCEV(SE->getSCEV(DRS.Roots[i]),
918  SE->getSCEV(DRS.Roots[i-1]));
919  if (NewStepSCEV != StepSCEV)
920  return false;
921  }
922 
923  return true;
924 }
925 
926 bool LoopReroll::DAGRootTracker::
927 findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts) {
928  // The base of a RootSet must be an AddRec, so it can be erased.
929  const auto *IVU_ADR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IVU));
930  if (!IVU_ADR || IVU_ADR->getLoop() != L)
931  return false;
932 
933  std::map<int64_t, Instruction*> V;
934  if (!collectPossibleRoots(IVU, V))
935  return false;
936 
937  // If we didn't get a root for index zero, then IVU must be
938  // subsumed.
939  if (V.find(0) == V.end())
940  SubsumedInsts.insert(IVU);
941 
942  // Partition the vector into monotonically increasing indexes.
943  DAGRootSet DRS;
944  DRS.BaseInst = nullptr;
945 
946  SmallVector<DAGRootSet, 16> PotentialRootSets;
947 
948  for (auto &KV : V) {
949  if (!DRS.BaseInst) {
950  DRS.BaseInst = KV.second;
951  DRS.SubsumedInsts = SubsumedInsts;
952  } else if (DRS.Roots.empty()) {
953  DRS.Roots.push_back(KV.second);
954  } else if (V.find(KV.first - 1) != V.end()) {
955  DRS.Roots.push_back(KV.second);
956  } else {
957  // Linear sequence terminated.
958  if (!validateRootSet(DRS))
959  return false;
960 
961  // Construct a new DAGRootSet with the next sequence.
962  PotentialRootSets.push_back(DRS);
963  DRS.BaseInst = KV.second;
964  DRS.Roots.clear();
965  }
966  }
967 
968  if (!validateRootSet(DRS))
969  return false;
970 
971  PotentialRootSets.push_back(DRS);
972 
973  RootSets.append(PotentialRootSets.begin(), PotentialRootSets.end());
974 
975  return true;
976 }
977 
978 bool LoopReroll::DAGRootTracker::findRoots() {
979  Inc = IVToIncMap[IV];
980 
981  assert(RootSets.empty() && "Unclean state!");
982  if (std::abs(Inc) == 1) {
983  for (auto *IVU : IV->users()) {
984  if (isLoopIncrement(IVU, IV))
985  LoopIncs.push_back(cast<Instruction>(IVU));
986  }
987  findRootsRecursive(IV, SmallInstructionSet());
988  LoopIncs.push_back(IV);
989  } else {
990  if (!findRootsBase(IV, SmallInstructionSet()))
991  return false;
992  }
993 
994  // Ensure all sets have the same size.
995  if (RootSets.empty()) {
996  LLVM_DEBUG(dbgs() << "LRR: Aborting because no root sets found!\n");
997  return false;
998  }
999  for (auto &V : RootSets) {
1000  if (V.Roots.empty() || V.Roots.size() != RootSets[0].Roots.size()) {
1001  LLVM_DEBUG(
1002  dbgs()
1003  << "LRR: Aborting because not all root sets have the same size\n");
1004  return false;
1005  }
1006  }
1007 
1008  Scale = RootSets[0].Roots.size() + 1;
1009 
1010  if (Scale > IL_MaxRerollIterations) {
1011  LLVM_DEBUG(dbgs() << "LRR: Aborting - too many iterations found. "
1012  << "#Found=" << Scale
1013  << ", #Max=" << IL_MaxRerollIterations << "\n");
1014  return false;
1015  }
1016 
1017  LLVM_DEBUG(dbgs() << "LRR: Successfully found roots: Scale=" << Scale
1018  << "\n");
1019 
1020  return true;
1021 }
1022 
1023 bool LoopReroll::DAGRootTracker::collectUsedInstructions(SmallInstructionSet &PossibleRedSet) {
1024  // Populate the MapVector with all instructions in the block, in order first,
1025  // so we can iterate over the contents later in perfect order.
1026  for (auto &I : *L->getHeader()) {
1027  Uses[&I].resize(IL_End);
1028  }
1029 
1030  SmallInstructionSet Exclude;
1031  for (auto &DRS : RootSets) {
1032  Exclude.insert(DRS.Roots.begin(), DRS.Roots.end());
1033  Exclude.insert(DRS.SubsumedInsts.begin(), DRS.SubsumedInsts.end());
1034  Exclude.insert(DRS.BaseInst);
1035  }
1036  Exclude.insert(LoopIncs.begin(), LoopIncs.end());
1037 
1038  for (auto &DRS : RootSets) {
1039  DenseSet<Instruction*> VBase;
1040  collectInLoopUserSet(DRS.BaseInst, Exclude, PossibleRedSet, VBase);
1041  for (auto *I : VBase) {
1042  Uses[I].set(0);
1043  }
1044 
1045  unsigned Idx = 1;
1046  for (auto *Root : DRS.Roots) {
1048  collectInLoopUserSet(Root, Exclude, PossibleRedSet, V);
1049 
1050  // While we're here, check the use sets are the same size.
1051  if (V.size() != VBase.size()) {
1052  LLVM_DEBUG(dbgs() << "LRR: Aborting - use sets are different sizes\n");
1053  return false;
1054  }
1055 
1056  for (auto *I : V) {
1057  Uses[I].set(Idx);
1058  }
1059  ++Idx;
1060  }
1061 
1062  // Make sure our subsumed instructions are remembered too.
1063  for (auto *I : DRS.SubsumedInsts) {
1064  Uses[I].set(IL_All);
1065  }
1066  }
1067 
1068  // Make sure the loop increments are also accounted for.
1069 
1070  Exclude.clear();
1071  for (auto &DRS : RootSets) {
1072  Exclude.insert(DRS.Roots.begin(), DRS.Roots.end());
1073  Exclude.insert(DRS.SubsumedInsts.begin(), DRS.SubsumedInsts.end());
1074  Exclude.insert(DRS.BaseInst);
1075  }
1076 
1078  collectInLoopUserSet(LoopIncs, Exclude, PossibleRedSet, V);
1079  for (auto *I : V) {
1080  if (I->mayHaveSideEffects()) {
1081  LLVM_DEBUG(dbgs() << "LRR: Aborting - "
1082  << "An instruction which does not belong to any root "
1083  << "sets must not have side effects: " << *I);
1084  return false;
1085  }
1086  Uses[I].set(IL_All);
1087  }
1088 
1089  return true;
1090 }
1091 
1092 /// Get the next instruction in "In" that is a member of set Val.
1093 /// Start searching from StartI, and do not return anything in Exclude.
1094 /// If StartI is not given, start from In.begin().
1096 LoopReroll::DAGRootTracker::nextInstr(int Val, UsesTy &In,
1097  const SmallInstructionSet &Exclude,
1098  UsesTy::iterator *StartI) {
1099  UsesTy::iterator I = StartI ? *StartI : In.begin();
1100  while (I != In.end() && (I->second.test(Val) == 0 ||
1101  Exclude.contains(I->first)))
1102  ++I;
1103  return I;
1104 }
1105 
1106 bool LoopReroll::DAGRootTracker::isBaseInst(Instruction *I) {
1107  for (auto &DRS : RootSets) {
1108  if (DRS.BaseInst == I)
1109  return true;
1110  }
1111  return false;
1112 }
1113 
1114 bool LoopReroll::DAGRootTracker::isRootInst(Instruction *I) {
1115  for (auto &DRS : RootSets) {
1116  if (is_contained(DRS.Roots, I))
1117  return true;
1118  }
1119  return false;
1120 }
1121 
1122 /// Return true if instruction I depends on any instruction between
1123 /// Start and End.
1124 bool LoopReroll::DAGRootTracker::instrDependsOn(Instruction *I,
1125  UsesTy::iterator Start,
1126  UsesTy::iterator End) {
1127  for (auto *U : I->users()) {
1128  for (auto It = Start; It != End; ++It)
1129  if (U == It->first)
1130  return true;
1131  }
1132  return false;
1133 }
1134 
1135 static bool isIgnorableInst(const Instruction *I) {
1136  if (isa<DbgInfoIntrinsic>(I))
1137  return true;
1138  const IntrinsicInst* II = dyn_cast<IntrinsicInst>(I);
1139  if (!II)
1140  return false;
1141  switch (II->getIntrinsicID()) {
1142  default:
1143  return false;
1144  case Intrinsic::annotation:
1145  case Intrinsic::ptr_annotation:
1146  case Intrinsic::var_annotation:
1147  // TODO: the following intrinsics may also be allowed:
1148  // lifetime_start, lifetime_end, invariant_start, invariant_end
1149  return true;
1150  }
1151  return false;
1152 }
1153 
1154 bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
1155  // We now need to check for equivalence of the use graph of each root with
1156  // that of the primary induction variable (excluding the roots). Our goal
1157  // here is not to solve the full graph isomorphism problem, but rather to
1158  // catch common cases without a lot of work. As a result, we will assume
1159  // that the relative order of the instructions in each unrolled iteration
1160  // is the same (although we will not make an assumption about how the
1161  // different iterations are intermixed). Note that while the order must be
1162  // the same, the instructions may not be in the same basic block.
1163 
1164  // An array of just the possible reductions for this scale factor. When we
1165  // collect the set of all users of some root instructions, these reduction
1166  // instructions are treated as 'final' (their uses are not considered).
1167  // This is important because we don't want the root use set to search down
1168  // the reduction chain.
1169  SmallInstructionSet PossibleRedSet;
1170  SmallInstructionSet PossibleRedLastSet;
1171  SmallInstructionSet PossibleRedPHISet;
1172  Reductions.restrictToScale(Scale, PossibleRedSet,
1173  PossibleRedPHISet, PossibleRedLastSet);
1174 
1175  // Populate "Uses" with where each instruction is used.
1176  if (!collectUsedInstructions(PossibleRedSet))
1177  return false;
1178 
1179  // Make sure we mark the reduction PHIs as used in all iterations.
1180  for (auto *I : PossibleRedPHISet) {
1181  Uses[I].set(IL_All);
1182  }
1183 
1184  // Make sure we mark loop-control-only PHIs as used in all iterations. See
1185  // comment above LoopReroll::isLoopControlIV for more information.
1186  BasicBlock *Header = L->getHeader();
1187  if (LoopControlIV && LoopControlIV != IV) {
1188  for (auto *U : LoopControlIV->users()) {
1189  Instruction *IVUser = dyn_cast<Instruction>(U);
1190  // IVUser could be loop increment or compare
1191  Uses[IVUser].set(IL_All);
1192  for (auto *UU : IVUser->users()) {
1193  Instruction *UUser = dyn_cast<Instruction>(UU);
1194  // UUser could be compare, PHI or branch
1195  Uses[UUser].set(IL_All);
1196  // Skip SExt
1197  if (isa<SExtInst>(UUser)) {
1198  UUser = dyn_cast<Instruction>(*(UUser->user_begin()));
1199  Uses[UUser].set(IL_All);
1200  }
1201  // Is UUser a compare instruction?
1202  if (UU->hasOneUse()) {
1203  Instruction *BI = dyn_cast<BranchInst>(*UUser->user_begin());
1204  if (BI == cast<BranchInst>(Header->getTerminator()))
1205  Uses[BI].set(IL_All);
1206  }
1207  }
1208  }
1209  }
1210 
1211  // Make sure all instructions in the loop are in one and only one
1212  // set.
1213  for (auto &KV : Uses) {
1214  if (KV.second.count() != 1 && !isIgnorableInst(KV.first)) {
1215  LLVM_DEBUG(
1216  dbgs() << "LRR: Aborting - instruction is not used in 1 iteration: "
1217  << *KV.first << " (#uses=" << KV.second.count() << ")\n");
1218  return false;
1219  }
1220  }
1221 
1222  LLVM_DEBUG(for (auto &KV
1223  : Uses) {
1224  dbgs() << "LRR: " << KV.second.find_first() << "\t" << *KV.first << "\n";
1225  });
1226 
1227  for (unsigned Iter = 1; Iter < Scale; ++Iter) {
1228  // In addition to regular aliasing information, we need to look for
1229  // instructions from later (future) iterations that have side effects
1230  // preventing us from reordering them past other instructions with side
1231  // effects.
1232  bool FutureSideEffects = false;
1233  AliasSetTracker AST(*AA);
1234  // The map between instructions in f(%iv.(i+1)) and f(%iv).
1236 
1237  // Compare iteration Iter to the base.
1238  SmallInstructionSet Visited;
1239  auto BaseIt = nextInstr(0, Uses, Visited);
1240  auto RootIt = nextInstr(Iter, Uses, Visited);
1241  auto LastRootIt = Uses.begin();
1242 
1243  while (BaseIt != Uses.end() && RootIt != Uses.end()) {
1244  Instruction *BaseInst = BaseIt->first;
1245  Instruction *RootInst = RootIt->first;
1246 
1247  // Skip over the IV or root instructions; only match their users.
1248  bool Continue = false;
1249  if (isBaseInst(BaseInst)) {
1250  Visited.insert(BaseInst);
1251  BaseIt = nextInstr(0, Uses, Visited);
1252  Continue = true;
1253  }
1254  if (isRootInst(RootInst)) {
1255  LastRootIt = RootIt;
1256  Visited.insert(RootInst);
1257  RootIt = nextInstr(Iter, Uses, Visited);
1258  Continue = true;
1259  }
1260  if (Continue) continue;
1261 
1262  if (!BaseInst->isSameOperationAs(RootInst)) {
1263  // Last chance saloon. We don't try and solve the full isomorphism
1264  // problem, but try and at least catch the case where two instructions
1265  // *of different types* are round the wrong way. We won't be able to
1266  // efficiently tell, given two ADD instructions, which way around we
1267  // should match them, but given an ADD and a SUB, we can at least infer
1268  // which one is which.
1269  //
1270  // This should allow us to deal with a greater subset of the isomorphism
1271  // problem. It does however change a linear algorithm into a quadratic
1272  // one, so limit the number of probes we do.
1273  auto TryIt = RootIt;
1274  unsigned N = NumToleratedFailedMatches;
1275  while (TryIt != Uses.end() &&
1276  !BaseInst->isSameOperationAs(TryIt->first) &&
1277  N--) {
1278  ++TryIt;
1279  TryIt = nextInstr(Iter, Uses, Visited, &TryIt);
1280  }
1281 
1282  if (TryIt == Uses.end() || TryIt == RootIt ||
1283  instrDependsOn(TryIt->first, RootIt, TryIt)) {
1284  LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at "
1285  << *BaseInst << " vs. " << *RootInst << "\n");
1286  return false;
1287  }
1288 
1289  RootIt = TryIt;
1290  RootInst = TryIt->first;
1291  }
1292 
1293  // All instructions between the last root and this root
1294  // may belong to some other iteration. If they belong to a
1295  // future iteration, then they're dangerous to alias with.
1296  //
1297  // Note that because we allow a limited amount of flexibility in the order
1298  // that we visit nodes, LastRootIt might be *before* RootIt, in which
1299  // case we've already checked this set of instructions so we shouldn't
1300  // do anything.
1301  for (; LastRootIt < RootIt; ++LastRootIt) {
1302  Instruction *I = LastRootIt->first;
1303  if (LastRootIt->second.find_first() < (int)Iter)
1304  continue;
1305  if (I->mayWriteToMemory())
1306  AST.add(I);
1307  // Note: This is specifically guarded by a check on isa<PHINode>,
1308  // which while a valid (somewhat arbitrary) micro-optimization, is
1309  // needed because otherwise isSafeToSpeculativelyExecute returns
1310  // false on PHI nodes.
1311  if (!isa<PHINode>(I) && !isUnorderedLoadStore(I) &&
1313  // Intervening instructions cause side effects.
1314  FutureSideEffects = true;
1315  }
1316 
1317  // Make sure that this instruction, which is in the use set of this
1318  // root instruction, does not also belong to the base set or the set of
1319  // some other root instruction.
1320  if (RootIt->second.count() > 1) {
1321  LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst
1322  << " vs. " << *RootInst << " (prev. case overlap)\n");
1323  return false;
1324  }
1325 
1326  // Make sure that we don't alias with any instruction in the alias set
1327  // tracker. If we do, then we depend on a future iteration, and we
1328  // can't reroll.
1329  if (RootInst->mayReadFromMemory()) {
1330  BatchAAResults BatchAA(*AA);
1331  for (auto &K : AST) {
1332  if (K.aliasesUnknownInst(RootInst, BatchAA)) {
1333  LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at "
1334  << *BaseInst << " vs. " << *RootInst
1335  << " (depends on future store)\n");
1336  return false;
1337  }
1338  }
1339  }
1340 
1341  // If we've past an instruction from a future iteration that may have
1342  // side effects, and this instruction might also, then we can't reorder
1343  // them, and this matching fails. As an exception, we allow the alias
1344  // set tracker to handle regular (unordered) load/store dependencies.
1345  if (FutureSideEffects && ((!isUnorderedLoadStore(BaseInst) &&
1346  !isSafeToSpeculativelyExecute(BaseInst)) ||
1347  (!isUnorderedLoadStore(RootInst) &&
1348  !isSafeToSpeculativelyExecute(RootInst)))) {
1349  LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst
1350  << " vs. " << *RootInst
1351  << " (side effects prevent reordering)\n");
1352  return false;
1353  }
1354 
1355  // For instructions that are part of a reduction, if the operation is
1356  // associative, then don't bother matching the operands (because we
1357  // already know that the instructions are isomorphic, and the order
1358  // within the iteration does not matter). For non-associative reductions,
1359  // we do need to match the operands, because we need to reject
1360  // out-of-order instructions within an iteration!
1361  // For example (assume floating-point addition), we need to reject this:
1362  // x += a[i]; x += b[i];
1363  // x += a[i+1]; x += b[i+1];
1364  // x += b[i+2]; x += a[i+2];
1365  bool InReduction = Reductions.isPairInSame(BaseInst, RootInst);
1366 
1367  if (!(InReduction && BaseInst->isAssociative())) {
1368  bool Swapped = false, SomeOpMatched = false;
1369  for (unsigned j = 0; j < BaseInst->getNumOperands(); ++j) {
1370  Value *Op2 = RootInst->getOperand(j);
1371 
1372  // If this is part of a reduction (and the operation is not
1373  // associatve), then we match all operands, but not those that are
1374  // part of the reduction.
1375  if (InReduction)
1376  if (Instruction *Op2I = dyn_cast<Instruction>(Op2))
1377  if (Reductions.isPairInSame(RootInst, Op2I))
1378  continue;
1379 
1380  DenseMap<Value *, Value *>::iterator BMI = BaseMap.find(Op2);
1381  if (BMI != BaseMap.end()) {
1382  Op2 = BMI->second;
1383  } else {
1384  for (auto &DRS : RootSets) {
1385  if (DRS.Roots[Iter-1] == (Instruction*) Op2) {
1386  Op2 = DRS.BaseInst;
1387  break;
1388  }
1389  }
1390  }
1391 
1392  if (BaseInst->getOperand(Swapped ? unsigned(!j) : j) != Op2) {
1393  // If we've not already decided to swap the matched operands, and
1394  // we've not already matched our first operand (note that we could
1395  // have skipped matching the first operand because it is part of a
1396  // reduction above), and the instruction is commutative, then try
1397  // the swapped match.
1398  if (!Swapped && BaseInst->isCommutative() && !SomeOpMatched &&
1399  BaseInst->getOperand(!j) == Op2) {
1400  Swapped = true;
1401  } else {
1402  LLVM_DEBUG(dbgs()
1403  << "LRR: iteration root match failed at " << *BaseInst
1404  << " vs. " << *RootInst << " (operand " << j << ")\n");
1405  return false;
1406  }
1407  }
1408 
1409  SomeOpMatched = true;
1410  }
1411  }
1412 
1413  if ((!PossibleRedLastSet.count(BaseInst) &&
1414  hasUsesOutsideLoop(BaseInst, L)) ||
1415  (!PossibleRedLastSet.count(RootInst) &&
1416  hasUsesOutsideLoop(RootInst, L))) {
1417  LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst
1418  << " vs. " << *RootInst << " (uses outside loop)\n");
1419  return false;
1420  }
1421 
1422  Reductions.recordPair(BaseInst, RootInst, Iter);
1423  BaseMap.insert(std::make_pair(RootInst, BaseInst));
1424 
1425  LastRootIt = RootIt;
1426  Visited.insert(BaseInst);
1427  Visited.insert(RootInst);
1428  BaseIt = nextInstr(0, Uses, Visited);
1429  RootIt = nextInstr(Iter, Uses, Visited);
1430  }
1431  assert(BaseIt == Uses.end() && RootIt == Uses.end() &&
1432  "Mismatched set sizes!");
1433  }
1434 
1435  LLVM_DEBUG(dbgs() << "LRR: Matched all iteration increments for " << *IV
1436  << "\n");
1437 
1438  return true;
1439 }
1440 
1441 void LoopReroll::DAGRootTracker::replace(const SCEV *BackedgeTakenCount) {
1442  BasicBlock *Header = L->getHeader();
1443 
1444  // Compute the start and increment for each BaseInst before we start erasing
1445  // instructions.
1446  SmallVector<const SCEV *, 8> StartExprs;
1447  SmallVector<const SCEV *, 8> IncrExprs;
1448  for (auto &DRS : RootSets) {
1449  const SCEVAddRecExpr *IVSCEV =
1450  cast<SCEVAddRecExpr>(SE->getSCEV(DRS.BaseInst));
1451  StartExprs.push_back(IVSCEV->getStart());
1452  IncrExprs.push_back(SE->getMinusSCEV(SE->getSCEV(DRS.Roots[0]), IVSCEV));
1453  }
1454 
1455  // Remove instructions associated with non-base iterations.
1456  for (Instruction &Inst : llvm::make_early_inc_range(llvm::reverse(*Header))) {
1457  unsigned I = Uses[&Inst].find_first();
1458  if (I > 0 && I < IL_All) {
1459  LLVM_DEBUG(dbgs() << "LRR: removing: " << Inst << "\n");
1460  Inst.eraseFromParent();
1461  }
1462  }
1463 
1464  // Rewrite each BaseInst using SCEV.
1465  for (size_t i = 0, e = RootSets.size(); i != e; ++i)
1466  // Insert the new induction variable.
1467  replaceIV(RootSets[i], StartExprs[i], IncrExprs[i]);
1468 
1469  { // Limit the lifetime of SCEVExpander.
1470  BranchInst *BI = cast<BranchInst>(Header->getTerminator());
1471  const DataLayout &DL = Header->getModule()->getDataLayout();
1472  SCEVExpander Expander(*SE, DL, "reroll");
1473  auto Zero = SE->getZero(BackedgeTakenCount->getType());
1474  auto One = SE->getOne(BackedgeTakenCount->getType());
1475  auto NewIVSCEV = SE->getAddRecExpr(Zero, One, L, SCEV::FlagAnyWrap);
1476  Value *NewIV =
1477  Expander.expandCodeFor(NewIVSCEV, BackedgeTakenCount->getType(),
1478  Header->getFirstNonPHIOrDbg());
1479  // FIXME: This arithmetic can overflow.
1480  auto TripCount = SE->getAddExpr(BackedgeTakenCount, One);
1481  auto ScaledTripCount = SE->getMulExpr(
1482  TripCount, SE->getConstant(BackedgeTakenCount->getType(), Scale));
1483  auto ScaledBECount = SE->getMinusSCEV(ScaledTripCount, One);
1484  Value *TakenCount =
1485  Expander.expandCodeFor(ScaledBECount, BackedgeTakenCount->getType(),
1486  Header->getFirstNonPHIOrDbg());
1487  Value *Cond =
1488  new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, TakenCount, "exitcond");
1489  BI->setCondition(Cond);
1490 
1491  if (BI->getSuccessor(1) != Header)
1492  BI->swapSuccessors();
1493  }
1494 
1495  SimplifyInstructionsInBlock(Header, TLI);
1496  DeleteDeadPHIs(Header, TLI);
1497 }
1498 
1499 void LoopReroll::DAGRootTracker::replaceIV(DAGRootSet &DRS,
1500  const SCEV *Start,
1501  const SCEV *IncrExpr) {
1502  BasicBlock *Header = L->getHeader();
1503  Instruction *Inst = DRS.BaseInst;
1504 
1505  const SCEV *NewIVSCEV =
1506  SE->getAddRecExpr(Start, IncrExpr, L, SCEV::FlagAnyWrap);
1507 
1508  { // Limit the lifetime of SCEVExpander.
1509  const DataLayout &DL = Header->getModule()->getDataLayout();
1510  SCEVExpander Expander(*SE, DL, "reroll");
1511  Value *NewIV = Expander.expandCodeFor(NewIVSCEV, Inst->getType(),
1512  Header->getFirstNonPHIOrDbg());
1513 
1514  for (auto &KV : Uses)
1515  if (KV.second.find_first() == 0)
1516  KV.first->replaceUsesOfWith(Inst, NewIV);
1517  }
1518 }
1519 
1520 // Validate the selected reductions. All iterations must have an isomorphic
1521 // part of the reduction chain and, for non-associative reductions, the chain
1522 // entries must appear in order.
1523 bool LoopReroll::ReductionTracker::validateSelected() {
1524  // For a non-associative reduction, the chain entries must appear in order.
1525  for (int i : Reds) {
1526  int PrevIter = 0, BaseCount = 0, Count = 0;
1527  for (Instruction *J : PossibleReds[i]) {
1528  // Note that all instructions in the chain must have been found because
1529  // all instructions in the function must have been assigned to some
1530  // iteration.
1531  int Iter = PossibleRedIter[J];
1532  if (Iter != PrevIter && Iter != PrevIter + 1 &&
1533  !PossibleReds[i].getReducedValue()->isAssociative()) {
1534  LLVM_DEBUG(dbgs() << "LRR: Out-of-order non-associative reduction: "
1535  << J << "\n");
1536  return false;
1537  }
1538 
1539  if (Iter != PrevIter) {
1540  if (Count != BaseCount) {
1541  LLVM_DEBUG(dbgs()
1542  << "LRR: Iteration " << PrevIter << " reduction use count "
1543  << Count << " is not equal to the base use count "
1544  << BaseCount << "\n");
1545  return false;
1546  }
1547 
1548  Count = 0;
1549  }
1550 
1551  ++Count;
1552  if (Iter == 0)
1553  ++BaseCount;
1554 
1555  PrevIter = Iter;
1556  }
1557  }
1558 
1559  return true;
1560 }
1561 
1562 // For all selected reductions, remove all parts except those in the first
1563 // iteration (and the PHI). Replace outside uses of the reduced value with uses
1564 // of the first-iteration reduced value (in other words, reroll the selected
1565 // reductions).
1566 void LoopReroll::ReductionTracker::replaceSelected() {
1567  // Fixup reductions to refer to the last instruction associated with the
1568  // first iteration (not the last).
1569  for (int i : Reds) {
1570  int j = 0;
1571  for (int e = PossibleReds[i].size(); j != e; ++j)
1572  if (PossibleRedIter[PossibleReds[i][j]] != 0) {
1573  --j;
1574  break;
1575  }
1576 
1577  // Replace users with the new end-of-chain value.
1578  SmallInstructionVector Users;
1579  for (User *U : PossibleReds[i].getReducedValue()->users()) {
1580  Users.push_back(cast<Instruction>(U));
1581  }
1582 
1583  for (Instruction *User : Users)
1584  User->replaceUsesOfWith(PossibleReds[i].getReducedValue(),
1585  PossibleReds[i][j]);
1586  }
1587 }
1588 
1589 // Reroll the provided loop with respect to the provided induction variable.
1590 // Generally, we're looking for a loop like this:
1591 //
1592 // %iv = phi [ (preheader, ...), (body, %iv.next) ]
1593 // f(%iv)
1594 // %iv.1 = add %iv, 1 <-- a root increment
1595 // f(%iv.1)
1596 // %iv.2 = add %iv, 2 <-- a root increment
1597 // f(%iv.2)
1598 // %iv.scale_m_1 = add %iv, scale-1 <-- a root increment
1599 // f(%iv.scale_m_1)
1600 // ...
1601 // %iv.next = add %iv, scale
1602 // %cmp = icmp(%iv, ...)
1603 // br %cmp, header, exit
1604 //
1605 // Notably, we do not require that f(%iv), f(%iv.1), etc. be isolated groups of
1606 // instructions. In other words, the instructions in f(%iv), f(%iv.1), etc. can
1607 // be intermixed with eachother. The restriction imposed by this algorithm is
1608 // that the relative order of the isomorphic instructions in f(%iv), f(%iv.1),
1609 // etc. be the same.
1610 //
1611 // First, we collect the use set of %iv, excluding the other increment roots.
1612 // This gives us f(%iv). Then we iterate over the loop instructions (scale-1)
1613 // times, having collected the use set of f(%iv.(i+1)), during which we:
1614 // - Ensure that the next unmatched instruction in f(%iv) is isomorphic to
1615 // the next unmatched instruction in f(%iv.(i+1)).
1616 // - Ensure that both matched instructions don't have any external users
1617 // (with the exception of last-in-chain reduction instructions).
1618 // - Track the (aliasing) write set, and other side effects, of all
1619 // instructions that belong to future iterations that come before the matched
1620 // instructions. If the matched instructions read from that write set, then
1621 // f(%iv) or f(%iv.(i+1)) has some dependency on instructions in
1622 // f(%iv.(j+1)) for some j > i, and we cannot reroll the loop. Similarly,
1623 // if any of these future instructions had side effects (could not be
1624 // speculatively executed), and so do the matched instructions, when we
1625 // cannot reorder those side-effect-producing instructions, and rerolling
1626 // fails.
1627 //
1628 // Finally, we make sure that all loop instructions are either loop increment
1629 // roots, belong to simple latch code, parts of validated reductions, part of
1630 // f(%iv) or part of some f(%iv.i). If all of that is true (and all reductions
1631 // have been validated), then we reroll the loop.
1632 bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
1633  const SCEV *BackedgeTakenCount,
1634  ReductionTracker &Reductions) {
1635  DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI, DT, LI, PreserveLCSSA,
1636  IVToIncMap, LoopControlIV);
1637 
1638  if (!DAGRoots.findRoots())
1639  return false;
1640  LLVM_DEBUG(dbgs() << "LRR: Found all root induction increments for: " << *IV
1641  << "\n");
1642 
1643  if (!DAGRoots.validate(Reductions))
1644  return false;
1645  if (!Reductions.validateSelected())
1646  return false;
1647  // At this point, we've validated the rerolling, and we're committed to
1648  // making changes!
1649 
1650  Reductions.replaceSelected();
1651  DAGRoots.replace(BackedgeTakenCount);
1652 
1653  ++NumRerolledLoops;
1654  return true;
1655 }
1656 
1657 bool LoopReroll::runOnLoop(Loop *L) {
1658  BasicBlock *Header = L->getHeader();
1659  LLVM_DEBUG(dbgs() << "LRR: F[" << Header->getParent()->getName() << "] Loop %"
1660  << Header->getName() << " (" << L->getNumBlocks()
1661  << " block(s))\n");
1662 
1663  // For now, we'll handle only single BB loops.
1664  if (L->getNumBlocks() > 1)
1665  return false;
1666 
1668  return false;
1669 
1670  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
1671  LLVM_DEBUG(dbgs() << "\n Before Reroll:\n" << *(L->getHeader()) << "\n");
1672  LLVM_DEBUG(dbgs() << "LRR: backedge-taken count = " << *BackedgeTakenCount
1673  << "\n");
1674 
1675  // First, we need to find the induction variable with respect to which we can
1676  // reroll (there may be several possible options).
1677  SmallInstructionVector PossibleIVs;
1678  IVToIncMap.clear();
1679  LoopControlIV = nullptr;
1680  collectPossibleIVs(L, PossibleIVs);
1681 
1682  if (PossibleIVs.empty()) {
1683  LLVM_DEBUG(dbgs() << "LRR: No possible IVs found\n");
1684  return false;
1685  }
1686 
1687  ReductionTracker Reductions;
1688  collectPossibleReductions(L, Reductions);
1689  bool Changed = false;
1690 
1691  // For each possible IV, collect the associated possible set of 'root' nodes
1692  // (i+1, i+2, etc.).
1693  for (Instruction *PossibleIV : PossibleIVs)
1694  if (reroll(PossibleIV, L, Header, BackedgeTakenCount, Reductions)) {
1695  Changed = true;
1696  break;
1697  }
1698  LLVM_DEBUG(dbgs() << "\n After Reroll:\n" << *(L->getHeader()) << "\n");
1699 
1700  // Trip count of L has changed so SE must be re-evaluated.
1701  if (Changed)
1702  SE->forgetLoop(L);
1703 
1704  return Changed;
1705 }
1706 
1707 bool LoopRerollLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
1708  if (skipLoop(L))
1709  return false;
1710 
1711  auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
1712  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1713  auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1714  auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
1715  *L->getHeader()->getParent());
1716  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1717  bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
1718 
1719  return LoopReroll(AA, LI, SE, TLI, DT, PreserveLCSSA).runOnLoop(L);
1720 }
1721 
1724  LPMUpdater &U) {
1725  return LoopReroll(&AR.AA, &AR.LI, &AR.SE, &AR.TLI, &AR.DT, true).runOnLoop(&L)
1728 }
llvm::MapVector::iterator
typename VectorType::iterator iterator
Definition: MapVector.h:50
i
i
Definition: README.txt:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
llvm::BatchAAResults
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
Definition: AliasAnalysis.h:609
llvm::ScalarEvolution::hasLoopInvariantBackedgeTakenCount
bool hasLoopInvariantBackedgeTakenCount(const Loop *L)
Return true if the specified loop has an analyzable loop-invariant backedge-taken count.
Definition: ScalarEvolution.cpp:13344
llvm::Instruction::isAssociative
bool isAssociative() const LLVM_READONLY
Return true if the instruction is associative:
Definition: Instruction.cpp:792
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:108
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::CmpInst::ICMP_EQ
@ ICMP_EQ
equal
Definition: InstrTypes.h:741
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::AArch64PACKey::ID
ID
Definition: AArch64BaseInfo.h:818
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
IntrinsicInst.h
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:237
ScalarEvolutionExpander.h
Scalar.h
llvm::SCEVAddRecExpr::getStart
const SCEV * getStart() const
Definition: ScalarEvolutionExpressions.h:353
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Pass.h
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:139
llvm::IntrinsicInst::getIntrinsicID
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:53
llvm::SCEVExpander
This class uses information about analyze scalars to rewrite expressions in canonical form.
Definition: ScalarEvolutionExpander.h:50
llvm::Instruction::isSameOperationAs
bool isSameOperationAs(const Instruction *I, unsigned flags=0) const LLVM_READONLY
This function determines if the specified instruction executes the same operation as the current one.
Definition: Instruction.cpp:571
llvm::ConstantInt::getValue
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:133
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
Statistic.h
llvm::APInt::getSExtValue
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1498
llvm::ScalarEvolution::getAddRecExpr
const SCEV * getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
Definition: ScalarEvolution.cpp:3611
MapVector.h
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
ValueTracking.h
Local.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::getLoopAnalysisUsage
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:142
llvm::AliasSetTracker
Definition: AliasSetTracker.h:305
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
llvm::LoopRerollPass::run
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LoopRerollPass.cpp:1722
APInt.h
ScalarEvolution.h
DenseMap.h
Instructions
Code Generation Notes for reduce the size of the ISel and reduce repetition in the implementation In a small number of this can cause even when no optimisation has taken place Instructions
Definition: MSA.txt:11
Module.h
isSimpleArithmeticOp
static bool isSimpleArithmeticOp(User *IVU)
Return true if IVU is a "simple" arithmetic operation.
Definition: LoopRerollPass.cpp:747
reroll
loop reroll
Definition: LoopRerollPass.cpp:498
llvm::sys::path::end
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:235
llvm::sys::path::begin
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:226
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:51
llvm::MemIntrinsic
This is the common base class for memset/memcpy/memmove.
Definition: IntrinsicInst.h:1041
llvm::MapVector
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:37
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::Value::user_begin
user_iterator user_begin()
Definition: Value.h:397
STLExtras.h
llvm::RISCVFeatures::validate
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
Definition: RISCVBaseInfo.cpp:97
llvm::LoopStandardAnalysisResults::DT
DominatorTree & DT
Definition: LoopAnalysisManager.h:54
replace
static void replace(Module &M, GlobalVariable *Old, GlobalVariable *New)
Definition: ConstantMerge.cpp:116
Use.h
llvm::LoopBase::getNumBlocks
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:202
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::BranchInst::swapSuccessors
void swapSuccessors()
Swap the successors of this branch instruction.
Definition: Instructions.cpp:1418
llvm::AMDGPU::HSAMD::ValueKind::Queue
@ Queue
Uses
SmallPtrSet< MachineInstr *, 2 > Uses
Definition: ARMLowOverheadLoops.cpp:590
loop
Analysis the ScalarEvolution expression for r is< loop > Outside the loop
Definition: README.txt:8
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
AliasAnalysis.h
llvm::Instruction::mayReadFromMemory
bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
Definition: Instruction.cpp:613
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::ScalarEvolution::getMulExpr
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:3090
Instruction.h
CommandLine.h
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(LoopRerollLegacyPass, "loop-reroll", "Reroll loops", false, false) INITIALIZE_PASS_END(LoopRerollLegacyPass
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::SimplifyInstructionsInBlock
bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...
Definition: Local.cpp:709
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24
llvm::ScalarEvolution::getOne
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
Definition: ScalarEvolution.h:649
Constants.h
llvm::AAResults
Definition: AliasAnalysis.h:294
llvm::User
Definition: User.h:44
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
InstrTypes.h
SI
@ SI
Definition: SIInstrInfo.cpp:7966
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::replace
void replace(Container &Cont, typename Container::iterator ContIt, typename Container::iterator ContEnd, RandomAccessIterator ValIt, RandomAccessIterator ValEnd)
Given a sequence container Cont, replace the range [ContIt, ContEnd) with the range [ValIt,...
Definition: STLExtras.h:2020
TargetLibraryInfo.h
DenseSet.h
false
Definition: StackSlotColoring.cpp:141
llvm::BinaryOperator::getOpcode
BinaryOps getOpcode() const
Definition: InstrTypes.h:393
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::size
size_type size() const
Definition: DenseSet.h:81
llvm::initializeLoopRerollLegacyPassPass
void initializeLoopRerollLegacyPassPass(PassRegistry &)
llvm::Instruction
Definition: Instruction.h:42
llvm::BranchInst::setCondition
void setCondition(Value *V)
Definition: Instructions.h:3218
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
NumToleratedFailedMatches
static cl::opt< unsigned > NumToleratedFailedMatches("reroll-num-tolerated-failed-matches", cl::init(400), cl::Hidden, cl::desc("The maximum number of failures to tolerate" " during fuzzy matching. (default: 400)"))
BitVector.h
LoopUtils.h
llvm::LPPassManager
Definition: LoopPass.h:76
SmallPtrSet.h
llvm::SmallVectorImpl::append
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:687
isLoopIncrement
static bool isLoopIncrement(User *U, Instruction *IV)
Definition: LoopRerollPass.cpp:767
Utils.h
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Type.h
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::Instruction::isCommutative
bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
Definition: Instruction.cpp:807
LoopInfo.h
llvm::ARM_PROC::IE
@ IE
Definition: ARMBaseInfo.h:27
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:4442
llvm::DenseSet
Implements a dense probed hash-table based set.
Definition: DenseSet.h:268
llvm::tgtok::In
@ In
Definition: TGLexer.h:51
llvm::getLoopPassPreservedAnalyses
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Definition: LoopAnalysisManager.cpp:138
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1412
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:75
llvm::createLoopRerollPass
Pass * createLoopRerollPass()
Definition: LoopRerollPass.cpp:501
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:298
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::clear
void clear()
Definition: DenseMap.h:110
llvm::ICmpInst
This instruction compares its operands according to the predicate given to the constructor.
Definition: Instructions.h:1186
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:475
uint64_t
llvm::LoopPass
Definition: LoopPass.h:28
llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition: LoopPassManager.h:262
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::ARM_AM::add
@ add
Definition: ARMAddressingModes.h:39
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:53
llvm::DenseMap
Definition: DenseMap.h:714
llvm::SCEV::FlagAnyWrap
@ FlagAnyWrap
Definition: ScalarEvolution.h:131
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:447
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:716
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1868
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1715
isAssociative
static bool isAssociative(const COFFSection &Section)
Definition: WinCOFFObjectWriter.cpp:926
llvm::LoopInfo
Definition: LoopInfo.h:1108
llvm::BinaryOperator
Definition: InstrTypes.h:189
Mul
BinaryOperator * Mul
Definition: X86PartialReduction.cpp:70
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:138
llvm::ScalarEvolution::getConstant
const SCEV * getConstant(ConstantInt *V)
Definition: ScalarEvolution.cpp:472
llvm::logicalview::LVAttributeKind::Zero
@ Zero
LoopPass.h
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::Value::getNumUses
unsigned getNumUses() const
This method computes the number of uses of this Value.
Definition: Value.cpp:254
llvm::AArch64ISD::ADR
@ ADR
Definition: AArch64ISelLowering.h:76
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::LoopStandardAnalysisResults::LI
LoopInfo & LI
Definition: LoopAnalysisManager.h:55
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:174
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
llvm::User::replaceUsesOfWith
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
users
iv users
Definition: IVUsers.cpp:48
isUnorderedLoadStore
static bool isUnorderedLoadStore(Instruction *I)
Definition: LoopRerollPass.cpp:734
loops
loop Reroll loops
Definition: LoopRerollPass.cpp:498
j
return j(j<< 16)
llvm::LCSSAID
char & LCSSAID
Definition: LCSSA.cpp:492
llvm::ScalarEvolution::forgetLoop
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
Definition: ScalarEvolution.cpp:8372
isIgnorableInst
static bool isIgnorableInst(const Instruction *I)
Definition: LoopRerollPass.cpp:1135
llvm::ScalarEvolution::getMinusSCEV
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
Definition: ScalarEvolution.cpp:4550
get
Should compile to something r4 addze r3 instead we get
Definition: README.txt:24
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:84
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:348
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:342
Casting.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:105
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:226
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:614
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:185
llvm::LoopStandardAnalysisResults::SE
ScalarEvolution & SE
Definition: LoopAnalysisManager.h:56
llvm::DeleteDeadPHIs
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
Definition: BasicBlockUtils.cpp:163
llvm::LoopStandardAnalysisResults::AA
AAResults & AA
Definition: LoopAnalysisManager.h:52
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:46
ScalarEvolutionExpressions.h
LoopReroll.h
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
Instructions.h
llvm::User::getNumOperands
unsigned getNumOperands() const
Definition: User.h:191
SmallVector.h
User.h
Dominators.h
N
#define N
llvm::ScalarEvolution::getZero
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
Definition: ScalarEvolution.h:646
Users
iv Induction Variable Users
Definition: IVUsers.cpp:48
llvm::PHINode
Definition: Instructions.h:2697
IV
static const uint32_t IV[8]
Definition: blake3_impl.h:85
llvm::reverse
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:485
llvm::SCEV::getType
Type * getType() const
Return the LLVM type of this SCEV expression.
Definition: ScalarEvolution.cpp:403
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::ScalarEvolution::getAddExpr
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:2493
AliasSetTracker.h
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:171
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:413
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3132
raw_ostream.h
llvm::ScalarEvolution::getBackedgeTakenCount
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
Definition: ScalarEvolution.cpp:8242
BasicBlockUtils.h
hasUsesOutsideLoop
static bool hasUsesOutsideLoop(Instruction *I, Loop *L)
Definition: LoopRerollPass.cpp:506
Value.h
llvm::abs
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1297
llvm::LoopStandardAnalysisResults::TLI
TargetLibraryInfo & TLI
Definition: LoopAnalysisManager.h:57
InitializePasses.h
llvm::isSafeToSpeculativelyExecute
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
Definition: ValueTracking.cpp:4731
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3225
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43