LLVM  15.0.0git
LoopRerollPass.cpp
Go to the documentation of this file.
1 //===- LoopReroll.cpp - Loop rerolling pass -------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass implements a simple loop reroller.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/ADT/APInt.h"
14 #include "llvm/ADT/BitVector.h"
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/DenseSet.h"
17 #include "llvm/ADT/MapVector.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/SmallPtrSet.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Statistic.h"
24 #include "llvm/Analysis/LoopInfo.h"
25 #include "llvm/Analysis/LoopPass.h"
30 #include "llvm/IR/BasicBlock.h"
31 #include "llvm/IR/Constants.h"
32 #include "llvm/IR/Dominators.h"
33 #include "llvm/IR/InstrTypes.h"
34 #include "llvm/IR/Instruction.h"
35 #include "llvm/IR/Instructions.h"
36 #include "llvm/IR/IntrinsicInst.h"
37 #include "llvm/IR/Module.h"
38 #include "llvm/IR/Type.h"
39 #include "llvm/IR/Use.h"
40 #include "llvm/IR/User.h"
41 #include "llvm/IR/Value.h"
42 #include "llvm/InitializePasses.h"
43 #include "llvm/Pass.h"
44 #include "llvm/Support/Casting.h"
46 #include "llvm/Support/Debug.h"
48 #include "llvm/Transforms/Scalar.h"
50 #include "llvm/Transforms/Utils.h"
55 #include <cassert>
56 #include <cstddef>
57 #include <cstdint>
58 #include <iterator>
59 #include <map>
60 #include <utility>
61 
62 using namespace llvm;
63 
64 #define DEBUG_TYPE "loop-reroll"
65 
66 STATISTIC(NumRerolledLoops, "Number of rerolled loops");
67 
68 static cl::opt<unsigned>
69 NumToleratedFailedMatches("reroll-num-tolerated-failed-matches", cl::init(400),
70  cl::Hidden,
71  cl::desc("The maximum number of failures to tolerate"
72  " during fuzzy matching. (default: 400)"));
73 
74 // This loop re-rolling transformation aims to transform loops like this:
75 //
76 // int foo(int a);
77 // void bar(int *x) {
78 // for (int i = 0; i < 500; i += 3) {
79 // foo(i);
80 // foo(i+1);
81 // foo(i+2);
82 // }
83 // }
84 //
85 // into a loop like this:
86 //
87 // void bar(int *x) {
88 // for (int i = 0; i < 500; ++i)
89 // foo(i);
90 // }
91 //
92 // It does this by looking for loops that, besides the latch code, are composed
93 // of isomorphic DAGs of instructions, with each DAG rooted at some increment
94 // to the induction variable, and where each DAG is isomorphic to the DAG
95 // rooted at the induction variable (excepting the sub-DAGs which root the
96 // other induction-variable increments). In other words, we're looking for loop
97 // bodies of the form:
98 //
99 // %iv = phi [ (preheader, ...), (body, %iv.next) ]
100 // f(%iv)
101 // %iv.1 = add %iv, 1 <-- a root increment
102 // f(%iv.1)
103 // %iv.2 = add %iv, 2 <-- a root increment
104 // f(%iv.2)
105 // %iv.scale_m_1 = add %iv, scale-1 <-- a root increment
106 // f(%iv.scale_m_1)
107 // ...
108 // %iv.next = add %iv, scale
109 // %cmp = icmp(%iv, ...)
110 // br %cmp, header, exit
111 //
112 // where each f(i) is a set of instructions that, collectively, are a function
113 // only of i (and other loop-invariant values).
114 //
115 // As a special case, we can also reroll loops like this:
116 //
117 // int foo(int);
118 // void bar(int *x) {
119 // for (int i = 0; i < 500; ++i) {
120 // x[3*i] = foo(0);
121 // x[3*i+1] = foo(0);
122 // x[3*i+2] = foo(0);
123 // }
124 // }
125 //
126 // into this:
127 //
128 // void bar(int *x) {
129 // for (int i = 0; i < 1500; ++i)
130 // x[i] = foo(0);
131 // }
132 //
133 // in which case, we're looking for inputs like this:
134 //
135 // %iv = phi [ (preheader, ...), (body, %iv.next) ]
136 // %scaled.iv = mul %iv, scale
137 // f(%scaled.iv)
138 // %scaled.iv.1 = add %scaled.iv, 1
139 // f(%scaled.iv.1)
140 // %scaled.iv.2 = add %scaled.iv, 2
141 // f(%scaled.iv.2)
142 // %scaled.iv.scale_m_1 = add %scaled.iv, scale-1
143 // f(%scaled.iv.scale_m_1)
144 // ...
145 // %iv.next = add %iv, 1
146 // %cmp = icmp(%iv, ...)
147 // br %cmp, header, exit
148 
149 namespace {
150 
151  enum IterationLimits {
152  /// The maximum number of iterations that we'll try and reroll.
153  IL_MaxRerollIterations = 32,
154  /// The bitvector index used by loop induction variables and other
155  /// instructions that belong to all iterations.
156  IL_All,
157  IL_End
158  };
159 
160  class LoopRerollLegacyPass : public LoopPass {
161  public:
162  static char ID; // Pass ID, replacement for typeid
163 
164  LoopRerollLegacyPass() : LoopPass(ID) {
166  }
167 
168  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
169 
170  void getAnalysisUsage(AnalysisUsage &AU) const override {
173  }
174  };
175 
176  class LoopReroll {
177  public:
178  LoopReroll(AliasAnalysis *AA, LoopInfo *LI, ScalarEvolution *SE,
179  TargetLibraryInfo *TLI, DominatorTree *DT, bool PreserveLCSSA)
180  : AA(AA), LI(LI), SE(SE), TLI(TLI), DT(DT),
181  PreserveLCSSA(PreserveLCSSA) {}
182  bool runOnLoop(Loop *L);
183 
184  protected:
185  AliasAnalysis *AA;
186  LoopInfo *LI;
187  ScalarEvolution *SE;
188  TargetLibraryInfo *TLI;
189  DominatorTree *DT;
190  bool PreserveLCSSA;
191 
192  using SmallInstructionVector = SmallVector<Instruction *, 16>;
193  using SmallInstructionSet = SmallPtrSet<Instruction *, 16>;
194 
195  // Map between induction variable and its increment
197 
198  // For loop with multiple induction variable, remember the one used only to
199  // control the loop.
200  Instruction *LoopControlIV;
201 
202  // A chain of isomorphic instructions, identified by a single-use PHI
203  // representing a reduction. Only the last value may be used outside the
204  // loop.
205  struct SimpleLoopReduction {
206  SimpleLoopReduction(Instruction *P, Loop *L) : Instructions(1, P) {
207  assert(isa<PHINode>(P) && "First reduction instruction must be a PHI");
208  add(L);
209  }
210 
211  bool valid() const {
212  return Valid;
213  }
214 
215  Instruction *getPHI() const {
216  assert(Valid && "Using invalid reduction");
217  return Instructions.front();
218  }
219 
220  Instruction *getReducedValue() const {
221  assert(Valid && "Using invalid reduction");
222  return Instructions.back();
223  }
224 
225  Instruction *get(size_t i) const {
226  assert(Valid && "Using invalid reduction");
227  return Instructions[i+1];
228  }
229 
230  Instruction *operator [] (size_t i) const { return get(i); }
231 
232  // The size, ignoring the initial PHI.
233  size_t size() const {
234  assert(Valid && "Using invalid reduction");
235  return Instructions.size()-1;
236  }
237 
238  using iterator = SmallInstructionVector::iterator;
239  using const_iterator = SmallInstructionVector::const_iterator;
240 
241  iterator begin() {
242  assert(Valid && "Using invalid reduction");
243  return std::next(Instructions.begin());
244  }
245 
246  const_iterator begin() const {
247  assert(Valid && "Using invalid reduction");
248  return std::next(Instructions.begin());
249  }
250 
251  iterator end() { return Instructions.end(); }
252  const_iterator end() const { return Instructions.end(); }
253 
254  protected:
255  bool Valid = false;
256  SmallInstructionVector Instructions;
257 
258  void add(Loop *L);
259  };
260 
261  // The set of all reductions, and state tracking of possible reductions
262  // during loop instruction processing.
263  struct ReductionTracker {
264  using SmallReductionVector = SmallVector<SimpleLoopReduction, 16>;
265 
266  // Add a new possible reduction.
267  void addSLR(SimpleLoopReduction &SLR) { PossibleReds.push_back(SLR); }
268 
269  // Setup to track possible reductions corresponding to the provided
270  // rerolling scale. Only reductions with a number of non-PHI instructions
271  // that is divisible by the scale are considered. Three instructions sets
272  // are filled in:
273  // - A set of all possible instructions in eligible reductions.
274  // - A set of all PHIs in eligible reductions
275  // - A set of all reduced values (last instructions) in eligible
276  // reductions.
277  void restrictToScale(uint64_t Scale,
278  SmallInstructionSet &PossibleRedSet,
279  SmallInstructionSet &PossibleRedPHISet,
280  SmallInstructionSet &PossibleRedLastSet) {
281  PossibleRedIdx.clear();
282  PossibleRedIter.clear();
283  Reds.clear();
284 
285  for (unsigned i = 0, e = PossibleReds.size(); i != e; ++i)
286  if (PossibleReds[i].size() % Scale == 0) {
287  PossibleRedLastSet.insert(PossibleReds[i].getReducedValue());
288  PossibleRedPHISet.insert(PossibleReds[i].getPHI());
289 
290  PossibleRedSet.insert(PossibleReds[i].getPHI());
291  PossibleRedIdx[PossibleReds[i].getPHI()] = i;
292  for (Instruction *J : PossibleReds[i]) {
293  PossibleRedSet.insert(J);
294  PossibleRedIdx[J] = i;
295  }
296  }
297  }
298 
299  // The functions below are used while processing the loop instructions.
300 
301  // Are the two instructions both from reductions, and furthermore, from
302  // the same reduction?
303  bool isPairInSame(Instruction *J1, Instruction *J2) {
304  DenseMap<Instruction *, int>::iterator J1I = PossibleRedIdx.find(J1);
305  if (J1I != PossibleRedIdx.end()) {
306  DenseMap<Instruction *, int>::iterator J2I = PossibleRedIdx.find(J2);
307  if (J2I != PossibleRedIdx.end() && J1I->second == J2I->second)
308  return true;
309  }
310 
311  return false;
312  }
313 
314  // The two provided instructions, the first from the base iteration, and
315  // the second from iteration i, form a matched pair. If these are part of
316  // a reduction, record that fact.
317  void recordPair(Instruction *J1, Instruction *J2, unsigned i) {
318  if (PossibleRedIdx.count(J1)) {
319  assert(PossibleRedIdx.count(J2) &&
320  "Recording reduction vs. non-reduction instruction?");
321 
322  PossibleRedIter[J1] = 0;
323  PossibleRedIter[J2] = i;
324 
325  int Idx = PossibleRedIdx[J1];
326  assert(Idx == PossibleRedIdx[J2] &&
327  "Recording pair from different reductions?");
328  Reds.insert(Idx);
329  }
330  }
331 
332  // The functions below can be called after we've finished processing all
333  // instructions in the loop, and we know which reductions were selected.
334 
335  bool validateSelected();
336  void replaceSelected();
337 
338  protected:
339  // The vector of all possible reductions (for any scale).
340  SmallReductionVector PossibleReds;
341 
342  DenseMap<Instruction *, int> PossibleRedIdx;
343  DenseMap<Instruction *, int> PossibleRedIter;
344  DenseSet<int> Reds;
345  };
346 
347  // A DAGRootSet models an induction variable being used in a rerollable
348  // loop. For example,
349  //
350  // x[i*3+0] = y1
351  // x[i*3+1] = y2
352  // x[i*3+2] = y3
353  //
354  // Base instruction -> i*3
355  // +---+----+
356  // / | \
357  // ST[y1] +1 +2 <-- Roots
358  // | |
359  // ST[y2] ST[y3]
360  //
361  // There may be multiple DAGRoots, for example:
362  //
363  // x[i*2+0] = ... (1)
364  // x[i*2+1] = ... (1)
365  // x[i*2+4] = ... (2)
366  // x[i*2+5] = ... (2)
367  // x[(i+1234)*2+5678] = ... (3)
368  // x[(i+1234)*2+5679] = ... (3)
369  //
370  // The loop will be rerolled by adding a new loop induction variable,
371  // one for the Base instruction in each DAGRootSet.
372  //
373  struct DAGRootSet {
374  Instruction *BaseInst;
375  SmallInstructionVector Roots;
376 
377  // The instructions between IV and BaseInst (but not including BaseInst).
378  SmallInstructionSet SubsumedInsts;
379  };
380 
381  // The set of all DAG roots, and state tracking of all roots
382  // for a particular induction variable.
383  struct DAGRootTracker {
384  DAGRootTracker(LoopReroll *Parent, Loop *L, Instruction *IV,
387  bool PreserveLCSSA,
389  Instruction *LoopCtrlIV)
390  : Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), DT(DT), LI(LI),
391  PreserveLCSSA(PreserveLCSSA), IV(IV), IVToIncMap(IncrMap),
392  LoopControlIV(LoopCtrlIV) {}
393 
394  /// Stage 1: Find all the DAG roots for the induction variable.
395  bool findRoots();
396 
397  /// Stage 2: Validate if the found roots are valid.
398  bool validate(ReductionTracker &Reductions);
399 
400  /// Stage 3: Assuming validate() returned true, perform the
401  /// replacement.
402  /// @param BackedgeTakenCount The backedge-taken count of L.
403  void replace(const SCEV *BackedgeTakenCount);
404 
405  protected:
407 
408  void findRootsRecursive(Instruction *IVU,
409  SmallInstructionSet SubsumedInsts);
410  bool findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts);
411  bool collectPossibleRoots(Instruction *Base,
412  std::map<int64_t,Instruction*> &Roots);
413  bool validateRootSet(DAGRootSet &DRS);
414 
415  bool collectUsedInstructions(SmallInstructionSet &PossibleRedSet);
416  void collectInLoopUserSet(const SmallInstructionVector &Roots,
417  const SmallInstructionSet &Exclude,
418  const SmallInstructionSet &Final,
420  void collectInLoopUserSet(Instruction *Root,
421  const SmallInstructionSet &Exclude,
422  const SmallInstructionSet &Final,
424 
425  UsesTy::iterator nextInstr(int Val, UsesTy &In,
426  const SmallInstructionSet &Exclude,
427  UsesTy::iterator *StartI=nullptr);
428  bool isBaseInst(Instruction *I);
429  bool isRootInst(Instruction *I);
430  bool instrDependsOn(Instruction *I,
431  UsesTy::iterator Start,
432  UsesTy::iterator End);
433  void replaceIV(DAGRootSet &DRS, const SCEV *Start, const SCEV *IncrExpr);
434 
435  LoopReroll *Parent;
436 
437  // Members of Parent, replicated here for brevity.
438  Loop *L;
439  ScalarEvolution *SE;
440  AliasAnalysis *AA;
441  TargetLibraryInfo *TLI;
442  DominatorTree *DT;
443  LoopInfo *LI;
444  bool PreserveLCSSA;
445 
446  // The loop induction variable.
447  Instruction *IV;
448 
449  // Loop step amount.
450  int64_t Inc;
451 
452  // Loop reroll count; if Inc == 1, this records the scaling applied
453  // to the indvar: a[i*2+0] = ...; a[i*2+1] = ... ;
454  // If Inc is not 1, Scale = Inc.
455  uint64_t Scale;
456 
457  // The roots themselves.
459 
460  // All increment instructions for IV.
461  SmallInstructionVector LoopIncs;
462 
463  // Map of all instructions in the loop (in order) to the iterations
464  // they are used in (or specially, IL_All for instructions
465  // used in the loop increment mechanism).
466  UsesTy Uses;
467 
468  // Map between induction variable and its increment
470 
471  Instruction *LoopControlIV;
472  };
473 
474  // Check if it is a compare-like instruction whose user is a branch
475  bool isCompareUsedByBranch(Instruction *I) {
476  auto *TI = I->getParent()->getTerminator();
477  if (!isa<BranchInst>(TI) || !isa<CmpInst>(I))
478  return false;
479  return I->hasOneUse() && TI->getOperand(0) == I;
480  };
481 
482  bool isLoopControlIV(Loop *L, Instruction *IV);
483  void collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs);
484  void collectPossibleReductions(Loop *L,
485  ReductionTracker &Reductions);
486  bool reroll(Instruction *IV, Loop *L, BasicBlock *Header,
487  const SCEV *BackedgeTakenCount, ReductionTracker &Reductions);
488  };
489 
490 } // end anonymous namespace
491 
492 char LoopRerollLegacyPass::ID = 0;
493 
494 INITIALIZE_PASS_BEGIN(LoopRerollLegacyPass, "loop-reroll", "Reroll loops",
495  false, false)
498 INITIALIZE_PASS_END(LoopRerollLegacyPass, "loop-reroll", "Reroll loops", false,
499  false)
500 
501 Pass *llvm::createLoopRerollPass() { return new LoopRerollLegacyPass; }
502 
503 // Returns true if the provided instruction is used outside the given loop.
504 // This operates like Instruction::isUsedOutsideOfBlock, but considers PHIs in
505 // non-loop blocks to be outside the loop.
507  for (User *U : I->users()) {
508  if (!L->contains(cast<Instruction>(U)))
509  return true;
510  }
511  return false;
512 }
513 
514 // Check if an IV is only used to control the loop. There are two cases:
515 // 1. It only has one use which is loop increment, and the increment is only
516 // used by comparison and the PHI (could has sext with nsw in between), and the
517 // comparison is only used by branch.
518 // 2. It is used by loop increment and the comparison, the loop increment is
519 // only used by the PHI, and the comparison is used only by the branch.
520 bool LoopReroll::isLoopControlIV(Loop *L, Instruction *IV) {
521  unsigned IVUses = IV->getNumUses();
522  if (IVUses != 2 && IVUses != 1)
523  return false;
524 
525  for (auto *User : IV->users()) {
526  int32_t IncOrCmpUses = User->getNumUses();
527  bool IsCompInst = isCompareUsedByBranch(cast<Instruction>(User));
528 
529  // User can only have one or two uses.
530  if (IncOrCmpUses != 2 && IncOrCmpUses != 1)
531  return false;
532 
533  // Case 1
534  if (IVUses == 1) {
535  // The only user must be the loop increment.
536  // The loop increment must have two uses.
537  if (IsCompInst || IncOrCmpUses != 2)
538  return false;
539  }
540 
541  // Case 2
542  if (IVUses == 2 && IncOrCmpUses != 1)
543  return false;
544 
545  // The users of the IV must be a binary operation or a comparison
546  if (auto *BO = dyn_cast<BinaryOperator>(User)) {
547  if (BO->getOpcode() == Instruction::Add) {
548  // Loop Increment
549  // User of Loop Increment should be either PHI or CMP
550  for (auto *UU : User->users()) {
551  if (PHINode *PN = dyn_cast<PHINode>(UU)) {
552  if (PN != IV)
553  return false;
554  }
555  // Must be a CMP or an ext (of a value with nsw) then CMP
556  else {
557  auto *UUser = cast<Instruction>(UU);
558  // Skip SExt if we are extending an nsw value
559  // TODO: Allow ZExt too
560  if (BO->hasNoSignedWrap() && UUser->hasOneUse() &&
561  isa<SExtInst>(UUser))
562  UUser = cast<Instruction>(*(UUser->user_begin()));
563  if (!isCompareUsedByBranch(UUser))
564  return false;
565  }
566  }
567  } else
568  return false;
569  // Compare : can only have one use, and must be branch
570  } else if (!IsCompInst)
571  return false;
572  }
573  return true;
574 }
575 
576 // Collect the list of loop induction variables with respect to which it might
577 // be possible to reroll the loop.
578 void LoopReroll::collectPossibleIVs(Loop *L,
579  SmallInstructionVector &PossibleIVs) {
580  BasicBlock *Header = L->getHeader();
581  for (BasicBlock::iterator I = Header->begin(),
582  IE = Header->getFirstInsertionPt(); I != IE; ++I) {
583  if (!isa<PHINode>(I))
584  continue;
585  if (!I->getType()->isIntegerTy() && !I->getType()->isPointerTy())
586  continue;
587 
588  if (const SCEVAddRecExpr *PHISCEV =
589  dyn_cast<SCEVAddRecExpr>(SE->getSCEV(&*I))) {
590  if (PHISCEV->getLoop() != L)
591  continue;
592  if (!PHISCEV->isAffine())
593  continue;
594  auto IncSCEV = dyn_cast<SCEVConstant>(PHISCEV->getStepRecurrence(*SE));
595  if (IncSCEV) {
596  IVToIncMap[&*I] = IncSCEV->getValue()->getSExtValue();
597  LLVM_DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " << *PHISCEV
598  << "\n");
599 
600  if (isLoopControlIV(L, &*I)) {
601  assert(!LoopControlIV && "Found two loop control only IV");
602  LoopControlIV = &(*I);
603  LLVM_DEBUG(dbgs() << "LRR: Possible loop control only IV: " << *I
604  << " = " << *PHISCEV << "\n");
605  } else
606  PossibleIVs.push_back(&*I);
607  }
608  }
609  }
610 }
611 
612 // Add the remainder of the reduction-variable chain to the instruction vector
613 // (the initial PHINode has already been added). If successful, the object is
614 // marked as valid.
616  assert(!Valid && "Cannot add to an already-valid chain");
617 
618  // The reduction variable must be a chain of single-use instructions
619  // (including the PHI), except for the last value (which is used by the PHI
620  // and also outside the loop).
621  Instruction *C = Instructions.front();
622  if (C->user_empty())
623  return;
624 
625  do {
626  C = cast<Instruction>(*C->user_begin());
627  if (C->hasOneUse()) {
628  if (!C->isBinaryOp())
629  return;
630 
631  if (!(isa<PHINode>(Instructions.back()) ||
632  C->isSameOperationAs(Instructions.back())))
633  return;
634 
635  Instructions.push_back(C);
636  }
637  } while (C->hasOneUse());
638 
639  if (Instructions.size() < 2 ||
640  !C->isSameOperationAs(Instructions.back()) ||
641  C->use_empty())
642  return;
643 
644  // C is now the (potential) last instruction in the reduction chain.
645  for (User *U : C->users()) {
646  // The only in-loop user can be the initial PHI.
647  if (L->contains(cast<Instruction>(U)))
648  if (cast<Instruction>(U) != Instructions.front())
649  return;
650  }
651 
652  Instructions.push_back(C);
653  Valid = true;
654 }
655 
656 // Collect the vector of possible reduction variables.
657 void LoopReroll::collectPossibleReductions(Loop *L,
658  ReductionTracker &Reductions) {
659  BasicBlock *Header = L->getHeader();
660  for (BasicBlock::iterator I = Header->begin(),
661  IE = Header->getFirstInsertionPt(); I != IE; ++I) {
662  if (!isa<PHINode>(I))
663  continue;
664  if (!I->getType()->isSingleValueType())
665  continue;
666 
667  SimpleLoopReduction SLR(&*I, L);
668  if (!SLR.valid())
669  continue;
670 
671  LLVM_DEBUG(dbgs() << "LRR: Possible reduction: " << *I << " (with "
672  << SLR.size() << " chained instructions)\n");
673  Reductions.addSLR(SLR);
674  }
675 }
676 
677 // Collect the set of all users of the provided root instruction. This set of
678 // users contains not only the direct users of the root instruction, but also
679 // all users of those users, and so on. There are two exceptions:
680 //
681 // 1. Instructions in the set of excluded instructions are never added to the
682 // use set (even if they are users). This is used, for example, to exclude
683 // including root increments in the use set of the primary IV.
684 //
685 // 2. Instructions in the set of final instructions are added to the use set
686 // if they are users, but their users are not added. This is used, for
687 // example, to prevent a reduction update from forcing all later reduction
688 // updates into the use set.
689 void LoopReroll::DAGRootTracker::collectInLoopUserSet(
690  Instruction *Root, const SmallInstructionSet &Exclude,
691  const SmallInstructionSet &Final,
693  SmallInstructionVector Queue(1, Root);
694  while (!Queue.empty()) {
695  Instruction *I = Queue.pop_back_val();
696  if (!Users.insert(I).second)
697  continue;
698 
699  if (!Final.count(I))
700  for (Use &U : I->uses()) {
701  Instruction *User = cast<Instruction>(U.getUser());
702  if (PHINode *PN = dyn_cast<PHINode>(User)) {
703  // Ignore "wrap-around" uses to PHIs of this loop's header.
704  if (PN->getIncomingBlock(U) == L->getHeader())
705  continue;
706  }
707 
708  if (L->contains(User) && !Exclude.count(User)) {
709  Queue.push_back(User);
710  }
711  }
712 
713  // We also want to collect single-user "feeder" values.
714  for (Use &U : I->operands()) {
715  if (Instruction *Op = dyn_cast<Instruction>(U))
716  if (Op->hasOneUse() && L->contains(Op) && !Exclude.count(Op) &&
717  !Final.count(Op))
718  Queue.push_back(Op);
719  }
720  }
721 }
722 
723 // Collect all of the users of all of the provided root instructions (combined
724 // into a single set).
725 void LoopReroll::DAGRootTracker::collectInLoopUserSet(
726  const SmallInstructionVector &Roots,
727  const SmallInstructionSet &Exclude,
728  const SmallInstructionSet &Final,
730  for (Instruction *Root : Roots)
731  collectInLoopUserSet(Root, Exclude, Final, Users);
732 }
733 
735  if (LoadInst *LI = dyn_cast<LoadInst>(I))
736  return LI->isUnordered();
737  if (StoreInst *SI = dyn_cast<StoreInst>(I))
738  return SI->isUnordered();
739  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
740  return !MI->isVolatile();
741  return false;
742 }
743 
744 /// Return true if IVU is a "simple" arithmetic operation.
745 /// This is used for narrowing the search space for DAGRoots; only arithmetic
746 /// and GEPs can be part of a DAGRoot.
747 static bool isSimpleArithmeticOp(User *IVU) {
748  if (Instruction *I = dyn_cast<Instruction>(IVU)) {
749  switch (I->getOpcode()) {
750  default: return false;
751  case Instruction::Add:
752  case Instruction::Sub:
753  case Instruction::Mul:
754  case Instruction::Shl:
755  case Instruction::AShr:
756  case Instruction::LShr:
757  case Instruction::GetElementPtr:
758  case Instruction::Trunc:
759  case Instruction::ZExt:
760  case Instruction::SExt:
761  return true;
762  }
763  }
764  return false;
765 }
766 
767 static bool isLoopIncrement(User *U, Instruction *IV) {
768  BinaryOperator *BO = dyn_cast<BinaryOperator>(U);
769 
770  if ((BO && BO->getOpcode() != Instruction::Add) ||
771  (!BO && !isa<GetElementPtrInst>(U)))
772  return false;
773 
774  for (auto *UU : U->users()) {
775  PHINode *PN = dyn_cast<PHINode>(UU);
776  if (PN && PN == IV)
777  return true;
778  }
779  return false;
780 }
781 
782 bool LoopReroll::DAGRootTracker::
783 collectPossibleRoots(Instruction *Base, std::map<int64_t,Instruction*> &Roots) {
784  SmallInstructionVector BaseUsers;
785 
786  for (auto *I : Base->users()) {
787  ConstantInt *CI = nullptr;
788 
789  if (isLoopIncrement(I, IV)) {
790  LoopIncs.push_back(cast<Instruction>(I));
791  continue;
792  }
793 
794  // The root nodes must be either GEPs, ORs or ADDs.
795  if (auto *BO = dyn_cast<BinaryOperator>(I)) {
796  if (BO->getOpcode() == Instruction::Add ||
797  BO->getOpcode() == Instruction::Or)
798  CI = dyn_cast<ConstantInt>(BO->getOperand(1));
799  } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
800  Value *LastOperand = GEP->getOperand(GEP->getNumOperands()-1);
801  CI = dyn_cast<ConstantInt>(LastOperand);
802  }
803 
804  if (!CI) {
805  if (Instruction *II = dyn_cast<Instruction>(I)) {
806  BaseUsers.push_back(II);
807  continue;
808  } else {
809  LLVM_DEBUG(dbgs() << "LRR: Aborting due to non-instruction: " << *I
810  << "\n");
811  return false;
812  }
813  }
814 
815  int64_t V = std::abs(CI->getValue().getSExtValue());
816  if (Roots.find(V) != Roots.end())
817  // No duplicates, please.
818  return false;
819 
820  Roots[V] = cast<Instruction>(I);
821  }
822 
823  // Make sure we have at least two roots.
824  if (Roots.empty() || (Roots.size() == 1 && BaseUsers.empty()))
825  return false;
826 
827  // If we found non-loop-inc, non-root users of Base, assume they are
828  // for the zeroth root index. This is because "add %a, 0" gets optimized
829  // away.
830  if (BaseUsers.size()) {
831  if (Roots.find(0) != Roots.end()) {
832  LLVM_DEBUG(dbgs() << "LRR: Multiple roots found for base - aborting!\n");
833  return false;
834  }
835  Roots[0] = Base;
836  }
837 
838  // Calculate the number of users of the base, or lowest indexed, iteration.
839  unsigned NumBaseUses = BaseUsers.size();
840  if (NumBaseUses == 0)
841  NumBaseUses = Roots.begin()->second->getNumUses();
842 
843  // Check that every node has the same number of users.
844  for (auto &KV : Roots) {
845  if (KV.first == 0)
846  continue;
847  if (!KV.second->hasNUses(NumBaseUses)) {
848  LLVM_DEBUG(dbgs() << "LRR: Aborting - Root and Base #users not the same: "
849  << "#Base=" << NumBaseUses
850  << ", #Root=" << KV.second->getNumUses() << "\n");
851  return false;
852  }
853  }
854 
855  return true;
856 }
857 
858 void LoopReroll::DAGRootTracker::
859 findRootsRecursive(Instruction *I, SmallInstructionSet SubsumedInsts) {
860  // Does the user look like it could be part of a root set?
861  // All its users must be simple arithmetic ops.
862  if (I->hasNUsesOrMore(IL_MaxRerollIterations + 1))
863  return;
864 
865  if (I != IV && findRootsBase(I, SubsumedInsts))
866  return;
867 
868  SubsumedInsts.insert(I);
869 
870  for (User *V : I->users()) {
871  Instruction *I = cast<Instruction>(V);
872  if (is_contained(LoopIncs, I))
873  continue;
874 
875  if (!isSimpleArithmeticOp(I))
876  continue;
877 
878  // The recursive call makes a copy of SubsumedInsts.
879  findRootsRecursive(I, SubsumedInsts);
880  }
881 }
882 
883 bool LoopReroll::DAGRootTracker::validateRootSet(DAGRootSet &DRS) {
884  if (DRS.Roots.empty())
885  return false;
886 
887  // If the value of the base instruction is used outside the loop, we cannot
888  // reroll the loop. Check for other root instructions is unnecessary because
889  // they don't match any base instructions if their values are used outside.
890  if (hasUsesOutsideLoop(DRS.BaseInst, L))
891  return false;
892 
893  // Consider a DAGRootSet with N-1 roots (so N different values including
894  // BaseInst).
895  // Define d = Roots[0] - BaseInst, which should be the same as
896  // Roots[I] - Roots[I-1] for all I in [1..N).
897  // Define D = BaseInst@J - BaseInst@J-1, where "@J" means the value at the
898  // loop iteration J.
899  //
900  // Now, For the loop iterations to be consecutive:
901  // D = d * N
902  const auto *ADR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(DRS.BaseInst));
903  if (!ADR)
904  return false;
905 
906  // Check that the first root is evenly spaced.
907  unsigned N = DRS.Roots.size() + 1;
908  const SCEV *StepSCEV = SE->getMinusSCEV(SE->getSCEV(DRS.Roots[0]), ADR);
909  if (isa<SCEVCouldNotCompute>(StepSCEV) || StepSCEV->getType()->isPointerTy())
910  return false;
911  const SCEV *ScaleSCEV = SE->getConstant(StepSCEV->getType(), N);
912  if (ADR->getStepRecurrence(*SE) != SE->getMulExpr(StepSCEV, ScaleSCEV))
913  return false;
914 
915  // Check that the remainling roots are evenly spaced.
916  for (unsigned i = 1; i < N - 1; ++i) {
917  const SCEV *NewStepSCEV = SE->getMinusSCEV(SE->getSCEV(DRS.Roots[i]),
918  SE->getSCEV(DRS.Roots[i-1]));
919  if (NewStepSCEV != StepSCEV)
920  return false;
921  }
922 
923  return true;
924 }
925 
926 bool LoopReroll::DAGRootTracker::
927 findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts) {
928  // The base of a RootSet must be an AddRec, so it can be erased.
929  const auto *IVU_ADR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IVU));
930  if (!IVU_ADR || IVU_ADR->getLoop() != L)
931  return false;
932 
933  std::map<int64_t, Instruction*> V;
934  if (!collectPossibleRoots(IVU, V))
935  return false;
936 
937  // If we didn't get a root for index zero, then IVU must be
938  // subsumed.
939  if (V.find(0) == V.end())
940  SubsumedInsts.insert(IVU);
941 
942  // Partition the vector into monotonically increasing indexes.
943  DAGRootSet DRS;
944  DRS.BaseInst = nullptr;
945 
946  SmallVector<DAGRootSet, 16> PotentialRootSets;
947 
948  for (auto &KV : V) {
949  if (!DRS.BaseInst) {
950  DRS.BaseInst = KV.second;
951  DRS.SubsumedInsts = SubsumedInsts;
952  } else if (DRS.Roots.empty()) {
953  DRS.Roots.push_back(KV.second);
954  } else if (V.find(KV.first - 1) != V.end()) {
955  DRS.Roots.push_back(KV.second);
956  } else {
957  // Linear sequence terminated.
958  if (!validateRootSet(DRS))
959  return false;
960 
961  // Construct a new DAGRootSet with the next sequence.
962  PotentialRootSets.push_back(DRS);
963  DRS.BaseInst = KV.second;
964  DRS.Roots.clear();
965  }
966  }
967 
968  if (!validateRootSet(DRS))
969  return false;
970 
971  PotentialRootSets.push_back(DRS);
972 
973  RootSets.append(PotentialRootSets.begin(), PotentialRootSets.end());
974 
975  return true;
976 }
977 
978 bool LoopReroll::DAGRootTracker::findRoots() {
979  Inc = IVToIncMap[IV];
980 
981  assert(RootSets.empty() && "Unclean state!");
982  if (std::abs(Inc) == 1) {
983  for (auto *IVU : IV->users()) {
984  if (isLoopIncrement(IVU, IV))
985  LoopIncs.push_back(cast<Instruction>(IVU));
986  }
987  findRootsRecursive(IV, SmallInstructionSet());
988  LoopIncs.push_back(IV);
989  } else {
990  if (!findRootsBase(IV, SmallInstructionSet()))
991  return false;
992  }
993 
994  // Ensure all sets have the same size.
995  if (RootSets.empty()) {
996  LLVM_DEBUG(dbgs() << "LRR: Aborting because no root sets found!\n");
997  return false;
998  }
999  for (auto &V : RootSets) {
1000  if (V.Roots.empty() || V.Roots.size() != RootSets[0].Roots.size()) {
1001  LLVM_DEBUG(
1002  dbgs()
1003  << "LRR: Aborting because not all root sets have the same size\n");
1004  return false;
1005  }
1006  }
1007 
1008  Scale = RootSets[0].Roots.size() + 1;
1009 
1010  if (Scale > IL_MaxRerollIterations) {
1011  LLVM_DEBUG(dbgs() << "LRR: Aborting - too many iterations found. "
1012  << "#Found=" << Scale
1013  << ", #Max=" << IL_MaxRerollIterations << "\n");
1014  return false;
1015  }
1016 
1017  LLVM_DEBUG(dbgs() << "LRR: Successfully found roots: Scale=" << Scale
1018  << "\n");
1019 
1020  return true;
1021 }
1022 
1023 bool LoopReroll::DAGRootTracker::collectUsedInstructions(SmallInstructionSet &PossibleRedSet) {
1024  // Populate the MapVector with all instructions in the block, in order first,
1025  // so we can iterate over the contents later in perfect order.
1026  for (auto &I : *L->getHeader()) {
1027  Uses[&I].resize(IL_End);
1028  }
1029 
1030  SmallInstructionSet Exclude;
1031  for (auto &DRS : RootSets) {
1032  Exclude.insert(DRS.Roots.begin(), DRS.Roots.end());
1033  Exclude.insert(DRS.SubsumedInsts.begin(), DRS.SubsumedInsts.end());
1034  Exclude.insert(DRS.BaseInst);
1035  }
1036  Exclude.insert(LoopIncs.begin(), LoopIncs.end());
1037 
1038  for (auto &DRS : RootSets) {
1039  DenseSet<Instruction*> VBase;
1040  collectInLoopUserSet(DRS.BaseInst, Exclude, PossibleRedSet, VBase);
1041  for (auto *I : VBase) {
1042  Uses[I].set(0);
1043  }
1044 
1045  unsigned Idx = 1;
1046  for (auto *Root : DRS.Roots) {
1048  collectInLoopUserSet(Root, Exclude, PossibleRedSet, V);
1049 
1050  // While we're here, check the use sets are the same size.
1051  if (V.size() != VBase.size()) {
1052  LLVM_DEBUG(dbgs() << "LRR: Aborting - use sets are different sizes\n");
1053  return false;
1054  }
1055 
1056  for (auto *I : V) {
1057  Uses[I].set(Idx);
1058  }
1059  ++Idx;
1060  }
1061 
1062  // Make sure our subsumed instructions are remembered too.
1063  for (auto *I : DRS.SubsumedInsts) {
1064  Uses[I].set(IL_All);
1065  }
1066  }
1067 
1068  // Make sure the loop increments are also accounted for.
1069 
1070  Exclude.clear();
1071  for (auto &DRS : RootSets) {
1072  Exclude.insert(DRS.Roots.begin(), DRS.Roots.end());
1073  Exclude.insert(DRS.SubsumedInsts.begin(), DRS.SubsumedInsts.end());
1074  Exclude.insert(DRS.BaseInst);
1075  }
1076 
1078  collectInLoopUserSet(LoopIncs, Exclude, PossibleRedSet, V);
1079  for (auto *I : V) {
1080  if (I->mayHaveSideEffects()) {
1081  LLVM_DEBUG(dbgs() << "LRR: Aborting - "
1082  << "An instruction which does not belong to any root "
1083  << "sets must not have side effects: " << *I);
1084  return false;
1085  }
1086  Uses[I].set(IL_All);
1087  }
1088 
1089  return true;
1090 }
1091 
1092 /// Get the next instruction in "In" that is a member of set Val.
1093 /// Start searching from StartI, and do not return anything in Exclude.
1094 /// If StartI is not given, start from In.begin().
1096 LoopReroll::DAGRootTracker::nextInstr(int Val, UsesTy &In,
1097  const SmallInstructionSet &Exclude,
1098  UsesTy::iterator *StartI) {
1099  UsesTy::iterator I = StartI ? *StartI : In.begin();
1100  while (I != In.end() && (I->second.test(Val) == 0 ||
1101  Exclude.contains(I->first)))
1102  ++I;
1103  return I;
1104 }
1105 
1106 bool LoopReroll::DAGRootTracker::isBaseInst(Instruction *I) {
1107  for (auto &DRS : RootSets) {
1108  if (DRS.BaseInst == I)
1109  return true;
1110  }
1111  return false;
1112 }
1113 
1114 bool LoopReroll::DAGRootTracker::isRootInst(Instruction *I) {
1115  for (auto &DRS : RootSets) {
1116  if (is_contained(DRS.Roots, I))
1117  return true;
1118  }
1119  return false;
1120 }
1121 
1122 /// Return true if instruction I depends on any instruction between
1123 /// Start and End.
1124 bool LoopReroll::DAGRootTracker::instrDependsOn(Instruction *I,
1125  UsesTy::iterator Start,
1126  UsesTy::iterator End) {
1127  for (auto *U : I->users()) {
1128  for (auto It = Start; It != End; ++It)
1129  if (U == It->first)
1130  return true;
1131  }
1132  return false;
1133 }
1134 
1135 static bool isIgnorableInst(const Instruction *I) {
1136  if (isa<DbgInfoIntrinsic>(I))
1137  return true;
1138  const IntrinsicInst* II = dyn_cast<IntrinsicInst>(I);
1139  if (!II)
1140  return false;
1141  switch (II->getIntrinsicID()) {
1142  default:
1143  return false;
1144  case Intrinsic::annotation:
1145  case Intrinsic::ptr_annotation:
1146  case Intrinsic::var_annotation:
1147  // TODO: the following intrinsics may also be allowed:
1148  // lifetime_start, lifetime_end, invariant_start, invariant_end
1149  return true;
1150  }
1151  return false;
1152 }
1153 
1154 bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
1155  // We now need to check for equivalence of the use graph of each root with
1156  // that of the primary induction variable (excluding the roots). Our goal
1157  // here is not to solve the full graph isomorphism problem, but rather to
1158  // catch common cases without a lot of work. As a result, we will assume
1159  // that the relative order of the instructions in each unrolled iteration
1160  // is the same (although we will not make an assumption about how the
1161  // different iterations are intermixed). Note that while the order must be
1162  // the same, the instructions may not be in the same basic block.
1163 
1164  // An array of just the possible reductions for this scale factor. When we
1165  // collect the set of all users of some root instructions, these reduction
1166  // instructions are treated as 'final' (their uses are not considered).
1167  // This is important because we don't want the root use set to search down
1168  // the reduction chain.
1169  SmallInstructionSet PossibleRedSet;
1170  SmallInstructionSet PossibleRedLastSet;
1171  SmallInstructionSet PossibleRedPHISet;
1172  Reductions.restrictToScale(Scale, PossibleRedSet,
1173  PossibleRedPHISet, PossibleRedLastSet);
1174 
1175  // Populate "Uses" with where each instruction is used.
1176  if (!collectUsedInstructions(PossibleRedSet))
1177  return false;
1178 
1179  // Make sure we mark the reduction PHIs as used in all iterations.
1180  for (auto *I : PossibleRedPHISet) {
1181  Uses[I].set(IL_All);
1182  }
1183 
1184  // Make sure we mark loop-control-only PHIs as used in all iterations. See
1185  // comment above LoopReroll::isLoopControlIV for more information.
1186  BasicBlock *Header = L->getHeader();
1187  if (LoopControlIV && LoopControlIV != IV) {
1188  for (auto *U : LoopControlIV->users()) {
1189  Instruction *IVUser = dyn_cast<Instruction>(U);
1190  // IVUser could be loop increment or compare
1191  Uses[IVUser].set(IL_All);
1192  for (auto *UU : IVUser->users()) {
1193  Instruction *UUser = dyn_cast<Instruction>(UU);
1194  // UUser could be compare, PHI or branch
1195  Uses[UUser].set(IL_All);
1196  // Skip SExt
1197  if (isa<SExtInst>(UUser)) {
1198  UUser = dyn_cast<Instruction>(*(UUser->user_begin()));
1199  Uses[UUser].set(IL_All);
1200  }
1201  // Is UUser a compare instruction?
1202  if (UU->hasOneUse()) {
1203  Instruction *BI = dyn_cast<BranchInst>(*UUser->user_begin());
1204  if (BI == cast<BranchInst>(Header->getTerminator()))
1205  Uses[BI].set(IL_All);
1206  }
1207  }
1208  }
1209  }
1210 
1211  // Make sure all instructions in the loop are in one and only one
1212  // set.
1213  for (auto &KV : Uses) {
1214  if (KV.second.count() != 1 && !isIgnorableInst(KV.first)) {
1215  LLVM_DEBUG(
1216  dbgs() << "LRR: Aborting - instruction is not used in 1 iteration: "
1217  << *KV.first << " (#uses=" << KV.second.count() << ")\n");
1218  return false;
1219  }
1220  }
1221 
1222  LLVM_DEBUG(for (auto &KV
1223  : Uses) {
1224  dbgs() << "LRR: " << KV.second.find_first() << "\t" << *KV.first << "\n";
1225  });
1226 
1227  for (unsigned Iter = 1; Iter < Scale; ++Iter) {
1228  // In addition to regular aliasing information, we need to look for
1229  // instructions from later (future) iterations that have side effects
1230  // preventing us from reordering them past other instructions with side
1231  // effects.
1232  bool FutureSideEffects = false;
1233  AliasSetTracker AST(*AA);
1234  // The map between instructions in f(%iv.(i+1)) and f(%iv).
1236 
1237  // Compare iteration Iter to the base.
1238  SmallInstructionSet Visited;
1239  auto BaseIt = nextInstr(0, Uses, Visited);
1240  auto RootIt = nextInstr(Iter, Uses, Visited);
1241  auto LastRootIt = Uses.begin();
1242 
1243  while (BaseIt != Uses.end() && RootIt != Uses.end()) {
1244  Instruction *BaseInst = BaseIt->first;
1245  Instruction *RootInst = RootIt->first;
1246 
1247  // Skip over the IV or root instructions; only match their users.
1248  bool Continue = false;
1249  if (isBaseInst(BaseInst)) {
1250  Visited.insert(BaseInst);
1251  BaseIt = nextInstr(0, Uses, Visited);
1252  Continue = true;
1253  }
1254  if (isRootInst(RootInst)) {
1255  LastRootIt = RootIt;
1256  Visited.insert(RootInst);
1257  RootIt = nextInstr(Iter, Uses, Visited);
1258  Continue = true;
1259  }
1260  if (Continue) continue;
1261 
1262  if (!BaseInst->isSameOperationAs(RootInst)) {
1263  // Last chance saloon. We don't try and solve the full isomorphism
1264  // problem, but try and at least catch the case where two instructions
1265  // *of different types* are round the wrong way. We won't be able to
1266  // efficiently tell, given two ADD instructions, which way around we
1267  // should match them, but given an ADD and a SUB, we can at least infer
1268  // which one is which.
1269  //
1270  // This should allow us to deal with a greater subset of the isomorphism
1271  // problem. It does however change a linear algorithm into a quadratic
1272  // one, so limit the number of probes we do.
1273  auto TryIt = RootIt;
1274  unsigned N = NumToleratedFailedMatches;
1275  while (TryIt != Uses.end() &&
1276  !BaseInst->isSameOperationAs(TryIt->first) &&
1277  N--) {
1278  ++TryIt;
1279  TryIt = nextInstr(Iter, Uses, Visited, &TryIt);
1280  }
1281 
1282  if (TryIt == Uses.end() || TryIt == RootIt ||
1283  instrDependsOn(TryIt->first, RootIt, TryIt)) {
1284  LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at "
1285  << *BaseInst << " vs. " << *RootInst << "\n");
1286  return false;
1287  }
1288 
1289  RootIt = TryIt;
1290  RootInst = TryIt->first;
1291  }
1292 
1293  // All instructions between the last root and this root
1294  // may belong to some other iteration. If they belong to a
1295  // future iteration, then they're dangerous to alias with.
1296  //
1297  // Note that because we allow a limited amount of flexibility in the order
1298  // that we visit nodes, LastRootIt might be *before* RootIt, in which
1299  // case we've already checked this set of instructions so we shouldn't
1300  // do anything.
1301  for (; LastRootIt < RootIt; ++LastRootIt) {
1302  Instruction *I = LastRootIt->first;
1303  if (LastRootIt->second.find_first() < (int)Iter)
1304  continue;
1305  if (I->mayWriteToMemory())
1306  AST.add(I);
1307  // Note: This is specifically guarded by a check on isa<PHINode>,
1308  // which while a valid (somewhat arbitrary) micro-optimization, is
1309  // needed because otherwise isSafeToSpeculativelyExecute returns
1310  // false on PHI nodes.
1311  if (!isa<PHINode>(I) && !isUnorderedLoadStore(I) &&
1313  // Intervening instructions cause side effects.
1314  FutureSideEffects = true;
1315  }
1316 
1317  // Make sure that this instruction, which is in the use set of this
1318  // root instruction, does not also belong to the base set or the set of
1319  // some other root instruction.
1320  if (RootIt->second.count() > 1) {
1321  LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst
1322  << " vs. " << *RootInst << " (prev. case overlap)\n");
1323  return false;
1324  }
1325 
1326  // Make sure that we don't alias with any instruction in the alias set
1327  // tracker. If we do, then we depend on a future iteration, and we
1328  // can't reroll.
1329  if (RootInst->mayReadFromMemory())
1330  for (auto &K : AST) {
1331  if (K.aliasesUnknownInst(RootInst, *AA)) {
1332  LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at "
1333  << *BaseInst << " vs. " << *RootInst
1334  << " (depends on future store)\n");
1335  return false;
1336  }
1337  }
1338 
1339  // If we've past an instruction from a future iteration that may have
1340  // side effects, and this instruction might also, then we can't reorder
1341  // them, and this matching fails. As an exception, we allow the alias
1342  // set tracker to handle regular (unordered) load/store dependencies.
1343  if (FutureSideEffects && ((!isUnorderedLoadStore(BaseInst) &&
1344  !isSafeToSpeculativelyExecute(BaseInst)) ||
1345  (!isUnorderedLoadStore(RootInst) &&
1346  !isSafeToSpeculativelyExecute(RootInst)))) {
1347  LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst
1348  << " vs. " << *RootInst
1349  << " (side effects prevent reordering)\n");
1350  return false;
1351  }
1352 
1353  // For instructions that are part of a reduction, if the operation is
1354  // associative, then don't bother matching the operands (because we
1355  // already know that the instructions are isomorphic, and the order
1356  // within the iteration does not matter). For non-associative reductions,
1357  // we do need to match the operands, because we need to reject
1358  // out-of-order instructions within an iteration!
1359  // For example (assume floating-point addition), we need to reject this:
1360  // x += a[i]; x += b[i];
1361  // x += a[i+1]; x += b[i+1];
1362  // x += b[i+2]; x += a[i+2];
1363  bool InReduction = Reductions.isPairInSame(BaseInst, RootInst);
1364 
1365  if (!(InReduction && BaseInst->isAssociative())) {
1366  bool Swapped = false, SomeOpMatched = false;
1367  for (unsigned j = 0; j < BaseInst->getNumOperands(); ++j) {
1368  Value *Op2 = RootInst->getOperand(j);
1369 
1370  // If this is part of a reduction (and the operation is not
1371  // associatve), then we match all operands, but not those that are
1372  // part of the reduction.
1373  if (InReduction)
1374  if (Instruction *Op2I = dyn_cast<Instruction>(Op2))
1375  if (Reductions.isPairInSame(RootInst, Op2I))
1376  continue;
1377 
1378  DenseMap<Value *, Value *>::iterator BMI = BaseMap.find(Op2);
1379  if (BMI != BaseMap.end()) {
1380  Op2 = BMI->second;
1381  } else {
1382  for (auto &DRS : RootSets) {
1383  if (DRS.Roots[Iter-1] == (Instruction*) Op2) {
1384  Op2 = DRS.BaseInst;
1385  break;
1386  }
1387  }
1388  }
1389 
1390  if (BaseInst->getOperand(Swapped ? unsigned(!j) : j) != Op2) {
1391  // If we've not already decided to swap the matched operands, and
1392  // we've not already matched our first operand (note that we could
1393  // have skipped matching the first operand because it is part of a
1394  // reduction above), and the instruction is commutative, then try
1395  // the swapped match.
1396  if (!Swapped && BaseInst->isCommutative() && !SomeOpMatched &&
1397  BaseInst->getOperand(!j) == Op2) {
1398  Swapped = true;
1399  } else {
1400  LLVM_DEBUG(dbgs()
1401  << "LRR: iteration root match failed at " << *BaseInst
1402  << " vs. " << *RootInst << " (operand " << j << ")\n");
1403  return false;
1404  }
1405  }
1406 
1407  SomeOpMatched = true;
1408  }
1409  }
1410 
1411  if ((!PossibleRedLastSet.count(BaseInst) &&
1412  hasUsesOutsideLoop(BaseInst, L)) ||
1413  (!PossibleRedLastSet.count(RootInst) &&
1414  hasUsesOutsideLoop(RootInst, L))) {
1415  LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst
1416  << " vs. " << *RootInst << " (uses outside loop)\n");
1417  return false;
1418  }
1419 
1420  Reductions.recordPair(BaseInst, RootInst, Iter);
1421  BaseMap.insert(std::make_pair(RootInst, BaseInst));
1422 
1423  LastRootIt = RootIt;
1424  Visited.insert(BaseInst);
1425  Visited.insert(RootInst);
1426  BaseIt = nextInstr(0, Uses, Visited);
1427  RootIt = nextInstr(Iter, Uses, Visited);
1428  }
1429  assert(BaseIt == Uses.end() && RootIt == Uses.end() &&
1430  "Mismatched set sizes!");
1431  }
1432 
1433  LLVM_DEBUG(dbgs() << "LRR: Matched all iteration increments for " << *IV
1434  << "\n");
1435 
1436  return true;
1437 }
1438 
1439 void LoopReroll::DAGRootTracker::replace(const SCEV *BackedgeTakenCount) {
1440  BasicBlock *Header = L->getHeader();
1441 
1442  // Compute the start and increment for each BaseInst before we start erasing
1443  // instructions.
1444  SmallVector<const SCEV *, 8> StartExprs;
1445  SmallVector<const SCEV *, 8> IncrExprs;
1446  for (auto &DRS : RootSets) {
1447  const SCEVAddRecExpr *IVSCEV =
1448  cast<SCEVAddRecExpr>(SE->getSCEV(DRS.BaseInst));
1449  StartExprs.push_back(IVSCEV->getStart());
1450  IncrExprs.push_back(SE->getMinusSCEV(SE->getSCEV(DRS.Roots[0]), IVSCEV));
1451  }
1452 
1453  // Remove instructions associated with non-base iterations.
1454  for (Instruction &Inst : llvm::make_early_inc_range(llvm::reverse(*Header))) {
1455  unsigned I = Uses[&Inst].find_first();
1456  if (I > 0 && I < IL_All) {
1457  LLVM_DEBUG(dbgs() << "LRR: removing: " << Inst << "\n");
1458  Inst.eraseFromParent();
1459  }
1460  }
1461 
1462  // Rewrite each BaseInst using SCEV.
1463  for (size_t i = 0, e = RootSets.size(); i != e; ++i)
1464  // Insert the new induction variable.
1465  replaceIV(RootSets[i], StartExprs[i], IncrExprs[i]);
1466 
1467  { // Limit the lifetime of SCEVExpander.
1468  BranchInst *BI = cast<BranchInst>(Header->getTerminator());
1469  const DataLayout &DL = Header->getModule()->getDataLayout();
1470  SCEVExpander Expander(*SE, DL, "reroll");
1471  auto Zero = SE->getZero(BackedgeTakenCount->getType());
1472  auto One = SE->getOne(BackedgeTakenCount->getType());
1473  auto NewIVSCEV = SE->getAddRecExpr(Zero, One, L, SCEV::FlagAnyWrap);
1474  Value *NewIV =
1475  Expander.expandCodeFor(NewIVSCEV, BackedgeTakenCount->getType(),
1476  Header->getFirstNonPHIOrDbg());
1477  // FIXME: This arithmetic can overflow.
1478  auto TripCount = SE->getAddExpr(BackedgeTakenCount, One);
1479  auto ScaledTripCount = SE->getMulExpr(
1480  TripCount, SE->getConstant(BackedgeTakenCount->getType(), Scale));
1481  auto ScaledBECount = SE->getMinusSCEV(ScaledTripCount, One);
1482  Value *TakenCount =
1483  Expander.expandCodeFor(ScaledBECount, BackedgeTakenCount->getType(),
1484  Header->getFirstNonPHIOrDbg());
1485  Value *Cond =
1486  new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, TakenCount, "exitcond");
1487  BI->setCondition(Cond);
1488 
1489  if (BI->getSuccessor(1) != Header)
1490  BI->swapSuccessors();
1491  }
1492 
1493  SimplifyInstructionsInBlock(Header, TLI);
1494  DeleteDeadPHIs(Header, TLI);
1495 }
1496 
1497 void LoopReroll::DAGRootTracker::replaceIV(DAGRootSet &DRS,
1498  const SCEV *Start,
1499  const SCEV *IncrExpr) {
1500  BasicBlock *Header = L->getHeader();
1501  Instruction *Inst = DRS.BaseInst;
1502 
1503  const SCEV *NewIVSCEV =
1504  SE->getAddRecExpr(Start, IncrExpr, L, SCEV::FlagAnyWrap);
1505 
1506  { // Limit the lifetime of SCEVExpander.
1507  const DataLayout &DL = Header->getModule()->getDataLayout();
1508  SCEVExpander Expander(*SE, DL, "reroll");
1509  Value *NewIV = Expander.expandCodeFor(NewIVSCEV, Inst->getType(),
1510  Header->getFirstNonPHIOrDbg());
1511 
1512  for (auto &KV : Uses)
1513  if (KV.second.find_first() == 0)
1514  KV.first->replaceUsesOfWith(Inst, NewIV);
1515  }
1516 }
1517 
1518 // Validate the selected reductions. All iterations must have an isomorphic
1519 // part of the reduction chain and, for non-associative reductions, the chain
1520 // entries must appear in order.
1521 bool LoopReroll::ReductionTracker::validateSelected() {
1522  // For a non-associative reduction, the chain entries must appear in order.
1523  for (int i : Reds) {
1524  int PrevIter = 0, BaseCount = 0, Count = 0;
1525  for (Instruction *J : PossibleReds[i]) {
1526  // Note that all instructions in the chain must have been found because
1527  // all instructions in the function must have been assigned to some
1528  // iteration.
1529  int Iter = PossibleRedIter[J];
1530  if (Iter != PrevIter && Iter != PrevIter + 1 &&
1531  !PossibleReds[i].getReducedValue()->isAssociative()) {
1532  LLVM_DEBUG(dbgs() << "LRR: Out-of-order non-associative reduction: "
1533  << J << "\n");
1534  return false;
1535  }
1536 
1537  if (Iter != PrevIter) {
1538  if (Count != BaseCount) {
1539  LLVM_DEBUG(dbgs()
1540  << "LRR: Iteration " << PrevIter << " reduction use count "
1541  << Count << " is not equal to the base use count "
1542  << BaseCount << "\n");
1543  return false;
1544  }
1545 
1546  Count = 0;
1547  }
1548 
1549  ++Count;
1550  if (Iter == 0)
1551  ++BaseCount;
1552 
1553  PrevIter = Iter;
1554  }
1555  }
1556 
1557  return true;
1558 }
1559 
1560 // For all selected reductions, remove all parts except those in the first
1561 // iteration (and the PHI). Replace outside uses of the reduced value with uses
1562 // of the first-iteration reduced value (in other words, reroll the selected
1563 // reductions).
1564 void LoopReroll::ReductionTracker::replaceSelected() {
1565  // Fixup reductions to refer to the last instruction associated with the
1566  // first iteration (not the last).
1567  for (int i : Reds) {
1568  int j = 0;
1569  for (int e = PossibleReds[i].size(); j != e; ++j)
1570  if (PossibleRedIter[PossibleReds[i][j]] != 0) {
1571  --j;
1572  break;
1573  }
1574 
1575  // Replace users with the new end-of-chain value.
1576  SmallInstructionVector Users;
1577  for (User *U : PossibleReds[i].getReducedValue()->users()) {
1578  Users.push_back(cast<Instruction>(U));
1579  }
1580 
1581  for (Instruction *User : Users)
1582  User->replaceUsesOfWith(PossibleReds[i].getReducedValue(),
1583  PossibleReds[i][j]);
1584  }
1585 }
1586 
1587 // Reroll the provided loop with respect to the provided induction variable.
1588 // Generally, we're looking for a loop like this:
1589 //
1590 // %iv = phi [ (preheader, ...), (body, %iv.next) ]
1591 // f(%iv)
1592 // %iv.1 = add %iv, 1 <-- a root increment
1593 // f(%iv.1)
1594 // %iv.2 = add %iv, 2 <-- a root increment
1595 // f(%iv.2)
1596 // %iv.scale_m_1 = add %iv, scale-1 <-- a root increment
1597 // f(%iv.scale_m_1)
1598 // ...
1599 // %iv.next = add %iv, scale
1600 // %cmp = icmp(%iv, ...)
1601 // br %cmp, header, exit
1602 //
1603 // Notably, we do not require that f(%iv), f(%iv.1), etc. be isolated groups of
1604 // instructions. In other words, the instructions in f(%iv), f(%iv.1), etc. can
1605 // be intermixed with eachother. The restriction imposed by this algorithm is
1606 // that the relative order of the isomorphic instructions in f(%iv), f(%iv.1),
1607 // etc. be the same.
1608 //
1609 // First, we collect the use set of %iv, excluding the other increment roots.
1610 // This gives us f(%iv). Then we iterate over the loop instructions (scale-1)
1611 // times, having collected the use set of f(%iv.(i+1)), during which we:
1612 // - Ensure that the next unmatched instruction in f(%iv) is isomorphic to
1613 // the next unmatched instruction in f(%iv.(i+1)).
1614 // - Ensure that both matched instructions don't have any external users
1615 // (with the exception of last-in-chain reduction instructions).
1616 // - Track the (aliasing) write set, and other side effects, of all
1617 // instructions that belong to future iterations that come before the matched
1618 // instructions. If the matched instructions read from that write set, then
1619 // f(%iv) or f(%iv.(i+1)) has some dependency on instructions in
1620 // f(%iv.(j+1)) for some j > i, and we cannot reroll the loop. Similarly,
1621 // if any of these future instructions had side effects (could not be
1622 // speculatively executed), and so do the matched instructions, when we
1623 // cannot reorder those side-effect-producing instructions, and rerolling
1624 // fails.
1625 //
1626 // Finally, we make sure that all loop instructions are either loop increment
1627 // roots, belong to simple latch code, parts of validated reductions, part of
1628 // f(%iv) or part of some f(%iv.i). If all of that is true (and all reductions
1629 // have been validated), then we reroll the loop.
1630 bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
1631  const SCEV *BackedgeTakenCount,
1632  ReductionTracker &Reductions) {
1633  DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI, DT, LI, PreserveLCSSA,
1634  IVToIncMap, LoopControlIV);
1635 
1636  if (!DAGRoots.findRoots())
1637  return false;
1638  LLVM_DEBUG(dbgs() << "LRR: Found all root induction increments for: " << *IV
1639  << "\n");
1640 
1641  if (!DAGRoots.validate(Reductions))
1642  return false;
1643  if (!Reductions.validateSelected())
1644  return false;
1645  // At this point, we've validated the rerolling, and we're committed to
1646  // making changes!
1647 
1648  Reductions.replaceSelected();
1649  DAGRoots.replace(BackedgeTakenCount);
1650 
1651  ++NumRerolledLoops;
1652  return true;
1653 }
1654 
1655 bool LoopReroll::runOnLoop(Loop *L) {
1656  BasicBlock *Header = L->getHeader();
1657  LLVM_DEBUG(dbgs() << "LRR: F[" << Header->getParent()->getName() << "] Loop %"
1658  << Header->getName() << " (" << L->getNumBlocks()
1659  << " block(s))\n");
1660 
1661  // For now, we'll handle only single BB loops.
1662  if (L->getNumBlocks() > 1)
1663  return false;
1664 
1666  return false;
1667 
1668  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
1669  LLVM_DEBUG(dbgs() << "\n Before Reroll:\n" << *(L->getHeader()) << "\n");
1670  LLVM_DEBUG(dbgs() << "LRR: backedge-taken count = " << *BackedgeTakenCount
1671  << "\n");
1672 
1673  // First, we need to find the induction variable with respect to which we can
1674  // reroll (there may be several possible options).
1675  SmallInstructionVector PossibleIVs;
1676  IVToIncMap.clear();
1677  LoopControlIV = nullptr;
1678  collectPossibleIVs(L, PossibleIVs);
1679 
1680  if (PossibleIVs.empty()) {
1681  LLVM_DEBUG(dbgs() << "LRR: No possible IVs found\n");
1682  return false;
1683  }
1684 
1685  ReductionTracker Reductions;
1686  collectPossibleReductions(L, Reductions);
1687  bool Changed = false;
1688 
1689  // For each possible IV, collect the associated possible set of 'root' nodes
1690  // (i+1, i+2, etc.).
1691  for (Instruction *PossibleIV : PossibleIVs)
1692  if (reroll(PossibleIV, L, Header, BackedgeTakenCount, Reductions)) {
1693  Changed = true;
1694  break;
1695  }
1696  LLVM_DEBUG(dbgs() << "\n After Reroll:\n" << *(L->getHeader()) << "\n");
1697 
1698  // Trip count of L has changed so SE must be re-evaluated.
1699  if (Changed)
1700  SE->forgetLoop(L);
1701 
1702  return Changed;
1703 }
1704 
1705 bool LoopRerollLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
1706  if (skipLoop(L))
1707  return false;
1708 
1709  auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
1710  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1711  auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1712  auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
1713  *L->getHeader()->getParent());
1714  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1715  bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
1716 
1717  return LoopReroll(AA, LI, SE, TLI, DT, PreserveLCSSA).runOnLoop(L);
1718 }
1719 
1722  LPMUpdater &U) {
1723  return LoopReroll(&AR.AA, &AR.LI, &AR.SE, &AR.TLI, &AR.DT, true).runOnLoop(&L)
1726 }
llvm::MapVector::iterator
typename VectorType::iterator iterator
Definition: MapVector.h:50
i
i
Definition: README.txt:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
llvm::ScalarEvolution::hasLoopInvariantBackedgeTakenCount
bool hasLoopInvariantBackedgeTakenCount(const Loop *L)
Return true if the specified loop has an analyzable loop-invariant backedge-taken count.
Definition: ScalarEvolution.cpp:12978
llvm::Instruction::isAssociative
bool isAssociative() const LLVM_READONLY
Return true if the instruction is associative:
Definition: Instruction.cpp:755
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:104
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::CmpInst::ICMP_EQ
@ ICMP_EQ
equal
Definition: InstrTypes.h:740
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
llvm::isSafeToSpeculativelyExecute
bool isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
Definition: ValueTracking.cpp:4595
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
IntrinsicInst.h
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:218
ScalarEvolutionExpander.h
Scalar.h
llvm::SCEVAddRecExpr::getStart
const SCEV * getStart() const
Definition: ScalarEvolutionExpressions.h:353
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Pass.h
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
llvm::IntrinsicInst::getIntrinsicID
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:53
llvm::SCEVExpander
This class uses information about analyze scalars to rewrite expressions in canonical form.
Definition: ScalarEvolutionExpander.h:63
llvm::ConstantInt::getValue
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:133
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
Statistic.h
llvm::APInt::getSExtValue
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1490
llvm::ScalarEvolution::getAddRecExpr
const SCEV * getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
Definition: ScalarEvolution.cpp:3576
MapVector.h
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
ValueTracking.h
Local.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::getLoopAnalysisUsage
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:144
llvm::AliasSetTracker
Definition: AliasSetTracker.h:326
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
llvm::LoopRerollPass::run
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LoopRerollPass.cpp:1720
APInt.h
ScalarEvolution.h
DenseMap.h
Instructions
Code Generation Notes for reduce the size of the ISel and reduce repetition in the implementation In a small number of this can cause even when no optimisation has taken place Instructions
Definition: MSA.txt:11
Module.h
isSimpleArithmeticOp
static bool isSimpleArithmeticOp(User *IVU)
Return true if IVU is a "simple" arithmetic operation.
Definition: LoopRerollPass.cpp:747
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:380
reroll
loop reroll
Definition: LoopRerollPass.cpp:498
llvm::sys::path::end
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:235
llvm::sys::path::begin
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:226
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:51
llvm::MemIntrinsic
This is the common base class for memset/memcpy/memmove.
Definition: IntrinsicInst.h:962
llvm::MapVector
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:37
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::Value::user_begin
user_iterator user_begin()
Definition: Value.h:397
STLExtras.h
llvm::RISCVFeatures::validate
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
Definition: RISCVBaseInfo.cpp:97
llvm::LoopStandardAnalysisResults::DT
DominatorTree & DT
Definition: LoopAnalysisManager.h:54
replace
static void replace(Module &M, GlobalVariable *Old, GlobalVariable *New)
Definition: ConstantMerge.cpp:116
Use.h
llvm::LoopBase::getNumBlocks
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:185
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::BranchInst::swapSuccessors
void swapSuccessors()
Swap the successors of this branch instruction.
Definition: Instructions.cpp:1329
llvm::AMDGPU::HSAMD::ValueKind::Queue
@ Queue
Uses
SmallPtrSet< MachineInstr *, 2 > Uses
Definition: ARMLowOverheadLoops.cpp:585
loop
Analysis the ScalarEvolution expression for r is< loop > Outside the loop
Definition: README.txt:8
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
AliasAnalysis.h
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::ScalarEvolution::getMulExpr
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:3051
Instruction.h
CommandLine.h
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(LoopRerollLegacyPass, "loop-reroll", "Reroll loops", false, false) INITIALIZE_PASS_END(LoopRerollLegacyPass
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::SimplifyInstructionsInBlock
bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...
Definition: Local.cpp:707
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::ScalarEvolution::getOne
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
Definition: ScalarEvolution.h:645
Constants.h
llvm::AAResults
Definition: AliasAnalysis.h:511
llvm::SmallVectorImpl::append
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:667
llvm::User
Definition: User.h:44
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
InstrTypes.h
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:297
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::replace
void replace(Container &Cont, typename Container::iterator ContIt, typename Container::iterator ContEnd, RandomAccessIterator ValIt, RandomAccessIterator ValEnd)
Given a sequence container Cont, replace the range [ContIt, ContEnd) with the range [ValIt,...
Definition: STLExtras.h:1820
TargetLibraryInfo.h
DenseSet.h
llvm::BasicBlock::getFirstInsertionPt
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:246
false
Definition: StackSlotColoring.cpp:141
llvm::BinaryOperator::getOpcode
BinaryOps getOpcode() const
Definition: InstrTypes.h:392
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::size
size_type size() const
Definition: DenseSet.h:81
llvm::initializeLoopRerollLegacyPassPass
void initializeLoopRerollLegacyPassPass(PassRegistry &)
llvm::Instruction
Definition: Instruction.h:42
llvm::BranchInst::setCondition
void setCondition(Value *V)
Definition: Instructions.h:3185
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
NumToleratedFailedMatches
static cl::opt< unsigned > NumToleratedFailedMatches("reroll-num-tolerated-failed-matches", cl::init(400), cl::Hidden, cl::desc("The maximum number of failures to tolerate" " during fuzzy matching. (default: 400)"))
BitVector.h
LoopUtils.h
llvm::LPPassManager
Definition: LoopPass.h:76
SmallPtrSet.h
llvm::BasicBlock::getModule
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:147
isLoopIncrement
static bool isLoopIncrement(User *U, Instruction *IV)
Definition: LoopRerollPass.cpp:767
Utils.h
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Type.h
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::Instruction::isCommutative
bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
Definition: Instruction.cpp:770
LoopInfo.h
llvm::ARM_PROC::IE
@ IE
Definition: ARMBaseInfo.h:27
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:4407
llvm::DenseSet
Implements a dense probed hash-table based set.
Definition: DenseSet.h:268
llvm::tgtok::In
@ In
Definition: TGLexer.h:51
llvm::getLoopPassPreservedAnalyses
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Definition: LoopAnalysisManager.cpp:137
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1392
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:75
llvm::createLoopRerollPass
Pass * createLoopRerollPass()
Definition: LoopRerollPass.cpp:501
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:305
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::clear
void clear()
Definition: DenseMap.h:112
llvm::ICmpInst
This instruction compares its operands according to the predicate given to the constructor.
Definition: Instructions.h:1186
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:468
uint64_t
llvm::LoopPass
Definition: LoopPass.h:28
llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition: LoopPassManager.h:262
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::ARM_AM::add
@ add
Definition: ARMAddressingModes.h:39
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::DenseMap
Definition: DenseMap.h:716
llvm::SCEV::FlagAnyWrap
@ FlagAnyWrap
Definition: ScalarEvolution.h:131
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:608
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1672
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:152
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1588
isAssociative
static bool isAssociative(const COFFSection &Section)
Definition: WinCOFFObjectWriter.cpp:925
llvm::LoopInfo
Definition: LoopInfo.h:1086
llvm::BinaryOperator
Definition: InstrTypes.h:188
Mul
BinaryOperator * Mul
Definition: X86PartialReduction.cpp:70
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:137
llvm::ScalarEvolution::getConstant
const SCEV * getConstant(ConstantInt *V)
Definition: ScalarEvolution.cpp:462
LoopPass.h
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::Value::getNumUses
unsigned getNumUses() const
This method computes the number of uses of this Value.
Definition: Value.cpp:251
llvm::AArch64ISD::ADR
@ ADR
Definition: AArch64ISelLowering.h:64
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::LoopStandardAnalysisResults::LI
LoopInfo & LI
Definition: LoopAnalysisManager.h:55
llvm::Instruction::mayReadFromMemory
bool mayReadFromMemory() const
Return true if this instruction may read memory.
Definition: Instruction.cpp:576
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:305
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:176
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:209
llvm::User::replaceUsesOfWith
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
users
iv users
Definition: IVUsers.cpp:48
isUnorderedLoadStore
static bool isUnorderedLoadStore(Instruction *I)
Definition: LoopRerollPass.cpp:734
loops
loop Reroll loops
Definition: LoopRerollPass.cpp:498
j
return j(j<< 16)
llvm::LCSSAID
char & LCSSAID
Definition: LCSSA.cpp:486
llvm::ScalarEvolution::forgetLoop
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
Definition: ScalarEvolution.cpp:8037
isIgnorableInst
static bool isIgnorableInst(const Instruction *I)
Definition: LoopRerollPass.cpp:1135
llvm::ScalarEvolution::getMinusSCEV
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
Definition: ScalarEvolution.cpp:4524
get
Should compile to something r4 addze r3 instead we get
Definition: README.txt:24
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:84
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:344
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:342
Casting.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:222
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:591
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:185
llvm::LoopStandardAnalysisResults::SE
ScalarEvolution & SE
Definition: LoopAnalysisManager.h:56
llvm::DeleteDeadPHIs
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
Definition: BasicBlockUtils.cpp:162
llvm::LoopStandardAnalysisResults::AA
AAResults & AA
Definition: LoopAnalysisManager.h:52
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:46
AA
ScalarEvolutionExpressions.h
LoopReroll.h
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
Instructions.h
llvm::User::getNumOperands
unsigned getNumOperands() const
Definition: User.h:191
SmallVector.h
User.h
Dominators.h
N
#define N
llvm::ScalarEvolution::getZero
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
Definition: ScalarEvolution.h:642
Users
iv Induction Variable Users
Definition: IVUsers.cpp:48
llvm::BasicBlock::getFirstNonPHIOrDbg
const Instruction * getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
Definition: BasicBlock.cpp:216
llvm::PHINode
Definition: Instructions.h:2664
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:119
IV
static const uint32_t IV[8]
Definition: blake3_impl.h:85
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:398
llvm::SCEV::getType
Type * getType() const
Return the LLVM type of this SCEV expression.
Definition: ScalarEvolution.cpp:393
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::ScalarEvolution::getAddExpr
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:2454
AliasSetTracker.h
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:172
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:405
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3099
raw_ostream.h
llvm::Instruction::isSameOperationAs
bool isSameOperationAs(const Instruction *I, unsigned flags=0) const
This function determines if the specified instruction executes the same operation as the current one.
Definition: Instruction.cpp:534
llvm::ScalarEvolution::getBackedgeTakenCount
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
Definition: ScalarEvolution.cpp:7907
BasicBlockUtils.h
hasUsesOutsideLoop
static bool hasUsesOutsideLoop(Instruction *I, Loop *L)
Definition: LoopRerollPass.cpp:506
Value.h
llvm::abs
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1282
llvm::LoopStandardAnalysisResults::TLI
TargetLibraryInfo & TLI
Definition: LoopAnalysisManager.h:57
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3192
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37