LLVM  13.0.0git
LoopInterchange.cpp
Go to the documentation of this file.
1 //===- LoopInterchange.cpp - Loop interchange pass-------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This Pass handles loop interchange transform.
10 // This pass interchanges loops to provide a more cache-friendly memory access
11 // patterns.
12 //
13 //===----------------------------------------------------------------------===//
14 
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Analysis/LoopInfo.h"
23 #include "llvm/Analysis/LoopPass.h"
27 #include "llvm/IR/BasicBlock.h"
28 #include "llvm/IR/Constants.h"
29 #include "llvm/IR/DiagnosticInfo.h"
30 #include "llvm/IR/Dominators.h"
31 #include "llvm/IR/Function.h"
32 #include "llvm/IR/IRBuilder.h"
33 #include "llvm/IR/InstrTypes.h"
34 #include "llvm/IR/Instruction.h"
35 #include "llvm/IR/Instructions.h"
36 #include "llvm/IR/Type.h"
37 #include "llvm/IR/User.h"
38 #include "llvm/IR/Value.h"
39 #include "llvm/InitializePasses.h"
40 #include "llvm/Pass.h"
41 #include "llvm/Support/Casting.h"
43 #include "llvm/Support/Debug.h"
46 #include "llvm/Transforms/Scalar.h"
47 #include "llvm/Transforms/Utils.h"
50 #include <cassert>
51 #include <utility>
52 #include <vector>
53 
54 using namespace llvm;
55 
56 #define DEBUG_TYPE "loop-interchange"
57 
58 STATISTIC(LoopsInterchanged, "Number of loops interchanged");
59 
61  "loop-interchange-threshold", cl::init(0), cl::Hidden,
62  cl::desc("Interchange if you gain more than this number"));
63 
64 namespace {
65 
66 using LoopVector = SmallVector<Loop *, 8>;
67 
68 // TODO: Check if we can use a sparse matrix here.
69 using CharMatrix = std::vector<std::vector<char>>;
70 
71 } // end anonymous namespace
72 
73 // Maximum number of dependencies that can be handled in the dependency matrix.
74 static const unsigned MaxMemInstrCount = 100;
75 
76 // Maximum loop depth supported.
77 static const unsigned MaxLoopNestDepth = 10;
78 
79 #ifdef DUMP_DEP_MATRICIES
80 static void printDepMatrix(CharMatrix &DepMatrix) {
81  for (auto &Row : DepMatrix) {
82  for (auto D : Row)
83  LLVM_DEBUG(dbgs() << D << " ");
84  LLVM_DEBUG(dbgs() << "\n");
85  }
86 }
87 #endif
88 
89 static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
90  Loop *L, DependenceInfo *DI) {
91  using ValueVector = SmallVector<Value *, 16>;
92 
93  ValueVector MemInstr;
94 
95  // For each block.
96  for (BasicBlock *BB : L->blocks()) {
97  // Scan the BB and collect legal loads and stores.
98  for (Instruction &I : *BB) {
99  if (!isa<Instruction>(I))
100  return false;
101  if (auto *Ld = dyn_cast<LoadInst>(&I)) {
102  if (!Ld->isSimple())
103  return false;
104  MemInstr.push_back(&I);
105  } else if (auto *St = dyn_cast<StoreInst>(&I)) {
106  if (!St->isSimple())
107  return false;
108  MemInstr.push_back(&I);
109  }
110  }
111  }
112 
113  LLVM_DEBUG(dbgs() << "Found " << MemInstr.size()
114  << " Loads and Stores to analyze\n");
115 
116  ValueVector::iterator I, IE, J, JE;
117 
118  for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
119  for (J = I, JE = MemInstr.end(); J != JE; ++J) {
120  std::vector<char> Dep;
121  Instruction *Src = cast<Instruction>(*I);
122  Instruction *Dst = cast<Instruction>(*J);
123  if (Src == Dst)
124  continue;
125  // Ignore Input dependencies.
126  if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
127  continue;
128  // Track Output, Flow, and Anti dependencies.
129  if (auto D = DI->depends(Src, Dst, true)) {
130  assert(D->isOrdered() && "Expected an output, flow or anti dep.");
131  LLVM_DEBUG(StringRef DepType =
132  D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";
133  dbgs() << "Found " << DepType
134  << " dependency between Src and Dst\n"
135  << " Src:" << *Src << "\n Dst:" << *Dst << '\n');
136  unsigned Levels = D->getLevels();
137  char Direction;
138  for (unsigned II = 1; II <= Levels; ++II) {
139  const SCEV *Distance = D->getDistance(II);
140  const SCEVConstant *SCEVConst =
141  dyn_cast_or_null<SCEVConstant>(Distance);
142  if (SCEVConst) {
143  const ConstantInt *CI = SCEVConst->getValue();
144  if (CI->isNegative())
145  Direction = '<';
146  else if (CI->isZero())
147  Direction = '=';
148  else
149  Direction = '>';
150  Dep.push_back(Direction);
151  } else if (D->isScalar(II)) {
152  Direction = 'S';
153  Dep.push_back(Direction);
154  } else {
155  unsigned Dir = D->getDirection(II);
156  if (Dir == Dependence::DVEntry::LT ||
158  Direction = '<';
159  else if (Dir == Dependence::DVEntry::GT ||
161  Direction = '>';
162  else if (Dir == Dependence::DVEntry::EQ)
163  Direction = '=';
164  else
165  Direction = '*';
166  Dep.push_back(Direction);
167  }
168  }
169  while (Dep.size() != Level) {
170  Dep.push_back('I');
171  }
172 
173  DepMatrix.push_back(Dep);
174  if (DepMatrix.size() > MaxMemInstrCount) {
175  LLVM_DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount
176  << " dependencies inside loop\n");
177  return false;
178  }
179  }
180  }
181  }
182 
183  return true;
184 }
185 
186 // A loop is moved from index 'from' to an index 'to'. Update the Dependence
187 // matrix by exchanging the two columns.
188 static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx,
189  unsigned ToIndx) {
190  for (unsigned I = 0, E = DepMatrix.size(); I < E; ++I)
191  std::swap(DepMatrix[I][ToIndx], DepMatrix[I][FromIndx]);
192 }
193 
194 // Checks if outermost non '=','S'or'I' dependence in the dependence matrix is
195 // '>'
196 static bool isOuterMostDepPositive(CharMatrix &DepMatrix, unsigned Row,
197  unsigned Column) {
198  for (unsigned i = 0; i <= Column; ++i) {
199  if (DepMatrix[Row][i] == '<')
200  return false;
201  if (DepMatrix[Row][i] == '>')
202  return true;
203  }
204  // All dependencies were '=','S' or 'I'
205  return false;
206 }
207 
208 // Checks if no dependence exist in the dependency matrix in Row before Column.
209 static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row,
210  unsigned Column) {
211  for (unsigned i = 0; i < Column; ++i) {
212  if (DepMatrix[Row][i] != '=' && DepMatrix[Row][i] != 'S' &&
213  DepMatrix[Row][i] != 'I')
214  return false;
215  }
216  return true;
217 }
218 
219 static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row,
220  unsigned OuterLoopId, char InnerDep,
221  char OuterDep) {
222  if (isOuterMostDepPositive(DepMatrix, Row, OuterLoopId))
223  return false;
224 
225  if (InnerDep == OuterDep)
226  return true;
227 
228  // It is legal to interchange if and only if after interchange no row has a
229  // '>' direction as the leftmost non-'='.
230 
231  if (InnerDep == '=' || InnerDep == 'S' || InnerDep == 'I')
232  return true;
233 
234  if (InnerDep == '<')
235  return true;
236 
237  if (InnerDep == '>') {
238  // If OuterLoopId represents outermost loop then interchanging will make the
239  // 1st dependency as '>'
240  if (OuterLoopId == 0)
241  return false;
242 
243  // If all dependencies before OuterloopId are '=','S'or 'I'. Then
244  // interchanging will result in this row having an outermost non '='
245  // dependency of '>'
246  if (!containsNoDependence(DepMatrix, Row, OuterLoopId))
247  return true;
248  }
249 
250  return false;
251 }
252 
253 // Checks if it is legal to interchange 2 loops.
254 // [Theorem] A permutation of the loops in a perfect nest is legal if and only
255 // if the direction matrix, after the same permutation is applied to its
256 // columns, has no ">" direction as the leftmost non-"=" direction in any row.
257 static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
258  unsigned InnerLoopId,
259  unsigned OuterLoopId) {
260  unsigned NumRows = DepMatrix.size();
261  // For each row check if it is valid to interchange.
262  for (unsigned Row = 0; Row < NumRows; ++Row) {
263  char InnerDep = DepMatrix[Row][InnerLoopId];
264  char OuterDep = DepMatrix[Row][OuterLoopId];
265  if (InnerDep == '*' || OuterDep == '*')
266  return false;
267  if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep, OuterDep))
268  return false;
269  }
270  return true;
271 }
272 
273 static LoopVector populateWorklist(Loop &L) {
274  LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "
275  << L.getHeader()->getParent()->getName() << " Loop: %"
276  << L.getHeader()->getName() << '\n');
277  LoopVector LoopList;
278  Loop *CurrentLoop = &L;
279  const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops();
280  while (!Vec->empty()) {
281  // The current loop has multiple subloops in it hence it is not tightly
282  // nested.
283  // Discard all loops above it added into Worklist.
284  if (Vec->size() != 1)
285  return {};
286 
287  LoopList.push_back(CurrentLoop);
288  CurrentLoop = Vec->front();
289  Vec = &CurrentLoop->getSubLoops();
290  }
291  LoopList.push_back(CurrentLoop);
292  return LoopList;
293 }
294 
296  PHINode *InnerIndexVar = L->getCanonicalInductionVariable();
297  if (InnerIndexVar)
298  return InnerIndexVar;
299  if (L->getLoopLatch() == nullptr || L->getLoopPredecessor() == nullptr)
300  return nullptr;
301  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
302  PHINode *PhiVar = cast<PHINode>(I);
303  Type *PhiTy = PhiVar->getType();
304  if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
305  !PhiTy->isPointerTy())
306  return nullptr;
307  const SCEVAddRecExpr *AddRec =
308  dyn_cast<SCEVAddRecExpr>(SE->getSCEV(PhiVar));
309  if (!AddRec || !AddRec->isAffine())
310  continue;
311  const SCEV *Step = AddRec->getStepRecurrence(*SE);
312  if (!isa<SCEVConstant>(Step))
313  continue;
314  // Found the induction variable.
315  // FIXME: Handle loops with more than one induction variable. Note that,
316  // currently, legality makes sure we have only one induction variable.
317  return PhiVar;
318  }
319  return nullptr;
320 }
321 
322 namespace {
323 
324 /// LoopInterchangeLegality checks if it is legal to interchange the loop.
325 class LoopInterchangeLegality {
326 public:
327  LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
329  : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
330 
331  /// Check if the loops can be interchanged.
332  bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
333  CharMatrix &DepMatrix);
334 
335  /// Check if the loop structure is understood. We do not handle triangular
336  /// loops for now.
337  bool isLoopStructureUnderstood(PHINode *InnerInductionVar);
338 
339  bool currentLimitations();
340 
341  const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions() const {
342  return OuterInnerReductions;
343  }
344 
345 private:
346  bool tightlyNested(Loop *Outer, Loop *Inner);
347  bool containsUnsafeInstructions(BasicBlock *BB);
348 
349  /// Discover induction and reduction PHIs in the header of \p L. Induction
350  /// PHIs are added to \p Inductions, reductions are added to
351  /// OuterInnerReductions. When the outer loop is passed, the inner loop needs
352  /// to be passed as \p InnerLoop.
353  bool findInductionAndReductions(Loop *L,
354  SmallVector<PHINode *, 8> &Inductions,
355  Loop *InnerLoop);
356 
357  Loop *OuterLoop;
358  Loop *InnerLoop;
359 
360  ScalarEvolution *SE;
361 
362  /// Interface to emit optimization remarks.
364 
365  /// Set of reduction PHIs taking part of a reduction across the inner and
366  /// outer loop.
367  SmallPtrSet<PHINode *, 4> OuterInnerReductions;
368 };
369 
370 /// LoopInterchangeProfitability checks if it is profitable to interchange the
371 /// loop.
372 class LoopInterchangeProfitability {
373 public:
374  LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
376  : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
377 
378  /// Check if the loop interchange is profitable.
379  bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId,
380  CharMatrix &DepMatrix);
381 
382 private:
383  int getInstrOrderCost();
384 
385  Loop *OuterLoop;
386  Loop *InnerLoop;
387 
388  /// Scev analysis.
389  ScalarEvolution *SE;
390 
391  /// Interface to emit optimization remarks.
393 };
394 
395 /// LoopInterchangeTransform interchanges the loop.
396 class LoopInterchangeTransform {
397 public:
398  LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
399  LoopInfo *LI, DominatorTree *DT,
400  const LoopInterchangeLegality &LIL)
401  : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {}
402 
403  /// Interchange OuterLoop and InnerLoop.
404  bool transform();
405  void restructureLoops(Loop *NewInner, Loop *NewOuter,
406  BasicBlock *OrigInnerPreHeader,
407  BasicBlock *OrigOuterPreHeader);
408  void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
409 
410 private:
411  bool adjustLoopLinks();
412  bool adjustLoopBranches();
413 
414  Loop *OuterLoop;
415  Loop *InnerLoop;
416 
417  /// Scev analysis.
418  ScalarEvolution *SE;
419 
420  LoopInfo *LI;
421  DominatorTree *DT;
422 
423  const LoopInterchangeLegality &LIL;
424 };
425 
426 struct LoopInterchange {
427  ScalarEvolution *SE = nullptr;
428  LoopInfo *LI = nullptr;
429  DependenceInfo *DI = nullptr;
430  DominatorTree *DT = nullptr;
431 
432  /// Interface to emit optimization remarks.
434 
435  LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
437  : SE(SE), LI(LI), DI(DI), DT(DT), ORE(ORE) {}
438 
439  bool run(Loop *L) {
440  if (L->getParentLoop())
441  return false;
442 
443  return processLoopList(populateWorklist(*L));
444  }
445 
446  bool run(LoopNest &LN) {
447  const auto &LoopList = LN.getLoops();
448  for (unsigned I = 1; I < LoopList.size(); ++I)
449  if (LoopList[I]->getParentLoop() != LoopList[I - 1])
450  return false;
451  return processLoopList(LoopList);
452  }
453 
454  bool isComputableLoopNest(ArrayRef<Loop *> LoopList) {
455  for (Loop *L : LoopList) {
456  const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
457  if (isa<SCEVCouldNotCompute>(ExitCountOuter)) {
458  LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n");
459  return false;
460  }
461  if (L->getNumBackEdges() != 1) {
462  LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n");
463  return false;
464  }
465  if (!L->getExitingBlock()) {
466  LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n");
467  return false;
468  }
469  }
470  return true;
471  }
472 
473  unsigned selectLoopForInterchange(ArrayRef<Loop *> LoopList) {
474  // TODO: Add a better heuristic to select the loop to be interchanged based
475  // on the dependence matrix. Currently we select the innermost loop.
476  return LoopList.size() - 1;
477  }
478 
479  bool processLoopList(ArrayRef<Loop *> LoopList) {
480  bool Changed = false;
481  unsigned LoopNestDepth = LoopList.size();
482  if (LoopNestDepth < 2) {
483  LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n");
484  return false;
485  }
486  if (LoopNestDepth > MaxLoopNestDepth) {
487  LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than "
488  << MaxLoopNestDepth << "\n");
489  return false;
490  }
491  if (!isComputableLoopNest(LoopList)) {
492  LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n");
493  return false;
494  }
495 
496  LLVM_DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepth
497  << "\n");
498 
499  CharMatrix DependencyMatrix;
500  Loop *OuterMostLoop = *(LoopList.begin());
501  if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth,
502  OuterMostLoop, DI)) {
503  LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n");
504  return false;
505  }
506 #ifdef DUMP_DEP_MATRICIES
507  LLVM_DEBUG(dbgs() << "Dependence before interchange\n");
508  printDepMatrix(DependencyMatrix);
509 #endif
510 
511  // Get the Outermost loop exit.
512  BasicBlock *LoopNestExit = OuterMostLoop->getExitBlock();
513  if (!LoopNestExit) {
514  LLVM_DEBUG(dbgs() << "OuterMostLoop needs an unique exit block");
515  return false;
516  }
517 
518  unsigned SelecLoopId = selectLoopForInterchange(LoopList);
519  // Move the selected loop outwards to the best possible position.
520  Loop *LoopToBeInterchanged = LoopList[SelecLoopId];
521  for (unsigned i = SelecLoopId; i > 0; i--) {
522  bool Interchanged = processLoop(LoopToBeInterchanged, LoopList[i - 1], i,
523  i - 1, DependencyMatrix);
524  if (!Interchanged)
525  return Changed;
526  // Update the DependencyMatrix
527  interChangeDependencies(DependencyMatrix, i, i - 1);
528 #ifdef DUMP_DEP_MATRICIES
529  LLVM_DEBUG(dbgs() << "Dependence after interchange\n");
530  printDepMatrix(DependencyMatrix);
531 #endif
532  Changed |= Interchanged;
533  }
534  return Changed;
535  }
536 
537  bool processLoop(Loop *InnerLoop, Loop *OuterLoop, unsigned InnerLoopId,
538  unsigned OuterLoopId,
539  std::vector<std::vector<char>> &DependencyMatrix) {
540  LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId
541  << " and OuterLoopId = " << OuterLoopId << "\n");
542  LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
543  if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
544  LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n");
545  return false;
546  }
547  LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");
548  LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
549  if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) {
550  LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
551  return false;
552  }
553 
554  ORE->emit([&]() {
555  return OptimizationRemark(DEBUG_TYPE, "Interchanged",
556  InnerLoop->getStartLoc(),
557  InnerLoop->getHeader())
558  << "Loop interchanged with enclosing loop.";
559  });
560 
561  LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL);
562  LIT.transform();
563  LLVM_DEBUG(dbgs() << "Loops interchanged.\n");
564  LoopsInterchanged++;
565 
566  assert(InnerLoop->isLCSSAForm(*DT) &&
567  "Inner loop not left in LCSSA form after loop interchange!");
568  assert(OuterLoop->isLCSSAForm(*DT) &&
569  "Outer loop not left in LCSSA form after loop interchange!");
570 
571  return true;
572  }
573 };
574 
575 } // end anonymous namespace
576 
577 bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock *BB) {
578  return any_of(*BB, [](const Instruction &I) {
579  return I.mayHaveSideEffects() || I.mayReadFromMemory();
580  });
581 }
582 
583 bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
584  BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
585  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
586  BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
587 
588  LLVM_DEBUG(dbgs() << "Checking if loops are tightly nested\n");
589 
590  // A perfectly nested loop will not have any branch in between the outer and
591  // inner block i.e. outer header will branch to either inner preheader and
592  // outerloop latch.
593  BranchInst *OuterLoopHeaderBI =
594  dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
595  if (!OuterLoopHeaderBI)
596  return false;
597 
598  for (BasicBlock *Succ : successors(OuterLoopHeaderBI))
599  if (Succ != InnerLoopPreHeader && Succ != InnerLoop->getHeader() &&
600  Succ != OuterLoopLatch)
601  return false;
602 
603  LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n");
604  // We do not have any basic block in between now make sure the outer header
605  // and outer loop latch doesn't contain any unsafe instructions.
606  if (containsUnsafeInstructions(OuterLoopHeader) ||
607  containsUnsafeInstructions(OuterLoopLatch))
608  return false;
609 
610  // Also make sure the inner loop preheader does not contain any unsafe
611  // instructions. Note that all instructions in the preheader will be moved to
612  // the outer loop header when interchanging.
613  if (InnerLoopPreHeader != OuterLoopHeader &&
614  containsUnsafeInstructions(InnerLoopPreHeader))
615  return false;
616 
617  BasicBlock *InnerLoopExit = InnerLoop->getExitBlock();
618  // Ensure the inner loop exit block flows to the outer loop latch possibly
619  // through empty blocks.
620  const BasicBlock &SuccInner =
621  LoopNest::skipEmptyBlockUntil(InnerLoopExit, OuterLoopLatch);
622  if (&SuccInner != OuterLoopLatch) {
623  LLVM_DEBUG(dbgs() << "Inner loop exit block " << *InnerLoopExit
624  << " does not lead to the outer loop latch.\n";);
625  return false;
626  }
627  // The inner loop exit block does flow to the outer loop latch and not some
628  // other BBs, now make sure it contains safe instructions, since it will be
629  // moved into the (new) inner loop after interchange.
630  if (containsUnsafeInstructions(InnerLoopExit))
631  return false;
632 
633  LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n");
634  // We have a perfect loop nest.
635  return true;
636 }
637 
638 bool LoopInterchangeLegality::isLoopStructureUnderstood(
639  PHINode *InnerInduction) {
640  unsigned Num = InnerInduction->getNumOperands();
641  BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
642  for (unsigned i = 0; i < Num; ++i) {
643  Value *Val = InnerInduction->getOperand(i);
644  if (isa<Constant>(Val))
645  continue;
646  Instruction *I = dyn_cast<Instruction>(Val);
647  if (!I)
648  return false;
649  // TODO: Handle triangular loops.
650  // e.g. for(int i=0;i<N;i++)
651  // for(int j=i;j<N;j++)
652  unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i);
653  if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
654  InnerLoopPreheader &&
655  !OuterLoop->isLoopInvariant(I)) {
656  return false;
657  }
658  }
659  return true;
660 }
661 
662 // If SV is a LCSSA PHI node with a single incoming value, return the incoming
663 // value.
664 static Value *followLCSSA(Value *SV) {
665  PHINode *PHI = dyn_cast<PHINode>(SV);
666  if (!PHI)
667  return SV;
668 
669  if (PHI->getNumIncomingValues() != 1)
670  return SV;
671  return followLCSSA(PHI->getIncomingValue(0));
672 }
673 
674 // Check V's users to see if it is involved in a reduction in L.
676  // Reduction variables cannot be constants.
677  if (isa<Constant>(V))
678  return nullptr;
679 
680  for (Value *User : V->users()) {
681  if (PHINode *PHI = dyn_cast<PHINode>(User)) {
682  if (PHI->getNumIncomingValues() == 1)
683  continue;
685  if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD))
686  return PHI;
687  return nullptr;
688  }
689  }
690 
691  return nullptr;
692 }
693 
694 bool LoopInterchangeLegality::findInductionAndReductions(
695  Loop *L, SmallVector<PHINode *, 8> &Inductions, Loop *InnerLoop) {
696  if (!L->getLoopLatch() || !L->getLoopPredecessor())
697  return false;
698  for (PHINode &PHI : L->getHeader()->phis()) {
701  if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
702  Inductions.push_back(&PHI);
703  else {
704  // PHIs in inner loops need to be part of a reduction in the outer loop,
705  // discovered when checking the PHIs of the outer loop earlier.
706  if (!InnerLoop) {
707  if (!OuterInnerReductions.count(&PHI)) {
708  LLVM_DEBUG(dbgs() << "Inner loop PHI is not part of reductions "
709  "across the outer loop.\n");
710  return false;
711  }
712  } else {
713  assert(PHI.getNumIncomingValues() == 2 &&
714  "Phis in loop header should have exactly 2 incoming values");
715  // Check if we have a PHI node in the outer loop that has a reduction
716  // result from the inner loop as an incoming value.
717  Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch()));
718  PHINode *InnerRedPhi = findInnerReductionPhi(InnerLoop, V);
719  if (!InnerRedPhi ||
720  !llvm::is_contained(InnerRedPhi->incoming_values(), &PHI)) {
721  LLVM_DEBUG(
722  dbgs()
723  << "Failed to recognize PHI as an induction or reduction.\n");
724  return false;
725  }
726  OuterInnerReductions.insert(&PHI);
727  OuterInnerReductions.insert(InnerRedPhi);
728  }
729  }
730  }
731  return true;
732 }
733 
734 // This function indicates the current limitations in the transform as a result
735 // of which we do not proceed.
736 bool LoopInterchangeLegality::currentLimitations() {
737  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
738  BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
739 
740  // transform currently expects the loop latches to also be the exiting
741  // blocks.
742  if (InnerLoop->getExitingBlock() != InnerLoopLatch ||
743  OuterLoop->getExitingBlock() != OuterLoop->getLoopLatch() ||
744  !isa<BranchInst>(InnerLoopLatch->getTerminator()) ||
745  !isa<BranchInst>(OuterLoop->getLoopLatch()->getTerminator())) {
746  LLVM_DEBUG(
747  dbgs() << "Loops where the latch is not the exiting block are not"
748  << " supported currently.\n");
749  ORE->emit([&]() {
750  return OptimizationRemarkMissed(DEBUG_TYPE, "ExitingNotLatch",
751  OuterLoop->getStartLoc(),
752  OuterLoop->getHeader())
753  << "Loops where the latch is not the exiting block cannot be"
754  " interchange currently.";
755  });
756  return true;
757  }
758 
759  PHINode *InnerInductionVar;
760  SmallVector<PHINode *, 8> Inductions;
761  if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
762  LLVM_DEBUG(
763  dbgs() << "Only outer loops with induction or reduction PHI nodes "
764  << "are supported currently.\n");
765  ORE->emit([&]() {
766  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIOuter",
767  OuterLoop->getStartLoc(),
768  OuterLoop->getHeader())
769  << "Only outer loops with induction or reduction PHI nodes can be"
770  " interchanged currently.";
771  });
772  return true;
773  }
774 
775  // TODO: Currently we handle only loops with 1 induction variable.
776  if (Inductions.size() != 1) {
777  LLVM_DEBUG(dbgs() << "Loops with more than 1 induction variables are not "
778  << "supported currently.\n");
779  ORE->emit([&]() {
780  return OptimizationRemarkMissed(DEBUG_TYPE, "MultiIndutionOuter",
781  OuterLoop->getStartLoc(),
782  OuterLoop->getHeader())
783  << "Only outer loops with 1 induction variable can be "
784  "interchanged currently.";
785  });
786  return true;
787  }
788 
789  Inductions.clear();
790  if (!findInductionAndReductions(InnerLoop, Inductions, nullptr)) {
791  LLVM_DEBUG(
792  dbgs() << "Only inner loops with induction or reduction PHI nodes "
793  << "are supported currently.\n");
794  ORE->emit([&]() {
795  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner",
796  InnerLoop->getStartLoc(),
797  InnerLoop->getHeader())
798  << "Only inner loops with induction or reduction PHI nodes can be"
799  " interchange currently.";
800  });
801  return true;
802  }
803 
804  // TODO: Currently we handle only loops with 1 induction variable.
805  if (Inductions.size() != 1) {
806  LLVM_DEBUG(
807  dbgs() << "We currently only support loops with 1 induction variable."
808  << "Failed to interchange due to current limitation\n");
809  ORE->emit([&]() {
810  return OptimizationRemarkMissed(DEBUG_TYPE, "MultiInductionInner",
811  InnerLoop->getStartLoc(),
812  InnerLoop->getHeader())
813  << "Only inner loops with 1 induction variable can be "
814  "interchanged currently.";
815  });
816  return true;
817  }
818  InnerInductionVar = Inductions.pop_back_val();
819 
820  // TODO: Triangular loops are not handled for now.
821  if (!isLoopStructureUnderstood(InnerInductionVar)) {
822  LLVM_DEBUG(dbgs() << "Loop structure not understood by pass\n");
823  ORE->emit([&]() {
824  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedStructureInner",
825  InnerLoop->getStartLoc(),
826  InnerLoop->getHeader())
827  << "Inner loop structure not understood currently.";
828  });
829  return true;
830  }
831 
832  // TODO: Current limitation: Since we split the inner loop latch at the point
833  // were induction variable is incremented (induction.next); We cannot have
834  // more than 1 user of induction.next since it would result in broken code
835  // after split.
836  // e.g.
837  // for(i=0;i<N;i++) {
838  // for(j = 0;j<M;j++) {
839  // A[j+1][i+2] = A[j][i]+k;
840  // }
841  // }
842  Instruction *InnerIndexVarInc = nullptr;
843  if (InnerInductionVar->getIncomingBlock(0) == InnerLoopPreHeader)
844  InnerIndexVarInc =
845  dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(1));
846  else
847  InnerIndexVarInc =
848  dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(0));
849 
850  if (!InnerIndexVarInc) {
851  LLVM_DEBUG(
852  dbgs() << "Did not find an instruction to increment the induction "
853  << "variable.\n");
854  ORE->emit([&]() {
855  return OptimizationRemarkMissed(DEBUG_TYPE, "NoIncrementInInner",
856  InnerLoop->getStartLoc(),
857  InnerLoop->getHeader())
858  << "The inner loop does not increment the induction variable.";
859  });
860  return true;
861  }
862 
863  // Since we split the inner loop latch on this induction variable. Make sure
864  // we do not have any instruction between the induction variable and branch
865  // instruction.
866 
867  bool FoundInduction = false;
868  for (const Instruction &I :
869  llvm::reverse(InnerLoopLatch->instructionsWithoutDebug())) {
870  if (isa<BranchInst>(I) || isa<CmpInst>(I) || isa<TruncInst>(I) ||
871  isa<ZExtInst>(I))
872  continue;
873 
874  // We found an instruction. If this is not induction variable then it is not
875  // safe to split this loop latch.
876  if (!I.isIdenticalTo(InnerIndexVarInc)) {
877  LLVM_DEBUG(dbgs() << "Found unsupported instructions between induction "
878  << "variable increment and branch.\n");
879  ORE->emit([&]() {
881  DEBUG_TYPE, "UnsupportedInsBetweenInduction",
882  InnerLoop->getStartLoc(), InnerLoop->getHeader())
883  << "Found unsupported instruction between induction variable "
884  "increment and branch.";
885  });
886  return true;
887  }
888 
889  FoundInduction = true;
890  break;
891  }
892  // The loop latch ended and we didn't find the induction variable return as
893  // current limitation.
894  if (!FoundInduction) {
895  LLVM_DEBUG(dbgs() << "Did not find the induction variable.\n");
896  ORE->emit([&]() {
897  return OptimizationRemarkMissed(DEBUG_TYPE, "NoIndutionVariable",
898  InnerLoop->getStartLoc(),
899  InnerLoop->getHeader())
900  << "Did not find the induction variable.";
901  });
902  return true;
903  }
904  return false;
905 }
906 
907 // We currently only support LCSSA PHI nodes in the inner loop exit, if their
908 // users are either reduction PHIs or PHIs outside the outer loop (which means
909 // the we are only interested in the final value after the loop).
910 static bool
912  SmallPtrSetImpl<PHINode *> &Reductions) {
913  BasicBlock *InnerExit = OuterL->getUniqueExitBlock();
914  for (PHINode &PHI : InnerExit->phis()) {
915  // Reduction lcssa phi will have only 1 incoming block that from loop latch.
916  if (PHI.getNumIncomingValues() > 1)
917  return false;
918  if (any_of(PHI.users(), [&Reductions, OuterL](User *U) {
919  PHINode *PN = dyn_cast<PHINode>(U);
920  return !PN ||
921  (!Reductions.count(PN) && OuterL->contains(PN->getParent()));
922  })) {
923  return false;
924  }
925  }
926  return true;
927 }
928 
929 // We currently support LCSSA PHI nodes in the outer loop exit, if their
930 // incoming values do not come from the outer loop latch or if the
931 // outer loop latch has a single predecessor. In that case, the value will
932 // be available if both the inner and outer loop conditions are true, which
933 // will still be true after interchanging. If we have multiple predecessor,
934 // that may not be the case, e.g. because the outer loop latch may be executed
935 // if the inner loop is not executed.
936 static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
937  BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock();
938  for (PHINode &PHI : LoopNestExit->phis()) {
939  // FIXME: We currently are not able to detect floating point reductions
940  // and have to use floating point PHIs as a proxy to prevent
941  // interchanging in the presence of floating point reductions.
942  if (PHI.getType()->isFloatingPointTy())
943  return false;
944  for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
945  Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i));
946  if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch())
947  continue;
948 
949  // The incoming value is defined in the outer loop latch. Currently we
950  // only support that in case the outer loop latch has a single predecessor.
951  // This guarantees that the outer loop latch is executed if and only if
952  // the inner loop is executed (because tightlyNested() guarantees that the
953  // outer loop header only branches to the inner loop or the outer loop
954  // latch).
955  // FIXME: We could weaken this logic and allow multiple predecessors,
956  // if the values are produced outside the loop latch. We would need
957  // additional logic to update the PHI nodes in the exit block as
958  // well.
959  if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr)
960  return false;
961  }
962  }
963  return true;
964 }
965 
966 bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
967  unsigned OuterLoopId,
968  CharMatrix &DepMatrix) {
969  if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) {
970  LLVM_DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId
971  << " and OuterLoopId = " << OuterLoopId
972  << " due to dependence\n");
973  ORE->emit([&]() {
974  return OptimizationRemarkMissed(DEBUG_TYPE, "Dependence",
975  InnerLoop->getStartLoc(),
976  InnerLoop->getHeader())
977  << "Cannot interchange loops due to dependences.";
978  });
979  return false;
980  }
981  // Check if outer and inner loop contain legal instructions only.
982  for (auto *BB : OuterLoop->blocks())
983  for (Instruction &I : BB->instructionsWithoutDebug())
984  if (CallInst *CI = dyn_cast<CallInst>(&I)) {
985  // readnone functions do not prevent interchanging.
986  if (CI->doesNotReadMemory())
987  continue;
988  LLVM_DEBUG(
989  dbgs() << "Loops with call instructions cannot be interchanged "
990  << "safely.");
991  ORE->emit([&]() {
992  return OptimizationRemarkMissed(DEBUG_TYPE, "CallInst",
993  CI->getDebugLoc(),
994  CI->getParent())
995  << "Cannot interchange loops due to call instruction.";
996  });
997 
998  return false;
999  }
1000 
1001  // TODO: The loops could not be interchanged due to current limitations in the
1002  // transform module.
1003  if (currentLimitations()) {
1004  LLVM_DEBUG(dbgs() << "Not legal because of current transform limitation\n");
1005  return false;
1006  }
1007 
1008  // Check if the loops are tightly nested.
1009  if (!tightlyNested(OuterLoop, InnerLoop)) {
1010  LLVM_DEBUG(dbgs() << "Loops not tightly nested\n");
1011  ORE->emit([&]() {
1012  return OptimizationRemarkMissed(DEBUG_TYPE, "NotTightlyNested",
1013  InnerLoop->getStartLoc(),
1014  InnerLoop->getHeader())
1015  << "Cannot interchange loops because they are not tightly "
1016  "nested.";
1017  });
1018  return false;
1019  }
1020 
1021  if (!areInnerLoopExitPHIsSupported(OuterLoop, InnerLoop,
1022  OuterInnerReductions)) {
1023  LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop exit.\n");
1024  ORE->emit([&]() {
1025  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
1026  InnerLoop->getStartLoc(),
1027  InnerLoop->getHeader())
1028  << "Found unsupported PHI node in loop exit.";
1029  });
1030  return false;
1031  }
1032 
1033  if (!areOuterLoopExitPHIsSupported(OuterLoop, InnerLoop)) {
1034  LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n");
1035  ORE->emit([&]() {
1036  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
1037  OuterLoop->getStartLoc(),
1038  OuterLoop->getHeader())
1039  << "Found unsupported PHI node in loop exit.";
1040  });
1041  return false;
1042  }
1043 
1044  return true;
1045 }
1046 
1047 int LoopInterchangeProfitability::getInstrOrderCost() {
1048  unsigned GoodOrder, BadOrder;
1049  BadOrder = GoodOrder = 0;
1050  for (BasicBlock *BB : InnerLoop->blocks()) {
1051  for (Instruction &Ins : *BB) {
1052  if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) {
1053  unsigned NumOp = GEP->getNumOperands();
1054  bool FoundInnerInduction = false;
1055  bool FoundOuterInduction = false;
1056  for (unsigned i = 0; i < NumOp; ++i) {
1057  // Skip operands that are not SCEV-able.
1058  if (!SE->isSCEVable(GEP->getOperand(i)->getType()))
1059  continue;
1060 
1061  const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i));
1062  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal);
1063  if (!AR)
1064  continue;
1065 
1066  // If we find the inner induction after an outer induction e.g.
1067  // for(int i=0;i<N;i++)
1068  // for(int j=0;j<N;j++)
1069  // A[i][j] = A[i-1][j-1]+k;
1070  // then it is a good order.
1071  if (AR->getLoop() == InnerLoop) {
1072  // We found an InnerLoop induction after OuterLoop induction. It is
1073  // a good order.
1074  FoundInnerInduction = true;
1075  if (FoundOuterInduction) {
1076  GoodOrder++;
1077  break;
1078  }
1079  }
1080  // If we find the outer induction after an inner induction e.g.
1081  // for(int i=0;i<N;i++)
1082  // for(int j=0;j<N;j++)
1083  // A[j][i] = A[j-1][i-1]+k;
1084  // then it is a bad order.
1085  if (AR->getLoop() == OuterLoop) {
1086  // We found an OuterLoop induction after InnerLoop induction. It is
1087  // a bad order.
1088  FoundOuterInduction = true;
1089  if (FoundInnerInduction) {
1090  BadOrder++;
1091  break;
1092  }
1093  }
1094  }
1095  }
1096  }
1097  }
1098  return GoodOrder - BadOrder;
1099 }
1100 
1101 static bool isProfitableForVectorization(unsigned InnerLoopId,
1102  unsigned OuterLoopId,
1103  CharMatrix &DepMatrix) {
1104  // TODO: Improve this heuristic to catch more cases.
1105  // If the inner loop is loop independent or doesn't carry any dependency it is
1106  // profitable to move this to outer position.
1107  for (auto &Row : DepMatrix) {
1108  if (Row[InnerLoopId] != 'S' && Row[InnerLoopId] != 'I')
1109  return false;
1110  // TODO: We need to improve this heuristic.
1111  if (Row[OuterLoopId] != '=')
1112  return false;
1113  }
1114  // If outer loop has dependence and inner loop is loop independent then it is
1115  // profitable to interchange to enable parallelism.
1116  // If there are no dependences, interchanging will not improve anything.
1117  return !DepMatrix.empty();
1118 }
1119 
1120 bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
1121  unsigned OuterLoopId,
1122  CharMatrix &DepMatrix) {
1123  // TODO: Add better profitability checks.
1124  // e.g
1125  // 1) Construct dependency matrix and move the one with no loop carried dep
1126  // inside to enable vectorization.
1127 
1128  // This is rough cost estimation algorithm. It counts the good and bad order
1129  // of induction variables in the instruction and allows reordering if number
1130  // of bad orders is more than good.
1131  int Cost = getInstrOrderCost();
1132  LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n");
1133  if (Cost < -LoopInterchangeCostThreshold)
1134  return true;
1135 
1136  // It is not profitable as per current cache profitability model. But check if
1137  // we can move this loop outside to improve parallelism.
1138  if (isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix))
1139  return true;
1140 
1141  ORE->emit([&]() {
1142  return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable",
1143  InnerLoop->getStartLoc(),
1144  InnerLoop->getHeader())
1145  << "Interchanging loops is too costly (cost="
1146  << ore::NV("Cost", Cost) << ", threshold="
1147  << ore::NV("Threshold", LoopInterchangeCostThreshold)
1148  << ") and it does not improve parallelism.";
1149  });
1150  return false;
1151 }
1152 
1153 void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
1154  Loop *InnerLoop) {
1155  for (Loop *L : *OuterLoop)
1156  if (L == InnerLoop) {
1157  OuterLoop->removeChildLoop(L);
1158  return;
1159  }
1160  llvm_unreachable("Couldn't find loop");
1161 }
1162 
1163 /// Update LoopInfo, after interchanging. NewInner and NewOuter refer to the
1164 /// new inner and outer loop after interchanging: NewInner is the original
1165 /// outer loop and NewOuter is the original inner loop.
1166 ///
1167 /// Before interchanging, we have the following structure
1168 /// Outer preheader
1169 // Outer header
1170 // Inner preheader
1171 // Inner header
1172 // Inner body
1173 // Inner latch
1174 // outer bbs
1175 // Outer latch
1176 //
1177 // After interchanging:
1178 // Inner preheader
1179 // Inner header
1180 // Outer preheader
1181 // Outer header
1182 // Inner body
1183 // outer bbs
1184 // Outer latch
1185 // Inner latch
1186 void LoopInterchangeTransform::restructureLoops(
1187  Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader,
1188  BasicBlock *OrigOuterPreHeader) {
1189  Loop *OuterLoopParent = OuterLoop->getParentLoop();
1190  // The original inner loop preheader moves from the new inner loop to
1191  // the parent loop, if there is one.
1192  NewInner->removeBlockFromLoop(OrigInnerPreHeader);
1193  LI->changeLoopFor(OrigInnerPreHeader, OuterLoopParent);
1194 
1195  // Switch the loop levels.
1196  if (OuterLoopParent) {
1197  // Remove the loop from its parent loop.
1198  removeChildLoop(OuterLoopParent, NewInner);
1199  removeChildLoop(NewInner, NewOuter);
1200  OuterLoopParent->addChildLoop(NewOuter);
1201  } else {
1202  removeChildLoop(NewInner, NewOuter);
1203  LI->changeTopLevelLoop(NewInner, NewOuter);
1204  }
1205  while (!NewOuter->isInnermost())
1206  NewInner->addChildLoop(NewOuter->removeChildLoop(NewOuter->begin()));
1207  NewOuter->addChildLoop(NewInner);
1208 
1209  // BBs from the original inner loop.
1210  SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->blocks());
1211 
1212  // Add BBs from the original outer loop to the original inner loop (excluding
1213  // BBs already in inner loop)
1214  for (BasicBlock *BB : NewInner->blocks())
1215  if (LI->getLoopFor(BB) == NewInner)
1216  NewOuter->addBlockEntry(BB);
1217 
1218  // Now remove inner loop header and latch from the new inner loop and move
1219  // other BBs (the loop body) to the new inner loop.
1220  BasicBlock *OuterHeader = NewOuter->getHeader();
1221  BasicBlock *OuterLatch = NewOuter->getLoopLatch();
1222  for (BasicBlock *BB : OrigInnerBBs) {
1223  // Nothing will change for BBs in child loops.
1224  if (LI->getLoopFor(BB) != NewOuter)
1225  continue;
1226  // Remove the new outer loop header and latch from the new inner loop.
1227  if (BB == OuterHeader || BB == OuterLatch)
1228  NewInner->removeBlockFromLoop(BB);
1229  else
1230  LI->changeLoopFor(BB, NewInner);
1231  }
1232 
1233  // The preheader of the original outer loop becomes part of the new
1234  // outer loop.
1235  NewOuter->addBlockEntry(OrigOuterPreHeader);
1236  LI->changeLoopFor(OrigOuterPreHeader, NewOuter);
1237 
1238  // Tell SE that we move the loops around.
1239  SE->forgetLoop(NewOuter);
1240  SE->forgetLoop(NewInner);
1241 }
1242 
1244  bool Transformed = false;
1245  Instruction *InnerIndexVar;
1246 
1247  if (InnerLoop->getSubLoops().empty()) {
1248  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1249  LLVM_DEBUG(dbgs() << "Splitting the inner loop latch\n");
1250  PHINode *InductionPHI = getInductionVariable(InnerLoop, SE);
1251  if (!InductionPHI) {
1252  LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n");
1253  return false;
1254  }
1255 
1256  if (InductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
1257  InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(1));
1258  else
1259  InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(0));
1260 
1261  // Ensure that InductionPHI is the first Phi node.
1262  if (&InductionPHI->getParent()->front() != InductionPHI)
1263  InductionPHI->moveBefore(&InductionPHI->getParent()->front());
1264 
1265  // Create a new latch block for the inner loop. We split at the
1266  // current latch's terminator and then move the condition and all
1267  // operands that are not either loop-invariant or the induction PHI into the
1268  // new latch block.
1269  BasicBlock *NewLatch =
1270  SplitBlock(InnerLoop->getLoopLatch(),
1271  InnerLoop->getLoopLatch()->getTerminator(), DT, LI);
1272 
1274  unsigned i = 0;
1275  auto MoveInstructions = [&i, &WorkList, this, InductionPHI, NewLatch]() {
1276  for (; i < WorkList.size(); i++) {
1277  // Duplicate instruction and move it the new latch. Update uses that
1278  // have been moved.
1279  Instruction *NewI = WorkList[i]->clone();
1280  NewI->insertBefore(NewLatch->getFirstNonPHI());
1281  assert(!NewI->mayHaveSideEffects() &&
1282  "Moving instructions with side-effects may change behavior of "
1283  "the loop nest!");
1284  for (Use &U : llvm::make_early_inc_range(WorkList[i]->uses())) {
1285  Instruction *UserI = cast<Instruction>(U.getUser());
1286  if (!InnerLoop->contains(UserI->getParent()) ||
1287  UserI->getParent() == NewLatch || UserI == InductionPHI)
1288  U.set(NewI);
1289  }
1290  // Add operands of moved instruction to the worklist, except if they are
1291  // outside the inner loop or are the induction PHI.
1292  for (Value *Op : WorkList[i]->operands()) {
1293  Instruction *OpI = dyn_cast<Instruction>(Op);
1294  if (!OpI ||
1295  this->LI->getLoopFor(OpI->getParent()) != this->InnerLoop ||
1296  OpI == InductionPHI)
1297  continue;
1298  WorkList.insert(OpI);
1299  }
1300  }
1301  };
1302 
1303  // FIXME: Should we interchange when we have a constant condition?
1304  Instruction *CondI = dyn_cast<Instruction>(
1305  cast<BranchInst>(InnerLoop->getLoopLatch()->getTerminator())
1306  ->getCondition());
1307  if (CondI)
1308  WorkList.insert(CondI);
1309  MoveInstructions();
1310  WorkList.insert(cast<Instruction>(InnerIndexVar));
1311  MoveInstructions();
1312 
1313  // Splits the inner loops phi nodes out into a separate basic block.
1314  BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1315  SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI);
1316  LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n");
1317  }
1318 
1319  // Instructions in the original inner loop preheader may depend on values
1320  // defined in the outer loop header. Move them there, because the original
1321  // inner loop preheader will become the entry into the interchanged loop nest.
1322  // Currently we move all instructions and rely on LICM to move invariant
1323  // instructions outside the loop nest.
1324  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1325  BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1326  if (InnerLoopPreHeader != OuterLoopHeader) {
1327  SmallPtrSet<Instruction *, 4> NeedsMoving;
1328  for (Instruction &I :
1329  make_early_inc_range(make_range(InnerLoopPreHeader->begin(),
1330  std::prev(InnerLoopPreHeader->end()))))
1331  I.moveBefore(OuterLoopHeader->getTerminator());
1332  }
1333 
1334  Transformed |= adjustLoopLinks();
1335  if (!Transformed) {
1336  LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n");
1337  return false;
1338  }
1339 
1340  return true;
1341 }
1342 
1343 /// \brief Move all instructions except the terminator from FromBB right before
1344 /// InsertBefore
1345 static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
1346  auto &ToList = InsertBefore->getParent()->getInstList();
1347  auto &FromList = FromBB->getInstList();
1348 
1349  ToList.splice(InsertBefore->getIterator(), FromList, FromList.begin(),
1350  FromBB->getTerminator()->getIterator());
1351 }
1352 
1353 /// Swap instructions between \p BB1 and \p BB2 but keep terminators intact.
1354 static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2) {
1355  // Save all non-terminator instructions of BB1 into TempInstrs and unlink them
1356  // from BB1 afterwards.
1357  auto Iter = map_range(*BB1, [](Instruction &I) { return &I; });
1358  SmallVector<Instruction *, 4> TempInstrs(Iter.begin(), std::prev(Iter.end()));
1359  for (Instruction *I : TempInstrs)
1360  I->removeFromParent();
1361 
1362  // Move instructions from BB2 to BB1.
1363  moveBBContents(BB2, BB1->getTerminator());
1364 
1365  // Move instructions from TempInstrs to BB2.
1366  for (Instruction *I : TempInstrs)
1367  I->insertBefore(BB2->getTerminator());
1368 }
1369 
1370 // Update BI to jump to NewBB instead of OldBB. Records updates to the
1371 // dominator tree in DTUpdates. If \p MustUpdateOnce is true, assert that
1372 // \p OldBB is exactly once in BI's successor list.
1373 static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
1374  BasicBlock *NewBB,
1375  std::vector<DominatorTree::UpdateType> &DTUpdates,
1376  bool MustUpdateOnce = true) {
1377  assert((!MustUpdateOnce ||
1379  [OldBB](BasicBlock *BB) {
1380  return BB == OldBB;
1381  }) == 1) && "BI must jump to OldBB exactly once.");
1382  bool Changed = false;
1383  for (Use &Op : BI->operands())
1384  if (Op == OldBB) {
1385  Op.set(NewBB);
1386  Changed = true;
1387  }
1388 
1389  if (Changed) {
1390  DTUpdates.push_back(
1392  DTUpdates.push_back(
1394  }
1395  assert(Changed && "Expected a successor to be updated");
1396 }
1397 
1398 // Move Lcssa PHIs to the right place.
1399 static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,
1400  BasicBlock *InnerLatch, BasicBlock *OuterHeader,
1401  BasicBlock *OuterLatch, BasicBlock *OuterExit,
1402  Loop *InnerLoop, LoopInfo *LI) {
1403 
1404  // Deal with LCSSA PHI nodes in the exit block of the inner loop, that are
1405  // defined either in the header or latch. Those blocks will become header and
1406  // latch of the new outer loop, and the only possible users can PHI nodes
1407  // in the exit block of the loop nest or the outer loop header (reduction
1408  // PHIs, in that case, the incoming value must be defined in the inner loop
1409  // header). We can just substitute the user with the incoming value and remove
1410  // the PHI.
1411  for (PHINode &P : make_early_inc_range(InnerExit->phis())) {
1412  assert(P.getNumIncomingValues() == 1 &&
1413  "Only loops with a single exit are supported!");
1414 
1415  // Incoming values are guaranteed be instructions currently.
1416  auto IncI = cast<Instruction>(P.getIncomingValueForBlock(InnerLatch));
1417  // Skip phis with incoming values from the inner loop body, excluding the
1418  // header and latch.
1419  if (IncI->getParent() != InnerLatch && IncI->getParent() != InnerHeader)
1420  continue;
1421 
1422  assert(all_of(P.users(),
1423  [OuterHeader, OuterExit, IncI, InnerHeader](User *U) {
1424  return (cast<PHINode>(U)->getParent() == OuterHeader &&
1425  IncI->getParent() == InnerHeader) ||
1426  cast<PHINode>(U)->getParent() == OuterExit;
1427  }) &&
1428  "Can only replace phis iff the uses are in the loop nest exit or "
1429  "the incoming value is defined in the inner header (it will "
1430  "dominate all loop blocks after interchanging)");
1431  P.replaceAllUsesWith(IncI);
1432  P.eraseFromParent();
1433  }
1434 
1435  SmallVector<PHINode *, 8> LcssaInnerExit;
1436  for (PHINode &P : InnerExit->phis())
1437  LcssaInnerExit.push_back(&P);
1438 
1439  SmallVector<PHINode *, 8> LcssaInnerLatch;
1440  for (PHINode &P : InnerLatch->phis())
1441  LcssaInnerLatch.push_back(&P);
1442 
1443  // Lcssa PHIs for values used outside the inner loop are in InnerExit.
1444  // If a PHI node has users outside of InnerExit, it has a use outside the
1445  // interchanged loop and we have to preserve it. We move these to
1446  // InnerLatch, which will become the new exit block for the innermost
1447  // loop after interchanging.
1448  for (PHINode *P : LcssaInnerExit)
1449  P->moveBefore(InnerLatch->getFirstNonPHI());
1450 
1451  // If the inner loop latch contains LCSSA PHIs, those come from a child loop
1452  // and we have to move them to the new inner latch.
1453  for (PHINode *P : LcssaInnerLatch)
1454  P->moveBefore(InnerExit->getFirstNonPHI());
1455 
1456  // Deal with LCSSA PHI nodes in the loop nest exit block. For PHIs that have
1457  // incoming values defined in the outer loop, we have to add a new PHI
1458  // in the inner loop latch, which became the exit block of the outer loop,
1459  // after interchanging.
1460  if (OuterExit) {
1461  for (PHINode &P : OuterExit->phis()) {
1462  if (P.getNumIncomingValues() != 1)
1463  continue;
1464  // Skip Phis with incoming values defined in the inner loop. Those should
1465  // already have been updated.
1466  auto I = dyn_cast<Instruction>(P.getIncomingValue(0));
1467  if (!I || LI->getLoopFor(I->getParent()) == InnerLoop)
1468  continue;
1469 
1470  PHINode *NewPhi = dyn_cast<PHINode>(P.clone());
1471  NewPhi->setIncomingValue(0, P.getIncomingValue(0));
1472  NewPhi->setIncomingBlock(0, OuterLatch);
1473  NewPhi->insertBefore(InnerLatch->getFirstNonPHI());
1474  P.setIncomingValue(0, NewPhi);
1475  }
1476  }
1477 
1478  // Now adjust the incoming blocks for the LCSSA PHIs.
1479  // For PHIs moved from Inner's exit block, we need to replace Inner's latch
1480  // with the new latch.
1481  InnerLatch->replacePhiUsesWith(InnerLatch, OuterLatch);
1482 }
1483 
1484 bool LoopInterchangeTransform::adjustLoopBranches() {
1485  LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n");
1486  std::vector<DominatorTree::UpdateType> DTUpdates;
1487 
1488  BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1489  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1490 
1491  assert(OuterLoopPreHeader != OuterLoop->getHeader() &&
1492  InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader &&
1493  InnerLoopPreHeader && "Guaranteed by loop-simplify form");
1494  // Ensure that both preheaders do not contain PHI nodes and have single
1495  // predecessors. This allows us to move them easily. We use
1496  // InsertPreHeaderForLoop to create an 'extra' preheader, if the existing
1497  // preheaders do not satisfy those conditions.
1498  if (isa<PHINode>(OuterLoopPreHeader->begin()) ||
1499  !OuterLoopPreHeader->getUniquePredecessor())
1500  OuterLoopPreHeader =
1501  InsertPreheaderForLoop(OuterLoop, DT, LI, nullptr, true);
1502  if (InnerLoopPreHeader == OuterLoop->getHeader())
1503  InnerLoopPreHeader =
1504  InsertPreheaderForLoop(InnerLoop, DT, LI, nullptr, true);
1505 
1506  // Adjust the loop preheader
1507  BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1508  BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1509  BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
1510  BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1511  BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor();
1512  BasicBlock *InnerLoopLatchPredecessor =
1513  InnerLoopLatch->getUniquePredecessor();
1514  BasicBlock *InnerLoopLatchSuccessor;
1515  BasicBlock *OuterLoopLatchSuccessor;
1516 
1517  BranchInst *OuterLoopLatchBI =
1518  dyn_cast<BranchInst>(OuterLoopLatch->getTerminator());
1519  BranchInst *InnerLoopLatchBI =
1520  dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
1521  BranchInst *OuterLoopHeaderBI =
1522  dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
1523  BranchInst *InnerLoopHeaderBI =
1524  dyn_cast<BranchInst>(InnerLoopHeader->getTerminator());
1525 
1526  if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
1527  !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
1528  !InnerLoopHeaderBI)
1529  return false;
1530 
1531  BranchInst *InnerLoopLatchPredecessorBI =
1532  dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator());
1533  BranchInst *OuterLoopPredecessorBI =
1534  dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator());
1535 
1536  if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
1537  return false;
1538  BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor();
1539  if (!InnerLoopHeaderSuccessor)
1540  return false;
1541 
1542  // Adjust Loop Preheader and headers.
1543  // The branches in the outer loop predecessor and the outer loop header can
1544  // be unconditional branches or conditional branches with duplicates. Consider
1545  // this when updating the successors.
1546  updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader,
1547  InnerLoopPreHeader, DTUpdates, /*MustUpdateOnce=*/false);
1548  // The outer loop header might or might not branch to the outer latch.
1549  // We are guaranteed to branch to the inner loop preheader.
1550  if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch)) {
1551  // In this case the outerLoopHeader should branch to the InnerLoopLatch.
1552  updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, InnerLoopLatch,
1553  DTUpdates,
1554  /*MustUpdateOnce=*/false);
1555  }
1556  updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader,
1557  InnerLoopHeaderSuccessor, DTUpdates,
1558  /*MustUpdateOnce=*/false);
1559 
1560  // Adjust reduction PHI's now that the incoming block has changed.
1561  InnerLoopHeaderSuccessor->replacePhiUsesWith(InnerLoopHeader,
1562  OuterLoopHeader);
1563 
1564  updateSuccessor(InnerLoopHeaderBI, InnerLoopHeaderSuccessor,
1565  OuterLoopPreHeader, DTUpdates);
1566 
1567  // -------------Adjust loop latches-----------
1568  if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader)
1569  InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1);
1570  else
1571  InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0);
1572 
1573  updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch,
1574  InnerLoopLatchSuccessor, DTUpdates);
1575 
1576 
1577  if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader)
1578  OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1);
1579  else
1580  OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0);
1581 
1582  updateSuccessor(InnerLoopLatchBI, InnerLoopLatchSuccessor,
1583  OuterLoopLatchSuccessor, DTUpdates);
1584  updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
1585  DTUpdates);
1586 
1587  DT->applyUpdates(DTUpdates);
1588  restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
1589  OuterLoopPreHeader);
1590 
1591  moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
1592  OuterLoopHeader, OuterLoopLatch, InnerLoop->getExitBlock(),
1593  InnerLoop, LI);
1594  // For PHIs in the exit block of the outer loop, outer's latch has been
1595  // replaced by Inners'.
1596  OuterLoopLatchSuccessor->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
1597 
1598  // Now update the reduction PHIs in the inner and outer loop headers.
1599  SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
1600  for (PHINode &PHI : drop_begin(InnerLoopHeader->phis()))
1601  InnerLoopPHIs.push_back(cast<PHINode>(&PHI));
1602  for (PHINode &PHI : drop_begin(OuterLoopHeader->phis()))
1603  OuterLoopPHIs.push_back(cast<PHINode>(&PHI));
1604 
1605  auto &OuterInnerReductions = LIL.getOuterInnerReductions();
1606  (void)OuterInnerReductions;
1607 
1608  // Now move the remaining reduction PHIs from outer to inner loop header and
1609  // vice versa. The PHI nodes must be part of a reduction across the inner and
1610  // outer loop and all the remains to do is and updating the incoming blocks.
1611  for (PHINode *PHI : OuterLoopPHIs) {
1612  PHI->moveBefore(InnerLoopHeader->getFirstNonPHI());
1613  assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node");
1614  }
1615  for (PHINode *PHI : InnerLoopPHIs) {
1616  PHI->moveBefore(OuterLoopHeader->getFirstNonPHI());
1617  assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node");
1618  }
1619 
1620  // Update the incoming blocks for moved PHI nodes.
1621  OuterLoopHeader->replacePhiUsesWith(InnerLoopPreHeader, OuterLoopPreHeader);
1622  OuterLoopHeader->replacePhiUsesWith(InnerLoopLatch, OuterLoopLatch);
1623  InnerLoopHeader->replacePhiUsesWith(OuterLoopPreHeader, InnerLoopPreHeader);
1624  InnerLoopHeader->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
1625 
1626  // Values defined in the outer loop header could be used in the inner loop
1627  // latch. In that case, we need to create LCSSA phis for them, because after
1628  // interchanging they will be defined in the new inner loop and used in the
1629  // new outer loop.
1630  IRBuilder<> Builder(OuterLoopHeader->getContext());
1631  SmallVector<Instruction *, 4> MayNeedLCSSAPhis;
1632  for (Instruction &I :
1633  make_range(OuterLoopHeader->begin(), std::prev(OuterLoopHeader->end())))
1634  MayNeedLCSSAPhis.push_back(&I);
1635  formLCSSAForInstructions(MayNeedLCSSAPhis, *DT, *LI, SE, Builder);
1636 
1637  return true;
1638 }
1639 
1640 bool LoopInterchangeTransform::adjustLoopLinks() {
1641  // Adjust all branches in the inner and outer loop.
1642  bool Changed = adjustLoopBranches();
1643  if (Changed) {
1644  // We have interchanged the preheaders so we need to interchange the data in
1645  // the preheaders as well. This is because the content of the inner
1646  // preheader was previously executed inside the outer loop.
1647  BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1648  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1649  swapBBContents(OuterLoopPreHeader, InnerLoopPreHeader);
1650  }
1651  return Changed;
1652 }
1653 
1654 /// Main LoopInterchange Pass.
1656  static char ID;
1657 
1660  }
1661 
1662  void getAnalysisUsage(AnalysisUsage &AU) const override {
1665 
1667  }
1668 
1669  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
1670  if (skipLoop(L))
1671  return false;
1672 
1673  auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1674  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1675  auto *DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI();
1676  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1677  auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
1678 
1679  return LoopInterchange(SE, LI, DI, DT, ORE).run(L);
1680  }
1681 };
1682 
1684 
1686  "Interchanges loops for cache reuse", false, false)
1690 
1692  "Interchanges loops for cache reuse", false, false)
1693 
1695  return new LoopInterchangeLegacyPass();
1696 }
1697 
1699  LoopAnalysisManager &AM,
1701  LPMUpdater &U) {
1702  Function &F = *LN.getParent();
1703 
1704  DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
1706  if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, &ORE).run(LN))
1707  return PreservedAnalyses::all();
1709 }
i
i
Definition: README.txt:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::initializeLoopInterchangeLegacyPassPass
void initializeLoopInterchangeLegacyPassPass(PassRegistry &)
isLegalToInterChangeLoops
static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, unsigned InnerLoopId, unsigned OuterLoopId)
Definition: LoopInterchange.cpp:257
moveBBContents
static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore)
Move all instructions except the terminator from FromBB right before InsertBefore.
Definition: LoopInterchange.cpp:1345
llvm::BasicBlock::end
iterator end()
Definition: BasicBlock.h:298
followLCSSA
static Value * followLCSSA(Value *SV)
Definition: LoopInterchange.cpp:664
llvm::OptimizationRemarkMissed
Diagnostic information for missed-optimization remarks.
Definition: DiagnosticInfo.h:729
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:63
llvm
Definition: AllocatorList.h:23
llvm::LoopBase::getUniqueExitBlock
BlockT * getUniqueExitBlock() const
If getUniqueExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:138
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:275
llvm::BranchInst::successors
iterator_range< succ_op_iterator > successors()
Definition: Instructions.h:3115
llvm::LoopBase::getExitBlock
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:82
llvm::PHINode::incoming_values
op_range incoming_values()
Definition: Instructions.h:2656
llvm::RecurrenceDescriptor::isReductionPHI
static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Returns true if Phi is a reduction in TheLoop.
Definition: IVDescriptors.cpp:647
llvm::User::operands
op_range operands()
Definition: User.h:242
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::SCEVAddRecExpr::isAffine
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
Definition: ScalarEvolutionExpressions.h:378
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:229
Scalar.h
llvm::DependenceAnalysisWrapperPass
Legacy pass manager pass to access dependence information.
Definition: DependenceAnalysis.h:979
llvm::Function
Definition: Function.h:61
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::cfg::UpdateKind::Insert
@ Insert
StringRef.h
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Pass.h
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
DEBUG_TYPE
#define DEBUG_TYPE
Definition: LoopInterchange.cpp:56
uses
This might compile to this xmm1 xorps xmm0 movss xmm0 ret Now consider if the code caused xmm1 to get spilled This might produce this xmm1 movaps xmm0 movaps xmm1 movss xmm0 ret since the reload is only used by these we could fold it into the uses
Definition: README-SSE.txt:258
llvm::SmallVector< Loop *, 8 >
Statistic.h
llvm::LoopBase::getLoopPredecessor
BlockT * getLoopPredecessor() const
If the given loop's header has exactly one unique predecessor outside the loop, return it.
Definition: LoopInfoImpl.h:195
llvm::Dependence::DVEntry::EQ
@ EQ
Definition: DependenceAnalysis.h:91
ErrorHandling.h
llvm::IRBuilder<>
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:626
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:443
llvm::LoopInfoBase::changeLoopFor
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
Definition: LoopInfo.h:1002
moveLCSSAPhis
static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader, BasicBlock *InnerLatch, BasicBlock *OuterHeader, BasicBlock *OuterLatch, BasicBlock *OuterExit, Loop *InnerLoop, LoopInfo *LI)
Definition: LoopInterchange.cpp:1399
OptimizationRemarkEmitter.h
llvm::Instruction::insertBefore
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:84
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::getLoopAnalysisUsage
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:150
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
isOuterMostDepPositive
static bool isOuterMostDepPositive(CharMatrix &DepMatrix, unsigned Row, unsigned Column)
Definition: LoopInterchange.cpp:196
ScalarEvolution.h
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:46
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:338
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:52
llvm::cfg::UpdateKind::Delete
@ Delete
interchange
loop interchange
Definition: LoopInterchange.cpp:1691
llvm::LoopBase::begin
iterator begin() const
Definition: LoopInfo.h:154
findInnerReductionPhi
static PHINode * findInnerReductionPhi(Loop *L, Value *V)
Definition: LoopInterchange.cpp:675
llvm::SmallPtrSet< PHINode *, 4 >
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:128
LoopInterchangeLegacyPass::LoopInterchangeLegacyPass
LoopInterchangeLegacyPass()
Definition: LoopInterchange.cpp:1658
llvm::codeview::VFTableSlotKind::Outer
@ Outer
STLExtras.h
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:635
LoopInterchangeLegacyPass
Main LoopInterchange Pass.
Definition: LoopInterchange.cpp:1655
llvm::successors
succ_range successors(Instruction *I)
Definition: CFG.h:262
llvm::SCEVConstant::getValue
ConstantInt * getValue() const
Definition: ScalarEvolutionExpressions.h:56
llvm::LoopStandardAnalysisResults::DT
DominatorTree & DT
Definition: LoopAnalysisManager.h:55
llvm::Type::isFloatingPointTy
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:163
llvm::PHINode::setIncomingValue
void setIncomingValue(unsigned i, Value *V)
Definition: Instructions.h:2669
llvm::count_if
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1595
loops
loops
Definition: LoopInfo.cpp:1097
INITIALIZE_PASS_END
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
Definition: RegBankSelect.cpp:69
llvm::DominatorTreeBase::applyUpdates
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
Definition: GenericDomTree.h:544
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::BasicBlock::getUniqueSuccessor
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:302
loop
Analysis the ScalarEvolution expression for r is< loop > Outside the loop
Definition: README.txt:8
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
llvm::Dependence::DVEntry::GE
@ GE
Definition: DependenceAnalysis.h:95
Instruction.h
CommandLine.h
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:77
llvm::LoopBase::getParentLoop
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Definition: LoopInfo.h:113
llvm::LoopBase::getSubLoops
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
Definition: LoopInfo.h:143
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1505
llvm::InductionDescriptor
A struct for saving information about induction variables.
Definition: IVDescriptors.h:267
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::createLoopInterchangePass
Pass * createLoopInterchangePass()
Definition: LoopInterchange.cpp:1694
Constants.h
llvm::PHINode::getIncomingValue
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
Definition: Instructions.h:2666
areInnerLoopExitPHIsSupported
static bool areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL, SmallPtrSetImpl< PHINode * > &Reductions)
Definition: LoopInterchange.cpp:911
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::LoopNest::skipEmptyBlockUntil
static const BasicBlock & skipEmptyBlockUntil(const BasicBlock *From, const BasicBlock *End)
Recursivelly traverse all empty 'single successor' basic blocks of From (if there are any).
Definition: LoopNestAnalysis.cpp:208
llvm::User
Definition: User.h:44
InstrTypes.h
llvm::LoopBase::addChildLoop
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
Definition: LoopInfo.h:395
MaxLoopNestDepth
static const unsigned MaxLoopNestDepth
Definition: LoopInterchange.cpp:77
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:296
swapBBContents
static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2)
Swap instructions between BB1 and BB2 but keep terminators intact.
Definition: LoopInterchange.cpp:1354
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
populateWorklist
static LoopVector populateWorklist(Loop &L)
Definition: LoopInterchange.cpp:273
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
false
Definition: StackSlotColoring.cpp:142
populateDependencyMatrix
static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, Loop *L, DependenceInfo *DI)
Definition: LoopInterchange.cpp:89
llvm::Instruction
Definition: Instruction.h:45
isProfitableForVectorization
static bool isProfitableForVectorization(unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix)
Definition: LoopInterchange.cpp:1101
llvm::BasicBlock::phis
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:354
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
LoopUtils.h
llvm::LPPassManager
Definition: LoopPass.h:75
LoopInterchangeCostThreshold
static cl::opt< int > LoopInterchangeCostThreshold("loop-interchange-threshold", cl::init(0), cl::Hidden, cl::desc("Interchange if you gain more than this number"))
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(LoopInterchangeLegacyPass, "loop-interchange", "Interchanges loops for cache reuse", false, false) INITIALIZE_PASS_END(LoopInterchangeLegacyPass
llvm::LoopBase::getExitingBlock
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:49
llvm::BasicBlock::getFirstNonPHI
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:212
llvm::LoopBase::removeBlockFromLoop
void removeBlockFromLoop(BlockT *BB)
This removes the specified basic block from the current loop, updating the Blocks as appropriate.
Definition: LoopInfo.h:460
Utils.h
getInductionVariable
static PHINode * getInductionVariable(Loop *L, ScalarEvolution *SE)
Definition: LoopInterchange.cpp:295
llvm::PHINode::getNumIncomingValues
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Definition: Instructions.h:2662
Type.h
llvm::Loop::LoopBounds::Direction
Direction
An enum for the direction of the loop.
Definition: LoopInfo.h:698
LoopInfo.h
llvm::ARM_PROC::IE
@ IE
Definition: ARMBaseInfo.h:27
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:3885
llvm::PHINode::getIncomingValueNumForOperand
static unsigned getIncomingValueNumForOperand(unsigned i)
Definition: Instructions.h:2680
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:202
llvm::InsertPreheaderForLoop
BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...
Definition: LoopSimplify.cpp:123
llvm::getLoopPassPreservedAnalyses
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Definition: LoopAnalysisManager.cpp:140
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1419
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:78
llvm::DependenceInfo
DependenceInfo - This class is the main dependence-analysis driver.
Definition: DependenceAnalysis.h:272
llvm::LoopPass
Definition: LoopPass.h:27
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition: LoopPassManager.h:243
llvm::PHINode::setIncomingBlock
void setIncomingBlock(unsigned i, BasicBlock *BB)
Definition: Instructions.h:2705
containsNoDependence
static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row, unsigned Column)
Definition: LoopInterchange.cpp:209
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::Dependence::DVEntry::GT
@ GT
Definition: DependenceAnalysis.h:93
LoopInterchangeLegacyPass::ID
static char ID
Definition: LoopInterchange.cpp:1656
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:964
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:905
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:440
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:581
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:167
llvm::Dependence::DVEntry::LE
@ LE
Definition: DependenceAnalysis.h:92
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1570
llvm::SCEVConstant
This class represents a constant integer value.
Definition: ScalarEvolutionExpressions.h:47
LoopInterchangeLegacyPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: LoopInterchange.cpp:1662
validDepInterchange
static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row, unsigned OuterLoopId, char InnerDep, char OuterDep)
Definition: LoopInterchange.cpp:219
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:216
llvm::Dependence::DVEntry::LT
@ LT
Definition: DependenceAnalysis.h:90
llvm::Instruction::mayHaveSideEffects
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
Definition: Instruction.h:629
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
LoopNestAnalysis.h
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:649
LoopInterchange.h
llvm::Instruction::clone
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
Definition: Instruction.cpp:821
llvm::LoopBase::removeChildLoop
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
Definition: LoopInfo.h:404
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:33
llvm::LoopInfo
Definition: LoopInfo.h:1080
llvm::BasicBlock::instructionsWithoutDebug
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=false) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
Definition: BasicBlock.cpp:100
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1512
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
this
Analysis the ScalarEvolution expression for r is this
Definition: README.txt:8
LoopPass.h
llvm::Loop::getCanonicalInductionVariable
PHINode * getCanonicalInductionVariable() const
Check to see if the loop has a canonical induction variable: an integer recurrence that starts at 0 a...
Definition: LoopInfo.cpp:149
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
llvm::transform
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1602
llvm::ConstantInt::isZero
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:192
llvm::BasicBlock::getUniquePredecessor
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:272
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:81
llvm::LoopStandardAnalysisResults::LI
LoopInfo & LI
Definition: LoopAnalysisManager.h:56
llvm::PICLevel::Level
Level
Definition: CodeGen.h:33
llvm::LoopNest::getParent
Function * getParent() const
Return the function to which the loop-nest belongs.
Definition: LoopNestAnalysis.h:143
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:298
for
this could be done in SelectionDAGISel along with other special for
Definition: README.txt:104
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:148
llvm::BasicBlock::front
const Instruction & front() const
Definition: BasicBlock.h:308
areOuterLoopExitPHIsSupported
static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
Definition: LoopInterchange.cpp:936
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:32
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:165
llvm::InductionDescriptor::isInductionPHI
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
Definition: IVDescriptors.cpp:1184
llvm::BasicBlock::replacePhiUsesWith
void replacePhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
Definition: BasicBlock.cpp:440
llvm::map_range
auto map_range(ContainerTy &&C, FuncTy F)
Definition: STLExtras.h:311
llvm::OptimizationRemarkEmitterWrapperPass
OptimizationRemarkEmitter legacy analysis pass.
Definition: OptimizationRemarkEmitter.h:138
llvm::ScalarEvolution::forgetLoop
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
Definition: ScalarEvolution.cpp:7173
llvm::SCEVAddRecExpr::getLoop
const Loop * getLoop() const
Definition: ScalarEvolutionExpressions.h:363
interChangeDependencies
static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx, unsigned ToIndx)
Definition: LoopInterchange.cpp:188
llvm::Loop::isLCSSAForm
bool isLCSSAForm(const DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:454
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:314
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::ArrayRef::begin
iterator begin() const
Definition: ArrayRef.h:151
reuse
loop Interchanges loops for cache reuse
Definition: LoopInterchange.cpp:1692
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:352
Direction
Loop::LoopBounds::Direction Direction
Definition: LoopInfo.cpp:229
Casting.h
DiagnosticInfo.h
Function.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::ScalarEvolution::isSCEVable
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Definition: ScalarEvolution.cpp:3751
llvm::LoopBase::addBlockEntry
void addBlockEntry(BlockT *BB)
This adds a basic block directly to the basic block list.
Definition: LoopInfo.h:423
transform
instcombine should handle this transform
Definition: README.txt:262
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
llvm::LoopBase::getNumBackEdges
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
Definition: LoopInfo.h:250
updateSuccessor
static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB, BasicBlock *NewBB, std::vector< DominatorTree::UpdateType > &DTUpdates, bool MustUpdateOnce=true)
Definition: LoopInterchange.cpp:1373
llvm::LoopStandardAnalysisResults::SE
ScalarEvolution & SE
Definition: LoopAnalysisManager.h:57
llvm::LoopStandardAnalysisResults::AA
AAResults & AA
Definition: LoopAnalysisManager.h:53
llvm::LoopNest::getLoops
ArrayRef< Loop * > getLoops() const
Get the loops in the nest.
Definition: LoopNestAnalysis.h:99
ScalarEvolutionExpressions.h
llvm::RecurrenceDescriptor
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:66
llvm::BasicBlock::getInstList
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:363
llvm::DependenceInfo::depends
std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool PossiblyLoopIndependent)
depends - Tests for a dependence between the Src and Dst instructions.
Definition: DependenceAnalysis.cpp:3481
llvm::formLCSSAForInstructions
bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, IRBuilderBase &Builder, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
Definition: LCSSA.cpp:79
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:684
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
Instructions.h
llvm::User::getNumOperands
unsigned getNumOperands() const
Definition: User.h:191
llvm::ConstantInt::isNegative
bool isNegative() const
Definition: Constants.h:187
SmallVector.h
User.h
llvm::LoopInfoBase::changeTopLevelLoop
void changeTopLevelLoop(LoopT *OldLoop, LoopT *NewLoop)
Replace the specified loop in the top-level loops list with the indicated loop.
Definition: LoopInfo.h:1012
Dominators.h
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::PHINode::getIncomingBlock
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Definition: Instructions.h:2686
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
llvm::PHINode
Definition: Instructions.h:2572
llvm::MipsISD::Ins
@ Ins
Definition: MipsISelLowering.h:157
llvm::LoopInterchangePass::run
PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LoopInterchange.cpp:1698
llvm::SmallPtrSetImpl< PHINode * >
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:307
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:43
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1450
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:171
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
DependenceAnalysis.h
LoopInterchangeLegacyPass::runOnLoop
bool runOnLoop(Loop *L, LPPassManager &LPM) override
Definition: LoopInterchange.cpp:1669
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
MaxMemInstrCount
static const unsigned MaxMemInstrCount
Definition: LoopInterchange.cpp:74
llvm::cl::desc
Definition: CommandLine.h:411
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3005
raw_ostream.h
llvm::ScalarEvolution::getBackedgeTakenCount
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
Definition: ScalarEvolution.cpp:7005
BasicBlockUtils.h
llvm::SplitBlock
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition: BasicBlockUtils.cpp:820
Value.h
InitializePasses.h
llvm::LoopNest
This class represents a loop nest and can be used to query its properties.
Definition: LoopNestAnalysis.h:27
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
Debug.h
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:434
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3098
llvm::SCEVAddRecExpr::getStepRecurrence
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Definition: ScalarEvolutionExpressions.h:369
llvm::Instruction::moveBefore
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:97
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38