LLVM  14.0.0git
LoopInterchange.cpp
Go to the documentation of this file.
1 //===- LoopInterchange.cpp - Loop interchange pass-------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This Pass handles loop interchange transform.
10 // This pass interchanges loops to provide a more cache-friendly memory access
11 // patterns.
12 //
13 //===----------------------------------------------------------------------===//
14 
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Analysis/LoopInfo.h"
23 #include "llvm/Analysis/LoopPass.h"
27 #include "llvm/IR/BasicBlock.h"
28 #include "llvm/IR/Constants.h"
29 #include "llvm/IR/DiagnosticInfo.h"
30 #include "llvm/IR/Dominators.h"
31 #include "llvm/IR/Function.h"
32 #include "llvm/IR/IRBuilder.h"
33 #include "llvm/IR/InstrTypes.h"
34 #include "llvm/IR/Instruction.h"
35 #include "llvm/IR/Instructions.h"
36 #include "llvm/IR/Type.h"
37 #include "llvm/IR/User.h"
38 #include "llvm/IR/Value.h"
39 #include "llvm/InitializePasses.h"
40 #include "llvm/Pass.h"
41 #include "llvm/Support/Casting.h"
43 #include "llvm/Support/Debug.h"
46 #include "llvm/Transforms/Scalar.h"
47 #include "llvm/Transforms/Utils.h"
50 #include <cassert>
51 #include <utility>
52 #include <vector>
53 
54 using namespace llvm;
55 
56 #define DEBUG_TYPE "loop-interchange"
57 
58 STATISTIC(LoopsInterchanged, "Number of loops interchanged");
59 
61  "loop-interchange-threshold", cl::init(0), cl::Hidden,
62  cl::desc("Interchange if you gain more than this number"));
63 
64 namespace {
65 
66 using LoopVector = SmallVector<Loop *, 8>;
67 
68 // TODO: Check if we can use a sparse matrix here.
69 using CharMatrix = std::vector<std::vector<char>>;
70 
71 } // end anonymous namespace
72 
73 // Maximum number of dependencies that can be handled in the dependency matrix.
74 static const unsigned MaxMemInstrCount = 100;
75 
76 // Maximum loop depth supported.
77 static const unsigned MaxLoopNestDepth = 10;
78 
79 #ifdef DUMP_DEP_MATRICIES
80 static void printDepMatrix(CharMatrix &DepMatrix) {
81  for (auto &Row : DepMatrix) {
82  for (auto D : Row)
83  LLVM_DEBUG(dbgs() << D << " ");
84  LLVM_DEBUG(dbgs() << "\n");
85  }
86 }
87 #endif
88 
89 static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
90  Loop *L, DependenceInfo *DI) {
91  using ValueVector = SmallVector<Value *, 16>;
92 
93  ValueVector MemInstr;
94 
95  // For each block.
96  for (BasicBlock *BB : L->blocks()) {
97  // Scan the BB and collect legal loads and stores.
98  for (Instruction &I : *BB) {
99  if (!isa<Instruction>(I))
100  return false;
101  if (auto *Ld = dyn_cast<LoadInst>(&I)) {
102  if (!Ld->isSimple())
103  return false;
104  MemInstr.push_back(&I);
105  } else if (auto *St = dyn_cast<StoreInst>(&I)) {
106  if (!St->isSimple())
107  return false;
108  MemInstr.push_back(&I);
109  }
110  }
111  }
112 
113  LLVM_DEBUG(dbgs() << "Found " << MemInstr.size()
114  << " Loads and Stores to analyze\n");
115 
116  ValueVector::iterator I, IE, J, JE;
117 
118  for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
119  for (J = I, JE = MemInstr.end(); J != JE; ++J) {
120  std::vector<char> Dep;
121  Instruction *Src = cast<Instruction>(*I);
122  Instruction *Dst = cast<Instruction>(*J);
123  if (Src == Dst)
124  continue;
125  // Ignore Input dependencies.
126  if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
127  continue;
128  // Track Output, Flow, and Anti dependencies.
129  if (auto D = DI->depends(Src, Dst, true)) {
130  assert(D->isOrdered() && "Expected an output, flow or anti dep.");
131  LLVM_DEBUG(StringRef DepType =
132  D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";
133  dbgs() << "Found " << DepType
134  << " dependency between Src and Dst\n"
135  << " Src:" << *Src << "\n Dst:" << *Dst << '\n');
136  unsigned Levels = D->getLevels();
137  char Direction;
138  for (unsigned II = 1; II <= Levels; ++II) {
139  const SCEV *Distance = D->getDistance(II);
140  const SCEVConstant *SCEVConst =
141  dyn_cast_or_null<SCEVConstant>(Distance);
142  if (SCEVConst) {
143  const ConstantInt *CI = SCEVConst->getValue();
144  if (CI->isNegative())
145  Direction = '<';
146  else if (CI->isZero())
147  Direction = '=';
148  else
149  Direction = '>';
150  Dep.push_back(Direction);
151  } else if (D->isScalar(II)) {
152  Direction = 'S';
153  Dep.push_back(Direction);
154  } else {
155  unsigned Dir = D->getDirection(II);
156  if (Dir == Dependence::DVEntry::LT ||
158  Direction = '<';
159  else if (Dir == Dependence::DVEntry::GT ||
161  Direction = '>';
162  else if (Dir == Dependence::DVEntry::EQ)
163  Direction = '=';
164  else
165  Direction = '*';
166  Dep.push_back(Direction);
167  }
168  }
169  while (Dep.size() != Level) {
170  Dep.push_back('I');
171  }
172 
173  DepMatrix.push_back(Dep);
174  if (DepMatrix.size() > MaxMemInstrCount) {
175  LLVM_DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount
176  << " dependencies inside loop\n");
177  return false;
178  }
179  }
180  }
181  }
182 
183  return true;
184 }
185 
186 // A loop is moved from index 'from' to an index 'to'. Update the Dependence
187 // matrix by exchanging the two columns.
188 static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx,
189  unsigned ToIndx) {
190  for (unsigned I = 0, E = DepMatrix.size(); I < E; ++I)
191  std::swap(DepMatrix[I][ToIndx], DepMatrix[I][FromIndx]);
192 }
193 
194 // Checks if outermost non '=','S'or'I' dependence in the dependence matrix is
195 // '>'
196 static bool isOuterMostDepPositive(CharMatrix &DepMatrix, unsigned Row,
197  unsigned Column) {
198  for (unsigned i = 0; i <= Column; ++i) {
199  if (DepMatrix[Row][i] == '<')
200  return false;
201  if (DepMatrix[Row][i] == '>')
202  return true;
203  }
204  // All dependencies were '=','S' or 'I'
205  return false;
206 }
207 
208 // Checks if no dependence exist in the dependency matrix in Row before Column.
209 static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row,
210  unsigned Column) {
211  for (unsigned i = 0; i < Column; ++i) {
212  if (DepMatrix[Row][i] != '=' && DepMatrix[Row][i] != 'S' &&
213  DepMatrix[Row][i] != 'I')
214  return false;
215  }
216  return true;
217 }
218 
219 static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row,
220  unsigned OuterLoopId, char InnerDep,
221  char OuterDep) {
222  if (isOuterMostDepPositive(DepMatrix, Row, OuterLoopId))
223  return false;
224 
225  if (InnerDep == OuterDep)
226  return true;
227 
228  // It is legal to interchange if and only if after interchange no row has a
229  // '>' direction as the leftmost non-'='.
230 
231  if (InnerDep == '=' || InnerDep == 'S' || InnerDep == 'I')
232  return true;
233 
234  if (InnerDep == '<')
235  return true;
236 
237  if (InnerDep == '>') {
238  // If OuterLoopId represents outermost loop then interchanging will make the
239  // 1st dependency as '>'
240  if (OuterLoopId == 0)
241  return false;
242 
243  // If all dependencies before OuterloopId are '=','S'or 'I'. Then
244  // interchanging will result in this row having an outermost non '='
245  // dependency of '>'
246  if (!containsNoDependence(DepMatrix, Row, OuterLoopId))
247  return true;
248  }
249 
250  return false;
251 }
252 
253 // Checks if it is legal to interchange 2 loops.
254 // [Theorem] A permutation of the loops in a perfect nest is legal if and only
255 // if the direction matrix, after the same permutation is applied to its
256 // columns, has no ">" direction as the leftmost non-"=" direction in any row.
257 static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
258  unsigned InnerLoopId,
259  unsigned OuterLoopId) {
260  unsigned NumRows = DepMatrix.size();
261  // For each row check if it is valid to interchange.
262  for (unsigned Row = 0; Row < NumRows; ++Row) {
263  char InnerDep = DepMatrix[Row][InnerLoopId];
264  char OuterDep = DepMatrix[Row][OuterLoopId];
265  if (InnerDep == '*' || OuterDep == '*')
266  return false;
267  if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep, OuterDep))
268  return false;
269  }
270  return true;
271 }
272 
273 static LoopVector populateWorklist(Loop &L) {
274  LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "
275  << L.getHeader()->getParent()->getName() << " Loop: %"
276  << L.getHeader()->getName() << '\n');
277  LoopVector LoopList;
278  Loop *CurrentLoop = &L;
279  const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops();
280  while (!Vec->empty()) {
281  // The current loop has multiple subloops in it hence it is not tightly
282  // nested.
283  // Discard all loops above it added into Worklist.
284  if (Vec->size() != 1)
285  return {};
286 
287  LoopList.push_back(CurrentLoop);
288  CurrentLoop = Vec->front();
289  Vec = &CurrentLoop->getSubLoops();
290  }
291  LoopList.push_back(CurrentLoop);
292  return LoopList;
293 }
294 
295 namespace {
296 
297 /// LoopInterchangeLegality checks if it is legal to interchange the loop.
298 class LoopInterchangeLegality {
299 public:
300  LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
302  : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
303 
304  /// Check if the loops can be interchanged.
305  bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
306  CharMatrix &DepMatrix);
307 
308  /// Discover induction PHIs in the header of \p L. Induction
309  /// PHIs are added to \p Inductions.
310  bool findInductions(Loop *L, SmallVectorImpl<PHINode *> &Inductions);
311 
312  /// Check if the loop structure is understood. We do not handle triangular
313  /// loops for now.
314  bool isLoopStructureUnderstood();
315 
316  bool currentLimitations();
317 
318  const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions() const {
319  return OuterInnerReductions;
320  }
321 
322  const SmallVectorImpl<PHINode *> &getInnerLoopInductions() const {
323  return InnerLoopInductions;
324  }
325 
326 private:
327  bool tightlyNested(Loop *Outer, Loop *Inner);
328  bool containsUnsafeInstructions(BasicBlock *BB);
329 
330  /// Discover induction and reduction PHIs in the header of \p L. Induction
331  /// PHIs are added to \p Inductions, reductions are added to
332  /// OuterInnerReductions. When the outer loop is passed, the inner loop needs
333  /// to be passed as \p InnerLoop.
334  bool findInductionAndReductions(Loop *L,
335  SmallVector<PHINode *, 8> &Inductions,
336  Loop *InnerLoop);
337 
338  Loop *OuterLoop;
339  Loop *InnerLoop;
340 
341  ScalarEvolution *SE;
342 
343  /// Interface to emit optimization remarks.
345 
346  /// Set of reduction PHIs taking part of a reduction across the inner and
347  /// outer loop.
348  SmallPtrSet<PHINode *, 4> OuterInnerReductions;
349 
350  /// Set of inner loop induction PHIs
351  SmallVector<PHINode *, 8> InnerLoopInductions;
352 };
353 
354 /// LoopInterchangeProfitability checks if it is profitable to interchange the
355 /// loop.
356 class LoopInterchangeProfitability {
357 public:
358  LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
360  : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
361 
362  /// Check if the loop interchange is profitable.
363  bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId,
364  CharMatrix &DepMatrix);
365 
366 private:
367  int getInstrOrderCost();
368 
369  Loop *OuterLoop;
370  Loop *InnerLoop;
371 
372  /// Scev analysis.
373  ScalarEvolution *SE;
374 
375  /// Interface to emit optimization remarks.
377 };
378 
379 /// LoopInterchangeTransform interchanges the loop.
380 class LoopInterchangeTransform {
381 public:
382  LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
383  LoopInfo *LI, DominatorTree *DT,
384  const LoopInterchangeLegality &LIL)
385  : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {}
386 
387  /// Interchange OuterLoop and InnerLoop.
388  bool transform();
389  void restructureLoops(Loop *NewInner, Loop *NewOuter,
390  BasicBlock *OrigInnerPreHeader,
391  BasicBlock *OrigOuterPreHeader);
392  void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
393 
394 private:
395  bool adjustLoopLinks();
396  bool adjustLoopBranches();
397 
398  Loop *OuterLoop;
399  Loop *InnerLoop;
400 
401  /// Scev analysis.
402  ScalarEvolution *SE;
403 
404  LoopInfo *LI;
405  DominatorTree *DT;
406 
407  const LoopInterchangeLegality &LIL;
408 };
409 
410 struct LoopInterchange {
411  ScalarEvolution *SE = nullptr;
412  LoopInfo *LI = nullptr;
413  DependenceInfo *DI = nullptr;
414  DominatorTree *DT = nullptr;
415 
416  /// Interface to emit optimization remarks.
418 
419  LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
421  : SE(SE), LI(LI), DI(DI), DT(DT), ORE(ORE) {}
422 
423  bool run(Loop *L) {
424  if (L->getParentLoop())
425  return false;
426 
427  return processLoopList(populateWorklist(*L));
428  }
429 
430  bool run(LoopNest &LN) {
431  const auto &LoopList = LN.getLoops();
432  for (unsigned I = 1; I < LoopList.size(); ++I)
433  if (LoopList[I]->getParentLoop() != LoopList[I - 1])
434  return false;
435  return processLoopList(LoopList);
436  }
437 
438  bool isComputableLoopNest(ArrayRef<Loop *> LoopList) {
439  for (Loop *L : LoopList) {
440  const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
441  if (isa<SCEVCouldNotCompute>(ExitCountOuter)) {
442  LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n");
443  return false;
444  }
445  if (L->getNumBackEdges() != 1) {
446  LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n");
447  return false;
448  }
449  if (!L->getExitingBlock()) {
450  LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n");
451  return false;
452  }
453  }
454  return true;
455  }
456 
457  unsigned selectLoopForInterchange(ArrayRef<Loop *> LoopList) {
458  // TODO: Add a better heuristic to select the loop to be interchanged based
459  // on the dependence matrix. Currently we select the innermost loop.
460  return LoopList.size() - 1;
461  }
462 
463  bool processLoopList(ArrayRef<Loop *> LoopList) {
464  bool Changed = false;
465  unsigned LoopNestDepth = LoopList.size();
466  if (LoopNestDepth < 2) {
467  LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n");
468  return false;
469  }
470  if (LoopNestDepth > MaxLoopNestDepth) {
471  LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than "
472  << MaxLoopNestDepth << "\n");
473  return false;
474  }
475  if (!isComputableLoopNest(LoopList)) {
476  LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n");
477  return false;
478  }
479 
480  LLVM_DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepth
481  << "\n");
482 
483  CharMatrix DependencyMatrix;
484  Loop *OuterMostLoop = *(LoopList.begin());
485  if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth,
486  OuterMostLoop, DI)) {
487  LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n");
488  return false;
489  }
490 #ifdef DUMP_DEP_MATRICIES
491  LLVM_DEBUG(dbgs() << "Dependence before interchange\n");
492  printDepMatrix(DependencyMatrix);
493 #endif
494 
495  // Get the Outermost loop exit.
496  BasicBlock *LoopNestExit = OuterMostLoop->getExitBlock();
497  if (!LoopNestExit) {
498  LLVM_DEBUG(dbgs() << "OuterMostLoop needs an unique exit block");
499  return false;
500  }
501 
502  unsigned SelecLoopId = selectLoopForInterchange(LoopList);
503  // Move the selected loop outwards to the best possible position.
504  Loop *LoopToBeInterchanged = LoopList[SelecLoopId];
505  for (unsigned i = SelecLoopId; i > 0; i--) {
506  bool Interchanged = processLoop(LoopToBeInterchanged, LoopList[i - 1], i,
507  i - 1, DependencyMatrix);
508  if (!Interchanged)
509  return Changed;
510  // Update the DependencyMatrix
511  interChangeDependencies(DependencyMatrix, i, i - 1);
512 #ifdef DUMP_DEP_MATRICIES
513  LLVM_DEBUG(dbgs() << "Dependence after interchange\n");
514  printDepMatrix(DependencyMatrix);
515 #endif
516  Changed |= Interchanged;
517  }
518  return Changed;
519  }
520 
521  bool processLoop(Loop *InnerLoop, Loop *OuterLoop, unsigned InnerLoopId,
522  unsigned OuterLoopId,
523  std::vector<std::vector<char>> &DependencyMatrix) {
524  LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId
525  << " and OuterLoopId = " << OuterLoopId << "\n");
526  LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
527  if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
528  LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n");
529  return false;
530  }
531  LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");
532  LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
533  if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) {
534  LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
535  return false;
536  }
537 
538  ORE->emit([&]() {
539  return OptimizationRemark(DEBUG_TYPE, "Interchanged",
540  InnerLoop->getStartLoc(),
541  InnerLoop->getHeader())
542  << "Loop interchanged with enclosing loop.";
543  });
544 
545  LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL);
546  LIT.transform();
547  LLVM_DEBUG(dbgs() << "Loops interchanged.\n");
548  LoopsInterchanged++;
549 
550  assert(InnerLoop->isLCSSAForm(*DT) &&
551  "Inner loop not left in LCSSA form after loop interchange!");
552  assert(OuterLoop->isLCSSAForm(*DT) &&
553  "Outer loop not left in LCSSA form after loop interchange!");
554 
555  return true;
556  }
557 };
558 
559 } // end anonymous namespace
560 
561 bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock *BB) {
562  return any_of(*BB, [](const Instruction &I) {
563  return I.mayHaveSideEffects() || I.mayReadFromMemory();
564  });
565 }
566 
567 bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
568  BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
569  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
570  BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
571 
572  LLVM_DEBUG(dbgs() << "Checking if loops are tightly nested\n");
573 
574  // A perfectly nested loop will not have any branch in between the outer and
575  // inner block i.e. outer header will branch to either inner preheader and
576  // outerloop latch.
577  BranchInst *OuterLoopHeaderBI =
578  dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
579  if (!OuterLoopHeaderBI)
580  return false;
581 
582  for (BasicBlock *Succ : successors(OuterLoopHeaderBI))
583  if (Succ != InnerLoopPreHeader && Succ != InnerLoop->getHeader() &&
584  Succ != OuterLoopLatch)
585  return false;
586 
587  LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n");
588  // We do not have any basic block in between now make sure the outer header
589  // and outer loop latch doesn't contain any unsafe instructions.
590  if (containsUnsafeInstructions(OuterLoopHeader) ||
591  containsUnsafeInstructions(OuterLoopLatch))
592  return false;
593 
594  // Also make sure the inner loop preheader does not contain any unsafe
595  // instructions. Note that all instructions in the preheader will be moved to
596  // the outer loop header when interchanging.
597  if (InnerLoopPreHeader != OuterLoopHeader &&
598  containsUnsafeInstructions(InnerLoopPreHeader))
599  return false;
600 
601  BasicBlock *InnerLoopExit = InnerLoop->getExitBlock();
602  // Ensure the inner loop exit block flows to the outer loop latch possibly
603  // through empty blocks.
604  const BasicBlock &SuccInner =
605  LoopNest::skipEmptyBlockUntil(InnerLoopExit, OuterLoopLatch);
606  if (&SuccInner != OuterLoopLatch) {
607  LLVM_DEBUG(dbgs() << "Inner loop exit block " << *InnerLoopExit
608  << " does not lead to the outer loop latch.\n";);
609  return false;
610  }
611  // The inner loop exit block does flow to the outer loop latch and not some
612  // other BBs, now make sure it contains safe instructions, since it will be
613  // moved into the (new) inner loop after interchange.
614  if (containsUnsafeInstructions(InnerLoopExit))
615  return false;
616 
617  LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n");
618  // We have a perfect loop nest.
619  return true;
620 }
621 
622 bool LoopInterchangeLegality::isLoopStructureUnderstood() {
623  BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
624  for (PHINode *InnerInduction : InnerLoopInductions) {
625  unsigned Num = InnerInduction->getNumOperands();
626  for (unsigned i = 0; i < Num; ++i) {
627  Value *Val = InnerInduction->getOperand(i);
628  if (isa<Constant>(Val))
629  continue;
630  Instruction *I = dyn_cast<Instruction>(Val);
631  if (!I)
632  return false;
633  // TODO: Handle triangular loops.
634  // e.g. for(int i=0;i<N;i++)
635  // for(int j=i;j<N;j++)
636  unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i);
637  if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
638  InnerLoopPreheader &&
639  !OuterLoop->isLoopInvariant(I)) {
640  return false;
641  }
642  }
643  }
644 
645  // TODO: Handle triangular loops of another form.
646  // e.g. for(int i=0;i<N;i++)
647  // for(int j=0;j<i;j++)
648  // or,
649  // for(int i=0;i<N;i++)
650  // for(int j=0;j*i<N;j++)
651  BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
652  BranchInst *InnerLoopLatchBI =
653  dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
654  if (!InnerLoopLatchBI->isConditional())
655  return false;
656  if (CmpInst *InnerLoopCmp =
657  dyn_cast<CmpInst>(InnerLoopLatchBI->getCondition())) {
658  Value *Op0 = InnerLoopCmp->getOperand(0);
659  Value *Op1 = InnerLoopCmp->getOperand(1);
660 
661  // LHS and RHS of the inner loop exit condition, e.g.,
662  // in "for(int j=0;j<i;j++)", LHS is j and RHS is i.
663  Value *Left = nullptr;
664  Value *Right = nullptr;
665 
666  // Check if V only involves inner loop induction variable.
667  // Return true if V is InnerInduction, or a cast from
668  // InnerInduction, or a binary operator that involves
669  // InnerInduction and a constant.
670  std::function<bool(Value *)> IsPathToInnerIndVar;
671  IsPathToInnerIndVar = [this, &IsPathToInnerIndVar](const Value *V) -> bool {
672  if (llvm::is_contained(InnerLoopInductions, V))
673  return true;
674  if (isa<Constant>(V))
675  return true;
676  const Instruction *I = dyn_cast<Instruction>(V);
677  if (!I)
678  return false;
679  if (isa<CastInst>(I))
680  return IsPathToInnerIndVar(I->getOperand(0));
681  if (isa<BinaryOperator>(I))
682  return IsPathToInnerIndVar(I->getOperand(0)) &&
683  IsPathToInnerIndVar(I->getOperand(1));
684  return false;
685  };
686 
687  // In case of multiple inner loop indvars, it is okay if LHS and RHS
688  // are both inner indvar related variables.
689  if (IsPathToInnerIndVar(Op0) && IsPathToInnerIndVar(Op1))
690  return true;
691 
692  // Otherwise we check if the cmp instruction compares an inner indvar
693  // related variable (Left) with a outer loop invariant (Right).
694  if (IsPathToInnerIndVar(Op0) && !isa<Constant>(Op0)) {
695  Left = Op0;
696  Right = Op1;
697  } else if (IsPathToInnerIndVar(Op1) && !isa<Constant>(Op1)) {
698  Left = Op1;
699  Right = Op0;
700  }
701 
702  if (Left == nullptr)
703  return false;
704 
705  const SCEV *S = SE->getSCEV(Right);
706  if (!SE->isLoopInvariant(S, OuterLoop))
707  return false;
708  }
709 
710  return true;
711 }
712 
713 // If SV is a LCSSA PHI node with a single incoming value, return the incoming
714 // value.
715 static Value *followLCSSA(Value *SV) {
716  PHINode *PHI = dyn_cast<PHINode>(SV);
717  if (!PHI)
718  return SV;
719 
720  if (PHI->getNumIncomingValues() != 1)
721  return SV;
722  return followLCSSA(PHI->getIncomingValue(0));
723 }
724 
725 // Check V's users to see if it is involved in a reduction in L.
727  // Reduction variables cannot be constants.
728  if (isa<Constant>(V))
729  return nullptr;
730 
731  for (Value *User : V->users()) {
732  if (PHINode *PHI = dyn_cast<PHINode>(User)) {
733  if (PHI->getNumIncomingValues() == 1)
734  continue;
736  if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD))
737  return PHI;
738  return nullptr;
739  }
740  }
741 
742  return nullptr;
743 }
744 
745 bool LoopInterchangeLegality::findInductionAndReductions(
746  Loop *L, SmallVector<PHINode *, 8> &Inductions, Loop *InnerLoop) {
747  if (!L->getLoopLatch() || !L->getLoopPredecessor())
748  return false;
749  for (PHINode &PHI : L->getHeader()->phis()) {
752  if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
753  Inductions.push_back(&PHI);
754  else {
755  // PHIs in inner loops need to be part of a reduction in the outer loop,
756  // discovered when checking the PHIs of the outer loop earlier.
757  if (!InnerLoop) {
758  if (!OuterInnerReductions.count(&PHI)) {
759  LLVM_DEBUG(dbgs() << "Inner loop PHI is not part of reductions "
760  "across the outer loop.\n");
761  return false;
762  }
763  } else {
764  assert(PHI.getNumIncomingValues() == 2 &&
765  "Phis in loop header should have exactly 2 incoming values");
766  // Check if we have a PHI node in the outer loop that has a reduction
767  // result from the inner loop as an incoming value.
768  Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch()));
769  PHINode *InnerRedPhi = findInnerReductionPhi(InnerLoop, V);
770  if (!InnerRedPhi ||
771  !llvm::is_contained(InnerRedPhi->incoming_values(), &PHI)) {
772  LLVM_DEBUG(
773  dbgs()
774  << "Failed to recognize PHI as an induction or reduction.\n");
775  return false;
776  }
777  OuterInnerReductions.insert(&PHI);
778  OuterInnerReductions.insert(InnerRedPhi);
779  }
780  }
781  }
782  return true;
783 }
784 
785 // This function indicates the current limitations in the transform as a result
786 // of which we do not proceed.
787 bool LoopInterchangeLegality::currentLimitations() {
788  BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
789 
790  // transform currently expects the loop latches to also be the exiting
791  // blocks.
792  if (InnerLoop->getExitingBlock() != InnerLoopLatch ||
793  OuterLoop->getExitingBlock() != OuterLoop->getLoopLatch() ||
794  !isa<BranchInst>(InnerLoopLatch->getTerminator()) ||
795  !isa<BranchInst>(OuterLoop->getLoopLatch()->getTerminator())) {
796  LLVM_DEBUG(
797  dbgs() << "Loops where the latch is not the exiting block are not"
798  << " supported currently.\n");
799  ORE->emit([&]() {
800  return OptimizationRemarkMissed(DEBUG_TYPE, "ExitingNotLatch",
801  OuterLoop->getStartLoc(),
802  OuterLoop->getHeader())
803  << "Loops where the latch is not the exiting block cannot be"
804  " interchange currently.";
805  });
806  return true;
807  }
808 
809  SmallVector<PHINode *, 8> Inductions;
810  if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
811  LLVM_DEBUG(
812  dbgs() << "Only outer loops with induction or reduction PHI nodes "
813  << "are supported currently.\n");
814  ORE->emit([&]() {
815  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIOuter",
816  OuterLoop->getStartLoc(),
817  OuterLoop->getHeader())
818  << "Only outer loops with induction or reduction PHI nodes can be"
819  " interchanged currently.";
820  });
821  return true;
822  }
823 
824  Inductions.clear();
825  if (!findInductionAndReductions(InnerLoop, Inductions, nullptr)) {
826  LLVM_DEBUG(
827  dbgs() << "Only inner loops with induction or reduction PHI nodes "
828  << "are supported currently.\n");
829  ORE->emit([&]() {
830  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner",
831  InnerLoop->getStartLoc(),
832  InnerLoop->getHeader())
833  << "Only inner loops with induction or reduction PHI nodes can be"
834  " interchange currently.";
835  });
836  return true;
837  }
838 
839  // TODO: Triangular loops are not handled for now.
840  if (!isLoopStructureUnderstood()) {
841  LLVM_DEBUG(dbgs() << "Loop structure not understood by pass\n");
842  ORE->emit([&]() {
843  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedStructureInner",
844  InnerLoop->getStartLoc(),
845  InnerLoop->getHeader())
846  << "Inner loop structure not understood currently.";
847  });
848  return true;
849  }
850 
851  return false;
852 }
853 
854 bool LoopInterchangeLegality::findInductions(
855  Loop *L, SmallVectorImpl<PHINode *> &Inductions) {
856  for (PHINode &PHI : L->getHeader()->phis()) {
858  if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
859  Inductions.push_back(&PHI);
860  }
861  return !Inductions.empty();
862 }
863 
864 // We currently only support LCSSA PHI nodes in the inner loop exit, if their
865 // users are either reduction PHIs or PHIs outside the outer loop (which means
866 // the we are only interested in the final value after the loop).
867 static bool
869  SmallPtrSetImpl<PHINode *> &Reductions) {
870  BasicBlock *InnerExit = OuterL->getUniqueExitBlock();
871  for (PHINode &PHI : InnerExit->phis()) {
872  // Reduction lcssa phi will have only 1 incoming block that from loop latch.
873  if (PHI.getNumIncomingValues() > 1)
874  return false;
875  if (any_of(PHI.users(), [&Reductions, OuterL](User *U) {
876  PHINode *PN = dyn_cast<PHINode>(U);
877  return !PN ||
878  (!Reductions.count(PN) && OuterL->contains(PN->getParent()));
879  })) {
880  return false;
881  }
882  }
883  return true;
884 }
885 
886 // We currently support LCSSA PHI nodes in the outer loop exit, if their
887 // incoming values do not come from the outer loop latch or if the
888 // outer loop latch has a single predecessor. In that case, the value will
889 // be available if both the inner and outer loop conditions are true, which
890 // will still be true after interchanging. If we have multiple predecessor,
891 // that may not be the case, e.g. because the outer loop latch may be executed
892 // if the inner loop is not executed.
893 static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
894  BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock();
895  for (PHINode &PHI : LoopNestExit->phis()) {
896  // FIXME: We currently are not able to detect floating point reductions
897  // and have to use floating point PHIs as a proxy to prevent
898  // interchanging in the presence of floating point reductions.
899  if (PHI.getType()->isFloatingPointTy())
900  return false;
901  for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
902  Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i));
903  if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch())
904  continue;
905 
906  // The incoming value is defined in the outer loop latch. Currently we
907  // only support that in case the outer loop latch has a single predecessor.
908  // This guarantees that the outer loop latch is executed if and only if
909  // the inner loop is executed (because tightlyNested() guarantees that the
910  // outer loop header only branches to the inner loop or the outer loop
911  // latch).
912  // FIXME: We could weaken this logic and allow multiple predecessors,
913  // if the values are produced outside the loop latch. We would need
914  // additional logic to update the PHI nodes in the exit block as
915  // well.
916  if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr)
917  return false;
918  }
919  }
920  return true;
921 }
922 
923 // In case of multi-level nested loops, it may occur that lcssa phis exist in
924 // the latch of InnerLoop, i.e., when defs of the incoming values are further
925 // inside the loopnest. Sometimes those incoming values are not available
926 // after interchange, since the original inner latch will become the new outer
927 // latch which may have predecessor paths that do not include those incoming
928 // values.
929 // TODO: Handle transformation of lcssa phis in the InnerLoop latch in case of
930 // multi-level loop nests.
931 static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
932  if (InnerLoop->getSubLoops().empty())
933  return true;
934  // If the original outer latch has only one predecessor, then values defined
935  // further inside the looploop, e.g., in the innermost loop, will be available
936  // at the new outer latch after interchange.
937  if (OuterLoop->getLoopLatch()->getUniquePredecessor() != nullptr)
938  return true;
939 
940  // The outer latch has more than one predecessors, i.e., the inner
941  // exit and the inner header.
942  // PHI nodes in the inner latch are lcssa phis where the incoming values
943  // are defined further inside the loopnest. Check if those phis are used
944  // in the original inner latch. If that is the case then bail out since
945  // those incoming values may not be available at the new outer latch.
946  BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
947  for (PHINode &PHI : InnerLoopLatch->phis()) {
948  for (auto *U : PHI.users()) {
949  Instruction *UI = cast<Instruction>(U);
950  if (InnerLoopLatch == UI->getParent())
951  return false;
952  }
953  }
954  return true;
955 }
956 
957 bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
958  unsigned OuterLoopId,
959  CharMatrix &DepMatrix) {
960  if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) {
961  LLVM_DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId
962  << " and OuterLoopId = " << OuterLoopId
963  << " due to dependence\n");
964  ORE->emit([&]() {
965  return OptimizationRemarkMissed(DEBUG_TYPE, "Dependence",
966  InnerLoop->getStartLoc(),
967  InnerLoop->getHeader())
968  << "Cannot interchange loops due to dependences.";
969  });
970  return false;
971  }
972  // Check if outer and inner loop contain legal instructions only.
973  for (auto *BB : OuterLoop->blocks())
974  for (Instruction &I : BB->instructionsWithoutDebug())
975  if (CallInst *CI = dyn_cast<CallInst>(&I)) {
976  // readnone functions do not prevent interchanging.
977  if (CI->onlyWritesMemory())
978  continue;
979  LLVM_DEBUG(
980  dbgs() << "Loops with call instructions cannot be interchanged "
981  << "safely.");
982  ORE->emit([&]() {
983  return OptimizationRemarkMissed(DEBUG_TYPE, "CallInst",
984  CI->getDebugLoc(),
985  CI->getParent())
986  << "Cannot interchange loops due to call instruction.";
987  });
988 
989  return false;
990  }
991 
992  if (!findInductions(InnerLoop, InnerLoopInductions)) {
993  LLVM_DEBUG(dbgs() << "Cound not find inner loop induction variables.\n");
994  return false;
995  }
996 
997  if (!areInnerLoopLatchPHIsSupported(OuterLoop, InnerLoop)) {
998  LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop latch.\n");
999  ORE->emit([&]() {
1000  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedInnerLatchPHI",
1001  InnerLoop->getStartLoc(),
1002  InnerLoop->getHeader())
1003  << "Cannot interchange loops because unsupported PHI nodes found "
1004  "in inner loop latch.";
1005  });
1006  return false;
1007  }
1008 
1009  // TODO: The loops could not be interchanged due to current limitations in the
1010  // transform module.
1011  if (currentLimitations()) {
1012  LLVM_DEBUG(dbgs() << "Not legal because of current transform limitation\n");
1013  return false;
1014  }
1015 
1016  // Check if the loops are tightly nested.
1017  if (!tightlyNested(OuterLoop, InnerLoop)) {
1018  LLVM_DEBUG(dbgs() << "Loops not tightly nested\n");
1019  ORE->emit([&]() {
1020  return OptimizationRemarkMissed(DEBUG_TYPE, "NotTightlyNested",
1021  InnerLoop->getStartLoc(),
1022  InnerLoop->getHeader())
1023  << "Cannot interchange loops because they are not tightly "
1024  "nested.";
1025  });
1026  return false;
1027  }
1028 
1029  if (!areInnerLoopExitPHIsSupported(OuterLoop, InnerLoop,
1030  OuterInnerReductions)) {
1031  LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop exit.\n");
1032  ORE->emit([&]() {
1033  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
1034  InnerLoop->getStartLoc(),
1035  InnerLoop->getHeader())
1036  << "Found unsupported PHI node in loop exit.";
1037  });
1038  return false;
1039  }
1040 
1041  if (!areOuterLoopExitPHIsSupported(OuterLoop, InnerLoop)) {
1042  LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n");
1043  ORE->emit([&]() {
1044  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
1045  OuterLoop->getStartLoc(),
1046  OuterLoop->getHeader())
1047  << "Found unsupported PHI node in loop exit.";
1048  });
1049  return false;
1050  }
1051 
1052  return true;
1053 }
1054 
1055 int LoopInterchangeProfitability::getInstrOrderCost() {
1056  unsigned GoodOrder, BadOrder;
1057  BadOrder = GoodOrder = 0;
1058  for (BasicBlock *BB : InnerLoop->blocks()) {
1059  for (Instruction &Ins : *BB) {
1060  if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) {
1061  unsigned NumOp = GEP->getNumOperands();
1062  bool FoundInnerInduction = false;
1063  bool FoundOuterInduction = false;
1064  for (unsigned i = 0; i < NumOp; ++i) {
1065  // Skip operands that are not SCEV-able.
1066  if (!SE->isSCEVable(GEP->getOperand(i)->getType()))
1067  continue;
1068 
1069  const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i));
1070  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal);
1071  if (!AR)
1072  continue;
1073 
1074  // If we find the inner induction after an outer induction e.g.
1075  // for(int i=0;i<N;i++)
1076  // for(int j=0;j<N;j++)
1077  // A[i][j] = A[i-1][j-1]+k;
1078  // then it is a good order.
1079  if (AR->getLoop() == InnerLoop) {
1080  // We found an InnerLoop induction after OuterLoop induction. It is
1081  // a good order.
1082  FoundInnerInduction = true;
1083  if (FoundOuterInduction) {
1084  GoodOrder++;
1085  break;
1086  }
1087  }
1088  // If we find the outer induction after an inner induction e.g.
1089  // for(int i=0;i<N;i++)
1090  // for(int j=0;j<N;j++)
1091  // A[j][i] = A[j-1][i-1]+k;
1092  // then it is a bad order.
1093  if (AR->getLoop() == OuterLoop) {
1094  // We found an OuterLoop induction after InnerLoop induction. It is
1095  // a bad order.
1096  FoundOuterInduction = true;
1097  if (FoundInnerInduction) {
1098  BadOrder++;
1099  break;
1100  }
1101  }
1102  }
1103  }
1104  }
1105  }
1106  return GoodOrder - BadOrder;
1107 }
1108 
1109 static bool isProfitableForVectorization(unsigned InnerLoopId,
1110  unsigned OuterLoopId,
1111  CharMatrix &DepMatrix) {
1112  // TODO: Improve this heuristic to catch more cases.
1113  // If the inner loop is loop independent or doesn't carry any dependency it is
1114  // profitable to move this to outer position.
1115  for (auto &Row : DepMatrix) {
1116  if (Row[InnerLoopId] != 'S' && Row[InnerLoopId] != 'I')
1117  return false;
1118  // TODO: We need to improve this heuristic.
1119  if (Row[OuterLoopId] != '=')
1120  return false;
1121  }
1122  // If outer loop has dependence and inner loop is loop independent then it is
1123  // profitable to interchange to enable parallelism.
1124  // If there are no dependences, interchanging will not improve anything.
1125  return !DepMatrix.empty();
1126 }
1127 
1128 bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
1129  unsigned OuterLoopId,
1130  CharMatrix &DepMatrix) {
1131  // TODO: Add better profitability checks.
1132  // e.g
1133  // 1) Construct dependency matrix and move the one with no loop carried dep
1134  // inside to enable vectorization.
1135 
1136  // This is rough cost estimation algorithm. It counts the good and bad order
1137  // of induction variables in the instruction and allows reordering if number
1138  // of bad orders is more than good.
1139  int Cost = getInstrOrderCost();
1140  LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n");
1141  if (Cost < -LoopInterchangeCostThreshold)
1142  return true;
1143 
1144  // It is not profitable as per current cache profitability model. But check if
1145  // we can move this loop outside to improve parallelism.
1146  if (isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix))
1147  return true;
1148 
1149  ORE->emit([&]() {
1150  return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable",
1151  InnerLoop->getStartLoc(),
1152  InnerLoop->getHeader())
1153  << "Interchanging loops is too costly (cost="
1154  << ore::NV("Cost", Cost) << ", threshold="
1155  << ore::NV("Threshold", LoopInterchangeCostThreshold)
1156  << ") and it does not improve parallelism.";
1157  });
1158  return false;
1159 }
1160 
1161 void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
1162  Loop *InnerLoop) {
1163  for (Loop *L : *OuterLoop)
1164  if (L == InnerLoop) {
1165  OuterLoop->removeChildLoop(L);
1166  return;
1167  }
1168  llvm_unreachable("Couldn't find loop");
1169 }
1170 
1171 /// Update LoopInfo, after interchanging. NewInner and NewOuter refer to the
1172 /// new inner and outer loop after interchanging: NewInner is the original
1173 /// outer loop and NewOuter is the original inner loop.
1174 ///
1175 /// Before interchanging, we have the following structure
1176 /// Outer preheader
1177 // Outer header
1178 // Inner preheader
1179 // Inner header
1180 // Inner body
1181 // Inner latch
1182 // outer bbs
1183 // Outer latch
1184 //
1185 // After interchanging:
1186 // Inner preheader
1187 // Inner header
1188 // Outer preheader
1189 // Outer header
1190 // Inner body
1191 // outer bbs
1192 // Outer latch
1193 // Inner latch
1194 void LoopInterchangeTransform::restructureLoops(
1195  Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader,
1196  BasicBlock *OrigOuterPreHeader) {
1197  Loop *OuterLoopParent = OuterLoop->getParentLoop();
1198  // The original inner loop preheader moves from the new inner loop to
1199  // the parent loop, if there is one.
1200  NewInner->removeBlockFromLoop(OrigInnerPreHeader);
1201  LI->changeLoopFor(OrigInnerPreHeader, OuterLoopParent);
1202 
1203  // Switch the loop levels.
1204  if (OuterLoopParent) {
1205  // Remove the loop from its parent loop.
1206  removeChildLoop(OuterLoopParent, NewInner);
1207  removeChildLoop(NewInner, NewOuter);
1208  OuterLoopParent->addChildLoop(NewOuter);
1209  } else {
1210  removeChildLoop(NewInner, NewOuter);
1211  LI->changeTopLevelLoop(NewInner, NewOuter);
1212  }
1213  while (!NewOuter->isInnermost())
1214  NewInner->addChildLoop(NewOuter->removeChildLoop(NewOuter->begin()));
1215  NewOuter->addChildLoop(NewInner);
1216 
1217  // BBs from the original inner loop.
1218  SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->blocks());
1219 
1220  // Add BBs from the original outer loop to the original inner loop (excluding
1221  // BBs already in inner loop)
1222  for (BasicBlock *BB : NewInner->blocks())
1223  if (LI->getLoopFor(BB) == NewInner)
1224  NewOuter->addBlockEntry(BB);
1225 
1226  // Now remove inner loop header and latch from the new inner loop and move
1227  // other BBs (the loop body) to the new inner loop.
1228  BasicBlock *OuterHeader = NewOuter->getHeader();
1229  BasicBlock *OuterLatch = NewOuter->getLoopLatch();
1230  for (BasicBlock *BB : OrigInnerBBs) {
1231  // Nothing will change for BBs in child loops.
1232  if (LI->getLoopFor(BB) != NewOuter)
1233  continue;
1234  // Remove the new outer loop header and latch from the new inner loop.
1235  if (BB == OuterHeader || BB == OuterLatch)
1236  NewInner->removeBlockFromLoop(BB);
1237  else
1238  LI->changeLoopFor(BB, NewInner);
1239  }
1240 
1241  // The preheader of the original outer loop becomes part of the new
1242  // outer loop.
1243  NewOuter->addBlockEntry(OrigOuterPreHeader);
1244  LI->changeLoopFor(OrigOuterPreHeader, NewOuter);
1245 
1246  // Tell SE that we move the loops around.
1247  SE->forgetLoop(NewOuter);
1248  SE->forgetLoop(NewInner);
1249 }
1250 
1252  bool Transformed = false;
1253 
1254  if (InnerLoop->getSubLoops().empty()) {
1255  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1256  LLVM_DEBUG(dbgs() << "Splitting the inner loop latch\n");
1257  auto &InductionPHIs = LIL.getInnerLoopInductions();
1258  if (InductionPHIs.empty()) {
1259  LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n");
1260  return false;
1261  }
1262 
1263  SmallVector<Instruction *, 8> InnerIndexVarList;
1264  for (PHINode *CurInductionPHI : InductionPHIs) {
1265  if (CurInductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
1266  InnerIndexVarList.push_back(
1267  dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(1)));
1268  else
1269  InnerIndexVarList.push_back(
1270  dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(0)));
1271  }
1272 
1273  // Create a new latch block for the inner loop. We split at the
1274  // current latch's terminator and then move the condition and all
1275  // operands that are not either loop-invariant or the induction PHI into the
1276  // new latch block.
1277  BasicBlock *NewLatch =
1278  SplitBlock(InnerLoop->getLoopLatch(),
1279  InnerLoop->getLoopLatch()->getTerminator(), DT, LI);
1280 
1282  unsigned i = 0;
1283  auto MoveInstructions = [&i, &WorkList, this, &InductionPHIs, NewLatch]() {
1284  for (; i < WorkList.size(); i++) {
1285  // Duplicate instruction and move it the new latch. Update uses that
1286  // have been moved.
1287  Instruction *NewI = WorkList[i]->clone();
1288  NewI->insertBefore(NewLatch->getFirstNonPHI());
1289  assert(!NewI->mayHaveSideEffects() &&
1290  "Moving instructions with side-effects may change behavior of "
1291  "the loop nest!");
1292  for (Use &U : llvm::make_early_inc_range(WorkList[i]->uses())) {
1293  Instruction *UserI = cast<Instruction>(U.getUser());
1294  if (!InnerLoop->contains(UserI->getParent()) ||
1295  UserI->getParent() == NewLatch ||
1296  llvm::is_contained(InductionPHIs, UserI))
1297  U.set(NewI);
1298  }
1299  // Add operands of moved instruction to the worklist, except if they are
1300  // outside the inner loop or are the induction PHI.
1301  for (Value *Op : WorkList[i]->operands()) {
1302  Instruction *OpI = dyn_cast<Instruction>(Op);
1303  if (!OpI ||
1304  this->LI->getLoopFor(OpI->getParent()) != this->InnerLoop ||
1305  llvm::is_contained(InductionPHIs, OpI))
1306  continue;
1307  WorkList.insert(OpI);
1308  }
1309  }
1310  };
1311 
1312  // FIXME: Should we interchange when we have a constant condition?
1313  Instruction *CondI = dyn_cast<Instruction>(
1314  cast<BranchInst>(InnerLoop->getLoopLatch()->getTerminator())
1315  ->getCondition());
1316  if (CondI)
1317  WorkList.insert(CondI);
1318  MoveInstructions();
1319  for (Instruction *InnerIndexVar : InnerIndexVarList)
1320  WorkList.insert(cast<Instruction>(InnerIndexVar));
1321  MoveInstructions();
1322 
1323  // Splits the inner loops phi nodes out into a separate basic block.
1324  BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1325  SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI);
1326  LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n");
1327  }
1328 
1329  // Instructions in the original inner loop preheader may depend on values
1330  // defined in the outer loop header. Move them there, because the original
1331  // inner loop preheader will become the entry into the interchanged loop nest.
1332  // Currently we move all instructions and rely on LICM to move invariant
1333  // instructions outside the loop nest.
1334  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1335  BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1336  if (InnerLoopPreHeader != OuterLoopHeader) {
1337  SmallPtrSet<Instruction *, 4> NeedsMoving;
1338  for (Instruction &I :
1339  make_early_inc_range(make_range(InnerLoopPreHeader->begin(),
1340  std::prev(InnerLoopPreHeader->end()))))
1341  I.moveBefore(OuterLoopHeader->getTerminator());
1342  }
1343 
1344  Transformed |= adjustLoopLinks();
1345  if (!Transformed) {
1346  LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n");
1347  return false;
1348  }
1349 
1350  return true;
1351 }
1352 
1353 /// \brief Move all instructions except the terminator from FromBB right before
1354 /// InsertBefore
1355 static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
1356  auto &ToList = InsertBefore->getParent()->getInstList();
1357  auto &FromList = FromBB->getInstList();
1358 
1359  ToList.splice(InsertBefore->getIterator(), FromList, FromList.begin(),
1360  FromBB->getTerminator()->getIterator());
1361 }
1362 
1363 /// Swap instructions between \p BB1 and \p BB2 but keep terminators intact.
1364 static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2) {
1365  // Save all non-terminator instructions of BB1 into TempInstrs and unlink them
1366  // from BB1 afterwards.
1367  auto Iter = map_range(*BB1, [](Instruction &I) { return &I; });
1368  SmallVector<Instruction *, 4> TempInstrs(Iter.begin(), std::prev(Iter.end()));
1369  for (Instruction *I : TempInstrs)
1370  I->removeFromParent();
1371 
1372  // Move instructions from BB2 to BB1.
1373  moveBBContents(BB2, BB1->getTerminator());
1374 
1375  // Move instructions from TempInstrs to BB2.
1376  for (Instruction *I : TempInstrs)
1377  I->insertBefore(BB2->getTerminator());
1378 }
1379 
1380 // Update BI to jump to NewBB instead of OldBB. Records updates to the
1381 // dominator tree in DTUpdates. If \p MustUpdateOnce is true, assert that
1382 // \p OldBB is exactly once in BI's successor list.
1383 static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
1384  BasicBlock *NewBB,
1385  std::vector<DominatorTree::UpdateType> &DTUpdates,
1386  bool MustUpdateOnce = true) {
1387  assert((!MustUpdateOnce ||
1389  [OldBB](BasicBlock *BB) {
1390  return BB == OldBB;
1391  }) == 1) && "BI must jump to OldBB exactly once.");
1392  bool Changed = false;
1393  for (Use &Op : BI->operands())
1394  if (Op == OldBB) {
1395  Op.set(NewBB);
1396  Changed = true;
1397  }
1398 
1399  if (Changed) {
1400  DTUpdates.push_back(
1402  DTUpdates.push_back(
1404  }
1405  assert(Changed && "Expected a successor to be updated");
1406 }
1407 
1408 // Move Lcssa PHIs to the right place.
1409 static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,
1410  BasicBlock *InnerLatch, BasicBlock *OuterHeader,
1411  BasicBlock *OuterLatch, BasicBlock *OuterExit,
1412  Loop *InnerLoop, LoopInfo *LI) {
1413 
1414  // Deal with LCSSA PHI nodes in the exit block of the inner loop, that are
1415  // defined either in the header or latch. Those blocks will become header and
1416  // latch of the new outer loop, and the only possible users can PHI nodes
1417  // in the exit block of the loop nest or the outer loop header (reduction
1418  // PHIs, in that case, the incoming value must be defined in the inner loop
1419  // header). We can just substitute the user with the incoming value and remove
1420  // the PHI.
1421  for (PHINode &P : make_early_inc_range(InnerExit->phis())) {
1422  assert(P.getNumIncomingValues() == 1 &&
1423  "Only loops with a single exit are supported!");
1424 
1425  // Incoming values are guaranteed be instructions currently.
1426  auto IncI = cast<Instruction>(P.getIncomingValueForBlock(InnerLatch));
1427  // Skip phis with incoming values from the inner loop body, excluding the
1428  // header and latch.
1429  if (IncI->getParent() != InnerLatch && IncI->getParent() != InnerHeader)
1430  continue;
1431 
1432  assert(all_of(P.users(),
1433  [OuterHeader, OuterExit, IncI, InnerHeader](User *U) {
1434  return (cast<PHINode>(U)->getParent() == OuterHeader &&
1435  IncI->getParent() == InnerHeader) ||
1436  cast<PHINode>(U)->getParent() == OuterExit;
1437  }) &&
1438  "Can only replace phis iff the uses are in the loop nest exit or "
1439  "the incoming value is defined in the inner header (it will "
1440  "dominate all loop blocks after interchanging)");
1441  P.replaceAllUsesWith(IncI);
1442  P.eraseFromParent();
1443  }
1444 
1445  SmallVector<PHINode *, 8> LcssaInnerExit;
1446  for (PHINode &P : InnerExit->phis())
1447  LcssaInnerExit.push_back(&P);
1448 
1449  SmallVector<PHINode *, 8> LcssaInnerLatch;
1450  for (PHINode &P : InnerLatch->phis())
1451  LcssaInnerLatch.push_back(&P);
1452 
1453  // Lcssa PHIs for values used outside the inner loop are in InnerExit.
1454  // If a PHI node has users outside of InnerExit, it has a use outside the
1455  // interchanged loop and we have to preserve it. We move these to
1456  // InnerLatch, which will become the new exit block for the innermost
1457  // loop after interchanging.
1458  for (PHINode *P : LcssaInnerExit)
1459  P->moveBefore(InnerLatch->getFirstNonPHI());
1460 
1461  // If the inner loop latch contains LCSSA PHIs, those come from a child loop
1462  // and we have to move them to the new inner latch.
1463  for (PHINode *P : LcssaInnerLatch)
1464  P->moveBefore(InnerExit->getFirstNonPHI());
1465 
1466  // Deal with LCSSA PHI nodes in the loop nest exit block. For PHIs that have
1467  // incoming values defined in the outer loop, we have to add a new PHI
1468  // in the inner loop latch, which became the exit block of the outer loop,
1469  // after interchanging.
1470  if (OuterExit) {
1471  for (PHINode &P : OuterExit->phis()) {
1472  if (P.getNumIncomingValues() != 1)
1473  continue;
1474  // Skip Phis with incoming values defined in the inner loop. Those should
1475  // already have been updated.
1476  auto I = dyn_cast<Instruction>(P.getIncomingValue(0));
1477  if (!I || LI->getLoopFor(I->getParent()) == InnerLoop)
1478  continue;
1479 
1480  PHINode *NewPhi = dyn_cast<PHINode>(P.clone());
1481  NewPhi->setIncomingValue(0, P.getIncomingValue(0));
1482  NewPhi->setIncomingBlock(0, OuterLatch);
1483  // We might have incoming edges from other BBs, i.e., the original outer
1484  // header.
1485  for (auto *Pred : predecessors(InnerLatch)) {
1486  if (Pred == OuterLatch)
1487  continue;
1488  NewPhi->addIncoming(P.getIncomingValue(0), Pred);
1489  }
1490  NewPhi->insertBefore(InnerLatch->getFirstNonPHI());
1491  P.setIncomingValue(0, NewPhi);
1492  }
1493  }
1494 
1495  // Now adjust the incoming blocks for the LCSSA PHIs.
1496  // For PHIs moved from Inner's exit block, we need to replace Inner's latch
1497  // with the new latch.
1498  InnerLatch->replacePhiUsesWith(InnerLatch, OuterLatch);
1499 }
1500 
1501 bool LoopInterchangeTransform::adjustLoopBranches() {
1502  LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n");
1503  std::vector<DominatorTree::UpdateType> DTUpdates;
1504 
1505  BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1506  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1507 
1508  assert(OuterLoopPreHeader != OuterLoop->getHeader() &&
1509  InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader &&
1510  InnerLoopPreHeader && "Guaranteed by loop-simplify form");
1511  // Ensure that both preheaders do not contain PHI nodes and have single
1512  // predecessors. This allows us to move them easily. We use
1513  // InsertPreHeaderForLoop to create an 'extra' preheader, if the existing
1514  // preheaders do not satisfy those conditions.
1515  if (isa<PHINode>(OuterLoopPreHeader->begin()) ||
1516  !OuterLoopPreHeader->getUniquePredecessor())
1517  OuterLoopPreHeader =
1518  InsertPreheaderForLoop(OuterLoop, DT, LI, nullptr, true);
1519  if (InnerLoopPreHeader == OuterLoop->getHeader())
1520  InnerLoopPreHeader =
1521  InsertPreheaderForLoop(InnerLoop, DT, LI, nullptr, true);
1522 
1523  // Adjust the loop preheader
1524  BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1525  BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1526  BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
1527  BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1528  BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor();
1529  BasicBlock *InnerLoopLatchPredecessor =
1530  InnerLoopLatch->getUniquePredecessor();
1531  BasicBlock *InnerLoopLatchSuccessor;
1532  BasicBlock *OuterLoopLatchSuccessor;
1533 
1534  BranchInst *OuterLoopLatchBI =
1535  dyn_cast<BranchInst>(OuterLoopLatch->getTerminator());
1536  BranchInst *InnerLoopLatchBI =
1537  dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
1538  BranchInst *OuterLoopHeaderBI =
1539  dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
1540  BranchInst *InnerLoopHeaderBI =
1541  dyn_cast<BranchInst>(InnerLoopHeader->getTerminator());
1542 
1543  if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
1544  !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
1545  !InnerLoopHeaderBI)
1546  return false;
1547 
1548  BranchInst *InnerLoopLatchPredecessorBI =
1549  dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator());
1550  BranchInst *OuterLoopPredecessorBI =
1551  dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator());
1552 
1553  if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
1554  return false;
1555  BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor();
1556  if (!InnerLoopHeaderSuccessor)
1557  return false;
1558 
1559  // Adjust Loop Preheader and headers.
1560  // The branches in the outer loop predecessor and the outer loop header can
1561  // be unconditional branches or conditional branches with duplicates. Consider
1562  // this when updating the successors.
1563  updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader,
1564  InnerLoopPreHeader, DTUpdates, /*MustUpdateOnce=*/false);
1565  // The outer loop header might or might not branch to the outer latch.
1566  // We are guaranteed to branch to the inner loop preheader.
1567  if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch)) {
1568  // In this case the outerLoopHeader should branch to the InnerLoopLatch.
1569  updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, InnerLoopLatch,
1570  DTUpdates,
1571  /*MustUpdateOnce=*/false);
1572  }
1573  updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader,
1574  InnerLoopHeaderSuccessor, DTUpdates,
1575  /*MustUpdateOnce=*/false);
1576 
1577  // Adjust reduction PHI's now that the incoming block has changed.
1578  InnerLoopHeaderSuccessor->replacePhiUsesWith(InnerLoopHeader,
1579  OuterLoopHeader);
1580 
1581  updateSuccessor(InnerLoopHeaderBI, InnerLoopHeaderSuccessor,
1582  OuterLoopPreHeader, DTUpdates);
1583 
1584  // -------------Adjust loop latches-----------
1585  if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader)
1586  InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1);
1587  else
1588  InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0);
1589 
1590  updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch,
1591  InnerLoopLatchSuccessor, DTUpdates);
1592 
1593  if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader)
1594  OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1);
1595  else
1596  OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0);
1597 
1598  updateSuccessor(InnerLoopLatchBI, InnerLoopLatchSuccessor,
1599  OuterLoopLatchSuccessor, DTUpdates);
1600  updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
1601  DTUpdates);
1602 
1603  DT->applyUpdates(DTUpdates);
1604  restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
1605  OuterLoopPreHeader);
1606 
1607  moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
1608  OuterLoopHeader, OuterLoopLatch, InnerLoop->getExitBlock(),
1609  InnerLoop, LI);
1610  // For PHIs in the exit block of the outer loop, outer's latch has been
1611  // replaced by Inners'.
1612  OuterLoopLatchSuccessor->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
1613 
1614  auto &OuterInnerReductions = LIL.getOuterInnerReductions();
1615  // Now update the reduction PHIs in the inner and outer loop headers.
1616  SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
1617  for (PHINode &PHI : InnerLoopHeader->phis())
1618  if (OuterInnerReductions.contains(&PHI))
1619  InnerLoopPHIs.push_back(&PHI);
1620 
1621  for (PHINode &PHI : OuterLoopHeader->phis())
1622  if (OuterInnerReductions.contains(&PHI))
1623  OuterLoopPHIs.push_back(&PHI);
1624 
1625  // Now move the remaining reduction PHIs from outer to inner loop header and
1626  // vice versa. The PHI nodes must be part of a reduction across the inner and
1627  // outer loop and all the remains to do is and updating the incoming blocks.
1628  for (PHINode *PHI : OuterLoopPHIs) {
1629  LLVM_DEBUG(dbgs() << "Outer loop reduction PHIs:\n"; PHI->dump(););
1630  PHI->moveBefore(InnerLoopHeader->getFirstNonPHI());
1631  assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node");
1632  }
1633  for (PHINode *PHI : InnerLoopPHIs) {
1634  LLVM_DEBUG(dbgs() << "Inner loop reduction PHIs:\n"; PHI->dump(););
1635  PHI->moveBefore(OuterLoopHeader->getFirstNonPHI());
1636  assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node");
1637  }
1638 
1639  // Update the incoming blocks for moved PHI nodes.
1640  OuterLoopHeader->replacePhiUsesWith(InnerLoopPreHeader, OuterLoopPreHeader);
1641  OuterLoopHeader->replacePhiUsesWith(InnerLoopLatch, OuterLoopLatch);
1642  InnerLoopHeader->replacePhiUsesWith(OuterLoopPreHeader, InnerLoopPreHeader);
1643  InnerLoopHeader->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
1644 
1645  // Values defined in the outer loop header could be used in the inner loop
1646  // latch. In that case, we need to create LCSSA phis for them, because after
1647  // interchanging they will be defined in the new inner loop and used in the
1648  // new outer loop.
1649  IRBuilder<> Builder(OuterLoopHeader->getContext());
1650  SmallVector<Instruction *, 4> MayNeedLCSSAPhis;
1651  for (Instruction &I :
1652  make_range(OuterLoopHeader->begin(), std::prev(OuterLoopHeader->end())))
1653  MayNeedLCSSAPhis.push_back(&I);
1654  formLCSSAForInstructions(MayNeedLCSSAPhis, *DT, *LI, SE, Builder);
1655 
1656  return true;
1657 }
1658 
1659 bool LoopInterchangeTransform::adjustLoopLinks() {
1660  // Adjust all branches in the inner and outer loop.
1661  bool Changed = adjustLoopBranches();
1662  if (Changed) {
1663  // We have interchanged the preheaders so we need to interchange the data in
1664  // the preheaders as well. This is because the content of the inner
1665  // preheader was previously executed inside the outer loop.
1666  BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1667  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1668  swapBBContents(OuterLoopPreHeader, InnerLoopPreHeader);
1669  }
1670  return Changed;
1671 }
1672 
1673 namespace {
1674 /// Main LoopInterchange Pass.
1675 struct LoopInterchangeLegacyPass : public LoopPass {
1676  static char ID;
1677 
1678  LoopInterchangeLegacyPass() : LoopPass(ID) {
1680  }
1681 
1682  void getAnalysisUsage(AnalysisUsage &AU) const override {
1685 
1687  }
1688 
1689  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
1690  if (skipLoop(L))
1691  return false;
1692 
1693  auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1694  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1695  auto *DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI();
1696  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1697  auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
1698 
1699  return LoopInterchange(SE, LI, DI, DT, ORE).run(L);
1700  }
1701 };
1702 } // namespace
1703 
1705 
1706 INITIALIZE_PASS_BEGIN(LoopInterchangeLegacyPass, "loop-interchange",
1707  "Interchanges loops for cache reuse", false, false)
1711 
1712 INITIALIZE_PASS_END(LoopInterchangeLegacyPass, "loop-interchange",
1713  "Interchanges loops for cache reuse", false, false)
1714 
1716  return new LoopInterchangeLegacyPass();
1717 }
1718 
1720  LoopAnalysisManager &AM,
1722  LPMUpdater &U) {
1723  Function &F = *LN.getParent();
1724 
1725  DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
1727  if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, &ORE).run(LN))
1728  return PreservedAnalyses::all();
1730 }
i
i
Definition: README.txt:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::initializeLoopInterchangeLegacyPassPass
void initializeLoopInterchangeLegacyPassPass(PassRegistry &)
isLegalToInterChangeLoops
static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, unsigned InnerLoopId, unsigned OuterLoopId)
Definition: LoopInterchange.cpp:257
moveBBContents
static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore)
Move all instructions except the terminator from FromBB right before InsertBefore.
Definition: LoopInterchange.cpp:1355
llvm::BasicBlock::end
iterator end()
Definition: BasicBlock.h:298
followLCSSA
static Value * followLCSSA(Value *SV)
Definition: LoopInterchange.cpp:715
llvm::OptimizationRemarkMissed
Diagnostic information for missed-optimization remarks.
Definition: DiagnosticInfo.h:730
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:64
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
llvm::LoopBase::getUniqueExitBlock
BlockT * getUniqueExitBlock() const
If getUniqueExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:138
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::BranchInst::successors
iterator_range< succ_op_iterator > successors()
Definition: Instructions.h:3202
llvm::LoopBase::getExitBlock
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:82
llvm::PHINode::incoming_values
op_range incoming_values()
Definition: Instructions.h:2743
llvm::RecurrenceDescriptor::isReductionPHI
static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Returns true if Phi is a reduction in TheLoop.
Definition: IVDescriptors.cpp:765
llvm::User::operands
op_range operands()
Definition: User.h:242
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
Scalar.h
llvm::DependenceAnalysisWrapperPass
Legacy pass manager pass to access dependence information.
Definition: DependenceAnalysis.h:979
llvm::Function
Definition: Function.h:62
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::cfg::UpdateKind::Insert
@ Insert
StringRef.h
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Pass.h
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
DEBUG_TYPE
#define DEBUG_TYPE
Definition: LoopInterchange.cpp:56
uses
This might compile to this xmm1 xorps xmm0 movss xmm0 ret Now consider if the code caused xmm1 to get spilled This might produce this xmm1 movaps xmm0 movaps xmm1 movss xmm0 ret since the reload is only used by these we could fold it into the uses
Definition: README-SSE.txt:258
llvm::SmallVector< Loop *, 8 >
Statistic.h
llvm::LoopBase::getLoopPredecessor
BlockT * getLoopPredecessor() const
If the given loop's header has exactly one unique predecessor outside the loop, return it.
Definition: LoopInfoImpl.h:195
ErrorHandling.h
llvm::IRBuilder<>
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:634
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:460
llvm::LoopInfoBase::changeLoopFor
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
Definition: LoopInfo.h:1008
llvm::Dependence::DVEntry::GE
@ GE
Definition: DependenceAnalysis.h:95
moveLCSSAPhis
static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader, BasicBlock *InnerLatch, BasicBlock *OuterHeader, BasicBlock *OuterLatch, BasicBlock *OuterExit, Loop *InnerLoop, LoopInfo *LI)
Definition: LoopInterchange.cpp:1409
OptimizationRemarkEmitter.h
llvm::Instruction::insertBefore
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:84
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::getLoopAnalysisUsage
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:150
Right
Vector Shift Left Right
Definition: README_P9.txt:118
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
isOuterMostDepPositive
static bool isOuterMostDepPositive(CharMatrix &DepMatrix, unsigned Row, unsigned Column)
Definition: LoopInterchange.cpp:196
ScalarEvolution.h
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:52
llvm::cfg::UpdateKind::Delete
@ Delete
interchange
loop interchange
Definition: LoopInterchange.cpp:1712
llvm::LoopBase::begin
iterator begin() const
Definition: LoopInfo.h:154
findInnerReductionPhi
static PHINode * findInnerReductionPhi(Loop *L, Value *V)
Definition: LoopInterchange.cpp:726
llvm::SmallPtrSet< PHINode *, 4 >
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::Dependence::DVEntry::LT
@ LT
Definition: DependenceAnalysis.h:90
llvm::successors
auto successors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:31
llvm::codeview::VFTableSlotKind::Outer
@ Outer
STLExtras.h
llvm::SCEVConstant::getValue
ConstantInt * getValue() const
Definition: ScalarEvolutionExpressions.h:69
llvm::LoopStandardAnalysisResults::DT
DominatorTree & DT
Definition: LoopAnalysisManager.h:55
llvm::PHINode::setIncomingValue
void setIncomingValue(unsigned i, Value *V)
Definition: Instructions.h:2756
llvm::count_if
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1739
loops
loops
Definition: LoopInfo.cpp:1176
llvm::DominatorTreeBase::applyUpdates
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
Definition: GenericDomTree.h:544
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::Instruction::mayHaveSideEffects
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
Definition: Instruction.cpp:693
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::BasicBlock::getUniqueSuccessor
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:306
loop
Analysis the ScalarEvolution expression for r is< loop > Outside the loop
Definition: README.txt:8
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::AlignStyle::Left
@ Left
Instruction.h
CommandLine.h
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::LoopBase::getParentLoop
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Definition: LoopInfo.h:113
llvm::LoopBase::getSubLoops
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
Definition: LoopInfo.h:143
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1649
llvm::InductionDescriptor
A struct for saving information about induction variables.
Definition: IVDescriptors.h:306
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::createLoopInterchangePass
Pass * createLoopInterchangePass()
Definition: LoopInterchange.cpp:1715
Constants.h
llvm::PHINode::getIncomingValue
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
Definition: Instructions.h:2753
areInnerLoopExitPHIsSupported
static bool areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL, SmallPtrSetImpl< PHINode * > &Reductions)
Definition: LoopInterchange.cpp:868
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::User
Definition: User.h:44
InstrTypes.h
llvm::LoopBase::addChildLoop
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
Definition: LoopInfo.h:395
MaxLoopNestDepth
static const unsigned MaxLoopNestDepth
Definition: LoopInterchange.cpp:77
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:296
swapBBContents
static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2)
Swap instructions between BB1 and BB2 but keep terminators intact.
Definition: LoopInterchange.cpp:1364
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
populateWorklist
static LoopVector populateWorklist(Loop &L)
Definition: LoopInterchange.cpp:273
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
false
Definition: StackSlotColoring.cpp:142
populateDependencyMatrix
static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, Loop *L, DependenceInfo *DI)
Definition: LoopInterchange.cpp:89
llvm::Instruction
Definition: Instruction.h:45
isProfitableForVectorization
static bool isProfitableForVectorization(unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix)
Definition: LoopInterchange.cpp:1109
llvm::BasicBlock::phis
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:354
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::predecessors
auto predecessors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:32
LoopUtils.h
llvm::LPPassManager
Definition: LoopPass.h:75
LoopInterchangeCostThreshold
static cl::opt< int > LoopInterchangeCostThreshold("loop-interchange-threshold", cl::init(0), cl::Hidden, cl::desc("Interchange if you gain more than this number"))
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(LoopInterchangeLegacyPass, "loop-interchange", "Interchanges loops for cache reuse", false, false) INITIALIZE_PASS_END(LoopInterchangeLegacyPass
llvm::LoopBase::getExitingBlock
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:49
llvm::BasicBlock::getFirstNonPHI
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:216
areInnerLoopLatchPHIsSupported
static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
Definition: LoopInterchange.cpp:931
llvm::LoopBase::removeBlockFromLoop
void removeBlockFromLoop(BlockT *BB)
This removes the specified basic block from the current loop, updating the Blocks as appropriate.
Definition: LoopInfo.h:460
Utils.h
llvm::PHINode::getNumIncomingValues
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Definition: Instructions.h:2749
Type.h
llvm::Loop::LoopBounds::Direction
Direction
An enum for the direction of the loop.
Definition: LoopInfo.h:704
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
LoopInfo.h
llvm::BranchInst::getCondition
Value * getCondition() const
Definition: Instructions.h:3173
llvm::ARM_PROC::IE
@ IE
Definition: ARMBaseInfo.h:27
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:4339
llvm::PHINode::getIncomingValueNumForOperand
static unsigned getIncomingValueNumForOperand(unsigned i)
Definition: Instructions.h:2767
llvm::CmpInst
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:711
llvm::InsertPreheaderForLoop
BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...
Definition: LoopSimplify.cpp:123
llvm::getLoopPassPreservedAnalyses
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Definition: LoopAnalysisManager.cpp:140
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1432
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:77
llvm::DependenceInfo
DependenceInfo - This class is the main dependence-analysis driver.
Definition: DependenceAnalysis.h:272
llvm::LoopPass
Definition: LoopPass.h:27
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition: LoopPassManager.h:263
llvm::PHINode::setIncomingBlock
void setIncomingBlock(unsigned i, BasicBlock *BB)
Definition: Instructions.h:2792
containsNoDependence
static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row, unsigned Column)
Definition: LoopInterchange.cpp:209
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2807
llvm::LoopNest::skipEmptyBlockUntil
static const BasicBlock & skipEmptyBlockUntil(const BasicBlock *From, const BasicBlock *End, bool CheckUniquePred=false)
Recursivelly traverse all empty 'single successor' basic blocks of From (if there are any).
Definition: LoopNestAnalysis.cpp:289
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:970
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:933
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:642
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:167
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1714
llvm::SCEVConstant
This class represents a constant integer value.
Definition: ScalarEvolutionExpressions.h:60
validDepInterchange
static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row, unsigned OuterLoopId, char InnerDep, char OuterDep)
Definition: LoopInterchange.cpp:219
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:216
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
llvm::Dependence::DVEntry::LE
@ LE
Definition: DependenceAnalysis.h:92
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:83
LoopNestAnalysis.h
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:650
LoopInterchange.h
llvm::Instruction::clone
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
Definition: Instruction.cpp:860
llvm::LoopBase::removeChildLoop
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
Definition: LoopInfo.h:404
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1086
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1656
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
this
Analysis the ScalarEvolution expression for r is this
Definition: README.txt:8
LoopPass.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:134
llvm::transform
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1746
llvm::ConstantInt::isZero
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:194
llvm::BasicBlock::getUniquePredecessor
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:276
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:81
llvm::LoopStandardAnalysisResults::LI
LoopInfo & LI
Definition: LoopAnalysisManager.h:56
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::PICLevel::Level
Level
Definition: CodeGen.h:33
llvm::LoopNest::getParent
Function * getParent() const
Return the function to which the loop-nest belongs.
Definition: LoopNestAnalysis.h:176
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
for
this could be done in SelectionDAGISel along with other special for
Definition: README.txt:104
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:152
llvm::ScalarEvolution::isLoopInvariant
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
Definition: ScalarEvolution.cpp:12996
areOuterLoopExitPHIsSupported
static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
Definition: LoopInterchange.cpp:893
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:36
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:165
llvm::InductionDescriptor::isInductionPHI
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
Definition: IVDescriptors.cpp:1362
llvm::BasicBlock::replacePhiUsesWith
void replacePhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
Definition: BasicBlock.cpp:450
llvm::map_range
auto map_range(ContainerTy &&C, FuncTy F)
Definition: STLExtras.h:361
llvm::OptimizationRemarkEmitterWrapperPass
OptimizationRemarkEmitter legacy analysis pass.
Definition: OptimizationRemarkEmitter.h:146
llvm::ScalarEvolution::forgetLoop
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
Definition: ScalarEvolution.cpp:7850
llvm::SCEVAddRecExpr::getLoop
const Loop * getLoop() const
Definition: ScalarEvolutionExpressions.h:354
interChangeDependencies
static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx, unsigned ToIndx)
Definition: LoopInterchange.cpp:188
llvm::Loop::isLCSSAForm
bool isLCSSAForm(const DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:462
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:325
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::ArrayRef::begin
iterator begin() const
Definition: ArrayRef.h:151
reuse
loop Interchanges loops for cache reuse
Definition: LoopInterchange.cpp:1713
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:342
Direction
Loop::LoopBounds::Direction Direction
Definition: LoopInfo.cpp:230
Casting.h
DiagnosticInfo.h
Function.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::ScalarEvolution::isSCEVable
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Definition: ScalarEvolution.cpp:4197
llvm::LoopBase::addBlockEntry
void addBlockEntry(BlockT *BB)
This adds a basic block directly to the basic block list.
Definition: LoopInfo.h:423
transform
instcombine should handle this transform
Definition: README.txt:262
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:581
llvm::LoopBase::getNumBackEdges
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
Definition: LoopInfo.h:250
updateSuccessor
static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB, BasicBlock *NewBB, std::vector< DominatorTree::UpdateType > &DTUpdates, bool MustUpdateOnce=true)
Definition: LoopInterchange.cpp:1383
llvm::LoopStandardAnalysisResults::SE
ScalarEvolution & SE
Definition: LoopAnalysisManager.h:57
llvm::LoopStandardAnalysisResults::AA
AAResults & AA
Definition: LoopAnalysisManager.h:53
llvm::Dependence::DVEntry::GT
@ GT
Definition: DependenceAnalysis.h:93
llvm::LoopNest::getLoops
ArrayRef< Loop * > getLoops() const
Get the loops in the nest.
Definition: LoopNestAnalysis.h:117
ScalarEvolutionExpressions.h
llvm::RecurrenceDescriptor
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:73
llvm::BasicBlock::getInstList
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:363
llvm::DependenceInfo::depends
std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool PossiblyLoopIndependent)
depends - Tests for a dependence between the Src and Dst instructions.
Definition: DependenceAnalysis.cpp:3525
llvm::formLCSSAForInstructions
bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, IRBuilderBase &Builder, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
Definition: LCSSA.cpp:79
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:685
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
Instructions.h
llvm::Dependence::DVEntry::EQ
@ EQ
Definition: DependenceAnalysis.h:91
llvm::ConstantInt::isNegative
bool isNegative() const
Definition: Constants.h:189
SmallVector.h
User.h
llvm::LoopInfoBase::changeTopLevelLoop
void changeTopLevelLoop(LoopT *OldLoop, LoopT *NewLoop)
Replace the specified loop in the top-level loops list with the indicated loop.
Definition: LoopInfo.h:1018
Dominators.h
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
llvm::PHINode
Definition: Instructions.h:2657
llvm::MipsISD::Ins
@ Ins
Definition: MipsISelLowering.h:157
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:43
llvm::LoopInterchangePass::run
PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LoopInterchange.cpp:1719
llvm::SmallPtrSetImpl< PHINode * >
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:307
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1478
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:172
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
DependenceAnalysis.h
MaxMemInstrCount
static const unsigned MaxMemInstrCount
Definition: LoopInterchange.cpp:74
llvm::cl::desc
Definition: CommandLine.h:412
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3092
raw_ostream.h
llvm::ScalarEvolution::getBackedgeTakenCount
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
Definition: ScalarEvolution.cpp:7720
BasicBlockUtils.h
llvm::SplitBlock
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition: BasicBlockUtils.cpp:839
Value.h
InitializePasses.h
llvm::LoopNest
This class represents a loop nest and can be used to query its properties.
Definition: LoopNestAnalysis.h:28
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
llvm::BranchInst::isConditional
bool isConditional() const
Definition: Instructions.h:3171
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3185
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38