LLVM  15.0.0git
LoopInterchange.cpp
Go to the documentation of this file.
1 //===- LoopInterchange.cpp - Loop interchange pass-------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This Pass handles loop interchange transform.
10 // This pass interchanges loops to provide a more cache-friendly memory access
11 // patterns.
12 //
13 //===----------------------------------------------------------------------===//
14 
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Analysis/LoopInfo.h"
23 #include "llvm/Analysis/LoopPass.h"
27 #include "llvm/IR/BasicBlock.h"
28 #include "llvm/IR/Constants.h"
29 #include "llvm/IR/DiagnosticInfo.h"
30 #include "llvm/IR/Dominators.h"
31 #include "llvm/IR/Function.h"
32 #include "llvm/IR/IRBuilder.h"
33 #include "llvm/IR/InstrTypes.h"
34 #include "llvm/IR/Instruction.h"
35 #include "llvm/IR/Instructions.h"
36 #include "llvm/IR/User.h"
37 #include "llvm/IR/Value.h"
38 #include "llvm/InitializePasses.h"
39 #include "llvm/Pass.h"
40 #include "llvm/Support/Casting.h"
42 #include "llvm/Support/Debug.h"
45 #include "llvm/Transforms/Scalar.h"
48 #include <cassert>
49 #include <utility>
50 #include <vector>
51 
52 using namespace llvm;
53 
54 #define DEBUG_TYPE "loop-interchange"
55 
56 STATISTIC(LoopsInterchanged, "Number of loops interchanged");
57 
59  "loop-interchange-threshold", cl::init(0), cl::Hidden,
60  cl::desc("Interchange if you gain more than this number"));
61 
62 namespace {
63 
64 using LoopVector = SmallVector<Loop *, 8>;
65 
66 // TODO: Check if we can use a sparse matrix here.
67 using CharMatrix = std::vector<std::vector<char>>;
68 
69 } // end anonymous namespace
70 
71 // Maximum number of dependencies that can be handled in the dependency matrix.
72 static const unsigned MaxMemInstrCount = 100;
73 
74 // Maximum loop depth supported.
75 static const unsigned MaxLoopNestDepth = 10;
76 
77 #ifdef DUMP_DEP_MATRICIES
78 static void printDepMatrix(CharMatrix &DepMatrix) {
79  for (auto &Row : DepMatrix) {
80  for (auto D : Row)
81  LLVM_DEBUG(dbgs() << D << " ");
82  LLVM_DEBUG(dbgs() << "\n");
83  }
84 }
85 #endif
86 
87 static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
88  Loop *L, DependenceInfo *DI) {
89  using ValueVector = SmallVector<Value *, 16>;
90 
91  ValueVector MemInstr;
92 
93  // For each block.
94  for (BasicBlock *BB : L->blocks()) {
95  // Scan the BB and collect legal loads and stores.
96  for (Instruction &I : *BB) {
97  if (!isa<Instruction>(I))
98  return false;
99  if (auto *Ld = dyn_cast<LoadInst>(&I)) {
100  if (!Ld->isSimple())
101  return false;
102  MemInstr.push_back(&I);
103  } else if (auto *St = dyn_cast<StoreInst>(&I)) {
104  if (!St->isSimple())
105  return false;
106  MemInstr.push_back(&I);
107  }
108  }
109  }
110 
111  LLVM_DEBUG(dbgs() << "Found " << MemInstr.size()
112  << " Loads and Stores to analyze\n");
113 
114  ValueVector::iterator I, IE, J, JE;
115 
116  for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
117  for (J = I, JE = MemInstr.end(); J != JE; ++J) {
118  std::vector<char> Dep;
119  Instruction *Src = cast<Instruction>(*I);
120  Instruction *Dst = cast<Instruction>(*J);
121  // Ignore Input dependencies.
122  if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
123  continue;
124  // Track Output, Flow, and Anti dependencies.
125  if (auto D = DI->depends(Src, Dst, true)) {
126  assert(D->isOrdered() && "Expected an output, flow or anti dep.");
127  LLVM_DEBUG(StringRef DepType =
128  D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";
129  dbgs() << "Found " << DepType
130  << " dependency between Src and Dst\n"
131  << " Src:" << *Src << "\n Dst:" << *Dst << '\n');
132  unsigned Levels = D->getLevels();
133  char Direction;
134  for (unsigned II = 1; II <= Levels; ++II) {
135  const SCEV *Distance = D->getDistance(II);
136  const SCEVConstant *SCEVConst =
137  dyn_cast_or_null<SCEVConstant>(Distance);
138  if (SCEVConst) {
139  const ConstantInt *CI = SCEVConst->getValue();
140  if (CI->isNegative())
141  Direction = '<';
142  else if (CI->isZero())
143  Direction = '=';
144  else
145  Direction = '>';
146  Dep.push_back(Direction);
147  } else if (D->isScalar(II)) {
148  Direction = 'S';
149  Dep.push_back(Direction);
150  } else {
151  unsigned Dir = D->getDirection(II);
152  if (Dir == Dependence::DVEntry::LT ||
154  Direction = '<';
155  else if (Dir == Dependence::DVEntry::GT ||
157  Direction = '>';
158  else if (Dir == Dependence::DVEntry::EQ)
159  Direction = '=';
160  else
161  Direction = '*';
162  Dep.push_back(Direction);
163  }
164  }
165  while (Dep.size() != Level) {
166  Dep.push_back('I');
167  }
168 
169  DepMatrix.push_back(Dep);
170  if (DepMatrix.size() > MaxMemInstrCount) {
171  LLVM_DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount
172  << " dependencies inside loop\n");
173  return false;
174  }
175  }
176  }
177  }
178 
179  return true;
180 }
181 
182 // A loop is moved from index 'from' to an index 'to'. Update the Dependence
183 // matrix by exchanging the two columns.
184 static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx,
185  unsigned ToIndx) {
186  for (unsigned I = 0, E = DepMatrix.size(); I < E; ++I)
187  std::swap(DepMatrix[I][ToIndx], DepMatrix[I][FromIndx]);
188 }
189 
190 // Checks if outermost non '=','S'or'I' dependence in the dependence matrix is
191 // '>'
192 static bool isOuterMostDepPositive(CharMatrix &DepMatrix, unsigned Row,
193  unsigned Column) {
194  for (unsigned i = 0; i <= Column; ++i) {
195  if (DepMatrix[Row][i] == '<')
196  return false;
197  if (DepMatrix[Row][i] == '>')
198  return true;
199  }
200  // All dependencies were '=','S' or 'I'
201  return false;
202 }
203 
204 // Checks if no dependence exist in the dependency matrix in Row before Column.
205 static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row,
206  unsigned Column) {
207  for (unsigned i = 0; i < Column; ++i) {
208  if (DepMatrix[Row][i] != '=' && DepMatrix[Row][i] != 'S' &&
209  DepMatrix[Row][i] != 'I')
210  return false;
211  }
212  return true;
213 }
214 
215 static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row,
216  unsigned OuterLoopId, char InnerDep,
217  char OuterDep) {
218  if (isOuterMostDepPositive(DepMatrix, Row, OuterLoopId))
219  return false;
220 
221  if (InnerDep == OuterDep)
222  return true;
223 
224  // It is legal to interchange if and only if after interchange no row has a
225  // '>' direction as the leftmost non-'='.
226 
227  if (InnerDep == '=' || InnerDep == 'S' || InnerDep == 'I')
228  return true;
229 
230  if (InnerDep == '<')
231  return true;
232 
233  if (InnerDep == '>') {
234  // If OuterLoopId represents outermost loop then interchanging will make the
235  // 1st dependency as '>'
236  if (OuterLoopId == 0)
237  return false;
238 
239  // If all dependencies before OuterloopId are '=','S'or 'I'. Then
240  // interchanging will result in this row having an outermost non '='
241  // dependency of '>'
242  if (!containsNoDependence(DepMatrix, Row, OuterLoopId))
243  return true;
244  }
245 
246  return false;
247 }
248 
249 // Checks if it is legal to interchange 2 loops.
250 // [Theorem] A permutation of the loops in a perfect nest is legal if and only
251 // if the direction matrix, after the same permutation is applied to its
252 // columns, has no ">" direction as the leftmost non-"=" direction in any row.
253 static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
254  unsigned InnerLoopId,
255  unsigned OuterLoopId) {
256  unsigned NumRows = DepMatrix.size();
257  // For each row check if it is valid to interchange.
258  for (unsigned Row = 0; Row < NumRows; ++Row) {
259  char InnerDep = DepMatrix[Row][InnerLoopId];
260  char OuterDep = DepMatrix[Row][OuterLoopId];
261  if (InnerDep == '*' || OuterDep == '*')
262  return false;
263  if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep, OuterDep))
264  return false;
265  }
266  return true;
267 }
268 
269 static void populateWorklist(Loop &L, LoopVector &LoopList) {
270  LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "
271  << L.getHeader()->getParent()->getName() << " Loop: %"
272  << L.getHeader()->getName() << '\n');
273  assert(LoopList.empty() && "LoopList should initially be empty!");
274  Loop *CurrentLoop = &L;
275  const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops();
276  while (!Vec->empty()) {
277  // The current loop has multiple subloops in it hence it is not tightly
278  // nested.
279  // Discard all loops above it added into Worklist.
280  if (Vec->size() != 1) {
281  LoopList = {};
282  return;
283  }
284 
285  LoopList.push_back(CurrentLoop);
286  CurrentLoop = Vec->front();
287  Vec = &CurrentLoop->getSubLoops();
288  }
289  LoopList.push_back(CurrentLoop);
290  return;
291 }
292 
293 namespace {
294 
295 /// LoopInterchangeLegality checks if it is legal to interchange the loop.
296 class LoopInterchangeLegality {
297 public:
298  LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
300  : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
301 
302  /// Check if the loops can be interchanged.
303  bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
304  CharMatrix &DepMatrix);
305 
306  /// Discover induction PHIs in the header of \p L. Induction
307  /// PHIs are added to \p Inductions.
308  bool findInductions(Loop *L, SmallVectorImpl<PHINode *> &Inductions);
309 
310  /// Check if the loop structure is understood. We do not handle triangular
311  /// loops for now.
312  bool isLoopStructureUnderstood();
313 
314  bool currentLimitations();
315 
316  const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions() const {
317  return OuterInnerReductions;
318  }
319 
320  const SmallVectorImpl<PHINode *> &getInnerLoopInductions() const {
321  return InnerLoopInductions;
322  }
323 
324 private:
325  bool tightlyNested(Loop *Outer, Loop *Inner);
326  bool containsUnsafeInstructions(BasicBlock *BB);
327 
328  /// Discover induction and reduction PHIs in the header of \p L. Induction
329  /// PHIs are added to \p Inductions, reductions are added to
330  /// OuterInnerReductions. When the outer loop is passed, the inner loop needs
331  /// to be passed as \p InnerLoop.
332  bool findInductionAndReductions(Loop *L,
333  SmallVector<PHINode *, 8> &Inductions,
334  Loop *InnerLoop);
335 
336  Loop *OuterLoop;
337  Loop *InnerLoop;
338 
339  ScalarEvolution *SE;
340 
341  /// Interface to emit optimization remarks.
343 
344  /// Set of reduction PHIs taking part of a reduction across the inner and
345  /// outer loop.
346  SmallPtrSet<PHINode *, 4> OuterInnerReductions;
347 
348  /// Set of inner loop induction PHIs
349  SmallVector<PHINode *, 8> InnerLoopInductions;
350 };
351 
352 /// LoopInterchangeProfitability checks if it is profitable to interchange the
353 /// loop.
354 class LoopInterchangeProfitability {
355 public:
356  LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
358  : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
359 
360  /// Check if the loop interchange is profitable.
361  bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId,
362  CharMatrix &DepMatrix);
363 
364 private:
365  int getInstrOrderCost();
366 
367  Loop *OuterLoop;
368  Loop *InnerLoop;
369 
370  /// Scev analysis.
371  ScalarEvolution *SE;
372 
373  /// Interface to emit optimization remarks.
375 };
376 
377 /// LoopInterchangeTransform interchanges the loop.
378 class LoopInterchangeTransform {
379 public:
380  LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
381  LoopInfo *LI, DominatorTree *DT,
382  const LoopInterchangeLegality &LIL)
383  : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {}
384 
385  /// Interchange OuterLoop and InnerLoop.
386  bool transform();
387  void restructureLoops(Loop *NewInner, Loop *NewOuter,
388  BasicBlock *OrigInnerPreHeader,
389  BasicBlock *OrigOuterPreHeader);
390  void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
391 
392 private:
393  bool adjustLoopLinks();
394  bool adjustLoopBranches();
395 
396  Loop *OuterLoop;
397  Loop *InnerLoop;
398 
399  /// Scev analysis.
400  ScalarEvolution *SE;
401 
402  LoopInfo *LI;
403  DominatorTree *DT;
404 
405  const LoopInterchangeLegality &LIL;
406 };
407 
408 struct LoopInterchange {
409  ScalarEvolution *SE = nullptr;
410  LoopInfo *LI = nullptr;
411  DependenceInfo *DI = nullptr;
412  DominatorTree *DT = nullptr;
413 
414  /// Interface to emit optimization remarks.
416 
417  LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
419  : SE(SE), LI(LI), DI(DI), DT(DT), ORE(ORE) {}
420 
421  bool run(Loop *L) {
422  if (L->getParentLoop())
423  return false;
424  SmallVector<Loop *, 8> LoopList;
425  populateWorklist(*L, LoopList);
426  return processLoopList(LoopList);
427  }
428 
429  bool run(LoopNest &LN) {
430  SmallVector<Loop *, 8> LoopList(LN.getLoops().begin(), LN.getLoops().end());
431  for (unsigned I = 1; I < LoopList.size(); ++I)
432  if (LoopList[I]->getParentLoop() != LoopList[I - 1])
433  return false;
434  return processLoopList(LoopList);
435  }
436 
437  bool isComputableLoopNest(ArrayRef<Loop *> LoopList) {
438  for (Loop *L : LoopList) {
439  const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
440  if (isa<SCEVCouldNotCompute>(ExitCountOuter)) {
441  LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n");
442  return false;
443  }
444  if (L->getNumBackEdges() != 1) {
445  LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n");
446  return false;
447  }
448  if (!L->getExitingBlock()) {
449  LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n");
450  return false;
451  }
452  }
453  return true;
454  }
455 
456  unsigned selectLoopForInterchange(ArrayRef<Loop *> LoopList) {
457  // TODO: Add a better heuristic to select the loop to be interchanged based
458  // on the dependence matrix. Currently we select the innermost loop.
459  return LoopList.size() - 1;
460  }
461 
462  bool processLoopList(SmallVectorImpl<Loop *> &LoopList) {
463  bool Changed = false;
464  unsigned LoopNestDepth = LoopList.size();
465  if (LoopNestDepth < 2) {
466  LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n");
467  return false;
468  }
469  if (LoopNestDepth > MaxLoopNestDepth) {
470  LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than "
471  << MaxLoopNestDepth << "\n");
472  return false;
473  }
474  if (!isComputableLoopNest(LoopList)) {
475  LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n");
476  return false;
477  }
478 
479  LLVM_DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepth
480  << "\n");
481 
482  CharMatrix DependencyMatrix;
483  Loop *OuterMostLoop = *(LoopList.begin());
484  if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth,
485  OuterMostLoop, DI)) {
486  LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n");
487  return false;
488  }
489 #ifdef DUMP_DEP_MATRICIES
490  LLVM_DEBUG(dbgs() << "Dependence before interchange\n");
491  printDepMatrix(DependencyMatrix);
492 #endif
493 
494  // Get the Outermost loop exit.
495  BasicBlock *LoopNestExit = OuterMostLoop->getExitBlock();
496  if (!LoopNestExit) {
497  LLVM_DEBUG(dbgs() << "OuterMostLoop needs an unique exit block");
498  return false;
499  }
500 
501  unsigned SelecLoopId = selectLoopForInterchange(LoopList);
502  // We try to achieve the globally optimal memory access for the loopnest,
503  // and do interchange based on a bubble-sort fasion. We start from
504  // the innermost loop, move it outwards to the best possible position
505  // and repeat this process.
506  for (unsigned j = SelecLoopId; j > 0; j--) {
507  bool ChangedPerIter = false;
508  for (unsigned i = SelecLoopId; i > SelecLoopId - j; i--) {
509  bool Interchanged = processLoop(LoopList[i], LoopList[i - 1], i, i - 1,
510  DependencyMatrix);
511  if (!Interchanged)
512  continue;
513  // Loops interchanged, update LoopList accordingly.
514  std::swap(LoopList[i - 1], LoopList[i]);
515  // Update the DependencyMatrix
516  interChangeDependencies(DependencyMatrix, i, i - 1);
517 #ifdef DUMP_DEP_MATRICIES
518  LLVM_DEBUG(dbgs() << "Dependence after interchange\n");
519  printDepMatrix(DependencyMatrix);
520 #endif
521  ChangedPerIter |= Interchanged;
522  Changed |= Interchanged;
523  }
524  // Early abort if there was no interchange during an entire round of
525  // moving loops outwards.
526  if (!ChangedPerIter)
527  break;
528  }
529  return Changed;
530  }
531 
532  bool processLoop(Loop *InnerLoop, Loop *OuterLoop, unsigned InnerLoopId,
533  unsigned OuterLoopId,
534  std::vector<std::vector<char>> &DependencyMatrix) {
535  LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId
536  << " and OuterLoopId = " << OuterLoopId << "\n");
537  LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
538  if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
539  LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n");
540  return false;
541  }
542  LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");
543  LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
544  if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) {
545  LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
546  return false;
547  }
548 
549  ORE->emit([&]() {
550  return OptimizationRemark(DEBUG_TYPE, "Interchanged",
551  InnerLoop->getStartLoc(),
552  InnerLoop->getHeader())
553  << "Loop interchanged with enclosing loop.";
554  });
555 
556  LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL);
557  LIT.transform();
558  LLVM_DEBUG(dbgs() << "Loops interchanged.\n");
559  LoopsInterchanged++;
560 
561  assert(InnerLoop->isLCSSAForm(*DT) &&
562  "Inner loop not left in LCSSA form after loop interchange!");
563  assert(OuterLoop->isLCSSAForm(*DT) &&
564  "Outer loop not left in LCSSA form after loop interchange!");
565 
566  return true;
567  }
568 };
569 
570 } // end anonymous namespace
571 
572 bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock *BB) {
573  return any_of(*BB, [](const Instruction &I) {
574  return I.mayHaveSideEffects() || I.mayReadFromMemory();
575  });
576 }
577 
578 bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
579  BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
580  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
581  BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
582 
583  LLVM_DEBUG(dbgs() << "Checking if loops are tightly nested\n");
584 
585  // A perfectly nested loop will not have any branch in between the outer and
586  // inner block i.e. outer header will branch to either inner preheader and
587  // outerloop latch.
588  BranchInst *OuterLoopHeaderBI =
589  dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
590  if (!OuterLoopHeaderBI)
591  return false;
592 
593  for (BasicBlock *Succ : successors(OuterLoopHeaderBI))
594  if (Succ != InnerLoopPreHeader && Succ != InnerLoop->getHeader() &&
595  Succ != OuterLoopLatch)
596  return false;
597 
598  LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n");
599  // We do not have any basic block in between now make sure the outer header
600  // and outer loop latch doesn't contain any unsafe instructions.
601  if (containsUnsafeInstructions(OuterLoopHeader) ||
602  containsUnsafeInstructions(OuterLoopLatch))
603  return false;
604 
605  // Also make sure the inner loop preheader does not contain any unsafe
606  // instructions. Note that all instructions in the preheader will be moved to
607  // the outer loop header when interchanging.
608  if (InnerLoopPreHeader != OuterLoopHeader &&
609  containsUnsafeInstructions(InnerLoopPreHeader))
610  return false;
611 
612  BasicBlock *InnerLoopExit = InnerLoop->getExitBlock();
613  // Ensure the inner loop exit block flows to the outer loop latch possibly
614  // through empty blocks.
615  const BasicBlock &SuccInner =
616  LoopNest::skipEmptyBlockUntil(InnerLoopExit, OuterLoopLatch);
617  if (&SuccInner != OuterLoopLatch) {
618  LLVM_DEBUG(dbgs() << "Inner loop exit block " << *InnerLoopExit
619  << " does not lead to the outer loop latch.\n";);
620  return false;
621  }
622  // The inner loop exit block does flow to the outer loop latch and not some
623  // other BBs, now make sure it contains safe instructions, since it will be
624  // moved into the (new) inner loop after interchange.
625  if (containsUnsafeInstructions(InnerLoopExit))
626  return false;
627 
628  LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n");
629  // We have a perfect loop nest.
630  return true;
631 }
632 
633 bool LoopInterchangeLegality::isLoopStructureUnderstood() {
634  BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
635  for (PHINode *InnerInduction : InnerLoopInductions) {
636  unsigned Num = InnerInduction->getNumOperands();
637  for (unsigned i = 0; i < Num; ++i) {
638  Value *Val = InnerInduction->getOperand(i);
639  if (isa<Constant>(Val))
640  continue;
641  Instruction *I = dyn_cast<Instruction>(Val);
642  if (!I)
643  return false;
644  // TODO: Handle triangular loops.
645  // e.g. for(int i=0;i<N;i++)
646  // for(int j=i;j<N;j++)
647  unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i);
648  if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
649  InnerLoopPreheader &&
650  !OuterLoop->isLoopInvariant(I)) {
651  return false;
652  }
653  }
654  }
655 
656  // TODO: Handle triangular loops of another form.
657  // e.g. for(int i=0;i<N;i++)
658  // for(int j=0;j<i;j++)
659  // or,
660  // for(int i=0;i<N;i++)
661  // for(int j=0;j*i<N;j++)
662  BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
663  BranchInst *InnerLoopLatchBI =
664  dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
665  if (!InnerLoopLatchBI->isConditional())
666  return false;
667  if (CmpInst *InnerLoopCmp =
668  dyn_cast<CmpInst>(InnerLoopLatchBI->getCondition())) {
669  Value *Op0 = InnerLoopCmp->getOperand(0);
670  Value *Op1 = InnerLoopCmp->getOperand(1);
671 
672  // LHS and RHS of the inner loop exit condition, e.g.,
673  // in "for(int j=0;j<i;j++)", LHS is j and RHS is i.
674  Value *Left = nullptr;
675  Value *Right = nullptr;
676 
677  // Check if V only involves inner loop induction variable.
678  // Return true if V is InnerInduction, or a cast from
679  // InnerInduction, or a binary operator that involves
680  // InnerInduction and a constant.
681  std::function<bool(Value *)> IsPathToInnerIndVar;
682  IsPathToInnerIndVar = [this, &IsPathToInnerIndVar](const Value *V) -> bool {
683  if (llvm::is_contained(InnerLoopInductions, V))
684  return true;
685  if (isa<Constant>(V))
686  return true;
687  const Instruction *I = dyn_cast<Instruction>(V);
688  if (!I)
689  return false;
690  if (isa<CastInst>(I))
691  return IsPathToInnerIndVar(I->getOperand(0));
692  if (isa<BinaryOperator>(I))
693  return IsPathToInnerIndVar(I->getOperand(0)) &&
694  IsPathToInnerIndVar(I->getOperand(1));
695  return false;
696  };
697 
698  // In case of multiple inner loop indvars, it is okay if LHS and RHS
699  // are both inner indvar related variables.
700  if (IsPathToInnerIndVar(Op0) && IsPathToInnerIndVar(Op1))
701  return true;
702 
703  // Otherwise we check if the cmp instruction compares an inner indvar
704  // related variable (Left) with a outer loop invariant (Right).
705  if (IsPathToInnerIndVar(Op0) && !isa<Constant>(Op0)) {
706  Left = Op0;
707  Right = Op1;
708  } else if (IsPathToInnerIndVar(Op1) && !isa<Constant>(Op1)) {
709  Left = Op1;
710  Right = Op0;
711  }
712 
713  if (Left == nullptr)
714  return false;
715 
716  const SCEV *S = SE->getSCEV(Right);
717  if (!SE->isLoopInvariant(S, OuterLoop))
718  return false;
719  }
720 
721  return true;
722 }
723 
724 // If SV is a LCSSA PHI node with a single incoming value, return the incoming
725 // value.
726 static Value *followLCSSA(Value *SV) {
727  PHINode *PHI = dyn_cast<PHINode>(SV);
728  if (!PHI)
729  return SV;
730 
731  if (PHI->getNumIncomingValues() != 1)
732  return SV;
733  return followLCSSA(PHI->getIncomingValue(0));
734 }
735 
736 // Check V's users to see if it is involved in a reduction in L.
738  // Reduction variables cannot be constants.
739  if (isa<Constant>(V))
740  return nullptr;
741 
742  for (Value *User : V->users()) {
743  if (PHINode *PHI = dyn_cast<PHINode>(User)) {
744  if (PHI->getNumIncomingValues() == 1)
745  continue;
747  if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD)) {
748  // Detect floating point reduction only when it can be reordered.
749  if (RD.getExactFPMathInst() != nullptr)
750  return nullptr;
751  return PHI;
752  }
753  return nullptr;
754  }
755  }
756 
757  return nullptr;
758 }
759 
760 bool LoopInterchangeLegality::findInductionAndReductions(
761  Loop *L, SmallVector<PHINode *, 8> &Inductions, Loop *InnerLoop) {
762  if (!L->getLoopLatch() || !L->getLoopPredecessor())
763  return false;
764  for (PHINode &PHI : L->getHeader()->phis()) {
767  if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
768  Inductions.push_back(&PHI);
769  else {
770  // PHIs in inner loops need to be part of a reduction in the outer loop,
771  // discovered when checking the PHIs of the outer loop earlier.
772  if (!InnerLoop) {
773  if (!OuterInnerReductions.count(&PHI)) {
774  LLVM_DEBUG(dbgs() << "Inner loop PHI is not part of reductions "
775  "across the outer loop.\n");
776  return false;
777  }
778  } else {
779  assert(PHI.getNumIncomingValues() == 2 &&
780  "Phis in loop header should have exactly 2 incoming values");
781  // Check if we have a PHI node in the outer loop that has a reduction
782  // result from the inner loop as an incoming value.
783  Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch()));
784  PHINode *InnerRedPhi = findInnerReductionPhi(InnerLoop, V);
785  if (!InnerRedPhi ||
786  !llvm::is_contained(InnerRedPhi->incoming_values(), &PHI)) {
787  LLVM_DEBUG(
788  dbgs()
789  << "Failed to recognize PHI as an induction or reduction.\n");
790  return false;
791  }
792  OuterInnerReductions.insert(&PHI);
793  OuterInnerReductions.insert(InnerRedPhi);
794  }
795  }
796  }
797  return true;
798 }
799 
800 // This function indicates the current limitations in the transform as a result
801 // of which we do not proceed.
802 bool LoopInterchangeLegality::currentLimitations() {
803  BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
804 
805  // transform currently expects the loop latches to also be the exiting
806  // blocks.
807  if (InnerLoop->getExitingBlock() != InnerLoopLatch ||
808  OuterLoop->getExitingBlock() != OuterLoop->getLoopLatch() ||
809  !isa<BranchInst>(InnerLoopLatch->getTerminator()) ||
810  !isa<BranchInst>(OuterLoop->getLoopLatch()->getTerminator())) {
811  LLVM_DEBUG(
812  dbgs() << "Loops where the latch is not the exiting block are not"
813  << " supported currently.\n");
814  ORE->emit([&]() {
815  return OptimizationRemarkMissed(DEBUG_TYPE, "ExitingNotLatch",
816  OuterLoop->getStartLoc(),
817  OuterLoop->getHeader())
818  << "Loops where the latch is not the exiting block cannot be"
819  " interchange currently.";
820  });
821  return true;
822  }
823 
824  SmallVector<PHINode *, 8> Inductions;
825  if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
826  LLVM_DEBUG(
827  dbgs() << "Only outer loops with induction or reduction PHI nodes "
828  << "are supported currently.\n");
829  ORE->emit([&]() {
830  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIOuter",
831  OuterLoop->getStartLoc(),
832  OuterLoop->getHeader())
833  << "Only outer loops with induction or reduction PHI nodes can be"
834  " interchanged currently.";
835  });
836  return true;
837  }
838 
839  Inductions.clear();
840  if (!findInductionAndReductions(InnerLoop, Inductions, nullptr)) {
841  LLVM_DEBUG(
842  dbgs() << "Only inner loops with induction or reduction PHI nodes "
843  << "are supported currently.\n");
844  ORE->emit([&]() {
845  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner",
846  InnerLoop->getStartLoc(),
847  InnerLoop->getHeader())
848  << "Only inner loops with induction or reduction PHI nodes can be"
849  " interchange currently.";
850  });
851  return true;
852  }
853 
854  // TODO: Triangular loops are not handled for now.
855  if (!isLoopStructureUnderstood()) {
856  LLVM_DEBUG(dbgs() << "Loop structure not understood by pass\n");
857  ORE->emit([&]() {
858  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedStructureInner",
859  InnerLoop->getStartLoc(),
860  InnerLoop->getHeader())
861  << "Inner loop structure not understood currently.";
862  });
863  return true;
864  }
865 
866  return false;
867 }
868 
869 bool LoopInterchangeLegality::findInductions(
870  Loop *L, SmallVectorImpl<PHINode *> &Inductions) {
871  for (PHINode &PHI : L->getHeader()->phis()) {
873  if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
874  Inductions.push_back(&PHI);
875  }
876  return !Inductions.empty();
877 }
878 
879 // We currently only support LCSSA PHI nodes in the inner loop exit, if their
880 // users are either reduction PHIs or PHIs outside the outer loop (which means
881 // the we are only interested in the final value after the loop).
882 static bool
884  SmallPtrSetImpl<PHINode *> &Reductions) {
885  BasicBlock *InnerExit = OuterL->getUniqueExitBlock();
886  for (PHINode &PHI : InnerExit->phis()) {
887  // Reduction lcssa phi will have only 1 incoming block that from loop latch.
888  if (PHI.getNumIncomingValues() > 1)
889  return false;
890  if (any_of(PHI.users(), [&Reductions, OuterL](User *U) {
891  PHINode *PN = dyn_cast<PHINode>(U);
892  return !PN ||
893  (!Reductions.count(PN) && OuterL->contains(PN->getParent()));
894  })) {
895  return false;
896  }
897  }
898  return true;
899 }
900 
901 // We currently support LCSSA PHI nodes in the outer loop exit, if their
902 // incoming values do not come from the outer loop latch or if the
903 // outer loop latch has a single predecessor. In that case, the value will
904 // be available if both the inner and outer loop conditions are true, which
905 // will still be true after interchanging. If we have multiple predecessor,
906 // that may not be the case, e.g. because the outer loop latch may be executed
907 // if the inner loop is not executed.
908 static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
909  BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock();
910  for (PHINode &PHI : LoopNestExit->phis()) {
911  for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
912  Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i));
913  if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch())
914  continue;
915 
916  // The incoming value is defined in the outer loop latch. Currently we
917  // only support that in case the outer loop latch has a single predecessor.
918  // This guarantees that the outer loop latch is executed if and only if
919  // the inner loop is executed (because tightlyNested() guarantees that the
920  // outer loop header only branches to the inner loop or the outer loop
921  // latch).
922  // FIXME: We could weaken this logic and allow multiple predecessors,
923  // if the values are produced outside the loop latch. We would need
924  // additional logic to update the PHI nodes in the exit block as
925  // well.
926  if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr)
927  return false;
928  }
929  }
930  return true;
931 }
932 
933 // In case of multi-level nested loops, it may occur that lcssa phis exist in
934 // the latch of InnerLoop, i.e., when defs of the incoming values are further
935 // inside the loopnest. Sometimes those incoming values are not available
936 // after interchange, since the original inner latch will become the new outer
937 // latch which may have predecessor paths that do not include those incoming
938 // values.
939 // TODO: Handle transformation of lcssa phis in the InnerLoop latch in case of
940 // multi-level loop nests.
941 static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
942  if (InnerLoop->getSubLoops().empty())
943  return true;
944  // If the original outer latch has only one predecessor, then values defined
945  // further inside the looploop, e.g., in the innermost loop, will be available
946  // at the new outer latch after interchange.
947  if (OuterLoop->getLoopLatch()->getUniquePredecessor() != nullptr)
948  return true;
949 
950  // The outer latch has more than one predecessors, i.e., the inner
951  // exit and the inner header.
952  // PHI nodes in the inner latch are lcssa phis where the incoming values
953  // are defined further inside the loopnest. Check if those phis are used
954  // in the original inner latch. If that is the case then bail out since
955  // those incoming values may not be available at the new outer latch.
956  BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
957  for (PHINode &PHI : InnerLoopLatch->phis()) {
958  for (auto *U : PHI.users()) {
959  Instruction *UI = cast<Instruction>(U);
960  if (InnerLoopLatch == UI->getParent())
961  return false;
962  }
963  }
964  return true;
965 }
966 
967 bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
968  unsigned OuterLoopId,
969  CharMatrix &DepMatrix) {
970  if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) {
971  LLVM_DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId
972  << " and OuterLoopId = " << OuterLoopId
973  << " due to dependence\n");
974  ORE->emit([&]() {
975  return OptimizationRemarkMissed(DEBUG_TYPE, "Dependence",
976  InnerLoop->getStartLoc(),
977  InnerLoop->getHeader())
978  << "Cannot interchange loops due to dependences.";
979  });
980  return false;
981  }
982  // Check if outer and inner loop contain legal instructions only.
983  for (auto *BB : OuterLoop->blocks())
984  for (Instruction &I : BB->instructionsWithoutDebug())
985  if (CallInst *CI = dyn_cast<CallInst>(&I)) {
986  // readnone functions do not prevent interchanging.
987  if (CI->onlyWritesMemory())
988  continue;
989  LLVM_DEBUG(
990  dbgs() << "Loops with call instructions cannot be interchanged "
991  << "safely.");
992  ORE->emit([&]() {
993  return OptimizationRemarkMissed(DEBUG_TYPE, "CallInst",
994  CI->getDebugLoc(),
995  CI->getParent())
996  << "Cannot interchange loops due to call instruction.";
997  });
998 
999  return false;
1000  }
1001 
1002  if (!findInductions(InnerLoop, InnerLoopInductions)) {
1003  LLVM_DEBUG(dbgs() << "Cound not find inner loop induction variables.\n");
1004  return false;
1005  }
1006 
1007  if (!areInnerLoopLatchPHIsSupported(OuterLoop, InnerLoop)) {
1008  LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop latch.\n");
1009  ORE->emit([&]() {
1010  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedInnerLatchPHI",
1011  InnerLoop->getStartLoc(),
1012  InnerLoop->getHeader())
1013  << "Cannot interchange loops because unsupported PHI nodes found "
1014  "in inner loop latch.";
1015  });
1016  return false;
1017  }
1018 
1019  // TODO: The loops could not be interchanged due to current limitations in the
1020  // transform module.
1021  if (currentLimitations()) {
1022  LLVM_DEBUG(dbgs() << "Not legal because of current transform limitation\n");
1023  return false;
1024  }
1025 
1026  // Check if the loops are tightly nested.
1027  if (!tightlyNested(OuterLoop, InnerLoop)) {
1028  LLVM_DEBUG(dbgs() << "Loops not tightly nested\n");
1029  ORE->emit([&]() {
1030  return OptimizationRemarkMissed(DEBUG_TYPE, "NotTightlyNested",
1031  InnerLoop->getStartLoc(),
1032  InnerLoop->getHeader())
1033  << "Cannot interchange loops because they are not tightly "
1034  "nested.";
1035  });
1036  return false;
1037  }
1038 
1039  if (!areInnerLoopExitPHIsSupported(OuterLoop, InnerLoop,
1040  OuterInnerReductions)) {
1041  LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop exit.\n");
1042  ORE->emit([&]() {
1043  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
1044  InnerLoop->getStartLoc(),
1045  InnerLoop->getHeader())
1046  << "Found unsupported PHI node in loop exit.";
1047  });
1048  return false;
1049  }
1050 
1051  if (!areOuterLoopExitPHIsSupported(OuterLoop, InnerLoop)) {
1052  LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n");
1053  ORE->emit([&]() {
1054  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
1055  OuterLoop->getStartLoc(),
1056  OuterLoop->getHeader())
1057  << "Found unsupported PHI node in loop exit.";
1058  });
1059  return false;
1060  }
1061 
1062  return true;
1063 }
1064 
1065 int LoopInterchangeProfitability::getInstrOrderCost() {
1066  unsigned GoodOrder, BadOrder;
1067  BadOrder = GoodOrder = 0;
1068  for (BasicBlock *BB : InnerLoop->blocks()) {
1069  for (Instruction &Ins : *BB) {
1070  if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) {
1071  unsigned NumOp = GEP->getNumOperands();
1072  bool FoundInnerInduction = false;
1073  bool FoundOuterInduction = false;
1074  for (unsigned i = 0; i < NumOp; ++i) {
1075  // Skip operands that are not SCEV-able.
1076  if (!SE->isSCEVable(GEP->getOperand(i)->getType()))
1077  continue;
1078 
1079  const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i));
1080  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal);
1081  if (!AR)
1082  continue;
1083 
1084  // If we find the inner induction after an outer induction e.g.
1085  // for(int i=0;i<N;i++)
1086  // for(int j=0;j<N;j++)
1087  // A[i][j] = A[i-1][j-1]+k;
1088  // then it is a good order.
1089  if (AR->getLoop() == InnerLoop) {
1090  // We found an InnerLoop induction after OuterLoop induction. It is
1091  // a good order.
1092  FoundInnerInduction = true;
1093  if (FoundOuterInduction) {
1094  GoodOrder++;
1095  break;
1096  }
1097  }
1098  // If we find the outer induction after an inner induction e.g.
1099  // for(int i=0;i<N;i++)
1100  // for(int j=0;j<N;j++)
1101  // A[j][i] = A[j-1][i-1]+k;
1102  // then it is a bad order.
1103  if (AR->getLoop() == OuterLoop) {
1104  // We found an OuterLoop induction after InnerLoop induction. It is
1105  // a bad order.
1106  FoundOuterInduction = true;
1107  if (FoundInnerInduction) {
1108  BadOrder++;
1109  break;
1110  }
1111  }
1112  }
1113  }
1114  }
1115  }
1116  return GoodOrder - BadOrder;
1117 }
1118 
1119 static bool isProfitableForVectorization(unsigned InnerLoopId,
1120  unsigned OuterLoopId,
1121  CharMatrix &DepMatrix) {
1122  // TODO: Improve this heuristic to catch more cases.
1123  // If the inner loop is loop independent or doesn't carry any dependency it is
1124  // profitable to move this to outer position.
1125  for (auto &Row : DepMatrix) {
1126  if (Row[InnerLoopId] != 'S' && Row[InnerLoopId] != 'I')
1127  return false;
1128  // TODO: We need to improve this heuristic.
1129  if (Row[OuterLoopId] != '=')
1130  return false;
1131  }
1132  // If outer loop has dependence and inner loop is loop independent then it is
1133  // profitable to interchange to enable parallelism.
1134  // If there are no dependences, interchanging will not improve anything.
1135  return !DepMatrix.empty();
1136 }
1137 
1138 bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
1139  unsigned OuterLoopId,
1140  CharMatrix &DepMatrix) {
1141  // TODO: Add better profitability checks.
1142  // e.g
1143  // 1) Construct dependency matrix and move the one with no loop carried dep
1144  // inside to enable vectorization.
1145 
1146  // This is rough cost estimation algorithm. It counts the good and bad order
1147  // of induction variables in the instruction and allows reordering if number
1148  // of bad orders is more than good.
1149  int Cost = getInstrOrderCost();
1150  LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n");
1151  if (Cost < -LoopInterchangeCostThreshold)
1152  return true;
1153 
1154  // It is not profitable as per current cache profitability model. But check if
1155  // we can move this loop outside to improve parallelism.
1156  if (isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix))
1157  return true;
1158 
1159  ORE->emit([&]() {
1160  return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable",
1161  InnerLoop->getStartLoc(),
1162  InnerLoop->getHeader())
1163  << "Interchanging loops is too costly (cost="
1164  << ore::NV("Cost", Cost) << ", threshold="
1165  << ore::NV("Threshold", LoopInterchangeCostThreshold)
1166  << ") and it does not improve parallelism.";
1167  });
1168  return false;
1169 }
1170 
1171 void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
1172  Loop *InnerLoop) {
1173  for (Loop *L : *OuterLoop)
1174  if (L == InnerLoop) {
1175  OuterLoop->removeChildLoop(L);
1176  return;
1177  }
1178  llvm_unreachable("Couldn't find loop");
1179 }
1180 
1181 /// Update LoopInfo, after interchanging. NewInner and NewOuter refer to the
1182 /// new inner and outer loop after interchanging: NewInner is the original
1183 /// outer loop and NewOuter is the original inner loop.
1184 ///
1185 /// Before interchanging, we have the following structure
1186 /// Outer preheader
1187 // Outer header
1188 // Inner preheader
1189 // Inner header
1190 // Inner body
1191 // Inner latch
1192 // outer bbs
1193 // Outer latch
1194 //
1195 // After interchanging:
1196 // Inner preheader
1197 // Inner header
1198 // Outer preheader
1199 // Outer header
1200 // Inner body
1201 // outer bbs
1202 // Outer latch
1203 // Inner latch
1204 void LoopInterchangeTransform::restructureLoops(
1205  Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader,
1206  BasicBlock *OrigOuterPreHeader) {
1207  Loop *OuterLoopParent = OuterLoop->getParentLoop();
1208  // The original inner loop preheader moves from the new inner loop to
1209  // the parent loop, if there is one.
1210  NewInner->removeBlockFromLoop(OrigInnerPreHeader);
1211  LI->changeLoopFor(OrigInnerPreHeader, OuterLoopParent);
1212 
1213  // Switch the loop levels.
1214  if (OuterLoopParent) {
1215  // Remove the loop from its parent loop.
1216  removeChildLoop(OuterLoopParent, NewInner);
1217  removeChildLoop(NewInner, NewOuter);
1218  OuterLoopParent->addChildLoop(NewOuter);
1219  } else {
1220  removeChildLoop(NewInner, NewOuter);
1221  LI->changeTopLevelLoop(NewInner, NewOuter);
1222  }
1223  while (!NewOuter->isInnermost())
1224  NewInner->addChildLoop(NewOuter->removeChildLoop(NewOuter->begin()));
1225  NewOuter->addChildLoop(NewInner);
1226 
1227  // BBs from the original inner loop.
1228  SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->blocks());
1229 
1230  // Add BBs from the original outer loop to the original inner loop (excluding
1231  // BBs already in inner loop)
1232  for (BasicBlock *BB : NewInner->blocks())
1233  if (LI->getLoopFor(BB) == NewInner)
1234  NewOuter->addBlockEntry(BB);
1235 
1236  // Now remove inner loop header and latch from the new inner loop and move
1237  // other BBs (the loop body) to the new inner loop.
1238  BasicBlock *OuterHeader = NewOuter->getHeader();
1239  BasicBlock *OuterLatch = NewOuter->getLoopLatch();
1240  for (BasicBlock *BB : OrigInnerBBs) {
1241  // Nothing will change for BBs in child loops.
1242  if (LI->getLoopFor(BB) != NewOuter)
1243  continue;
1244  // Remove the new outer loop header and latch from the new inner loop.
1245  if (BB == OuterHeader || BB == OuterLatch)
1246  NewInner->removeBlockFromLoop(BB);
1247  else
1248  LI->changeLoopFor(BB, NewInner);
1249  }
1250 
1251  // The preheader of the original outer loop becomes part of the new
1252  // outer loop.
1253  NewOuter->addBlockEntry(OrigOuterPreHeader);
1254  LI->changeLoopFor(OrigOuterPreHeader, NewOuter);
1255 
1256  // Tell SE that we move the loops around.
1257  SE->forgetLoop(NewOuter);
1258  SE->forgetLoop(NewInner);
1259 }
1260 
1262  bool Transformed = false;
1263 
1264  if (InnerLoop->getSubLoops().empty()) {
1265  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1266  LLVM_DEBUG(dbgs() << "Splitting the inner loop latch\n");
1267  auto &InductionPHIs = LIL.getInnerLoopInductions();
1268  if (InductionPHIs.empty()) {
1269  LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n");
1270  return false;
1271  }
1272 
1273  SmallVector<Instruction *, 8> InnerIndexVarList;
1274  for (PHINode *CurInductionPHI : InductionPHIs) {
1275  if (CurInductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
1276  InnerIndexVarList.push_back(
1277  dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(1)));
1278  else
1279  InnerIndexVarList.push_back(
1280  dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(0)));
1281  }
1282 
1283  // Create a new latch block for the inner loop. We split at the
1284  // current latch's terminator and then move the condition and all
1285  // operands that are not either loop-invariant or the induction PHI into the
1286  // new latch block.
1287  BasicBlock *NewLatch =
1288  SplitBlock(InnerLoop->getLoopLatch(),
1289  InnerLoop->getLoopLatch()->getTerminator(), DT, LI);
1290 
1292  unsigned i = 0;
1293  auto MoveInstructions = [&i, &WorkList, this, &InductionPHIs, NewLatch]() {
1294  for (; i < WorkList.size(); i++) {
1295  // Duplicate instruction and move it the new latch. Update uses that
1296  // have been moved.
1297  Instruction *NewI = WorkList[i]->clone();
1298  NewI->insertBefore(NewLatch->getFirstNonPHI());
1299  assert(!NewI->mayHaveSideEffects() &&
1300  "Moving instructions with side-effects may change behavior of "
1301  "the loop nest!");
1302  for (Use &U : llvm::make_early_inc_range(WorkList[i]->uses())) {
1303  Instruction *UserI = cast<Instruction>(U.getUser());
1304  if (!InnerLoop->contains(UserI->getParent()) ||
1305  UserI->getParent() == NewLatch ||
1306  llvm::is_contained(InductionPHIs, UserI))
1307  U.set(NewI);
1308  }
1309  // Add operands of moved instruction to the worklist, except if they are
1310  // outside the inner loop or are the induction PHI.
1311  for (Value *Op : WorkList[i]->operands()) {
1312  Instruction *OpI = dyn_cast<Instruction>(Op);
1313  if (!OpI ||
1314  this->LI->getLoopFor(OpI->getParent()) != this->InnerLoop ||
1315  llvm::is_contained(InductionPHIs, OpI))
1316  continue;
1317  WorkList.insert(OpI);
1318  }
1319  }
1320  };
1321 
1322  // FIXME: Should we interchange when we have a constant condition?
1323  Instruction *CondI = dyn_cast<Instruction>(
1324  cast<BranchInst>(InnerLoop->getLoopLatch()->getTerminator())
1325  ->getCondition());
1326  if (CondI)
1327  WorkList.insert(CondI);
1328  MoveInstructions();
1329  for (Instruction *InnerIndexVar : InnerIndexVarList)
1330  WorkList.insert(cast<Instruction>(InnerIndexVar));
1331  MoveInstructions();
1332 
1333  // Splits the inner loops phi nodes out into a separate basic block.
1334  BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1335  SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI);
1336  LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n");
1337  }
1338 
1339  // Instructions in the original inner loop preheader may depend on values
1340  // defined in the outer loop header. Move them there, because the original
1341  // inner loop preheader will become the entry into the interchanged loop nest.
1342  // Currently we move all instructions and rely on LICM to move invariant
1343  // instructions outside the loop nest.
1344  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1345  BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1346  if (InnerLoopPreHeader != OuterLoopHeader) {
1347  SmallPtrSet<Instruction *, 4> NeedsMoving;
1348  for (Instruction &I :
1349  make_early_inc_range(make_range(InnerLoopPreHeader->begin(),
1350  std::prev(InnerLoopPreHeader->end()))))
1351  I.moveBefore(OuterLoopHeader->getTerminator());
1352  }
1353 
1354  Transformed |= adjustLoopLinks();
1355  if (!Transformed) {
1356  LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n");
1357  return false;
1358  }
1359 
1360  return true;
1361 }
1362 
1363 /// \brief Move all instructions except the terminator from FromBB right before
1364 /// InsertBefore
1365 static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
1366  auto &ToList = InsertBefore->getParent()->getInstList();
1367  auto &FromList = FromBB->getInstList();
1368 
1369  ToList.splice(InsertBefore->getIterator(), FromList, FromList.begin(),
1370  FromBB->getTerminator()->getIterator());
1371 }
1372 
1373 /// Swap instructions between \p BB1 and \p BB2 but keep terminators intact.
1374 static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2) {
1375  // Save all non-terminator instructions of BB1 into TempInstrs and unlink them
1376  // from BB1 afterwards.
1377  auto Iter = map_range(*BB1, [](Instruction &I) { return &I; });
1378  SmallVector<Instruction *, 4> TempInstrs(Iter.begin(), std::prev(Iter.end()));
1379  for (Instruction *I : TempInstrs)
1380  I->removeFromParent();
1381 
1382  // Move instructions from BB2 to BB1.
1383  moveBBContents(BB2, BB1->getTerminator());
1384 
1385  // Move instructions from TempInstrs to BB2.
1386  for (Instruction *I : TempInstrs)
1387  I->insertBefore(BB2->getTerminator());
1388 }
1389 
1390 // Update BI to jump to NewBB instead of OldBB. Records updates to the
1391 // dominator tree in DTUpdates. If \p MustUpdateOnce is true, assert that
1392 // \p OldBB is exactly once in BI's successor list.
1393 static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
1394  BasicBlock *NewBB,
1395  std::vector<DominatorTree::UpdateType> &DTUpdates,
1396  bool MustUpdateOnce = true) {
1397  assert((!MustUpdateOnce ||
1399  [OldBB](BasicBlock *BB) {
1400  return BB == OldBB;
1401  }) == 1) && "BI must jump to OldBB exactly once.");
1402  bool Changed = false;
1403  for (Use &Op : BI->operands())
1404  if (Op == OldBB) {
1405  Op.set(NewBB);
1406  Changed = true;
1407  }
1408 
1409  if (Changed) {
1410  DTUpdates.push_back(
1412  DTUpdates.push_back(
1414  }
1415  assert(Changed && "Expected a successor to be updated");
1416 }
1417 
1418 // Move Lcssa PHIs to the right place.
1419 static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,
1420  BasicBlock *InnerLatch, BasicBlock *OuterHeader,
1421  BasicBlock *OuterLatch, BasicBlock *OuterExit,
1422  Loop *InnerLoop, LoopInfo *LI) {
1423 
1424  // Deal with LCSSA PHI nodes in the exit block of the inner loop, that are
1425  // defined either in the header or latch. Those blocks will become header and
1426  // latch of the new outer loop, and the only possible users can PHI nodes
1427  // in the exit block of the loop nest or the outer loop header (reduction
1428  // PHIs, in that case, the incoming value must be defined in the inner loop
1429  // header). We can just substitute the user with the incoming value and remove
1430  // the PHI.
1431  for (PHINode &P : make_early_inc_range(InnerExit->phis())) {
1432  assert(P.getNumIncomingValues() == 1 &&
1433  "Only loops with a single exit are supported!");
1434 
1435  // Incoming values are guaranteed be instructions currently.
1436  auto IncI = cast<Instruction>(P.getIncomingValueForBlock(InnerLatch));
1437  // In case of multi-level nested loops, follow LCSSA to find the incoming
1438  // value defined from the innermost loop.
1439  auto IncIInnerMost = cast<Instruction>(followLCSSA(IncI));
1440  // Skip phis with incoming values from the inner loop body, excluding the
1441  // header and latch.
1442  if (IncIInnerMost->getParent() != InnerLatch &&
1443  IncIInnerMost->getParent() != InnerHeader)
1444  continue;
1445 
1446  assert(all_of(P.users(),
1447  [OuterHeader, OuterExit, IncI, InnerHeader](User *U) {
1448  return (cast<PHINode>(U)->getParent() == OuterHeader &&
1449  IncI->getParent() == InnerHeader) ||
1450  cast<PHINode>(U)->getParent() == OuterExit;
1451  }) &&
1452  "Can only replace phis iff the uses are in the loop nest exit or "
1453  "the incoming value is defined in the inner header (it will "
1454  "dominate all loop blocks after interchanging)");
1455  P.replaceAllUsesWith(IncI);
1456  P.eraseFromParent();
1457  }
1458 
1459  SmallVector<PHINode *, 8> LcssaInnerExit;
1460  for (PHINode &P : InnerExit->phis())
1461  LcssaInnerExit.push_back(&P);
1462 
1463  SmallVector<PHINode *, 8> LcssaInnerLatch;
1464  for (PHINode &P : InnerLatch->phis())
1465  LcssaInnerLatch.push_back(&P);
1466 
1467  // Lcssa PHIs for values used outside the inner loop are in InnerExit.
1468  // If a PHI node has users outside of InnerExit, it has a use outside the
1469  // interchanged loop and we have to preserve it. We move these to
1470  // InnerLatch, which will become the new exit block for the innermost
1471  // loop after interchanging.
1472  for (PHINode *P : LcssaInnerExit)
1473  P->moveBefore(InnerLatch->getFirstNonPHI());
1474 
1475  // If the inner loop latch contains LCSSA PHIs, those come from a child loop
1476  // and we have to move them to the new inner latch.
1477  for (PHINode *P : LcssaInnerLatch)
1478  P->moveBefore(InnerExit->getFirstNonPHI());
1479 
1480  // Deal with LCSSA PHI nodes in the loop nest exit block. For PHIs that have
1481  // incoming values defined in the outer loop, we have to add a new PHI
1482  // in the inner loop latch, which became the exit block of the outer loop,
1483  // after interchanging.
1484  if (OuterExit) {
1485  for (PHINode &P : OuterExit->phis()) {
1486  if (P.getNumIncomingValues() != 1)
1487  continue;
1488  // Skip Phis with incoming values defined in the inner loop. Those should
1489  // already have been updated.
1490  auto I = dyn_cast<Instruction>(P.getIncomingValue(0));
1491  if (!I || LI->getLoopFor(I->getParent()) == InnerLoop)
1492  continue;
1493 
1494  PHINode *NewPhi = dyn_cast<PHINode>(P.clone());
1495  NewPhi->setIncomingValue(0, P.getIncomingValue(0));
1496  NewPhi->setIncomingBlock(0, OuterLatch);
1497  // We might have incoming edges from other BBs, i.e., the original outer
1498  // header.
1499  for (auto *Pred : predecessors(InnerLatch)) {
1500  if (Pred == OuterLatch)
1501  continue;
1502  NewPhi->addIncoming(P.getIncomingValue(0), Pred);
1503  }
1504  NewPhi->insertBefore(InnerLatch->getFirstNonPHI());
1505  P.setIncomingValue(0, NewPhi);
1506  }
1507  }
1508 
1509  // Now adjust the incoming blocks for the LCSSA PHIs.
1510  // For PHIs moved from Inner's exit block, we need to replace Inner's latch
1511  // with the new latch.
1512  InnerLatch->replacePhiUsesWith(InnerLatch, OuterLatch);
1513 }
1514 
1515 bool LoopInterchangeTransform::adjustLoopBranches() {
1516  LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n");
1517  std::vector<DominatorTree::UpdateType> DTUpdates;
1518 
1519  BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1520  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1521 
1522  assert(OuterLoopPreHeader != OuterLoop->getHeader() &&
1523  InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader &&
1524  InnerLoopPreHeader && "Guaranteed by loop-simplify form");
1525  // Ensure that both preheaders do not contain PHI nodes and have single
1526  // predecessors. This allows us to move them easily. We use
1527  // InsertPreHeaderForLoop to create an 'extra' preheader, if the existing
1528  // preheaders do not satisfy those conditions.
1529  if (isa<PHINode>(OuterLoopPreHeader->begin()) ||
1530  !OuterLoopPreHeader->getUniquePredecessor())
1531  OuterLoopPreHeader =
1532  InsertPreheaderForLoop(OuterLoop, DT, LI, nullptr, true);
1533  if (InnerLoopPreHeader == OuterLoop->getHeader())
1534  InnerLoopPreHeader =
1535  InsertPreheaderForLoop(InnerLoop, DT, LI, nullptr, true);
1536 
1537  // Adjust the loop preheader
1538  BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1539  BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1540  BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
1541  BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1542  BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor();
1543  BasicBlock *InnerLoopLatchPredecessor =
1544  InnerLoopLatch->getUniquePredecessor();
1545  BasicBlock *InnerLoopLatchSuccessor;
1546  BasicBlock *OuterLoopLatchSuccessor;
1547 
1548  BranchInst *OuterLoopLatchBI =
1549  dyn_cast<BranchInst>(OuterLoopLatch->getTerminator());
1550  BranchInst *InnerLoopLatchBI =
1551  dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
1552  BranchInst *OuterLoopHeaderBI =
1553  dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
1554  BranchInst *InnerLoopHeaderBI =
1555  dyn_cast<BranchInst>(InnerLoopHeader->getTerminator());
1556 
1557  if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
1558  !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
1559  !InnerLoopHeaderBI)
1560  return false;
1561 
1562  BranchInst *InnerLoopLatchPredecessorBI =
1563  dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator());
1564  BranchInst *OuterLoopPredecessorBI =
1565  dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator());
1566 
1567  if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
1568  return false;
1569  BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor();
1570  if (!InnerLoopHeaderSuccessor)
1571  return false;
1572 
1573  // Adjust Loop Preheader and headers.
1574  // The branches in the outer loop predecessor and the outer loop header can
1575  // be unconditional branches or conditional branches with duplicates. Consider
1576  // this when updating the successors.
1577  updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader,
1578  InnerLoopPreHeader, DTUpdates, /*MustUpdateOnce=*/false);
1579  // The outer loop header might or might not branch to the outer latch.
1580  // We are guaranteed to branch to the inner loop preheader.
1581  if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch)) {
1582  // In this case the outerLoopHeader should branch to the InnerLoopLatch.
1583  updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, InnerLoopLatch,
1584  DTUpdates,
1585  /*MustUpdateOnce=*/false);
1586  }
1587  updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader,
1588  InnerLoopHeaderSuccessor, DTUpdates,
1589  /*MustUpdateOnce=*/false);
1590 
1591  // Adjust reduction PHI's now that the incoming block has changed.
1592  InnerLoopHeaderSuccessor->replacePhiUsesWith(InnerLoopHeader,
1593  OuterLoopHeader);
1594 
1595  updateSuccessor(InnerLoopHeaderBI, InnerLoopHeaderSuccessor,
1596  OuterLoopPreHeader, DTUpdates);
1597 
1598  // -------------Adjust loop latches-----------
1599  if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader)
1600  InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1);
1601  else
1602  InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0);
1603 
1604  updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch,
1605  InnerLoopLatchSuccessor, DTUpdates);
1606 
1607  if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader)
1608  OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1);
1609  else
1610  OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0);
1611 
1612  updateSuccessor(InnerLoopLatchBI, InnerLoopLatchSuccessor,
1613  OuterLoopLatchSuccessor, DTUpdates);
1614  updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
1615  DTUpdates);
1616 
1617  DT->applyUpdates(DTUpdates);
1618  restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
1619  OuterLoopPreHeader);
1620 
1621  moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
1622  OuterLoopHeader, OuterLoopLatch, InnerLoop->getExitBlock(),
1623  InnerLoop, LI);
1624  // For PHIs in the exit block of the outer loop, outer's latch has been
1625  // replaced by Inners'.
1626  OuterLoopLatchSuccessor->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
1627 
1628  auto &OuterInnerReductions = LIL.getOuterInnerReductions();
1629  // Now update the reduction PHIs in the inner and outer loop headers.
1630  SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
1631  for (PHINode &PHI : InnerLoopHeader->phis())
1632  if (OuterInnerReductions.contains(&PHI))
1633  InnerLoopPHIs.push_back(&PHI);
1634 
1635  for (PHINode &PHI : OuterLoopHeader->phis())
1636  if (OuterInnerReductions.contains(&PHI))
1637  OuterLoopPHIs.push_back(&PHI);
1638 
1639  // Now move the remaining reduction PHIs from outer to inner loop header and
1640  // vice versa. The PHI nodes must be part of a reduction across the inner and
1641  // outer loop and all the remains to do is and updating the incoming blocks.
1642  for (PHINode *PHI : OuterLoopPHIs) {
1643  LLVM_DEBUG(dbgs() << "Outer loop reduction PHIs:\n"; PHI->dump(););
1644  PHI->moveBefore(InnerLoopHeader->getFirstNonPHI());
1645  assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node");
1646  }
1647  for (PHINode *PHI : InnerLoopPHIs) {
1648  LLVM_DEBUG(dbgs() << "Inner loop reduction PHIs:\n"; PHI->dump(););
1649  PHI->moveBefore(OuterLoopHeader->getFirstNonPHI());
1650  assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node");
1651  }
1652 
1653  // Update the incoming blocks for moved PHI nodes.
1654  OuterLoopHeader->replacePhiUsesWith(InnerLoopPreHeader, OuterLoopPreHeader);
1655  OuterLoopHeader->replacePhiUsesWith(InnerLoopLatch, OuterLoopLatch);
1656  InnerLoopHeader->replacePhiUsesWith(OuterLoopPreHeader, InnerLoopPreHeader);
1657  InnerLoopHeader->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
1658 
1659  // Values defined in the outer loop header could be used in the inner loop
1660  // latch. In that case, we need to create LCSSA phis for them, because after
1661  // interchanging they will be defined in the new inner loop and used in the
1662  // new outer loop.
1663  IRBuilder<> Builder(OuterLoopHeader->getContext());
1664  SmallVector<Instruction *, 4> MayNeedLCSSAPhis;
1665  for (Instruction &I :
1666  make_range(OuterLoopHeader->begin(), std::prev(OuterLoopHeader->end())))
1667  MayNeedLCSSAPhis.push_back(&I);
1668  formLCSSAForInstructions(MayNeedLCSSAPhis, *DT, *LI, SE, Builder);
1669 
1670  return true;
1671 }
1672 
1673 bool LoopInterchangeTransform::adjustLoopLinks() {
1674  // Adjust all branches in the inner and outer loop.
1675  bool Changed = adjustLoopBranches();
1676  if (Changed) {
1677  // We have interchanged the preheaders so we need to interchange the data in
1678  // the preheaders as well. This is because the content of the inner
1679  // preheader was previously executed inside the outer loop.
1680  BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1681  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1682  swapBBContents(OuterLoopPreHeader, InnerLoopPreHeader);
1683  }
1684  return Changed;
1685 }
1686 
1687 namespace {
1688 /// Main LoopInterchange Pass.
1689 struct LoopInterchangeLegacyPass : public LoopPass {
1690  static char ID;
1691 
1692  LoopInterchangeLegacyPass() : LoopPass(ID) {
1694  }
1695 
1696  void getAnalysisUsage(AnalysisUsage &AU) const override {
1699 
1701  }
1702 
1703  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
1704  if (skipLoop(L))
1705  return false;
1706 
1707  auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1708  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1709  auto *DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI();
1710  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1711  auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
1712 
1713  return LoopInterchange(SE, LI, DI, DT, ORE).run(L);
1714  }
1715 };
1716 } // namespace
1717 
1719 
1720 INITIALIZE_PASS_BEGIN(LoopInterchangeLegacyPass, "loop-interchange",
1721  "Interchanges loops for cache reuse", false, false)
1725 
1726 INITIALIZE_PASS_END(LoopInterchangeLegacyPass, "loop-interchange",
1727  "Interchanges loops for cache reuse", false, false)
1728 
1730  return new LoopInterchangeLegacyPass();
1731 }
1732 
1734  LoopAnalysisManager &AM,
1736  LPMUpdater &U) {
1737  Function &F = *LN.getParent();
1738 
1739  DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
1741  if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, &ORE).run(LN))
1742  return PreservedAnalyses::all();
1744 }
i
i
Definition: README.txt:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
llvm::initializeLoopInterchangeLegacyPassPass
void initializeLoopInterchangeLegacyPassPass(PassRegistry &)
isLegalToInterChangeLoops
static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, unsigned InnerLoopId, unsigned OuterLoopId)
Definition: LoopInterchange.cpp:253
moveBBContents
static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore)
Move all instructions except the terminator from FromBB right before InsertBefore.
Definition: LoopInterchange.cpp:1365
llvm::BasicBlock::end
iterator end()
Definition: BasicBlock.h:299
followLCSSA
static Value * followLCSSA(Value *SV)
Definition: LoopInterchange.cpp:726
llvm::OptimizationRemarkMissed
Diagnostic information for missed-optimization remarks.
Definition: DiagnosticInfo.h:735
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:60
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::LoopBase::getUniqueExitBlock
BlockT * getUniqueExitBlock() const
If getUniqueExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:137
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::BranchInst::successors
iterator_range< succ_op_iterator > successors()
Definition: Instructions.h:3196
llvm::LoopBase::getExitBlock
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:81
llvm::PHINode::incoming_values
op_range incoming_values()
Definition: Instructions.h:2737
llvm::User::operands
op_range operands()
Definition: User.h:242
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
llvm::Dependence::DVEntry::LE
@ LE
Definition: DependenceAnalysis.h:88
Scalar.h
llvm::DependenceAnalysisWrapperPass
Legacy pass manager pass to access dependence information.
Definition: DependenceAnalysis.h:976
llvm::Function
Definition: Function.h:60
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:546
llvm::cfg::UpdateKind::Insert
@ Insert
StringRef.h
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Pass.h
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:138
llvm::Dependence::DVEntry::GE
@ GE
Definition: DependenceAnalysis.h:91
DEBUG_TYPE
#define DEBUG_TYPE
Definition: LoopInterchange.cpp:54
uses
This might compile to this xmm1 xorps xmm0 movss xmm0 ret Now consider if the code caused xmm1 to get spilled This might produce this xmm1 movaps xmm0 movaps xmm1 movss xmm0 ret since the reload is only used by these we could fold it into the uses
Definition: README-SSE.txt:258
llvm::SmallVector< Loop *, 8 >
Statistic.h
llvm::LoopBase::getLoopPredecessor
BlockT * getLoopPredecessor() const
If the given loop's header has exactly one unique predecessor outside the loop, return it.
Definition: LoopInfoImpl.h:194
ErrorHandling.h
llvm::IRBuilder<>
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:630
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
llvm::LoopInfoBase::changeLoopFor
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
Definition: LoopInfo.h:1024
moveLCSSAPhis
static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader, BasicBlock *InnerLatch, BasicBlock *OuterHeader, BasicBlock *OuterLatch, BasicBlock *OuterExit, Loop *InnerLoop, LoopInfo *LI)
Definition: LoopInterchange.cpp:1419
OptimizationRemarkEmitter.h
llvm::Instruction::insertBefore
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:83
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::getLoopAnalysisUsage
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:144
Right
Vector Shift Left Right
Definition: README_P9.txt:118
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
isOuterMostDepPositive
static bool isOuterMostDepPositive(CharMatrix &DepMatrix, unsigned Row, unsigned Column)
Definition: LoopInterchange.cpp:192
ScalarEvolution.h
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:51
llvm::cfg::UpdateKind::Delete
@ Delete
interchange
loop interchange
Definition: LoopInterchange.cpp:1726
llvm::LoopBase::begin
iterator begin() const
Definition: LoopInfo.h:170
findInnerReductionPhi
static PHINode * findInnerReductionPhi(Loop *L, Value *V)
Definition: LoopInterchange.cpp:737
llvm::SmallPtrSet< PHINode *, 4 >
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::successors
auto successors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:29
llvm::codeview::VFTableSlotKind::Outer
@ Outer
STLExtras.h
llvm::SCEVConstant::getValue
ConstantInt * getValue() const
Definition: ScalarEvolutionExpressions.h:69
populateWorklist
static void populateWorklist(Loop &L, LoopVector &LoopList)
Definition: LoopInterchange.cpp:269
llvm::LoopStandardAnalysisResults::DT
DominatorTree & DT
Definition: LoopAnalysisManager.h:54
llvm::PHINode::setIncomingValue
void setIncomingValue(unsigned i, Value *V)
Definition: Instructions.h:2750
llvm::count_if
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1716
loops
loops
Definition: LoopInfo.cpp:1176
llvm::DominatorTreeBase::applyUpdates
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
Definition: GenericDomTree.h:544
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::Instruction::mayHaveSideEffects
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
Definition: Instruction.cpp:695
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::BasicBlock::getUniqueSuccessor
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:299
loop
Analysis the ScalarEvolution expression for r is< loop > Outside the loop
Definition: README.txt:8
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::AlignStyle::Left
@ Left
Instruction.h
CommandLine.h
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::Dependence::DVEntry::LT
@ LT
Definition: DependenceAnalysis.h:86
llvm::LoopBase::getParentLoop
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Definition: LoopInfo.h:113
llvm::LoopBase::getSubLoops
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
Definition: LoopInfo.h:159
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1617
llvm::InductionDescriptor
A struct for saving information about induction variables.
Definition: IVDescriptors.h:306
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::createLoopInterchangePass
Pass * createLoopInterchangePass()
Definition: LoopInterchange.cpp:1729
Constants.h
llvm::PHINode::getIncomingValue
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
Definition: Instructions.h:2747
areInnerLoopExitPHIsSupported
static bool areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL, SmallPtrSetImpl< PHINode * > &Reductions)
Definition: LoopInterchange.cpp:883
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::User
Definition: User.h:44
InstrTypes.h
llvm::RecurrenceDescriptor::getExactFPMathInst
Instruction * getExactFPMathInst() const
Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
Definition: IVDescriptors.h:210
llvm::LoopBase::addChildLoop
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
Definition: LoopInfo.h:411
MaxLoopNestDepth
static const unsigned MaxLoopNestDepth
Definition: LoopInterchange.cpp:75
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:297
swapBBContents
static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2)
Swap instructions between BB1 and BB2 but keep terminators intact.
Definition: LoopInterchange.cpp:1374
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:194
false
Definition: StackSlotColoring.cpp:141
llvm::Dependence::DVEntry::GT
@ GT
Definition: DependenceAnalysis.h:89
populateDependencyMatrix
static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, Loop *L, DependenceInfo *DI)
Definition: LoopInterchange.cpp:87
llvm::Instruction
Definition: Instruction.h:42
isProfitableForVectorization
static bool isProfitableForVectorization(unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix)
Definition: LoopInterchange.cpp:1119
llvm::BasicBlock::phis
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:355
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::predecessors
auto predecessors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:30
LoopUtils.h
llvm::LPPassManager
Definition: LoopPass.h:76
LoopInterchangeCostThreshold
static cl::opt< int > LoopInterchangeCostThreshold("loop-interchange-threshold", cl::init(0), cl::Hidden, cl::desc("Interchange if you gain more than this number"))
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(LoopInterchangeLegacyPass, "loop-interchange", "Interchanges loops for cache reuse", false, false) INITIALIZE_PASS_END(LoopInterchangeLegacyPass
llvm::LoopBase::getExitingBlock
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:48
llvm::BasicBlock::getFirstNonPHI
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:209
areInnerLoopLatchPHIsSupported
static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
Definition: LoopInterchange.cpp:941
llvm::LoopBase::removeBlockFromLoop
void removeBlockFromLoop(BlockT *BB)
This removes the specified basic block from the current loop, updating the Blocks as appropriate.
Definition: LoopInfo.h:476
llvm::PHINode::getNumIncomingValues
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Definition: Instructions.h:2743
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::Loop::LoopBounds::Direction
Direction
An enum for the direction of the loop.
Definition: LoopInfo.h:720
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::dxil::PointerTypeAnalysis::run
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
Definition: PointerTypeAnalysis.cpp:101
LoopInfo.h
llvm::BranchInst::getCondition
Value * getCondition() const
Definition: Instructions.h:3167
llvm::ARM_PROC::IE
@ IE
Definition: ARMBaseInfo.h:27
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:4406
llvm::PHINode::getIncomingValueNumForOperand
static unsigned getIncomingValueNumForOperand(unsigned i)
Definition: Instructions.h:2761
llvm::CmpInst
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:709
llvm::InsertPreheaderForLoop
BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...
Definition: LoopSimplify.cpp:118
llvm::getLoopPassPreservedAnalyses
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Definition: LoopAnalysisManager.cpp:137
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1392
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:75
llvm::DependenceInfo
DependenceInfo - This class is the main dependence-analysis driver.
Definition: DependenceAnalysis.h:269
llvm::Dependence::DVEntry::EQ
@ EQ
Definition: DependenceAnalysis.h:87
llvm::LoopPass
Definition: LoopPass.h:28
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition: LoopPassManager.h:262
llvm::PHINode::setIncomingBlock
void setIncomingBlock(unsigned i, BasicBlock *BB)
Definition: Instructions.h:2786
containsNoDependence
static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row, unsigned Column)
Definition: LoopInterchange.cpp:205
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2801
llvm::LoopNest::skipEmptyBlockUntil
static const BasicBlock & skipEmptyBlockUntil(const BasicBlock *From, const BasicBlock *End, bool CheckUniquePred=false)
Recursivelly traverse all empty 'single successor' basic blocks of From (if there are any).
Definition: LoopNestAnalysis.cpp:288
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:986
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:916
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:618
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:166
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1682
llvm::SCEVConstant
This class represents a constant integer value.
Definition: ScalarEvolutionExpressions.h:60
validDepInterchange
static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row, unsigned OuterLoopId, char InnerDep, char OuterDep)
Definition: LoopInterchange.cpp:215
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:215
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:82
LoopNestAnalysis.h
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
LoopInterchange.h
llvm::Instruction::clone
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
Definition: Instruction.cpp:862
llvm::LoopBase::removeChildLoop
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
Definition: LoopInfo.h:420
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1102
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1624
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
this
Analysis the ScalarEvolution expression for r is this
Definition: README.txt:8
LoopPass.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::transform
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1723
llvm::ConstantInt::isZero
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:194
llvm::BasicBlock::getUniquePredecessor
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:269
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:82
llvm::LoopStandardAnalysisResults::LI
LoopInfo & LI
Definition: LoopAnalysisManager.h:55
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::PICLevel::Level
Level
Definition: CodeGen.h:33
llvm::LoopNest::getParent
Function * getParent() const
Return the function to which the loop-nest belongs.
Definition: LoopNestAnalysis.h:176
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:305
for
this could be done in SelectionDAGISel along with other special for
Definition: README.txt:104
llvm::ScalarEvolution::isLoopInvariant
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
Definition: ScalarEvolution.cpp:13240
areOuterLoopExitPHIsSupported
static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
Definition: LoopInterchange.cpp:908
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:35
j
return j(j<< 16)
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:181
llvm::InductionDescriptor::isInductionPHI
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
Definition: IVDescriptors.cpp:1504
llvm::BasicBlock::replacePhiUsesWith
void replacePhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
Definition: BasicBlock.cpp:443
llvm::map_range
auto map_range(ContainerTy &&C, FuncTy F)
Definition: STLExtras.h:327
llvm::OptimizationRemarkEmitterWrapperPass
OptimizationRemarkEmitter legacy analysis pass.
Definition: OptimizationRemarkEmitter.h:146
llvm::ScalarEvolution::forgetLoop
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
Definition: ScalarEvolution.cpp:8031
llvm::SCEVAddRecExpr::getLoop
const Loop * getLoop() const
Definition: ScalarEvolutionExpressions.h:354
interChangeDependencies
static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx, unsigned ToIndx)
Definition: LoopInterchange.cpp:184
llvm::Loop::isLCSSAForm
bool isLCSSAForm(const DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:458
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:345
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
reuse
loop Interchanges loops for cache reuse
Definition: LoopInterchange.cpp:1727
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:342
Direction
Loop::LoopBounds::Direction Direction
Definition: LoopInfo.cpp:226
Casting.h
DiagnosticInfo.h
Function.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::ScalarEvolution::isSCEVable
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Definition: ScalarEvolution.cpp:4292
llvm::LoopBase::addBlockEntry
void addBlockEntry(BlockT *BB)
This adds a basic block directly to the basic block list.
Definition: LoopInfo.h:439
transform
instcombine should handle this transform
Definition: README.txt:262
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:591
llvm::LoopBase::getNumBackEdges
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
Definition: LoopInfo.h:266
updateSuccessor
static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB, BasicBlock *NewBB, std::vector< DominatorTree::UpdateType > &DTUpdates, bool MustUpdateOnce=true)
Definition: LoopInterchange.cpp:1393
llvm::LoopStandardAnalysisResults::SE
ScalarEvolution & SE
Definition: LoopAnalysisManager.h:56
llvm::LoopStandardAnalysisResults::AA
AAResults & AA
Definition: LoopAnalysisManager.h:52
llvm::LoopNest::getLoops
ArrayRef< Loop * > getLoops() const
Get the loops in the nest.
Definition: LoopNestAnalysis.h:117
ScalarEvolutionExpressions.h
llvm::RecurrenceDescriptor
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:69
llvm::BasicBlock::getInstList
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:364
llvm::DependenceInfo::depends
std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool PossiblyLoopIndependent)
depends - Tests for a dependence between the Src and Dst instructions.
Definition: DependenceAnalysis.cpp:3527
llvm::formLCSSAForInstructions
bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, IRBuilderBase &Builder, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
Definition: LCSSA.cpp:78
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:690
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
Instructions.h
llvm::ConstantInt::isNegative
bool isNegative() const
Definition: Constants.h:189
SmallVector.h
User.h
llvm::LoopInfoBase::changeTopLevelLoop
void changeTopLevelLoop(LoopT *OldLoop, LoopT *NewLoop)
Replace the specified loop in the top-level loops list with the indicated loop.
Definition: LoopInfo.h:1034
Dominators.h
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:164
llvm::PHINode
Definition: Instructions.h:2651
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:119
llvm::MipsISD::Ins
@ Ins
Definition: MipsISelLowering.h:160
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
llvm::LoopInterchangePass::run
PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LoopInterchange.cpp:1733
llvm::SmallPtrSetImpl< PHINode * >
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:307
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::RecurrenceDescriptor::isReductionPHI
static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
Definition: IVDescriptors.cpp:839
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1461
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:171
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
DependenceAnalysis.h
MaxMemInstrCount
static const unsigned MaxMemInstrCount
Definition: LoopInterchange.cpp:72
llvm::cl::desc
Definition: CommandLine.h:405
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3086
raw_ostream.h
llvm::ScalarEvolution::getBackedgeTakenCount
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
Definition: ScalarEvolution.cpp:7901
BasicBlockUtils.h
llvm::SplitBlock
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition: BasicBlockUtils.cpp:837
Value.h
InitializePasses.h
llvm::LoopNest
This class represents a loop nest and can be used to query its properties.
Definition: LoopNestAnalysis.h:28
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
llvm::BranchInst::isConditional
bool isConditional() const
Definition: Instructions.h:3165
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3179
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38