LLVM  16.0.0git
LoopInterchange.cpp
Go to the documentation of this file.
1 //===- LoopInterchange.cpp - Loop interchange pass-------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This Pass handles loop interchange transform.
10 // This pass interchanges loops to provide a more cache-friendly memory access
11 // patterns.
12 //
13 //===----------------------------------------------------------------------===//
14 
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/ADT/StringRef.h"
22 #include "llvm/Analysis/LoopInfo.h"
24 #include "llvm/Analysis/LoopPass.h"
28 #include "llvm/IR/BasicBlock.h"
29 #include "llvm/IR/Constants.h"
30 #include "llvm/IR/DiagnosticInfo.h"
31 #include "llvm/IR/Dominators.h"
32 #include "llvm/IR/Function.h"
33 #include "llvm/IR/IRBuilder.h"
34 #include "llvm/IR/InstrTypes.h"
35 #include "llvm/IR/Instruction.h"
36 #include "llvm/IR/Instructions.h"
37 #include "llvm/IR/User.h"
38 #include "llvm/IR/Value.h"
39 #include "llvm/InitializePasses.h"
40 #include "llvm/Pass.h"
41 #include "llvm/Support/Casting.h"
43 #include "llvm/Support/Debug.h"
46 #include "llvm/Transforms/Scalar.h"
50 #include <cassert>
51 #include <utility>
52 #include <vector>
53 
54 using namespace llvm;
55 
56 #define DEBUG_TYPE "loop-interchange"
57 
58 STATISTIC(LoopsInterchanged, "Number of loops interchanged");
59 
61  "loop-interchange-threshold", cl::init(0), cl::Hidden,
62  cl::desc("Interchange if you gain more than this number"));
63 
64 namespace {
65 
66 using LoopVector = SmallVector<Loop *, 8>;
67 
68 // TODO: Check if we can use a sparse matrix here.
69 using CharMatrix = std::vector<std::vector<char>>;
70 
71 } // end anonymous namespace
72 
73 // Maximum number of dependencies that can be handled in the dependency matrix.
74 static const unsigned MaxMemInstrCount = 100;
75 
76 // Maximum loop depth supported.
77 static const unsigned MaxLoopNestDepth = 10;
78 
79 #ifdef DUMP_DEP_MATRICIES
80 static void printDepMatrix(CharMatrix &DepMatrix) {
81  for (auto &Row : DepMatrix) {
82  for (auto D : Row)
83  LLVM_DEBUG(dbgs() << D << " ");
84  LLVM_DEBUG(dbgs() << "\n");
85  }
86 }
87 #endif
88 
89 static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
90  Loop *L, DependenceInfo *DI,
91  ScalarEvolution *SE) {
92  using ValueVector = SmallVector<Value *, 16>;
93 
94  ValueVector MemInstr;
95 
96  // For each block.
97  for (BasicBlock *BB : L->blocks()) {
98  // Scan the BB and collect legal loads and stores.
99  for (Instruction &I : *BB) {
100  if (!isa<Instruction>(I))
101  return false;
102  if (auto *Ld = dyn_cast<LoadInst>(&I)) {
103  if (!Ld->isSimple())
104  return false;
105  MemInstr.push_back(&I);
106  } else if (auto *St = dyn_cast<StoreInst>(&I)) {
107  if (!St->isSimple())
108  return false;
109  MemInstr.push_back(&I);
110  }
111  }
112  }
113 
114  LLVM_DEBUG(dbgs() << "Found " << MemInstr.size()
115  << " Loads and Stores to analyze\n");
116 
117  ValueVector::iterator I, IE, J, JE;
118 
119  for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
120  for (J = I, JE = MemInstr.end(); J != JE; ++J) {
121  std::vector<char> Dep;
122  Instruction *Src = cast<Instruction>(*I);
123  Instruction *Dst = cast<Instruction>(*J);
124  // Ignore Input dependencies.
125  if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
126  continue;
127  // Track Output, Flow, and Anti dependencies.
128  if (auto D = DI->depends(Src, Dst, true)) {
129  assert(D->isOrdered() && "Expected an output, flow or anti dep.");
130  // If the direction vector is negative, normalize it to
131  // make it non-negative.
132  if (D->normalize(SE))
133  LLVM_DEBUG(dbgs() << "Negative dependence vector normalized.\n");
134  LLVM_DEBUG(StringRef DepType =
135  D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";
136  dbgs() << "Found " << DepType
137  << " dependency between Src and Dst\n"
138  << " Src:" << *Src << "\n Dst:" << *Dst << '\n');
139  unsigned Levels = D->getLevels();
140  char Direction;
141  for (unsigned II = 1; II <= Levels; ++II) {
142  if (D->isScalar(II)) {
143  Direction = 'S';
144  Dep.push_back(Direction);
145  } else {
146  unsigned Dir = D->getDirection(II);
147  if (Dir == Dependence::DVEntry::LT ||
149  Direction = '<';
150  else if (Dir == Dependence::DVEntry::GT ||
152  Direction = '>';
153  else if (Dir == Dependence::DVEntry::EQ)
154  Direction = '=';
155  else
156  Direction = '*';
157  Dep.push_back(Direction);
158  }
159  }
160  while (Dep.size() != Level) {
161  Dep.push_back('I');
162  }
163 
164  DepMatrix.push_back(Dep);
165  if (DepMatrix.size() > MaxMemInstrCount) {
166  LLVM_DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount
167  << " dependencies inside loop\n");
168  return false;
169  }
170  }
171  }
172  }
173 
174  return true;
175 }
176 
177 // A loop is moved from index 'from' to an index 'to'. Update the Dependence
178 // matrix by exchanging the two columns.
179 static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx,
180  unsigned ToIndx) {
181  for (unsigned I = 0, E = DepMatrix.size(); I < E; ++I)
182  std::swap(DepMatrix[I][ToIndx], DepMatrix[I][FromIndx]);
183 }
184 
185 // After interchanging, check if the direction vector is valid.
186 // [Theorem] A permutation of the loops in a perfect nest is legal if and only
187 // if the direction matrix, after the same permutation is applied to its
188 // columns, has no ">" direction as the leftmost non-"=" direction in any row.
189 static bool isLexicographicallyPositive(std::vector<char> &DV) {
190  for (unsigned Level = 0; Level < DV.size(); ++Level) {
191  unsigned char Direction = DV[Level];
192  if (Direction == '<')
193  return true;
194  if (Direction == '>' || Direction == '*')
195  return false;
196  }
197  return true;
198 }
199 
200 // Checks if it is legal to interchange 2 loops.
201 static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
202  unsigned InnerLoopId,
203  unsigned OuterLoopId) {
204  unsigned NumRows = DepMatrix.size();
205  std::vector<char> Cur;
206  // For each row check if it is valid to interchange.
207  for (unsigned Row = 0; Row < NumRows; ++Row) {
208  // Create temporary DepVector check its lexicographical order
209  // before and after swapping OuterLoop vs InnerLoop
210  Cur = DepMatrix[Row];
211  if (!isLexicographicallyPositive(Cur))
212  return false;
213  std::swap(Cur[InnerLoopId], Cur[OuterLoopId]);
214  if (!isLexicographicallyPositive(Cur))
215  return false;
216  }
217  return true;
218 }
219 
220 static void populateWorklist(Loop &L, LoopVector &LoopList) {
221  LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "
222  << L.getHeader()->getParent()->getName() << " Loop: %"
223  << L.getHeader()->getName() << '\n');
224  assert(LoopList.empty() && "LoopList should initially be empty!");
225  Loop *CurrentLoop = &L;
226  const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops();
227  while (!Vec->empty()) {
228  // The current loop has multiple subloops in it hence it is not tightly
229  // nested.
230  // Discard all loops above it added into Worklist.
231  if (Vec->size() != 1) {
232  LoopList = {};
233  return;
234  }
235 
236  LoopList.push_back(CurrentLoop);
237  CurrentLoop = Vec->front();
238  Vec = &CurrentLoop->getSubLoops();
239  }
240  LoopList.push_back(CurrentLoop);
241 }
242 
243 namespace {
244 
245 /// LoopInterchangeLegality checks if it is legal to interchange the loop.
246 class LoopInterchangeLegality {
247 public:
248  LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
250  : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
251 
252  /// Check if the loops can be interchanged.
253  bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
254  CharMatrix &DepMatrix);
255 
256  /// Discover induction PHIs in the header of \p L. Induction
257  /// PHIs are added to \p Inductions.
258  bool findInductions(Loop *L, SmallVectorImpl<PHINode *> &Inductions);
259 
260  /// Check if the loop structure is understood. We do not handle triangular
261  /// loops for now.
262  bool isLoopStructureUnderstood();
263 
264  bool currentLimitations();
265 
266  const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions() const {
267  return OuterInnerReductions;
268  }
269 
270  const SmallVectorImpl<PHINode *> &getInnerLoopInductions() const {
271  return InnerLoopInductions;
272  }
273 
274 private:
275  bool tightlyNested(Loop *Outer, Loop *Inner);
276  bool containsUnsafeInstructions(BasicBlock *BB);
277 
278  /// Discover induction and reduction PHIs in the header of \p L. Induction
279  /// PHIs are added to \p Inductions, reductions are added to
280  /// OuterInnerReductions. When the outer loop is passed, the inner loop needs
281  /// to be passed as \p InnerLoop.
282  bool findInductionAndReductions(Loop *L,
283  SmallVector<PHINode *, 8> &Inductions,
284  Loop *InnerLoop);
285 
286  Loop *OuterLoop;
287  Loop *InnerLoop;
288 
289  ScalarEvolution *SE;
290 
291  /// Interface to emit optimization remarks.
293 
294  /// Set of reduction PHIs taking part of a reduction across the inner and
295  /// outer loop.
296  SmallPtrSet<PHINode *, 4> OuterInnerReductions;
297 
298  /// Set of inner loop induction PHIs
299  SmallVector<PHINode *, 8> InnerLoopInductions;
300 };
301 
302 /// LoopInterchangeProfitability checks if it is profitable to interchange the
303 /// loop.
304 class LoopInterchangeProfitability {
305 public:
306  LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
308  : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
309 
310  /// Check if the loop interchange is profitable.
311  bool isProfitable(const Loop *InnerLoop, const Loop *OuterLoop,
312  unsigned InnerLoopId, unsigned OuterLoopId,
313  CharMatrix &DepMatrix,
314  const DenseMap<const Loop *, unsigned> &CostMap);
315 
316 private:
317  int getInstrOrderCost();
318 
319  Loop *OuterLoop;
320  Loop *InnerLoop;
321 
322  /// Scev analysis.
323  ScalarEvolution *SE;
324 
325  /// Interface to emit optimization remarks.
327 };
328 
329 /// LoopInterchangeTransform interchanges the loop.
330 class LoopInterchangeTransform {
331 public:
332  LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
333  LoopInfo *LI, DominatorTree *DT,
334  const LoopInterchangeLegality &LIL)
335  : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {}
336 
337  /// Interchange OuterLoop and InnerLoop.
338  bool transform();
339  void restructureLoops(Loop *NewInner, Loop *NewOuter,
340  BasicBlock *OrigInnerPreHeader,
341  BasicBlock *OrigOuterPreHeader);
342  void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
343 
344 private:
345  bool adjustLoopLinks();
346  bool adjustLoopBranches();
347 
348  Loop *OuterLoop;
349  Loop *InnerLoop;
350 
351  /// Scev analysis.
352  ScalarEvolution *SE;
353 
354  LoopInfo *LI;
355  DominatorTree *DT;
356 
357  const LoopInterchangeLegality &LIL;
358 };
359 
360 struct LoopInterchange {
361  ScalarEvolution *SE = nullptr;
362  LoopInfo *LI = nullptr;
363  DependenceInfo *DI = nullptr;
364  DominatorTree *DT = nullptr;
365  std::unique_ptr<CacheCost> CC = nullptr;
366 
367  /// Interface to emit optimization remarks.
369 
370  LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
371  DominatorTree *DT, std::unique_ptr<CacheCost> &CC,
373  : SE(SE), LI(LI), DI(DI), DT(DT), CC(std::move(CC)), ORE(ORE) {}
374 
375  bool run(Loop *L) {
376  if (L->getParentLoop())
377  return false;
378  SmallVector<Loop *, 8> LoopList;
379  populateWorklist(*L, LoopList);
380  return processLoopList(LoopList);
381  }
382 
383  bool run(LoopNest &LN) {
384  SmallVector<Loop *, 8> LoopList(LN.getLoops().begin(), LN.getLoops().end());
385  for (unsigned I = 1; I < LoopList.size(); ++I)
386  if (LoopList[I]->getParentLoop() != LoopList[I - 1])
387  return false;
388  return processLoopList(LoopList);
389  }
390 
391  bool isComputableLoopNest(ArrayRef<Loop *> LoopList) {
392  for (Loop *L : LoopList) {
393  const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
394  if (isa<SCEVCouldNotCompute>(ExitCountOuter)) {
395  LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n");
396  return false;
397  }
398  if (L->getNumBackEdges() != 1) {
399  LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n");
400  return false;
401  }
402  if (!L->getExitingBlock()) {
403  LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n");
404  return false;
405  }
406  }
407  return true;
408  }
409 
410  unsigned selectLoopForInterchange(ArrayRef<Loop *> LoopList) {
411  // TODO: Add a better heuristic to select the loop to be interchanged based
412  // on the dependence matrix. Currently we select the innermost loop.
413  return LoopList.size() - 1;
414  }
415 
416  bool processLoopList(SmallVectorImpl<Loop *> &LoopList) {
417  bool Changed = false;
418  unsigned LoopNestDepth = LoopList.size();
419  if (LoopNestDepth < 2) {
420  LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n");
421  return false;
422  }
423  if (LoopNestDepth > MaxLoopNestDepth) {
424  LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than "
425  << MaxLoopNestDepth << "\n");
426  return false;
427  }
428  if (!isComputableLoopNest(LoopList)) {
429  LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n");
430  return false;
431  }
432 
433  LLVM_DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepth
434  << "\n");
435 
436  CharMatrix DependencyMatrix;
437  Loop *OuterMostLoop = *(LoopList.begin());
438  if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth,
439  OuterMostLoop, DI, SE)) {
440  LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n");
441  return false;
442  }
443 #ifdef DUMP_DEP_MATRICIES
444  LLVM_DEBUG(dbgs() << "Dependence before interchange\n");
445  printDepMatrix(DependencyMatrix);
446 #endif
447 
448  // Get the Outermost loop exit.
449  BasicBlock *LoopNestExit = OuterMostLoop->getExitBlock();
450  if (!LoopNestExit) {
451  LLVM_DEBUG(dbgs() << "OuterMostLoop needs an unique exit block");
452  return false;
453  }
454 
455  unsigned SelecLoopId = selectLoopForInterchange(LoopList);
456  // Obtain the loop vector returned from loop cache analysis beforehand,
457  // and put each <Loop, index> pair into a map for constant time query
458  // later. Indices in loop vector reprsent the optimal order of the
459  // corresponding loop, e.g., given a loopnest with depth N, index 0
460  // indicates the loop should be placed as the outermost loop and index N
461  // indicates the loop should be placed as the innermost loop.
462  //
463  // For the old pass manager CacheCost would be null.
465  if (CC != nullptr) {
466  const auto &LoopCosts = CC->getLoopCosts();
467  for (unsigned i = 0; i < LoopCosts.size(); i++) {
468  CostMap[LoopCosts[i].first] = i;
469  }
470  }
471  // We try to achieve the globally optimal memory access for the loopnest,
472  // and do interchange based on a bubble-sort fasion. We start from
473  // the innermost loop, move it outwards to the best possible position
474  // and repeat this process.
475  for (unsigned j = SelecLoopId; j > 0; j--) {
476  bool ChangedPerIter = false;
477  for (unsigned i = SelecLoopId; i > SelecLoopId - j; i--) {
478  bool Interchanged = processLoop(LoopList[i], LoopList[i - 1], i, i - 1,
479  DependencyMatrix, CostMap);
480  if (!Interchanged)
481  continue;
482  // Loops interchanged, update LoopList accordingly.
483  std::swap(LoopList[i - 1], LoopList[i]);
484  // Update the DependencyMatrix
485  interChangeDependencies(DependencyMatrix, i, i - 1);
486 #ifdef DUMP_DEP_MATRICIES
487  LLVM_DEBUG(dbgs() << "Dependence after interchange\n");
488  printDepMatrix(DependencyMatrix);
489 #endif
490  ChangedPerIter |= Interchanged;
491  Changed |= Interchanged;
492  }
493  // Early abort if there was no interchange during an entire round of
494  // moving loops outwards.
495  if (!ChangedPerIter)
496  break;
497  }
498  return Changed;
499  }
500 
501  bool processLoop(Loop *InnerLoop, Loop *OuterLoop, unsigned InnerLoopId,
502  unsigned OuterLoopId,
503  std::vector<std::vector<char>> &DependencyMatrix,
504  const DenseMap<const Loop *, unsigned> &CostMap) {
505  LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId
506  << " and OuterLoopId = " << OuterLoopId << "\n");
507  LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
508  if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
509  LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n");
510  return false;
511  }
512  LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");
513  LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
514  if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
515  DependencyMatrix, CostMap)) {
516  LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
517  return false;
518  }
519 
520  ORE->emit([&]() {
521  return OptimizationRemark(DEBUG_TYPE, "Interchanged",
522  InnerLoop->getStartLoc(),
523  InnerLoop->getHeader())
524  << "Loop interchanged with enclosing loop.";
525  });
526 
527  LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL);
528  LIT.transform();
529  LLVM_DEBUG(dbgs() << "Loops interchanged.\n");
530  LoopsInterchanged++;
531 
532  llvm::formLCSSARecursively(*OuterLoop, *DT, LI, SE);
533  return true;
534  }
535 };
536 
537 } // end anonymous namespace
538 
539 bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock *BB) {
540  return any_of(*BB, [](const Instruction &I) {
541  return I.mayHaveSideEffects() || I.mayReadFromMemory();
542  });
543 }
544 
545 bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
546  BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
547  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
548  BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
549 
550  LLVM_DEBUG(dbgs() << "Checking if loops are tightly nested\n");
551 
552  // A perfectly nested loop will not have any branch in between the outer and
553  // inner block i.e. outer header will branch to either inner preheader and
554  // outerloop latch.
555  BranchInst *OuterLoopHeaderBI =
556  dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
557  if (!OuterLoopHeaderBI)
558  return false;
559 
560  for (BasicBlock *Succ : successors(OuterLoopHeaderBI))
561  if (Succ != InnerLoopPreHeader && Succ != InnerLoop->getHeader() &&
562  Succ != OuterLoopLatch)
563  return false;
564 
565  LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n");
566  // We do not have any basic block in between now make sure the outer header
567  // and outer loop latch doesn't contain any unsafe instructions.
568  if (containsUnsafeInstructions(OuterLoopHeader) ||
569  containsUnsafeInstructions(OuterLoopLatch))
570  return false;
571 
572  // Also make sure the inner loop preheader does not contain any unsafe
573  // instructions. Note that all instructions in the preheader will be moved to
574  // the outer loop header when interchanging.
575  if (InnerLoopPreHeader != OuterLoopHeader &&
576  containsUnsafeInstructions(InnerLoopPreHeader))
577  return false;
578 
579  BasicBlock *InnerLoopExit = InnerLoop->getExitBlock();
580  // Ensure the inner loop exit block flows to the outer loop latch possibly
581  // through empty blocks.
582  const BasicBlock &SuccInner =
583  LoopNest::skipEmptyBlockUntil(InnerLoopExit, OuterLoopLatch);
584  if (&SuccInner != OuterLoopLatch) {
585  LLVM_DEBUG(dbgs() << "Inner loop exit block " << *InnerLoopExit
586  << " does not lead to the outer loop latch.\n";);
587  return false;
588  }
589  // The inner loop exit block does flow to the outer loop latch and not some
590  // other BBs, now make sure it contains safe instructions, since it will be
591  // moved into the (new) inner loop after interchange.
592  if (containsUnsafeInstructions(InnerLoopExit))
593  return false;
594 
595  LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n");
596  // We have a perfect loop nest.
597  return true;
598 }
599 
600 bool LoopInterchangeLegality::isLoopStructureUnderstood() {
601  BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
602  for (PHINode *InnerInduction : InnerLoopInductions) {
603  unsigned Num = InnerInduction->getNumOperands();
604  for (unsigned i = 0; i < Num; ++i) {
605  Value *Val = InnerInduction->getOperand(i);
606  if (isa<Constant>(Val))
607  continue;
608  Instruction *I = dyn_cast<Instruction>(Val);
609  if (!I)
610  return false;
611  // TODO: Handle triangular loops.
612  // e.g. for(int i=0;i<N;i++)
613  // for(int j=i;j<N;j++)
614  unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i);
615  if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
616  InnerLoopPreheader &&
617  !OuterLoop->isLoopInvariant(I)) {
618  return false;
619  }
620  }
621  }
622 
623  // TODO: Handle triangular loops of another form.
624  // e.g. for(int i=0;i<N;i++)
625  // for(int j=0;j<i;j++)
626  // or,
627  // for(int i=0;i<N;i++)
628  // for(int j=0;j*i<N;j++)
629  BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
630  BranchInst *InnerLoopLatchBI =
631  dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
632  if (!InnerLoopLatchBI->isConditional())
633  return false;
634  if (CmpInst *InnerLoopCmp =
635  dyn_cast<CmpInst>(InnerLoopLatchBI->getCondition())) {
636  Value *Op0 = InnerLoopCmp->getOperand(0);
637  Value *Op1 = InnerLoopCmp->getOperand(1);
638 
639  // LHS and RHS of the inner loop exit condition, e.g.,
640  // in "for(int j=0;j<i;j++)", LHS is j and RHS is i.
641  Value *Left = nullptr;
642  Value *Right = nullptr;
643 
644  // Check if V only involves inner loop induction variable.
645  // Return true if V is InnerInduction, or a cast from
646  // InnerInduction, or a binary operator that involves
647  // InnerInduction and a constant.
648  std::function<bool(Value *)> IsPathToInnerIndVar;
649  IsPathToInnerIndVar = [this, &IsPathToInnerIndVar](const Value *V) -> bool {
650  if (llvm::is_contained(InnerLoopInductions, V))
651  return true;
652  if (isa<Constant>(V))
653  return true;
654  const Instruction *I = dyn_cast<Instruction>(V);
655  if (!I)
656  return false;
657  if (isa<CastInst>(I))
658  return IsPathToInnerIndVar(I->getOperand(0));
659  if (isa<BinaryOperator>(I))
660  return IsPathToInnerIndVar(I->getOperand(0)) &&
661  IsPathToInnerIndVar(I->getOperand(1));
662  return false;
663  };
664 
665  // In case of multiple inner loop indvars, it is okay if LHS and RHS
666  // are both inner indvar related variables.
667  if (IsPathToInnerIndVar(Op0) && IsPathToInnerIndVar(Op1))
668  return true;
669 
670  // Otherwise we check if the cmp instruction compares an inner indvar
671  // related variable (Left) with a outer loop invariant (Right).
672  if (IsPathToInnerIndVar(Op0) && !isa<Constant>(Op0)) {
673  Left = Op0;
674  Right = Op1;
675  } else if (IsPathToInnerIndVar(Op1) && !isa<Constant>(Op1)) {
676  Left = Op1;
677  Right = Op0;
678  }
679 
680  if (Left == nullptr)
681  return false;
682 
683  const SCEV *S = SE->getSCEV(Right);
684  if (!SE->isLoopInvariant(S, OuterLoop))
685  return false;
686  }
687 
688  return true;
689 }
690 
691 // If SV is a LCSSA PHI node with a single incoming value, return the incoming
692 // value.
693 static Value *followLCSSA(Value *SV) {
694  PHINode *PHI = dyn_cast<PHINode>(SV);
695  if (!PHI)
696  return SV;
697 
698  if (PHI->getNumIncomingValues() != 1)
699  return SV;
700  return followLCSSA(PHI->getIncomingValue(0));
701 }
702 
703 // Check V's users to see if it is involved in a reduction in L.
705  // Reduction variables cannot be constants.
706  if (isa<Constant>(V))
707  return nullptr;
708 
709  for (Value *User : V->users()) {
710  if (PHINode *PHI = dyn_cast<PHINode>(User)) {
711  if (PHI->getNumIncomingValues() == 1)
712  continue;
715  // Detect floating point reduction only when it can be reordered.
716  if (RD.getExactFPMathInst() != nullptr)
717  return nullptr;
718  return PHI;
719  }
720  return nullptr;
721  }
722  }
723 
724  return nullptr;
725 }
726 
727 bool LoopInterchangeLegality::findInductionAndReductions(
728  Loop *L, SmallVector<PHINode *, 8> &Inductions, Loop *InnerLoop) {
729  if (!L->getLoopLatch() || !L->getLoopPredecessor())
730  return false;
731  for (PHINode &PHI : L->getHeader()->phis()) {
735  Inductions.push_back(&PHI);
736  else {
737  // PHIs in inner loops need to be part of a reduction in the outer loop,
738  // discovered when checking the PHIs of the outer loop earlier.
739  if (!InnerLoop) {
740  if (!OuterInnerReductions.count(&PHI)) {
741  LLVM_DEBUG(dbgs() << "Inner loop PHI is not part of reductions "
742  "across the outer loop.\n");
743  return false;
744  }
745  } else {
746  assert(PHI.getNumIncomingValues() == 2 &&
747  "Phis in loop header should have exactly 2 incoming values");
748  // Check if we have a PHI node in the outer loop that has a reduction
749  // result from the inner loop as an incoming value.
750  Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch()));
751  PHINode *InnerRedPhi = findInnerReductionPhi(InnerLoop, V);
752  if (!InnerRedPhi ||
753  !llvm::is_contained(InnerRedPhi->incoming_values(), &PHI)) {
754  LLVM_DEBUG(
755  dbgs()
756  << "Failed to recognize PHI as an induction or reduction.\n");
757  return false;
758  }
759  OuterInnerReductions.insert(&PHI);
760  OuterInnerReductions.insert(InnerRedPhi);
761  }
762  }
763  }
764  return true;
765 }
766 
767 // This function indicates the current limitations in the transform as a result
768 // of which we do not proceed.
769 bool LoopInterchangeLegality::currentLimitations() {
770  BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
771 
772  // transform currently expects the loop latches to also be the exiting
773  // blocks.
774  if (InnerLoop->getExitingBlock() != InnerLoopLatch ||
775  OuterLoop->getExitingBlock() != OuterLoop->getLoopLatch() ||
776  !isa<BranchInst>(InnerLoopLatch->getTerminator()) ||
777  !isa<BranchInst>(OuterLoop->getLoopLatch()->getTerminator())) {
778  LLVM_DEBUG(
779  dbgs() << "Loops where the latch is not the exiting block are not"
780  << " supported currently.\n");
781  ORE->emit([&]() {
782  return OptimizationRemarkMissed(DEBUG_TYPE, "ExitingNotLatch",
783  OuterLoop->getStartLoc(),
784  OuterLoop->getHeader())
785  << "Loops where the latch is not the exiting block cannot be"
786  " interchange currently.";
787  });
788  return true;
789  }
790 
791  SmallVector<PHINode *, 8> Inductions;
792  if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
793  LLVM_DEBUG(
794  dbgs() << "Only outer loops with induction or reduction PHI nodes "
795  << "are supported currently.\n");
796  ORE->emit([&]() {
797  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIOuter",
798  OuterLoop->getStartLoc(),
799  OuterLoop->getHeader())
800  << "Only outer loops with induction or reduction PHI nodes can be"
801  " interchanged currently.";
802  });
803  return true;
804  }
805 
806  Inductions.clear();
807  // For multi-level loop nests, make sure that all phi nodes for inner loops
808  // at all levels can be recognized as a induction or reduction phi. Bail out
809  // if a phi node at a certain nesting level cannot be properly recognized.
810  Loop *CurLevelLoop = OuterLoop;
811  while (!CurLevelLoop->getSubLoops().empty()) {
812  // We already made sure that the loop nest is tightly nested.
813  CurLevelLoop = CurLevelLoop->getSubLoops().front();
814  if (!findInductionAndReductions(CurLevelLoop, Inductions, nullptr)) {
815  LLVM_DEBUG(
816  dbgs() << "Only inner loops with induction or reduction PHI nodes "
817  << "are supported currently.\n");
818  ORE->emit([&]() {
819  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner",
820  CurLevelLoop->getStartLoc(),
821  CurLevelLoop->getHeader())
822  << "Only inner loops with induction or reduction PHI nodes can be"
823  " interchange currently.";
824  });
825  return true;
826  }
827  }
828 
829  // TODO: Triangular loops are not handled for now.
830  if (!isLoopStructureUnderstood()) {
831  LLVM_DEBUG(dbgs() << "Loop structure not understood by pass\n");
832  ORE->emit([&]() {
833  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedStructureInner",
834  InnerLoop->getStartLoc(),
835  InnerLoop->getHeader())
836  << "Inner loop structure not understood currently.";
837  });
838  return true;
839  }
840 
841  return false;
842 }
843 
844 bool LoopInterchangeLegality::findInductions(
845  Loop *L, SmallVectorImpl<PHINode *> &Inductions) {
846  for (PHINode &PHI : L->getHeader()->phis()) {
849  Inductions.push_back(&PHI);
850  }
851  return !Inductions.empty();
852 }
853 
854 // We currently only support LCSSA PHI nodes in the inner loop exit, if their
855 // users are either reduction PHIs or PHIs outside the outer loop (which means
856 // the we are only interested in the final value after the loop).
857 static bool
859  SmallPtrSetImpl<PHINode *> &Reductions) {
860  BasicBlock *InnerExit = OuterL->getUniqueExitBlock();
861  for (PHINode &PHI : InnerExit->phis()) {
862  // Reduction lcssa phi will have only 1 incoming block that from loop latch.
863  if (PHI.getNumIncomingValues() > 1)
864  return false;
865  if (any_of(PHI.users(), [&Reductions, OuterL](User *U) {
866  PHINode *PN = dyn_cast<PHINode>(U);
867  return !PN ||
868  (!Reductions.count(PN) && OuterL->contains(PN->getParent()));
869  })) {
870  return false;
871  }
872  }
873  return true;
874 }
875 
876 // We currently support LCSSA PHI nodes in the outer loop exit, if their
877 // incoming values do not come from the outer loop latch or if the
878 // outer loop latch has a single predecessor. In that case, the value will
879 // be available if both the inner and outer loop conditions are true, which
880 // will still be true after interchanging. If we have multiple predecessor,
881 // that may not be the case, e.g. because the outer loop latch may be executed
882 // if the inner loop is not executed.
883 static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
884  BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock();
885  for (PHINode &PHI : LoopNestExit->phis()) {
886  for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
887  Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i));
888  if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch())
889  continue;
890 
891  // The incoming value is defined in the outer loop latch. Currently we
892  // only support that in case the outer loop latch has a single predecessor.
893  // This guarantees that the outer loop latch is executed if and only if
894  // the inner loop is executed (because tightlyNested() guarantees that the
895  // outer loop header only branches to the inner loop or the outer loop
896  // latch).
897  // FIXME: We could weaken this logic and allow multiple predecessors,
898  // if the values are produced outside the loop latch. We would need
899  // additional logic to update the PHI nodes in the exit block as
900  // well.
901  if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr)
902  return false;
903  }
904  }
905  return true;
906 }
907 
908 // In case of multi-level nested loops, it may occur that lcssa phis exist in
909 // the latch of InnerLoop, i.e., when defs of the incoming values are further
910 // inside the loopnest. Sometimes those incoming values are not available
911 // after interchange, since the original inner latch will become the new outer
912 // latch which may have predecessor paths that do not include those incoming
913 // values.
914 // TODO: Handle transformation of lcssa phis in the InnerLoop latch in case of
915 // multi-level loop nests.
916 static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
917  if (InnerLoop->getSubLoops().empty())
918  return true;
919  // If the original outer latch has only one predecessor, then values defined
920  // further inside the looploop, e.g., in the innermost loop, will be available
921  // at the new outer latch after interchange.
922  if (OuterLoop->getLoopLatch()->getUniquePredecessor() != nullptr)
923  return true;
924 
925  // The outer latch has more than one predecessors, i.e., the inner
926  // exit and the inner header.
927  // PHI nodes in the inner latch are lcssa phis where the incoming values
928  // are defined further inside the loopnest. Check if those phis are used
929  // in the original inner latch. If that is the case then bail out since
930  // those incoming values may not be available at the new outer latch.
931  BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
932  for (PHINode &PHI : InnerLoopLatch->phis()) {
933  for (auto *U : PHI.users()) {
934  Instruction *UI = cast<Instruction>(U);
935  if (InnerLoopLatch == UI->getParent())
936  return false;
937  }
938  }
939  return true;
940 }
941 
942 bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
943  unsigned OuterLoopId,
944  CharMatrix &DepMatrix) {
945  if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) {
946  LLVM_DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId
947  << " and OuterLoopId = " << OuterLoopId
948  << " due to dependence\n");
949  ORE->emit([&]() {
950  return OptimizationRemarkMissed(DEBUG_TYPE, "Dependence",
951  InnerLoop->getStartLoc(),
952  InnerLoop->getHeader())
953  << "Cannot interchange loops due to dependences.";
954  });
955  return false;
956  }
957  // Check if outer and inner loop contain legal instructions only.
958  for (auto *BB : OuterLoop->blocks())
959  for (Instruction &I : BB->instructionsWithoutDebug())
960  if (CallInst *CI = dyn_cast<CallInst>(&I)) {
961  // readnone functions do not prevent interchanging.
962  if (CI->onlyWritesMemory())
963  continue;
964  LLVM_DEBUG(
965  dbgs() << "Loops with call instructions cannot be interchanged "
966  << "safely.");
967  ORE->emit([&]() {
968  return OptimizationRemarkMissed(DEBUG_TYPE, "CallInst",
969  CI->getDebugLoc(),
970  CI->getParent())
971  << "Cannot interchange loops due to call instruction.";
972  });
973 
974  return false;
975  }
976 
977  if (!findInductions(InnerLoop, InnerLoopInductions)) {
978  LLVM_DEBUG(dbgs() << "Cound not find inner loop induction variables.\n");
979  return false;
980  }
981 
982  if (!areInnerLoopLatchPHIsSupported(OuterLoop, InnerLoop)) {
983  LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop latch.\n");
984  ORE->emit([&]() {
985  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedInnerLatchPHI",
986  InnerLoop->getStartLoc(),
987  InnerLoop->getHeader())
988  << "Cannot interchange loops because unsupported PHI nodes found "
989  "in inner loop latch.";
990  });
991  return false;
992  }
993 
994  // TODO: The loops could not be interchanged due to current limitations in the
995  // transform module.
996  if (currentLimitations()) {
997  LLVM_DEBUG(dbgs() << "Not legal because of current transform limitation\n");
998  return false;
999  }
1000 
1001  // Check if the loops are tightly nested.
1002  if (!tightlyNested(OuterLoop, InnerLoop)) {
1003  LLVM_DEBUG(dbgs() << "Loops not tightly nested\n");
1004  ORE->emit([&]() {
1005  return OptimizationRemarkMissed(DEBUG_TYPE, "NotTightlyNested",
1006  InnerLoop->getStartLoc(),
1007  InnerLoop->getHeader())
1008  << "Cannot interchange loops because they are not tightly "
1009  "nested.";
1010  });
1011  return false;
1012  }
1013 
1014  if (!areInnerLoopExitPHIsSupported(OuterLoop, InnerLoop,
1015  OuterInnerReductions)) {
1016  LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop exit.\n");
1017  ORE->emit([&]() {
1018  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
1019  InnerLoop->getStartLoc(),
1020  InnerLoop->getHeader())
1021  << "Found unsupported PHI node in loop exit.";
1022  });
1023  return false;
1024  }
1025 
1026  if (!areOuterLoopExitPHIsSupported(OuterLoop, InnerLoop)) {
1027  LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n");
1028  ORE->emit([&]() {
1029  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
1030  OuterLoop->getStartLoc(),
1031  OuterLoop->getHeader())
1032  << "Found unsupported PHI node in loop exit.";
1033  });
1034  return false;
1035  }
1036 
1037  return true;
1038 }
1039 
1040 int LoopInterchangeProfitability::getInstrOrderCost() {
1041  unsigned GoodOrder, BadOrder;
1042  BadOrder = GoodOrder = 0;
1043  for (BasicBlock *BB : InnerLoop->blocks()) {
1044  for (Instruction &Ins : *BB) {
1045  if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) {
1046  unsigned NumOp = GEP->getNumOperands();
1047  bool FoundInnerInduction = false;
1048  bool FoundOuterInduction = false;
1049  for (unsigned i = 0; i < NumOp; ++i) {
1050  // Skip operands that are not SCEV-able.
1051  if (!SE->isSCEVable(GEP->getOperand(i)->getType()))
1052  continue;
1053 
1054  const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i));
1055  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal);
1056  if (!AR)
1057  continue;
1058 
1059  // If we find the inner induction after an outer induction e.g.
1060  // for(int i=0;i<N;i++)
1061  // for(int j=0;j<N;j++)
1062  // A[i][j] = A[i-1][j-1]+k;
1063  // then it is a good order.
1064  if (AR->getLoop() == InnerLoop) {
1065  // We found an InnerLoop induction after OuterLoop induction. It is
1066  // a good order.
1067  FoundInnerInduction = true;
1068  if (FoundOuterInduction) {
1069  GoodOrder++;
1070  break;
1071  }
1072  }
1073  // If we find the outer induction after an inner induction e.g.
1074  // for(int i=0;i<N;i++)
1075  // for(int j=0;j<N;j++)
1076  // A[j][i] = A[j-1][i-1]+k;
1077  // then it is a bad order.
1078  if (AR->getLoop() == OuterLoop) {
1079  // We found an OuterLoop induction after InnerLoop induction. It is
1080  // a bad order.
1081  FoundOuterInduction = true;
1082  if (FoundInnerInduction) {
1083  BadOrder++;
1084  break;
1085  }
1086  }
1087  }
1088  }
1089  }
1090  }
1091  return GoodOrder - BadOrder;
1092 }
1093 
1094 static bool isProfitableForVectorization(unsigned InnerLoopId,
1095  unsigned OuterLoopId,
1096  CharMatrix &DepMatrix) {
1097  // TODO: Improve this heuristic to catch more cases.
1098  // If the inner loop is loop independent or doesn't carry any dependency it is
1099  // profitable to move this to outer position.
1100  for (auto &Row : DepMatrix) {
1101  if (Row[InnerLoopId] != 'S' && Row[InnerLoopId] != 'I')
1102  return false;
1103  // TODO: We need to improve this heuristic.
1104  if (Row[OuterLoopId] != '=')
1105  return false;
1106  }
1107  // If outer loop has dependence and inner loop is loop independent then it is
1108  // profitable to interchange to enable parallelism.
1109  // If there are no dependences, interchanging will not improve anything.
1110  return !DepMatrix.empty();
1111 }
1112 
1113 bool LoopInterchangeProfitability::isProfitable(
1114  const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
1115  unsigned OuterLoopId, CharMatrix &DepMatrix,
1116  const DenseMap<const Loop *, unsigned> &CostMap) {
1117  // TODO: Remove the legacy cost model.
1118 
1119  // This is the new cost model returned from loop cache analysis.
1120  // A smaller index means the loop should be placed an outer loop, and vice
1121  // versa.
1122  if (CostMap.find(InnerLoop) != CostMap.end() &&
1123  CostMap.find(OuterLoop) != CostMap.end()) {
1124  unsigned InnerIndex = 0, OuterIndex = 0;
1125  InnerIndex = CostMap.find(InnerLoop)->second;
1126  OuterIndex = CostMap.find(OuterLoop)->second;
1127  LLVM_DEBUG(dbgs() << "InnerIndex = " << InnerIndex
1128  << ", OuterIndex = " << OuterIndex << "\n");
1129  if (InnerIndex < OuterIndex)
1130  return true;
1131  } else {
1132  // Legacy cost model: this is rough cost estimation algorithm. It counts the
1133  // good and bad order of induction variables in the instruction and allows
1134  // reordering if number of bad orders is more than good.
1135  int Cost = getInstrOrderCost();
1136  LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n");
1137  if (Cost < -LoopInterchangeCostThreshold)
1138  return true;
1139  }
1140 
1141  // It is not profitable as per current cache profitability model. But check if
1142  // we can move this loop outside to improve parallelism.
1143  if (isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix))
1144  return true;
1145 
1146  ORE->emit([&]() {
1147  return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable",
1148  InnerLoop->getStartLoc(),
1149  InnerLoop->getHeader())
1150  << "Interchanging loops is too costly and it does not improve "
1151  "parallelism.";
1152  });
1153  return false;
1154 }
1155 
1156 void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
1157  Loop *InnerLoop) {
1158  for (Loop *L : *OuterLoop)
1159  if (L == InnerLoop) {
1160  OuterLoop->removeChildLoop(L);
1161  return;
1162  }
1163  llvm_unreachable("Couldn't find loop");
1164 }
1165 
1166 /// Update LoopInfo, after interchanging. NewInner and NewOuter refer to the
1167 /// new inner and outer loop after interchanging: NewInner is the original
1168 /// outer loop and NewOuter is the original inner loop.
1169 ///
1170 /// Before interchanging, we have the following structure
1171 /// Outer preheader
1172 // Outer header
1173 // Inner preheader
1174 // Inner header
1175 // Inner body
1176 // Inner latch
1177 // outer bbs
1178 // Outer latch
1179 //
1180 // After interchanging:
1181 // Inner preheader
1182 // Inner header
1183 // Outer preheader
1184 // Outer header
1185 // Inner body
1186 // outer bbs
1187 // Outer latch
1188 // Inner latch
1189 void LoopInterchangeTransform::restructureLoops(
1190  Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader,
1191  BasicBlock *OrigOuterPreHeader) {
1192  Loop *OuterLoopParent = OuterLoop->getParentLoop();
1193  // The original inner loop preheader moves from the new inner loop to
1194  // the parent loop, if there is one.
1195  NewInner->removeBlockFromLoop(OrigInnerPreHeader);
1196  LI->changeLoopFor(OrigInnerPreHeader, OuterLoopParent);
1197 
1198  // Switch the loop levels.
1199  if (OuterLoopParent) {
1200  // Remove the loop from its parent loop.
1201  removeChildLoop(OuterLoopParent, NewInner);
1202  removeChildLoop(NewInner, NewOuter);
1203  OuterLoopParent->addChildLoop(NewOuter);
1204  } else {
1205  removeChildLoop(NewInner, NewOuter);
1206  LI->changeTopLevelLoop(NewInner, NewOuter);
1207  }
1208  while (!NewOuter->isInnermost())
1209  NewInner->addChildLoop(NewOuter->removeChildLoop(NewOuter->begin()));
1210  NewOuter->addChildLoop(NewInner);
1211 
1212  // BBs from the original inner loop.
1213  SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->blocks());
1214 
1215  // Add BBs from the original outer loop to the original inner loop (excluding
1216  // BBs already in inner loop)
1217  for (BasicBlock *BB : NewInner->blocks())
1218  if (LI->getLoopFor(BB) == NewInner)
1219  NewOuter->addBlockEntry(BB);
1220 
1221  // Now remove inner loop header and latch from the new inner loop and move
1222  // other BBs (the loop body) to the new inner loop.
1223  BasicBlock *OuterHeader = NewOuter->getHeader();
1224  BasicBlock *OuterLatch = NewOuter->getLoopLatch();
1225  for (BasicBlock *BB : OrigInnerBBs) {
1226  // Nothing will change for BBs in child loops.
1227  if (LI->getLoopFor(BB) != NewOuter)
1228  continue;
1229  // Remove the new outer loop header and latch from the new inner loop.
1230  if (BB == OuterHeader || BB == OuterLatch)
1231  NewInner->removeBlockFromLoop(BB);
1232  else
1233  LI->changeLoopFor(BB, NewInner);
1234  }
1235 
1236  // The preheader of the original outer loop becomes part of the new
1237  // outer loop.
1238  NewOuter->addBlockEntry(OrigOuterPreHeader);
1239  LI->changeLoopFor(OrigOuterPreHeader, NewOuter);
1240 
1241  // Tell SE that we move the loops around.
1242  SE->forgetLoop(NewOuter);
1243  SE->forgetLoop(NewInner);
1244 }
1245 
1247  bool Transformed = false;
1248 
1249  if (InnerLoop->getSubLoops().empty()) {
1250  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1251  LLVM_DEBUG(dbgs() << "Splitting the inner loop latch\n");
1252  auto &InductionPHIs = LIL.getInnerLoopInductions();
1253  if (InductionPHIs.empty()) {
1254  LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n");
1255  return false;
1256  }
1257 
1258  SmallVector<Instruction *, 8> InnerIndexVarList;
1259  for (PHINode *CurInductionPHI : InductionPHIs) {
1260  if (CurInductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
1261  InnerIndexVarList.push_back(
1262  dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(1)));
1263  else
1264  InnerIndexVarList.push_back(
1265  dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(0)));
1266  }
1267 
1268  // Create a new latch block for the inner loop. We split at the
1269  // current latch's terminator and then move the condition and all
1270  // operands that are not either loop-invariant or the induction PHI into the
1271  // new latch block.
1272  BasicBlock *NewLatch =
1273  SplitBlock(InnerLoop->getLoopLatch(),
1274  InnerLoop->getLoopLatch()->getTerminator(), DT, LI);
1275 
1277  unsigned i = 0;
1278  auto MoveInstructions = [&i, &WorkList, this, &InductionPHIs, NewLatch]() {
1279  for (; i < WorkList.size(); i++) {
1280  // Duplicate instruction and move it the new latch. Update uses that
1281  // have been moved.
1282  Instruction *NewI = WorkList[i]->clone();
1283  NewI->insertBefore(NewLatch->getFirstNonPHI());
1284  assert(!NewI->mayHaveSideEffects() &&
1285  "Moving instructions with side-effects may change behavior of "
1286  "the loop nest!");
1287  for (Use &U : llvm::make_early_inc_range(WorkList[i]->uses())) {
1288  Instruction *UserI = cast<Instruction>(U.getUser());
1289  if (!InnerLoop->contains(UserI->getParent()) ||
1290  UserI->getParent() == NewLatch ||
1291  llvm::is_contained(InductionPHIs, UserI))
1292  U.set(NewI);
1293  }
1294  // Add operands of moved instruction to the worklist, except if they are
1295  // outside the inner loop or are the induction PHI.
1296  for (Value *Op : WorkList[i]->operands()) {
1297  Instruction *OpI = dyn_cast<Instruction>(Op);
1298  if (!OpI ||
1299  this->LI->getLoopFor(OpI->getParent()) != this->InnerLoop ||
1300  llvm::is_contained(InductionPHIs, OpI))
1301  continue;
1302  WorkList.insert(OpI);
1303  }
1304  }
1305  };
1306 
1307  // FIXME: Should we interchange when we have a constant condition?
1308  Instruction *CondI = dyn_cast<Instruction>(
1309  cast<BranchInst>(InnerLoop->getLoopLatch()->getTerminator())
1310  ->getCondition());
1311  if (CondI)
1312  WorkList.insert(CondI);
1313  MoveInstructions();
1314  for (Instruction *InnerIndexVar : InnerIndexVarList)
1315  WorkList.insert(cast<Instruction>(InnerIndexVar));
1316  MoveInstructions();
1317  }
1318 
1319  // Ensure the inner loop phi nodes have a separate basic block.
1320  BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1321  if (InnerLoopHeader->getFirstNonPHI() != InnerLoopHeader->getTerminator()) {
1322  SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI);
1323  LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n");
1324  }
1325 
1326  // Instructions in the original inner loop preheader may depend on values
1327  // defined in the outer loop header. Move them there, because the original
1328  // inner loop preheader will become the entry into the interchanged loop nest.
1329  // Currently we move all instructions and rely on LICM to move invariant
1330  // instructions outside the loop nest.
1331  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1332  BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1333  if (InnerLoopPreHeader != OuterLoopHeader) {
1334  SmallPtrSet<Instruction *, 4> NeedsMoving;
1335  for (Instruction &I :
1336  make_early_inc_range(make_range(InnerLoopPreHeader->begin(),
1337  std::prev(InnerLoopPreHeader->end()))))
1338  I.moveBefore(OuterLoopHeader->getTerminator());
1339  }
1340 
1341  Transformed |= adjustLoopLinks();
1342  if (!Transformed) {
1343  LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n");
1344  return false;
1345  }
1346 
1347  return true;
1348 }
1349 
1350 /// \brief Move all instructions except the terminator from FromBB right before
1351 /// InsertBefore
1352 static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
1353  auto &ToList = InsertBefore->getParent()->getInstList();
1354  auto &FromList = FromBB->getInstList();
1355 
1356  ToList.splice(InsertBefore->getIterator(), FromList, FromList.begin(),
1357  FromBB->getTerminator()->getIterator());
1358 }
1359 
1360 /// Swap instructions between \p BB1 and \p BB2 but keep terminators intact.
1361 static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2) {
1362  // Save all non-terminator instructions of BB1 into TempInstrs and unlink them
1363  // from BB1 afterwards.
1364  auto Iter = map_range(*BB1, [](Instruction &I) { return &I; });
1365  SmallVector<Instruction *, 4> TempInstrs(Iter.begin(), std::prev(Iter.end()));
1366  for (Instruction *I : TempInstrs)
1367  I->removeFromParent();
1368 
1369  // Move instructions from BB2 to BB1.
1370  moveBBContents(BB2, BB1->getTerminator());
1371 
1372  // Move instructions from TempInstrs to BB2.
1373  for (Instruction *I : TempInstrs)
1374  I->insertBefore(BB2->getTerminator());
1375 }
1376 
1377 // Update BI to jump to NewBB instead of OldBB. Records updates to the
1378 // dominator tree in DTUpdates. If \p MustUpdateOnce is true, assert that
1379 // \p OldBB is exactly once in BI's successor list.
1380 static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
1381  BasicBlock *NewBB,
1382  std::vector<DominatorTree::UpdateType> &DTUpdates,
1383  bool MustUpdateOnce = true) {
1384  assert((!MustUpdateOnce ||
1386  [OldBB](BasicBlock *BB) {
1387  return BB == OldBB;
1388  }) == 1) && "BI must jump to OldBB exactly once.");
1389  bool Changed = false;
1390  for (Use &Op : BI->operands())
1391  if (Op == OldBB) {
1392  Op.set(NewBB);
1393  Changed = true;
1394  }
1395 
1396  if (Changed) {
1397  DTUpdates.push_back(
1399  DTUpdates.push_back(
1401  }
1402  assert(Changed && "Expected a successor to be updated");
1403 }
1404 
1405 // Move Lcssa PHIs to the right place.
1406 static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,
1407  BasicBlock *InnerLatch, BasicBlock *OuterHeader,
1408  BasicBlock *OuterLatch, BasicBlock *OuterExit,
1409  Loop *InnerLoop, LoopInfo *LI) {
1410 
1411  // Deal with LCSSA PHI nodes in the exit block of the inner loop, that are
1412  // defined either in the header or latch. Those blocks will become header and
1413  // latch of the new outer loop, and the only possible users can PHI nodes
1414  // in the exit block of the loop nest or the outer loop header (reduction
1415  // PHIs, in that case, the incoming value must be defined in the inner loop
1416  // header). We can just substitute the user with the incoming value and remove
1417  // the PHI.
1418  for (PHINode &P : make_early_inc_range(InnerExit->phis())) {
1419  assert(P.getNumIncomingValues() == 1 &&
1420  "Only loops with a single exit are supported!");
1421 
1422  // Incoming values are guaranteed be instructions currently.
1423  auto IncI = cast<Instruction>(P.getIncomingValueForBlock(InnerLatch));
1424  // In case of multi-level nested loops, follow LCSSA to find the incoming
1425  // value defined from the innermost loop.
1426  auto IncIInnerMost = cast<Instruction>(followLCSSA(IncI));
1427  // Skip phis with incoming values from the inner loop body, excluding the
1428  // header and latch.
1429  if (IncIInnerMost->getParent() != InnerLatch &&
1430  IncIInnerMost->getParent() != InnerHeader)
1431  continue;
1432 
1433  assert(all_of(P.users(),
1434  [OuterHeader, OuterExit, IncI, InnerHeader](User *U) {
1435  return (cast<PHINode>(U)->getParent() == OuterHeader &&
1436  IncI->getParent() == InnerHeader) ||
1437  cast<PHINode>(U)->getParent() == OuterExit;
1438  }) &&
1439  "Can only replace phis iff the uses are in the loop nest exit or "
1440  "the incoming value is defined in the inner header (it will "
1441  "dominate all loop blocks after interchanging)");
1442  P.replaceAllUsesWith(IncI);
1443  P.eraseFromParent();
1444  }
1445 
1446  SmallVector<PHINode *, 8> LcssaInnerExit;
1447  for (PHINode &P : InnerExit->phis())
1448  LcssaInnerExit.push_back(&P);
1449 
1450  SmallVector<PHINode *, 8> LcssaInnerLatch;
1451  for (PHINode &P : InnerLatch->phis())
1452  LcssaInnerLatch.push_back(&P);
1453 
1454  // Lcssa PHIs for values used outside the inner loop are in InnerExit.
1455  // If a PHI node has users outside of InnerExit, it has a use outside the
1456  // interchanged loop and we have to preserve it. We move these to
1457  // InnerLatch, which will become the new exit block for the innermost
1458  // loop after interchanging.
1459  for (PHINode *P : LcssaInnerExit)
1460  P->moveBefore(InnerLatch->getFirstNonPHI());
1461 
1462  // If the inner loop latch contains LCSSA PHIs, those come from a child loop
1463  // and we have to move them to the new inner latch.
1464  for (PHINode *P : LcssaInnerLatch)
1465  P->moveBefore(InnerExit->getFirstNonPHI());
1466 
1467  // Deal with LCSSA PHI nodes in the loop nest exit block. For PHIs that have
1468  // incoming values defined in the outer loop, we have to add a new PHI
1469  // in the inner loop latch, which became the exit block of the outer loop,
1470  // after interchanging.
1471  if (OuterExit) {
1472  for (PHINode &P : OuterExit->phis()) {
1473  if (P.getNumIncomingValues() != 1)
1474  continue;
1475  // Skip Phis with incoming values defined in the inner loop. Those should
1476  // already have been updated.
1477  auto I = dyn_cast<Instruction>(P.getIncomingValue(0));
1478  if (!I || LI->getLoopFor(I->getParent()) == InnerLoop)
1479  continue;
1480 
1481  PHINode *NewPhi = dyn_cast<PHINode>(P.clone());
1482  NewPhi->setIncomingValue(0, P.getIncomingValue(0));
1483  NewPhi->setIncomingBlock(0, OuterLatch);
1484  // We might have incoming edges from other BBs, i.e., the original outer
1485  // header.
1486  for (auto *Pred : predecessors(InnerLatch)) {
1487  if (Pred == OuterLatch)
1488  continue;
1489  NewPhi->addIncoming(P.getIncomingValue(0), Pred);
1490  }
1491  NewPhi->insertBefore(InnerLatch->getFirstNonPHI());
1492  P.setIncomingValue(0, NewPhi);
1493  }
1494  }
1495 
1496  // Now adjust the incoming blocks for the LCSSA PHIs.
1497  // For PHIs moved from Inner's exit block, we need to replace Inner's latch
1498  // with the new latch.
1499  InnerLatch->replacePhiUsesWith(InnerLatch, OuterLatch);
1500 }
1501 
1502 bool LoopInterchangeTransform::adjustLoopBranches() {
1503  LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n");
1504  std::vector<DominatorTree::UpdateType> DTUpdates;
1505 
1506  BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1507  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1508 
1509  assert(OuterLoopPreHeader != OuterLoop->getHeader() &&
1510  InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader &&
1511  InnerLoopPreHeader && "Guaranteed by loop-simplify form");
1512  // Ensure that both preheaders do not contain PHI nodes and have single
1513  // predecessors. This allows us to move them easily. We use
1514  // InsertPreHeaderForLoop to create an 'extra' preheader, if the existing
1515  // preheaders do not satisfy those conditions.
1516  if (isa<PHINode>(OuterLoopPreHeader->begin()) ||
1517  !OuterLoopPreHeader->getUniquePredecessor())
1518  OuterLoopPreHeader =
1519  InsertPreheaderForLoop(OuterLoop, DT, LI, nullptr, true);
1520  if (InnerLoopPreHeader == OuterLoop->getHeader())
1521  InnerLoopPreHeader =
1522  InsertPreheaderForLoop(InnerLoop, DT, LI, nullptr, true);
1523 
1524  // Adjust the loop preheader
1525  BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1526  BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1527  BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
1528  BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1529  BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor();
1530  BasicBlock *InnerLoopLatchPredecessor =
1531  InnerLoopLatch->getUniquePredecessor();
1532  BasicBlock *InnerLoopLatchSuccessor;
1533  BasicBlock *OuterLoopLatchSuccessor;
1534 
1535  BranchInst *OuterLoopLatchBI =
1536  dyn_cast<BranchInst>(OuterLoopLatch->getTerminator());
1537  BranchInst *InnerLoopLatchBI =
1538  dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
1539  BranchInst *OuterLoopHeaderBI =
1540  dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
1541  BranchInst *InnerLoopHeaderBI =
1542  dyn_cast<BranchInst>(InnerLoopHeader->getTerminator());
1543 
1544  if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
1545  !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
1546  !InnerLoopHeaderBI)
1547  return false;
1548 
1549  BranchInst *InnerLoopLatchPredecessorBI =
1550  dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator());
1551  BranchInst *OuterLoopPredecessorBI =
1552  dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator());
1553 
1554  if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
1555  return false;
1556  BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor();
1557  if (!InnerLoopHeaderSuccessor)
1558  return false;
1559 
1560  // Adjust Loop Preheader and headers.
1561  // The branches in the outer loop predecessor and the outer loop header can
1562  // be unconditional branches or conditional branches with duplicates. Consider
1563  // this when updating the successors.
1564  updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader,
1565  InnerLoopPreHeader, DTUpdates, /*MustUpdateOnce=*/false);
1566  // The outer loop header might or might not branch to the outer latch.
1567  // We are guaranteed to branch to the inner loop preheader.
1568  if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch)) {
1569  // In this case the outerLoopHeader should branch to the InnerLoopLatch.
1570  updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, InnerLoopLatch,
1571  DTUpdates,
1572  /*MustUpdateOnce=*/false);
1573  }
1574  updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader,
1575  InnerLoopHeaderSuccessor, DTUpdates,
1576  /*MustUpdateOnce=*/false);
1577 
1578  // Adjust reduction PHI's now that the incoming block has changed.
1579  InnerLoopHeaderSuccessor->replacePhiUsesWith(InnerLoopHeader,
1580  OuterLoopHeader);
1581 
1582  updateSuccessor(InnerLoopHeaderBI, InnerLoopHeaderSuccessor,
1583  OuterLoopPreHeader, DTUpdates);
1584 
1585  // -------------Adjust loop latches-----------
1586  if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader)
1587  InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1);
1588  else
1589  InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0);
1590 
1591  updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch,
1592  InnerLoopLatchSuccessor, DTUpdates);
1593 
1594  if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader)
1595  OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1);
1596  else
1597  OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0);
1598 
1599  updateSuccessor(InnerLoopLatchBI, InnerLoopLatchSuccessor,
1600  OuterLoopLatchSuccessor, DTUpdates);
1601  updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
1602  DTUpdates);
1603 
1604  DT->applyUpdates(DTUpdates);
1605  restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
1606  OuterLoopPreHeader);
1607 
1608  moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
1609  OuterLoopHeader, OuterLoopLatch, InnerLoop->getExitBlock(),
1610  InnerLoop, LI);
1611  // For PHIs in the exit block of the outer loop, outer's latch has been
1612  // replaced by Inners'.
1613  OuterLoopLatchSuccessor->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
1614 
1615  auto &OuterInnerReductions = LIL.getOuterInnerReductions();
1616  // Now update the reduction PHIs in the inner and outer loop headers.
1617  SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
1618  for (PHINode &PHI : InnerLoopHeader->phis())
1619  if (OuterInnerReductions.contains(&PHI))
1620  InnerLoopPHIs.push_back(&PHI);
1621 
1622  for (PHINode &PHI : OuterLoopHeader->phis())
1623  if (OuterInnerReductions.contains(&PHI))
1624  OuterLoopPHIs.push_back(&PHI);
1625 
1626  // Now move the remaining reduction PHIs from outer to inner loop header and
1627  // vice versa. The PHI nodes must be part of a reduction across the inner and
1628  // outer loop and all the remains to do is and updating the incoming blocks.
1629  for (PHINode *PHI : OuterLoopPHIs) {
1630  LLVM_DEBUG(dbgs() << "Outer loop reduction PHIs:\n"; PHI->dump(););
1631  PHI->moveBefore(InnerLoopHeader->getFirstNonPHI());
1632  assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node");
1633  }
1634  for (PHINode *PHI : InnerLoopPHIs) {
1635  LLVM_DEBUG(dbgs() << "Inner loop reduction PHIs:\n"; PHI->dump(););
1636  PHI->moveBefore(OuterLoopHeader->getFirstNonPHI());
1637  assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node");
1638  }
1639 
1640  // Update the incoming blocks for moved PHI nodes.
1641  OuterLoopHeader->replacePhiUsesWith(InnerLoopPreHeader, OuterLoopPreHeader);
1642  OuterLoopHeader->replacePhiUsesWith(InnerLoopLatch, OuterLoopLatch);
1643  InnerLoopHeader->replacePhiUsesWith(OuterLoopPreHeader, InnerLoopPreHeader);
1644  InnerLoopHeader->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
1645 
1646  // Values defined in the outer loop header could be used in the inner loop
1647  // latch. In that case, we need to create LCSSA phis for them, because after
1648  // interchanging they will be defined in the new inner loop and used in the
1649  // new outer loop.
1650  IRBuilder<> Builder(OuterLoopHeader->getContext());
1651  SmallVector<Instruction *, 4> MayNeedLCSSAPhis;
1652  for (Instruction &I :
1653  make_range(OuterLoopHeader->begin(), std::prev(OuterLoopHeader->end())))
1654  MayNeedLCSSAPhis.push_back(&I);
1655  formLCSSAForInstructions(MayNeedLCSSAPhis, *DT, *LI, SE, Builder);
1656 
1657  return true;
1658 }
1659 
1660 bool LoopInterchangeTransform::adjustLoopLinks() {
1661  // Adjust all branches in the inner and outer loop.
1662  bool Changed = adjustLoopBranches();
1663  if (Changed) {
1664  // We have interchanged the preheaders so we need to interchange the data in
1665  // the preheaders as well. This is because the content of the inner
1666  // preheader was previously executed inside the outer loop.
1667  BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1668  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1669  swapBBContents(OuterLoopPreHeader, InnerLoopPreHeader);
1670  }
1671  return Changed;
1672 }
1673 
1674 namespace {
1675 /// Main LoopInterchange Pass.
1676 struct LoopInterchangeLegacyPass : public LoopPass {
1677  static char ID;
1678 
1679  LoopInterchangeLegacyPass() : LoopPass(ID) {
1681  }
1682 
1683  void getAnalysisUsage(AnalysisUsage &AU) const override {
1686 
1688  }
1689 
1690  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
1691  if (skipLoop(L))
1692  return false;
1693 
1694  auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1695  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1696  auto *DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI();
1697  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1698  auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
1699  std::unique_ptr<CacheCost> CC = nullptr;
1700  return LoopInterchange(SE, LI, DI, DT, CC, ORE).run(L);
1701  }
1702 };
1703 } // namespace
1704 
1706 
1707 INITIALIZE_PASS_BEGIN(LoopInterchangeLegacyPass, "loop-interchange",
1708  "Interchanges loops for cache reuse", false, false)
1712 
1713 INITIALIZE_PASS_END(LoopInterchangeLegacyPass, "loop-interchange",
1714  "Interchanges loops for cache reuse", false, false)
1715 
1717  return new LoopInterchangeLegacyPass();
1718 }
1719 
1721  LoopAnalysisManager &AM,
1723  LPMUpdater &U) {
1724  Function &F = *LN.getParent();
1725 
1726  DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
1727  std::unique_ptr<CacheCost> CC =
1730  if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LN))
1731  return PreservedAnalyses::all();
1732  U.markLoopNestChanged(true);
1734 }
i
i
Definition: README.txt:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
llvm::initializeLoopInterchangeLegacyPassPass
void initializeLoopInterchangeLegacyPassPass(PassRegistry &)
isLegalToInterChangeLoops
static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, unsigned InnerLoopId, unsigned OuterLoopId)
Definition: LoopInterchange.cpp:201
moveBBContents
static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore)
Move all instructions except the terminator from FromBB right before InsertBefore.
Definition: LoopInterchange.cpp:1352
llvm::BasicBlock::end
iterator end()
Definition: BasicBlock.h:308
followLCSSA
static Value * followLCSSA(Value *SV)
Definition: LoopInterchange.cpp:693
llvm::OptimizationRemarkMissed
Diagnostic information for missed-optimization remarks.
Definition: DiagnosticInfo.h:734
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:60
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::LoopBase::getUniqueExitBlock
BlockT * getUniqueExitBlock() const
If getUniqueExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:158
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::BranchInst::successors
iterator_range< succ_op_iterator > successors()
Definition: Instructions.h:3244
llvm::LoopBase::getExitBlock
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:107
llvm::PHINode::incoming_values
op_range incoming_values()
Definition: Instructions.h:2785
llvm::AArch64PACKey::ID
ID
Definition: AArch64BaseInfo.h:818
llvm::User::operands
op_range operands()
Definition: User.h:242
PHI
Rewrite undef for PHI
Definition: AMDGPURewriteUndefForPHI.cpp:101
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
llvm::Dependence::DVEntry::LE
@ LE
Definition: DependenceAnalysis.h:88
Scalar.h
llvm::DependenceAnalysisWrapperPass
Legacy pass manager pass to access dependence information.
Definition: DependenceAnalysis.h:1001
llvm::Function
Definition: Function.h:60
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
llvm::cfg::UpdateKind::Insert
@ Insert
StringRef.h
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Pass.h
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:139
llvm::Dependence::DVEntry::GE
@ GE
Definition: DependenceAnalysis.h:91
DEBUG_TYPE
#define DEBUG_TYPE
Definition: LoopInterchange.cpp:56
uses
This might compile to this xmm1 xorps xmm0 movss xmm0 ret Now consider if the code caused xmm1 to get spilled This might produce this xmm1 movaps xmm0 movaps xmm1 movss xmm0 ret since the reload is only used by these we could fold it into the uses
Definition: README-SSE.txt:258
llvm::SmallVector< Loop *, 8 >
Statistic.h
llvm::LoopBase::getLoopPredecessor
BlockT * getLoopPredecessor() const
If the given loop's header has exactly one unique predecessor outside the loop, return it.
Definition: LoopInfoImpl.h:211
ErrorHandling.h
llvm::IRBuilder<>
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:631
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
llvm::LoopInfoBase::changeLoopFor
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
Definition: LoopInfo.h:1030
moveLCSSAPhis
static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader, BasicBlock *InnerLatch, BasicBlock *OuterHeader, BasicBlock *OuterLatch, BasicBlock *OuterExit, Loop *InnerLoop, LoopInfo *LI)
Definition: LoopInterchange.cpp:1406
OptimizationRemarkEmitter.h
llvm::Instruction::insertBefore
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:87
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::getLoopAnalysisUsage
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:142
Right
Vector Shift Left Right
Definition: README_P9.txt:118
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
ScalarEvolution.h
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:51
llvm::cfg::UpdateKind::Delete
@ Delete
interchange
loop interchange
Definition: LoopInterchange.cpp:1713
llvm::formLCSSARecursively
bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition: LCSSA.cpp:410
llvm::LoopBase::begin
iterator begin() const
Definition: LoopInfo.h:171
llvm::LPMUpdater::markLoopNestChanged
void markLoopNestChanged(bool Changed)
Loopnest passes should use this method to indicate if the loopnest has been modified.
Definition: LoopPassManager.h:365
findInnerReductionPhi
static PHINode * findInnerReductionPhi(Loop *L, Value *V)
Definition: LoopInterchange.cpp:704
llvm::SmallPtrSet< PHINode *, 4 >
llvm::successors
auto successors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:29
llvm::codeview::VFTableSlotKind::Outer
@ Outer
STLExtras.h
populateWorklist
static void populateWorklist(Loop &L, LoopVector &LoopList)
Definition: LoopInterchange.cpp:220
llvm::LoopStandardAnalysisResults::DT
DominatorTree & DT
Definition: LoopAnalysisManager.h:54
llvm::PHINode::setIncomingValue
void setIncomingValue(unsigned i, Value *V)
Definition: Instructions.h:2798
llvm::count_if
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1877
loops
loops
Definition: LoopInfo.cpp:1177
llvm::DominatorTreeBase::applyUpdates
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
Definition: GenericDomTree.h:544
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::BasicBlock::getUniqueSuccessor
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:323
loop
Analysis the ScalarEvolution expression for r is< loop > Outside the loop
Definition: README.txt:8
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::AlignStyle::Left
@ Left
Instruction.h
CommandLine.h
llvm::Dependence::DVEntry::LT
@ LT
Definition: DependenceAnalysis.h:86
llvm::LoopBase::getParentLoop
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Definition: LoopInfo.h:114
llvm::LoopBase::getSubLoops
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
Definition: LoopInfo.h:160
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1709
llvm::InductionDescriptor
A struct for saving information about induction variables.
Definition: IVDescriptors.h:306
llvm::CacheCost::getCacheCost
static std::unique_ptr< CacheCost > getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, Optional< unsigned > TRT=None)
Create a CacheCost for the loop nest rooted by Root.
Definition: LoopCacheAnalysis.cpp:575
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24
llvm::createLoopInterchangePass
Pass * createLoopInterchangePass()
Definition: LoopInterchange.cpp:1716
Constants.h
areInnerLoopExitPHIsSupported
static bool areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL, SmallPtrSetImpl< PHINode * > &Reductions)
Definition: LoopInterchange.cpp:858
InlinePriorityMode::Cost
@ Cost
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::User
Definition: User.h:44
InstrTypes.h
llvm::RecurrenceDescriptor::getExactFPMathInst
Instruction * getExactFPMathInst() const
Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
Definition: IVDescriptors.h:213
llvm::LoopBase::addChildLoop
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
Definition: LoopInfo.h:412
MaxLoopNestDepth
static const unsigned MaxLoopNestDepth
Definition: LoopInterchange.cpp:77
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:306
swapBBContents
static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2)
Swap instructions between BB1 and BB2 but keep terminators intact.
Definition: LoopInterchange.cpp:1361
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:195
false
Definition: StackSlotColoring.cpp:141
llvm::Dependence::DVEntry::GT
@ GT
Definition: DependenceAnalysis.h:89
llvm::Instruction
Definition: Instruction.h:42
isProfitableForVectorization
static bool isProfitableForVectorization(unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix)
Definition: LoopInterchange.cpp:1094
llvm::BasicBlock::phis
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:364
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::predecessors
auto predecessors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:30
LoopUtils.h
llvm::LPPassManager
Definition: LoopPass.h:76
LoopInterchangeCostThreshold
static cl::opt< int > LoopInterchangeCostThreshold("loop-interchange-threshold", cl::init(0), cl::Hidden, cl::desc("Interchange if you gain more than this number"))
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(LoopInterchangeLegacyPass, "loop-interchange", "Interchanges loops for cache reuse", false, false) INITIALIZE_PASS_END(LoopInterchangeLegacyPass
llvm::LoopBase::getExitingBlock
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:48
llvm::BasicBlock::getFirstNonPHI
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:209
areInnerLoopLatchPHIsSupported
static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
Definition: LoopInterchange.cpp:916
llvm::LoopBase::removeBlockFromLoop
void removeBlockFromLoop(BlockT *BB)
This removes the specified basic block from the current loop, updating the Blocks as appropriate.
Definition: LoopInfo.h:477
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::Loop::LoopBounds::Direction
Direction
An enum for the direction of the loop.
Definition: LoopInfo.h:723
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::dxil::PointerTypeAnalysis::run
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
Definition: PointerTypeAnalysis.cpp:189
LoopInfo.h
llvm::BranchInst::getCondition
Value * getCondition() const
Definition: Instructions.h:3215
llvm::ARM_PROC::IE
@ IE
Definition: ARMBaseInfo.h:27
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:4442
llvm::PHINode::getIncomingValueNumForOperand
static unsigned getIncomingValueNumForOperand(unsigned i)
Definition: Instructions.h:2809
llvm::CmpInst
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:710
llvm::InsertPreheaderForLoop
BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...
Definition: LoopSimplify.cpp:118
llvm::getLoopPassPreservedAnalyses
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Definition: LoopAnalysisManager.cpp:138
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1412
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:75
llvm::DependenceInfo
DependenceInfo - This class is the main dependence-analysis driver.
Definition: DependenceAnalysis.h:291
llvm::Dependence::DVEntry::EQ
@ EQ
Definition: DependenceAnalysis.h:87
llvm::LoopPass
Definition: LoopPass.h:28
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition: LoopPassManager.h:262
llvm::PHINode::setIncomingBlock
void setIncomingBlock(unsigned i, BasicBlock *BB)
Definition: Instructions.h:2834
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2849
llvm::LoopNest::skipEmptyBlockUntil
static const BasicBlock & skipEmptyBlockUntil(const BasicBlock *From, const BasicBlock *End, bool CheckUniquePred=false)
Recursivelly traverse all empty 'single successor' basic blocks of From (if there are any).
Definition: LoopNestAnalysis.cpp:288
llvm::DenseMap
Definition: DenseMap.h:714
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:992
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:929
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:447
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:716
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:183
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1843
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:232
transform
instcombine should handle this transform
Definition: README.txt:262
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1836
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:82
LoopNestAnalysis.h
LoopPassManager.h
llvm::LoopNest::getOutermostLoop
Loop & getOutermostLoop() const
Return the outermost loop in the loop nest.
Definition: LoopNestAnalysis.h:80
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
LoopInterchange.h
llvm::Instruction::clone
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
Definition: Instruction.cpp:892
llvm::LoopBase::removeChildLoop
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
Definition: LoopInfo.h:421
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1108
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1716
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
this
Analysis the ScalarEvolution expression for r is this
Definition: README.txt:8
LoopPass.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::transform
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1884
llvm::BasicBlock::getUniquePredecessor
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:293
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:82
llvm::LoopStandardAnalysisResults::LI
LoopInfo & LI
Definition: LoopAnalysisManager.h:55
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
CC
auto CC
Definition: RISCVRedundantCopyElimination.cpp:79
llvm::LoopNest::getParent
Function * getParent() const
Return the function to which the loop-nest belongs.
Definition: LoopNestAnalysis.h:176
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
for
this could be done in SelectionDAGISel along with other special for
Definition: README.txt:104
populateDependencyMatrix
static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, Loop *L, DependenceInfo *DI, ScalarEvolution *SE)
Definition: LoopInterchange.cpp:89
llvm::ScalarEvolution::isLoopInvariant
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
Definition: ScalarEvolution.cpp:13637
areOuterLoopExitPHIsSupported
static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
Definition: LoopInterchange.cpp:883
llvm::Instruction::mayHaveSideEffects
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
Definition: Instruction.cpp:725
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:35
j
return j(j<< 16)
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:182
llvm::InductionDescriptor::isInductionPHI
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
Definition: IVDescriptors.cpp:1518
llvm::BasicBlock::replacePhiUsesWith
void replacePhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
Definition: BasicBlock.cpp:471
llvm::map_range
auto map_range(ContainerTy &&C, FuncTy F)
Definition: STLExtras.h:435
llvm::OptimizationRemarkEmitterWrapperPass
OptimizationRemarkEmitter legacy analysis pass.
Definition: OptimizationRemarkEmitter.h:146
llvm::ScalarEvolution::forgetLoop
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
Definition: ScalarEvolution.cpp:8372
llvm::SCEVAddRecExpr::getLoop
const Loop * getLoop() const
Definition: ScalarEvolutionExpressions.h:354
std
Definition: BitVector.h:851
interChangeDependencies
static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx, unsigned ToIndx)
Definition: LoopInterchange.cpp:179
isLexicographicallyPositive
static bool isLexicographicallyPositive(std::vector< char > &DV)
Definition: LoopInterchange.cpp:189
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:84
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:348
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
reuse
loop Interchanges loops for cache reuse
Definition: LoopInterchange.cpp:1714
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:342
Direction
Loop::LoopBounds::Direction Direction
Definition: LoopInfo.cpp:230
Casting.h
DiagnosticInfo.h
Function.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:105
llvm::ScalarEvolution::isSCEVable
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Definition: ScalarEvolution.cpp:4328
llvm::LoopBase::addBlockEntry
void addBlockEntry(BlockT *BB)
This adds a basic block directly to the basic block list.
Definition: LoopInfo.h:440
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:614
llvm::LoopBase::getNumBackEdges
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
Definition: LoopInfo.h:267
updateSuccessor
static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB, BasicBlock *NewBB, std::vector< DominatorTree::UpdateType > &DTUpdates, bool MustUpdateOnce=true)
Definition: LoopInterchange.cpp:1380
llvm::LoopStandardAnalysisResults::SE
ScalarEvolution & SE
Definition: LoopAnalysisManager.h:56
llvm::LoopStandardAnalysisResults::AA
AAResults & AA
Definition: LoopAnalysisManager.h:52
llvm::LoopNest::getLoops
ArrayRef< Loop * > getLoops() const
Get the loops in the nest.
Definition: LoopNestAnalysis.h:117
ScalarEvolutionExpressions.h
llvm::RecurrenceDescriptor
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:69
llvm::BasicBlock::getInstList
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:373
llvm::DependenceInfo::depends
std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool PossiblyLoopIndependent)
depends - Tests for a dependence between the Src and Dst instructions.
Definition: DependenceAnalysis.cpp:3590
llvm::formLCSSAForInstructions
bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, IRBuilderBase &Builder, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
Definition: LCSSA.cpp:78
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:689
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
Instructions.h
SmallVector.h
User.h
llvm::LoopInfoBase::changeTopLevelLoop
void changeTopLevelLoop(LoopT *OldLoop, LoopT *NewLoop)
Replace the specified loop in the top-level loops list with the indicated loop.
Definition: LoopInfo.h:1040
Dominators.h
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:164
llvm::PHINode
Definition: Instructions.h:2699
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:119
LoopCacheAnalysis.h
llvm::MipsISD::Ins
@ Ins
Definition: MipsISelLowering.h:160
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
llvm::LoopInterchangePass::run
PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LoopInterchange.cpp:1720
llvm::SmallPtrSetImpl< PHINode * >
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:307
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::RecurrenceDescriptor::isReductionPHI
static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
Definition: IVDescriptors.cpp:825
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1474
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:171
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
DependenceAnalysis.h
MaxMemInstrCount
static const unsigned MaxMemInstrCount
Definition: LoopInterchange.cpp:74
llvm::cl::desc
Definition: CommandLine.h:413
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3134
raw_ostream.h
llvm::ScalarEvolution::getBackedgeTakenCount
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
Definition: ScalarEvolution.cpp:8242
BasicBlockUtils.h
llvm::SplitBlock
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition: BasicBlockUtils.cpp:918
Value.h
InitializePasses.h
llvm::LoopNest
This class represents a loop nest and can be used to query its properties.
Definition: LoopNestAnalysis.h:28
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
llvm::BranchInst::isConditional
bool isConditional() const
Definition: Instructions.h:3213
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3227
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43