LLVM  9.0.0svn
LoopInterchange.cpp
Go to the documentation of this file.
1 //===- LoopInterchange.cpp - Loop interchange pass-------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This Pass handles loop interchange transform.
10 // This pass interchanges loops to provide a more cache-friendly memory access
11 // patterns.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Analysis/LoopInfo.h"
21 #include "llvm/Analysis/LoopPass.h"
25 #include "llvm/IR/BasicBlock.h"
26 #include "llvm/IR/Constants.h"
27 #include "llvm/IR/DiagnosticInfo.h"
28 #include "llvm/IR/Dominators.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/InstrTypes.h"
31 #include "llvm/IR/Instruction.h"
32 #include "llvm/IR/Instructions.h"
33 #include "llvm/IR/Type.h"
34 #include "llvm/IR/User.h"
35 #include "llvm/IR/Value.h"
36 #include "llvm/Pass.h"
37 #include "llvm/Support/Casting.h"
39 #include "llvm/Support/Debug.h"
42 #include "llvm/Transforms/Scalar.h"
43 #include "llvm/Transforms/Utils.h"
46 #include <cassert>
47 #include <utility>
48 #include <vector>
49 
50 using namespace llvm;
51 
52 #define DEBUG_TYPE "loop-interchange"
53 
54 STATISTIC(LoopsInterchanged, "Number of loops interchanged");
55 
57  "loop-interchange-threshold", cl::init(0), cl::Hidden,
58  cl::desc("Interchange if you gain more than this number"));
59 
60 namespace {
61 
62 using LoopVector = SmallVector<Loop *, 8>;
63 
64 // TODO: Check if we can use a sparse matrix here.
65 using CharMatrix = std::vector<std::vector<char>>;
66 
67 } // end anonymous namespace
68 
69 // Maximum number of dependencies that can be handled in the dependency matrix.
70 static const unsigned MaxMemInstrCount = 100;
71 
72 // Maximum loop depth supported.
73 static const unsigned MaxLoopNestDepth = 10;
74 
75 #ifdef DUMP_DEP_MATRICIES
76 static void printDepMatrix(CharMatrix &DepMatrix) {
77  for (auto &Row : DepMatrix) {
78  for (auto D : Row)
79  LLVM_DEBUG(dbgs() << D << " ");
80  LLVM_DEBUG(dbgs() << "\n");
81  }
82 }
83 #endif
84 
85 static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
86  Loop *L, DependenceInfo *DI) {
87  using ValueVector = SmallVector<Value *, 16>;
88 
89  ValueVector MemInstr;
90 
91  // For each block.
92  for (BasicBlock *BB : L->blocks()) {
93  // Scan the BB and collect legal loads and stores.
94  for (Instruction &I : *BB) {
95  if (!isa<Instruction>(I))
96  return false;
97  if (auto *Ld = dyn_cast<LoadInst>(&I)) {
98  if (!Ld->isSimple())
99  return false;
100  MemInstr.push_back(&I);
101  } else if (auto *St = dyn_cast<StoreInst>(&I)) {
102  if (!St->isSimple())
103  return false;
104  MemInstr.push_back(&I);
105  }
106  }
107  }
108 
109  LLVM_DEBUG(dbgs() << "Found " << MemInstr.size()
110  << " Loads and Stores to analyze\n");
111 
112  ValueVector::iterator I, IE, J, JE;
113 
114  for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
115  for (J = I, JE = MemInstr.end(); J != JE; ++J) {
116  std::vector<char> Dep;
117  Instruction *Src = cast<Instruction>(*I);
118  Instruction *Dst = cast<Instruction>(*J);
119  if (Src == Dst)
120  continue;
121  // Ignore Input dependencies.
122  if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
123  continue;
124  // Track Output, Flow, and Anti dependencies.
125  if (auto D = DI->depends(Src, Dst, true)) {
126  assert(D->isOrdered() && "Expected an output, flow or anti dep.");
127  LLVM_DEBUG(StringRef DepType =
128  D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";
129  dbgs() << "Found " << DepType
130  << " dependency between Src and Dst\n"
131  << " Src:" << *Src << "\n Dst:" << *Dst << '\n');
132  unsigned Levels = D->getLevels();
133  char Direction;
134  for (unsigned II = 1; II <= Levels; ++II) {
135  const SCEV *Distance = D->getDistance(II);
136  const SCEVConstant *SCEVConst =
137  dyn_cast_or_null<SCEVConstant>(Distance);
138  if (SCEVConst) {
139  const ConstantInt *CI = SCEVConst->getValue();
140  if (CI->isNegative())
141  Direction = '<';
142  else if (CI->isZero())
143  Direction = '=';
144  else
145  Direction = '>';
146  Dep.push_back(Direction);
147  } else if (D->isScalar(II)) {
148  Direction = 'S';
149  Dep.push_back(Direction);
150  } else {
151  unsigned Dir = D->getDirection(II);
152  if (Dir == Dependence::DVEntry::LT ||
154  Direction = '<';
155  else if (Dir == Dependence::DVEntry::GT ||
157  Direction = '>';
158  else if (Dir == Dependence::DVEntry::EQ)
159  Direction = '=';
160  else
161  Direction = '*';
162  Dep.push_back(Direction);
163  }
164  }
165  while (Dep.size() != Level) {
166  Dep.push_back('I');
167  }
168 
169  DepMatrix.push_back(Dep);
170  if (DepMatrix.size() > MaxMemInstrCount) {
171  LLVM_DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount
172  << " dependencies inside loop\n");
173  return false;
174  }
175  }
176  }
177  }
178 
179  return true;
180 }
181 
182 // A loop is moved from index 'from' to an index 'to'. Update the Dependence
183 // matrix by exchanging the two columns.
184 static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx,
185  unsigned ToIndx) {
186  unsigned numRows = DepMatrix.size();
187  for (unsigned i = 0; i < numRows; ++i) {
188  char TmpVal = DepMatrix[i][ToIndx];
189  DepMatrix[i][ToIndx] = DepMatrix[i][FromIndx];
190  DepMatrix[i][FromIndx] = TmpVal;
191  }
192 }
193 
194 // Checks if outermost non '=','S'or'I' dependence in the dependence matrix is
195 // '>'
196 static bool isOuterMostDepPositive(CharMatrix &DepMatrix, unsigned Row,
197  unsigned Column) {
198  for (unsigned i = 0; i <= Column; ++i) {
199  if (DepMatrix[Row][i] == '<')
200  return false;
201  if (DepMatrix[Row][i] == '>')
202  return true;
203  }
204  // All dependencies were '=','S' or 'I'
205  return false;
206 }
207 
208 // Checks if no dependence exist in the dependency matrix in Row before Column.
209 static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row,
210  unsigned Column) {
211  for (unsigned i = 0; i < Column; ++i) {
212  if (DepMatrix[Row][i] != '=' && DepMatrix[Row][i] != 'S' &&
213  DepMatrix[Row][i] != 'I')
214  return false;
215  }
216  return true;
217 }
218 
219 static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row,
220  unsigned OuterLoopId, char InnerDep,
221  char OuterDep) {
222  if (isOuterMostDepPositive(DepMatrix, Row, OuterLoopId))
223  return false;
224 
225  if (InnerDep == OuterDep)
226  return true;
227 
228  // It is legal to interchange if and only if after interchange no row has a
229  // '>' direction as the leftmost non-'='.
230 
231  if (InnerDep == '=' || InnerDep == 'S' || InnerDep == 'I')
232  return true;
233 
234  if (InnerDep == '<')
235  return true;
236 
237  if (InnerDep == '>') {
238  // If OuterLoopId represents outermost loop then interchanging will make the
239  // 1st dependency as '>'
240  if (OuterLoopId == 0)
241  return false;
242 
243  // If all dependencies before OuterloopId are '=','S'or 'I'. Then
244  // interchanging will result in this row having an outermost non '='
245  // dependency of '>'
246  if (!containsNoDependence(DepMatrix, Row, OuterLoopId))
247  return true;
248  }
249 
250  return false;
251 }
252 
253 // Checks if it is legal to interchange 2 loops.
254 // [Theorem] A permutation of the loops in a perfect nest is legal if and only
255 // if the direction matrix, after the same permutation is applied to its
256 // columns, has no ">" direction as the leftmost non-"=" direction in any row.
257 static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
258  unsigned InnerLoopId,
259  unsigned OuterLoopId) {
260  unsigned NumRows = DepMatrix.size();
261  // For each row check if it is valid to interchange.
262  for (unsigned Row = 0; Row < NumRows; ++Row) {
263  char InnerDep = DepMatrix[Row][InnerLoopId];
264  char OuterDep = DepMatrix[Row][OuterLoopId];
265  if (InnerDep == '*' || OuterDep == '*')
266  return false;
267  if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep, OuterDep))
268  return false;
269  }
270  return true;
271 }
272 
273 static LoopVector populateWorklist(Loop &L) {
274  LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "
275  << L.getHeader()->getParent()->getName() << " Loop: %"
276  << L.getHeader()->getName() << '\n');
277  LoopVector LoopList;
278  Loop *CurrentLoop = &L;
279  const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops();
280  while (!Vec->empty()) {
281  // The current loop has multiple subloops in it hence it is not tightly
282  // nested.
283  // Discard all loops above it added into Worklist.
284  if (Vec->size() != 1)
285  return {};
286 
287  LoopList.push_back(CurrentLoop);
288  CurrentLoop = Vec->front();
289  Vec = &CurrentLoop->getSubLoops();
290  }
291  LoopList.push_back(CurrentLoop);
292  return LoopList;
293 }
294 
296  PHINode *InnerIndexVar = L->getCanonicalInductionVariable();
297  if (InnerIndexVar)
298  return InnerIndexVar;
299  if (L->getLoopLatch() == nullptr || L->getLoopPredecessor() == nullptr)
300  return nullptr;
301  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
302  PHINode *PhiVar = cast<PHINode>(I);
303  Type *PhiTy = PhiVar->getType();
304  if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
305  !PhiTy->isPointerTy())
306  return nullptr;
307  const SCEVAddRecExpr *AddRec =
308  dyn_cast<SCEVAddRecExpr>(SE->getSCEV(PhiVar));
309  if (!AddRec || !AddRec->isAffine())
310  continue;
311  const SCEV *Step = AddRec->getStepRecurrence(*SE);
312  if (!isa<SCEVConstant>(Step))
313  continue;
314  // Found the induction variable.
315  // FIXME: Handle loops with more than one induction variable. Note that,
316  // currently, legality makes sure we have only one induction variable.
317  return PhiVar;
318  }
319  return nullptr;
320 }
321 
322 namespace {
323 
324 /// LoopInterchangeLegality checks if it is legal to interchange the loop.
325 class LoopInterchangeLegality {
326 public:
327  LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
329  : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
330 
331  /// Check if the loops can be interchanged.
332  bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
333  CharMatrix &DepMatrix);
334 
335  /// Check if the loop structure is understood. We do not handle triangular
336  /// loops for now.
337  bool isLoopStructureUnderstood(PHINode *InnerInductionVar);
338 
339  bool currentLimitations();
340 
341  const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions() const {
342  return OuterInnerReductions;
343  }
344 
345 private:
346  bool tightlyNested(Loop *Outer, Loop *Inner);
347  bool containsUnsafeInstructions(BasicBlock *BB);
348 
349  /// Discover induction and reduction PHIs in the header of \p L. Induction
350  /// PHIs are added to \p Inductions, reductions are added to
351  /// OuterInnerReductions. When the outer loop is passed, the inner loop needs
352  /// to be passed as \p InnerLoop.
353  bool findInductionAndReductions(Loop *L,
354  SmallVector<PHINode *, 8> &Inductions,
355  Loop *InnerLoop);
356 
357  Loop *OuterLoop;
358  Loop *InnerLoop;
359 
360  ScalarEvolution *SE;
361 
362  /// Interface to emit optimization remarks.
364 
365  /// Set of reduction PHIs taking part of a reduction across the inner and
366  /// outer loop.
367  SmallPtrSet<PHINode *, 4> OuterInnerReductions;
368 };
369 
370 /// LoopInterchangeProfitability checks if it is profitable to interchange the
371 /// loop.
372 class LoopInterchangeProfitability {
373 public:
374  LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
376  : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
377 
378  /// Check if the loop interchange is profitable.
379  bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId,
380  CharMatrix &DepMatrix);
381 
382 private:
383  int getInstrOrderCost();
384 
385  Loop *OuterLoop;
386  Loop *InnerLoop;
387 
388  /// Scev analysis.
389  ScalarEvolution *SE;
390 
391  /// Interface to emit optimization remarks.
393 };
394 
395 /// LoopInterchangeTransform interchanges the loop.
396 class LoopInterchangeTransform {
397 public:
398  LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
399  LoopInfo *LI, DominatorTree *DT,
400  BasicBlock *LoopNestExit,
401  const LoopInterchangeLegality &LIL)
402  : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT),
403  LoopExit(LoopNestExit), LIL(LIL) {}
404 
405  /// Interchange OuterLoop and InnerLoop.
406  bool transform();
407  void restructureLoops(Loop *NewInner, Loop *NewOuter,
408  BasicBlock *OrigInnerPreHeader,
409  BasicBlock *OrigOuterPreHeader);
410  void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
411 
412 private:
413  void splitInnerLoopLatch(Instruction *);
414  void splitInnerLoopHeader();
415  bool adjustLoopLinks();
416  void adjustLoopPreheaders();
417  bool adjustLoopBranches();
418 
419  Loop *OuterLoop;
420  Loop *InnerLoop;
421 
422  /// Scev analysis.
423  ScalarEvolution *SE;
424 
425  LoopInfo *LI;
426  DominatorTree *DT;
427  BasicBlock *LoopExit;
428 
429  const LoopInterchangeLegality &LIL;
430 };
431 
432 // Main LoopInterchange Pass.
433 struct LoopInterchange : public LoopPass {
434  static char ID;
435  ScalarEvolution *SE = nullptr;
436  LoopInfo *LI = nullptr;
437  DependenceInfo *DI = nullptr;
438  DominatorTree *DT = nullptr;
439 
440  /// Interface to emit optimization remarks.
442 
443  LoopInterchange() : LoopPass(ID) {
445  }
446 
447  void getAnalysisUsage(AnalysisUsage &AU) const override {
450 
452  }
453 
454  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
455  if (skipLoop(L) || L->getParentLoop())
456  return false;
457 
458  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
459  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
460  DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI();
461  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
462  ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
463 
464  return processLoopList(populateWorklist(*L));
465  }
466 
467  bool isComputableLoopNest(LoopVector LoopList) {
468  for (Loop *L : LoopList) {
469  const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
470  if (ExitCountOuter == SE->getCouldNotCompute()) {
471  LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n");
472  return false;
473  }
474  if (L->getNumBackEdges() != 1) {
475  LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n");
476  return false;
477  }
478  if (!L->getExitingBlock()) {
479  LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n");
480  return false;
481  }
482  }
483  return true;
484  }
485 
486  unsigned selectLoopForInterchange(const LoopVector &LoopList) {
487  // TODO: Add a better heuristic to select the loop to be interchanged based
488  // on the dependence matrix. Currently we select the innermost loop.
489  return LoopList.size() - 1;
490  }
491 
492  bool processLoopList(LoopVector LoopList) {
493  bool Changed = false;
494  unsigned LoopNestDepth = LoopList.size();
495  if (LoopNestDepth < 2) {
496  LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n");
497  return false;
498  }
499  if (LoopNestDepth > MaxLoopNestDepth) {
500  LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than "
501  << MaxLoopNestDepth << "\n");
502  return false;
503  }
504  if (!isComputableLoopNest(LoopList)) {
505  LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n");
506  return false;
507  }
508 
509  LLVM_DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepth
510  << "\n");
511 
512  CharMatrix DependencyMatrix;
513  Loop *OuterMostLoop = *(LoopList.begin());
514  if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth,
515  OuterMostLoop, DI)) {
516  LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n");
517  return false;
518  }
519 #ifdef DUMP_DEP_MATRICIES
520  LLVM_DEBUG(dbgs() << "Dependence before interchange\n");
521  printDepMatrix(DependencyMatrix);
522 #endif
523 
524  // Get the Outermost loop exit.
525  BasicBlock *LoopNestExit = OuterMostLoop->getExitBlock();
526  if (!LoopNestExit) {
527  LLVM_DEBUG(dbgs() << "OuterMostLoop needs an unique exit block");
528  return false;
529  }
530 
531  unsigned SelecLoopId = selectLoopForInterchange(LoopList);
532  // Move the selected loop outwards to the best possible position.
533  for (unsigned i = SelecLoopId; i > 0; i--) {
534  bool Interchanged =
535  processLoop(LoopList, i, i - 1, LoopNestExit, DependencyMatrix);
536  if (!Interchanged)
537  return Changed;
538  // Loops interchanged reflect the same in LoopList
539  std::swap(LoopList[i - 1], LoopList[i]);
540 
541  // Update the DependencyMatrix
542  interChangeDependencies(DependencyMatrix, i, i - 1);
543 #ifdef DUMP_DEP_MATRICIES
544  LLVM_DEBUG(dbgs() << "Dependence after interchange\n");
545  printDepMatrix(DependencyMatrix);
546 #endif
547  Changed |= Interchanged;
548  }
549  return Changed;
550  }
551 
552  bool processLoop(LoopVector LoopList, unsigned InnerLoopId,
553  unsigned OuterLoopId, BasicBlock *LoopNestExit,
554  std::vector<std::vector<char>> &DependencyMatrix) {
555  LLVM_DEBUG(dbgs() << "Processing Inner Loop Id = " << InnerLoopId
556  << " and OuterLoopId = " << OuterLoopId << "\n");
557  Loop *InnerLoop = LoopList[InnerLoopId];
558  Loop *OuterLoop = LoopList[OuterLoopId];
559 
560  LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
561  if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
562  LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n");
563  return false;
564  }
565  LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");
566  LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
567  if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) {
568  LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
569  return false;
570  }
571 
572  ORE->emit([&]() {
573  return OptimizationRemark(DEBUG_TYPE, "Interchanged",
574  InnerLoop->getStartLoc(),
575  InnerLoop->getHeader())
576  << "Loop interchanged with enclosing loop.";
577  });
578 
579  LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LoopNestExit,
580  LIL);
581  LIT.transform();
582  LLVM_DEBUG(dbgs() << "Loops interchanged.\n");
583  LoopsInterchanged++;
584  return true;
585  }
586 };
587 
588 } // end anonymous namespace
589 
590 bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock *BB) {
591  return any_of(*BB, [](const Instruction &I) {
592  return I.mayHaveSideEffects() || I.mayReadFromMemory();
593  });
594 }
595 
596 bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
597  BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
598  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
599  BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
600 
601  LLVM_DEBUG(dbgs() << "Checking if loops are tightly nested\n");
602 
603  // A perfectly nested loop will not have any branch in between the outer and
604  // inner block i.e. outer header will branch to either inner preheader and
605  // outerloop latch.
606  BranchInst *OuterLoopHeaderBI =
607  dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
608  if (!OuterLoopHeaderBI)
609  return false;
610 
611  for (BasicBlock *Succ : successors(OuterLoopHeaderBI))
612  if (Succ != InnerLoopPreHeader && Succ != InnerLoop->getHeader() &&
613  Succ != OuterLoopLatch)
614  return false;
615 
616  LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n");
617  // We do not have any basic block in between now make sure the outer header
618  // and outer loop latch doesn't contain any unsafe instructions.
619  if (containsUnsafeInstructions(OuterLoopHeader) ||
620  containsUnsafeInstructions(OuterLoopLatch))
621  return false;
622 
623  LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n");
624  // We have a perfect loop nest.
625  return true;
626 }
627 
628 bool LoopInterchangeLegality::isLoopStructureUnderstood(
629  PHINode *InnerInduction) {
630  unsigned Num = InnerInduction->getNumOperands();
631  BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
632  for (unsigned i = 0; i < Num; ++i) {
633  Value *Val = InnerInduction->getOperand(i);
634  if (isa<Constant>(Val))
635  continue;
637  if (!I)
638  return false;
639  // TODO: Handle triangular loops.
640  // e.g. for(int i=0;i<N;i++)
641  // for(int j=i;j<N;j++)
642  unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i);
643  if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
644  InnerLoopPreheader &&
645  !OuterLoop->isLoopInvariant(I)) {
646  return false;
647  }
648  }
649  return true;
650 }
651 
652 // If SV is a LCSSA PHI node with a single incoming value, return the incoming
653 // value.
654 static Value *followLCSSA(Value *SV) {
655  PHINode *PHI = dyn_cast<PHINode>(SV);
656  if (!PHI)
657  return SV;
658 
659  if (PHI->getNumIncomingValues() != 1)
660  return SV;
661  return followLCSSA(PHI->getIncomingValue(0));
662 }
663 
664 // Check V's users to see if it is involved in a reduction in L.
666  for (Value *User : V->users()) {
667  if (PHINode *PHI = dyn_cast<PHINode>(User)) {
668  if (PHI->getNumIncomingValues() == 1)
669  continue;
671  if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD))
672  return PHI;
673  return nullptr;
674  }
675  }
676 
677  return nullptr;
678 }
679 
680 bool LoopInterchangeLegality::findInductionAndReductions(
681  Loop *L, SmallVector<PHINode *, 8> &Inductions, Loop *InnerLoop) {
682  if (!L->getLoopLatch() || !L->getLoopPredecessor())
683  return false;
684  for (PHINode &PHI : L->getHeader()->phis()) {
687  if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
688  Inductions.push_back(&PHI);
689  else {
690  // PHIs in inner loops need to be part of a reduction in the outer loop,
691  // discovered when checking the PHIs of the outer loop earlier.
692  if (!InnerLoop) {
693  if (OuterInnerReductions.find(&PHI) == OuterInnerReductions.end()) {
694  LLVM_DEBUG(dbgs() << "Inner loop PHI is not part of reductions "
695  "across the outer loop.\n");
696  return false;
697  }
698  } else {
699  assert(PHI.getNumIncomingValues() == 2 &&
700  "Phis in loop header should have exactly 2 incoming values");
701  // Check if we have a PHI node in the outer loop that has a reduction
702  // result from the inner loop as an incoming value.
703  Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch()));
704  PHINode *InnerRedPhi = findInnerReductionPhi(InnerLoop, V);
705  if (!InnerRedPhi ||
706  !llvm::any_of(InnerRedPhi->incoming_values(),
707  [&PHI](Value *V) { return V == &PHI; })) {
708  LLVM_DEBUG(
709  dbgs()
710  << "Failed to recognize PHI as an induction or reduction.\n");
711  return false;
712  }
713  OuterInnerReductions.insert(&PHI);
714  OuterInnerReductions.insert(InnerRedPhi);
715  }
716  }
717  }
718  return true;
719 }
720 
721 static bool containsSafePHI(BasicBlock *Block, bool isOuterLoopExitBlock) {
722  for (PHINode &PHI : Block->phis()) {
723  // Reduction lcssa phi will have only 1 incoming block that from loop latch.
724  if (PHI.getNumIncomingValues() > 1)
725  return false;
726  Instruction *Ins = dyn_cast<Instruction>(PHI.getIncomingValue(0));
727  if (!Ins)
728  return false;
729  // Incoming value for lcssa phi's in outer loop exit can only be inner loop
730  // exits lcssa phi else it would not be tightly nested.
731  if (!isa<PHINode>(Ins) && isOuterLoopExitBlock)
732  return false;
733  }
734  return true;
735 }
736 
737 // This function indicates the current limitations in the transform as a result
738 // of which we do not proceed.
739 bool LoopInterchangeLegality::currentLimitations() {
740  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
741  BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
742 
743  // transform currently expects the loop latches to also be the exiting
744  // blocks.
745  if (InnerLoop->getExitingBlock() != InnerLoopLatch ||
746  OuterLoop->getExitingBlock() != OuterLoop->getLoopLatch() ||
747  !isa<BranchInst>(InnerLoopLatch->getTerminator()) ||
748  !isa<BranchInst>(OuterLoop->getLoopLatch()->getTerminator())) {
749  LLVM_DEBUG(
750  dbgs() << "Loops where the latch is not the exiting block are not"
751  << " supported currently.\n");
752  ORE->emit([&]() {
753  return OptimizationRemarkMissed(DEBUG_TYPE, "ExitingNotLatch",
754  OuterLoop->getStartLoc(),
755  OuterLoop->getHeader())
756  << "Loops where the latch is not the exiting block cannot be"
757  " interchange currently.";
758  });
759  return true;
760  }
761 
762  PHINode *InnerInductionVar;
763  SmallVector<PHINode *, 8> Inductions;
764  if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
765  LLVM_DEBUG(
766  dbgs() << "Only outer loops with induction or reduction PHI nodes "
767  << "are supported currently.\n");
768  ORE->emit([&]() {
769  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIOuter",
770  OuterLoop->getStartLoc(),
771  OuterLoop->getHeader())
772  << "Only outer loops with induction or reduction PHI nodes can be"
773  " interchanged currently.";
774  });
775  return true;
776  }
777 
778  // TODO: Currently we handle only loops with 1 induction variable.
779  if (Inductions.size() != 1) {
780  LLVM_DEBUG(dbgs() << "Loops with more than 1 induction variables are not "
781  << "supported currently.\n");
782  ORE->emit([&]() {
783  return OptimizationRemarkMissed(DEBUG_TYPE, "MultiIndutionOuter",
784  OuterLoop->getStartLoc(),
785  OuterLoop->getHeader())
786  << "Only outer loops with 1 induction variable can be "
787  "interchanged currently.";
788  });
789  return true;
790  }
791 
792  Inductions.clear();
793  if (!findInductionAndReductions(InnerLoop, Inductions, nullptr)) {
794  LLVM_DEBUG(
795  dbgs() << "Only inner loops with induction or reduction PHI nodes "
796  << "are supported currently.\n");
797  ORE->emit([&]() {
798  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner",
799  InnerLoop->getStartLoc(),
800  InnerLoop->getHeader())
801  << "Only inner loops with induction or reduction PHI nodes can be"
802  " interchange currently.";
803  });
804  return true;
805  }
806 
807  // TODO: Currently we handle only loops with 1 induction variable.
808  if (Inductions.size() != 1) {
809  LLVM_DEBUG(
810  dbgs() << "We currently only support loops with 1 induction variable."
811  << "Failed to interchange due to current limitation\n");
812  ORE->emit([&]() {
813  return OptimizationRemarkMissed(DEBUG_TYPE, "MultiInductionInner",
814  InnerLoop->getStartLoc(),
815  InnerLoop->getHeader())
816  << "Only inner loops with 1 induction variable can be "
817  "interchanged currently.";
818  });
819  return true;
820  }
821  InnerInductionVar = Inductions.pop_back_val();
822 
823  // TODO: Triangular loops are not handled for now.
824  if (!isLoopStructureUnderstood(InnerInductionVar)) {
825  LLVM_DEBUG(dbgs() << "Loop structure not understood by pass\n");
826  ORE->emit([&]() {
827  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedStructureInner",
828  InnerLoop->getStartLoc(),
829  InnerLoop->getHeader())
830  << "Inner loop structure not understood currently.";
831  });
832  return true;
833  }
834 
835  // TODO: We only handle LCSSA PHI's corresponding to reduction for now.
836  BasicBlock *InnerExit = InnerLoop->getExitBlock();
837  if (!containsSafePHI(InnerExit, false)) {
838  LLVM_DEBUG(
839  dbgs() << "Can only handle LCSSA PHIs in inner loops currently.\n");
840  ORE->emit([&]() {
841  return OptimizationRemarkMissed(DEBUG_TYPE, "NoLCSSAPHIOuterInner",
842  InnerLoop->getStartLoc(),
843  InnerLoop->getHeader())
844  << "Only inner loops with LCSSA PHIs can be interchange "
845  "currently.";
846  });
847  return true;
848  }
849 
850  // TODO: Current limitation: Since we split the inner loop latch at the point
851  // were induction variable is incremented (induction.next); We cannot have
852  // more than 1 user of induction.next since it would result in broken code
853  // after split.
854  // e.g.
855  // for(i=0;i<N;i++) {
856  // for(j = 0;j<M;j++) {
857  // A[j+1][i+2] = A[j][i]+k;
858  // }
859  // }
860  Instruction *InnerIndexVarInc = nullptr;
861  if (InnerInductionVar->getIncomingBlock(0) == InnerLoopPreHeader)
862  InnerIndexVarInc =
863  dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(1));
864  else
865  InnerIndexVarInc =
866  dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(0));
867 
868  if (!InnerIndexVarInc) {
869  LLVM_DEBUG(
870  dbgs() << "Did not find an instruction to increment the induction "
871  << "variable.\n");
872  ORE->emit([&]() {
873  return OptimizationRemarkMissed(DEBUG_TYPE, "NoIncrementInInner",
874  InnerLoop->getStartLoc(),
875  InnerLoop->getHeader())
876  << "The inner loop does not increment the induction variable.";
877  });
878  return true;
879  }
880 
881  // Since we split the inner loop latch on this induction variable. Make sure
882  // we do not have any instruction between the induction variable and branch
883  // instruction.
884 
885  bool FoundInduction = false;
886  for (const Instruction &I :
887  llvm::reverse(InnerLoopLatch->instructionsWithoutDebug())) {
888  if (isa<BranchInst>(I) || isa<CmpInst>(I) || isa<TruncInst>(I) ||
889  isa<ZExtInst>(I))
890  continue;
891 
892  // We found an instruction. If this is not induction variable then it is not
893  // safe to split this loop latch.
894  if (!I.isIdenticalTo(InnerIndexVarInc)) {
895  LLVM_DEBUG(dbgs() << "Found unsupported instructions between induction "
896  << "variable increment and branch.\n");
897  ORE->emit([&]() {
899  DEBUG_TYPE, "UnsupportedInsBetweenInduction",
900  InnerLoop->getStartLoc(), InnerLoop->getHeader())
901  << "Found unsupported instruction between induction variable "
902  "increment and branch.";
903  });
904  return true;
905  }
906 
907  FoundInduction = true;
908  break;
909  }
910  // The loop latch ended and we didn't find the induction variable return as
911  // current limitation.
912  if (!FoundInduction) {
913  LLVM_DEBUG(dbgs() << "Did not find the induction variable.\n");
914  ORE->emit([&]() {
915  return OptimizationRemarkMissed(DEBUG_TYPE, "NoIndutionVariable",
916  InnerLoop->getStartLoc(),
917  InnerLoop->getHeader())
918  << "Did not find the induction variable.";
919  });
920  return true;
921  }
922  return false;
923 }
924 
925 // We currently support LCSSA PHI nodes in the outer loop exit, if their
926 // incoming values do not come from the outer loop latch or if the
927 // outer loop latch has a single predecessor. In that case, the value will
928 // be available if both the inner and outer loop conditions are true, which
929 // will still be true after interchanging. If we have multiple predecessor,
930 // that may not be the case, e.g. because the outer loop latch may be executed
931 // if the inner loop is not executed.
932 static bool areLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
933  BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock();
934  for (PHINode &PHI : LoopNestExit->phis()) {
935  // FIXME: We currently are not able to detect floating point reductions
936  // and have to use floating point PHIs as a proxy to prevent
937  // interchanging in the presence of floating point reductions.
938  if (PHI.getType()->isFloatingPointTy())
939  return false;
940  for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
941  Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i));
942  if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch())
943  continue;
944 
945  // The incoming value is defined in the outer loop latch. Currently we
946  // only support that in case the outer loop latch has a single predecessor.
947  // This guarantees that the outer loop latch is executed if and only if
948  // the inner loop is executed (because tightlyNested() guarantees that the
949  // outer loop header only branches to the inner loop or the outer loop
950  // latch).
951  // FIXME: We could weaken this logic and allow multiple predecessors,
952  // if the values are produced outside the loop latch. We would need
953  // additional logic to update the PHI nodes in the exit block as
954  // well.
955  if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr)
956  return false;
957  }
958  }
959  return true;
960 }
961 
962 bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
963  unsigned OuterLoopId,
964  CharMatrix &DepMatrix) {
965  if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) {
966  LLVM_DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId
967  << " and OuterLoopId = " << OuterLoopId
968  << " due to dependence\n");
969  ORE->emit([&]() {
970  return OptimizationRemarkMissed(DEBUG_TYPE, "Dependence",
971  InnerLoop->getStartLoc(),
972  InnerLoop->getHeader())
973  << "Cannot interchange loops due to dependences.";
974  });
975  return false;
976  }
977  // Check if outer and inner loop contain legal instructions only.
978  for (auto *BB : OuterLoop->blocks())
979  for (Instruction &I : BB->instructionsWithoutDebug())
980  if (CallInst *CI = dyn_cast<CallInst>(&I)) {
981  // readnone functions do not prevent interchanging.
982  if (CI->doesNotReadMemory())
983  continue;
984  LLVM_DEBUG(
985  dbgs() << "Loops with call instructions cannot be interchanged "
986  << "safely.");
987  ORE->emit([&]() {
988  return OptimizationRemarkMissed(DEBUG_TYPE, "CallInst",
989  CI->getDebugLoc(),
990  CI->getParent())
991  << "Cannot interchange loops due to call instruction.";
992  });
993 
994  return false;
995  }
996 
997  // TODO: The loops could not be interchanged due to current limitations in the
998  // transform module.
999  if (currentLimitations()) {
1000  LLVM_DEBUG(dbgs() << "Not legal because of current transform limitation\n");
1001  return false;
1002  }
1003 
1004  // Check if the loops are tightly nested.
1005  if (!tightlyNested(OuterLoop, InnerLoop)) {
1006  LLVM_DEBUG(dbgs() << "Loops not tightly nested\n");
1007  ORE->emit([&]() {
1008  return OptimizationRemarkMissed(DEBUG_TYPE, "NotTightlyNested",
1009  InnerLoop->getStartLoc(),
1010  InnerLoop->getHeader())
1011  << "Cannot interchange loops because they are not tightly "
1012  "nested.";
1013  });
1014  return false;
1015  }
1016 
1017  if (!areLoopExitPHIsSupported(OuterLoop, InnerLoop)) {
1018  LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n");
1019  ORE->emit([&]() {
1020  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
1021  OuterLoop->getStartLoc(),
1022  OuterLoop->getHeader())
1023  << "Found unsupported PHI node in loop exit.";
1024  });
1025  return false;
1026  }
1027 
1028  return true;
1029 }
1030 
1031 int LoopInterchangeProfitability::getInstrOrderCost() {
1032  unsigned GoodOrder, BadOrder;
1033  BadOrder = GoodOrder = 0;
1034  for (BasicBlock *BB : InnerLoop->blocks()) {
1035  for (Instruction &Ins : *BB) {
1036  if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) {
1037  unsigned NumOp = GEP->getNumOperands();
1038  bool FoundInnerInduction = false;
1039  bool FoundOuterInduction = false;
1040  for (unsigned i = 0; i < NumOp; ++i) {
1041  const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i));
1042  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal);
1043  if (!AR)
1044  continue;
1045 
1046  // If we find the inner induction after an outer induction e.g.
1047  // for(int i=0;i<N;i++)
1048  // for(int j=0;j<N;j++)
1049  // A[i][j] = A[i-1][j-1]+k;
1050  // then it is a good order.
1051  if (AR->getLoop() == InnerLoop) {
1052  // We found an InnerLoop induction after OuterLoop induction. It is
1053  // a good order.
1054  FoundInnerInduction = true;
1055  if (FoundOuterInduction) {
1056  GoodOrder++;
1057  break;
1058  }
1059  }
1060  // If we find the outer induction after an inner induction e.g.
1061  // for(int i=0;i<N;i++)
1062  // for(int j=0;j<N;j++)
1063  // A[j][i] = A[j-1][i-1]+k;
1064  // then it is a bad order.
1065  if (AR->getLoop() == OuterLoop) {
1066  // We found an OuterLoop induction after InnerLoop induction. It is
1067  // a bad order.
1068  FoundOuterInduction = true;
1069  if (FoundInnerInduction) {
1070  BadOrder++;
1071  break;
1072  }
1073  }
1074  }
1075  }
1076  }
1077  }
1078  return GoodOrder - BadOrder;
1079 }
1080 
1081 static bool isProfitableForVectorization(unsigned InnerLoopId,
1082  unsigned OuterLoopId,
1083  CharMatrix &DepMatrix) {
1084  // TODO: Improve this heuristic to catch more cases.
1085  // If the inner loop is loop independent or doesn't carry any dependency it is
1086  // profitable to move this to outer position.
1087  for (auto &Row : DepMatrix) {
1088  if (Row[InnerLoopId] != 'S' && Row[InnerLoopId] != 'I')
1089  return false;
1090  // TODO: We need to improve this heuristic.
1091  if (Row[OuterLoopId] != '=')
1092  return false;
1093  }
1094  // If outer loop has dependence and inner loop is loop independent then it is
1095  // profitable to interchange to enable parallelism.
1096  // If there are no dependences, interchanging will not improve anything.
1097  return !DepMatrix.empty();
1098 }
1099 
1100 bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
1101  unsigned OuterLoopId,
1102  CharMatrix &DepMatrix) {
1103  // TODO: Add better profitability checks.
1104  // e.g
1105  // 1) Construct dependency matrix and move the one with no loop carried dep
1106  // inside to enable vectorization.
1107 
1108  // This is rough cost estimation algorithm. It counts the good and bad order
1109  // of induction variables in the instruction and allows reordering if number
1110  // of bad orders is more than good.
1111  int Cost = getInstrOrderCost();
1112  LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n");
1113  if (Cost < -LoopInterchangeCostThreshold)
1114  return true;
1115 
1116  // It is not profitable as per current cache profitability model. But check if
1117  // we can move this loop outside to improve parallelism.
1118  if (isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix))
1119  return true;
1120 
1121  ORE->emit([&]() {
1122  return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable",
1123  InnerLoop->getStartLoc(),
1124  InnerLoop->getHeader())
1125  << "Interchanging loops is too costly (cost="
1126  << ore::NV("Cost", Cost) << ", threshold="
1127  << ore::NV("Threshold", LoopInterchangeCostThreshold)
1128  << ") and it does not improve parallelism.";
1129  });
1130  return false;
1131 }
1132 
1133 void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
1134  Loop *InnerLoop) {
1135  for (Loop *L : *OuterLoop)
1136  if (L == InnerLoop) {
1137  OuterLoop->removeChildLoop(L);
1138  return;
1139  }
1140  llvm_unreachable("Couldn't find loop");
1141 }
1142 
1143 /// Update LoopInfo, after interchanging. NewInner and NewOuter refer to the
1144 /// new inner and outer loop after interchanging: NewInner is the original
1145 /// outer loop and NewOuter is the original inner loop.
1146 ///
1147 /// Before interchanging, we have the following structure
1148 /// Outer preheader
1149 // Outer header
1150 // Inner preheader
1151 // Inner header
1152 // Inner body
1153 // Inner latch
1154 // outer bbs
1155 // Outer latch
1156 //
1157 // After interchanging:
1158 // Inner preheader
1159 // Inner header
1160 // Outer preheader
1161 // Outer header
1162 // Inner body
1163 // outer bbs
1164 // Outer latch
1165 // Inner latch
1166 void LoopInterchangeTransform::restructureLoops(
1167  Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader,
1168  BasicBlock *OrigOuterPreHeader) {
1169  Loop *OuterLoopParent = OuterLoop->getParentLoop();
1170  // The original inner loop preheader moves from the new inner loop to
1171  // the parent loop, if there is one.
1172  NewInner->removeBlockFromLoop(OrigInnerPreHeader);
1173  LI->changeLoopFor(OrigInnerPreHeader, OuterLoopParent);
1174 
1175  // Switch the loop levels.
1176  if (OuterLoopParent) {
1177  // Remove the loop from its parent loop.
1178  removeChildLoop(OuterLoopParent, NewInner);
1179  removeChildLoop(NewInner, NewOuter);
1180  OuterLoopParent->addChildLoop(NewOuter);
1181  } else {
1182  removeChildLoop(NewInner, NewOuter);
1183  LI->changeTopLevelLoop(NewInner, NewOuter);
1184  }
1185  while (!NewOuter->empty())
1186  NewInner->addChildLoop(NewOuter->removeChildLoop(NewOuter->begin()));
1187  NewOuter->addChildLoop(NewInner);
1188 
1189  // BBs from the original inner loop.
1190  SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->blocks());
1191 
1192  // Add BBs from the original outer loop to the original inner loop (excluding
1193  // BBs already in inner loop)
1194  for (BasicBlock *BB : NewInner->blocks())
1195  if (LI->getLoopFor(BB) == NewInner)
1196  NewOuter->addBlockEntry(BB);
1197 
1198  // Now remove inner loop header and latch from the new inner loop and move
1199  // other BBs (the loop body) to the new inner loop.
1200  BasicBlock *OuterHeader = NewOuter->getHeader();
1201  BasicBlock *OuterLatch = NewOuter->getLoopLatch();
1202  for (BasicBlock *BB : OrigInnerBBs) {
1203  // Nothing will change for BBs in child loops.
1204  if (LI->getLoopFor(BB) != NewOuter)
1205  continue;
1206  // Remove the new outer loop header and latch from the new inner loop.
1207  if (BB == OuterHeader || BB == OuterLatch)
1208  NewInner->removeBlockFromLoop(BB);
1209  else
1210  LI->changeLoopFor(BB, NewInner);
1211  }
1212 
1213  // The preheader of the original outer loop becomes part of the new
1214  // outer loop.
1215  NewOuter->addBlockEntry(OrigOuterPreHeader);
1216  LI->changeLoopFor(OrigOuterPreHeader, NewOuter);
1217 
1218  // Tell SE that we move the loops around.
1219  SE->forgetLoop(NewOuter);
1220  SE->forgetLoop(NewInner);
1221 }
1222 
1224  bool Transformed = false;
1225  Instruction *InnerIndexVar;
1226 
1227  if (InnerLoop->getSubLoops().empty()) {
1228  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1229  LLVM_DEBUG(dbgs() << "Calling Split Inner Loop\n");
1230  PHINode *InductionPHI = getInductionVariable(InnerLoop, SE);
1231  if (!InductionPHI) {
1232  LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n");
1233  return false;
1234  }
1235 
1236  if (InductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
1237  InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(1));
1238  else
1239  InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(0));
1240 
1241  // Ensure that InductionPHI is the first Phi node.
1242  if (&InductionPHI->getParent()->front() != InductionPHI)
1243  InductionPHI->moveBefore(&InductionPHI->getParent()->front());
1244 
1245  // Split at the place were the induction variable is
1246  // incremented/decremented.
1247  // TODO: This splitting logic may not work always. Fix this.
1248  splitInnerLoopLatch(InnerIndexVar);
1249  LLVM_DEBUG(dbgs() << "splitInnerLoopLatch done\n");
1250 
1251  // Splits the inner loops phi nodes out into a separate basic block.
1252  BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1253  SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI);
1254  LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n");
1255  }
1256 
1257  Transformed |= adjustLoopLinks();
1258  if (!Transformed) {
1259  LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n");
1260  return false;
1261  }
1262 
1263  return true;
1264 }
1265 
1266 void LoopInterchangeTransform::splitInnerLoopLatch(Instruction *Inc) {
1267  BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
1268  BasicBlock *InnerLoopLatchPred = InnerLoopLatch;
1269  InnerLoopLatch = SplitBlock(InnerLoopLatchPred, Inc, DT, LI);
1270 }
1271 
1272 /// \brief Move all instructions except the terminator from FromBB right before
1273 /// InsertBefore
1274 static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
1275  auto &ToList = InsertBefore->getParent()->getInstList();
1276  auto &FromList = FromBB->getInstList();
1277 
1278  ToList.splice(InsertBefore->getIterator(), FromList, FromList.begin(),
1279  FromBB->getTerminator()->getIterator());
1280 }
1281 
1282 static void updateIncomingBlock(BasicBlock *CurrBlock, BasicBlock *OldPred,
1283  BasicBlock *NewPred) {
1284  for (PHINode &PHI : CurrBlock->phis()) {
1285  unsigned Num = PHI.getNumIncomingValues();
1286  for (unsigned i = 0; i < Num; ++i) {
1287  if (PHI.getIncomingBlock(i) == OldPred)
1288  PHI.setIncomingBlock(i, NewPred);
1289  }
1290  }
1291 }
1292 
1293 /// Update BI to jump to NewBB instead of OldBB. Records updates to
1294 /// the dominator tree in DTUpdates, if DT should be preserved.
1295 static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
1296  BasicBlock *NewBB,
1297  std::vector<DominatorTree::UpdateType> &DTUpdates) {
1299  [OldBB](BasicBlock *BB) { return BB == OldBB; }) < 2 &&
1300  "BI must jump to OldBB at most once.");
1301  for (unsigned i = 0, e = BI->getNumSuccessors(); i < e; ++i) {
1302  if (BI->getSuccessor(i) == OldBB) {
1303  BI->setSuccessor(i, NewBB);
1304 
1305  DTUpdates.push_back(
1307  DTUpdates.push_back(
1309  break;
1310  }
1311  }
1312 }
1313 
1314 // Move Lcssa PHIs to the right place.
1315 static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerLatch,
1316  BasicBlock *OuterLatch) {
1317  SmallVector<PHINode *, 8> LcssaInnerExit;
1318  for (PHINode &P : InnerExit->phis())
1319  LcssaInnerExit.push_back(&P);
1320 
1321  SmallVector<PHINode *, 8> LcssaInnerLatch;
1322  for (PHINode &P : InnerLatch->phis())
1323  LcssaInnerLatch.push_back(&P);
1324 
1325  // Lcssa PHIs for values used outside the inner loop are in InnerExit.
1326  // If a PHI node has users outside of InnerExit, it has a use outside the
1327  // interchanged loop and we have to preserve it. We move these to
1328  // InnerLatch, which will become the new exit block for the innermost
1329  // loop after interchanging. For PHIs only used in InnerExit, we can just
1330  // replace them with the incoming value.
1331  for (PHINode *P : LcssaInnerExit) {
1332  bool hasUsersOutside = false;
1333  for (auto UI = P->use_begin(), E = P->use_end(); UI != E;) {
1334  Use &U = *UI;
1335  ++UI;
1336  auto *Usr = cast<Instruction>(U.getUser());
1337  if (Usr->getParent() != InnerExit) {
1338  hasUsersOutside = true;
1339  continue;
1340  }
1341  U.set(P->getIncomingValueForBlock(InnerLatch));
1342  }
1343  if (hasUsersOutside)
1344  P->moveBefore(InnerLatch->getFirstNonPHI());
1345  else
1346  P->eraseFromParent();
1347  }
1348 
1349  // If the inner loop latch contains LCSSA PHIs, those come from a child loop
1350  // and we have to move them to the new inner latch.
1351  for (PHINode *P : LcssaInnerLatch)
1352  P->moveBefore(InnerExit->getFirstNonPHI());
1353 
1354  // Now adjust the incoming blocks for the LCSSA PHIs.
1355  // For PHIs moved from Inner's exit block, we need to replace Inner's latch
1356  // with the new latch.
1357  updateIncomingBlock(InnerLatch, InnerLatch, OuterLatch);
1358 }
1359 
1360 bool LoopInterchangeTransform::adjustLoopBranches() {
1361  LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n");
1362  std::vector<DominatorTree::UpdateType> DTUpdates;
1363 
1364  BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1365  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1366 
1367  assert(OuterLoopPreHeader != OuterLoop->getHeader() &&
1368  InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader &&
1369  InnerLoopPreHeader && "Guaranteed by loop-simplify form");
1370  // Ensure that both preheaders do not contain PHI nodes and have single
1371  // predecessors. This allows us to move them easily. We use
1372  // InsertPreHeaderForLoop to create an 'extra' preheader, if the existing
1373  // preheaders do not satisfy those conditions.
1374  if (isa<PHINode>(OuterLoopPreHeader->begin()) ||
1375  !OuterLoopPreHeader->getUniquePredecessor())
1376  OuterLoopPreHeader = InsertPreheaderForLoop(OuterLoop, DT, LI, true);
1377  if (InnerLoopPreHeader == OuterLoop->getHeader())
1378  InnerLoopPreHeader = InsertPreheaderForLoop(InnerLoop, DT, LI, true);
1379 
1380  // Adjust the loop preheader
1381  BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1382  BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1383  BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
1384  BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1385  BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor();
1386  BasicBlock *InnerLoopLatchPredecessor =
1387  InnerLoopLatch->getUniquePredecessor();
1388  BasicBlock *InnerLoopLatchSuccessor;
1389  BasicBlock *OuterLoopLatchSuccessor;
1390 
1391  BranchInst *OuterLoopLatchBI =
1392  dyn_cast<BranchInst>(OuterLoopLatch->getTerminator());
1393  BranchInst *InnerLoopLatchBI =
1394  dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
1395  BranchInst *OuterLoopHeaderBI =
1396  dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
1397  BranchInst *InnerLoopHeaderBI =
1398  dyn_cast<BranchInst>(InnerLoopHeader->getTerminator());
1399 
1400  if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
1401  !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
1402  !InnerLoopHeaderBI)
1403  return false;
1404 
1405  BranchInst *InnerLoopLatchPredecessorBI =
1406  dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator());
1407  BranchInst *OuterLoopPredecessorBI =
1408  dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator());
1409 
1410  if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
1411  return false;
1412  BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor();
1413  if (!InnerLoopHeaderSuccessor)
1414  return false;
1415 
1416  // Adjust Loop Preheader and headers
1417  updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader,
1418  InnerLoopPreHeader, DTUpdates);
1419  updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, LoopExit, DTUpdates);
1420  updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader,
1421  InnerLoopHeaderSuccessor, DTUpdates);
1422 
1423  // Adjust reduction PHI's now that the incoming block has changed.
1424  updateIncomingBlock(InnerLoopHeaderSuccessor, InnerLoopHeader,
1425  OuterLoopHeader);
1426 
1427  updateSuccessor(InnerLoopHeaderBI, InnerLoopHeaderSuccessor,
1428  OuterLoopPreHeader, DTUpdates);
1429 
1430  // -------------Adjust loop latches-----------
1431  if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader)
1432  InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1);
1433  else
1434  InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0);
1435 
1436  updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch,
1437  InnerLoopLatchSuccessor, DTUpdates);
1438 
1439 
1440  if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader)
1441  OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1);
1442  else
1443  OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0);
1444 
1445  updateSuccessor(InnerLoopLatchBI, InnerLoopLatchSuccessor,
1446  OuterLoopLatchSuccessor, DTUpdates);
1447  updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
1448  DTUpdates);
1449 
1450  DT->applyUpdates(DTUpdates);
1451  restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
1452  OuterLoopPreHeader);
1453 
1454  moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopLatch, OuterLoopLatch);
1455  // For PHIs in the exit block of the outer loop, outer's latch has been
1456  // replaced by Inners'.
1457  updateIncomingBlock(OuterLoopLatchSuccessor, OuterLoopLatch, InnerLoopLatch);
1458 
1459  // Now update the reduction PHIs in the inner and outer loop headers.
1460  SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
1461  for (PHINode &PHI : drop_begin(InnerLoopHeader->phis(), 1))
1462  InnerLoopPHIs.push_back(cast<PHINode>(&PHI));
1463  for (PHINode &PHI : drop_begin(OuterLoopHeader->phis(), 1))
1464  OuterLoopPHIs.push_back(cast<PHINode>(&PHI));
1465 
1466  auto &OuterInnerReductions = LIL.getOuterInnerReductions();
1467  (void)OuterInnerReductions;
1468 
1469  // Now move the remaining reduction PHIs from outer to inner loop header and
1470  // vice versa. The PHI nodes must be part of a reduction across the inner and
1471  // outer loop and all the remains to do is and updating the incoming blocks.
1472  for (PHINode *PHI : OuterLoopPHIs) {
1473  PHI->moveBefore(InnerLoopHeader->getFirstNonPHI());
1474  assert(OuterInnerReductions.find(PHI) != OuterInnerReductions.end() &&
1475  "Expected a reduction PHI node");
1476  }
1477  for (PHINode *PHI : InnerLoopPHIs) {
1478  PHI->moveBefore(OuterLoopHeader->getFirstNonPHI());
1479  assert(OuterInnerReductions.find(PHI) != OuterInnerReductions.end() &&
1480  "Expected a reduction PHI node");
1481  }
1482 
1483  // Update the incoming blocks for moved PHI nodes.
1484  updateIncomingBlock(OuterLoopHeader, InnerLoopPreHeader, OuterLoopPreHeader);
1485  updateIncomingBlock(OuterLoopHeader, InnerLoopLatch, OuterLoopLatch);
1486  updateIncomingBlock(InnerLoopHeader, OuterLoopPreHeader, InnerLoopPreHeader);
1487  updateIncomingBlock(InnerLoopHeader, OuterLoopLatch, InnerLoopLatch);
1488 
1489  return true;
1490 }
1491 
1492 void LoopInterchangeTransform::adjustLoopPreheaders() {
1493  // We have interchanged the preheaders so we need to interchange the data in
1494  // the preheader as well.
1495  // This is because the content of inner preheader was previously executed
1496  // inside the outer loop.
1497  BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1498  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1499  BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1500  BranchInst *InnerTermBI =
1501  cast<BranchInst>(InnerLoopPreHeader->getTerminator());
1502 
1503  // These instructions should now be executed inside the loop.
1504  // Move instruction into a new block after outer header.
1505  moveBBContents(InnerLoopPreHeader, OuterLoopHeader->getTerminator());
1506  // These instructions were not executed previously in the loop so move them to
1507  // the older inner loop preheader.
1508  moveBBContents(OuterLoopPreHeader, InnerTermBI);
1509 }
1510 
1511 bool LoopInterchangeTransform::adjustLoopLinks() {
1512  // Adjust all branches in the inner and outer loop.
1513  bool Changed = adjustLoopBranches();
1514  if (Changed)
1515  adjustLoopPreheaders();
1516  return Changed;
1517 }
1518 
1519 char LoopInterchange::ID = 0;
1520 
1521 INITIALIZE_PASS_BEGIN(LoopInterchange, "loop-interchange",
1522  "Interchanges loops for cache reuse", false, false)
1526 
1527 INITIALIZE_PASS_END(LoopInterchange, "loop-interchange",
1528  "Interchanges loops for cache reuse", false, false)
1529 
1530 Pass *llvm::createLoopInterchangePass() { return new LoopInterchange(); }
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:80
Diagnostic information for missed-optimization remarks.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:224
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
DiagnosticInfoOptimizationBase::Argument NV
This class represents lattice values for constants.
Definition: AllocatorList.h:23
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction *> *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
static LoopVector populateWorklist(Loop &L)
Legacy pass manager pass to access dependence information.
static bool isProfitableForVectorization(unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix)
void push_back(const T &Elt)
Definition: SmallVector.h:211
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
Definition: LoopInfo.h:339
The main scalar evolution driver.
This class represents a function call, abstracting a target machine&#39;s calling convention.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:173
static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row, unsigned Column)
BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn&#39;t have a preheader, this method is called...
auto count_if(R &&Range, UnaryPredicate P) -> typename std::iterator_traits< decltype(adl_begin(Range))>::difference_type
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1259
BasicBlock * getSuccessor(unsigned i) const
STATISTIC(NumFunctions, "Total number of functions")
Hexagon Common GEP
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:137
DependenceInfo - This class is the main dependence-analysis driver.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:343
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:268
static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, Loop *L, DependenceInfo *DI)
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
Definition: LoopInfo.h:226
unsigned getNumSuccessors() const
A Use represents the edge between a Value definition and its users.
Definition: Use.h:55
void addBlockEntry(BlockT *BB)
This adds a basic block directly to the basic block list.
Definition: LoopInfo.h:358
BlockT * getHeader() const
Definition: LoopInfo.h:99
ConstantInt * getValue() const
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:266
static cl::opt< int > LoopInterchangeCostThreshold("loop-interchange-threshold", cl::init(0), cl::Hidden, cl::desc("Interchange if you gain more than this number"))
User * getUser() const LLVM_READONLY
Returns the User that contains this Use.
Definition: Use.cpp:40
static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, unsigned InnerLoopId, unsigned OuterLoopId)
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:244
This node represents a polynomial recurrence on the trip count of the specified loop.
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:246
static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row, unsigned OuterLoopId, char InnerDep, char OuterDep)
static const unsigned MaxLoopNestDepth
static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB, BasicBlock *NewBB, std::vector< DominatorTree::UpdateType > &DTUpdates)
Update BI to jump to NewBB instead of OldBB.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
loop interchange
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
Value * getOperand(unsigned i) const
Definition: User.h:169
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:873
const SCEV * getCouldNotCompute()
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool PossiblyLoopIndependent)
depends - Tests for a dependence between the Src and Dst instructions.
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:189
void set(Value *Val)
Definition: Value.h:670
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
Conditional or Unconditional Branch instruction.
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug() const
Return a const iterator range over the instructions in the block, skipping any debug instructions...
Definition: BasicBlock.cpp:94
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
const Instruction & front() const
Definition: BasicBlock.h:280
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
Definition: Instruction.h:575
loop Interchanges loops for cache reuse
Diagnostic information for applied optimization remarks.
Represent the analysis usage information of a pass.
void splice(iterator where, iplist_impl &L2)
Definition: ilist.h:327
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1192
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values...
self_iterator getIterator()
Definition: ilist_node.h:81
static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Returns true if Phi is a reduction in TheLoop.
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:75
Used in the streaming interface as the general argument type.
BlockT * getUniqueExitBlock() const
If getUniqueExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:144
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:342
iterator_range< decltype(adl_begin(std::declval< T >)))> drop_begin(T &&t, int n)
static unsigned getIncomingValueNumForOperand(unsigned i)
size_t size() const
Definition: SmallVector.h:52
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
void initializeLoopInterchangePass(PassRegistry &)
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:57
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
bool isNegative() const
Definition: Constants.h:187
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:62
#define DEBUG_TYPE
BlockT * getLoopPredecessor() const
If the given loop&#39;s header has exactly one unique predecessor outside the loop, return it...
Definition: LoopInfoImpl.h:201
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:333
Iterator for intrusive lists based on ilist_node.
unsigned getNumOperands() const
Definition: User.h:191
static bool containsSafePHI(BasicBlock *Block, bool isOuterLoopExitBlock)
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file. ...
A struct for saving information about induction variables.
Pass * createLoopInterchangePass()
static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore)
Move all instructions except the terminator from FromBB right before InsertBefore.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:841
iterator begin() const
Definition: LoopInfo.h:141
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:374
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static const unsigned MaxMemInstrCount
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
PHINode * getCanonicalInductionVariable() const
Check to see if the loop has a canonical induction variable: an integer recurrence that starts at 0 a...
Definition: LoopInfo.cpp:112
iterator_range< user_iterator > users()
Definition: Value.h:399
INITIALIZE_PASS_BEGIN(LoopInterchange, "loop-interchange", "Interchanges loops for cache reuse", false, false) INITIALIZE_PASS_END(LoopInterchange
LoopT * getParentLoop() const
Definition: LoopInfo.h:100
static PHINode * findInnerReductionPhi(Loop *L, Value *V)
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
Definition: LoopInfo.h:130
This class represents an analyzed expression in the program.
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
Definition: LoopInfo.h:330
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:464
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerLatch, BasicBlock *OuterLatch)
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
bool mayReadFromMemory() const
Return true if this instruction may read memory.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:192
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass&#39;s AnalysisUsage.
Definition: LoopUtils.cpp:136
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:322
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:324
static bool isOuterMostDepPositive(CharMatrix &DepMatrix, unsigned Row, unsigned Column)
void removeBlockFromLoop(BlockT *BB)
This removes the specified basic block from the current loop, updating the Blocks as appropriate...
Definition: LoopInfo.h:395
OutputIt transform(R &&Range, OutputIt d_first, UnaryPredicate P)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere...
Definition: STLExtras.h:1267
bool empty() const
Definition: LoopInfo.h:145
const SCEV * getBackedgeTakenCount(const Loop *L)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx, unsigned ToIndx)
LLVM Value Representation.
Definition: Value.h:72
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
static Value * followLCSSA(Value *SV)
succ_range successors(Instruction *I)
Definition: CFG.h:259
OptimizationRemarkEmitter legacy analysis pass.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:86
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Split the specified block at the specified instruction - everything before SplitPt stays in Old and e...
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
static void updateIncomingBlock(BasicBlock *CurrBlock, BasicBlock *OldPred, BasicBlock *NewPred)
#define LLVM_DEBUG(X)
Definition: Debug.h:122
op_range incoming_values()
for(unsigned i=Desc.getNumOperands(), e=OldMI.getNumOperands();i !=e;++i)
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:155
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:49
The optimization diagnostic interface.
static bool areLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
loops
Definition: LoopInfo.cpp:789
const BasicBlock * getParent() const
Definition: Instruction.h:66
This class represents a constant integer value.
static PHINode * getInductionVariable(Loop *L, ScalarEvolution *SE)
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:276