LLVM 19.0.0git
LoopInterchange.cpp
Go to the documentation of this file.
1//===- LoopInterchange.cpp - Loop interchange pass-------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This Pass handles loop interchange transform.
10// This pass interchanges loops to provide a more cache-friendly memory access
11// patterns.
12//
13//===----------------------------------------------------------------------===//
14
16#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Statistic.h"
19#include "llvm/ADT/StringRef.h"
28#include "llvm/IR/BasicBlock.h"
29#include "llvm/IR/Constants.h"
31#include "llvm/IR/Dominators.h"
32#include "llvm/IR/Function.h"
33#include "llvm/IR/InstrTypes.h"
34#include "llvm/IR/Instruction.h"
36#include "llvm/IR/User.h"
37#include "llvm/IR/Value.h"
40#include "llvm/Support/Debug.h"
46#include <cassert>
47#include <utility>
48#include <vector>
49
50using namespace llvm;
51
52#define DEBUG_TYPE "loop-interchange"
53
54STATISTIC(LoopsInterchanged, "Number of loops interchanged");
55
57 "loop-interchange-threshold", cl::init(0), cl::Hidden,
58 cl::desc("Interchange if you gain more than this number"));
59
60namespace {
61
62using LoopVector = SmallVector<Loop *, 8>;
63
64// TODO: Check if we can use a sparse matrix here.
65using CharMatrix = std::vector<std::vector<char>>;
66
67} // end anonymous namespace
68
69// Maximum number of dependencies that can be handled in the dependency matrix.
70static const unsigned MaxMemInstrCount = 100;
71
72// Maximum loop depth supported.
73static const unsigned MaxLoopNestDepth = 10;
74
75#ifdef DUMP_DEP_MATRICIES
76static void printDepMatrix(CharMatrix &DepMatrix) {
77 for (auto &Row : DepMatrix) {
78 for (auto D : Row)
79 LLVM_DEBUG(dbgs() << D << " ");
80 LLVM_DEBUG(dbgs() << "\n");
81 }
82}
83#endif
84
85static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
86 Loop *L, DependenceInfo *DI,
87 ScalarEvolution *SE) {
88 using ValueVector = SmallVector<Value *, 16>;
89
90 ValueVector MemInstr;
91
92 // For each block.
93 for (BasicBlock *BB : L->blocks()) {
94 // Scan the BB and collect legal loads and stores.
95 for (Instruction &I : *BB) {
96 if (!isa<Instruction>(I))
97 return false;
98 if (auto *Ld = dyn_cast<LoadInst>(&I)) {
99 if (!Ld->isSimple())
100 return false;
101 MemInstr.push_back(&I);
102 } else if (auto *St = dyn_cast<StoreInst>(&I)) {
103 if (!St->isSimple())
104 return false;
105 MemInstr.push_back(&I);
106 }
107 }
108 }
109
110 LLVM_DEBUG(dbgs() << "Found " << MemInstr.size()
111 << " Loads and Stores to analyze\n");
112
113 ValueVector::iterator I, IE, J, JE;
114
115 for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
116 for (J = I, JE = MemInstr.end(); J != JE; ++J) {
117 std::vector<char> Dep;
118 Instruction *Src = cast<Instruction>(*I);
119 Instruction *Dst = cast<Instruction>(*J);
120 // Ignore Input dependencies.
121 if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
122 continue;
123 // Track Output, Flow, and Anti dependencies.
124 if (auto D = DI->depends(Src, Dst, true)) {
125 assert(D->isOrdered() && "Expected an output, flow or anti dep.");
126 // If the direction vector is negative, normalize it to
127 // make it non-negative.
128 if (D->normalize(SE))
129 LLVM_DEBUG(dbgs() << "Negative dependence vector normalized.\n");
130 LLVM_DEBUG(StringRef DepType =
131 D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";
132 dbgs() << "Found " << DepType
133 << " dependency between Src and Dst\n"
134 << " Src:" << *Src << "\n Dst:" << *Dst << '\n');
135 unsigned Levels = D->getLevels();
136 char Direction;
137 for (unsigned II = 1; II <= Levels; ++II) {
138 if (D->isScalar(II)) {
139 Direction = 'S';
140 Dep.push_back(Direction);
141 } else {
142 unsigned Dir = D->getDirection(II);
143 if (Dir == Dependence::DVEntry::LT ||
145 Direction = '<';
146 else if (Dir == Dependence::DVEntry::GT ||
148 Direction = '>';
149 else if (Dir == Dependence::DVEntry::EQ)
150 Direction = '=';
151 else
152 Direction = '*';
153 Dep.push_back(Direction);
154 }
155 }
156 while (Dep.size() != Level) {
157 Dep.push_back('I');
158 }
159
160 DepMatrix.push_back(Dep);
161 if (DepMatrix.size() > MaxMemInstrCount) {
162 LLVM_DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount
163 << " dependencies inside loop\n");
164 return false;
165 }
166 }
167 }
168 }
169
170 return true;
171}
172
173// A loop is moved from index 'from' to an index 'to'. Update the Dependence
174// matrix by exchanging the two columns.
175static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx,
176 unsigned ToIndx) {
177 for (unsigned I = 0, E = DepMatrix.size(); I < E; ++I)
178 std::swap(DepMatrix[I][ToIndx], DepMatrix[I][FromIndx]);
179}
180
181// After interchanging, check if the direction vector is valid.
182// [Theorem] A permutation of the loops in a perfect nest is legal if and only
183// if the direction matrix, after the same permutation is applied to its
184// columns, has no ">" direction as the leftmost non-"=" direction in any row.
185static bool isLexicographicallyPositive(std::vector<char> &DV) {
186 for (unsigned char Direction : DV) {
187 if (Direction == '<')
188 return true;
189 if (Direction == '>' || Direction == '*')
190 return false;
191 }
192 return true;
193}
194
195// Checks if it is legal to interchange 2 loops.
196static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
197 unsigned InnerLoopId,
198 unsigned OuterLoopId) {
199 unsigned NumRows = DepMatrix.size();
200 std::vector<char> Cur;
201 // For each row check if it is valid to interchange.
202 for (unsigned Row = 0; Row < NumRows; ++Row) {
203 // Create temporary DepVector check its lexicographical order
204 // before and after swapping OuterLoop vs InnerLoop
205 Cur = DepMatrix[Row];
207 return false;
208 std::swap(Cur[InnerLoopId], Cur[OuterLoopId]);
210 return false;
211 }
212 return true;
213}
214
215static void populateWorklist(Loop &L, LoopVector &LoopList) {
216 LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "
217 << L.getHeader()->getParent()->getName() << " Loop: %"
218 << L.getHeader()->getName() << '\n');
219 assert(LoopList.empty() && "LoopList should initially be empty!");
220 Loop *CurrentLoop = &L;
221 const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops();
222 while (!Vec->empty()) {
223 // The current loop has multiple subloops in it hence it is not tightly
224 // nested.
225 // Discard all loops above it added into Worklist.
226 if (Vec->size() != 1) {
227 LoopList = {};
228 return;
229 }
230
231 LoopList.push_back(CurrentLoop);
232 CurrentLoop = Vec->front();
233 Vec = &CurrentLoop->getSubLoops();
234 }
235 LoopList.push_back(CurrentLoop);
236}
237
238namespace {
239
240/// LoopInterchangeLegality checks if it is legal to interchange the loop.
241class LoopInterchangeLegality {
242public:
243 LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
245 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
246
247 /// Check if the loops can be interchanged.
248 bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
249 CharMatrix &DepMatrix);
250
251 /// Discover induction PHIs in the header of \p L. Induction
252 /// PHIs are added to \p Inductions.
253 bool findInductions(Loop *L, SmallVectorImpl<PHINode *> &Inductions);
254
255 /// Check if the loop structure is understood. We do not handle triangular
256 /// loops for now.
257 bool isLoopStructureUnderstood();
258
259 bool currentLimitations();
260
261 const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions() const {
262 return OuterInnerReductions;
263 }
264
265 const SmallVectorImpl<PHINode *> &getInnerLoopInductions() const {
266 return InnerLoopInductions;
267 }
268
269private:
270 bool tightlyNested(Loop *Outer, Loop *Inner);
271 bool containsUnsafeInstructions(BasicBlock *BB);
272
273 /// Discover induction and reduction PHIs in the header of \p L. Induction
274 /// PHIs are added to \p Inductions, reductions are added to
275 /// OuterInnerReductions. When the outer loop is passed, the inner loop needs
276 /// to be passed as \p InnerLoop.
277 bool findInductionAndReductions(Loop *L,
278 SmallVector<PHINode *, 8> &Inductions,
279 Loop *InnerLoop);
280
281 Loop *OuterLoop;
282 Loop *InnerLoop;
283
284 ScalarEvolution *SE;
285
286 /// Interface to emit optimization remarks.
288
289 /// Set of reduction PHIs taking part of a reduction across the inner and
290 /// outer loop.
291 SmallPtrSet<PHINode *, 4> OuterInnerReductions;
292
293 /// Set of inner loop induction PHIs
294 SmallVector<PHINode *, 8> InnerLoopInductions;
295};
296
297/// LoopInterchangeProfitability checks if it is profitable to interchange the
298/// loop.
299class LoopInterchangeProfitability {
300public:
301 LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
303 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
304
305 /// Check if the loop interchange is profitable.
306 bool isProfitable(const Loop *InnerLoop, const Loop *OuterLoop,
307 unsigned InnerLoopId, unsigned OuterLoopId,
308 CharMatrix &DepMatrix,
310 std::unique_ptr<CacheCost> &CC);
311
312private:
313 int getInstrOrderCost();
314 std::optional<bool> isProfitablePerLoopCacheAnalysis(
316 std::unique_ptr<CacheCost> &CC);
317 std::optional<bool> isProfitablePerInstrOrderCost();
318 std::optional<bool> isProfitableForVectorization(unsigned InnerLoopId,
319 unsigned OuterLoopId,
320 CharMatrix &DepMatrix);
321 Loop *OuterLoop;
322 Loop *InnerLoop;
323
324 /// Scev analysis.
325 ScalarEvolution *SE;
326
327 /// Interface to emit optimization remarks.
329};
330
331/// LoopInterchangeTransform interchanges the loop.
332class LoopInterchangeTransform {
333public:
334 LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
335 LoopInfo *LI, DominatorTree *DT,
336 const LoopInterchangeLegality &LIL)
337 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {}
338
339 /// Interchange OuterLoop and InnerLoop.
340 bool transform();
341 void restructureLoops(Loop *NewInner, Loop *NewOuter,
342 BasicBlock *OrigInnerPreHeader,
343 BasicBlock *OrigOuterPreHeader);
344 void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
345
346private:
347 bool adjustLoopLinks();
348 bool adjustLoopBranches();
349
350 Loop *OuterLoop;
351 Loop *InnerLoop;
352
353 /// Scev analysis.
354 ScalarEvolution *SE;
355
356 LoopInfo *LI;
357 DominatorTree *DT;
358
359 const LoopInterchangeLegality &LIL;
360};
361
362struct LoopInterchange {
363 ScalarEvolution *SE = nullptr;
364 LoopInfo *LI = nullptr;
365 DependenceInfo *DI = nullptr;
366 DominatorTree *DT = nullptr;
367 std::unique_ptr<CacheCost> CC = nullptr;
368
369 /// Interface to emit optimization remarks.
371
372 LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
373 DominatorTree *DT, std::unique_ptr<CacheCost> &CC,
375 : SE(SE), LI(LI), DI(DI), DT(DT), CC(std::move(CC)), ORE(ORE) {}
376
377 bool run(Loop *L) {
378 if (L->getParentLoop())
379 return false;
380 SmallVector<Loop *, 8> LoopList;
381 populateWorklist(*L, LoopList);
382 return processLoopList(LoopList);
383 }
384
385 bool run(LoopNest &LN) {
386 SmallVector<Loop *, 8> LoopList(LN.getLoops().begin(), LN.getLoops().end());
387 for (unsigned I = 1; I < LoopList.size(); ++I)
388 if (LoopList[I]->getParentLoop() != LoopList[I - 1])
389 return false;
390 return processLoopList(LoopList);
391 }
392
393 bool isComputableLoopNest(ArrayRef<Loop *> LoopList) {
394 for (Loop *L : LoopList) {
395 const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
396 if (isa<SCEVCouldNotCompute>(ExitCountOuter)) {
397 LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n");
398 return false;
399 }
400 if (L->getNumBackEdges() != 1) {
401 LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n");
402 return false;
403 }
404 if (!L->getExitingBlock()) {
405 LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n");
406 return false;
407 }
408 }
409 return true;
410 }
411
412 unsigned selectLoopForInterchange(ArrayRef<Loop *> LoopList) {
413 // TODO: Add a better heuristic to select the loop to be interchanged based
414 // on the dependence matrix. Currently we select the innermost loop.
415 return LoopList.size() - 1;
416 }
417
418 bool processLoopList(SmallVectorImpl<Loop *> &LoopList) {
419 bool Changed = false;
420 unsigned LoopNestDepth = LoopList.size();
421 if (LoopNestDepth < 2) {
422 LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n");
423 return false;
424 }
425 if (LoopNestDepth > MaxLoopNestDepth) {
426 LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than "
427 << MaxLoopNestDepth << "\n");
428 return false;
429 }
430 if (!isComputableLoopNest(LoopList)) {
431 LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n");
432 return false;
433 }
434
435 LLVM_DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepth
436 << "\n");
437
438 CharMatrix DependencyMatrix;
439 Loop *OuterMostLoop = *(LoopList.begin());
440 if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth,
441 OuterMostLoop, DI, SE)) {
442 LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n");
443 return false;
444 }
445#ifdef DUMP_DEP_MATRICIES
446 LLVM_DEBUG(dbgs() << "Dependence before interchange\n");
447 printDepMatrix(DependencyMatrix);
448#endif
449
450 // Get the Outermost loop exit.
451 BasicBlock *LoopNestExit = OuterMostLoop->getExitBlock();
452 if (!LoopNestExit) {
453 LLVM_DEBUG(dbgs() << "OuterMostLoop needs an unique exit block");
454 return false;
455 }
456
457 unsigned SelecLoopId = selectLoopForInterchange(LoopList);
458 // Obtain the loop vector returned from loop cache analysis beforehand,
459 // and put each <Loop, index> pair into a map for constant time query
460 // later. Indices in loop vector reprsent the optimal order of the
461 // corresponding loop, e.g., given a loopnest with depth N, index 0
462 // indicates the loop should be placed as the outermost loop and index N
463 // indicates the loop should be placed as the innermost loop.
464 //
465 // For the old pass manager CacheCost would be null.
467 if (CC != nullptr) {
468 const auto &LoopCosts = CC->getLoopCosts();
469 for (unsigned i = 0; i < LoopCosts.size(); i++) {
470 CostMap[LoopCosts[i].first] = i;
471 }
472 }
473 // We try to achieve the globally optimal memory access for the loopnest,
474 // and do interchange based on a bubble-sort fasion. We start from
475 // the innermost loop, move it outwards to the best possible position
476 // and repeat this process.
477 for (unsigned j = SelecLoopId; j > 0; j--) {
478 bool ChangedPerIter = false;
479 for (unsigned i = SelecLoopId; i > SelecLoopId - j; i--) {
480 bool Interchanged = processLoop(LoopList[i], LoopList[i - 1], i, i - 1,
481 DependencyMatrix, CostMap);
482 if (!Interchanged)
483 continue;
484 // Loops interchanged, update LoopList accordingly.
485 std::swap(LoopList[i - 1], LoopList[i]);
486 // Update the DependencyMatrix
487 interChangeDependencies(DependencyMatrix, i, i - 1);
488#ifdef DUMP_DEP_MATRICIES
489 LLVM_DEBUG(dbgs() << "Dependence after interchange\n");
490 printDepMatrix(DependencyMatrix);
491#endif
492 ChangedPerIter |= Interchanged;
493 Changed |= Interchanged;
494 }
495 // Early abort if there was no interchange during an entire round of
496 // moving loops outwards.
497 if (!ChangedPerIter)
498 break;
499 }
500 return Changed;
501 }
502
503 bool processLoop(Loop *InnerLoop, Loop *OuterLoop, unsigned InnerLoopId,
504 unsigned OuterLoopId,
505 std::vector<std::vector<char>> &DependencyMatrix,
506 const DenseMap<const Loop *, unsigned> &CostMap) {
507 LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId
508 << " and OuterLoopId = " << OuterLoopId << "\n");
509 LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
510 if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
511 LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n");
512 return false;
513 }
514 LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");
515 LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
516 if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
517 DependencyMatrix, CostMap, CC)) {
518 LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
519 return false;
520 }
521
522 ORE->emit([&]() {
523 return OptimizationRemark(DEBUG_TYPE, "Interchanged",
524 InnerLoop->getStartLoc(),
525 InnerLoop->getHeader())
526 << "Loop interchanged with enclosing loop.";
527 });
528
529 LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL);
530 LIT.transform();
531 LLVM_DEBUG(dbgs() << "Loops interchanged.\n");
532 LoopsInterchanged++;
533
534 llvm::formLCSSARecursively(*OuterLoop, *DT, LI, SE);
535 return true;
536 }
537};
538
539} // end anonymous namespace
540
541bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock *BB) {
542 return any_of(*BB, [](const Instruction &I) {
543 return I.mayHaveSideEffects() || I.mayReadFromMemory();
544 });
545}
546
547bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
548 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
549 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
550 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
551
552 LLVM_DEBUG(dbgs() << "Checking if loops are tightly nested\n");
553
554 // A perfectly nested loop will not have any branch in between the outer and
555 // inner block i.e. outer header will branch to either inner preheader and
556 // outerloop latch.
557 BranchInst *OuterLoopHeaderBI =
558 dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
559 if (!OuterLoopHeaderBI)
560 return false;
561
562 for (BasicBlock *Succ : successors(OuterLoopHeaderBI))
563 if (Succ != InnerLoopPreHeader && Succ != InnerLoop->getHeader() &&
564 Succ != OuterLoopLatch)
565 return false;
566
567 LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n");
568 // We do not have any basic block in between now make sure the outer header
569 // and outer loop latch doesn't contain any unsafe instructions.
570 if (containsUnsafeInstructions(OuterLoopHeader) ||
571 containsUnsafeInstructions(OuterLoopLatch))
572 return false;
573
574 // Also make sure the inner loop preheader does not contain any unsafe
575 // instructions. Note that all instructions in the preheader will be moved to
576 // the outer loop header when interchanging.
577 if (InnerLoopPreHeader != OuterLoopHeader &&
578 containsUnsafeInstructions(InnerLoopPreHeader))
579 return false;
580
581 BasicBlock *InnerLoopExit = InnerLoop->getExitBlock();
582 // Ensure the inner loop exit block flows to the outer loop latch possibly
583 // through empty blocks.
584 const BasicBlock &SuccInner =
585 LoopNest::skipEmptyBlockUntil(InnerLoopExit, OuterLoopLatch);
586 if (&SuccInner != OuterLoopLatch) {
587 LLVM_DEBUG(dbgs() << "Inner loop exit block " << *InnerLoopExit
588 << " does not lead to the outer loop latch.\n";);
589 return false;
590 }
591 // The inner loop exit block does flow to the outer loop latch and not some
592 // other BBs, now make sure it contains safe instructions, since it will be
593 // moved into the (new) inner loop after interchange.
594 if (containsUnsafeInstructions(InnerLoopExit))
595 return false;
596
597 LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n");
598 // We have a perfect loop nest.
599 return true;
600}
601
602bool LoopInterchangeLegality::isLoopStructureUnderstood() {
603 BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
604 for (PHINode *InnerInduction : InnerLoopInductions) {
605 unsigned Num = InnerInduction->getNumOperands();
606 for (unsigned i = 0; i < Num; ++i) {
607 Value *Val = InnerInduction->getOperand(i);
608 if (isa<Constant>(Val))
609 continue;
610 Instruction *I = dyn_cast<Instruction>(Val);
611 if (!I)
612 return false;
613 // TODO: Handle triangular loops.
614 // e.g. for(int i=0;i<N;i++)
615 // for(int j=i;j<N;j++)
616 unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i);
617 if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
618 InnerLoopPreheader &&
619 !OuterLoop->isLoopInvariant(I)) {
620 return false;
621 }
622 }
623 }
624
625 // TODO: Handle triangular loops of another form.
626 // e.g. for(int i=0;i<N;i++)
627 // for(int j=0;j<i;j++)
628 // or,
629 // for(int i=0;i<N;i++)
630 // for(int j=0;j*i<N;j++)
631 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
632 BranchInst *InnerLoopLatchBI =
633 dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
634 if (!InnerLoopLatchBI->isConditional())
635 return false;
636 if (CmpInst *InnerLoopCmp =
637 dyn_cast<CmpInst>(InnerLoopLatchBI->getCondition())) {
638 Value *Op0 = InnerLoopCmp->getOperand(0);
639 Value *Op1 = InnerLoopCmp->getOperand(1);
640
641 // LHS and RHS of the inner loop exit condition, e.g.,
642 // in "for(int j=0;j<i;j++)", LHS is j and RHS is i.
643 Value *Left = nullptr;
644 Value *Right = nullptr;
645
646 // Check if V only involves inner loop induction variable.
647 // Return true if V is InnerInduction, or a cast from
648 // InnerInduction, or a binary operator that involves
649 // InnerInduction and a constant.
650 std::function<bool(Value *)> IsPathToInnerIndVar;
651 IsPathToInnerIndVar = [this, &IsPathToInnerIndVar](const Value *V) -> bool {
652 if (llvm::is_contained(InnerLoopInductions, V))
653 return true;
654 if (isa<Constant>(V))
655 return true;
656 const Instruction *I = dyn_cast<Instruction>(V);
657 if (!I)
658 return false;
659 if (isa<CastInst>(I))
660 return IsPathToInnerIndVar(I->getOperand(0));
661 if (isa<BinaryOperator>(I))
662 return IsPathToInnerIndVar(I->getOperand(0)) &&
663 IsPathToInnerIndVar(I->getOperand(1));
664 return false;
665 };
666
667 // In case of multiple inner loop indvars, it is okay if LHS and RHS
668 // are both inner indvar related variables.
669 if (IsPathToInnerIndVar(Op0) && IsPathToInnerIndVar(Op1))
670 return true;
671
672 // Otherwise we check if the cmp instruction compares an inner indvar
673 // related variable (Left) with a outer loop invariant (Right).
674 if (IsPathToInnerIndVar(Op0) && !isa<Constant>(Op0)) {
675 Left = Op0;
676 Right = Op1;
677 } else if (IsPathToInnerIndVar(Op1) && !isa<Constant>(Op1)) {
678 Left = Op1;
679 Right = Op0;
680 }
681
682 if (Left == nullptr)
683 return false;
684
685 const SCEV *S = SE->getSCEV(Right);
686 if (!SE->isLoopInvariant(S, OuterLoop))
687 return false;
688 }
689
690 return true;
691}
692
693// If SV is a LCSSA PHI node with a single incoming value, return the incoming
694// value.
695static Value *followLCSSA(Value *SV) {
696 PHINode *PHI = dyn_cast<PHINode>(SV);
697 if (!PHI)
698 return SV;
699
700 if (PHI->getNumIncomingValues() != 1)
701 return SV;
702 return followLCSSA(PHI->getIncomingValue(0));
703}
704
705// Check V's users to see if it is involved in a reduction in L.
707 // Reduction variables cannot be constants.
708 if (isa<Constant>(V))
709 return nullptr;
710
711 for (Value *User : V->users()) {
712 if (PHINode *PHI = dyn_cast<PHINode>(User)) {
713 if (PHI->getNumIncomingValues() == 1)
714 continue;
717 // Detect floating point reduction only when it can be reordered.
718 if (RD.getExactFPMathInst() != nullptr)
719 return nullptr;
720 return PHI;
721 }
722 return nullptr;
723 }
724 }
725
726 return nullptr;
727}
728
729bool LoopInterchangeLegality::findInductionAndReductions(
730 Loop *L, SmallVector<PHINode *, 8> &Inductions, Loop *InnerLoop) {
731 if (!L->getLoopLatch() || !L->getLoopPredecessor())
732 return false;
733 for (PHINode &PHI : L->getHeader()->phis()) {
736 Inductions.push_back(&PHI);
737 else {
738 // PHIs in inner loops need to be part of a reduction in the outer loop,
739 // discovered when checking the PHIs of the outer loop earlier.
740 if (!InnerLoop) {
741 if (!OuterInnerReductions.count(&PHI)) {
742 LLVM_DEBUG(dbgs() << "Inner loop PHI is not part of reductions "
743 "across the outer loop.\n");
744 return false;
745 }
746 } else {
747 assert(PHI.getNumIncomingValues() == 2 &&
748 "Phis in loop header should have exactly 2 incoming values");
749 // Check if we have a PHI node in the outer loop that has a reduction
750 // result from the inner loop as an incoming value.
751 Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch()));
752 PHINode *InnerRedPhi = findInnerReductionPhi(InnerLoop, V);
753 if (!InnerRedPhi ||
754 !llvm::is_contained(InnerRedPhi->incoming_values(), &PHI)) {
756 dbgs()
757 << "Failed to recognize PHI as an induction or reduction.\n");
758 return false;
759 }
760 OuterInnerReductions.insert(&PHI);
761 OuterInnerReductions.insert(InnerRedPhi);
762 }
763 }
764 }
765 return true;
766}
767
768// This function indicates the current limitations in the transform as a result
769// of which we do not proceed.
770bool LoopInterchangeLegality::currentLimitations() {
771 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
772
773 // transform currently expects the loop latches to also be the exiting
774 // blocks.
775 if (InnerLoop->getExitingBlock() != InnerLoopLatch ||
776 OuterLoop->getExitingBlock() != OuterLoop->getLoopLatch() ||
777 !isa<BranchInst>(InnerLoopLatch->getTerminator()) ||
778 !isa<BranchInst>(OuterLoop->getLoopLatch()->getTerminator())) {
780 dbgs() << "Loops where the latch is not the exiting block are not"
781 << " supported currently.\n");
782 ORE->emit([&]() {
783 return OptimizationRemarkMissed(DEBUG_TYPE, "ExitingNotLatch",
784 OuterLoop->getStartLoc(),
785 OuterLoop->getHeader())
786 << "Loops where the latch is not the exiting block cannot be"
787 " interchange currently.";
788 });
789 return true;
790 }
791
792 SmallVector<PHINode *, 8> Inductions;
793 if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
795 dbgs() << "Only outer loops with induction or reduction PHI nodes "
796 << "are supported currently.\n");
797 ORE->emit([&]() {
798 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIOuter",
799 OuterLoop->getStartLoc(),
800 OuterLoop->getHeader())
801 << "Only outer loops with induction or reduction PHI nodes can be"
802 " interchanged currently.";
803 });
804 return true;
805 }
806
807 Inductions.clear();
808 // For multi-level loop nests, make sure that all phi nodes for inner loops
809 // at all levels can be recognized as a induction or reduction phi. Bail out
810 // if a phi node at a certain nesting level cannot be properly recognized.
811 Loop *CurLevelLoop = OuterLoop;
812 while (!CurLevelLoop->getSubLoops().empty()) {
813 // We already made sure that the loop nest is tightly nested.
814 CurLevelLoop = CurLevelLoop->getSubLoops().front();
815 if (!findInductionAndReductions(CurLevelLoop, Inductions, nullptr)) {
817 dbgs() << "Only inner loops with induction or reduction PHI nodes "
818 << "are supported currently.\n");
819 ORE->emit([&]() {
820 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner",
821 CurLevelLoop->getStartLoc(),
822 CurLevelLoop->getHeader())
823 << "Only inner loops with induction or reduction PHI nodes can be"
824 " interchange currently.";
825 });
826 return true;
827 }
828 }
829
830 // TODO: Triangular loops are not handled for now.
831 if (!isLoopStructureUnderstood()) {
832 LLVM_DEBUG(dbgs() << "Loop structure not understood by pass\n");
833 ORE->emit([&]() {
834 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedStructureInner",
835 InnerLoop->getStartLoc(),
836 InnerLoop->getHeader())
837 << "Inner loop structure not understood currently.";
838 });
839 return true;
840 }
841
842 return false;
843}
844
845bool LoopInterchangeLegality::findInductions(
846 Loop *L, SmallVectorImpl<PHINode *> &Inductions) {
847 for (PHINode &PHI : L->getHeader()->phis()) {
850 Inductions.push_back(&PHI);
851 }
852 return !Inductions.empty();
853}
854
855// We currently only support LCSSA PHI nodes in the inner loop exit, if their
856// users are either reduction PHIs or PHIs outside the outer loop (which means
857// the we are only interested in the final value after the loop).
858static bool
861 BasicBlock *InnerExit = OuterL->getUniqueExitBlock();
862 for (PHINode &PHI : InnerExit->phis()) {
863 // Reduction lcssa phi will have only 1 incoming block that from loop latch.
864 if (PHI.getNumIncomingValues() > 1)
865 return false;
866 if (any_of(PHI.users(), [&Reductions, OuterL](User *U) {
867 PHINode *PN = dyn_cast<PHINode>(U);
868 return !PN ||
869 (!Reductions.count(PN) && OuterL->contains(PN->getParent()));
870 })) {
871 return false;
872 }
873 }
874 return true;
875}
876
877// We currently support LCSSA PHI nodes in the outer loop exit, if their
878// incoming values do not come from the outer loop latch or if the
879// outer loop latch has a single predecessor. In that case, the value will
880// be available if both the inner and outer loop conditions are true, which
881// will still be true after interchanging. If we have multiple predecessor,
882// that may not be the case, e.g. because the outer loop latch may be executed
883// if the inner loop is not executed.
884static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
885 BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock();
886 for (PHINode &PHI : LoopNestExit->phis()) {
887 for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
888 Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i));
889 if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch())
890 continue;
891
892 // The incoming value is defined in the outer loop latch. Currently we
893 // only support that in case the outer loop latch has a single predecessor.
894 // This guarantees that the outer loop latch is executed if and only if
895 // the inner loop is executed (because tightlyNested() guarantees that the
896 // outer loop header only branches to the inner loop or the outer loop
897 // latch).
898 // FIXME: We could weaken this logic and allow multiple predecessors,
899 // if the values are produced outside the loop latch. We would need
900 // additional logic to update the PHI nodes in the exit block as
901 // well.
902 if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr)
903 return false;
904 }
905 }
906 return true;
907}
908
909// In case of multi-level nested loops, it may occur that lcssa phis exist in
910// the latch of InnerLoop, i.e., when defs of the incoming values are further
911// inside the loopnest. Sometimes those incoming values are not available
912// after interchange, since the original inner latch will become the new outer
913// latch which may have predecessor paths that do not include those incoming
914// values.
915// TODO: Handle transformation of lcssa phis in the InnerLoop latch in case of
916// multi-level loop nests.
917static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
918 if (InnerLoop->getSubLoops().empty())
919 return true;
920 // If the original outer latch has only one predecessor, then values defined
921 // further inside the looploop, e.g., in the innermost loop, will be available
922 // at the new outer latch after interchange.
923 if (OuterLoop->getLoopLatch()->getUniquePredecessor() != nullptr)
924 return true;
925
926 // The outer latch has more than one predecessors, i.e., the inner
927 // exit and the inner header.
928 // PHI nodes in the inner latch are lcssa phis where the incoming values
929 // are defined further inside the loopnest. Check if those phis are used
930 // in the original inner latch. If that is the case then bail out since
931 // those incoming values may not be available at the new outer latch.
932 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
933 for (PHINode &PHI : InnerLoopLatch->phis()) {
934 for (auto *U : PHI.users()) {
935 Instruction *UI = cast<Instruction>(U);
936 if (InnerLoopLatch == UI->getParent())
937 return false;
938 }
939 }
940 return true;
941}
942
943bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
944 unsigned OuterLoopId,
945 CharMatrix &DepMatrix) {
946 if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) {
947 LLVM_DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId
948 << " and OuterLoopId = " << OuterLoopId
949 << " due to dependence\n");
950 ORE->emit([&]() {
951 return OptimizationRemarkMissed(DEBUG_TYPE, "Dependence",
952 InnerLoop->getStartLoc(),
953 InnerLoop->getHeader())
954 << "Cannot interchange loops due to dependences.";
955 });
956 return false;
957 }
958 // Check if outer and inner loop contain legal instructions only.
959 for (auto *BB : OuterLoop->blocks())
961 if (CallInst *CI = dyn_cast<CallInst>(&I)) {
962 // readnone functions do not prevent interchanging.
963 if (CI->onlyWritesMemory())
964 continue;
966 dbgs() << "Loops with call instructions cannot be interchanged "
967 << "safely.");
968 ORE->emit([&]() {
969 return OptimizationRemarkMissed(DEBUG_TYPE, "CallInst",
970 CI->getDebugLoc(),
971 CI->getParent())
972 << "Cannot interchange loops due to call instruction.";
973 });
974
975 return false;
976 }
977
978 if (!findInductions(InnerLoop, InnerLoopInductions)) {
979 LLVM_DEBUG(dbgs() << "Cound not find inner loop induction variables.\n");
980 return false;
981 }
982
983 if (!areInnerLoopLatchPHIsSupported(OuterLoop, InnerLoop)) {
984 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop latch.\n");
985 ORE->emit([&]() {
986 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedInnerLatchPHI",
987 InnerLoop->getStartLoc(),
988 InnerLoop->getHeader())
989 << "Cannot interchange loops because unsupported PHI nodes found "
990 "in inner loop latch.";
991 });
992 return false;
993 }
994
995 // TODO: The loops could not be interchanged due to current limitations in the
996 // transform module.
997 if (currentLimitations()) {
998 LLVM_DEBUG(dbgs() << "Not legal because of current transform limitation\n");
999 return false;
1000 }
1001
1002 // Check if the loops are tightly nested.
1003 if (!tightlyNested(OuterLoop, InnerLoop)) {
1004 LLVM_DEBUG(dbgs() << "Loops not tightly nested\n");
1005 ORE->emit([&]() {
1006 return OptimizationRemarkMissed(DEBUG_TYPE, "NotTightlyNested",
1007 InnerLoop->getStartLoc(),
1008 InnerLoop->getHeader())
1009 << "Cannot interchange loops because they are not tightly "
1010 "nested.";
1011 });
1012 return false;
1013 }
1014
1015 if (!areInnerLoopExitPHIsSupported(OuterLoop, InnerLoop,
1016 OuterInnerReductions)) {
1017 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop exit.\n");
1018 ORE->emit([&]() {
1019 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
1020 InnerLoop->getStartLoc(),
1021 InnerLoop->getHeader())
1022 << "Found unsupported PHI node in loop exit.";
1023 });
1024 return false;
1025 }
1026
1027 if (!areOuterLoopExitPHIsSupported(OuterLoop, InnerLoop)) {
1028 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n");
1029 ORE->emit([&]() {
1030 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
1031 OuterLoop->getStartLoc(),
1032 OuterLoop->getHeader())
1033 << "Found unsupported PHI node in loop exit.";
1034 });
1035 return false;
1036 }
1037
1038 return true;
1039}
1040
1041int LoopInterchangeProfitability::getInstrOrderCost() {
1042 unsigned GoodOrder, BadOrder;
1043 BadOrder = GoodOrder = 0;
1044 for (BasicBlock *BB : InnerLoop->blocks()) {
1045 for (Instruction &Ins : *BB) {
1046 if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) {
1047 unsigned NumOp = GEP->getNumOperands();
1048 bool FoundInnerInduction = false;
1049 bool FoundOuterInduction = false;
1050 for (unsigned i = 0; i < NumOp; ++i) {
1051 // Skip operands that are not SCEV-able.
1052 if (!SE->isSCEVable(GEP->getOperand(i)->getType()))
1053 continue;
1054
1055 const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i));
1056 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal);
1057 if (!AR)
1058 continue;
1059
1060 // If we find the inner induction after an outer induction e.g.
1061 // for(int i=0;i<N;i++)
1062 // for(int j=0;j<N;j++)
1063 // A[i][j] = A[i-1][j-1]+k;
1064 // then it is a good order.
1065 if (AR->getLoop() == InnerLoop) {
1066 // We found an InnerLoop induction after OuterLoop induction. It is
1067 // a good order.
1068 FoundInnerInduction = true;
1069 if (FoundOuterInduction) {
1070 GoodOrder++;
1071 break;
1072 }
1073 }
1074 // If we find the outer induction after an inner induction e.g.
1075 // for(int i=0;i<N;i++)
1076 // for(int j=0;j<N;j++)
1077 // A[j][i] = A[j-1][i-1]+k;
1078 // then it is a bad order.
1079 if (AR->getLoop() == OuterLoop) {
1080 // We found an OuterLoop induction after InnerLoop induction. It is
1081 // a bad order.
1082 FoundOuterInduction = true;
1083 if (FoundInnerInduction) {
1084 BadOrder++;
1085 break;
1086 }
1087 }
1088 }
1089 }
1090 }
1091 }
1092 return GoodOrder - BadOrder;
1093}
1094
1095std::optional<bool>
1096LoopInterchangeProfitability::isProfitablePerLoopCacheAnalysis(
1097 const DenseMap<const Loop *, unsigned> &CostMap,
1098 std::unique_ptr<CacheCost> &CC) {
1099 // This is the new cost model returned from loop cache analysis.
1100 // A smaller index means the loop should be placed an outer loop, and vice
1101 // versa.
1102 if (CostMap.contains(InnerLoop) && CostMap.contains(OuterLoop)) {
1103 unsigned InnerIndex = 0, OuterIndex = 0;
1104 InnerIndex = CostMap.find(InnerLoop)->second;
1105 OuterIndex = CostMap.find(OuterLoop)->second;
1106 LLVM_DEBUG(dbgs() << "InnerIndex = " << InnerIndex
1107 << ", OuterIndex = " << OuterIndex << "\n");
1108 if (InnerIndex < OuterIndex)
1109 return std::optional<bool>(true);
1110 assert(InnerIndex != OuterIndex && "CostMap should assign unique "
1111 "numbers to each loop");
1112 if (CC->getLoopCost(*OuterLoop) == CC->getLoopCost(*InnerLoop))
1113 return std::nullopt;
1114 return std::optional<bool>(false);
1115 }
1116 return std::nullopt;
1117}
1118
1119std::optional<bool>
1120LoopInterchangeProfitability::isProfitablePerInstrOrderCost() {
1121 // Legacy cost model: this is rough cost estimation algorithm. It counts the
1122 // good and bad order of induction variables in the instruction and allows
1123 // reordering if number of bad orders is more than good.
1124 int Cost = getInstrOrderCost();
1125 LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n");
1127 return std::optional<bool>(true);
1128
1129 return std::nullopt;
1130}
1131
1132std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
1133 unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix) {
1134 for (auto &Row : DepMatrix) {
1135 // If the inner loop is loop independent or doesn't carry any dependency
1136 // it is not profitable to move this to outer position, since we are
1137 // likely able to do inner loop vectorization already.
1138 if (Row[InnerLoopId] == 'I' || Row[InnerLoopId] == '=')
1139 return std::optional<bool>(false);
1140
1141 // If the outer loop is not loop independent it is not profitable to move
1142 // this to inner position, since doing so would not enable inner loop
1143 // parallelism.
1144 if (Row[OuterLoopId] != 'I' && Row[OuterLoopId] != '=')
1145 return std::optional<bool>(false);
1146 }
1147 // If inner loop has dependence and outer loop is loop independent then it
1148 // is/ profitable to interchange to enable inner loop parallelism.
1149 // If there are no dependences, interchanging will not improve anything.
1150 return std::optional<bool>(!DepMatrix.empty());
1151}
1152
1153bool LoopInterchangeProfitability::isProfitable(
1154 const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
1155 unsigned OuterLoopId, CharMatrix &DepMatrix,
1156 const DenseMap<const Loop *, unsigned> &CostMap,
1157 std::unique_ptr<CacheCost> &CC) {
1158 // isProfitable() is structured to avoid endless loop interchange.
1159 // If loop cache analysis could decide the profitability then,
1160 // profitability check will stop and return the analysis result.
1161 // If cache analysis failed to analyze the loopnest (e.g.,
1162 // due to delinearization issues) then only check whether it is
1163 // profitable for InstrOrderCost. Likewise, if InstrOrderCost failed to
1164 // analysis the profitability then only, isProfitableForVectorization
1165 // will decide.
1166 std::optional<bool> shouldInterchange =
1167 isProfitablePerLoopCacheAnalysis(CostMap, CC);
1168 if (!shouldInterchange.has_value()) {
1169 shouldInterchange = isProfitablePerInstrOrderCost();
1170 if (!shouldInterchange.has_value())
1171 shouldInterchange =
1172 isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
1173 }
1174 if (!shouldInterchange.has_value()) {
1175 ORE->emit([&]() {
1176 return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable",
1177 InnerLoop->getStartLoc(),
1178 InnerLoop->getHeader())
1179 << "Insufficient information to calculate the cost of loop for "
1180 "interchange.";
1181 });
1182 return false;
1183 } else if (!shouldInterchange.value()) {
1184 ORE->emit([&]() {
1185 return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable",
1186 InnerLoop->getStartLoc(),
1187 InnerLoop->getHeader())
1188 << "Interchanging loops is not considered to improve cache "
1189 "locality nor vectorization.";
1190 });
1191 return false;
1192 }
1193 return true;
1194}
1195
1196void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
1197 Loop *InnerLoop) {
1198 for (Loop *L : *OuterLoop)
1199 if (L == InnerLoop) {
1200 OuterLoop->removeChildLoop(L);
1201 return;
1202 }
1203 llvm_unreachable("Couldn't find loop");
1204}
1205
1206/// Update LoopInfo, after interchanging. NewInner and NewOuter refer to the
1207/// new inner and outer loop after interchanging: NewInner is the original
1208/// outer loop and NewOuter is the original inner loop.
1209///
1210/// Before interchanging, we have the following structure
1211/// Outer preheader
1212// Outer header
1213// Inner preheader
1214// Inner header
1215// Inner body
1216// Inner latch
1217// outer bbs
1218// Outer latch
1219//
1220// After interchanging:
1221// Inner preheader
1222// Inner header
1223// Outer preheader
1224// Outer header
1225// Inner body
1226// outer bbs
1227// Outer latch
1228// Inner latch
1229void LoopInterchangeTransform::restructureLoops(
1230 Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader,
1231 BasicBlock *OrigOuterPreHeader) {
1232 Loop *OuterLoopParent = OuterLoop->getParentLoop();
1233 // The original inner loop preheader moves from the new inner loop to
1234 // the parent loop, if there is one.
1235 NewInner->removeBlockFromLoop(OrigInnerPreHeader);
1236 LI->changeLoopFor(OrigInnerPreHeader, OuterLoopParent);
1237
1238 // Switch the loop levels.
1239 if (OuterLoopParent) {
1240 // Remove the loop from its parent loop.
1241 removeChildLoop(OuterLoopParent, NewInner);
1242 removeChildLoop(NewInner, NewOuter);
1243 OuterLoopParent->addChildLoop(NewOuter);
1244 } else {
1245 removeChildLoop(NewInner, NewOuter);
1246 LI->changeTopLevelLoop(NewInner, NewOuter);
1247 }
1248 while (!NewOuter->isInnermost())
1249 NewInner->addChildLoop(NewOuter->removeChildLoop(NewOuter->begin()));
1250 NewOuter->addChildLoop(NewInner);
1251
1252 // BBs from the original inner loop.
1253 SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->blocks());
1254
1255 // Add BBs from the original outer loop to the original inner loop (excluding
1256 // BBs already in inner loop)
1257 for (BasicBlock *BB : NewInner->blocks())
1258 if (LI->getLoopFor(BB) == NewInner)
1259 NewOuter->addBlockEntry(BB);
1260
1261 // Now remove inner loop header and latch from the new inner loop and move
1262 // other BBs (the loop body) to the new inner loop.
1263 BasicBlock *OuterHeader = NewOuter->getHeader();
1264 BasicBlock *OuterLatch = NewOuter->getLoopLatch();
1265 for (BasicBlock *BB : OrigInnerBBs) {
1266 // Nothing will change for BBs in child loops.
1267 if (LI->getLoopFor(BB) != NewOuter)
1268 continue;
1269 // Remove the new outer loop header and latch from the new inner loop.
1270 if (BB == OuterHeader || BB == OuterLatch)
1271 NewInner->removeBlockFromLoop(BB);
1272 else
1273 LI->changeLoopFor(BB, NewInner);
1274 }
1275
1276 // The preheader of the original outer loop becomes part of the new
1277 // outer loop.
1278 NewOuter->addBlockEntry(OrigOuterPreHeader);
1279 LI->changeLoopFor(OrigOuterPreHeader, NewOuter);
1280
1281 // Tell SE that we move the loops around.
1282 SE->forgetLoop(NewOuter);
1283}
1284
1285bool LoopInterchangeTransform::transform() {
1286 bool Transformed = false;
1287
1288 if (InnerLoop->getSubLoops().empty()) {
1289 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1290 LLVM_DEBUG(dbgs() << "Splitting the inner loop latch\n");
1291 auto &InductionPHIs = LIL.getInnerLoopInductions();
1292 if (InductionPHIs.empty()) {
1293 LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n");
1294 return false;
1295 }
1296
1297 SmallVector<Instruction *, 8> InnerIndexVarList;
1298 for (PHINode *CurInductionPHI : InductionPHIs) {
1299 if (CurInductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
1300 InnerIndexVarList.push_back(
1301 dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(1)));
1302 else
1303 InnerIndexVarList.push_back(
1304 dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(0)));
1305 }
1306
1307 // Create a new latch block for the inner loop. We split at the
1308 // current latch's terminator and then move the condition and all
1309 // operands that are not either loop-invariant or the induction PHI into the
1310 // new latch block.
1311 BasicBlock *NewLatch =
1312 SplitBlock(InnerLoop->getLoopLatch(),
1313 InnerLoop->getLoopLatch()->getTerminator(), DT, LI);
1314
1316 unsigned i = 0;
1317 auto MoveInstructions = [&i, &WorkList, this, &InductionPHIs, NewLatch]() {
1318 for (; i < WorkList.size(); i++) {
1319 // Duplicate instruction and move it the new latch. Update uses that
1320 // have been moved.
1321 Instruction *NewI = WorkList[i]->clone();
1322 NewI->insertBefore(NewLatch->getFirstNonPHI());
1323 assert(!NewI->mayHaveSideEffects() &&
1324 "Moving instructions with side-effects may change behavior of "
1325 "the loop nest!");
1326 for (Use &U : llvm::make_early_inc_range(WorkList[i]->uses())) {
1327 Instruction *UserI = cast<Instruction>(U.getUser());
1328 if (!InnerLoop->contains(UserI->getParent()) ||
1329 UserI->getParent() == NewLatch ||
1330 llvm::is_contained(InductionPHIs, UserI))
1331 U.set(NewI);
1332 }
1333 // Add operands of moved instruction to the worklist, except if they are
1334 // outside the inner loop or are the induction PHI.
1335 for (Value *Op : WorkList[i]->operands()) {
1336 Instruction *OpI = dyn_cast<Instruction>(Op);
1337 if (!OpI ||
1338 this->LI->getLoopFor(OpI->getParent()) != this->InnerLoop ||
1339 llvm::is_contained(InductionPHIs, OpI))
1340 continue;
1341 WorkList.insert(OpI);
1342 }
1343 }
1344 };
1345
1346 // FIXME: Should we interchange when we have a constant condition?
1347 Instruction *CondI = dyn_cast<Instruction>(
1348 cast<BranchInst>(InnerLoop->getLoopLatch()->getTerminator())
1349 ->getCondition());
1350 if (CondI)
1351 WorkList.insert(CondI);
1352 MoveInstructions();
1353 for (Instruction *InnerIndexVar : InnerIndexVarList)
1354 WorkList.insert(cast<Instruction>(InnerIndexVar));
1355 MoveInstructions();
1356 }
1357
1358 // Ensure the inner loop phi nodes have a separate basic block.
1359 BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1360 if (InnerLoopHeader->getFirstNonPHI() != InnerLoopHeader->getTerminator()) {
1361 SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI);
1362 LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n");
1363 }
1364
1365 // Instructions in the original inner loop preheader may depend on values
1366 // defined in the outer loop header. Move them there, because the original
1367 // inner loop preheader will become the entry into the interchanged loop nest.
1368 // Currently we move all instructions and rely on LICM to move invariant
1369 // instructions outside the loop nest.
1370 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1371 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1372 if (InnerLoopPreHeader != OuterLoopHeader) {
1374 for (Instruction &I :
1375 make_early_inc_range(make_range(InnerLoopPreHeader->begin(),
1376 std::prev(InnerLoopPreHeader->end()))))
1377 I.moveBeforePreserving(OuterLoopHeader->getTerminator());
1378 }
1379
1380 Transformed |= adjustLoopLinks();
1381 if (!Transformed) {
1382 LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n");
1383 return false;
1384 }
1385
1386 return true;
1387}
1388
1389/// \brief Move all instructions except the terminator from FromBB right before
1390/// InsertBefore
1391static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
1392 BasicBlock *ToBB = InsertBefore->getParent();
1393
1394 ToBB->splice(InsertBefore->getIterator(), FromBB, FromBB->begin(),
1395 FromBB->getTerminator()->getIterator());
1396}
1397
1398/// Swap instructions between \p BB1 and \p BB2 but keep terminators intact.
1399static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2) {
1400 // Save all non-terminator instructions of BB1 into TempInstrs and unlink them
1401 // from BB1 afterwards.
1402 auto Iter = map_range(*BB1, [](Instruction &I) { return &I; });
1403 SmallVector<Instruction *, 4> TempInstrs(Iter.begin(), std::prev(Iter.end()));
1404 for (Instruction *I : TempInstrs)
1405 I->removeFromParent();
1406
1407 // Move instructions from BB2 to BB1.
1408 moveBBContents(BB2, BB1->getTerminator());
1409
1410 // Move instructions from TempInstrs to BB2.
1411 for (Instruction *I : TempInstrs)
1412 I->insertBefore(BB2->getTerminator());
1413}
1414
1415// Update BI to jump to NewBB instead of OldBB. Records updates to the
1416// dominator tree in DTUpdates. If \p MustUpdateOnce is true, assert that
1417// \p OldBB is exactly once in BI's successor list.
1418static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
1419 BasicBlock *NewBB,
1420 std::vector<DominatorTree::UpdateType> &DTUpdates,
1421 bool MustUpdateOnce = true) {
1422 assert((!MustUpdateOnce ||
1424 [OldBB](BasicBlock *BB) {
1425 return BB == OldBB;
1426 }) == 1) && "BI must jump to OldBB exactly once.");
1427 bool Changed = false;
1428 for (Use &Op : BI->operands())
1429 if (Op == OldBB) {
1430 Op.set(NewBB);
1431 Changed = true;
1432 }
1433
1434 if (Changed) {
1435 DTUpdates.push_back(
1436 {DominatorTree::UpdateKind::Insert, BI->getParent(), NewBB});
1437 DTUpdates.push_back(
1438 {DominatorTree::UpdateKind::Delete, BI->getParent(), OldBB});
1439 }
1440 assert(Changed && "Expected a successor to be updated");
1441}
1442
1443// Move Lcssa PHIs to the right place.
1444static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,
1445 BasicBlock *InnerLatch, BasicBlock *OuterHeader,
1446 BasicBlock *OuterLatch, BasicBlock *OuterExit,
1447 Loop *InnerLoop, LoopInfo *LI) {
1448
1449 // Deal with LCSSA PHI nodes in the exit block of the inner loop, that are
1450 // defined either in the header or latch. Those blocks will become header and
1451 // latch of the new outer loop, and the only possible users can PHI nodes
1452 // in the exit block of the loop nest or the outer loop header (reduction
1453 // PHIs, in that case, the incoming value must be defined in the inner loop
1454 // header). We can just substitute the user with the incoming value and remove
1455 // the PHI.
1456 for (PHINode &P : make_early_inc_range(InnerExit->phis())) {
1457 assert(P.getNumIncomingValues() == 1 &&
1458 "Only loops with a single exit are supported!");
1459
1460 // Incoming values are guaranteed be instructions currently.
1461 auto IncI = cast<Instruction>(P.getIncomingValueForBlock(InnerLatch));
1462 // In case of multi-level nested loops, follow LCSSA to find the incoming
1463 // value defined from the innermost loop.
1464 auto IncIInnerMost = cast<Instruction>(followLCSSA(IncI));
1465 // Skip phis with incoming values from the inner loop body, excluding the
1466 // header and latch.
1467 if (IncIInnerMost->getParent() != InnerLatch &&
1468 IncIInnerMost->getParent() != InnerHeader)
1469 continue;
1470
1471 assert(all_of(P.users(),
1472 [OuterHeader, OuterExit, IncI, InnerHeader](User *U) {
1473 return (cast<PHINode>(U)->getParent() == OuterHeader &&
1474 IncI->getParent() == InnerHeader) ||
1475 cast<PHINode>(U)->getParent() == OuterExit;
1476 }) &&
1477 "Can only replace phis iff the uses are in the loop nest exit or "
1478 "the incoming value is defined in the inner header (it will "
1479 "dominate all loop blocks after interchanging)");
1480 P.replaceAllUsesWith(IncI);
1481 P.eraseFromParent();
1482 }
1483
1484 SmallVector<PHINode *, 8> LcssaInnerExit;
1485 for (PHINode &P : InnerExit->phis())
1486 LcssaInnerExit.push_back(&P);
1487
1488 SmallVector<PHINode *, 8> LcssaInnerLatch;
1489 for (PHINode &P : InnerLatch->phis())
1490 LcssaInnerLatch.push_back(&P);
1491
1492 // Lcssa PHIs for values used outside the inner loop are in InnerExit.
1493 // If a PHI node has users outside of InnerExit, it has a use outside the
1494 // interchanged loop and we have to preserve it. We move these to
1495 // InnerLatch, which will become the new exit block for the innermost
1496 // loop after interchanging.
1497 for (PHINode *P : LcssaInnerExit)
1498 P->moveBefore(InnerLatch->getFirstNonPHI());
1499
1500 // If the inner loop latch contains LCSSA PHIs, those come from a child loop
1501 // and we have to move them to the new inner latch.
1502 for (PHINode *P : LcssaInnerLatch)
1503 P->moveBefore(InnerExit->getFirstNonPHI());
1504
1505 // Deal with LCSSA PHI nodes in the loop nest exit block. For PHIs that have
1506 // incoming values defined in the outer loop, we have to add a new PHI
1507 // in the inner loop latch, which became the exit block of the outer loop,
1508 // after interchanging.
1509 if (OuterExit) {
1510 for (PHINode &P : OuterExit->phis()) {
1511 if (P.getNumIncomingValues() != 1)
1512 continue;
1513 // Skip Phis with incoming values defined in the inner loop. Those should
1514 // already have been updated.
1515 auto I = dyn_cast<Instruction>(P.getIncomingValue(0));
1516 if (!I || LI->getLoopFor(I->getParent()) == InnerLoop)
1517 continue;
1518
1519 PHINode *NewPhi = dyn_cast<PHINode>(P.clone());
1520 NewPhi->setIncomingValue(0, P.getIncomingValue(0));
1521 NewPhi->setIncomingBlock(0, OuterLatch);
1522 // We might have incoming edges from other BBs, i.e., the original outer
1523 // header.
1524 for (auto *Pred : predecessors(InnerLatch)) {
1525 if (Pred == OuterLatch)
1526 continue;
1527 NewPhi->addIncoming(P.getIncomingValue(0), Pred);
1528 }
1529 NewPhi->insertBefore(InnerLatch->getFirstNonPHI());
1530 P.setIncomingValue(0, NewPhi);
1531 }
1532 }
1533
1534 // Now adjust the incoming blocks for the LCSSA PHIs.
1535 // For PHIs moved from Inner's exit block, we need to replace Inner's latch
1536 // with the new latch.
1537 InnerLatch->replacePhiUsesWith(InnerLatch, OuterLatch);
1538}
1539
1540bool LoopInterchangeTransform::adjustLoopBranches() {
1541 LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n");
1542 std::vector<DominatorTree::UpdateType> DTUpdates;
1543
1544 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1545 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1546
1547 assert(OuterLoopPreHeader != OuterLoop->getHeader() &&
1548 InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader &&
1549 InnerLoopPreHeader && "Guaranteed by loop-simplify form");
1550 // Ensure that both preheaders do not contain PHI nodes and have single
1551 // predecessors. This allows us to move them easily. We use
1552 // InsertPreHeaderForLoop to create an 'extra' preheader, if the existing
1553 // preheaders do not satisfy those conditions.
1554 if (isa<PHINode>(OuterLoopPreHeader->begin()) ||
1555 !OuterLoopPreHeader->getUniquePredecessor())
1556 OuterLoopPreHeader =
1557 InsertPreheaderForLoop(OuterLoop, DT, LI, nullptr, true);
1558 if (InnerLoopPreHeader == OuterLoop->getHeader())
1559 InnerLoopPreHeader =
1560 InsertPreheaderForLoop(InnerLoop, DT, LI, nullptr, true);
1561
1562 // Adjust the loop preheader
1563 BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1564 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1565 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
1566 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1567 BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor();
1568 BasicBlock *InnerLoopLatchPredecessor =
1569 InnerLoopLatch->getUniquePredecessor();
1570 BasicBlock *InnerLoopLatchSuccessor;
1571 BasicBlock *OuterLoopLatchSuccessor;
1572
1573 BranchInst *OuterLoopLatchBI =
1574 dyn_cast<BranchInst>(OuterLoopLatch->getTerminator());
1575 BranchInst *InnerLoopLatchBI =
1576 dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
1577 BranchInst *OuterLoopHeaderBI =
1578 dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
1579 BranchInst *InnerLoopHeaderBI =
1580 dyn_cast<BranchInst>(InnerLoopHeader->getTerminator());
1581
1582 if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
1583 !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
1584 !InnerLoopHeaderBI)
1585 return false;
1586
1587 BranchInst *InnerLoopLatchPredecessorBI =
1588 dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator());
1589 BranchInst *OuterLoopPredecessorBI =
1590 dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator());
1591
1592 if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
1593 return false;
1594 BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor();
1595 if (!InnerLoopHeaderSuccessor)
1596 return false;
1597
1598 // Adjust Loop Preheader and headers.
1599 // The branches in the outer loop predecessor and the outer loop header can
1600 // be unconditional branches or conditional branches with duplicates. Consider
1601 // this when updating the successors.
1602 updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader,
1603 InnerLoopPreHeader, DTUpdates, /*MustUpdateOnce=*/false);
1604 // The outer loop header might or might not branch to the outer latch.
1605 // We are guaranteed to branch to the inner loop preheader.
1606 if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch)) {
1607 // In this case the outerLoopHeader should branch to the InnerLoopLatch.
1608 updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, InnerLoopLatch,
1609 DTUpdates,
1610 /*MustUpdateOnce=*/false);
1611 }
1612 updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader,
1613 InnerLoopHeaderSuccessor, DTUpdates,
1614 /*MustUpdateOnce=*/false);
1615
1616 // Adjust reduction PHI's now that the incoming block has changed.
1617 InnerLoopHeaderSuccessor->replacePhiUsesWith(InnerLoopHeader,
1618 OuterLoopHeader);
1619
1620 updateSuccessor(InnerLoopHeaderBI, InnerLoopHeaderSuccessor,
1621 OuterLoopPreHeader, DTUpdates);
1622
1623 // -------------Adjust loop latches-----------
1624 if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader)
1625 InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1);
1626 else
1627 InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0);
1628
1629 updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch,
1630 InnerLoopLatchSuccessor, DTUpdates);
1631
1632 if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader)
1633 OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1);
1634 else
1635 OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0);
1636
1637 updateSuccessor(InnerLoopLatchBI, InnerLoopLatchSuccessor,
1638 OuterLoopLatchSuccessor, DTUpdates);
1639 updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
1640 DTUpdates);
1641
1642 DT->applyUpdates(DTUpdates);
1643 restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
1644 OuterLoopPreHeader);
1645
1646 moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
1647 OuterLoopHeader, OuterLoopLatch, InnerLoop->getExitBlock(),
1648 InnerLoop, LI);
1649 // For PHIs in the exit block of the outer loop, outer's latch has been
1650 // replaced by Inners'.
1651 OuterLoopLatchSuccessor->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
1652
1653 auto &OuterInnerReductions = LIL.getOuterInnerReductions();
1654 // Now update the reduction PHIs in the inner and outer loop headers.
1655 SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
1656 for (PHINode &PHI : InnerLoopHeader->phis())
1657 if (OuterInnerReductions.contains(&PHI))
1658 InnerLoopPHIs.push_back(&PHI);
1659
1660 for (PHINode &PHI : OuterLoopHeader->phis())
1661 if (OuterInnerReductions.contains(&PHI))
1662 OuterLoopPHIs.push_back(&PHI);
1663
1664 // Now move the remaining reduction PHIs from outer to inner loop header and
1665 // vice versa. The PHI nodes must be part of a reduction across the inner and
1666 // outer loop and all the remains to do is and updating the incoming blocks.
1667 for (PHINode *PHI : OuterLoopPHIs) {
1668 LLVM_DEBUG(dbgs() << "Outer loop reduction PHIs:\n"; PHI->dump(););
1669 PHI->moveBefore(InnerLoopHeader->getFirstNonPHI());
1670 assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node");
1671 }
1672 for (PHINode *PHI : InnerLoopPHIs) {
1673 LLVM_DEBUG(dbgs() << "Inner loop reduction PHIs:\n"; PHI->dump(););
1674 PHI->moveBefore(OuterLoopHeader->getFirstNonPHI());
1675 assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node");
1676 }
1677
1678 // Update the incoming blocks for moved PHI nodes.
1679 OuterLoopHeader->replacePhiUsesWith(InnerLoopPreHeader, OuterLoopPreHeader);
1680 OuterLoopHeader->replacePhiUsesWith(InnerLoopLatch, OuterLoopLatch);
1681 InnerLoopHeader->replacePhiUsesWith(OuterLoopPreHeader, InnerLoopPreHeader);
1682 InnerLoopHeader->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
1683
1684 // Values defined in the outer loop header could be used in the inner loop
1685 // latch. In that case, we need to create LCSSA phis for them, because after
1686 // interchanging they will be defined in the new inner loop and used in the
1687 // new outer loop.
1688 SmallVector<Instruction *, 4> MayNeedLCSSAPhis;
1689 for (Instruction &I :
1690 make_range(OuterLoopHeader->begin(), std::prev(OuterLoopHeader->end())))
1691 MayNeedLCSSAPhis.push_back(&I);
1692 formLCSSAForInstructions(MayNeedLCSSAPhis, *DT, *LI, SE);
1693
1694 return true;
1695}
1696
1697bool LoopInterchangeTransform::adjustLoopLinks() {
1698 // Adjust all branches in the inner and outer loop.
1699 bool Changed = adjustLoopBranches();
1700 if (Changed) {
1701 // We have interchanged the preheaders so we need to interchange the data in
1702 // the preheaders as well. This is because the content of the inner
1703 // preheader was previously executed inside the outer loop.
1704 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1705 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1706 swapBBContents(OuterLoopPreHeader, InnerLoopPreHeader);
1707 }
1708 return Changed;
1709}
1710
1714 LPMUpdater &U) {
1715 Function &F = *LN.getParent();
1716
1717 DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
1718 std::unique_ptr<CacheCost> CC =
1721 if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LN))
1722 return PreservedAnalyses::all();
1723 U.markLoopNestChanged(true);
1725}
Rewrite undef for PHI
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Given that RA is a live propagate it s liveness to any other values it uses(according to Uses). void DeadArgumentEliminationPass
#define LLVM_DEBUG(X)
Definition: Debug.h:101
Hexagon Common GEP
This file defines the interface for the loop cache analysis.
Loop::LoopBounds::Direction Direction
Definition: LoopInfo.cpp:230
static cl::opt< int > LoopInterchangeCostThreshold("loop-interchange-threshold", cl::init(0), cl::Hidden, cl::desc("Interchange if you gain more than this number"))
static PHINode * findInnerReductionPhi(Loop *L, Value *V)
static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore)
Move all instructions except the terminator from FromBB right before InsertBefore.
static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx, unsigned ToIndx)
static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader, BasicBlock *InnerLatch, BasicBlock *OuterHeader, BasicBlock *OuterLatch, BasicBlock *OuterExit, Loop *InnerLoop, LoopInfo *LI)
static const unsigned MaxLoopNestDepth
static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2)
Swap instructions between BB1 and BB2 but keep terminators intact.
static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, unsigned InnerLoopId, unsigned OuterLoopId)
static bool areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL, SmallPtrSetImpl< PHINode * > &Reductions)
#define DEBUG_TYPE
static const unsigned MaxMemInstrCount
static Value * followLCSSA(Value *SV)
static void populateWorklist(Loop &L, LoopVector &LoopList)
static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB, BasicBlock *NewBB, std::vector< DominatorTree::UpdateType > &DTUpdates, bool MustUpdateOnce=true)
static bool isLexicographicallyPositive(std::vector< char > &DV)
static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, Loop *L, DependenceInfo *DI, ScalarEvolution *SE)
This file defines the interface for the loop nest analysis.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define P(N)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:348
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
iterator end()
Definition: BasicBlock.h:442
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:429
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:498
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
Definition: BasicBlock.cpp:234
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:347
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:477
void replacePhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
Definition: BasicBlock.cpp:633
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:447
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:205
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:220
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition: BasicBlock.h:612
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
static std::unique_ptr< CacheCost > getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, std::optional< unsigned > TRT=std::nullopt)
Create a CacheCost for the loop nest rooted by Root.
This class represents a function call, abstracting a target machine's calling convention.
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:955
This class represents an Operation in the Expression.
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:145
DependenceInfo - This class is the main dependence-analysis driver.
std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool PossiblyLoopIndependent)
depends - Tests for a dependence between the Src and Dst instructions.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:973
A struct for saving information about induction variables.
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
const BasicBlock * getParent() const
Definition: Instruction.h:151
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
void removeBlockFromLoop(BlockT *BB)
This removes the specified basic block from the current loop, updating the Blocks as appropriate.
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
BlockT * getHeader() const
iterator_range< block_iterator > blocks() const
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
void addBlockEntry(BlockT *BB)
This adds a basic block directly to the basic block list.
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
iterator begin() const
BlockT * getUniqueExitBlock() const
If getUniqueExitBlocks would return exactly one block, return that block.
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
void changeTopLevelLoop(LoopT *OldLoop, LoopT *NewLoop)
Replace the specified loop in the top-level loops list with the indicated loop.
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
static const BasicBlock & skipEmptyBlockUntil(const BasicBlock *From, const BasicBlock *End, bool CheckUniquePred=false)
Recursivelly traverse all empty 'single successor' basic blocks of From (if there are any).
ArrayRef< Loop * > getLoops() const
Get the loops in the nest.
Function * getParent() const
Return the function to which the loop-nest belongs.
Loop & getOutermostLoop() const
Return the outermost loop in the loop nest.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:631
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:60
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
void setIncomingBlock(unsigned i, BasicBlock *BB)
void setIncomingValue(unsigned i, Value *V)
static unsigned getIncomingValueNumForOperand(unsigned i)
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:115
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:71
Instruction * getExactFPMathInst() const
Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
This node represents a polynomial recurrence on the trip count of the specified loop.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:321
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
op_range operands()
Definition: User.h:242
LLVM Value Representation.
Definition: Value.h:74
self_iterator getIterator()
Definition: ilist_node.h:109
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1731
auto successors(const MachineBasicBlock *BB)
bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition: LCSSA.cpp:425
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:665
auto map_range(ContainerTy &&C, FuncTy F)
Definition: STLExtras.h:377
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1937
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1738
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1858
bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
Definition: LCSSA.cpp:77
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1930
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1888
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Direction
An enum for the direction of the loop.
Definition: LoopInfo.h:220