Bug Summary

File:lib/Transforms/Scalar/LoopInterchange.cpp
Warning:line 1269, column 3
Value stored to 'InnerLoopLatch' is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name LoopInterchange.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-9/lib/clang/9.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-9~svn362543/build-llvm/lib/Transforms/Scalar -I /build/llvm-toolchain-snapshot-9~svn362543/lib/Transforms/Scalar -I /build/llvm-toolchain-snapshot-9~svn362543/build-llvm/include -I /build/llvm-toolchain-snapshot-9~svn362543/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/include/clang/9.0.0/include/ -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-9/lib/clang/9.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++11 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-9~svn362543/build-llvm/lib/Transforms/Scalar -fdebug-prefix-map=/build/llvm-toolchain-snapshot-9~svn362543=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -o /tmp/scan-build-2019-06-05-060531-1271-1 -x c++ /build/llvm-toolchain-snapshot-9~svn362543/lib/Transforms/Scalar/LoopInterchange.cpp -faddrsig
1//===- LoopInterchange.cpp - Loop interchange pass-------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This Pass handles loop interchange transform.
10// This pass interchanges loops to provide a more cache-friendly memory access
11// patterns.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/ADT/STLExtras.h"
16#include "llvm/ADT/SmallVector.h"
17#include "llvm/ADT/Statistic.h"
18#include "llvm/ADT/StringRef.h"
19#include "llvm/Analysis/DependenceAnalysis.h"
20#include "llvm/Analysis/LoopInfo.h"
21#include "llvm/Analysis/LoopPass.h"
22#include "llvm/Analysis/OptimizationRemarkEmitter.h"
23#include "llvm/Analysis/ScalarEvolution.h"
24#include "llvm/Analysis/ScalarEvolutionExpressions.h"
25#include "llvm/IR/BasicBlock.h"
26#include "llvm/IR/Constants.h"
27#include "llvm/IR/DiagnosticInfo.h"
28#include "llvm/IR/Dominators.h"
29#include "llvm/IR/Function.h"
30#include "llvm/IR/InstrTypes.h"
31#include "llvm/IR/Instruction.h"
32#include "llvm/IR/Instructions.h"
33#include "llvm/IR/Type.h"
34#include "llvm/IR/User.h"
35#include "llvm/IR/Value.h"
36#include "llvm/Pass.h"
37#include "llvm/Support/Casting.h"
38#include "llvm/Support/CommandLine.h"
39#include "llvm/Support/Debug.h"
40#include "llvm/Support/ErrorHandling.h"
41#include "llvm/Support/raw_ostream.h"
42#include "llvm/Transforms/Scalar.h"
43#include "llvm/Transforms/Utils.h"
44#include "llvm/Transforms/Utils/BasicBlockUtils.h"
45#include "llvm/Transforms/Utils/LoopUtils.h"
46#include <cassert>
47#include <utility>
48#include <vector>
49
50using namespace llvm;
51
52#define DEBUG_TYPE"loop-interchange" "loop-interchange"
53
54STATISTIC(LoopsInterchanged, "Number of loops interchanged")static llvm::Statistic LoopsInterchanged = {"loop-interchange"
, "LoopsInterchanged", "Number of loops interchanged", {0}, {
false}}
;
55
56static cl::opt<int> LoopInterchangeCostThreshold(
57 "loop-interchange-threshold", cl::init(0), cl::Hidden,
58 cl::desc("Interchange if you gain more than this number"));
59
60namespace {
61
62using LoopVector = SmallVector<Loop *, 8>;
63
64// TODO: Check if we can use a sparse matrix here.
65using CharMatrix = std::vector<std::vector<char>>;
66
67} // end anonymous namespace
68
69// Maximum number of dependencies that can be handled in the dependency matrix.
70static const unsigned MaxMemInstrCount = 100;
71
72// Maximum loop depth supported.
73static const unsigned MaxLoopNestDepth = 10;
74
75#ifdef DUMP_DEP_MATRICIES
76static void printDepMatrix(CharMatrix &DepMatrix) {
77 for (auto &Row : DepMatrix) {
78 for (auto D : Row)
79 LLVM_DEBUG(dbgs() << D << " ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << D << " "; } } while
(false)
;
80 LLVM_DEBUG(dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "\n"; } } while (false
)
;
81 }
82}
83#endif
84
85static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
86 Loop *L, DependenceInfo *DI) {
87 using ValueVector = SmallVector<Value *, 16>;
88
89 ValueVector MemInstr;
90
91 // For each block.
92 for (BasicBlock *BB : L->blocks()) {
93 // Scan the BB and collect legal loads and stores.
94 for (Instruction &I : *BB) {
95 if (!isa<Instruction>(I))
96 return false;
97 if (auto *Ld = dyn_cast<LoadInst>(&I)) {
98 if (!Ld->isSimple())
99 return false;
100 MemInstr.push_back(&I);
101 } else if (auto *St = dyn_cast<StoreInst>(&I)) {
102 if (!St->isSimple())
103 return false;
104 MemInstr.push_back(&I);
105 }
106 }
107 }
108
109 LLVM_DEBUG(dbgs() << "Found " << MemInstr.size()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found " << MemInstr
.size() << " Loads and Stores to analyze\n"; } } while (
false)
110 << " Loads and Stores to analyze\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found " << MemInstr
.size() << " Loads and Stores to analyze\n"; } } while (
false)
;
111
112 ValueVector::iterator I, IE, J, JE;
113
114 for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
115 for (J = I, JE = MemInstr.end(); J != JE; ++J) {
116 std::vector<char> Dep;
117 Instruction *Src = cast<Instruction>(*I);
118 Instruction *Dst = cast<Instruction>(*J);
119 if (Src == Dst)
120 continue;
121 // Ignore Input dependencies.
122 if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
123 continue;
124 // Track Output, Flow, and Anti dependencies.
125 if (auto D = DI->depends(Src, Dst, true)) {
126 assert(D->isOrdered() && "Expected an output, flow or anti dep.")((D->isOrdered() && "Expected an output, flow or anti dep."
) ? static_cast<void> (0) : __assert_fail ("D->isOrdered() && \"Expected an output, flow or anti dep.\""
, "/build/llvm-toolchain-snapshot-9~svn362543/lib/Transforms/Scalar/LoopInterchange.cpp"
, 126, __PRETTY_FUNCTION__))
;
127 LLVM_DEBUG(StringRef DepType =do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
128 D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
129 dbgs() << "Found " << DepTypedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
130 << " dependency between Src and Dst\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
131 << " Src:" << *Src << "\n Dst:" << *Dst << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
;
132 unsigned Levels = D->getLevels();
133 char Direction;
134 for (unsigned II = 1; II <= Levels; ++II) {
135 const SCEV *Distance = D->getDistance(II);
136 const SCEVConstant *SCEVConst =
137 dyn_cast_or_null<SCEVConstant>(Distance);
138 if (SCEVConst) {
139 const ConstantInt *CI = SCEVConst->getValue();
140 if (CI->isNegative())
141 Direction = '<';
142 else if (CI->isZero())
143 Direction = '=';
144 else
145 Direction = '>';
146 Dep.push_back(Direction);
147 } else if (D->isScalar(II)) {
148 Direction = 'S';
149 Dep.push_back(Direction);
150 } else {
151 unsigned Dir = D->getDirection(II);
152 if (Dir == Dependence::DVEntry::LT ||
153 Dir == Dependence::DVEntry::LE)
154 Direction = '<';
155 else if (Dir == Dependence::DVEntry::GT ||
156 Dir == Dependence::DVEntry::GE)
157 Direction = '>';
158 else if (Dir == Dependence::DVEntry::EQ)
159 Direction = '=';
160 else
161 Direction = '*';
162 Dep.push_back(Direction);
163 }
164 }
165 while (Dep.size() != Level) {
166 Dep.push_back('I');
167 }
168
169 DepMatrix.push_back(Dep);
170 if (DepMatrix.size() > MaxMemInstrCount) {
171 LLVM_DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCountdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cannot handle more than "
<< MaxMemInstrCount << " dependencies inside loop\n"
; } } while (false)
172 << " dependencies inside loop\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cannot handle more than "
<< MaxMemInstrCount << " dependencies inside loop\n"
; } } while (false)
;
173 return false;
174 }
175 }
176 }
177 }
178
179 return true;
180}
181
182// A loop is moved from index 'from' to an index 'to'. Update the Dependence
183// matrix by exchanging the two columns.
184static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx,
185 unsigned ToIndx) {
186 unsigned numRows = DepMatrix.size();
187 for (unsigned i = 0; i < numRows; ++i) {
188 char TmpVal = DepMatrix[i][ToIndx];
189 DepMatrix[i][ToIndx] = DepMatrix[i][FromIndx];
190 DepMatrix[i][FromIndx] = TmpVal;
191 }
192}
193
194// Checks if outermost non '=','S'or'I' dependence in the dependence matrix is
195// '>'
196static bool isOuterMostDepPositive(CharMatrix &DepMatrix, unsigned Row,
197 unsigned Column) {
198 for (unsigned i = 0; i <= Column; ++i) {
199 if (DepMatrix[Row][i] == '<')
200 return false;
201 if (DepMatrix[Row][i] == '>')
202 return true;
203 }
204 // All dependencies were '=','S' or 'I'
205 return false;
206}
207
208// Checks if no dependence exist in the dependency matrix in Row before Column.
209static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row,
210 unsigned Column) {
211 for (unsigned i = 0; i < Column; ++i) {
212 if (DepMatrix[Row][i] != '=' && DepMatrix[Row][i] != 'S' &&
213 DepMatrix[Row][i] != 'I')
214 return false;
215 }
216 return true;
217}
218
219static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row,
220 unsigned OuterLoopId, char InnerDep,
221 char OuterDep) {
222 if (isOuterMostDepPositive(DepMatrix, Row, OuterLoopId))
223 return false;
224
225 if (InnerDep == OuterDep)
226 return true;
227
228 // It is legal to interchange if and only if after interchange no row has a
229 // '>' direction as the leftmost non-'='.
230
231 if (InnerDep == '=' || InnerDep == 'S' || InnerDep == 'I')
232 return true;
233
234 if (InnerDep == '<')
235 return true;
236
237 if (InnerDep == '>') {
238 // If OuterLoopId represents outermost loop then interchanging will make the
239 // 1st dependency as '>'
240 if (OuterLoopId == 0)
241 return false;
242
243 // If all dependencies before OuterloopId are '=','S'or 'I'. Then
244 // interchanging will result in this row having an outermost non '='
245 // dependency of '>'
246 if (!containsNoDependence(DepMatrix, Row, OuterLoopId))
247 return true;
248 }
249
250 return false;
251}
252
253// Checks if it is legal to interchange 2 loops.
254// [Theorem] A permutation of the loops in a perfect nest is legal if and only
255// if the direction matrix, after the same permutation is applied to its
256// columns, has no ">" direction as the leftmost non-"=" direction in any row.
257static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
258 unsigned InnerLoopId,
259 unsigned OuterLoopId) {
260 unsigned NumRows = DepMatrix.size();
261 // For each row check if it is valid to interchange.
262 for (unsigned Row = 0; Row < NumRows; ++Row) {
263 char InnerDep = DepMatrix[Row][InnerLoopId];
264 char OuterDep = DepMatrix[Row][OuterLoopId];
265 if (InnerDep == '*' || OuterDep == '*')
266 return false;
267 if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep, OuterDep))
268 return false;
269 }
270 return true;
271}
272
273static LoopVector populateWorklist(Loop &L) {
274 LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Calling populateWorklist on Func: "
<< L.getHeader()->getParent()->getName() <<
" Loop: %" << L.getHeader()->getName() << '\n'
; } } while (false)
275 << L.getHeader()->getParent()->getName() << " Loop: %"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Calling populateWorklist on Func: "
<< L.getHeader()->getParent()->getName() <<
" Loop: %" << L.getHeader()->getName() << '\n'
; } } while (false)
276 << L.getHeader()->getName() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Calling populateWorklist on Func: "
<< L.getHeader()->getParent()->getName() <<
" Loop: %" << L.getHeader()->getName() << '\n'
; } } while (false)
;
277 LoopVector LoopList;
278 Loop *CurrentLoop = &L;
279 const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops();
280 while (!Vec->empty()) {
281 // The current loop has multiple subloops in it hence it is not tightly
282 // nested.
283 // Discard all loops above it added into Worklist.
284 if (Vec->size() != 1)
285 return {};
286
287 LoopList.push_back(CurrentLoop);
288 CurrentLoop = Vec->front();
289 Vec = &CurrentLoop->getSubLoops();
290 }
291 LoopList.push_back(CurrentLoop);
292 return LoopList;
293}
294
295static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) {
296 PHINode *InnerIndexVar = L->getCanonicalInductionVariable();
297 if (InnerIndexVar)
298 return InnerIndexVar;
299 if (L->getLoopLatch() == nullptr || L->getLoopPredecessor() == nullptr)
300 return nullptr;
301 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
302 PHINode *PhiVar = cast<PHINode>(I);
303 Type *PhiTy = PhiVar->getType();
304 if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
305 !PhiTy->isPointerTy())
306 return nullptr;
307 const SCEVAddRecExpr *AddRec =
308 dyn_cast<SCEVAddRecExpr>(SE->getSCEV(PhiVar));
309 if (!AddRec || !AddRec->isAffine())
310 continue;
311 const SCEV *Step = AddRec->getStepRecurrence(*SE);
312 if (!isa<SCEVConstant>(Step))
313 continue;
314 // Found the induction variable.
315 // FIXME: Handle loops with more than one induction variable. Note that,
316 // currently, legality makes sure we have only one induction variable.
317 return PhiVar;
318 }
319 return nullptr;
320}
321
322namespace {
323
324/// LoopInterchangeLegality checks if it is legal to interchange the loop.
325class LoopInterchangeLegality {
326public:
327 LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
328 OptimizationRemarkEmitter *ORE)
329 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
330
331 /// Check if the loops can be interchanged.
332 bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
333 CharMatrix &DepMatrix);
334
335 /// Check if the loop structure is understood. We do not handle triangular
336 /// loops for now.
337 bool isLoopStructureUnderstood(PHINode *InnerInductionVar);
338
339 bool currentLimitations();
340
341 const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions() const {
342 return OuterInnerReductions;
343 }
344
345private:
346 bool tightlyNested(Loop *Outer, Loop *Inner);
347 bool containsUnsafeInstructions(BasicBlock *BB);
348
349 /// Discover induction and reduction PHIs in the header of \p L. Induction
350 /// PHIs are added to \p Inductions, reductions are added to
351 /// OuterInnerReductions. When the outer loop is passed, the inner loop needs
352 /// to be passed as \p InnerLoop.
353 bool findInductionAndReductions(Loop *L,
354 SmallVector<PHINode *, 8> &Inductions,
355 Loop *InnerLoop);
356
357 Loop *OuterLoop;
358 Loop *InnerLoop;
359
360 ScalarEvolution *SE;
361
362 /// Interface to emit optimization remarks.
363 OptimizationRemarkEmitter *ORE;
364
365 /// Set of reduction PHIs taking part of a reduction across the inner and
366 /// outer loop.
367 SmallPtrSet<PHINode *, 4> OuterInnerReductions;
368};
369
370/// LoopInterchangeProfitability checks if it is profitable to interchange the
371/// loop.
372class LoopInterchangeProfitability {
373public:
374 LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
375 OptimizationRemarkEmitter *ORE)
376 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
377
378 /// Check if the loop interchange is profitable.
379 bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId,
380 CharMatrix &DepMatrix);
381
382private:
383 int getInstrOrderCost();
384
385 Loop *OuterLoop;
386 Loop *InnerLoop;
387
388 /// Scev analysis.
389 ScalarEvolution *SE;
390
391 /// Interface to emit optimization remarks.
392 OptimizationRemarkEmitter *ORE;
393};
394
395/// LoopInterchangeTransform interchanges the loop.
396class LoopInterchangeTransform {
397public:
398 LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
399 LoopInfo *LI, DominatorTree *DT,
400 BasicBlock *LoopNestExit,
401 const LoopInterchangeLegality &LIL)
402 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT),
403 LoopExit(LoopNestExit), LIL(LIL) {}
404
405 /// Interchange OuterLoop and InnerLoop.
406 bool transform();
407 void restructureLoops(Loop *NewInner, Loop *NewOuter,
408 BasicBlock *OrigInnerPreHeader,
409 BasicBlock *OrigOuterPreHeader);
410 void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
411
412private:
413 void splitInnerLoopLatch(Instruction *);
414 void splitInnerLoopHeader();
415 bool adjustLoopLinks();
416 void adjustLoopPreheaders();
417 bool adjustLoopBranches();
418
419 Loop *OuterLoop;
420 Loop *InnerLoop;
421
422 /// Scev analysis.
423 ScalarEvolution *SE;
424
425 LoopInfo *LI;
426 DominatorTree *DT;
427 BasicBlock *LoopExit;
428
429 const LoopInterchangeLegality &LIL;
430};
431
432// Main LoopInterchange Pass.
433struct LoopInterchange : public LoopPass {
434 static char ID;
435 ScalarEvolution *SE = nullptr;
436 LoopInfo *LI = nullptr;
437 DependenceInfo *DI = nullptr;
438 DominatorTree *DT = nullptr;
439
440 /// Interface to emit optimization remarks.
441 OptimizationRemarkEmitter *ORE;
442
443 LoopInterchange() : LoopPass(ID) {
444 initializeLoopInterchangePass(*PassRegistry::getPassRegistry());
445 }
446
447 void getAnalysisUsage(AnalysisUsage &AU) const override {
448 AU.addRequired<DependenceAnalysisWrapperPass>();
449 AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
450
451 getLoopAnalysisUsage(AU);
452 }
453
454 bool runOnLoop(Loop *L, LPPassManager &LPM) override {
455 if (skipLoop(L) || L->getParentLoop())
456 return false;
457
458 SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
459 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
460 DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI();
461 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
462 ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
463
464 return processLoopList(populateWorklist(*L));
465 }
466
467 bool isComputableLoopNest(LoopVector LoopList) {
468 for (Loop *L : LoopList) {
469 const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
470 if (ExitCountOuter == SE->getCouldNotCompute()) {
471 LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Couldn't compute backedge count\n"
; } } while (false)
;
472 return false;
473 }
474 if (L->getNumBackEdges() != 1) {
475 LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "NumBackEdges is not equal to 1\n"
; } } while (false)
;
476 return false;
477 }
478 if (!L->getExitingBlock()) {
479 LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loop doesn't have unique exit block\n"
; } } while (false)
;
480 return false;
481 }
482 }
483 return true;
484 }
485
486 unsigned selectLoopForInterchange(const LoopVector &LoopList) {
487 // TODO: Add a better heuristic to select the loop to be interchanged based
488 // on the dependence matrix. Currently we select the innermost loop.
489 return LoopList.size() - 1;
490 }
491
492 bool processLoopList(LoopVector LoopList) {
493 bool Changed = false;
494 unsigned LoopNestDepth = LoopList.size();
495 if (LoopNestDepth < 2) {
496 LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loop doesn't contain minimum nesting level.\n"
; } } while (false)
;
497 return false;
498 }
499 if (LoopNestDepth > MaxLoopNestDepth) {
500 LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cannot handle loops of depth greater than "
<< MaxLoopNestDepth << "\n"; } } while (false)
501 << MaxLoopNestDepth << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cannot handle loops of depth greater than "
<< MaxLoopNestDepth << "\n"; } } while (false)
;
502 return false;
503 }
504 if (!isComputableLoopNest(LoopList)) {
505 LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Not valid loop candidate for interchange\n"
; } } while (false)
;
506 return false;
507 }
508
509 LLVM_DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepthdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Processing LoopList of size = "
<< LoopNestDepth << "\n"; } } while (false)
510 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Processing LoopList of size = "
<< LoopNestDepth << "\n"; } } while (false)
;
511
512 CharMatrix DependencyMatrix;
513 Loop *OuterMostLoop = *(LoopList.begin());
514 if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth,
515 OuterMostLoop, DI)) {
516 LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Populating dependency matrix failed\n"
; } } while (false)
;
517 return false;
518 }
519#ifdef DUMP_DEP_MATRICIES
520 LLVM_DEBUG(dbgs() << "Dependence before interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Dependence before interchange\n"
; } } while (false)
;
521 printDepMatrix(DependencyMatrix);
522#endif
523
524 // Get the Outermost loop exit.
525 BasicBlock *LoopNestExit = OuterMostLoop->getExitBlock();
526 if (!LoopNestExit) {
527 LLVM_DEBUG(dbgs() << "OuterMostLoop needs an unique exit block")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "OuterMostLoop needs an unique exit block"
; } } while (false)
;
528 return false;
529 }
530
531 unsigned SelecLoopId = selectLoopForInterchange(LoopList);
532 // Move the selected loop outwards to the best possible position.
533 for (unsigned i = SelecLoopId; i > 0; i--) {
534 bool Interchanged =
535 processLoop(LoopList, i, i - 1, LoopNestExit, DependencyMatrix);
536 if (!Interchanged)
537 return Changed;
538 // Loops interchanged reflect the same in LoopList
539 std::swap(LoopList[i - 1], LoopList[i]);
540
541 // Update the DependencyMatrix
542 interChangeDependencies(DependencyMatrix, i, i - 1);
543#ifdef DUMP_DEP_MATRICIES
544 LLVM_DEBUG(dbgs() << "Dependence after interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Dependence after interchange\n"
; } } while (false)
;
545 printDepMatrix(DependencyMatrix);
546#endif
547 Changed |= Interchanged;
548 }
549 return Changed;
550 }
551
552 bool processLoop(LoopVector LoopList, unsigned InnerLoopId,
553 unsigned OuterLoopId, BasicBlock *LoopNestExit,
554 std::vector<std::vector<char>> &DependencyMatrix) {
555 LLVM_DEBUG(dbgs() << "Processing Inner Loop Id = " << InnerLoopIddo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Processing Inner Loop Id = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << "\n"; } } while (false)
556 << " and OuterLoopId = " << OuterLoopId << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Processing Inner Loop Id = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << "\n"; } } while (false)
;
557 Loop *InnerLoop = LoopList[InnerLoopId];
558 Loop *OuterLoop = LoopList[OuterLoopId];
559
560 LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
561 if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
562 LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Not interchanging loops. Cannot prove legality.\n"
; } } while (false)
;
563 return false;
564 }
565 LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops are legal to interchange\n"
; } } while (false)
;
566 LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
567 if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) {
568 LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Interchanging loops not profitable.\n"
; } } while (false)
;
569 return false;
570 }
571
572 ORE->emit([&]() {
573 return OptimizationRemark(DEBUG_TYPE"loop-interchange", "Interchanged",
574 InnerLoop->getStartLoc(),
575 InnerLoop->getHeader())
576 << "Loop interchanged with enclosing loop.";
577 });
578
579 LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LoopNestExit,
580 LIL);
581 LIT.transform();
582 LLVM_DEBUG(dbgs() << "Loops interchanged.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops interchanged.\n"
; } } while (false)
;
583 LoopsInterchanged++;
584 return true;
585 }
586};
587
588} // end anonymous namespace
589
590bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock *BB) {
591 return any_of(*BB, [](const Instruction &I) {
592 return I.mayHaveSideEffects() || I.mayReadFromMemory();
593 });
594}
595
596bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
597 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
598 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
599 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
600
601 LLVM_DEBUG(dbgs() << "Checking if loops are tightly nested\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Checking if loops are tightly nested\n"
; } } while (false)
;
602
603 // A perfectly nested loop will not have any branch in between the outer and
604 // inner block i.e. outer header will branch to either inner preheader and
605 // outerloop latch.
606 BranchInst *OuterLoopHeaderBI =
607 dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
608 if (!OuterLoopHeaderBI)
609 return false;
610
611 for (BasicBlock *Succ : successors(OuterLoopHeaderBI))
612 if (Succ != InnerLoopPreHeader && Succ != InnerLoop->getHeader() &&
613 Succ != OuterLoopLatch)
614 return false;
615
616 LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Checking instructions in Loop header and Loop latch\n"
; } } while (false)
;
617 // We do not have any basic block in between now make sure the outer header
618 // and outer loop latch doesn't contain any unsafe instructions.
619 if (containsUnsafeInstructions(OuterLoopHeader) ||
620 containsUnsafeInstructions(OuterLoopLatch))
621 return false;
622
623 LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops are perfectly nested\n"
; } } while (false)
;
624 // We have a perfect loop nest.
625 return true;
626}
627
628bool LoopInterchangeLegality::isLoopStructureUnderstood(
629 PHINode *InnerInduction) {
630 unsigned Num = InnerInduction->getNumOperands();
631 BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
632 for (unsigned i = 0; i < Num; ++i) {
633 Value *Val = InnerInduction->getOperand(i);
634 if (isa<Constant>(Val))
635 continue;
636 Instruction *I = dyn_cast<Instruction>(Val);
637 if (!I)
638 return false;
639 // TODO: Handle triangular loops.
640 // e.g. for(int i=0;i<N;i++)
641 // for(int j=i;j<N;j++)
642 unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i);
643 if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
644 InnerLoopPreheader &&
645 !OuterLoop->isLoopInvariant(I)) {
646 return false;
647 }
648 }
649 return true;
650}
651
652// If SV is a LCSSA PHI node with a single incoming value, return the incoming
653// value.
654static Value *followLCSSA(Value *SV) {
655 PHINode *PHI = dyn_cast<PHINode>(SV);
656 if (!PHI)
657 return SV;
658
659 if (PHI->getNumIncomingValues() != 1)
660 return SV;
661 return followLCSSA(PHI->getIncomingValue(0));
662}
663
664// Check V's users to see if it is involved in a reduction in L.
665static PHINode *findInnerReductionPhi(Loop *L, Value *V) {
666 for (Value *User : V->users()) {
667 if (PHINode *PHI = dyn_cast<PHINode>(User)) {
668 if (PHI->getNumIncomingValues() == 1)
669 continue;
670 RecurrenceDescriptor RD;
671 if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD))
672 return PHI;
673 return nullptr;
674 }
675 }
676
677 return nullptr;
678}
679
680bool LoopInterchangeLegality::findInductionAndReductions(
681 Loop *L, SmallVector<PHINode *, 8> &Inductions, Loop *InnerLoop) {
682 if (!L->getLoopLatch() || !L->getLoopPredecessor())
683 return false;
684 for (PHINode &PHI : L->getHeader()->phis()) {
685 RecurrenceDescriptor RD;
686 InductionDescriptor ID;
687 if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
688 Inductions.push_back(&PHI);
689 else {
690 // PHIs in inner loops need to be part of a reduction in the outer loop,
691 // discovered when checking the PHIs of the outer loop earlier.
692 if (!InnerLoop) {
693 if (OuterInnerReductions.find(&PHI) == OuterInnerReductions.end()) {
694 LLVM_DEBUG(dbgs() << "Inner loop PHI is not part of reductions "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop PHI is not part of reductions "
"across the outer loop.\n"; } } while (false)
695 "across the outer loop.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop PHI is not part of reductions "
"across the outer loop.\n"; } } while (false)
;
696 return false;
697 }
698 } else {
699 assert(PHI.getNumIncomingValues() == 2 &&((PHI.getNumIncomingValues() == 2 && "Phis in loop header should have exactly 2 incoming values"
) ? static_cast<void> (0) : __assert_fail ("PHI.getNumIncomingValues() == 2 && \"Phis in loop header should have exactly 2 incoming values\""
, "/build/llvm-toolchain-snapshot-9~svn362543/lib/Transforms/Scalar/LoopInterchange.cpp"
, 700, __PRETTY_FUNCTION__))
700 "Phis in loop header should have exactly 2 incoming values")((PHI.getNumIncomingValues() == 2 && "Phis in loop header should have exactly 2 incoming values"
) ? static_cast<void> (0) : __assert_fail ("PHI.getNumIncomingValues() == 2 && \"Phis in loop header should have exactly 2 incoming values\""
, "/build/llvm-toolchain-snapshot-9~svn362543/lib/Transforms/Scalar/LoopInterchange.cpp"
, 700, __PRETTY_FUNCTION__))
;
701 // Check if we have a PHI node in the outer loop that has a reduction
702 // result from the inner loop as an incoming value.
703 Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch()));
704 PHINode *InnerRedPhi = findInnerReductionPhi(InnerLoop, V);
705 if (!InnerRedPhi ||
706 !llvm::any_of(InnerRedPhi->incoming_values(),
707 [&PHI](Value *V) { return V == &PHI; })) {
708 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed to recognize PHI as an induction or reduction.\n"
; } } while (false)
709 dbgs()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed to recognize PHI as an induction or reduction.\n"
; } } while (false)
710 << "Failed to recognize PHI as an induction or reduction.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed to recognize PHI as an induction or reduction.\n"
; } } while (false)
;
711 return false;
712 }
713 OuterInnerReductions.insert(&PHI);
714 OuterInnerReductions.insert(InnerRedPhi);
715 }
716 }
717 }
718 return true;
719}
720
721static bool containsSafePHI(BasicBlock *Block, bool isOuterLoopExitBlock) {
722 for (PHINode &PHI : Block->phis()) {
723 // Reduction lcssa phi will have only 1 incoming block that from loop latch.
724 if (PHI.getNumIncomingValues() > 1)
725 return false;
726 Instruction *Ins = dyn_cast<Instruction>(PHI.getIncomingValue(0));
727 if (!Ins)
728 return false;
729 // Incoming value for lcssa phi's in outer loop exit can only be inner loop
730 // exits lcssa phi else it would not be tightly nested.
731 if (!isa<PHINode>(Ins) && isOuterLoopExitBlock)
732 return false;
733 }
734 return true;
735}
736
737// This function indicates the current limitations in the transform as a result
738// of which we do not proceed.
739bool LoopInterchangeLegality::currentLimitations() {
740 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
741 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
742
743 // transform currently expects the loop latches to also be the exiting
744 // blocks.
745 if (InnerLoop->getExitingBlock() != InnerLoopLatch ||
746 OuterLoop->getExitingBlock() != OuterLoop->getLoopLatch() ||
747 !isa<BranchInst>(InnerLoopLatch->getTerminator()) ||
748 !isa<BranchInst>(OuterLoop->getLoopLatch()->getTerminator())) {
749 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops where the latch is not the exiting block are not"
<< " supported currently.\n"; } } while (false)
750 dbgs() << "Loops where the latch is not the exiting block are not"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops where the latch is not the exiting block are not"
<< " supported currently.\n"; } } while (false)
751 << " supported currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops where the latch is not the exiting block are not"
<< " supported currently.\n"; } } while (false)
;
752 ORE->emit([&]() {
753 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "ExitingNotLatch",
754 OuterLoop->getStartLoc(),
755 OuterLoop->getHeader())
756 << "Loops where the latch is not the exiting block cannot be"
757 " interchange currently.";
758 });
759 return true;
760 }
761
762 PHINode *InnerInductionVar;
763 SmallVector<PHINode *, 8> Inductions;
764 if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
765 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only outer loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
766 dbgs() << "Only outer loops with induction or reduction PHI nodes "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only outer loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
767 << "are supported currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only outer loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
;
768 ORE->emit([&]() {
769 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedPHIOuter",
770 OuterLoop->getStartLoc(),
771 OuterLoop->getHeader())
772 << "Only outer loops with induction or reduction PHI nodes can be"
773 " interchanged currently.";
774 });
775 return true;
776 }
777
778 // TODO: Currently we handle only loops with 1 induction variable.
779 if (Inductions.size() != 1) {
780 LLVM_DEBUG(dbgs() << "Loops with more than 1 induction variables are not "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops with more than 1 induction variables are not "
<< "supported currently.\n"; } } while (false)
781 << "supported currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops with more than 1 induction variables are not "
<< "supported currently.\n"; } } while (false)
;
782 ORE->emit([&]() {
783 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "MultiIndutionOuter",
784 OuterLoop->getStartLoc(),
785 OuterLoop->getHeader())
786 << "Only outer loops with 1 induction variable can be "
787 "interchanged currently.";
788 });
789 return true;
790 }
791
792 Inductions.clear();
793 if (!findInductionAndReductions(InnerLoop, Inductions, nullptr)) {
794 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only inner loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
795 dbgs() << "Only inner loops with induction or reduction PHI nodes "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only inner loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
796 << "are supported currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only inner loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
;
797 ORE->emit([&]() {
798 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedPHIInner",
799 InnerLoop->getStartLoc(),
800 InnerLoop->getHeader())
801 << "Only inner loops with induction or reduction PHI nodes can be"
802 " interchange currently.";
803 });
804 return true;
805 }
806
807 // TODO: Currently we handle only loops with 1 induction variable.
808 if (Inductions.size() != 1) {
809 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "We currently only support loops with 1 induction variable."
<< "Failed to interchange due to current limitation\n"
; } } while (false)
810 dbgs() << "We currently only support loops with 1 induction variable."do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "We currently only support loops with 1 induction variable."
<< "Failed to interchange due to current limitation\n"
; } } while (false)
811 << "Failed to interchange due to current limitation\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "We currently only support loops with 1 induction variable."
<< "Failed to interchange due to current limitation\n"
; } } while (false)
;
812 ORE->emit([&]() {
813 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "MultiInductionInner",
814 InnerLoop->getStartLoc(),
815 InnerLoop->getHeader())
816 << "Only inner loops with 1 induction variable can be "
817 "interchanged currently.";
818 });
819 return true;
820 }
821 InnerInductionVar = Inductions.pop_back_val();
822
823 // TODO: Triangular loops are not handled for now.
824 if (!isLoopStructureUnderstood(InnerInductionVar)) {
825 LLVM_DEBUG(dbgs() << "Loop structure not understood by pass\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loop structure not understood by pass\n"
; } } while (false)
;
826 ORE->emit([&]() {
827 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedStructureInner",
828 InnerLoop->getStartLoc(),
829 InnerLoop->getHeader())
830 << "Inner loop structure not understood currently.";
831 });
832 return true;
833 }
834
835 // TODO: We only handle LCSSA PHI's corresponding to reduction for now.
836 BasicBlock *InnerExit = InnerLoop->getExitBlock();
837 if (!containsSafePHI(InnerExit, false)) {
838 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Can only handle LCSSA PHIs in inner loops currently.\n"
; } } while (false)
839 dbgs() << "Can only handle LCSSA PHIs in inner loops currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Can only handle LCSSA PHIs in inner loops currently.\n"
; } } while (false)
;
840 ORE->emit([&]() {
841 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "NoLCSSAPHIOuterInner",
842 InnerLoop->getStartLoc(),
843 InnerLoop->getHeader())
844 << "Only inner loops with LCSSA PHIs can be interchange "
845 "currently.";
846 });
847 return true;
848 }
849
850 // TODO: Current limitation: Since we split the inner loop latch at the point
851 // were induction variable is incremented (induction.next); We cannot have
852 // more than 1 user of induction.next since it would result in broken code
853 // after split.
854 // e.g.
855 // for(i=0;i<N;i++) {
856 // for(j = 0;j<M;j++) {
857 // A[j+1][i+2] = A[j][i]+k;
858 // }
859 // }
860 Instruction *InnerIndexVarInc = nullptr;
861 if (InnerInductionVar->getIncomingBlock(0) == InnerLoopPreHeader)
862 InnerIndexVarInc =
863 dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(1));
864 else
865 InnerIndexVarInc =
866 dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(0));
867
868 if (!InnerIndexVarInc) {
869 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Did not find an instruction to increment the induction "
<< "variable.\n"; } } while (false)
870 dbgs() << "Did not find an instruction to increment the induction "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Did not find an instruction to increment the induction "
<< "variable.\n"; } } while (false)
871 << "variable.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Did not find an instruction to increment the induction "
<< "variable.\n"; } } while (false)
;
872 ORE->emit([&]() {
873 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "NoIncrementInInner",
874 InnerLoop->getStartLoc(),
875 InnerLoop->getHeader())
876 << "The inner loop does not increment the induction variable.";
877 });
878 return true;
879 }
880
881 // Since we split the inner loop latch on this induction variable. Make sure
882 // we do not have any instruction between the induction variable and branch
883 // instruction.
884
885 bool FoundInduction = false;
886 for (const Instruction &I :
887 llvm::reverse(InnerLoopLatch->instructionsWithoutDebug())) {
888 if (isa<BranchInst>(I) || isa<CmpInst>(I) || isa<TruncInst>(I) ||
889 isa<ZExtInst>(I))
890 continue;
891
892 // We found an instruction. If this is not induction variable then it is not
893 // safe to split this loop latch.
894 if (!I.isIdenticalTo(InnerIndexVarInc)) {
895 LLVM_DEBUG(dbgs() << "Found unsupported instructions between induction "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found unsupported instructions between induction "
<< "variable increment and branch.\n"; } } while (false
)
896 << "variable increment and branch.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found unsupported instructions between induction "
<< "variable increment and branch.\n"; } } while (false
)
;
897 ORE->emit([&]() {
898 return OptimizationRemarkMissed(
899 DEBUG_TYPE"loop-interchange", "UnsupportedInsBetweenInduction",
900 InnerLoop->getStartLoc(), InnerLoop->getHeader())
901 << "Found unsupported instruction between induction variable "
902 "increment and branch.";
903 });
904 return true;
905 }
906
907 FoundInduction = true;
908 break;
909 }
910 // The loop latch ended and we didn't find the induction variable return as
911 // current limitation.
912 if (!FoundInduction) {
913 LLVM_DEBUG(dbgs() << "Did not find the induction variable.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Did not find the induction variable.\n"
; } } while (false)
;
914 ORE->emit([&]() {
915 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "NoIndutionVariable",
916 InnerLoop->getStartLoc(),
917 InnerLoop->getHeader())
918 << "Did not find the induction variable.";
919 });
920 return true;
921 }
922 return false;
923}
924
925// We currently support LCSSA PHI nodes in the outer loop exit, if their
926// incoming values do not come from the outer loop latch or if the
927// outer loop latch has a single predecessor. In that case, the value will
928// be available if both the inner and outer loop conditions are true, which
929// will still be true after interchanging. If we have multiple predecessor,
930// that may not be the case, e.g. because the outer loop latch may be executed
931// if the inner loop is not executed.
932static bool areLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
933 BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock();
934 for (PHINode &PHI : LoopNestExit->phis()) {
935 // FIXME: We currently are not able to detect floating point reductions
936 // and have to use floating point PHIs as a proxy to prevent
937 // interchanging in the presence of floating point reductions.
938 if (PHI.getType()->isFloatingPointTy())
939 return false;
940 for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
941 Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i));
942 if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch())
943 continue;
944
945 // The incoming value is defined in the outer loop latch. Currently we
946 // only support that in case the outer loop latch has a single predecessor.
947 // This guarantees that the outer loop latch is executed if and only if
948 // the inner loop is executed (because tightlyNested() guarantees that the
949 // outer loop header only branches to the inner loop or the outer loop
950 // latch).
951 // FIXME: We could weaken this logic and allow multiple predecessors,
952 // if the values are produced outside the loop latch. We would need
953 // additional logic to update the PHI nodes in the exit block as
954 // well.
955 if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr)
956 return false;
957 }
958 }
959 return true;
960}
961
962bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
963 unsigned OuterLoopId,
964 CharMatrix &DepMatrix) {
965 if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) {
966 LLVM_DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopIddo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed interchange InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << " due to dependence\n"; } } while (false
)
967 << " and OuterLoopId = " << OuterLoopIddo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed interchange InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << " due to dependence\n"; } } while (false
)
968 << " due to dependence\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed interchange InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << " due to dependence\n"; } } while (false
)
;
969 ORE->emit([&]() {
970 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "Dependence",
971 InnerLoop->getStartLoc(),
972 InnerLoop->getHeader())
973 << "Cannot interchange loops due to dependences.";
974 });
975 return false;
976 }
977 // Check if outer and inner loop contain legal instructions only.
978 for (auto *BB : OuterLoop->blocks())
979 for (Instruction &I : BB->instructionsWithoutDebug())
980 if (CallInst *CI = dyn_cast<CallInst>(&I)) {
981 // readnone functions do not prevent interchanging.
982 if (CI->doesNotReadMemory())
983 continue;
984 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops with call instructions cannot be interchanged "
<< "safely."; } } while (false)
985 dbgs() << "Loops with call instructions cannot be interchanged "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops with call instructions cannot be interchanged "
<< "safely."; } } while (false)
986 << "safely.")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops with call instructions cannot be interchanged "
<< "safely."; } } while (false)
;
987 ORE->emit([&]() {
988 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "CallInst",
989 CI->getDebugLoc(),
990 CI->getParent())
991 << "Cannot interchange loops due to call instruction.";
992 });
993
994 return false;
995 }
996
997 // TODO: The loops could not be interchanged due to current limitations in the
998 // transform module.
999 if (currentLimitations()) {
1000 LLVM_DEBUG(dbgs() << "Not legal because of current transform limitation\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Not legal because of current transform limitation\n"
; } } while (false)
;
1001 return false;
1002 }
1003
1004 // Check if the loops are tightly nested.
1005 if (!tightlyNested(OuterLoop, InnerLoop)) {
1006 LLVM_DEBUG(dbgs() << "Loops not tightly nested\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops not tightly nested\n"
; } } while (false)
;
1007 ORE->emit([&]() {
1008 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "NotTightlyNested",
1009 InnerLoop->getStartLoc(),
1010 InnerLoop->getHeader())
1011 << "Cannot interchange loops because they are not tightly "
1012 "nested.";
1013 });
1014 return false;
1015 }
1016
1017 if (!areLoopExitPHIsSupported(OuterLoop, InnerLoop)) {
1018 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found unsupported PHI nodes in outer loop exit.\n"
; } } while (false)
;
1019 ORE->emit([&]() {
1020 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedExitPHI",
1021 OuterLoop->getStartLoc(),
1022 OuterLoop->getHeader())
1023 << "Found unsupported PHI node in loop exit.";
1024 });
1025 return false;
1026 }
1027
1028 return true;
1029}
1030
1031int LoopInterchangeProfitability::getInstrOrderCost() {
1032 unsigned GoodOrder, BadOrder;
1033 BadOrder = GoodOrder = 0;
1034 for (BasicBlock *BB : InnerLoop->blocks()) {
1035 for (Instruction &Ins : *BB) {
1036 if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) {
1037 unsigned NumOp = GEP->getNumOperands();
1038 bool FoundInnerInduction = false;
1039 bool FoundOuterInduction = false;
1040 for (unsigned i = 0; i < NumOp; ++i) {
1041 const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i));
1042 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal);
1043 if (!AR)
1044 continue;
1045
1046 // If we find the inner induction after an outer induction e.g.
1047 // for(int i=0;i<N;i++)
1048 // for(int j=0;j<N;j++)
1049 // A[i][j] = A[i-1][j-1]+k;
1050 // then it is a good order.
1051 if (AR->getLoop() == InnerLoop) {
1052 // We found an InnerLoop induction after OuterLoop induction. It is
1053 // a good order.
1054 FoundInnerInduction = true;
1055 if (FoundOuterInduction) {
1056 GoodOrder++;
1057 break;
1058 }
1059 }
1060 // If we find the outer induction after an inner induction e.g.
1061 // for(int i=0;i<N;i++)
1062 // for(int j=0;j<N;j++)
1063 // A[j][i] = A[j-1][i-1]+k;
1064 // then it is a bad order.
1065 if (AR->getLoop() == OuterLoop) {
1066 // We found an OuterLoop induction after InnerLoop induction. It is
1067 // a bad order.
1068 FoundOuterInduction = true;
1069 if (FoundInnerInduction) {
1070 BadOrder++;
1071 break;
1072 }
1073 }
1074 }
1075 }
1076 }
1077 }
1078 return GoodOrder - BadOrder;
1079}
1080
1081static bool isProfitableForVectorization(unsigned InnerLoopId,
1082 unsigned OuterLoopId,
1083 CharMatrix &DepMatrix) {
1084 // TODO: Improve this heuristic to catch more cases.
1085 // If the inner loop is loop independent or doesn't carry any dependency it is
1086 // profitable to move this to outer position.
1087 for (auto &Row : DepMatrix) {
1088 if (Row[InnerLoopId] != 'S' && Row[InnerLoopId] != 'I')
1089 return false;
1090 // TODO: We need to improve this heuristic.
1091 if (Row[OuterLoopId] != '=')
1092 return false;
1093 }
1094 // If outer loop has dependence and inner loop is loop independent then it is
1095 // profitable to interchange to enable parallelism.
1096 // If there are no dependences, interchanging will not improve anything.
1097 return !DepMatrix.empty();
1098}
1099
1100bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
1101 unsigned OuterLoopId,
1102 CharMatrix &DepMatrix) {
1103 // TODO: Add better profitability checks.
1104 // e.g
1105 // 1) Construct dependency matrix and move the one with no loop carried dep
1106 // inside to enable vectorization.
1107
1108 // This is rough cost estimation algorithm. It counts the good and bad order
1109 // of induction variables in the instruction and allows reordering if number
1110 // of bad orders is more than good.
1111 int Cost = getInstrOrderCost();
1112 LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cost = " << Cost
<< "\n"; } } while (false)
;
1113 if (Cost < -LoopInterchangeCostThreshold)
1114 return true;
1115
1116 // It is not profitable as per current cache profitability model. But check if
1117 // we can move this loop outside to improve parallelism.
1118 if (isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix))
1119 return true;
1120
1121 ORE->emit([&]() {
1122 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "InterchangeNotProfitable",
1123 InnerLoop->getStartLoc(),
1124 InnerLoop->getHeader())
1125 << "Interchanging loops is too costly (cost="
1126 << ore::NV("Cost", Cost) << ", threshold="
1127 << ore::NV("Threshold", LoopInterchangeCostThreshold)
1128 << ") and it does not improve parallelism.";
1129 });
1130 return false;
1131}
1132
1133void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
1134 Loop *InnerLoop) {
1135 for (Loop *L : *OuterLoop)
1136 if (L == InnerLoop) {
1137 OuterLoop->removeChildLoop(L);
1138 return;
1139 }
1140 llvm_unreachable("Couldn't find loop")::llvm::llvm_unreachable_internal("Couldn't find loop", "/build/llvm-toolchain-snapshot-9~svn362543/lib/Transforms/Scalar/LoopInterchange.cpp"
, 1140)
;
1141}
1142
1143/// Update LoopInfo, after interchanging. NewInner and NewOuter refer to the
1144/// new inner and outer loop after interchanging: NewInner is the original
1145/// outer loop and NewOuter is the original inner loop.
1146///
1147/// Before interchanging, we have the following structure
1148/// Outer preheader
1149// Outer header
1150// Inner preheader
1151// Inner header
1152// Inner body
1153// Inner latch
1154// outer bbs
1155// Outer latch
1156//
1157// After interchanging:
1158// Inner preheader
1159// Inner header
1160// Outer preheader
1161// Outer header
1162// Inner body
1163// outer bbs
1164// Outer latch
1165// Inner latch
1166void LoopInterchangeTransform::restructureLoops(
1167 Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader,
1168 BasicBlock *OrigOuterPreHeader) {
1169 Loop *OuterLoopParent = OuterLoop->getParentLoop();
1170 // The original inner loop preheader moves from the new inner loop to
1171 // the parent loop, if there is one.
1172 NewInner->removeBlockFromLoop(OrigInnerPreHeader);
1173 LI->changeLoopFor(OrigInnerPreHeader, OuterLoopParent);
1174
1175 // Switch the loop levels.
1176 if (OuterLoopParent) {
1177 // Remove the loop from its parent loop.
1178 removeChildLoop(OuterLoopParent, NewInner);
1179 removeChildLoop(NewInner, NewOuter);
1180 OuterLoopParent->addChildLoop(NewOuter);
1181 } else {
1182 removeChildLoop(NewInner, NewOuter);
1183 LI->changeTopLevelLoop(NewInner, NewOuter);
1184 }
1185 while (!NewOuter->empty())
1186 NewInner->addChildLoop(NewOuter->removeChildLoop(NewOuter->begin()));
1187 NewOuter->addChildLoop(NewInner);
1188
1189 // BBs from the original inner loop.
1190 SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->blocks());
1191
1192 // Add BBs from the original outer loop to the original inner loop (excluding
1193 // BBs already in inner loop)
1194 for (BasicBlock *BB : NewInner->blocks())
1195 if (LI->getLoopFor(BB) == NewInner)
1196 NewOuter->addBlockEntry(BB);
1197
1198 // Now remove inner loop header and latch from the new inner loop and move
1199 // other BBs (the loop body) to the new inner loop.
1200 BasicBlock *OuterHeader = NewOuter->getHeader();
1201 BasicBlock *OuterLatch = NewOuter->getLoopLatch();
1202 for (BasicBlock *BB : OrigInnerBBs) {
1203 // Nothing will change for BBs in child loops.
1204 if (LI->getLoopFor(BB) != NewOuter)
1205 continue;
1206 // Remove the new outer loop header and latch from the new inner loop.
1207 if (BB == OuterHeader || BB == OuterLatch)
1208 NewInner->removeBlockFromLoop(BB);
1209 else
1210 LI->changeLoopFor(BB, NewInner);
1211 }
1212
1213 // The preheader of the original outer loop becomes part of the new
1214 // outer loop.
1215 NewOuter->addBlockEntry(OrigOuterPreHeader);
1216 LI->changeLoopFor(OrigOuterPreHeader, NewOuter);
1217
1218 // Tell SE that we move the loops around.
1219 SE->forgetLoop(NewOuter);
1220 SE->forgetLoop(NewInner);
1221}
1222
1223bool LoopInterchangeTransform::transform() {
1224 bool Transformed = false;
1225 Instruction *InnerIndexVar;
1226
1227 if (InnerLoop->getSubLoops().empty()) {
1228 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1229 LLVM_DEBUG(dbgs() << "Calling Split Inner Loop\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Calling Split Inner Loop\n"
; } } while (false)
;
1230 PHINode *InductionPHI = getInductionVariable(InnerLoop, SE);
1231 if (!InductionPHI) {
1232 LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed to find the point to split loop latch \n"
; } } while (false)
;
1233 return false;
1234 }
1235
1236 if (InductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
1237 InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(1));
1238 else
1239 InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(0));
1240
1241 // Ensure that InductionPHI is the first Phi node.
1242 if (&InductionPHI->getParent()->front() != InductionPHI)
1243 InductionPHI->moveBefore(&InductionPHI->getParent()->front());
1244
1245 // Split at the place were the induction variable is
1246 // incremented/decremented.
1247 // TODO: This splitting logic may not work always. Fix this.
1248 splitInnerLoopLatch(InnerIndexVar);
1249 LLVM_DEBUG(dbgs() << "splitInnerLoopLatch done\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "splitInnerLoopLatch done\n"
; } } while (false)
;
1250
1251 // Splits the inner loops phi nodes out into a separate basic block.
1252 BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1253 SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI);
1254 LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "splitting InnerLoopHeader done\n"
; } } while (false)
;
1255 }
1256
1257 Transformed |= adjustLoopLinks();
1258 if (!Transformed) {
1259 LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "adjustLoopLinks failed\n"
; } } while (false)
;
1260 return false;
1261 }
1262
1263 return true;
1264}
1265
1266void LoopInterchangeTransform::splitInnerLoopLatch(Instruction *Inc) {
1267 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
1268 BasicBlock *InnerLoopLatchPred = InnerLoopLatch;
1269 InnerLoopLatch = SplitBlock(InnerLoopLatchPred, Inc, DT, LI);
Value stored to 'InnerLoopLatch' is never read
1270}
1271
1272/// \brief Move all instructions except the terminator from FromBB right before
1273/// InsertBefore
1274static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
1275 auto &ToList = InsertBefore->getParent()->getInstList();
1276 auto &FromList = FromBB->getInstList();
1277
1278 ToList.splice(InsertBefore->getIterator(), FromList, FromList.begin(),
1279 FromBB->getTerminator()->getIterator());
1280}
1281
1282/// Update BI to jump to NewBB instead of OldBB. Records updates to
1283/// the dominator tree in DTUpdates, if DT should be preserved.
1284static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
1285 BasicBlock *NewBB,
1286 std::vector<DominatorTree::UpdateType> &DTUpdates) {
1287 assert(llvm::count_if(successors(BI),((llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return
BB == OldBB; }) < 2 && "BI must jump to OldBB at most once."
) ? static_cast<void> (0) : __assert_fail ("llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) < 2 && \"BI must jump to OldBB at most once.\""
, "/build/llvm-toolchain-snapshot-9~svn362543/lib/Transforms/Scalar/LoopInterchange.cpp"
, 1289, __PRETTY_FUNCTION__))
1288 [OldBB](BasicBlock *BB) { return BB == OldBB; }) < 2 &&((llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return
BB == OldBB; }) < 2 && "BI must jump to OldBB at most once."
) ? static_cast<void> (0) : __assert_fail ("llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) < 2 && \"BI must jump to OldBB at most once.\""
, "/build/llvm-toolchain-snapshot-9~svn362543/lib/Transforms/Scalar/LoopInterchange.cpp"
, 1289, __PRETTY_FUNCTION__))
1289 "BI must jump to OldBB at most once.")((llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return
BB == OldBB; }) < 2 && "BI must jump to OldBB at most once."
) ? static_cast<void> (0) : __assert_fail ("llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) < 2 && \"BI must jump to OldBB at most once.\""
, "/build/llvm-toolchain-snapshot-9~svn362543/lib/Transforms/Scalar/LoopInterchange.cpp"
, 1289, __PRETTY_FUNCTION__))
;
1290 for (unsigned i = 0, e = BI->getNumSuccessors(); i < e; ++i) {
1291 if (BI->getSuccessor(i) == OldBB) {
1292 BI->setSuccessor(i, NewBB);
1293
1294 DTUpdates.push_back(
1295 {DominatorTree::UpdateKind::Insert, BI->getParent(), NewBB});
1296 DTUpdates.push_back(
1297 {DominatorTree::UpdateKind::Delete, BI->getParent(), OldBB});
1298 break;
1299 }
1300 }
1301}
1302
1303// Move Lcssa PHIs to the right place.
1304static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,
1305 BasicBlock *InnerLatch, BasicBlock *OuterHeader,
1306 BasicBlock *OuterLatch, BasicBlock *OuterExit) {
1307
1308 // Deal with LCSSA PHI nodes in the exit block of the inner loop, that are
1309 // defined either in the header or latch. Those blocks will become header and
1310 // latch of the new outer loop, and the only possible users can PHI nodes
1311 // in the exit block of the loop nest or the outer loop header (reduction
1312 // PHIs, in that case, the incoming value must be defined in the inner loop
1313 // header). We can just substitute the user with the incoming value and remove
1314 // the PHI.
1315 for (PHINode &P : make_early_inc_range(InnerExit->phis())) {
1316 assert(P.getNumIncomingValues() == 1 &&((P.getNumIncomingValues() == 1 && "Only loops with a single exit are supported!"
) ? static_cast<void> (0) : __assert_fail ("P.getNumIncomingValues() == 1 && \"Only loops with a single exit are supported!\""
, "/build/llvm-toolchain-snapshot-9~svn362543/lib/Transforms/Scalar/LoopInterchange.cpp"
, 1317, __PRETTY_FUNCTION__))
1317 "Only loops with a single exit are supported!")((P.getNumIncomingValues() == 1 && "Only loops with a single exit are supported!"
) ? static_cast<void> (0) : __assert_fail ("P.getNumIncomingValues() == 1 && \"Only loops with a single exit are supported!\""
, "/build/llvm-toolchain-snapshot-9~svn362543/lib/Transforms/Scalar/LoopInterchange.cpp"
, 1317, __PRETTY_FUNCTION__))
;
1318
1319 // Incoming values are guaranteed be instructions currently.
1320 auto IncI = cast<Instruction>(P.getIncomingValueForBlock(InnerLatch));
1321 // Skip phis with incoming values from the inner loop body, excluding the
1322 // header and latch.
1323 if (IncI->getParent() != InnerLatch && IncI->getParent() != InnerHeader)
1324 continue;
1325
1326 assert(all_of(P.users(),((all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader
](User *U) { return (cast<PHINode>(U)->getParent() ==
OuterHeader && IncI->getParent() == InnerHeader) ||
cast<PHINode>(U)->getParent() == OuterExit; }) &&
"Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? static_cast
<void> (0) : __assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "/build/llvm-toolchain-snapshot-9~svn362543/lib/Transforms/Scalar/LoopInterchange.cpp"
, 1334, __PRETTY_FUNCTION__))
1327 [OuterHeader, OuterExit, IncI, InnerHeader](User *U) {((all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader
](User *U) { return (cast<PHINode>(U)->getParent() ==
OuterHeader && IncI->getParent() == InnerHeader) ||
cast<PHINode>(U)->getParent() == OuterExit; }) &&
"Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? static_cast
<void> (0) : __assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "/build/llvm-toolchain-snapshot-9~svn362543/lib/Transforms/Scalar/LoopInterchange.cpp"
, 1334, __PRETTY_FUNCTION__))
1328 return (cast<PHINode>(U)->getParent() == OuterHeader &&((all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader
](User *U) { return (cast<PHINode>(U)->getParent() ==
OuterHeader && IncI->getParent() == InnerHeader) ||
cast<PHINode>(U)->getParent() == OuterExit; }) &&
"Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? static_cast
<void> (0) : __assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "/build/llvm-toolchain-snapshot-9~svn362543/lib/Transforms/Scalar/LoopInterchange.cpp"
, 1334, __PRETTY_FUNCTION__))
1329 IncI->getParent() == InnerHeader) ||((all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader
](User *U) { return (cast<PHINode>(U)->getParent() ==
OuterHeader && IncI->getParent() == InnerHeader) ||
cast<PHINode>(U)->getParent() == OuterExit; }) &&
"Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? static_cast
<void> (0) : __assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "/build/llvm-toolchain-snapshot-9~svn362543/lib/Transforms/Scalar/LoopInterchange.cpp"
, 1334, __PRETTY_FUNCTION__))
1330 cast<PHINode>(U)->getParent() == OuterExit;((all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader
](User *U) { return (cast<PHINode>(U)->getParent() ==
OuterHeader && IncI->getParent() == InnerHeader) ||
cast<PHINode>(U)->getParent() == OuterExit; }) &&
"Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? static_cast
<void> (0) : __assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "/build/llvm-toolchain-snapshot-9~svn362543/lib/Transforms/Scalar/LoopInterchange.cpp"
, 1334, __PRETTY_FUNCTION__))
1331 }) &&((all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader
](User *U) { return (cast<PHINode>(U)->getParent() ==
OuterHeader && IncI->getParent() == InnerHeader) ||
cast<PHINode>(U)->getParent() == OuterExit; }) &&
"Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? static_cast
<void> (0) : __assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "/build/llvm-toolchain-snapshot-9~svn362543/lib/Transforms/Scalar/LoopInterchange.cpp"
, 1334, __PRETTY_FUNCTION__))
1332 "Can only replace phis iff the uses are in the loop nest exit or "((all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader
](User *U) { return (cast<PHINode>(U)->getParent() ==
OuterHeader && IncI->getParent() == InnerHeader) ||
cast<PHINode>(U)->getParent() == OuterExit; }) &&
"Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? static_cast
<void> (0) : __assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "/build/llvm-toolchain-snapshot-9~svn362543/lib/Transforms/Scalar/LoopInterchange.cpp"
, 1334, __PRETTY_FUNCTION__))
1333 "the incoming value is defined in the inner header (it will "((all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader
](User *U) { return (cast<PHINode>(U)->getParent() ==
OuterHeader && IncI->getParent() == InnerHeader) ||
cast<PHINode>(U)->getParent() == OuterExit; }) &&
"Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? static_cast
<void> (0) : __assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "/build/llvm-toolchain-snapshot-9~svn362543/lib/Transforms/Scalar/LoopInterchange.cpp"
, 1334, __PRETTY_FUNCTION__))
1334 "dominate all loop blocks after interchanging)")((all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader
](User *U) { return (cast<PHINode>(U)->getParent() ==
OuterHeader && IncI->getParent() == InnerHeader) ||
cast<PHINode>(U)->getParent() == OuterExit; }) &&
"Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? static_cast
<void> (0) : __assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "/build/llvm-toolchain-snapshot-9~svn362543/lib/Transforms/Scalar/LoopInterchange.cpp"
, 1334, __PRETTY_FUNCTION__))
;
1335 P.replaceAllUsesWith(IncI);
1336 P.eraseFromParent();
1337 }
1338
1339 SmallVector<PHINode *, 8> LcssaInnerExit;
1340 for (PHINode &P : InnerExit->phis())
1341 LcssaInnerExit.push_back(&P);
1342
1343 SmallVector<PHINode *, 8> LcssaInnerLatch;
1344 for (PHINode &P : InnerLatch->phis())
1345 LcssaInnerLatch.push_back(&P);
1346
1347 // Lcssa PHIs for values used outside the inner loop are in InnerExit.
1348 // If a PHI node has users outside of InnerExit, it has a use outside the
1349 // interchanged loop and we have to preserve it. We move these to
1350 // InnerLatch, which will become the new exit block for the innermost
1351 // loop after interchanging.
1352 for (PHINode *P : LcssaInnerExit)
1353 P->moveBefore(InnerLatch->getFirstNonPHI());
1354
1355 // If the inner loop latch contains LCSSA PHIs, those come from a child loop
1356 // and we have to move them to the new inner latch.
1357 for (PHINode *P : LcssaInnerLatch)
1358 P->moveBefore(InnerExit->getFirstNonPHI());
1359
1360 // Deal with LCSSA PHI nodes in the loop nest exit block. For PHIs that have
1361 // incoming values from the outer latch or header, we have to add a new PHI
1362 // in the inner loop latch, which became the exit block of the outer loop,
1363 // after interchanging.
1364 if (OuterExit) {
1365 for (PHINode &P : OuterExit->phis()) {
1366 if (P.getNumIncomingValues() != 1)
1367 continue;
1368 // Skip Phis with incoming values not defined in the outer loop's header
1369 // and latch. Also skip incoming phis defined in the latch. Those should
1370 // already have been updated.
1371 auto I = dyn_cast<Instruction>(P.getIncomingValue(0));
1372 if (!I || ((I->getParent() != OuterLatch || isa<PHINode>(I)) &&
1373 I->getParent() != OuterHeader))
1374 continue;
1375
1376 PHINode *NewPhi = dyn_cast<PHINode>(P.clone());
1377 NewPhi->setIncomingValue(0, P.getIncomingValue(0));
1378 NewPhi->setIncomingBlock(0, OuterLatch);
1379 NewPhi->insertBefore(InnerLatch->getFirstNonPHI());
1380 P.setIncomingValue(0, NewPhi);
1381 }
1382 }
1383
1384 // Now adjust the incoming blocks for the LCSSA PHIs.
1385 // For PHIs moved from Inner's exit block, we need to replace Inner's latch
1386 // with the new latch.
1387 InnerLatch->replacePhiUsesWith(InnerLatch, OuterLatch);
1388}
1389
1390bool LoopInterchangeTransform::adjustLoopBranches() {
1391 LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "adjustLoopBranches called\n"
; } } while (false)
;
1392 std::vector<DominatorTree::UpdateType> DTUpdates;
1393
1394 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1395 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1396
1397 assert(OuterLoopPreHeader != OuterLoop->getHeader() &&((OuterLoopPreHeader != OuterLoop->getHeader() && InnerLoopPreHeader
!= InnerLoop->getHeader() && OuterLoopPreHeader &&
InnerLoopPreHeader && "Guaranteed by loop-simplify form"
) ? static_cast<void> (0) : __assert_fail ("OuterLoopPreHeader != OuterLoop->getHeader() && InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader && InnerLoopPreHeader && \"Guaranteed by loop-simplify form\""
, "/build/llvm-toolchain-snapshot-9~svn362543/lib/Transforms/Scalar/LoopInterchange.cpp"
, 1399, __PRETTY_FUNCTION__))
1398 InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader &&((OuterLoopPreHeader != OuterLoop->getHeader() && InnerLoopPreHeader
!= InnerLoop->getHeader() && OuterLoopPreHeader &&
InnerLoopPreHeader && "Guaranteed by loop-simplify form"
) ? static_cast<void> (0) : __assert_fail ("OuterLoopPreHeader != OuterLoop->getHeader() && InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader && InnerLoopPreHeader && \"Guaranteed by loop-simplify form\""
, "/build/llvm-toolchain-snapshot-9~svn362543/lib/Transforms/Scalar/LoopInterchange.cpp"
, 1399, __PRETTY_FUNCTION__))
1399 InnerLoopPreHeader && "Guaranteed by loop-simplify form")((OuterLoopPreHeader != OuterLoop->getHeader() && InnerLoopPreHeader
!= InnerLoop->getHeader() && OuterLoopPreHeader &&
InnerLoopPreHeader && "Guaranteed by loop-simplify form"
) ? static_cast<void> (0) : __assert_fail ("OuterLoopPreHeader != OuterLoop->getHeader() && InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader && InnerLoopPreHeader && \"Guaranteed by loop-simplify form\""
, "/build/llvm-toolchain-snapshot-9~svn362543/lib/Transforms/Scalar/LoopInterchange.cpp"
, 1399, __PRETTY_FUNCTION__))
;
1400 // Ensure that both preheaders do not contain PHI nodes and have single
1401 // predecessors. This allows us to move them easily. We use
1402 // InsertPreHeaderForLoop to create an 'extra' preheader, if the existing
1403 // preheaders do not satisfy those conditions.
1404 if (isa<PHINode>(OuterLoopPreHeader->begin()) ||
1405 !OuterLoopPreHeader->getUniquePredecessor())
1406 OuterLoopPreHeader =
1407 InsertPreheaderForLoop(OuterLoop, DT, LI, nullptr, true);
1408 if (InnerLoopPreHeader == OuterLoop->getHeader())
1409 InnerLoopPreHeader =
1410 InsertPreheaderForLoop(InnerLoop, DT, LI, nullptr, true);
1411
1412 // Adjust the loop preheader
1413 BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1414 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1415 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
1416 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1417 BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor();
1418 BasicBlock *InnerLoopLatchPredecessor =
1419 InnerLoopLatch->getUniquePredecessor();
1420 BasicBlock *InnerLoopLatchSuccessor;
1421 BasicBlock *OuterLoopLatchSuccessor;
1422
1423 BranchInst *OuterLoopLatchBI =
1424 dyn_cast<BranchInst>(OuterLoopLatch->getTerminator());
1425 BranchInst *InnerLoopLatchBI =
1426 dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
1427 BranchInst *OuterLoopHeaderBI =
1428 dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
1429 BranchInst *InnerLoopHeaderBI =
1430 dyn_cast<BranchInst>(InnerLoopHeader->getTerminator());
1431
1432 if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
1433 !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
1434 !InnerLoopHeaderBI)
1435 return false;
1436
1437 BranchInst *InnerLoopLatchPredecessorBI =
1438 dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator());
1439 BranchInst *OuterLoopPredecessorBI =
1440 dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator());
1441
1442 if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
1443 return false;
1444 BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor();
1445 if (!InnerLoopHeaderSuccessor)
1446 return false;
1447
1448 // Adjust Loop Preheader and headers
1449 updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader,
1450 InnerLoopPreHeader, DTUpdates);
1451 updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, LoopExit, DTUpdates);
1452 updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader,
1453 InnerLoopHeaderSuccessor, DTUpdates);
1454
1455 // Adjust reduction PHI's now that the incoming block has changed.
1456 InnerLoopHeaderSuccessor->replacePhiUsesWith(InnerLoopHeader,
1457 OuterLoopHeader);
1458
1459 updateSuccessor(InnerLoopHeaderBI, InnerLoopHeaderSuccessor,
1460 OuterLoopPreHeader, DTUpdates);
1461
1462 // -------------Adjust loop latches-----------
1463 if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader)
1464 InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1);
1465 else
1466 InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0);
1467
1468 updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch,
1469 InnerLoopLatchSuccessor, DTUpdates);
1470
1471
1472 if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader)
1473 OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1);
1474 else
1475 OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0);
1476
1477 updateSuccessor(InnerLoopLatchBI, InnerLoopLatchSuccessor,
1478 OuterLoopLatchSuccessor, DTUpdates);
1479 updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
1480 DTUpdates);
1481
1482 DT->applyUpdates(DTUpdates);
1483 restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
1484 OuterLoopPreHeader);
1485
1486 moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
1487 OuterLoopHeader, OuterLoopLatch, InnerLoop->getExitBlock());
1488 // For PHIs in the exit block of the outer loop, outer's latch has been
1489 // replaced by Inners'.
1490 OuterLoopLatchSuccessor->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
1491
1492 // Now update the reduction PHIs in the inner and outer loop headers.
1493 SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
1494 for (PHINode &PHI : drop_begin(InnerLoopHeader->phis(), 1))
1495 InnerLoopPHIs.push_back(cast<PHINode>(&PHI));
1496 for (PHINode &PHI : drop_begin(OuterLoopHeader->phis(), 1))
1497 OuterLoopPHIs.push_back(cast<PHINode>(&PHI));
1498
1499 auto &OuterInnerReductions = LIL.getOuterInnerReductions();
1500 (void)OuterInnerReductions;
1501
1502 // Now move the remaining reduction PHIs from outer to inner loop header and
1503 // vice versa. The PHI nodes must be part of a reduction across the inner and
1504 // outer loop and all the remains to do is and updating the incoming blocks.
1505 for (PHINode *PHI : OuterLoopPHIs) {
1506 PHI->moveBefore(InnerLoopHeader->getFirstNonPHI());
1507 assert(OuterInnerReductions.find(PHI) != OuterInnerReductions.end() &&((OuterInnerReductions.find(PHI) != OuterInnerReductions.end(
) && "Expected a reduction PHI node") ? static_cast<
void> (0) : __assert_fail ("OuterInnerReductions.find(PHI) != OuterInnerReductions.end() && \"Expected a reduction PHI node\""
, "/build/llvm-toolchain-snapshot-9~svn362543/lib/Transforms/Scalar/LoopInterchange.cpp"
, 1508, __PRETTY_FUNCTION__))
1508 "Expected a reduction PHI node")((OuterInnerReductions.find(PHI) != OuterInnerReductions.end(
) && "Expected a reduction PHI node") ? static_cast<
void> (0) : __assert_fail ("OuterInnerReductions.find(PHI) != OuterInnerReductions.end() && \"Expected a reduction PHI node\""
, "/build/llvm-toolchain-snapshot-9~svn362543/lib/Transforms/Scalar/LoopInterchange.cpp"
, 1508, __PRETTY_FUNCTION__))
;
1509 }
1510 for (PHINode *PHI : InnerLoopPHIs) {
1511 PHI->moveBefore(OuterLoopHeader->getFirstNonPHI());
1512 assert(OuterInnerReductions.find(PHI) != OuterInnerReductions.end() &&((OuterInnerReductions.find(PHI) != OuterInnerReductions.end(
) && "Expected a reduction PHI node") ? static_cast<
void> (0) : __assert_fail ("OuterInnerReductions.find(PHI) != OuterInnerReductions.end() && \"Expected a reduction PHI node\""
, "/build/llvm-toolchain-snapshot-9~svn362543/lib/Transforms/Scalar/LoopInterchange.cpp"
, 1513, __PRETTY_FUNCTION__))
1513 "Expected a reduction PHI node")((OuterInnerReductions.find(PHI) != OuterInnerReductions.end(
) && "Expected a reduction PHI node") ? static_cast<
void> (0) : __assert_fail ("OuterInnerReductions.find(PHI) != OuterInnerReductions.end() && \"Expected a reduction PHI node\""
, "/build/llvm-toolchain-snapshot-9~svn362543/lib/Transforms/Scalar/LoopInterchange.cpp"
, 1513, __PRETTY_FUNCTION__))
;
1514 }
1515
1516 // Update the incoming blocks for moved PHI nodes.
1517 OuterLoopHeader->replacePhiUsesWith(InnerLoopPreHeader, OuterLoopPreHeader);
1518 OuterLoopHeader->replacePhiUsesWith(InnerLoopLatch, OuterLoopLatch);
1519 InnerLoopHeader->replacePhiUsesWith(OuterLoopPreHeader, InnerLoopPreHeader);
1520 InnerLoopHeader->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
1521
1522 return true;
1523}
1524
1525void LoopInterchangeTransform::adjustLoopPreheaders() {
1526 // We have interchanged the preheaders so we need to interchange the data in
1527 // the preheader as well.
1528 // This is because the content of inner preheader was previously executed
1529 // inside the outer loop.
1530 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1531 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1532 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1533 BranchInst *InnerTermBI =
1534 cast<BranchInst>(InnerLoopPreHeader->getTerminator());
1535
1536 // These instructions should now be executed inside the loop.
1537 // Move instruction into a new block after outer header.
1538 moveBBContents(InnerLoopPreHeader, OuterLoopHeader->getTerminator());
1539 // These instructions were not executed previously in the loop so move them to
1540 // the older inner loop preheader.
1541 moveBBContents(OuterLoopPreHeader, InnerTermBI);
1542}
1543
1544bool LoopInterchangeTransform::adjustLoopLinks() {
1545 // Adjust all branches in the inner and outer loop.
1546 bool Changed = adjustLoopBranches();
1547 if (Changed)
1548 adjustLoopPreheaders();
1549 return Changed;
1550}
1551
1552char LoopInterchange::ID = 0;
1553
1554INITIALIZE_PASS_BEGIN(LoopInterchange, "loop-interchange",static void *initializeLoopInterchangePassOnce(PassRegistry &
Registry) {
1555 "Interchanges loops for cache reuse", false, false)static void *initializeLoopInterchangePassOnce(PassRegistry &
Registry) {
1556INITIALIZE_PASS_DEPENDENCY(LoopPass)initializeLoopPassPass(Registry);
1557INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass)initializeDependenceAnalysisWrapperPassPass(Registry);
1558INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)initializeOptimizationRemarkEmitterWrapperPassPass(Registry);
1559
1560INITIALIZE_PASS_END(LoopInterchange, "loop-interchange",PassInfo *PI = new PassInfo( "Interchanges loops for cache reuse"
, "loop-interchange", &LoopInterchange::ID, PassInfo::NormalCtor_t
(callDefaultCtor<LoopInterchange>), false, false); Registry
.registerPass(*PI, true); return PI; } static llvm::once_flag
InitializeLoopInterchangePassFlag; void llvm::initializeLoopInterchangePass
(PassRegistry &Registry) { llvm::call_once(InitializeLoopInterchangePassFlag
, initializeLoopInterchangePassOnce, std::ref(Registry)); }
1561 "Interchanges loops for cache reuse", false, false)PassInfo *PI = new PassInfo( "Interchanges loops for cache reuse"
, "loop-interchange", &LoopInterchange::ID, PassInfo::NormalCtor_t
(callDefaultCtor<LoopInterchange>), false, false); Registry
.registerPass(*PI, true); return PI; } static llvm::once_flag
InitializeLoopInterchangePassFlag; void llvm::initializeLoopInterchangePass
(PassRegistry &Registry) { llvm::call_once(InitializeLoopInterchangePassFlag
, initializeLoopInterchangePassOnce, std::ref(Registry)); }
1562
1563Pass *llvm::createLoopInterchangePass() { return new LoopInterchange(); }