LLVM  16.0.0git
LoopFlatten.cpp
Go to the documentation of this file.
1 //===- LoopFlatten.cpp - Loop flattening pass------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass flattens pairs nested loops into a single loop.
10 //
11 // The intention is to optimise loop nests like this, which together access an
12 // array linearly:
13 //
14 // for (int i = 0; i < N; ++i)
15 // for (int j = 0; j < M; ++j)
16 // f(A[i*M+j]);
17 //
18 // into one loop:
19 //
20 // for (int i = 0; i < (N*M); ++i)
21 // f(A[i]);
22 //
23 // It can also flatten loops where the induction variables are not used in the
24 // loop. This is only worth doing if the induction variables are only used in an
25 // expression like i*M+j. If they had any other uses, we would have to insert a
26 // div/mod to reconstruct the original values, so this wouldn't be profitable.
27 //
28 // We also need to prove that N*M will not overflow. The preferred solution is
29 // to widen the IV, which avoids overflow checks, so that is tried first. If
30 // the IV cannot be widened, then we try to determine that this new tripcount
31 // expression won't overflow.
32 //
33 // Q: Does LoopFlatten use SCEV?
34 // Short answer: Yes and no.
35 //
36 // Long answer:
37 // For this transformation to be valid, we require all uses of the induction
38 // variables to be linear expressions of the form i*M+j. The different Loop
39 // APIs are used to get some loop components like the induction variable,
40 // compare statement, etc. In addition, we do some pattern matching to find the
41 // linear expressions and other loop components like the loop increment. The
42 // latter are examples of expressions that do use the induction variable, but
43 // are safe to ignore when we check all uses to be of the form i*M+j. We keep
44 // track of all of this in bookkeeping struct FlattenInfo.
45 // We assume the loops to be canonical, i.e. starting at 0 and increment with
46 // 1. This makes RHS of the compare the loop tripcount (with the right
47 // predicate). We use SCEV to then sanity check that this tripcount matches
48 // with the tripcount as computed by SCEV.
49 //
50 //===----------------------------------------------------------------------===//
51 
53 
54 #include "llvm/ADT/Statistic.h"
56 #include "llvm/Analysis/LoopInfo.h"
63 #include "llvm/IR/Dominators.h"
64 #include "llvm/IR/Function.h"
65 #include "llvm/IR/IRBuilder.h"
66 #include "llvm/IR/Module.h"
67 #include "llvm/IR/PatternMatch.h"
68 #include "llvm/InitializePasses.h"
69 #include "llvm/Pass.h"
70 #include "llvm/Support/Debug.h"
72 #include "llvm/Transforms/Scalar.h"
78 
79 using namespace llvm;
80 using namespace llvm::PatternMatch;
81 
82 #define DEBUG_TYPE "loop-flatten"
83 
84 STATISTIC(NumFlattened, "Number of loops flattened");
85 
87  "loop-flatten-cost-threshold", cl::Hidden, cl::init(2),
88  cl::desc("Limit on the cost of instructions that can be repeated due to "
89  "loop flattening"));
90 
91 static cl::opt<bool>
92  AssumeNoOverflow("loop-flatten-assume-no-overflow", cl::Hidden,
93  cl::init(false),
94  cl::desc("Assume that the product of the two iteration "
95  "trip counts will never overflow"));
96 
97 static cl::opt<bool>
98  WidenIV("loop-flatten-widen-iv", cl::Hidden, cl::init(true),
99  cl::desc("Widen the loop induction variables, if possible, so "
100  "overflow checks won't reject flattening"));
101 
102 // We require all uses of both induction variables to match this pattern:
103 //
104 // (OuterPHI * InnerTripCount) + InnerPHI
105 //
106 // I.e., it needs to be a linear expression of the induction variables and the
107 // inner loop trip count. We keep track of all different expressions on which
108 // checks will be performed in this bookkeeping struct.
109 //
110 struct FlattenInfo {
111  Loop *OuterLoop = nullptr; // The loop pair to be flattened.
112  Loop *InnerLoop = nullptr;
113 
114  PHINode *InnerInductionPHI = nullptr; // These PHINodes correspond to loop
115  PHINode *OuterInductionPHI = nullptr; // induction variables, which are
116  // expected to start at zero and
117  // increment by one on each loop.
118 
119  Value *InnerTripCount = nullptr; // The product of these two tripcounts
120  Value *OuterTripCount = nullptr; // will be the new flattened loop
121  // tripcount. Also used to recognise a
122  // linear expression that will be replaced.
123 
124  SmallPtrSet<Value *, 4> LinearIVUses; // Contains the linear expressions
125  // of the form i*M+j that will be
126  // replaced.
127 
128  BinaryOperator *InnerIncrement = nullptr; // Uses of induction variables in
129  BinaryOperator *OuterIncrement = nullptr; // loop control statements that
130  BranchInst *InnerBranch = nullptr; // are safe to ignore.
131 
132  BranchInst *OuterBranch = nullptr; // The instruction that needs to be
133  // updated with new tripcount.
134 
136 
137  bool Widened = false; // Whether this holds the flatten info before or after
138  // widening.
139 
140  PHINode *NarrowInnerInductionPHI = nullptr; // Holds the old/narrow induction
141  PHINode *NarrowOuterInductionPHI = nullptr; // phis, i.e. the Phis before IV
142  // has been applied. Used to skip
143  // checks on phi nodes.
144 
145  FlattenInfo(Loop *OL, Loop *IL) : OuterLoop(OL), InnerLoop(IL){};
146 
148  // This can't be the narrow phi if we haven't widened the IV first.
149  if (!Widened)
150  return false;
151  return NarrowInnerInductionPHI == Phi || NarrowOuterInductionPHI == Phi;
152  }
154  return InnerIncrement == U;
155  }
157  return OuterIncrement == U;
158  }
160  return InnerBranch->getCondition() == U;
161  }
162 
164  for (User *U : OuterInductionPHI->users()) {
165  if (isOuterLoopIncrement(U))
166  continue;
167 
168  auto IsValidOuterPHIUses = [&] (User *U) -> bool {
169  LLVM_DEBUG(dbgs() << "Found use of outer induction variable: "; U->dump());
170  if (!ValidOuterPHIUses.count(U)) {
171  LLVM_DEBUG(dbgs() << "Did not match expected pattern, bailing\n");
172  return false;
173  }
174  LLVM_DEBUG(dbgs() << "Use is optimisable\n");
175  return true;
176  };
177 
178  if (auto *V = dyn_cast<TruncInst>(U)) {
179  for (auto *K : V->users()) {
180  if (!IsValidOuterPHIUses(K))
181  return false;
182  }
183  continue;
184  }
185 
186  if (!IsValidOuterPHIUses(U))
187  return false;
188  }
189  return true;
190  }
191 
192  bool matchLinearIVUser(User *U, Value *InnerTripCount,
193  SmallPtrSet<Value *, 4> &ValidOuterPHIUses) {
194  LLVM_DEBUG(dbgs() << "Found use of inner induction variable: "; U->dump());
195  Value *MatchedMul = nullptr;
196  Value *MatchedItCount = nullptr;
197 
198  bool IsAdd = match(U, m_c_Add(m_Specific(InnerInductionPHI),
199  m_Value(MatchedMul))) &&
200  match(MatchedMul, m_c_Mul(m_Specific(OuterInductionPHI),
201  m_Value(MatchedItCount)));
202 
203  // Matches the same pattern as above, except it also looks for truncs
204  // on the phi, which can be the result of widening the induction variables.
205  bool IsAddTrunc =
206  match(U, m_c_Add(m_Trunc(m_Specific(InnerInductionPHI)),
207  m_Value(MatchedMul))) &&
208  match(MatchedMul, m_c_Mul(m_Trunc(m_Specific(OuterInductionPHI)),
209  m_Value(MatchedItCount)));
210 
211  if (!MatchedItCount)
212  return false;
213 
214  // Look through extends if the IV has been widened. Don't look through
215  // extends if we already looked through a trunc.
216  if (Widened && IsAdd &&
217  (isa<SExtInst>(MatchedItCount) || isa<ZExtInst>(MatchedItCount))) {
218  assert(MatchedItCount->getType() == InnerInductionPHI->getType() &&
219  "Unexpected type mismatch in types after widening");
220  MatchedItCount = isa<SExtInst>(MatchedItCount)
221  ? dyn_cast<SExtInst>(MatchedItCount)->getOperand(0)
222  : dyn_cast<ZExtInst>(MatchedItCount)->getOperand(0);
223  }
224 
225  if ((IsAdd || IsAddTrunc) && MatchedItCount == InnerTripCount) {
226  LLVM_DEBUG(dbgs() << "Use is optimisable\n");
227  ValidOuterPHIUses.insert(MatchedMul);
228  LinearIVUses.insert(U);
229  return true;
230  }
231 
232  LLVM_DEBUG(dbgs() << "Did not match expected pattern, bailing\n");
233  return false;
234  }
235 
237  Value *SExtInnerTripCount = InnerTripCount;
238  if (Widened &&
239  (isa<SExtInst>(InnerTripCount) || isa<ZExtInst>(InnerTripCount)))
240  SExtInnerTripCount = cast<Instruction>(InnerTripCount)->getOperand(0);
241 
242  for (User *U : InnerInductionPHI->users()) {
243  if (isInnerLoopIncrement(U))
244  continue;
245 
246  // After widening the IVs, a trunc instruction might have been introduced,
247  // so look through truncs.
248  if (isa<TruncInst>(U)) {
249  if (!U->hasOneUse())
250  return false;
251  U = *U->user_begin();
252  }
253 
254  // If the use is in the compare (which is also the condition of the inner
255  // branch) then the compare has been altered by another transformation e.g
256  // icmp ult %inc, tripcount -> icmp ult %j, tripcount-1, where tripcount is
257  // a constant. Ignore this use as the compare gets removed later anyway.
258  if (isInnerLoopTest(U))
259  continue;
260 
261  if (!matchLinearIVUser(U, SExtInnerTripCount, ValidOuterPHIUses))
262  return false;
263  }
264  return true;
265  }
266 };
267 
268 static bool
269 setLoopComponents(Value *&TC, Value *&TripCount, BinaryOperator *&Increment,
270  SmallPtrSetImpl<Instruction *> &IterationInstructions) {
271  TripCount = TC;
272  IterationInstructions.insert(Increment);
273  LLVM_DEBUG(dbgs() << "Found Increment: "; Increment->dump());
274  LLVM_DEBUG(dbgs() << "Found trip count: "; TripCount->dump());
275  LLVM_DEBUG(dbgs() << "Successfully found all loop components\n");
276  return true;
277 }
278 
279 // Given the RHS of the loop latch compare instruction, verify with SCEV
280 // that this is indeed the loop tripcount.
281 // TODO: This used to be a straightforward check but has grown to be quite
282 // complicated now. It is therefore worth revisiting what the additional
283 // benefits are of this (compared to relying on canonical loops and pattern
284 // matching).
285 static bool verifyTripCount(Value *RHS, Loop *L,
286  SmallPtrSetImpl<Instruction *> &IterationInstructions,
287  PHINode *&InductionPHI, Value *&TripCount, BinaryOperator *&Increment,
288  BranchInst *&BackBranch, ScalarEvolution *SE, bool IsWidened) {
289  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
290  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
291  LLVM_DEBUG(dbgs() << "Backedge-taken count is not predictable\n");
292  return false;
293  }
294 
295  // The Extend=false flag is used for getTripCountFromExitCount as we want
296  // to verify and match it with the pattern matched tripcount. Please note
297  // that overflow checks are performed in checkOverflow, but are first tried
298  // to avoid by widening the IV.
299  const SCEV *SCEVTripCount =
300  SE->getTripCountFromExitCount(BackedgeTakenCount, /*Extend=*/false);
301 
302  const SCEV *SCEVRHS = SE->getSCEV(RHS);
303  if (SCEVRHS == SCEVTripCount)
304  return setLoopComponents(RHS, TripCount, Increment, IterationInstructions);
305  ConstantInt *ConstantRHS = dyn_cast<ConstantInt>(RHS);
306  if (ConstantRHS) {
307  const SCEV *BackedgeTCExt = nullptr;
308  if (IsWidened) {
309  const SCEV *SCEVTripCountExt;
310  // Find the extended backedge taken count and extended trip count using
311  // SCEV. One of these should now match the RHS of the compare.
312  BackedgeTCExt = SE->getZeroExtendExpr(BackedgeTakenCount, RHS->getType());
313  SCEVTripCountExt = SE->getTripCountFromExitCount(BackedgeTCExt, false);
314  if (SCEVRHS != BackedgeTCExt && SCEVRHS != SCEVTripCountExt) {
315  LLVM_DEBUG(dbgs() << "Could not find valid trip count\n");
316  return false;
317  }
318  }
319  // If the RHS of the compare is equal to the backedge taken count we need
320  // to add one to get the trip count.
321  if (SCEVRHS == BackedgeTCExt || SCEVRHS == BackedgeTakenCount) {
322  ConstantInt *One = ConstantInt::get(ConstantRHS->getType(), 1);
323  Value *NewRHS = ConstantInt::get(
324  ConstantRHS->getContext(), ConstantRHS->getValue() + One->getValue());
325  return setLoopComponents(NewRHS, TripCount, Increment,
326  IterationInstructions);
327  }
328  return setLoopComponents(RHS, TripCount, Increment, IterationInstructions);
329  }
330  // If the RHS isn't a constant then check that the reason it doesn't match
331  // the SCEV trip count is because the RHS is a ZExt or SExt instruction
332  // (and take the trip count to be the RHS).
333  if (!IsWidened) {
334  LLVM_DEBUG(dbgs() << "Could not find valid trip count\n");
335  return false;
336  }
337  auto *TripCountInst = dyn_cast<Instruction>(RHS);
338  if (!TripCountInst) {
339  LLVM_DEBUG(dbgs() << "Could not find valid trip count\n");
340  return false;
341  }
342  if ((!isa<ZExtInst>(TripCountInst) && !isa<SExtInst>(TripCountInst)) ||
343  SE->getSCEV(TripCountInst->getOperand(0)) != SCEVTripCount) {
344  LLVM_DEBUG(dbgs() << "Could not find valid extended trip count\n");
345  return false;
346  }
347  return setLoopComponents(RHS, TripCount, Increment, IterationInstructions);
348 }
349 
350 // Finds the induction variable, increment and trip count for a simple loop that
351 // we can flatten.
352 static bool findLoopComponents(
353  Loop *L, SmallPtrSetImpl<Instruction *> &IterationInstructions,
354  PHINode *&InductionPHI, Value *&TripCount, BinaryOperator *&Increment,
355  BranchInst *&BackBranch, ScalarEvolution *SE, bool IsWidened) {
356  LLVM_DEBUG(dbgs() << "Finding components of loop: " << L->getName() << "\n");
357 
358  if (!L->isLoopSimplifyForm()) {
359  LLVM_DEBUG(dbgs() << "Loop is not in normal form\n");
360  return false;
361  }
362 
363  // Currently, to simplify the implementation, the Loop induction variable must
364  // start at zero and increment with a step size of one.
365  if (!L->isCanonical(*SE)) {
366  LLVM_DEBUG(dbgs() << "Loop is not canonical\n");
367  return false;
368  }
369 
370  // There must be exactly one exiting block, and it must be the same at the
371  // latch.
372  BasicBlock *Latch = L->getLoopLatch();
373  if (L->getExitingBlock() != Latch) {
374  LLVM_DEBUG(dbgs() << "Exiting and latch block are different\n");
375  return false;
376  }
377 
378  // Find the induction PHI. If there is no induction PHI, we can't do the
379  // transformation. TODO: could other variables trigger this? Do we have to
380  // search for the best one?
381  InductionPHI = L->getInductionVariable(*SE);
382  if (!InductionPHI) {
383  LLVM_DEBUG(dbgs() << "Could not find induction PHI\n");
384  return false;
385  }
386  LLVM_DEBUG(dbgs() << "Found induction PHI: "; InductionPHI->dump());
387 
388  bool ContinueOnTrue = L->contains(Latch->getTerminator()->getSuccessor(0));
389  auto IsValidPredicate = [&](ICmpInst::Predicate Pred) {
390  if (ContinueOnTrue)
391  return Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_ULT;
392  else
393  return Pred == CmpInst::ICMP_EQ;
394  };
395 
396  // Find Compare and make sure it is valid. getLatchCmpInst checks that the
397  // back branch of the latch is conditional.
399  if (!Compare || !IsValidPredicate(Compare->getUnsignedPredicate()) ||
400  Compare->hasNUsesOrMore(2)) {
401  LLVM_DEBUG(dbgs() << "Could not find valid comparison\n");
402  return false;
403  }
404  BackBranch = cast<BranchInst>(Latch->getTerminator());
405  IterationInstructions.insert(BackBranch);
406  LLVM_DEBUG(dbgs() << "Found back branch: "; BackBranch->dump());
407  IterationInstructions.insert(Compare);
408  LLVM_DEBUG(dbgs() << "Found comparison: "; Compare->dump());
409 
410  // Find increment and trip count.
411  // There are exactly 2 incoming values to the induction phi; one from the
412  // pre-header and one from the latch. The incoming latch value is the
413  // increment variable.
414  Increment =
415  cast<BinaryOperator>(InductionPHI->getIncomingValueForBlock(Latch));
416  if (Increment->hasNUsesOrMore(3)) {
417  LLVM_DEBUG(dbgs() << "Could not find valid increment\n");
418  return false;
419  }
420  // The trip count is the RHS of the compare. If this doesn't match the trip
421  // count computed by SCEV then this is because the trip count variable
422  // has been widened so the types don't match, or because it is a constant and
423  // another transformation has changed the compare (e.g. icmp ult %inc,
424  // tripcount -> icmp ult %j, tripcount-1), or both.
425  Value *RHS = Compare->getOperand(1);
426 
427  return verifyTripCount(RHS, L, IterationInstructions, InductionPHI, TripCount,
428  Increment, BackBranch, SE, IsWidened);
429 }
430 
431 static bool checkPHIs(FlattenInfo &FI, const TargetTransformInfo *TTI) {
432  // All PHIs in the inner and outer headers must either be:
433  // - The induction PHI, which we are going to rewrite as one induction in
434  // the new loop. This is already checked by findLoopComponents.
435  // - An outer header PHI with all incoming values from outside the loop.
436  // LoopSimplify guarantees we have a pre-header, so we don't need to
437  // worry about that here.
438  // - Pairs of PHIs in the inner and outer headers, which implement a
439  // loop-carried dependency that will still be valid in the new loop. To
440  // be valid, this variable must be modified only in the inner loop.
441 
442  // The set of PHI nodes in the outer loop header that we know will still be
443  // valid after the transformation. These will not need to be modified (with
444  // the exception of the induction variable), but we do need to check that
445  // there are no unsafe PHI nodes.
446  SmallPtrSet<PHINode *, 4> SafeOuterPHIs;
447  SafeOuterPHIs.insert(FI.OuterInductionPHI);
448 
449  // Check that all PHI nodes in the inner loop header match one of the valid
450  // patterns.
451  for (PHINode &InnerPHI : FI.InnerLoop->getHeader()->phis()) {
452  // The induction PHIs break these rules, and that's OK because we treat
453  // them specially when doing the transformation.
454  if (&InnerPHI == FI.InnerInductionPHI)
455  continue;
456  if (FI.isNarrowInductionPhi(&InnerPHI))
457  continue;
458 
459  // Each inner loop PHI node must have two incoming values/blocks - one
460  // from the pre-header, and one from the latch.
461  assert(InnerPHI.getNumIncomingValues() == 2);
462  Value *PreHeaderValue =
463  InnerPHI.getIncomingValueForBlock(FI.InnerLoop->getLoopPreheader());
464  Value *LatchValue =
465  InnerPHI.getIncomingValueForBlock(FI.InnerLoop->getLoopLatch());
466 
467  // The incoming value from the outer loop must be the PHI node in the
468  // outer loop header, with no modifications made in the top of the outer
469  // loop.
470  PHINode *OuterPHI = dyn_cast<PHINode>(PreHeaderValue);
471  if (!OuterPHI || OuterPHI->getParent() != FI.OuterLoop->getHeader()) {
472  LLVM_DEBUG(dbgs() << "value modified in top of outer loop\n");
473  return false;
474  }
475 
476  // The other incoming value must come from the inner loop, without any
477  // modifications in the tail end of the outer loop. We are in LCSSA form,
478  // so this will actually be a PHI in the inner loop's exit block, which
479  // only uses values from inside the inner loop.
480  PHINode *LCSSAPHI = dyn_cast<PHINode>(
482  if (!LCSSAPHI) {
483  LLVM_DEBUG(dbgs() << "could not find LCSSA PHI\n");
484  return false;
485  }
486 
487  // The value used by the LCSSA PHI must be the same one that the inner
488  // loop's PHI uses.
489  if (LCSSAPHI->hasConstantValue() != LatchValue) {
490  LLVM_DEBUG(
491  dbgs() << "LCSSA PHI incoming value does not match latch value\n");
492  return false;
493  }
494 
495  LLVM_DEBUG(dbgs() << "PHI pair is safe:\n");
496  LLVM_DEBUG(dbgs() << " Inner: "; InnerPHI.dump());
497  LLVM_DEBUG(dbgs() << " Outer: "; OuterPHI->dump());
498  SafeOuterPHIs.insert(OuterPHI);
499  FI.InnerPHIsToTransform.insert(&InnerPHI);
500  }
501 
502  for (PHINode &OuterPHI : FI.OuterLoop->getHeader()->phis()) {
503  if (FI.isNarrowInductionPhi(&OuterPHI))
504  continue;
505  if (!SafeOuterPHIs.count(&OuterPHI)) {
506  LLVM_DEBUG(dbgs() << "found unsafe PHI in outer loop: "; OuterPHI.dump());
507  return false;
508  }
509  }
510 
511  LLVM_DEBUG(dbgs() << "checkPHIs: OK\n");
512  return true;
513 }
514 
515 static bool
517  SmallPtrSetImpl<Instruction *> &IterationInstructions,
518  const TargetTransformInfo *TTI) {
519  // Check for instructions in the outer but not inner loop. If any of these
520  // have side-effects then this transformation is not legal, and if there is
521  // a significant amount of code here which can't be optimised out that it's
522  // not profitable (as these instructions would get executed for each
523  // iteration of the inner loop).
524  InstructionCost RepeatedInstrCost = 0;
525  for (auto *B : FI.OuterLoop->getBlocks()) {
526  if (FI.InnerLoop->contains(B))
527  continue;
528 
529  for (auto &I : *B) {
530  if (!isa<PHINode>(&I) && !I.isTerminator() &&
532  LLVM_DEBUG(dbgs() << "Cannot flatten because instruction may have "
533  "side effects: ";
534  I.dump());
535  return false;
536  }
537  // The execution count of the outer loop's iteration instructions
538  // (increment, compare and branch) will be increased, but the
539  // equivalent instructions will be removed from the inner loop, so
540  // they make a net difference of zero.
541  if (IterationInstructions.count(&I))
542  continue;
543  // The unconditional branch to the inner loop's header will turn into
544  // a fall-through, so adds no cost.
545  BranchInst *Br = dyn_cast<BranchInst>(&I);
546  if (Br && Br->isUnconditional() &&
547  Br->getSuccessor(0) == FI.InnerLoop->getHeader())
548  continue;
549  // Multiplies of the outer iteration variable and inner iteration
550  // count will be optimised out.
553  continue;
556  LLVM_DEBUG(dbgs() << "Cost " << Cost << ": "; I.dump());
557  RepeatedInstrCost += Cost;
558  }
559  }
560 
561  LLVM_DEBUG(dbgs() << "Cost of instructions that will be repeated: "
562  << RepeatedInstrCost << "\n");
563  // Bail out if flattening the loops would cause instructions in the outer
564  // loop but not in the inner loop to be executed extra times.
565  if (RepeatedInstrCost > RepeatedInstructionThreshold) {
566  LLVM_DEBUG(dbgs() << "checkOuterLoopInsts: not profitable, bailing.\n");
567  return false;
568  }
569 
570  LLVM_DEBUG(dbgs() << "checkOuterLoopInsts: OK\n");
571  return true;
572 }
573 
574 
575 
576 // We require all uses of both induction variables to match this pattern:
577 //
578 // (OuterPHI * InnerTripCount) + InnerPHI
579 //
580 // Any uses of the induction variables not matching that pattern would
581 // require a div/mod to reconstruct in the flattened loop, so the
582 // transformation wouldn't be profitable.
583 static bool checkIVUsers(FlattenInfo &FI) {
584  // Check that all uses of the inner loop's induction variable match the
585  // expected pattern, recording the uses of the outer IV.
586  SmallPtrSet<Value *, 4> ValidOuterPHIUses;
587  if (!FI.checkInnerInductionPhiUsers(ValidOuterPHIUses))
588  return false;
589 
590  // Check that there are no uses of the outer IV other than the ones found
591  // as part of the pattern above.
592  if (!FI.checkOuterInductionPhiUsers(ValidOuterPHIUses))
593  return false;
594 
595  LLVM_DEBUG(dbgs() << "checkIVUsers: OK\n";
596  dbgs() << "Found " << FI.LinearIVUses.size()
597  << " value(s) that can be replaced:\n";
598  for (Value *V : FI.LinearIVUses) {
599  dbgs() << " ";
600  V->dump();
601  });
602  return true;
603 }
604 
605 // Return an OverflowResult dependant on if overflow of the multiplication of
606 // InnerTripCount and OuterTripCount can be assumed not to happen.
608  AssumptionCache *AC) {
609  Function *F = FI.OuterLoop->getHeader()->getParent();
610  const DataLayout &DL = F->getParent()->getDataLayout();
611 
612  // For debugging/testing.
613  if (AssumeNoOverflow)
615 
616  // Check if the multiply could not overflow due to known ranges of the
617  // input values.
619  FI.InnerTripCount, FI.OuterTripCount, DL, AC,
622  return OR;
623 
624  for (Value *V : FI.LinearIVUses) {
625  for (Value *U : V->users()) {
626  if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
627  for (Value *GEPUser : U->users()) {
628  auto *GEPUserInst = cast<Instruction>(GEPUser);
629  if (!isa<LoadInst>(GEPUserInst) &&
630  !(isa<StoreInst>(GEPUserInst) &&
631  GEP == GEPUserInst->getOperand(1)))
632  continue;
634  FI.InnerLoop))
635  continue;
636  // The IV is used as the operand of a GEP which dominates the loop
637  // latch, and the IV is at least as wide as the address space of the
638  // GEP. In this case, the GEP would wrap around the address space
639  // before the IV increment wraps, which would be UB.
640  if (GEP->isInBounds() &&
641  V->getType()->getIntegerBitWidth() >=
642  DL.getPointerTypeSizeInBits(GEP->getType())) {
643  LLVM_DEBUG(
644  dbgs() << "use of linear IV would be UB if overflow occurred: ";
645  GEP->dump());
647  }
648  }
649  }
650  }
651  }
652 
654 }
655 
658  const TargetTransformInfo *TTI) {
659  SmallPtrSet<Instruction *, 8> IterationInstructions;
660  if (!findLoopComponents(FI.InnerLoop, IterationInstructions,
662  FI.InnerIncrement, FI.InnerBranch, SE, FI.Widened))
663  return false;
664  if (!findLoopComponents(FI.OuterLoop, IterationInstructions,
666  FI.OuterIncrement, FI.OuterBranch, SE, FI.Widened))
667  return false;
668 
669  // Both of the loop trip count values must be invariant in the outer loop
670  // (non-instructions are all inherently invariant).
672  LLVM_DEBUG(dbgs() << "inner loop trip count not invariant\n");
673  return false;
674  }
676  LLVM_DEBUG(dbgs() << "outer loop trip count not invariant\n");
677  return false;
678  }
679 
680  if (!checkPHIs(FI, TTI))
681  return false;
682 
683  // FIXME: it should be possible to handle different types correctly.
685  return false;
686 
687  if (!checkOuterLoopInsts(FI, IterationInstructions, TTI))
688  return false;
689 
690  // Find the values in the loop that can be replaced with the linearized
691  // induction variable, and check that there are no other uses of the inner
692  // or outer induction variable. If there were, we could still do this
693  // transformation, but we'd have to insert a div/mod to calculate the
694  // original IVs, so it wouldn't be profitable.
695  if (!checkIVUsers(FI))
696  return false;
697 
698  LLVM_DEBUG(dbgs() << "CanFlattenLoopPair: OK\n");
699  return true;
700 }
701 
705  MemorySSAUpdater *MSSAU) {
706  Function *F = FI.OuterLoop->getHeader()->getParent();
707  LLVM_DEBUG(dbgs() << "Checks all passed, doing the transformation\n");
708  {
709  using namespace ore;
711  FI.InnerLoop->getHeader());
713  Remark << "Flattened into outer loop";
714  ORE.emit(Remark);
715  }
716 
717  Value *NewTripCount = BinaryOperator::CreateMul(
718  FI.InnerTripCount, FI.OuterTripCount, "flatten.tripcount",
720  LLVM_DEBUG(dbgs() << "Created new trip count in preheader: ";
721  NewTripCount->dump());
722 
723  // Fix up PHI nodes that take values from the inner loop back-edge, which
724  // we are about to remove.
726 
727  // The old Phi will be optimised away later, but for now we can't leave
728  // leave it in an invalid state, so are updating them too.
729  for (PHINode *PHI : FI.InnerPHIsToTransform)
731 
732  // Modify the trip count of the outer loop to be the product of the two
733  // trip counts.
734  cast<User>(FI.OuterBranch->getCondition())->setOperand(1, NewTripCount);
735 
736  // Replace the inner loop backedge with an unconditional branch to the exit.
737  BasicBlock *InnerExitBlock = FI.InnerLoop->getExitBlock();
738  BasicBlock *InnerExitingBlock = FI.InnerLoop->getExitingBlock();
739  InnerExitingBlock->getTerminator()->eraseFromParent();
740  BranchInst::Create(InnerExitBlock, InnerExitingBlock);
741 
742  // Update the DomTree and MemorySSA.
743  DT->deleteEdge(InnerExitingBlock, FI.InnerLoop->getHeader());
744  if (MSSAU)
745  MSSAU->removeEdge(InnerExitingBlock, FI.InnerLoop->getHeader());
746 
747  // Replace all uses of the polynomial calculated from the two induction
748  // variables with the one new one.
750  for (Value *V : FI.LinearIVUses) {
751  Value *OuterValue = FI.OuterInductionPHI;
752  if (FI.Widened)
753  OuterValue = Builder.CreateTrunc(FI.OuterInductionPHI, V->getType(),
754  "flatten.trunciv");
755 
756  LLVM_DEBUG(dbgs() << "Replacing: "; V->dump(); dbgs() << "with: ";
757  OuterValue->dump());
758  V->replaceAllUsesWith(OuterValue);
759  }
760 
761  // Tell LoopInfo, SCEV and the pass manager that the inner loop has been
762  // deleted, and any information that have about the outer loop invalidated.
763  SE->forgetLoop(FI.OuterLoop);
764  SE->forgetLoop(FI.InnerLoop);
765  if (U)
767  LI->erase(FI.InnerLoop);
768 
769  // Increment statistic value.
770  NumFlattened++;
771 
772  return true;
773 }
774 
775 static bool CanWidenIV(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
777  const TargetTransformInfo *TTI) {
778  if (!WidenIV) {
779  LLVM_DEBUG(dbgs() << "Widening the IVs is disabled\n");
780  return false;
781  }
782 
783  LLVM_DEBUG(dbgs() << "Try widening the IVs\n");
785  auto &DL = M->getDataLayout();
786  auto *InnerType = FI.InnerInductionPHI->getType();
787  auto *OuterType = FI.OuterInductionPHI->getType();
788  unsigned MaxLegalSize = DL.getLargestLegalIntTypeSizeInBits();
789  auto *MaxLegalType = DL.getLargestLegalIntType(M->getContext());
790 
791  // If both induction types are less than the maximum legal integer width,
792  // promote both to the widest type available so we know calculating
793  // (OuterTripCount * InnerTripCount) as the new trip count is safe.
794  if (InnerType != OuterType ||
795  InnerType->getScalarSizeInBits() >= MaxLegalSize ||
796  MaxLegalType->getScalarSizeInBits() <
797  InnerType->getScalarSizeInBits() * 2) {
798  LLVM_DEBUG(dbgs() << "Can't widen the IV\n");
799  return false;
800  }
801 
802  SCEVExpander Rewriter(*SE, DL, "loopflatten");
804  unsigned ElimExt = 0;
805  unsigned Widened = 0;
806 
807  auto CreateWideIV = [&](WideIVInfo WideIV, bool &Deleted) -> bool {
808  PHINode *WidePhi =
809  createWideIV(WideIV, LI, SE, Rewriter, DT, DeadInsts, ElimExt, Widened,
810  true /* HasGuards */, true /* UsePostIncrementRanges */);
811  if (!WidePhi)
812  return false;
813  LLVM_DEBUG(dbgs() << "Created wide phi: "; WidePhi->dump());
814  LLVM_DEBUG(dbgs() << "Deleting old phi: "; WideIV.NarrowIV->dump());
816  return true;
817  };
818 
819  bool Deleted;
820  if (!CreateWideIV({FI.InnerInductionPHI, MaxLegalType, false}, Deleted))
821  return false;
822  // Add the narrow phi to list, so that it will be adjusted later when the
823  // the transformation is performed.
824  if (!Deleted)
826 
827  if (!CreateWideIV({FI.OuterInductionPHI, MaxLegalType, false}, Deleted))
828  return false;
829 
830  assert(Widened && "Widened IV expected");
831  FI.Widened = true;
832 
833  // Save the old/narrow induction phis, which we need to ignore in CheckPHIs.
836 
837  // After widening, rediscover all the loop components.
838  return CanFlattenLoopPair(FI, DT, LI, SE, AC, TTI);
839 }
840 
844  MemorySSAUpdater *MSSAU) {
845  LLVM_DEBUG(
846  dbgs() << "Loop flattening running on outer loop "
847  << FI.OuterLoop->getHeader()->getName() << " and inner loop "
848  << FI.InnerLoop->getHeader()->getName() << " in "
849  << FI.OuterLoop->getHeader()->getParent()->getName() << "\n");
850 
851  if (!CanFlattenLoopPair(FI, DT, LI, SE, AC, TTI))
852  return false;
853 
854  // Check if we can widen the induction variables to avoid overflow checks.
855  bool CanFlatten = CanWidenIV(FI, DT, LI, SE, AC, TTI);
856 
857  // It can happen that after widening of the IV, flattening may not be
858  // possible/happening, e.g. when it is deemed unprofitable. So bail here if
859  // that is the case.
860  // TODO: IV widening without performing the actual flattening transformation
861  // is not ideal. While this codegen change should not matter much, it is an
862  // unnecessary change which is better to avoid. It's unlikely this happens
863  // often, because if it's unprofitibale after widening, it should be
864  // unprofitabe before widening as checked in the first round of checks. But
865  // 'RepeatedInstructionThreshold' is set to only 2, which can probably be
866  // relaxed. Because this is making a code change (the IV widening, but not
867  // the flattening), we return true here.
868  if (FI.Widened && !CanFlatten)
869  return true;
870 
871  // If we have widened and can perform the transformation, do that here.
872  if (CanFlatten)
873  return DoFlattenLoopPair(FI, DT, LI, SE, AC, TTI, U, MSSAU);
874 
875  // Otherwise, if we haven't widened the IV, check if the new iteration
876  // variable might overflow. In this case, we need to version the loop, and
877  // select the original version at runtime if the iteration space is too
878  // large.
879  // TODO: We currently don't version the loop.
880  OverflowResult OR = checkOverflow(FI, DT, AC);
883  LLVM_DEBUG(dbgs() << "Multiply would always overflow, so not profitable\n");
884  return false;
885  } else if (OR == OverflowResult::MayOverflow) {
886  LLVM_DEBUG(dbgs() << "Multiply might overflow, not flattening\n");
887  return false;
888  }
889 
890  LLVM_DEBUG(dbgs() << "Multiply cannot overflow, modifying loop in-place\n");
891  return DoFlattenLoopPair(FI, DT, LI, SE, AC, TTI, U, MSSAU);
892 }
893 
896  MemorySSAUpdater *MSSAU) {
897  bool Changed = false;
898  for (Loop *InnerLoop : LN.getLoops()) {
899  auto *OuterLoop = InnerLoop->getParentLoop();
900  if (!OuterLoop)
901  continue;
902  FlattenInfo FI(OuterLoop, InnerLoop);
903  Changed |= FlattenLoopPair(FI, DT, LI, SE, AC, TTI, U, MSSAU);
904  }
905  return Changed;
906 }
907 
910  LPMUpdater &U) {
911 
912  bool Changed = false;
913 
915  if (AR.MSSA) {
916  MSSAU = MemorySSAUpdater(AR.MSSA);
917  if (VerifyMemorySSA)
918  AR.MSSA->verifyMemorySSA();
919  }
920 
921  // The loop flattening pass requires loops to be
922  // in simplified form, and also needs LCSSA. Running
923  // this pass will simplify all loops that contain inner loops,
924  // regardless of whether anything ends up being flattened.
925  Changed |= Flatten(LN, &AR.DT, &AR.LI, &AR.SE, &AR.AC, &AR.TTI, &U,
926  MSSAU ? MSSAU.getPointer() : nullptr);
927 
928  if (!Changed)
929  return PreservedAnalyses::all();
930 
931  if (AR.MSSA && VerifyMemorySSA)
932  AR.MSSA->verifyMemorySSA();
933 
934  auto PA = getLoopPassPreservedAnalyses();
935  if (AR.MSSA)
936  PA.preserve<MemorySSAAnalysis>();
937  return PA;
938 }
939 
940 namespace {
941 class LoopFlattenLegacyPass : public FunctionPass {
942 public:
943  static char ID; // Pass ID, replacement for typeid
944  LoopFlattenLegacyPass() : FunctionPass(ID) {
946  }
947 
948  // Possibly flatten loop L into its child.
949  bool runOnFunction(Function &F) override;
950 
951  void getAnalysisUsage(AnalysisUsage &AU) const override {
958  }
959 };
960 } // namespace
961 
963 INITIALIZE_PASS_BEGIN(LoopFlattenLegacyPass, "loop-flatten", "Flattens loops",
964  false, false)
967 INITIALIZE_PASS_END(LoopFlattenLegacyPass, "loop-flatten", "Flattens loops",
969 
971  return new LoopFlattenLegacyPass();
972 }
973 
975  ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
976  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
977  auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
978  DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
979  auto &TTIP = getAnalysis<TargetTransformInfoWrapperPass>();
980  auto *TTI = &TTIP.getTTI(F);
981  auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
982  auto *MSSA = getAnalysisIfAvailable<MemorySSAWrapperPass>();
983 
985  if (MSSA)
986  MSSAU = MemorySSAUpdater(&MSSA->getMSSA());
987 
988  bool Changed = false;
989  for (Loop *L : *LI) {
990  auto LN = LoopNest::getLoopNest(*L, *SE);
991  Changed |= Flatten(*LN, DT, LI, SE, AC, TTI, nullptr,
992  MSSAU ? MSSAU.getPointer() : nullptr);
993  }
994  return Changed;
995 }
checkOuterLoopInsts
static bool checkOuterLoopInsts(FlattenInfo &FI, SmallPtrSetImpl< Instruction * > &IterationInstructions, const TargetTransformInfo *TTI)
Definition: LoopFlatten.cpp:516
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(LoopFlattenLegacyPass, "loop-flatten", "Flattens loops", false, false) INITIALIZE_PASS_END(LoopFlattenLegacyPass
llvm::InstructionCost
Definition: InstructionCost.h:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
AssumptionCache.h
FlattenLoopPair
static bool FlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, const TargetTransformInfo *TTI, LPMUpdater *U, MemorySSAUpdater *MSSAU)
Definition: LoopFlatten.cpp:841
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:60
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::createWideIV
PHINode * createWideIV(const WideIVInfo &WI, LoopInfo *LI, ScalarEvolution *SE, SCEVExpander &Rewriter, DominatorTree *DT, SmallVectorImpl< WeakTrackingVH > &DeadInsts, unsigned &NumElimExt, unsigned &NumWidened, bool HasGuards, bool UsePostIncrementRanges)
Widen Induction Variables - Extend the width of an IV to cover its widest uses.
Definition: SimplifyIndVar.cpp:2109
llvm::Loop::getLatchCmpInst
ICmpInst * getLatchCmpInst() const
Get the latch condition instruction.
Definition: LoopInfo.cpp:170
llvm::MemorySSA::verifyMemorySSA
void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
Definition: MemorySSA.cpp:1875
llvm::TargetTransformInfo::getInstructionCost
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
Definition: TargetTransformInfo.cpp:224
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::LoopStandardAnalysisResults::AC
AssumptionCache & AC
Definition: LoopAnalysisManager.h:53
llvm::CmpInst::ICMP_EQ
@ ICMP_EQ
equal
Definition: InstrTypes.h:740
llvm::OverflowResult::NeverOverflows
@ NeverOverflows
Never overflows.
llvm::LoopBase::getExitBlock
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:81
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::ISD::OR
@ OR
Definition: ISDOpcodes.h:667
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:719
llvm::ConstantInt::getType
IntegerType * getType() const
getType - Specialize the getType() method to always return an IntegerType, which reduces the amount o...
Definition: Constants.h:173
llvm::computeOverflowForUnsignedMul
OverflowResult computeOverflowForUnsignedMul(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, bool UseInstrInfo=true)
Definition: ValueTracking.cpp:4873
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
ScalarEvolutionExpander.h
Scalar.h
flatten
loop flatten
Definition: LoopFlatten.cpp:967
MemorySSAUpdater.h
llvm::Function
Definition: Function.h:60
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:546
llvm::Value::dump
void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:4837
Pass.h
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:138
llvm::SCEVExpander
This class uses information about analyze scalars to rewrite expressions in canonical form.
Definition: ScalarEvolutionExpander.h:50
llvm::ConstantInt::getValue
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:133
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1182
Statistic.h
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:172
llvm::IRBuilder<>
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:627
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
FlattenInfo::InnerInductionPHI
PHINode * InnerInductionPHI
Definition: LoopFlatten.cpp:114
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:741
llvm::PHINode::removeIncomingValue
Value * removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty=true)
Remove an incoming value.
Definition: Instructions.cpp:114
ValueTracking.h
Local.h
OptimizationRemarkEmitter.h
checkPHIs
static bool checkPHIs(FlattenInfo &FI, const TargetTransformInfo *TTI)
Definition: LoopFlatten.cpp:431
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::getLoopAnalysisUsage
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:142
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
FlattenInfo::OuterIncrement
BinaryOperator * OuterIncrement
Definition: LoopFlatten.cpp:129
LoopFlatten.h
ScalarEvolution.h
llvm::OverflowResult::AlwaysOverflowsLow
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
Module.h
findLoopComponents
static bool findLoopComponents(Loop *L, SmallPtrSetImpl< Instruction * > &IterationInstructions, PHINode *&InductionPHI, Value *&TripCount, BinaryOperator *&Increment, BranchInst *&BackBranch, ScalarEvolution *SE, bool IsWidened)
Definition: LoopFlatten.cpp:352
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:51
llvm::Loop::isCanonical
bool isCanonical(ScalarEvolution &SE) const
Return true if the loop induction variable starts at zero and increments by one each time through the...
Definition: LoopInfo.cpp:407
FlattenInfo::checkOuterInductionPhiUsers
bool checkOuterInductionPhiUsers(SmallPtrSet< Value *, 4 > &ValidOuterPHIUses)
Definition: LoopFlatten.cpp:163
FlattenInfo::OuterInductionPHI
PHINode * OuterInductionPHI
Definition: LoopFlatten.cpp:115
llvm::Optional
Definition: APInt.h:33
llvm::SmallPtrSet< Value *, 4 >
FlattenInfo::OuterLoop
Loop * OuterLoop
Definition: LoopFlatten.cpp:111
llvm::Value::user_begin
user_iterator user_begin()
Definition: Value.h:397
DEBUG_TYPE
#define DEBUG_TYPE
Definition: LoopFlatten.cpp:82
FlattenInfo::LinearIVUses
SmallPtrSet< Value *, 4 > LinearIVUses
Definition: LoopFlatten.cpp:124
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::LoopStandardAnalysisResults::DT
DominatorTree & DT
Definition: LoopAnalysisManager.h:54
FlattenInfo::OuterBranch
BranchInst * OuterBranch
Definition: LoopFlatten.cpp:132
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::WideIVInfo
Collect information about induction variables that are used by sign/zero extend operations.
Definition: SimplifyIndVar.h:64
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::RecursivelyDeleteDeadPHINode
bool RecursivelyDeleteDeadPHINode(PHINode *PN, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is an effectively dead PHI node, due to being a def-use chain of single-use no...
Definition: Local.cpp:628
loop
Analysis the ScalarEvolution expression for r is< loop > Outside the loop
Definition: README.txt:8
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::Optional::getPointer
constexpr const T * getPointer() const
Definition: Optional.h:311
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Flatten
bool Flatten(LoopNest &LN, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, TargetTransformInfo *TTI, LPMUpdater *U, MemorySSAUpdater *MSSAU)
Definition: LoopFlatten.cpp:894
LoopDeletionResult::Deleted
@ Deleted
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
FlattenInfo::Widened
bool Widened
Definition: LoopFlatten.cpp:137
llvm::ScalarEvolution::getTripCountFromExitCount
const SCEV * getTripCountFromExitCount(const SCEV *ExitCount, bool Extend=true)
Convert from an "exit count" (i.e.
Definition: ScalarEvolution.cpp:7912
llvm::MemorySSAWrapperPass
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:984
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24
Rewriter
Virtual Register Rewriter
Definition: VirtRegMap.cpp:237
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::OverflowResult::MayOverflow
@ MayOverflow
May or may not overflow.
InlinePriorityMode::Cost
@ Cost
llvm::User
Definition: User.h:44
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
false
Definition: StackSlotColoring.cpp:141
llvm::PatternMatch::m_c_Add
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
Definition: PatternMatch.h:2237
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::PHINode::getIncomingValueForBlock
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Definition: Instructions.h:2884
llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:187
FlattenInfo::isInnerLoopIncrement
bool isInnerLoopIncrement(User *U)
Definition: LoopFlatten.cpp:153
llvm::BasicBlock::phis
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:364
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::WideIVInfo::NarrowIV
PHINode * NarrowIV
Definition: SimplifyIndVar.h:65
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:879
LoopUtils.h
FlattenInfo::InnerBranch
BranchInst * InnerBranch
Definition: LoopFlatten.cpp:130
llvm::Instruction::getSuccessor
BasicBlock * getSuccessor(unsigned Idx) const
Return the specified successor. This instruction must be a terminator.
Definition: Instruction.cpp:815
llvm::LoopBase::getExitingBlock
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:48
PatternMatch.h
llvm::Loop::getName
StringRef getName() const
Definition: LoopInfo.h:888
llvm::Type::getIntegerBitWidth
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::OverflowResult
OverflowResult
Definition: ValueTracking.h:501
LoopInfo.h
llvm::BranchInst::getCondition
Value * getCondition() const
Definition: Instructions.h:3215
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:4436
llvm::getLoopPassPreservedAnalyses
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Definition: LoopAnalysisManager.cpp:137
llvm::HighlightColor::Remark
@ Remark
llvm::Loop::isLoopSimplifyForm
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to,...
Definition: LoopInfo.cpp:475
llvm::cl::opt
Definition: CommandLine.h:1399
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:75
FlattenInfo::OuterTripCount
Value * OuterTripCount
Definition: LoopFlatten.cpp:120
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:77
setLoopComponents
static bool setLoopComponents(Value *&TC, Value *&TripCount, BinaryOperator *&Increment, SmallPtrSetImpl< Instruction * > &IterationInstructions)
Definition: LoopFlatten.cpp:269
llvm::isGuaranteedToExecuteForEveryIteration
bool isGuaranteedToExecuteForEveryIteration(const Instruction *I, const Loop *L)
Return true if this function can prove that the instruction I is executed for every iteration of the ...
Definition: ValueTracking.cpp:5521
llvm::PHINode::hasConstantValue
Value * hasConstantValue() const
If the specified PHI node always merges together the same value, return the value,...
Definition: Instructions.cpp:153
llvm::MemorySSAUpdater::removeEdge
void removeEdge(BasicBlock *From, BasicBlock *To)
Update the MemoryPhi in To following an edge deletion between From and To.
Definition: MemorySSAUpdater.cpp:531
llvm::ICmpInst
This instruction compares its operands according to the predicate given to the constructor.
Definition: Instructions.h:1186
CanWidenIV
static bool CanWidenIV(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, const TargetTransformInfo *TTI)
Definition: LoopFlatten.cpp:775
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2619
llvm::MemorySSAUpdater
Definition: MemorySSAUpdater.h:54
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:644
llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition: LoopPassManager.h:262
AssumeNoOverflow
static cl::opt< bool > AssumeNoOverflow("loop-flatten-assume-no-overflow", cl::Hidden, cl::init(false), cl::desc("Assume that the product of the two iteration " "trip counts will never overflow"))
RepeatedInstructionThreshold
static cl::opt< unsigned > RepeatedInstructionThreshold("loop-flatten-cost-threshold", cl::Hidden, cl::init(2), cl::desc("Limit on the cost of instructions that can be repeated due to " "loop flattening"))
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::BranchInst::Create
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3190
FlattenInfo::checkInnerInductionPhiUsers
bool checkInnerInductionPhiUsers(SmallPtrSet< Value *, 4 > &ValidOuterPHIUses)
Definition: LoopFlatten.cpp:236
I
#define I(x, y, z)
Definition: MD5.cpp:58
FlattenInfo::isNarrowInductionPhi
bool isNarrowInductionPhi(PHINode *Phi)
Definition: LoopFlatten.cpp:147
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:439
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:166
checkIVUsers
static bool checkIVUsers(FlattenInfo &FI)
Definition: LoopFlatten.cpp:583
FlattenInfo::isInnerLoopTest
bool isInnerLoopTest(User *U)
Definition: LoopFlatten.cpp:159
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:215
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
FlattenInfo::InnerIncrement
BinaryOperator * InnerIncrement
Definition: LoopFlatten.cpp:128
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
LoopNestAnalysis.h
llvm::LoopInfo::erase
void erase(Loop *L)
Update LoopInfo after removing the last backedge from a loop.
Definition: LoopInfo.cpp:872
DoFlattenLoopPair
static bool DoFlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, const TargetTransformInfo *TTI, LPMUpdater *U, MemorySSAUpdater *MSSAU)
Definition: LoopFlatten.cpp:702
llvm::LPMUpdater::markLoopAsDeleted
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
Definition: LoopPassManager.h:282
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
LoopPassManager.h
llvm::MemorySSAAnalysis
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:934
FlattenInfo::FlattenInfo
FlattenInfo(Loop *OL, Loop *IL)
Definition: LoopFlatten.cpp:145
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::BranchInst::isUnconditional
bool isUnconditional() const
Definition: Instructions.h:3212
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
Compare
QP Compare Ordered outs ins xscmpudp No builtin are required Or llvm fcmp order unorder compare DP QP Compare builtin are required DP Compare
Definition: README_P9.txt:309
checkOverflow
static OverflowResult checkOverflow(FlattenInfo &FI, DominatorTree *DT, AssumptionCache *AC)
Definition: LoopFlatten.cpp:607
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:202
llvm::LoopInfo
Definition: LoopInfo.h:1105
llvm::BinaryOperator
Definition: InstrTypes.h:188
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::TargetTransformInfo::TCK_SizeAndLatency
@ TCK_SizeAndLatency
The weighted sum of size and latency.
Definition: TargetTransformInfo.h:221
llvm::CmpInst::ICMP_ULT
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:744
verifyTripCount
static bool verifyTripCount(Value *RHS, Loop *L, SmallPtrSetImpl< Instruction * > &IterationInstructions, PHINode *&InductionPHI, Value *&TripCount, BinaryOperator *&Increment, BranchInst *&BackBranch, ScalarEvolution *SE, bool IsWidened)
Definition: LoopFlatten.cpp:285
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:994
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::LoopStandardAnalysisResults::LI
LoopInfo & LI
Definition: LoopAnalysisManager.h:55
FlattenInfo::NarrowInnerInductionPHI
PHINode * NarrowInnerInductionPHI
Definition: LoopFlatten.cpp:140
FlattenInfo::InnerTripCount
Value * InnerTripCount
Definition: LoopFlatten.cpp:119
FlattenInfo::NarrowOuterInductionPHI
PHINode * NarrowOuterInductionPHI
Definition: LoopFlatten.cpp:141
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
WidenIV
static cl::opt< bool > WidenIV("loop-flatten-widen-iv", cl::Hidden, cl::init(true), cl::desc("Widen the loop induction variables, if possible, so " "overflow checks won't reject flattening"))
Definition: SimplifyIndVar.cpp:1196
LAM
LoopAnalysisManager LAM
Definition: PassBuilderBindings.cpp:58
llvm::SmallPtrSetImplBase::size
size_type size() const
Definition: SmallPtrSet.h:93
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
llvm::ScalarEvolution::forgetLoop
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
Definition: ScalarEvolution.cpp:8298
llvm::LoopStandardAnalysisResults::TTI
TargetTransformInfo & TTI
Definition: LoopAnalysisManager.h:58
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
FlattenInfo::InnerPHIsToTransform
SmallPtrSet< PHINode *, 4 > InnerPHIsToTransform
Definition: LoopFlatten.cpp:135
FlattenInfo::matchLinearIVUser
bool matchLinearIVUser(User *U, Value *InnerTripCount, SmallPtrSet< Value *, 4 > &ValidOuterPHIUses)
Definition: LoopFlatten.cpp:192
Function.h
llvm::initializeLoopFlattenLegacyPassPass
void initializeLoopFlattenLegacyPassPass(PassRegistry &)
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::OverflowResult::AlwaysOverflowsHigh
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
llvm::PatternMatch::m_c_Mul
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
Definition: PatternMatch.h:2244
llvm::LoopStandardAnalysisResults::SE
ScalarEvolution & SE
Definition: LoopAnalysisManager.h:56
llvm::LoopNest::getLoops
ArrayRef< Loop * > getLoops() const
Get the loops in the nest.
Definition: LoopNestAnalysis.h:117
CanFlattenLoopPair
static bool CanFlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, const TargetTransformInfo *TTI)
Definition: LoopFlatten.cpp:656
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:690
llvm::ScalarEvolution::getZeroExtendExpr
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
Definition: ScalarEvolution.cpp:1599
llvm::PatternMatch::m_Specific
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:772
Dominators.h
CreateMul
static BinaryOperator * CreateMul(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
Definition: Reassociate.cpp:253
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
FlattenInfo
Definition: LoopFlatten.cpp:110
TargetTransformInfo.h
llvm::createLoopFlattenPass
FunctionPass * createLoopFlattenPass()
Definition: LoopFlatten.cpp:970
llvm::LoopStandardAnalysisResults::MSSA
MemorySSA * MSSA
Definition: LoopAnalysisManager.h:61
llvm::PHINode
Definition: Instructions.h:2699
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:119
llvm::PatternMatch
Definition: PatternMatch.h:47
llvm::SmallPtrSetImpl< Instruction * >
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
llvm::LoopFlattenPass::run
PreservedAnalyses run(LoopNest &LN, LoopAnalysisManager &LAM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LoopFlatten.cpp:908
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:171
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::VerifyMemorySSA
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:91
FlattenInfo::isOuterLoopIncrement
bool isOuterLoopIncrement(User *U)
Definition: LoopFlatten.cpp:156
llvm::Loop::getInductionVariable
PHINode * getInductionVariable(ScalarEvolution &SE) const
Return the loop induction variable if found, else return nullptr.
Definition: LoopInfo.cpp:290
llvm::cl::desc
Definition: CommandLine.h:412
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3134
llvm::LoopNest::getLoopNest
static std::unique_ptr< LoopNest > getLoopNest(Loop &Root, ScalarEvolution &SE)
Construct a LoopNest object.
Definition: LoopNestAnalysis.cpp:47
raw_ostream.h
llvm::PatternMatch::m_Trunc
CastClass_match< OpTy, Instruction::Trunc > m_Trunc(const OpTy &Op)
Matches Trunc.
Definition: PatternMatch.h:1611
llvm::ScalarEvolution::getBackedgeTakenCount
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
Definition: ScalarEvolution.cpp:8168
InitializePasses.h
llvm::LoopNest
This class represents a loop nest and can be used to query its properties.
Definition: LoopNestAnalysis.h:28
llvm::isSafeToSpeculativelyExecute
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
Definition: ValueTracking.cpp:4719
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
llvm::DominatorTreeBase::deleteEdge
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
Definition: GenericDomTree.h:602
FlattenInfo::InnerLoop
Loop * InnerLoop
Definition: LoopFlatten.cpp:112
loops
loop Flattens loops
Definition: LoopFlatten.cpp:967
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3227
SimplifyIndVar.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38