LLVM  14.0.0git
LoopFlatten.cpp
Go to the documentation of this file.
1 //===- LoopFlatten.cpp - Loop flattening pass------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass flattens pairs nested loops into a single loop.
10 //
11 // The intention is to optimise loop nests like this, which together access an
12 // array linearly:
13 // for (int i = 0; i < N; ++i)
14 // for (int j = 0; j < M; ++j)
15 // f(A[i*M+j]);
16 // into one loop:
17 // for (int i = 0; i < (N*M); ++i)
18 // f(A[i]);
19 //
20 // It can also flatten loops where the induction variables are not used in the
21 // loop. This is only worth doing if the induction variables are only used in an
22 // expression like i*M+j. If they had any other uses, we would have to insert a
23 // div/mod to reconstruct the original values, so this wouldn't be profitable.
24 //
25 // We also need to prove that N*M will not overflow.
26 //
27 //===----------------------------------------------------------------------===//
28 
30 
31 #include "llvm/ADT/Statistic.h"
33 #include "llvm/Analysis/LoopInfo.h"
38 #include "llvm/IR/Dominators.h"
39 #include "llvm/IR/Function.h"
40 #include "llvm/IR/IRBuilder.h"
41 #include "llvm/IR/Module.h"
42 #include "llvm/IR/PatternMatch.h"
43 #include "llvm/IR/Verifier.h"
44 #include "llvm/InitializePasses.h"
45 #include "llvm/Pass.h"
46 #include "llvm/Support/Debug.h"
48 #include "llvm/Transforms/Scalar.h"
53 
54 using namespace llvm;
55 using namespace llvm::PatternMatch;
56 
57 #define DEBUG_TYPE "loop-flatten"
58 
59 STATISTIC(NumFlattened, "Number of loops flattened");
60 
62  "loop-flatten-cost-threshold", cl::Hidden, cl::init(2),
63  cl::desc("Limit on the cost of instructions that can be repeated due to "
64  "loop flattening"));
65 
66 static cl::opt<bool>
67  AssumeNoOverflow("loop-flatten-assume-no-overflow", cl::Hidden,
68  cl::init(false),
69  cl::desc("Assume that the product of the two iteration "
70  "trip counts will never overflow"));
71 
72 static cl::opt<bool>
73  WidenIV("loop-flatten-widen-iv", cl::Hidden,
74  cl::init(true),
75  cl::desc("Widen the loop induction variables, if possible, so "
76  "overflow checks won't reject flattening"));
77 
78 struct FlattenInfo {
79  Loop *OuterLoop = nullptr;
80  Loop *InnerLoop = nullptr;
81  // These PHINodes correspond to loop induction variables, which are expected
82  // to start at zero and increment by one on each loop.
83  PHINode *InnerInductionPHI = nullptr;
84  PHINode *OuterInductionPHI = nullptr;
85  Value *InnerTripCount = nullptr;
86  Value *OuterTripCount = nullptr;
87  BinaryOperator *InnerIncrement = nullptr;
88  BinaryOperator *OuterIncrement = nullptr;
89  BranchInst *InnerBranch = nullptr;
90  BranchInst *OuterBranch = nullptr;
93 
94  // Whether this holds the flatten info before or after widening.
95  bool Widened = false;
96 
97  // Holds the old/narrow induction phis, i.e. the Phis before IV widening has
98  // been applied. This bookkeeping is used so we can skip some checks on these
99  // phi nodes.
101 
102  FlattenInfo(Loop *OL, Loop *IL) : OuterLoop(OL), InnerLoop(IL) {};
103 };
104 
105 static bool
106 setLoopComponents(Value *&TC, Value *&TripCount, BinaryOperator *&Increment,
107  SmallPtrSetImpl<Instruction *> &IterationInstructions) {
108  TripCount = TC;
109  IterationInstructions.insert(Increment);
110  LLVM_DEBUG(dbgs() << "Found Increment: "; Increment->dump());
111  LLVM_DEBUG(dbgs() << "Found trip count: "; TripCount->dump());
112  LLVM_DEBUG(dbgs() << "Successfully found all loop components\n");
113  return true;
114 }
115 
116 // Finds the induction variable, increment and trip count for a simple loop that
117 // we can flatten.
118 static bool findLoopComponents(
119  Loop *L, SmallPtrSetImpl<Instruction *> &IterationInstructions,
120  PHINode *&InductionPHI, Value *&TripCount, BinaryOperator *&Increment,
121  BranchInst *&BackBranch, ScalarEvolution *SE, bool IsWidened) {
122  LLVM_DEBUG(dbgs() << "Finding components of loop: " << L->getName() << "\n");
123 
124  if (!L->isLoopSimplifyForm()) {
125  LLVM_DEBUG(dbgs() << "Loop is not in normal form\n");
126  return false;
127  }
128 
129  // Currently, to simplify the implementation, the Loop induction variable must
130  // start at zero and increment with a step size of one.
131  if (!L->isCanonical(*SE)) {
132  LLVM_DEBUG(dbgs() << "Loop is not canonical\n");
133  return false;
134  }
135 
136  // There must be exactly one exiting block, and it must be the same at the
137  // latch.
138  BasicBlock *Latch = L->getLoopLatch();
139  if (L->getExitingBlock() != Latch) {
140  LLVM_DEBUG(dbgs() << "Exiting and latch block are different\n");
141  return false;
142  }
143 
144  // Find the induction PHI. If there is no induction PHI, we can't do the
145  // transformation. TODO: could other variables trigger this? Do we have to
146  // search for the best one?
147  InductionPHI = L->getInductionVariable(*SE);
148  if (!InductionPHI) {
149  LLVM_DEBUG(dbgs() << "Could not find induction PHI\n");
150  return false;
151  }
152  LLVM_DEBUG(dbgs() << "Found induction PHI: "; InductionPHI->dump());
153 
154  bool ContinueOnTrue = L->contains(Latch->getTerminator()->getSuccessor(0));
155  auto IsValidPredicate = [&](ICmpInst::Predicate Pred) {
156  if (ContinueOnTrue)
157  return Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_ULT;
158  else
159  return Pred == CmpInst::ICMP_EQ;
160  };
161 
162  // Find Compare and make sure it is valid. getLatchCmpInst checks that the
163  // back branch of the latch is conditional.
165  if (!Compare || !IsValidPredicate(Compare->getUnsignedPredicate()) ||
166  Compare->hasNUsesOrMore(2)) {
167  LLVM_DEBUG(dbgs() << "Could not find valid comparison\n");
168  return false;
169  }
170  BackBranch = cast<BranchInst>(Latch->getTerminator());
171  IterationInstructions.insert(BackBranch);
172  LLVM_DEBUG(dbgs() << "Found back branch: "; BackBranch->dump());
173  IterationInstructions.insert(Compare);
174  LLVM_DEBUG(dbgs() << "Found comparison: "; Compare->dump());
175 
176  // Find increment and trip count.
177  // There are exactly 2 incoming values to the induction phi; one from the
178  // pre-header and one from the latch. The incoming latch value is the
179  // increment variable.
180  Increment =
181  dyn_cast<BinaryOperator>(InductionPHI->getIncomingValueForBlock(Latch));
182  if (Increment->hasNUsesOrMore(3)) {
183  LLVM_DEBUG(dbgs() << "Could not find valid increment\n");
184  return false;
185  }
186  // The trip count is the RHS of the compare. If this doesn't match the trip
187  // count computed by SCEV then this is because the trip count variable
188  // has been widened so the types don't match, or because it is a constant and
189  // another transformation has changed the compare (e.g. icmp ult %inc,
190  // tripcount -> icmp ult %j, tripcount-1), or both.
191  Value *RHS = Compare->getOperand(1);
192  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
193  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
194  LLVM_DEBUG(dbgs() << "Backedge-taken count is not predictable\n");
195  return false;
196  }
197  const SCEV *SCEVTripCount = SE->getTripCountFromExitCount(BackedgeTakenCount);
198  const SCEV *SCEVRHS = SE->getSCEV(RHS);
199  if (SCEVRHS == SCEVTripCount)
200  return setLoopComponents(RHS, TripCount, Increment, IterationInstructions);
201  ConstantInt *ConstantRHS = dyn_cast<ConstantInt>(RHS);
202  if (ConstantRHS) {
203  const SCEV *BackedgeTCExt = nullptr;
204  if (IsWidened) {
205  const SCEV *SCEVTripCountExt;
206  // Find the extended backedge taken count and extended trip count using
207  // SCEV. One of these should now match the RHS of the compare.
208  BackedgeTCExt = SE->getZeroExtendExpr(BackedgeTakenCount, RHS->getType());
209  SCEVTripCountExt = SE->getTripCountFromExitCount(BackedgeTCExt);
210  if (SCEVRHS != BackedgeTCExt && SCEVRHS != SCEVTripCountExt) {
211  LLVM_DEBUG(dbgs() << "Could not find valid trip count\n");
212  return false;
213  }
214  }
215  // If the RHS of the compare is equal to the backedge taken count we need
216  // to add one to get the trip count.
217  if (SCEVRHS == BackedgeTCExt || SCEVRHS == BackedgeTakenCount) {
218  ConstantInt *One = ConstantInt::get(ConstantRHS->getType(), 1);
219  Value *NewRHS = ConstantInt::get(
220  ConstantRHS->getContext(), ConstantRHS->getValue() + One->getValue());
221  return setLoopComponents(NewRHS, TripCount, Increment,
222  IterationInstructions);
223  }
224  return setLoopComponents(RHS, TripCount, Increment, IterationInstructions);
225  }
226  // If the RHS isn't a constant then check that the reason it doesn't match
227  // the SCEV trip count is because the RHS is a ZExt or SExt instruction
228  // (and take the trip count to be the RHS).
229  if (!IsWidened) {
230  LLVM_DEBUG(dbgs() << "Could not find valid trip count\n");
231  return false;
232  }
233  auto *TripCountInst = dyn_cast<Instruction>(RHS);
234  if (!TripCountInst) {
235  LLVM_DEBUG(dbgs() << "Could not find valid trip count\n");
236  return false;
237  }
238  if ((!isa<ZExtInst>(TripCountInst) && !isa<SExtInst>(TripCountInst)) ||
239  SE->getSCEV(TripCountInst->getOperand(0)) != SCEVTripCount) {
240  LLVM_DEBUG(dbgs() << "Could not find valid extended trip count\n");
241  return false;
242  }
243  return setLoopComponents(RHS, TripCount, Increment, IterationInstructions);
244 }
245 
246 static bool checkPHIs(FlattenInfo &FI, const TargetTransformInfo *TTI) {
247  // All PHIs in the inner and outer headers must either be:
248  // - The induction PHI, which we are going to rewrite as one induction in
249  // the new loop. This is already checked by findLoopComponents.
250  // - An outer header PHI with all incoming values from outside the loop.
251  // LoopSimplify guarantees we have a pre-header, so we don't need to
252  // worry about that here.
253  // - Pairs of PHIs in the inner and outer headers, which implement a
254  // loop-carried dependency that will still be valid in the new loop. To
255  // be valid, this variable must be modified only in the inner loop.
256 
257  // The set of PHI nodes in the outer loop header that we know will still be
258  // valid after the transformation. These will not need to be modified (with
259  // the exception of the induction variable), but we do need to check that
260  // there are no unsafe PHI nodes.
261  SmallPtrSet<PHINode *, 4> SafeOuterPHIs;
262  SafeOuterPHIs.insert(FI.OuterInductionPHI);
263 
264  // Check that all PHI nodes in the inner loop header match one of the valid
265  // patterns.
266  for (PHINode &InnerPHI : FI.InnerLoop->getHeader()->phis()) {
267  // The induction PHIs break these rules, and that's OK because we treat
268  // them specially when doing the transformation.
269  if (&InnerPHI == FI.InnerInductionPHI)
270  continue;
271  if (FI.Widened && FI.OldInductionPHIs.count(&InnerPHI))
272  continue;
273 
274  // Each inner loop PHI node must have two incoming values/blocks - one
275  // from the pre-header, and one from the latch.
276  assert(InnerPHI.getNumIncomingValues() == 2);
277  Value *PreHeaderValue =
278  InnerPHI.getIncomingValueForBlock(FI.InnerLoop->getLoopPreheader());
279  Value *LatchValue =
280  InnerPHI.getIncomingValueForBlock(FI.InnerLoop->getLoopLatch());
281 
282  // The incoming value from the outer loop must be the PHI node in the
283  // outer loop header, with no modifications made in the top of the outer
284  // loop.
285  PHINode *OuterPHI = dyn_cast<PHINode>(PreHeaderValue);
286  if (!OuterPHI || OuterPHI->getParent() != FI.OuterLoop->getHeader()) {
287  LLVM_DEBUG(dbgs() << "value modified in top of outer loop\n");
288  return false;
289  }
290 
291  // The other incoming value must come from the inner loop, without any
292  // modifications in the tail end of the outer loop. We are in LCSSA form,
293  // so this will actually be a PHI in the inner loop's exit block, which
294  // only uses values from inside the inner loop.
295  PHINode *LCSSAPHI = dyn_cast<PHINode>(
297  if (!LCSSAPHI) {
298  LLVM_DEBUG(dbgs() << "could not find LCSSA PHI\n");
299  return false;
300  }
301 
302  // The value used by the LCSSA PHI must be the same one that the inner
303  // loop's PHI uses.
304  if (LCSSAPHI->hasConstantValue() != LatchValue) {
305  LLVM_DEBUG(
306  dbgs() << "LCSSA PHI incoming value does not match latch value\n");
307  return false;
308  }
309 
310  LLVM_DEBUG(dbgs() << "PHI pair is safe:\n");
311  LLVM_DEBUG(dbgs() << " Inner: "; InnerPHI.dump());
312  LLVM_DEBUG(dbgs() << " Outer: "; OuterPHI->dump());
313  SafeOuterPHIs.insert(OuterPHI);
314  FI.InnerPHIsToTransform.insert(&InnerPHI);
315  }
316 
317  for (PHINode &OuterPHI : FI.OuterLoop->getHeader()->phis()) {
318  if (FI.Widened && FI.OldInductionPHIs.count(&OuterPHI))
319  continue;
320  if (!SafeOuterPHIs.count(&OuterPHI)) {
321  LLVM_DEBUG(dbgs() << "found unsafe PHI in outer loop: "; OuterPHI.dump());
322  return false;
323  }
324  }
325 
326  LLVM_DEBUG(dbgs() << "checkPHIs: OK\n");
327  return true;
328 }
329 
330 static bool
332  SmallPtrSetImpl<Instruction *> &IterationInstructions,
333  const TargetTransformInfo *TTI) {
334  // Check for instructions in the outer but not inner loop. If any of these
335  // have side-effects then this transformation is not legal, and if there is
336  // a significant amount of code here which can't be optimised out that it's
337  // not profitable (as these instructions would get executed for each
338  // iteration of the inner loop).
339  InstructionCost RepeatedInstrCost = 0;
340  for (auto *B : FI.OuterLoop->getBlocks()) {
341  if (FI.InnerLoop->contains(B))
342  continue;
343 
344  for (auto &I : *B) {
345  if (!isa<PHINode>(&I) && !I.isTerminator() &&
347  LLVM_DEBUG(dbgs() << "Cannot flatten because instruction may have "
348  "side effects: ";
349  I.dump());
350  return false;
351  }
352  // The execution count of the outer loop's iteration instructions
353  // (increment, compare and branch) will be increased, but the
354  // equivalent instructions will be removed from the inner loop, so
355  // they make a net difference of zero.
356  if (IterationInstructions.count(&I))
357  continue;
358  // The uncoditional branch to the inner loop's header will turn into
359  // a fall-through, so adds no cost.
360  BranchInst *Br = dyn_cast<BranchInst>(&I);
361  if (Br && Br->isUnconditional() &&
362  Br->getSuccessor(0) == FI.InnerLoop->getHeader())
363  continue;
364  // Multiplies of the outer iteration variable and inner iteration
365  // count will be optimised out.
368  continue;
369  InstructionCost Cost =
371  LLVM_DEBUG(dbgs() << "Cost " << Cost << ": "; I.dump());
372  RepeatedInstrCost += Cost;
373  }
374  }
375 
376  LLVM_DEBUG(dbgs() << "Cost of instructions that will be repeated: "
377  << RepeatedInstrCost << "\n");
378  // Bail out if flattening the loops would cause instructions in the outer
379  // loop but not in the inner loop to be executed extra times.
380  if (RepeatedInstrCost > RepeatedInstructionThreshold) {
381  LLVM_DEBUG(dbgs() << "checkOuterLoopInsts: not profitable, bailing.\n");
382  return false;
383  }
384 
385  LLVM_DEBUG(dbgs() << "checkOuterLoopInsts: OK\n");
386  return true;
387 }
388 
389 static bool checkIVUsers(FlattenInfo &FI) {
390  // We require all uses of both induction variables to match this pattern:
391  //
392  // (OuterPHI * InnerTripCount) + InnerPHI
393  //
394  // Any uses of the induction variables not matching that pattern would
395  // require a div/mod to reconstruct in the flattened loop, so the
396  // transformation wouldn't be profitable.
397 
398  Value *InnerTripCount = FI.InnerTripCount;
399  if (FI.Widened &&
400  (isa<SExtInst>(InnerTripCount) || isa<ZExtInst>(InnerTripCount)))
401  InnerTripCount = cast<Instruction>(InnerTripCount)->getOperand(0);
402 
403  // Check that all uses of the inner loop's induction variable match the
404  // expected pattern, recording the uses of the outer IV.
405  SmallPtrSet<Value *, 4> ValidOuterPHIUses;
406  for (User *U : FI.InnerInductionPHI->users()) {
407  if (U == FI.InnerIncrement)
408  continue;
409 
410  // After widening the IVs, a trunc instruction might have been introduced,
411  // so look through truncs.
412  if (isa<TruncInst>(U)) {
413  if (!U->hasOneUse())
414  return false;
415  U = *U->user_begin();
416  }
417 
418  // If the use is in the compare (which is also the condition of the inner
419  // branch) then the compare has been altered by another transformation e.g
420  // icmp ult %inc, tripcount -> icmp ult %j, tripcount-1, where tripcount is
421  // a constant. Ignore this use as the compare gets removed later anyway.
422  if (U == FI.InnerBranch->getCondition())
423  continue;
424 
425  LLVM_DEBUG(dbgs() << "Found use of inner induction variable: "; U->dump());
426 
427  Value *MatchedMul = nullptr;
428  Value *MatchedItCount = nullptr;
429  bool IsAdd = match(U, m_c_Add(m_Specific(FI.InnerInductionPHI),
430  m_Value(MatchedMul))) &&
431  match(MatchedMul, m_c_Mul(m_Specific(FI.OuterInductionPHI),
432  m_Value(MatchedItCount)));
433 
434  // Matches the same pattern as above, except it also looks for truncs
435  // on the phi, which can be the result of widening the induction variables.
436  bool IsAddTrunc =
438  m_Value(MatchedMul))) &&
440  m_Value(MatchedItCount)));
441 
442  if (!MatchedItCount)
443  return false;
444  // Look through extends if the IV has been widened.
445  if (FI.Widened &&
446  (isa<SExtInst>(MatchedItCount) || isa<ZExtInst>(MatchedItCount))) {
447  assert(MatchedItCount->getType() == FI.InnerInductionPHI->getType() &&
448  "Unexpected type mismatch in types after widening");
449  MatchedItCount = isa<SExtInst>(MatchedItCount)
450  ? dyn_cast<SExtInst>(MatchedItCount)->getOperand(0)
451  : dyn_cast<ZExtInst>(MatchedItCount)->getOperand(0);
452  }
453 
454  if ((IsAdd || IsAddTrunc) && MatchedItCount == InnerTripCount) {
455  LLVM_DEBUG(dbgs() << "Use is optimisable\n");
456  ValidOuterPHIUses.insert(MatchedMul);
457  FI.LinearIVUses.insert(U);
458  } else {
459  LLVM_DEBUG(dbgs() << "Did not match expected pattern, bailing\n");
460  return false;
461  }
462  }
463 
464  // Check that there are no uses of the outer IV other than the ones found
465  // as part of the pattern above.
466  for (User *U : FI.OuterInductionPHI->users()) {
467  if (U == FI.OuterIncrement)
468  continue;
469 
470  auto IsValidOuterPHIUses = [&] (User *U) -> bool {
471  LLVM_DEBUG(dbgs() << "Found use of outer induction variable: "; U->dump());
472  if (!ValidOuterPHIUses.count(U)) {
473  LLVM_DEBUG(dbgs() << "Did not match expected pattern, bailing\n");
474  return false;
475  }
476  LLVM_DEBUG(dbgs() << "Use is optimisable\n");
477  return true;
478  };
479 
480  if (auto *V = dyn_cast<TruncInst>(U)) {
481  for (auto *K : V->users()) {
482  if (!IsValidOuterPHIUses(K))
483  return false;
484  }
485  continue;
486  }
487 
488  if (!IsValidOuterPHIUses(U))
489  return false;
490  }
491 
492  LLVM_DEBUG(dbgs() << "checkIVUsers: OK\n";
493  dbgs() << "Found " << FI.LinearIVUses.size()
494  << " value(s) that can be replaced:\n";
495  for (Value *V : FI.LinearIVUses) {
496  dbgs() << " ";
497  V->dump();
498  });
499  return true;
500 }
501 
502 // Return an OverflowResult dependant on if overflow of the multiplication of
503 // InnerTripCount and OuterTripCount can be assumed not to happen.
505  AssumptionCache *AC) {
506  Function *F = FI.OuterLoop->getHeader()->getParent();
507  const DataLayout &DL = F->getParent()->getDataLayout();
508 
509  // For debugging/testing.
510  if (AssumeNoOverflow)
512 
513  // Check if the multiply could not overflow due to known ranges of the
514  // input values.
516  FI.InnerTripCount, FI.OuterTripCount, DL, AC,
519  return OR;
520 
521  for (Value *V : FI.LinearIVUses) {
522  for (Value *U : V->users()) {
523  if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
524  for (Value *GEPUser : U->users()) {
525  Instruction *GEPUserInst = dyn_cast<Instruction>(GEPUser);
526  if (!isa<LoadInst>(GEPUserInst) &&
527  !(isa<StoreInst>(GEPUserInst) &&
528  GEP == GEPUserInst->getOperand(1)))
529  continue;
531  FI.InnerLoop))
532  continue;
533  // The IV is used as the operand of a GEP which dominates the loop
534  // latch, and the IV is at least as wide as the address space of the
535  // GEP. In this case, the GEP would wrap around the address space
536  // before the IV increment wraps, which would be UB.
537  if (GEP->isInBounds() &&
538  V->getType()->getIntegerBitWidth() >=
539  DL.getPointerTypeSizeInBits(GEP->getType())) {
540  LLVM_DEBUG(
541  dbgs() << "use of linear IV would be UB if overflow occurred: ";
542  GEP->dump());
544  }
545  }
546  }
547  }
548  }
549 
551 }
552 
555  const TargetTransformInfo *TTI) {
556  SmallPtrSet<Instruction *, 8> IterationInstructions;
557  if (!findLoopComponents(FI.InnerLoop, IterationInstructions,
559  FI.InnerIncrement, FI.InnerBranch, SE, FI.Widened))
560  return false;
561  if (!findLoopComponents(FI.OuterLoop, IterationInstructions,
563  FI.OuterIncrement, FI.OuterBranch, SE, FI.Widened))
564  return false;
565 
566  // Both of the loop trip count values must be invariant in the outer loop
567  // (non-instructions are all inherently invariant).
569  LLVM_DEBUG(dbgs() << "inner loop trip count not invariant\n");
570  return false;
571  }
573  LLVM_DEBUG(dbgs() << "outer loop trip count not invariant\n");
574  return false;
575  }
576 
577  if (!checkPHIs(FI, TTI))
578  return false;
579 
580  // FIXME: it should be possible to handle different types correctly.
582  return false;
583 
584  if (!checkOuterLoopInsts(FI, IterationInstructions, TTI))
585  return false;
586 
587  // Find the values in the loop that can be replaced with the linearized
588  // induction variable, and check that there are no other uses of the inner
589  // or outer induction variable. If there were, we could still do this
590  // transformation, but we'd have to insert a div/mod to calculate the
591  // original IVs, so it wouldn't be profitable.
592  if (!checkIVUsers(FI))
593  return false;
594 
595  LLVM_DEBUG(dbgs() << "CanFlattenLoopPair: OK\n");
596  return true;
597 }
598 
601  const TargetTransformInfo *TTI) {
602  Function *F = FI.OuterLoop->getHeader()->getParent();
603  LLVM_DEBUG(dbgs() << "Checks all passed, doing the transformation\n");
604  {
605  using namespace ore;
607  FI.InnerLoop->getHeader());
609  Remark << "Flattened into outer loop";
610  ORE.emit(Remark);
611  }
612 
613  Value *NewTripCount = BinaryOperator::CreateMul(
614  FI.InnerTripCount, FI.OuterTripCount, "flatten.tripcount",
616  LLVM_DEBUG(dbgs() << "Created new trip count in preheader: ";
617  NewTripCount->dump());
618 
619  // Fix up PHI nodes that take values from the inner loop back-edge, which
620  // we are about to remove.
622 
623  // The old Phi will be optimised away later, but for now we can't leave
624  // leave it in an invalid state, so are updating them too.
625  for (PHINode *PHI : FI.InnerPHIsToTransform)
627 
628  // Modify the trip count of the outer loop to be the product of the two
629  // trip counts.
630  cast<User>(FI.OuterBranch->getCondition())->setOperand(1, NewTripCount);
631 
632  // Replace the inner loop backedge with an unconditional branch to the exit.
633  BasicBlock *InnerExitBlock = FI.InnerLoop->getExitBlock();
634  BasicBlock *InnerExitingBlock = FI.InnerLoop->getExitingBlock();
635  InnerExitingBlock->getTerminator()->eraseFromParent();
636  BranchInst::Create(InnerExitBlock, InnerExitingBlock);
637  DT->deleteEdge(InnerExitingBlock, FI.InnerLoop->getHeader());
638 
639  // Replace all uses of the polynomial calculated from the two induction
640  // variables with the one new one.
642  for (Value *V : FI.LinearIVUses) {
643  Value *OuterValue = FI.OuterInductionPHI;
644  if (FI.Widened)
645  OuterValue = Builder.CreateTrunc(FI.OuterInductionPHI, V->getType(),
646  "flatten.trunciv");
647 
648  LLVM_DEBUG(dbgs() << "Replacing: "; V->dump();
649  dbgs() << "with: "; OuterValue->dump());
650  V->replaceAllUsesWith(OuterValue);
651  }
652 
653  // Tell LoopInfo, SCEV and the pass manager that the inner loop has been
654  // deleted, and any information that have about the outer loop invalidated.
655  SE->forgetLoop(FI.OuterLoop);
656  SE->forgetLoop(FI.InnerLoop);
657  LI->erase(FI.InnerLoop);
658 
659  // Increment statistic value.
660  NumFlattened++;
661 
662  return true;
663 }
664 
665 static bool CanWidenIV(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
667  const TargetTransformInfo *TTI) {
668  if (!WidenIV) {
669  LLVM_DEBUG(dbgs() << "Widening the IVs is disabled\n");
670  return false;
671  }
672 
673  LLVM_DEBUG(dbgs() << "Try widening the IVs\n");
675  auto &DL = M->getDataLayout();
676  auto *InnerType = FI.InnerInductionPHI->getType();
677  auto *OuterType = FI.OuterInductionPHI->getType();
678  unsigned MaxLegalSize = DL.getLargestLegalIntTypeSizeInBits();
679  auto *MaxLegalType = DL.getLargestLegalIntType(M->getContext());
680 
681  // If both induction types are less than the maximum legal integer width,
682  // promote both to the widest type available so we know calculating
683  // (OuterTripCount * InnerTripCount) as the new trip count is safe.
684  if (InnerType != OuterType ||
685  InnerType->getScalarSizeInBits() >= MaxLegalSize ||
686  MaxLegalType->getScalarSizeInBits() < InnerType->getScalarSizeInBits() * 2) {
687  LLVM_DEBUG(dbgs() << "Can't widen the IV\n");
688  return false;
689  }
690 
691  SCEVExpander Rewriter(*SE, DL, "loopflatten");
693  unsigned ElimExt = 0;
694  unsigned Widened = 0;
695 
696  auto CreateWideIV = [&] (WideIVInfo WideIV, bool &Deleted) -> bool {
697  PHINode *WidePhi = createWideIV(WideIV, LI, SE, Rewriter, DT, DeadInsts,
698  ElimExt, Widened, true /* HasGuards */,
699  true /* UsePostIncrementRanges */);
700  if (!WidePhi)
701  return false;
702  LLVM_DEBUG(dbgs() << "Created wide phi: "; WidePhi->dump());
703  LLVM_DEBUG(dbgs() << "Deleting old phi: "; WideIV.NarrowIV->dump());
705  return true;
706  };
707 
708  bool Deleted;
709  if (!CreateWideIV({FI.InnerInductionPHI, MaxLegalType, false }, Deleted))
710  return false;
711  // If the inner Phi node cannot be trivially deleted, we need to at least
712  // bring it in a consistent state.
713  if (!Deleted)
715  if (!CreateWideIV({FI.OuterInductionPHI, MaxLegalType, false }, Deleted))
716  return false;
717 
718  assert(Widened && "Widened IV expected");
719  FI.Widened = true;
720 
721  // Save the old/narrow induction phis, which we need to ignore in CheckPHIs.
724 
725  // After widening, rediscover all the loop components.
726  return CanFlattenLoopPair(FI, DT, LI, SE, AC, TTI);
727 }
728 
731  const TargetTransformInfo *TTI) {
732  LLVM_DEBUG(
733  dbgs() << "Loop flattening running on outer loop "
734  << FI.OuterLoop->getHeader()->getName() << " and inner loop "
735  << FI.InnerLoop->getHeader()->getName() << " in "
736  << FI.OuterLoop->getHeader()->getParent()->getName() << "\n");
737 
738  if (!CanFlattenLoopPair(FI, DT, LI, SE, AC, TTI))
739  return false;
740 
741  // Check if we can widen the induction variables to avoid overflow checks.
742  if (CanWidenIV(FI, DT, LI, SE, AC, TTI))
743  return DoFlattenLoopPair(FI, DT, LI, SE, AC, TTI);
744 
745  // Check if the new iteration variable might overflow. In this case, we
746  // need to version the loop, and select the original version at runtime if
747  // the iteration space is too large.
748  // TODO: We currently don't version the loop.
749  OverflowResult OR = checkOverflow(FI, DT, AC);
752  LLVM_DEBUG(dbgs() << "Multiply would always overflow, so not profitable\n");
753  return false;
754  } else if (OR == OverflowResult::MayOverflow) {
755  LLVM_DEBUG(dbgs() << "Multiply might overflow, not flattening\n");
756  return false;
757  }
758 
759  LLVM_DEBUG(dbgs() << "Multiply cannot overflow, modifying loop in-place\n");
760  return DoFlattenLoopPair(FI, DT, LI, SE, AC, TTI);
761 }
762 
765  bool Changed = false;
766  for (Loop *InnerLoop : LN.getLoops()) {
767  auto *OuterLoop = InnerLoop->getParentLoop();
768  if (!OuterLoop)
769  continue;
770  FlattenInfo FI(OuterLoop, InnerLoop);
771  Changed |= FlattenLoopPair(FI, DT, LI, SE, AC, TTI);
772  }
773  return Changed;
774 }
775 
778  LPMUpdater &U) {
779 
780  bool Changed = false;
781 
782  // The loop flattening pass requires loops to be
783  // in simplified form, and also needs LCSSA. Running
784  // this pass will simplify all loops that contain inner loops,
785  // regardless of whether anything ends up being flattened.
786  Changed |= Flatten(LN, &AR.DT, &AR.LI, &AR.SE, &AR.AC, &AR.TTI);
787 
788  if (!Changed)
789  return PreservedAnalyses::all();
790 
791  return PreservedAnalyses::none();
792 }
793 
794 namespace {
795 class LoopFlattenLegacyPass : public FunctionPass {
796 public:
797  static char ID; // Pass ID, replacement for typeid
798  LoopFlattenLegacyPass() : FunctionPass(ID) {
800  }
801 
802  // Possibly flatten loop L into its child.
803  bool runOnFunction(Function &F) override;
804 
805  void getAnalysisUsage(AnalysisUsage &AU) const override {
811  }
812 };
813 } // namespace
814 
816 INITIALIZE_PASS_BEGIN(LoopFlattenLegacyPass, "loop-flatten", "Flattens loops",
817  false, false)
820 INITIALIZE_PASS_END(LoopFlattenLegacyPass, "loop-flatten", "Flattens loops",
822 
823 FunctionPass *llvm::createLoopFlattenPass() { return new LoopFlattenLegacyPass(); }
824 
826  ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
827  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
828  auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
829  DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
830  auto &TTIP = getAnalysis<TargetTransformInfoWrapperPass>();
831  auto *TTI = &TTIP.getTTI(F);
832  auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
833  bool Changed = false;
834  for (Loop *L : *LI) {
835  auto LN = LoopNest::getLoopNest(*L, *SE);
836  Changed |= Flatten(*LN, DT, LI, SE, AC, TTI);
837  }
838  return Changed;
839 }
checkOuterLoopInsts
static bool checkOuterLoopInsts(FlattenInfo &FI, SmallPtrSetImpl< Instruction * > &IterationInstructions, const TargetTransformInfo *TTI)
Definition: LoopFlatten.cpp:331
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(LoopFlattenLegacyPass, "loop-flatten", "Flattens loops", false, false) INITIALIZE_PASS_END(LoopFlattenLegacyPass
llvm::InstructionCost
Definition: InstructionCost.h:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
AssumptionCache.h
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:64
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
llvm::createWideIV
PHINode * createWideIV(const WideIVInfo &WI, LoopInfo *LI, ScalarEvolution *SE, SCEVExpander &Rewriter, DominatorTree *DT, SmallVectorImpl< WeakTrackingVH > &DeadInsts, unsigned &NumElimExt, unsigned &NumWidened, bool HasGuards, bool UsePostIncrementRanges)
Widen Induction Variables - Extend the width of an IV to cover its widest uses.
Definition: SimplifyIndVar.cpp:2042
llvm::Loop::getLatchCmpInst
ICmpInst * getLatchCmpInst() const
Get the latch condition instruction.
Definition: LoopInfo.cpp:174
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::LoopStandardAnalysisResults::AC
AssumptionCache & AC
Definition: LoopAnalysisManager.h:54
llvm::CmpInst::ICMP_EQ
@ ICMP_EQ
equal
Definition: InstrTypes.h:741
llvm::OverflowResult::NeverOverflows
@ NeverOverflows
Never overflows.
llvm::LoopBase::getExitBlock
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:82
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:112
llvm::ISD::OR
@ OR
Definition: ISDOpcodes.h:633
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:720
llvm::isSafeToSpeculativelyExecute
bool isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
Definition: ValueTracking.cpp:4610
llvm::ConstantInt::getType
IntegerType * getType() const
getType - Specialize the getType() method to always return an IntegerType, which reduces the amount o...
Definition: Constants.h:173
llvm::computeOverflowForUnsignedMul
OverflowResult computeOverflowForUnsignedMul(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, bool UseInstrInfo=true)
Definition: ValueTracking.cpp:4728
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
ScalarEvolutionExpander.h
Scalar.h
flatten
loop flatten
Definition: LoopFlatten.cpp:820
llvm::Function
Definition: Function.h:61
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::Value::dump
void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:4759
Pass.h
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
llvm::SCEVExpander
This class uses information about analyze scalars to rewrite expressions in canonical form.
Definition: ScalarEvolutionExpander.h:63
llvm::ConstantInt::getValue
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:133
DoFlattenLoopPair
static bool DoFlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, const TargetTransformInfo *TTI)
Definition: LoopFlatten.cpp:599
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::IRBuilder<>
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:633
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:461
FlattenInfo::InnerInductionPHI
PHINode * InnerInductionPHI
Definition: LoopFlatten.cpp:83
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:742
llvm::PHINode::removeIncomingValue
Value * removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty=true)
Remove an incoming value.
Definition: Instructions.cpp:113
ValueTracking.h
Local.h
OptimizationRemarkEmitter.h
checkPHIs
static bool checkPHIs(FlattenInfo &FI, const TargetTransformInfo *TTI)
Definition: LoopFlatten.cpp:246
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::getLoopAnalysisUsage
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:149
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:158
FlattenInfo::OuterIncrement
BinaryOperator * OuterIncrement
Definition: LoopFlatten.cpp:88
LoopFlatten.h
ScalarEvolution.h
llvm::OverflowResult::AlwaysOverflowsLow
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
Module.h
findLoopComponents
static bool findLoopComponents(Loop *L, SmallPtrSetImpl< Instruction * > &IterationInstructions, PHINode *&InductionPHI, Value *&TripCount, BinaryOperator *&Increment, BranchInst *&BackBranch, ScalarEvolution *SE, bool IsWidened)
Definition: LoopFlatten.cpp:118
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:52
llvm::Loop::isCanonical
bool isCanonical(ScalarEvolution &SE) const
Return true if the loop induction variable starts at zero and increments by one each time through the...
Definition: LoopInfo.cpp:410
FlattenInfo::OuterInductionPHI
PHINode * OuterInductionPHI
Definition: LoopFlatten.cpp:84
llvm::SmallPtrSet< Value *, 4 >
FlattenInfo::OuterLoop
Loop * OuterLoop
Definition: LoopFlatten.cpp:79
DEBUG_TYPE
#define DEBUG_TYPE
Definition: LoopFlatten.cpp:57
FlattenInfo::LinearIVUses
SmallPtrSet< Value *, 4 > LinearIVUses
Definition: LoopFlatten.cpp:91
llvm::LoopStandardAnalysisResults::DT
DominatorTree & DT
Definition: LoopAnalysisManager.h:55
FlattenInfo::OuterBranch
BranchInst * OuterBranch
Definition: LoopFlatten.cpp:90
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::WideIVInfo
Collect information about induction variables that are used by sign/zero extend operations.
Definition: SimplifyIndVar.h:65
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::RecursivelyDeleteDeadPHINode
bool RecursivelyDeleteDeadPHINode(PHINode *PN, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is an effectively dead PHI node, due to being a def-use chain of single-use no...
Definition: Local.cpp:621
loop
Analysis the ScalarEvolution expression for r is< loop > Outside the loop
Definition: README.txt:8
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
LoopDeletionResult::Deleted
@ Deleted
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
FlattenInfo::Widened
bool Widened
Definition: LoopFlatten.cpp:95
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
Rewriter
Virtual Register Rewriter
Definition: VirtRegMap.cpp:237
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::OverflowResult::MayOverflow
@ MayOverflow
May or may not overflow.
llvm::User
Definition: User.h:44
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
false
Definition: StackSlotColoring.cpp:142
llvm::PatternMatch::m_c_Add
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
Definition: PatternMatch.h:2228
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::PHINode::getIncomingValueForBlock
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Definition: Instructions.h:2818
llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:171
llvm::Instruction
Definition: Instruction.h:45
llvm::BasicBlock::phis
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:354
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::WideIVInfo::NarrowIV
PHINode * NarrowIV
Definition: SimplifyIndVar.h:66
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:900
LoopUtils.h
FlattenInfo::InnerBranch
BranchInst * InnerBranch
Definition: LoopFlatten.cpp:89
llvm::Instruction::getSuccessor
BasicBlock * getSuccessor(unsigned Idx) const
Return the specified successor. This instruction must be a terminator.
Definition: Instruction.cpp:777
llvm::LoopBase::getExitingBlock
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:49
PatternMatch.h
llvm::Loop::getName
StringRef getName() const
Definition: LoopInfo.h:866
llvm::Type::getIntegerBitWidth
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
FlattenInfo::OldInductionPHIs
SmallPtrSet< PHINode *, 2 > OldInductionPHIs
Definition: LoopFlatten.cpp:100
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::OverflowResult
OverflowResult
Definition: ValueTracking.h:487
LoopInfo.h
llvm::BranchInst::getCondition
Value * getCondition() const
Definition: Instructions.h:3149
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:4066
llvm::HighlightColor::Remark
@ Remark
llvm::Loop::isLoopSimplifyForm
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to,...
Definition: LoopInfo.cpp:478
llvm::cl::opt
Definition: CommandLine.h:1434
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:78
FlattenInfo::OuterTripCount
Value * OuterTripCount
Definition: LoopFlatten.cpp:86
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:78
setLoopComponents
static bool setLoopComponents(Value *&TC, Value *&TripCount, BinaryOperator *&Increment, SmallPtrSetImpl< Instruction * > &IterationInstructions)
Definition: LoopFlatten.cpp:106
llvm::isGuaranteedToExecuteForEveryIteration
bool isGuaranteedToExecuteForEveryIteration(const Instruction *I, const Loop *L)
Return true if this function can prove that the instruction I is executed for every iteration of the ...
Definition: ValueTracking.cpp:5334
llvm::PHINode::hasConstantValue
Value * hasConstantValue() const
If the specified PHI node always merges together the same value, return the value,...
Definition: Instructions.cpp:152
llvm::ICmpInst
This instruction compares its operands according to the predicate given to the constructor.
Definition: Instructions.h:1203
CanWidenIV
static bool CanWidenIV(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, const TargetTransformInfo *TTI)
Definition: LoopFlatten.cpp:665
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2387
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:572
llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition: LoopPassManager.h:249
AssumeNoOverflow
static cl::opt< bool > AssumeNoOverflow("loop-flatten-assume-no-overflow", cl::Hidden, cl::init(false), cl::desc("Assume that the product of the two iteration " "trip counts will never overflow"))
RepeatedInstructionThreshold
static cl::opt< unsigned > RepeatedInstructionThreshold("loop-flatten-cost-threshold", cl::Hidden, cl::init(2), cl::desc("Limit on the cost of instructions that can be repeated due to " "loop flattening"))
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::BranchInst::Create
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3124
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:167
checkIVUsers
static bool checkIVUsers(FlattenInfo &FI)
Definition: LoopFlatten.cpp:389
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:216
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
FlattenInfo::InnerIncrement
BinaryOperator * InnerIncrement
Definition: LoopFlatten.cpp:87
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
llvm::LoopInfo::erase
void erase(Loop *L)
Update LoopInfo after removing the last backedge from a loop.
Definition: LoopInfo.cpp:878
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
FlattenInfo::FlattenInfo
FlattenInfo(Loop *OL, Loop *IL)
Definition: LoopFlatten.cpp:102
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:650
llvm::BranchInst::isUnconditional
bool isUnconditional() const
Definition: Instructions.h:3146
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
Compare
QP Compare Ordered outs ins xscmpudp No builtin are required Or llvm fcmp order unorder compare DP QP Compare builtin are required DP Compare
Definition: README_P9.txt:309
checkOverflow
static OverflowResult checkOverflow(FlattenInfo &FI, DominatorTree *DT, AssumptionCache *AC)
Definition: LoopFlatten.cpp:504
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:200
llvm::LoopInfo
Definition: LoopInfo.h:1083
llvm::BinaryOperator
Definition: InstrTypes.h:189
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:41
llvm::TargetTransformInfo::TCK_SizeAndLatency
@ TCK_SizeAndLatency
The weighted sum of size and latency.
Definition: TargetTransformInfo.h:215
llvm::CmpInst::ICMP_ULT
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:745
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
llvm::TargetTransformInfo::getUserCost
InstructionCost getUserCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
Definition: TargetTransformInfo.cpp:219
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:990
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::LoopStandardAnalysisResults::LI
LoopInfo & LI
Definition: LoopAnalysisManager.h:56
FlattenInfo::InnerTripCount
Value * InnerTripCount
Definition: LoopFlatten.cpp:85
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:148
LAM
LoopAnalysisManager LAM
Definition: PassBuilderBindings.cpp:58
llvm::SmallPtrSetImplBase::size
size_type size() const
Definition: SmallPtrSet.h:92
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
WidenIV
static cl::opt< bool > WidenIV("loop-flatten-widen-iv", cl::Hidden, cl::init(true), cl::desc("Widen the loop induction variables, if possible, so " "overflow checks won't reject flattening"))
Verifier.h
llvm::ScalarEvolution::forgetLoop
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
Definition: ScalarEvolution.cpp:7452
llvm::LoopStandardAnalysisResults::TTI
TargetTransformInfo & TTI
Definition: LoopAnalysisManager.h:59
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
FlattenInfo::InnerPHIsToTransform
SmallPtrSet< PHINode *, 4 > InnerPHIsToTransform
Definition: LoopFlatten.cpp:92
llvm::ScalarEvolution::getTripCountFromExitCount
const SCEV * getTripCountFromExitCount(const SCEV *ExitCount)
Convert from an "exit count" (i.e.
Definition: ScalarEvolution.cpp:7159
Function.h
llvm::initializeLoopFlattenLegacyPassPass
void initializeLoopFlattenLegacyPassPass(PassRegistry &)
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::OverflowResult::AlwaysOverflowsHigh
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
llvm::PatternMatch::m_c_Mul
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
Definition: PatternMatch.h:2235
llvm::LoopStandardAnalysisResults::SE
ScalarEvolution & SE
Definition: LoopAnalysisManager.h:57
llvm::LoopNest::getLoops
ArrayRef< Loop * > getLoops() const
Get the loops in the nest.
Definition: LoopNestAnalysis.h:109
WidenIV
Definition: SimplifyIndVar.cpp:950
CanFlattenLoopPair
static bool CanFlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, const TargetTransformInfo *TTI)
Definition: LoopFlatten.cpp:553
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:684
llvm::ScalarEvolution::getZeroExtendExpr
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
Definition: ScalarEvolution.cpp:1577
llvm::PatternMatch::m_Specific
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:802
FlattenLoopPair
static bool FlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, const TargetTransformInfo *TTI)
Definition: LoopFlatten.cpp:729
Dominators.h
CreateMul
static BinaryOperator * CreateMul(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
Definition: Reassociate.cpp:246
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
FlattenInfo
Definition: LoopFlatten.cpp:78
TargetTransformInfo.h
llvm::createLoopFlattenPass
FunctionPass * createLoopFlattenPass()
Definition: LoopFlatten.cpp:823
llvm::PHINode
Definition: Instructions.h:2633
llvm::PatternMatch
Definition: PatternMatch.h:47
Flatten
bool Flatten(LoopNest &LN, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, TargetTransformInfo *TTI)
Definition: LoopFlatten.cpp:763
llvm::SmallPtrSetImpl< Instruction * >
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::LoopFlattenPass::run
PreservedAnalyses run(LoopNest &LN, LoopAnalysisManager &LAM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LoopFlatten.cpp:776
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:172
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::Loop::getInductionVariable
PHINode * getInductionVariable(ScalarEvolution &SE) const
Return the loop induction variable if found, else return nullptr.
Definition: LoopInfo.cpp:294
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:414
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3068
llvm::LoopNest::getLoopNest
static std::unique_ptr< LoopNest > getLoopNest(Loop &Root, ScalarEvolution &SE)
Construct a LoopNest object.
Definition: LoopNestAnalysis.cpp:48
raw_ostream.h
llvm::PatternMatch::m_Trunc
CastClass_match< OpTy, Instruction::Trunc > m_Trunc(const OpTy &Op)
Matches Trunc.
Definition: PatternMatch.h:1621
llvm::ScalarEvolution::getBackedgeTakenCount
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
Definition: ScalarEvolution.cpp:7284
InitializePasses.h
llvm::LoopNest
This class represents a loop nest and can be used to query its properties.
Definition: LoopNestAnalysis.h:28
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
Debug.h
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:422
llvm::DominatorTreeBase::deleteEdge
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
Definition: GenericDomTree.h:602
FlattenInfo::InnerLoop
Loop * InnerLoop
Definition: LoopFlatten.cpp:80
loops
loop Flattens loops
Definition: LoopFlatten.cpp:820
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3161
SimplifyIndVar.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37