LLVM  9.0.0svn
IndVarSimplify.cpp
Go to the documentation of this file.
1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This transformation analyzes and transforms the induction variables (and
11 // computations derived from them) into simpler forms suitable for subsequent
12 // analysis and transformation.
13 //
14 // If the trip count of a loop is computable, this pass also makes the following
15 // changes:
16 // 1. The exit condition for the loop is canonicalized to compare the
17 // induction value against the exit value. This turns loops like:
18 // 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
19 // 2. Any use outside of the loop of an expression derived from the indvar
20 // is changed to compute the derived value outside of the loop, eliminating
21 // the dependence on the exit value of the induction variable. If the only
22 // purpose of the loop is to compute the exit value of some derived
23 // expression, this transformation will make the loop dead.
24 //
25 //===----------------------------------------------------------------------===//
26 
28 #include "llvm/ADT/APFloat.h"
29 #include "llvm/ADT/APInt.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/DenseMap.h"
32 #include "llvm/ADT/None.h"
33 #include "llvm/ADT/Optional.h"
34 #include "llvm/ADT/STLExtras.h"
35 #include "llvm/ADT/SmallPtrSet.h"
36 #include "llvm/ADT/SmallVector.h"
37 #include "llvm/ADT/Statistic.h"
39 #include "llvm/Analysis/LoopInfo.h"
40 #include "llvm/Analysis/LoopPass.h"
47 #include "llvm/IR/BasicBlock.h"
48 #include "llvm/IR/Constant.h"
49 #include "llvm/IR/ConstantRange.h"
50 #include "llvm/IR/Constants.h"
51 #include "llvm/IR/DataLayout.h"
52 #include "llvm/IR/DerivedTypes.h"
53 #include "llvm/IR/Dominators.h"
54 #include "llvm/IR/Function.h"
55 #include "llvm/IR/IRBuilder.h"
56 #include "llvm/IR/InstrTypes.h"
57 #include "llvm/IR/Instruction.h"
58 #include "llvm/IR/Instructions.h"
59 #include "llvm/IR/IntrinsicInst.h"
60 #include "llvm/IR/Intrinsics.h"
61 #include "llvm/IR/Module.h"
62 #include "llvm/IR/Operator.h"
63 #include "llvm/IR/PassManager.h"
64 #include "llvm/IR/PatternMatch.h"
65 #include "llvm/IR/Type.h"
66 #include "llvm/IR/Use.h"
67 #include "llvm/IR/User.h"
68 #include "llvm/IR/Value.h"
69 #include "llvm/IR/ValueHandle.h"
70 #include "llvm/Pass.h"
71 #include "llvm/Support/Casting.h"
73 #include "llvm/Support/Compiler.h"
74 #include "llvm/Support/Debug.h"
78 #include "llvm/Transforms/Scalar.h"
83 #include <cassert>
84 #include <cstdint>
85 #include <utility>
86 
87 using namespace llvm;
88 
89 #define DEBUG_TYPE "indvars"
90 
91 STATISTIC(NumWidened , "Number of indvars widened");
92 STATISTIC(NumReplaced , "Number of exit values replaced");
93 STATISTIC(NumLFTR , "Number of loop exit tests replaced");
94 STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
95 STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
96 
97 // Trip count verification can be enabled by default under NDEBUG if we
98 // implement a strong expression equivalence checker in SCEV. Until then, we
99 // use the verify-indvars flag, which may assert in some cases.
101  "verify-indvars", cl::Hidden,
102  cl::desc("Verify the ScalarEvolution result after running indvars"));
103 
105 
107  "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
108  cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
109  cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
110  clEnumValN(OnlyCheapRepl, "cheap",
111  "only replace exit value when the cost is cheap"),
112  clEnumValN(AlwaysRepl, "always",
113  "always replace exit value whenever possible")));
114 
116  "indvars-post-increment-ranges", cl::Hidden,
117  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
118  cl::init(true));
119 
120 static cl::opt<bool>
121 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
122  cl::desc("Disable Linear Function Test Replace optimization"));
123 
124 namespace {
125 
126 struct RewritePhi;
127 
128 class IndVarSimplify {
129  LoopInfo *LI;
130  ScalarEvolution *SE;
131  DominatorTree *DT;
132  const DataLayout &DL;
133  TargetLibraryInfo *TLI;
134  const TargetTransformInfo *TTI;
135 
137 
138  bool isValidRewrite(Value *FromVal, Value *ToVal);
139 
140  bool handleFloatingPointIV(Loop *L, PHINode *PH);
141  bool rewriteNonIntegerIVs(Loop *L);
142 
143  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
144 
145  bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
146  bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
147  bool rewriteFirstIterationLoopExitValues(Loop *L);
148  bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const;
149 
150  bool linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
151  PHINode *IndVar, SCEVExpander &Rewriter);
152 
153  bool sinkUnusedInvariants(Loop *L);
154 
155 public:
156  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
157  const DataLayout &DL, TargetLibraryInfo *TLI,
158  TargetTransformInfo *TTI)
159  : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}
160 
161  bool run(Loop *L);
162 };
163 
164 } // end anonymous namespace
165 
166 /// Return true if the SCEV expansion generated by the rewriter can replace the
167 /// original value. SCEV guarantees that it produces the same value, but the way
168 /// it is produced may be illegal IR. Ideally, this function will only be
169 /// called for verification.
170 bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
171  // If an SCEV expression subsumed multiple pointers, its expansion could
172  // reassociate the GEP changing the base pointer. This is illegal because the
173  // final address produced by a GEP chain must be inbounds relative to its
174  // underlying object. Otherwise basic alias analysis, among other things,
175  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
176  // producing an expression involving multiple pointers. Until then, we must
177  // bail out here.
178  //
179  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
180  // because it understands lcssa phis while SCEV does not.
181  Value *FromPtr = FromVal;
182  Value *ToPtr = ToVal;
183  if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
184  FromPtr = GEP->getPointerOperand();
185  }
186  if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
187  ToPtr = GEP->getPointerOperand();
188  }
189  if (FromPtr != FromVal || ToPtr != ToVal) {
190  // Quickly check the common case
191  if (FromPtr == ToPtr)
192  return true;
193 
194  // SCEV may have rewritten an expression that produces the GEP's pointer
195  // operand. That's ok as long as the pointer operand has the same base
196  // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
197  // base of a recurrence. This handles the case in which SCEV expansion
198  // converts a pointer type recurrence into a nonrecurrent pointer base
199  // indexed by an integer recurrence.
200 
201  // If the GEP base pointer is a vector of pointers, abort.
202  if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
203  return false;
204 
205  const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
206  const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
207  if (FromBase == ToBase)
208  return true;
209 
210  LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase
211  << " != " << *ToBase << "\n");
212 
213  return false;
214  }
215  return true;
216 }
217 
218 /// Determine the insertion point for this user. By default, insert immediately
219 /// before the user. SCEVExpander or LICM will hoist loop invariants out of the
220 /// loop. For PHI nodes, there may be multiple uses, so compute the nearest
221 /// common dominator for the incoming blocks.
223  DominatorTree *DT, LoopInfo *LI) {
224  PHINode *PHI = dyn_cast<PHINode>(User);
225  if (!PHI)
226  return User;
227 
228  Instruction *InsertPt = nullptr;
229  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
230  if (PHI->getIncomingValue(i) != Def)
231  continue;
232 
233  BasicBlock *InsertBB = PHI->getIncomingBlock(i);
234  if (!InsertPt) {
235  InsertPt = InsertBB->getTerminator();
236  continue;
237  }
238  InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
239  InsertPt = InsertBB->getTerminator();
240  }
241  assert(InsertPt && "Missing phi operand");
242 
243  auto *DefI = dyn_cast<Instruction>(Def);
244  if (!DefI)
245  return InsertPt;
246 
247  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");
248 
249  auto *L = LI->getLoopFor(DefI->getParent());
250  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));
251 
252  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
253  if (LI->getLoopFor(DTN->getBlock()) == L)
254  return DTN->getBlock()->getTerminator();
255 
256  llvm_unreachable("DefI dominates InsertPt!");
257 }
258 
259 //===----------------------------------------------------------------------===//
260 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
261 //===----------------------------------------------------------------------===//
262 
263 /// Convert APF to an integer, if possible.
264 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
265  bool isExact = false;
266  // See if we can convert this to an int64_t
267  uint64_t UIntVal;
268  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
269  APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
270  !isExact)
271  return false;
272  IntVal = UIntVal;
273  return true;
274 }
275 
276 /// If the loop has floating induction variable then insert corresponding
277 /// integer induction variable if possible.
278 /// For example,
279 /// for(double i = 0; i < 10000; ++i)
280 /// bar(i)
281 /// is converted into
282 /// for(int i = 0; i < 10000; ++i)
283 /// bar((double)i);
284 bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
285  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
286  unsigned BackEdge = IncomingEdge^1;
287 
288  // Check incoming value.
289  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
290 
291  int64_t InitValue;
292  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
293  return false;
294 
295  // Check IV increment. Reject this PN if increment operation is not
296  // an add or increment value can not be represented by an integer.
297  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
298  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
299 
300  // If this is not an add of the PHI with a constantfp, or if the constant fp
301  // is not an integer, bail out.
302  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
303  int64_t IncValue;
304  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
305  !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
306  return false;
307 
308  // Check Incr uses. One user is PN and the other user is an exit condition
309  // used by the conditional terminator.
310  Value::user_iterator IncrUse = Incr->user_begin();
311  Instruction *U1 = cast<Instruction>(*IncrUse++);
312  if (IncrUse == Incr->user_end()) return false;
313  Instruction *U2 = cast<Instruction>(*IncrUse++);
314  if (IncrUse != Incr->user_end()) return false;
315 
316  // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
317  // only used by a branch, we can't transform it.
319  if (!Compare)
320  Compare = dyn_cast<FCmpInst>(U2);
321  if (!Compare || !Compare->hasOneUse() ||
322  !isa<BranchInst>(Compare->user_back()))
323  return false;
324 
325  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
326 
327  // We need to verify that the branch actually controls the iteration count
328  // of the loop. If not, the new IV can overflow and no one will notice.
329  // The branch block must be in the loop and one of the successors must be out
330  // of the loop.
331  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
332  if (!L->contains(TheBr->getParent()) ||
333  (L->contains(TheBr->getSuccessor(0)) &&
334  L->contains(TheBr->getSuccessor(1))))
335  return false;
336 
337  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
338  // transform it.
339  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
340  int64_t ExitValue;
341  if (ExitValueVal == nullptr ||
342  !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
343  return false;
344 
345  // Find new predicate for integer comparison.
347  switch (Compare->getPredicate()) {
348  default: return false; // Unknown comparison.
349  case CmpInst::FCMP_OEQ:
350  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
351  case CmpInst::FCMP_ONE:
352  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
353  case CmpInst::FCMP_OGT:
354  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
355  case CmpInst::FCMP_OGE:
356  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
357  case CmpInst::FCMP_OLT:
358  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
359  case CmpInst::FCMP_OLE:
360  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
361  }
362 
363  // We convert the floating point induction variable to a signed i32 value if
364  // we can. This is only safe if the comparison will not overflow in a way
365  // that won't be trapped by the integer equivalent operations. Check for this
366  // now.
367  // TODO: We could use i64 if it is native and the range requires it.
368 
369  // The start/stride/exit values must all fit in signed i32.
370  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
371  return false;
372 
373  // If not actually striding (add x, 0.0), avoid touching the code.
374  if (IncValue == 0)
375  return false;
376 
377  // Positive and negative strides have different safety conditions.
378  if (IncValue > 0) {
379  // If we have a positive stride, we require the init to be less than the
380  // exit value.
381  if (InitValue >= ExitValue)
382  return false;
383 
384  uint32_t Range = uint32_t(ExitValue-InitValue);
385  // Check for infinite loop, either:
386  // while (i <= Exit) or until (i > Exit)
387  if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
388  if (++Range == 0) return false; // Range overflows.
389  }
390 
391  unsigned Leftover = Range % uint32_t(IncValue);
392 
393  // If this is an equality comparison, we require that the strided value
394  // exactly land on the exit value, otherwise the IV condition will wrap
395  // around and do things the fp IV wouldn't.
396  if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
397  Leftover != 0)
398  return false;
399 
400  // If the stride would wrap around the i32 before exiting, we can't
401  // transform the IV.
402  if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
403  return false;
404  } else {
405  // If we have a negative stride, we require the init to be greater than the
406  // exit value.
407  if (InitValue <= ExitValue)
408  return false;
409 
410  uint32_t Range = uint32_t(InitValue-ExitValue);
411  // Check for infinite loop, either:
412  // while (i >= Exit) or until (i < Exit)
413  if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
414  if (++Range == 0) return false; // Range overflows.
415  }
416 
417  unsigned Leftover = Range % uint32_t(-IncValue);
418 
419  // If this is an equality comparison, we require that the strided value
420  // exactly land on the exit value, otherwise the IV condition will wrap
421  // around and do things the fp IV wouldn't.
422  if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
423  Leftover != 0)
424  return false;
425 
426  // If the stride would wrap around the i32 before exiting, we can't
427  // transform the IV.
428  if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
429  return false;
430  }
431 
433 
434  // Insert new integer induction variable.
435  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
436  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
437  PN->getIncomingBlock(IncomingEdge));
438 
439  Value *NewAdd =
440  BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
441  Incr->getName()+".int", Incr);
442  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
443 
444  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
445  ConstantInt::get(Int32Ty, ExitValue),
446  Compare->getName());
447 
448  // In the following deletions, PN may become dead and may be deleted.
449  // Use a WeakTrackingVH to observe whether this happens.
450  WeakTrackingVH WeakPH = PN;
451 
452  // Delete the old floating point exit comparison. The branch starts using the
453  // new comparison.
454  NewCompare->takeName(Compare);
455  Compare->replaceAllUsesWith(NewCompare);
457 
458  // Delete the old floating point increment.
459  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
461 
462  // If the FP induction variable still has uses, this is because something else
463  // in the loop uses its value. In order to canonicalize the induction
464  // variable, we chose to eliminate the IV and rewrite it in terms of an
465  // int->fp cast.
466  //
467  // We give preference to sitofp over uitofp because it is faster on most
468  // platforms.
469  if (WeakPH) {
470  Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
471  &*PN->getParent()->getFirstInsertionPt());
472  PN->replaceAllUsesWith(Conv);
474  }
475  return true;
476 }
477 
478 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
479  // First step. Check to see if there are any floating-point recurrences.
480  // If there are, change them into integer recurrences, permitting analysis by
481  // the SCEV routines.
482  BasicBlock *Header = L->getHeader();
483 
485  for (PHINode &PN : Header->phis())
486  PHIs.push_back(&PN);
487 
488  bool Changed = false;
489  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
490  if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
491  Changed |= handleFloatingPointIV(L, PN);
492 
493  // If the loop previously had floating-point IV, ScalarEvolution
494  // may not have been able to compute a trip count. Now that we've done some
495  // re-writing, the trip count may be computable.
496  if (Changed)
497  SE->forgetLoop(L);
498  return Changed;
499 }
500 
501 namespace {
502 
503 // Collect information about PHI nodes which can be transformed in
504 // rewriteLoopExitValues.
505 struct RewritePhi {
506  PHINode *PN;
507 
508  // Ith incoming value.
509  unsigned Ith;
510 
511  // Exit value after expansion.
512  Value *Val;
513 
514  // High Cost when expansion.
515  bool HighCost;
516 
517  RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
518  : PN(P), Ith(I), Val(V), HighCost(H) {}
519 };
520 
521 } // end anonymous namespace
522 
523 //===----------------------------------------------------------------------===//
524 // rewriteLoopExitValues - Optimize IV users outside the loop.
525 // As a side effect, reduces the amount of IV processing within the loop.
526 //===----------------------------------------------------------------------===//
527 
528 bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L, const Instruction *I) const {
531  Visited.insert(I);
532  WorkList.push_back(I);
533  while (!WorkList.empty()) {
534  const Instruction *Curr = WorkList.pop_back_val();
535  // This use is outside the loop, nothing to do.
536  if (!L->contains(Curr))
537  continue;
538  // Do we assume it is a "hard" use which will not be eliminated easily?
539  if (Curr->mayHaveSideEffects())
540  return true;
541  // Otherwise, add all its users to worklist.
542  for (auto U : Curr->users()) {
543  auto *UI = cast<Instruction>(U);
544  if (Visited.insert(UI).second)
545  WorkList.push_back(UI);
546  }
547  }
548  return false;
549 }
550 
551 /// Check to see if this loop has a computable loop-invariant execution count.
552 /// If so, this means that we can compute the final value of any expressions
553 /// that are recurrent in the loop, and substitute the exit values from the loop
554 /// into any instructions outside of the loop that use the final values of the
555 /// current expressions.
556 ///
557 /// This is mostly redundant with the regular IndVarSimplify activities that
558 /// happen later, except that it's more powerful in some cases, because it's
559 /// able to brute-force evaluate arbitrary instructions as long as they have
560 /// constant operands at the beginning of the loop.
561 bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
562  // Check a pre-condition.
563  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
564  "Indvars did not preserve LCSSA!");
565 
566  SmallVector<BasicBlock*, 8> ExitBlocks;
567  L->getUniqueExitBlocks(ExitBlocks);
568 
569  SmallVector<RewritePhi, 8> RewritePhiSet;
570  // Find all values that are computed inside the loop, but used outside of it.
571  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
572  // the exit blocks of the loop to find them.
573  for (BasicBlock *ExitBB : ExitBlocks) {
574  // If there are no PHI nodes in this exit block, then no values defined
575  // inside the loop are used on this path, skip it.
576  PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
577  if (!PN) continue;
578 
579  unsigned NumPreds = PN->getNumIncomingValues();
580 
581  // Iterate over all of the PHI nodes.
582  BasicBlock::iterator BBI = ExitBB->begin();
583  while ((PN = dyn_cast<PHINode>(BBI++))) {
584  if (PN->use_empty())
585  continue; // dead use, don't replace it
586 
587  if (!SE->isSCEVable(PN->getType()))
588  continue;
589 
590  // It's necessary to tell ScalarEvolution about this explicitly so that
591  // it can walk the def-use list and forget all SCEVs, as it may not be
592  // watching the PHI itself. Once the new exit value is in place, there
593  // may not be a def-use connection between the loop and every instruction
594  // which got a SCEVAddRecExpr for that loop.
595  SE->forgetValue(PN);
596 
597  // Iterate over all of the values in all the PHI nodes.
598  for (unsigned i = 0; i != NumPreds; ++i) {
599  // If the value being merged in is not integer or is not defined
600  // in the loop, skip it.
601  Value *InVal = PN->getIncomingValue(i);
602  if (!isa<Instruction>(InVal))
603  continue;
604 
605  // If this pred is for a subloop, not L itself, skip it.
606  if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
607  continue; // The Block is in a subloop, skip it.
608 
609  // Check that InVal is defined in the loop.
610  Instruction *Inst = cast<Instruction>(InVal);
611  if (!L->contains(Inst))
612  continue;
613 
614  // Okay, this instruction has a user outside of the current loop
615  // and varies predictably *inside* the loop. Evaluate the value it
616  // contains when the loop exits, if possible.
617  const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
618  if (!SE->isLoopInvariant(ExitValue, L) ||
619  !isSafeToExpand(ExitValue, *SE))
620  continue;
621 
622  // Computing the value outside of the loop brings no benefit if it is
623  // definitely used inside the loop in a way which can not be optimized
624  // away.
625  if (!isa<SCEVConstant>(ExitValue) && hasHardUserWithinLoop(L, Inst))
626  continue;
627 
628  bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
629  Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
630 
631  LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
632  << '\n'
633  << " LoopVal = " << *Inst << "\n");
634 
635  if (!isValidRewrite(Inst, ExitVal)) {
636  DeadInsts.push_back(ExitVal);
637  continue;
638  }
639 
640 #ifndef NDEBUG
641  // If we reuse an instruction from a loop which is neither L nor one of
642  // its containing loops, we end up breaking LCSSA form for this loop by
643  // creating a new use of its instruction.
644  if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
645  if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
646  if (EVL != L)
647  assert(EVL->contains(L) && "LCSSA breach detected!");
648 #endif
649 
650  // Collect all the candidate PHINodes to be rewritten.
651  RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
652  }
653  }
654  }
655 
656  bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
657 
658  bool Changed = false;
659  // Transformation.
660  for (const RewritePhi &Phi : RewritePhiSet) {
661  PHINode *PN = Phi.PN;
662  Value *ExitVal = Phi.Val;
663 
664  // Only do the rewrite when the ExitValue can be expanded cheaply.
665  // If LoopCanBeDel is true, rewrite exit value aggressively.
666  if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
667  DeadInsts.push_back(ExitVal);
668  continue;
669  }
670 
671  Changed = true;
672  ++NumReplaced;
673  Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
674  PN->setIncomingValue(Phi.Ith, ExitVal);
675 
676  // If this instruction is dead now, delete it. Don't do it now to avoid
677  // invalidating iterators.
678  if (isInstructionTriviallyDead(Inst, TLI))
679  DeadInsts.push_back(Inst);
680 
681  // Replace PN with ExitVal if that is legal and does not break LCSSA.
682  if (PN->getNumIncomingValues() == 1 &&
683  LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
684  PN->replaceAllUsesWith(ExitVal);
685  PN->eraseFromParent();
686  }
687  }
688 
689  // The insertion point instruction may have been deleted; clear it out
690  // so that the rewriter doesn't trip over it later.
691  Rewriter.clearInsertPoint();
692  return Changed;
693 }
694 
695 //===---------------------------------------------------------------------===//
696 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
697 // they will exit at the first iteration.
698 //===---------------------------------------------------------------------===//
699 
700 /// Check to see if this loop has loop invariant conditions which lead to loop
701 /// exits. If so, we know that if the exit path is taken, it is at the first
702 /// loop iteration. This lets us predict exit values of PHI nodes that live in
703 /// loop header.
704 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
705  // Verify the input to the pass is already in LCSSA form.
706  assert(L->isLCSSAForm(*DT));
707 
708  SmallVector<BasicBlock *, 8> ExitBlocks;
709  L->getUniqueExitBlocks(ExitBlocks);
710  auto *LoopHeader = L->getHeader();
711  assert(LoopHeader && "Invalid loop");
712 
713  bool MadeAnyChanges = false;
714  for (auto *ExitBB : ExitBlocks) {
715  // If there are no more PHI nodes in this exit block, then no more
716  // values defined inside the loop are used on this path.
717  for (PHINode &PN : ExitBB->phis()) {
718  for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
719  IncomingValIdx != E; ++IncomingValIdx) {
720  auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
721 
722  // We currently only support loop exits from loop header. If the
723  // incoming block is not loop header, we need to recursively check
724  // all conditions starting from loop header are loop invariants.
725  // Additional support might be added in the future.
726  if (IncomingBB != LoopHeader)
727  continue;
728 
729  // Get condition that leads to the exit path.
730  auto *TermInst = IncomingBB->getTerminator();
731 
732  Value *Cond = nullptr;
733  if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
734  // Must be a conditional branch, otherwise the block
735  // should not be in the loop.
736  Cond = BI->getCondition();
737  } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
738  Cond = SI->getCondition();
739  else
740  continue;
741 
742  if (!L->isLoopInvariant(Cond))
743  continue;
744 
745  auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
746 
747  // Only deal with PHIs.
748  if (!ExitVal)
749  continue;
750 
751  // If ExitVal is a PHI on the loop header, then we know its
752  // value along this exit because the exit can only be taken
753  // on the first iteration.
754  auto *LoopPreheader = L->getLoopPreheader();
755  assert(LoopPreheader && "Invalid loop");
756  int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
757  if (PreheaderIdx != -1) {
758  assert(ExitVal->getParent() == LoopHeader &&
759  "ExitVal must be in loop header");
760  MadeAnyChanges = true;
761  PN.setIncomingValue(IncomingValIdx,
762  ExitVal->getIncomingValue(PreheaderIdx));
763  }
764  }
765  }
766  }
767  return MadeAnyChanges;
768 }
769 
770 /// Check whether it is possible to delete the loop after rewriting exit
771 /// value. If it is possible, ignore ReplaceExitValue and do rewriting
772 /// aggressively.
773 bool IndVarSimplify::canLoopBeDeleted(
774  Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
775  BasicBlock *Preheader = L->getLoopPreheader();
776  // If there is no preheader, the loop will not be deleted.
777  if (!Preheader)
778  return false;
779 
780  // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1.
781  // We obviate multiple ExitingBlocks case for simplicity.
782  // TODO: If we see testcase with multiple ExitingBlocks can be deleted
783  // after exit value rewriting, we can enhance the logic here.
784  SmallVector<BasicBlock *, 4> ExitingBlocks;
785  L->getExitingBlocks(ExitingBlocks);
786  SmallVector<BasicBlock *, 8> ExitBlocks;
787  L->getUniqueExitBlocks(ExitBlocks);
788  if (ExitBlocks.size() > 1 || ExitingBlocks.size() > 1)
789  return false;
790 
791  BasicBlock *ExitBlock = ExitBlocks[0];
792  BasicBlock::iterator BI = ExitBlock->begin();
793  while (PHINode *P = dyn_cast<PHINode>(BI)) {
794  Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);
795 
796  // If the Incoming value of P is found in RewritePhiSet, we know it
797  // could be rewritten to use a loop invariant value in transformation
798  // phase later. Skip it in the loop invariant check below.
799  bool found = false;
800  for (const RewritePhi &Phi : RewritePhiSet) {
801  unsigned i = Phi.Ith;
802  if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
803  found = true;
804  break;
805  }
806  }
807 
808  Instruction *I;
809  if (!found && (I = dyn_cast<Instruction>(Incoming)))
810  if (!L->hasLoopInvariantOperands(I))
811  return false;
812 
813  ++BI;
814  }
815 
816  for (auto *BB : L->blocks())
817  if (llvm::any_of(*BB, [](Instruction &I) {
818  return I.mayHaveSideEffects();
819  }))
820  return false;
821 
822  return true;
823 }
824 
825 //===----------------------------------------------------------------------===//
826 // IV Widening - Extend the width of an IV to cover its widest uses.
827 //===----------------------------------------------------------------------===//
828 
829 namespace {
830 
831 // Collect information about induction variables that are used by sign/zero
832 // extend operations. This information is recorded by CollectExtend and provides
833 // the input to WidenIV.
834 struct WideIVInfo {
835  PHINode *NarrowIV = nullptr;
836 
837  // Widest integer type created [sz]ext
838  Type *WidestNativeType = nullptr;
839 
840  // Was a sext user seen before a zext?
841  bool IsSigned = false;
842 };
843 
844 } // end anonymous namespace
845 
846 /// Update information about the induction variable that is extended by this
847 /// sign or zero extend operation. This is used to determine the final width of
848 /// the IV before actually widening it.
849 static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
850  const TargetTransformInfo *TTI) {
851  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
852  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
853  return;
854 
855  Type *Ty = Cast->getType();
856  uint64_t Width = SE->getTypeSizeInBits(Ty);
857  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
858  return;
859 
860  // Check that `Cast` actually extends the induction variable (we rely on this
861  // later). This takes care of cases where `Cast` is extending a truncation of
862  // the narrow induction variable, and thus can end up being narrower than the
863  // "narrow" induction variable.
864  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
865  if (NarrowIVWidth >= Width)
866  return;
867 
868  // Cast is either an sext or zext up to this point.
869  // We should not widen an indvar if arithmetics on the wider indvar are more
870  // expensive than those on the narrower indvar. We check only the cost of ADD
871  // because at least an ADD is required to increment the induction variable. We
872  // could compute more comprehensively the cost of all instructions on the
873  // induction variable when necessary.
874  if (TTI &&
877  Cast->getOperand(0)->getType())) {
878  return;
879  }
880 
881  if (!WI.WidestNativeType) {
882  WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
883  WI.IsSigned = IsSigned;
884  return;
885  }
886 
887  // We extend the IV to satisfy the sign of its first user, arbitrarily.
888  if (WI.IsSigned != IsSigned)
889  return;
890 
891  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
892  WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
893 }
894 
895 namespace {
896 
897 /// Record a link in the Narrow IV def-use chain along with the WideIV that
898 /// computes the same value as the Narrow IV def. This avoids caching Use*
899 /// pointers.
900 struct NarrowIVDefUse {
901  Instruction *NarrowDef = nullptr;
902  Instruction *NarrowUse = nullptr;
903  Instruction *WideDef = nullptr;
904 
905  // True if the narrow def is never negative. Tracking this information lets
906  // us use a sign extension instead of a zero extension or vice versa, when
907  // profitable and legal.
908  bool NeverNegative = false;
909 
910  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
911  bool NeverNegative)
912  : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
913  NeverNegative(NeverNegative) {}
914 };
915 
916 /// The goal of this transform is to remove sign and zero extends without
917 /// creating any new induction variables. To do this, it creates a new phi of
918 /// the wider type and redirects all users, either removing extends or inserting
919 /// truncs whenever we stop propagating the type.
920 class WidenIV {
921  // Parameters
922  PHINode *OrigPhi;
923  Type *WideType;
924 
925  // Context
926  LoopInfo *LI;
927  Loop *L;
928  ScalarEvolution *SE;
929  DominatorTree *DT;
930 
931  // Does the module have any calls to the llvm.experimental.guard intrinsic
932  // at all? If not we can avoid scanning instructions looking for guards.
933  bool HasGuards;
934 
935  // Result
936  PHINode *WidePhi = nullptr;
937  Instruction *WideInc = nullptr;
938  const SCEV *WideIncExpr = nullptr;
940 
942  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
943 
944  enum ExtendKind { ZeroExtended, SignExtended, Unknown };
945 
946  // A map tracking the kind of extension used to widen each narrow IV
947  // and narrow IV user.
948  // Key: pointer to a narrow IV or IV user.
949  // Value: the kind of extension used to widen this Instruction.
950  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;
951 
952  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;
953 
954  // A map with control-dependent ranges for post increment IV uses. The key is
955  // a pair of IV def and a use of this def denoting the context. The value is
956  // a ConstantRange representing possible values of the def at the given
957  // context.
958  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;
959 
960  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
961  Instruction *UseI) {
962  DefUserPair Key(Def, UseI);
963  auto It = PostIncRangeInfos.find(Key);
964  return It == PostIncRangeInfos.end()
966  : Optional<ConstantRange>(It->second);
967  }
968 
969  void calculatePostIncRanges(PHINode *OrigPhi);
970  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);
971 
972  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
973  DefUserPair Key(Def, UseI);
974  auto It = PostIncRangeInfos.find(Key);
975  if (It == PostIncRangeInfos.end())
976  PostIncRangeInfos.insert({Key, R});
977  else
978  It->second = R.intersectWith(It->second);
979  }
980 
981 public:
982  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
984  bool HasGuards)
985  : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
986  L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
987  HasGuards(HasGuards), DeadInsts(DI) {
988  assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
989  ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
990  }
991 
992  PHINode *createWideIV(SCEVExpander &Rewriter);
993 
994 protected:
995  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
996  Instruction *Use);
997 
998  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
999  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
1000  const SCEVAddRecExpr *WideAR);
1001  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);
1002 
1003  ExtendKind getExtendKind(Instruction *I);
1004 
1005  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;
1006 
1007  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);
1008 
1009  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);
1010 
1011  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
1012  unsigned OpCode) const;
1013 
1014  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
1015 
1016  bool widenLoopCompare(NarrowIVDefUse DU);
1017  bool widenWithVariantLoadUse(NarrowIVDefUse DU);
1018  void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);
1019 
1020  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
1021 };
1022 
1023 } // end anonymous namespace
1024 
1025 /// Perform a quick domtree based check for loop invariance assuming that V is
1026 /// used within the loop. LoopInfo::isLoopInvariant() seems gratuitous for this
1027 /// purpose.
1028 static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT) {
1029  Instruction *Inst = dyn_cast<Instruction>(V);
1030  if (!Inst)
1031  return true;
1032 
1033  return DT->properlyDominates(Inst->getParent(), L->getHeader());
1034 }
1035 
1036 Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
1037  bool IsSigned, Instruction *Use) {
1038  // Set the debug location and conservative insertion point.
1039  IRBuilder<> Builder(Use);
1040  // Hoist the insertion point into loop preheaders as far as possible.
1041  for (const Loop *L = LI->getLoopFor(Use->getParent());
1042  L && L->getLoopPreheader() && isLoopInvariant(NarrowOper, L, DT);
1043  L = L->getParentLoop())
1045 
1046  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
1047  Builder.CreateZExt(NarrowOper, WideType);
1048 }
1049 
1050 /// Instantiate a wide operation to replace a narrow operation. This only needs
1051 /// to handle operations that can evaluation to SCEVAddRec. It can safely return
1052 /// 0 for any operation we decide not to clone.
1053 Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
1054  const SCEVAddRecExpr *WideAR) {
1055  unsigned Opcode = DU.NarrowUse->getOpcode();
1056  switch (Opcode) {
1057  default:
1058  return nullptr;
1059  case Instruction::Add:
1060  case Instruction::Mul:
1061  case Instruction::UDiv:
1062  case Instruction::Sub:
1063  return cloneArithmeticIVUser(DU, WideAR);
1064 
1065  case Instruction::And:
1066  case Instruction::Or:
1067  case Instruction::Xor:
1068  case Instruction::Shl:
1069  case Instruction::LShr:
1070  case Instruction::AShr:
1071  return cloneBitwiseIVUser(DU);
1072  }
1073 }
1074 
1075 Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
1076  Instruction *NarrowUse = DU.NarrowUse;
1077  Instruction *NarrowDef = DU.NarrowDef;
1078  Instruction *WideDef = DU.WideDef;
1079 
1080  LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");
1081 
1082  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
1083  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
1084  // invariant and will be folded or hoisted. If it actually comes from a
1085  // widened IV, it should be removed during a future call to widenIVUse.
1086  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
1087  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1088  ? WideDef
1089  : createExtendInst(NarrowUse->getOperand(0), WideType,
1090  IsSigned, NarrowUse);
1091  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1092  ? WideDef
1093  : createExtendInst(NarrowUse->getOperand(1), WideType,
1094  IsSigned, NarrowUse);
1095 
1096  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1097  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1098  NarrowBO->getName());
1099  IRBuilder<> Builder(NarrowUse);
1100  Builder.Insert(WideBO);
1101  WideBO->copyIRFlags(NarrowBO);
1102  return WideBO;
1103 }
1104 
1105 Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
1106  const SCEVAddRecExpr *WideAR) {
1107  Instruction *NarrowUse = DU.NarrowUse;
1108  Instruction *NarrowDef = DU.NarrowDef;
1109  Instruction *WideDef = DU.WideDef;
1110 
1111  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1112 
1113  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;
1114 
1115  // We're trying to find X such that
1116  //
1117  // Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
1118  //
1119  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
1120  // and check using SCEV if any of them are correct.
1121 
1122  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
1123  // correct solution to X.
1124  auto GuessNonIVOperand = [&](bool SignExt) {
1125  const SCEV *WideLHS;
1126  const SCEV *WideRHS;
1127 
1128  auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
1129  if (SignExt)
1130  return SE->getSignExtendExpr(S, Ty);
1131  return SE->getZeroExtendExpr(S, Ty);
1132  };
1133 
1134  if (IVOpIdx == 0) {
1135  WideLHS = SE->getSCEV(WideDef);
1136  const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
1137  WideRHS = GetExtend(NarrowRHS, WideType);
1138  } else {
1139  const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
1140  WideLHS = GetExtend(NarrowLHS, WideType);
1141  WideRHS = SE->getSCEV(WideDef);
1142  }
1143 
1144  // WideUse is "WideDef `op.wide` X" as described in the comment.
1145  const SCEV *WideUse = nullptr;
1146 
1147  switch (NarrowUse->getOpcode()) {
1148  default:
1149  llvm_unreachable("No other possibility!");
1150 
1151  case Instruction::Add:
1152  WideUse = SE->getAddExpr(WideLHS, WideRHS);
1153  break;
1154 
1155  case Instruction::Mul:
1156  WideUse = SE->getMulExpr(WideLHS, WideRHS);
1157  break;
1158 
1159  case Instruction::UDiv:
1160  WideUse = SE->getUDivExpr(WideLHS, WideRHS);
1161  break;
1162 
1163  case Instruction::Sub:
1164  WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
1165  break;
1166  }
1167 
1168  return WideUse == WideAR;
1169  };
1170 
1171  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
1172  if (!GuessNonIVOperand(SignExtend)) {
1173  SignExtend = !SignExtend;
1174  if (!GuessNonIVOperand(SignExtend))
1175  return nullptr;
1176  }
1177 
1178  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1179  ? WideDef
1180  : createExtendInst(NarrowUse->getOperand(0), WideType,
1181  SignExtend, NarrowUse);
1182  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1183  ? WideDef
1184  : createExtendInst(NarrowUse->getOperand(1), WideType,
1185  SignExtend, NarrowUse);
1186 
1187  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1188  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1189  NarrowBO->getName());
1190 
1191  IRBuilder<> Builder(NarrowUse);
1192  Builder.Insert(WideBO);
1193  WideBO->copyIRFlags(NarrowBO);
1194  return WideBO;
1195 }
1196 
1197 WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
1198  auto It = ExtendKindMap.find(I);
1199  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
1200  return It->second;
1201 }
1202 
1203 const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
1204  unsigned OpCode) const {
1205  if (OpCode == Instruction::Add)
1206  return SE->getAddExpr(LHS, RHS);
1207  if (OpCode == Instruction::Sub)
1208  return SE->getMinusSCEV(LHS, RHS);
1209  if (OpCode == Instruction::Mul)
1210  return SE->getMulExpr(LHS, RHS);
1211 
1212  llvm_unreachable("Unsupported opcode.");
1213 }
1214 
1215 /// No-wrap operations can transfer sign extension of their result to their
1216 /// operands. Generate the SCEV value for the widened operation without
1217 /// actually modifying the IR yet. If the expression after extending the
1218 /// operands is an AddRec for this loop, return the AddRec and the kind of
1219 /// extension used.
1220 WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
1221  // Handle the common case of add<nsw/nuw>
1222  const unsigned OpCode = DU.NarrowUse->getOpcode();
1223  // Only Add/Sub/Mul instructions supported yet.
1224  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1225  OpCode != Instruction::Mul)
1226  return {nullptr, Unknown};
1227 
1228  // One operand (NarrowDef) has already been extended to WideDef. Now determine
1229  // if extending the other will lead to a recurrence.
1230  const unsigned ExtendOperIdx =
1231  DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
1232  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
1233 
1234  const SCEV *ExtendOperExpr = nullptr;
1235  const OverflowingBinaryOperator *OBO =
1236  cast<OverflowingBinaryOperator>(DU.NarrowUse);
1237  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
1238  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1239  ExtendOperExpr = SE->getSignExtendExpr(
1240  SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
1241  else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1242  ExtendOperExpr = SE->getZeroExtendExpr(
1243  SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
1244  else
1245  return {nullptr, Unknown};
1246 
1247  // When creating this SCEV expr, don't apply the current operations NSW or NUW
1248  // flags. This instruction may be guarded by control flow that the no-wrap
1249  // behavior depends on. Non-control-equivalent instructions can be mapped to
1250  // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
1251  // semantics to those operations.
1252  const SCEV *lhs = SE->getSCEV(DU.WideDef);
1253  const SCEV *rhs = ExtendOperExpr;
1254 
1255  // Let's swap operands to the initial order for the case of non-commutative
1256  // operations, like SUB. See PR21014.
1257  if (ExtendOperIdx == 0)
1258  std::swap(lhs, rhs);
1259  const SCEVAddRecExpr *AddRec =
1260  dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
1261 
1262  if (!AddRec || AddRec->getLoop() != L)
1263  return {nullptr, Unknown};
1264 
1265  return {AddRec, ExtKind};
1266 }
1267 
1268 /// Is this instruction potentially interesting for further simplification after
1269 /// widening it's type? In other words, can the extend be safely hoisted out of
1270 /// the loop with SCEV reducing the value to a recurrence on the same loop. If
1271 /// so, return the extended recurrence and the kind of extension used. Otherwise
1272 /// return {nullptr, Unknown}.
1273 WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
1274  if (!SE->isSCEVable(DU.NarrowUse->getType()))
1275  return {nullptr, Unknown};
1276 
1277  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
1278  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
1279  SE->getTypeSizeInBits(WideType)) {
1280  // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
1281  // index. So don't follow this use.
1282  return {nullptr, Unknown};
1283  }
1284 
1285  const SCEV *WideExpr;
1286  ExtendKind ExtKind;
1287  if (DU.NeverNegative) {
1288  WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1289  if (isa<SCEVAddRecExpr>(WideExpr))
1290  ExtKind = SignExtended;
1291  else {
1292  WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1293  ExtKind = ZeroExtended;
1294  }
1295  } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
1296  WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1297  ExtKind = SignExtended;
1298  } else {
1299  WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1300  ExtKind = ZeroExtended;
1301  }
1302  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
1303  if (!AddRec || AddRec->getLoop() != L)
1304  return {nullptr, Unknown};
1305  return {AddRec, ExtKind};
1306 }
1307 
1308 /// This IV user cannot be widen. Replace this use of the original narrow IV
1309 /// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
1310 static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
1311  LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
1312  << *DU.NarrowUse << "\n");
1313  IRBuilder<> Builder(
1314  getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI));
1315  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
1316  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
1317 }
1318 
1319 /// If the narrow use is a compare instruction, then widen the compare
1320 // (and possibly the other operand). The extend operation is hoisted into the
1321 // loop preheader as far as possible.
1322 bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
1323  ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
1324  if (!Cmp)
1325  return false;
1326 
1327  // We can legally widen the comparison in the following two cases:
1328  //
1329  // - The signedness of the IV extension and comparison match
1330  //
1331  // - The narrow IV is always positive (and thus its sign extension is equal
1332  // to its zero extension). For instance, let's say we're zero extending
1333  // %narrow for the following use
1334  //
1335  // icmp slt i32 %narrow, %val ... (A)
1336  //
1337  // and %narrow is always positive. Then
1338  //
1339  // (A) == icmp slt i32 sext(%narrow), sext(%val)
1340  // == icmp slt i32 zext(%narrow), sext(%val)
1341  bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
1342  if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
1343  return false;
1344 
1345  Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
1346  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
1347  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1348  assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
1349 
1350  // Widen the compare instruction.
1351  IRBuilder<> Builder(
1352  getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI));
1353  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1354 
1355  // Widen the other operand of the compare, if necessary.
1356  if (CastWidth < IVWidth) {
1357  Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
1358  DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
1359  }
1360  return true;
1361 }
1362 
1363 /// If the narrow use is an instruction whose two operands are the defining
1364 /// instruction of DU and a load instruction, then we have the following:
1365 /// if the load is hoisted outside the loop, then we do not reach this function
1366 /// as scalar evolution analysis works fine in widenIVUse with variables
1367 /// hoisted outside the loop and efficient code is subsequently generated by
1368 /// not emitting truncate instructions. But when the load is not hoisted
1369 /// (whether due to limitation in alias analysis or due to a true legality),
1370 /// then scalar evolution can not proceed with loop variant values and
1371 /// inefficient code is generated. This function handles the non-hoisted load
1372 /// special case by making the optimization generate the same type of code for
1373 /// hoisted and non-hoisted load (widen use and eliminate sign extend
1374 /// instruction). This special case is important especially when the induction
1375 /// variables are affecting addressing mode in code generation.
1376 bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
1377  Instruction *NarrowUse = DU.NarrowUse;
1378  Instruction *NarrowDef = DU.NarrowDef;
1379  Instruction *WideDef = DU.WideDef;
1380 
1381  // Handle the common case of add<nsw/nuw>
1382  const unsigned OpCode = NarrowUse->getOpcode();
1383  // Only Add/Sub/Mul instructions are supported.
1384  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1385  OpCode != Instruction::Mul)
1386  return false;
1387 
1388  // The operand that is not defined by NarrowDef of DU. Let's call it the
1389  // other operand.
1390  unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
1391  assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
1392  "bad DU");
1393 
1394  const SCEV *ExtendOperExpr = nullptr;
1395  const OverflowingBinaryOperator *OBO =
1396  cast<OverflowingBinaryOperator>(NarrowUse);
1397  ExtendKind ExtKind = getExtendKind(NarrowDef);
1398  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1399  ExtendOperExpr = SE->getSignExtendExpr(
1400  SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1401  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1402  ExtendOperExpr = SE->getZeroExtendExpr(
1403  SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1404  else
1405  return false;
1406 
1407  // We are interested in the other operand being a load instruction.
1408  // But, we should look into relaxing this restriction later on.
1409  auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx));
1410  if (I && I->getOpcode() != Instruction::Load)
1411  return false;
1412 
1413  // Verifying that Defining operand is an AddRec
1414  const SCEV *Op1 = SE->getSCEV(WideDef);
1415  const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
1416  if (!AddRecOp1 || AddRecOp1->getLoop() != L)
1417  return false;
1418  // Verifying that other operand is an Extend.
1419  if (ExtKind == SignExtended) {
1420  if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
1421  return false;
1422  } else {
1423  if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
1424  return false;
1425  }
1426 
1427  if (ExtKind == SignExtended) {
1428  for (Use &U : NarrowUse->uses()) {
1429  SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1430  if (!User || User->getType() != WideType)
1431  return false;
1432  }
1433  } else { // ExtKind == ZeroExtended
1434  for (Use &U : NarrowUse->uses()) {
1435  ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1436  if (!User || User->getType() != WideType)
1437  return false;
1438  }
1439  }
1440 
1441  return true;
1442 }
1443 
1444 /// Special Case for widening with variant Loads (see
1445 /// WidenIV::widenWithVariantLoadUse). This is the code generation part.
1446 void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
1447  Instruction *NarrowUse = DU.NarrowUse;
1448  Instruction *NarrowDef = DU.NarrowDef;
1449  Instruction *WideDef = DU.WideDef;
1450 
1451  ExtendKind ExtKind = getExtendKind(NarrowDef);
1452 
1453  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1454 
1455  // Generating a widening use instruction.
1456  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1457  ? WideDef
1458  : createExtendInst(NarrowUse->getOperand(0), WideType,
1459  ExtKind, NarrowUse);
1460  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1461  ? WideDef
1462  : createExtendInst(NarrowUse->getOperand(1), WideType,
1463  ExtKind, NarrowUse);
1464 
1465  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1466  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1467  NarrowBO->getName());
1468  IRBuilder<> Builder(NarrowUse);
1469  Builder.Insert(WideBO);
1470  WideBO->copyIRFlags(NarrowBO);
1471 
1472  if (ExtKind == SignExtended)
1473  ExtendKindMap[NarrowUse] = SignExtended;
1474  else
1475  ExtendKindMap[NarrowUse] = ZeroExtended;
1476 
1477  // Update the Use.
1478  if (ExtKind == SignExtended) {
1479  for (Use &U : NarrowUse->uses()) {
1480  SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1481  if (User && User->getType() == WideType) {
1482  LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1483  << *WideBO << "\n");
1484  ++NumElimExt;
1485  User->replaceAllUsesWith(WideBO);
1486  DeadInsts.emplace_back(User);
1487  }
1488  }
1489  } else { // ExtKind == ZeroExtended
1490  for (Use &U : NarrowUse->uses()) {
1491  ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1492  if (User && User->getType() == WideType) {
1493  LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1494  << *WideBO << "\n");
1495  ++NumElimExt;
1496  User->replaceAllUsesWith(WideBO);
1497  DeadInsts.emplace_back(User);
1498  }
1499  }
1500  }
1501 }
1502 
1503 /// Determine whether an individual user of the narrow IV can be widened. If so,
1504 /// return the wide clone of the user.
1505 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1506  assert(ExtendKindMap.count(DU.NarrowDef) &&
1507  "Should already know the kind of extension used to widen NarrowDef");
1508 
1509  // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1510  if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1511  if (LI->getLoopFor(UsePhi->getParent()) != L) {
1512  // For LCSSA phis, sink the truncate outside the loop.
1513  // After SimplifyCFG most loop exit targets have a single predecessor.
1514  // Otherwise fall back to a truncate within the loop.
1515  if (UsePhi->getNumOperands() != 1)
1516  truncateIVUse(DU, DT, LI);
1517  else {
1518  // Widening the PHI requires us to insert a trunc. The logical place
1519  // for this trunc is in the same BB as the PHI. This is not possible if
1520  // the BB is terminated by a catchswitch.
1521  if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1522  return nullptr;
1523 
1524  PHINode *WidePhi =
1525  PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1526  UsePhi);
1527  WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1528  IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
1529  Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1530  UsePhi->replaceAllUsesWith(Trunc);
1531  DeadInsts.emplace_back(UsePhi);
1532  LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
1533  << *WidePhi << "\n");
1534  }
1535  return nullptr;
1536  }
1537  }
1538 
1539  // This narrow use can be widened by a sext if it's non-negative or its narrow
1540  // def was widended by a sext. Same for zext.
1541  auto canWidenBySExt = [&]() {
1542  return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
1543  };
1544  auto canWidenByZExt = [&]() {
1545  return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
1546  };
1547 
1548  // Our raison d'etre! Eliminate sign and zero extension.
1549  if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
1550  (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1551  Value *NewDef = DU.WideDef;
1552  if (DU.NarrowUse->getType() != WideType) {
1553  unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1554  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1555  if (CastWidth < IVWidth) {
1556  // The cast isn't as wide as the IV, so insert a Trunc.
1557  IRBuilder<> Builder(DU.NarrowUse);
1558  NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1559  }
1560  else {
1561  // A wider extend was hidden behind a narrower one. This may induce
1562  // another round of IV widening in which the intermediate IV becomes
1563  // dead. It should be very rare.
1564  LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1565  << " not wide enough to subsume " << *DU.NarrowUse
1566  << "\n");
1567  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1568  NewDef = DU.NarrowUse;
1569  }
1570  }
1571  if (NewDef != DU.NarrowUse) {
1572  LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1573  << " replaced by " << *DU.WideDef << "\n");
1574  ++NumElimExt;
1575  DU.NarrowUse->replaceAllUsesWith(NewDef);
1576  DeadInsts.emplace_back(DU.NarrowUse);
1577  }
1578  // Now that the extend is gone, we want to expose it's uses for potential
1579  // further simplification. We don't need to directly inform SimplifyIVUsers
1580  // of the new users, because their parent IV will be processed later as a
1581  // new loop phi. If we preserved IVUsers analysis, we would also want to
1582  // push the uses of WideDef here.
1583 
1584  // No further widening is needed. The deceased [sz]ext had done it for us.
1585  return nullptr;
1586  }
1587 
1588  // Does this user itself evaluate to a recurrence after widening?
1589  WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1590  if (!WideAddRec.first)
1591  WideAddRec = getWideRecurrence(DU);
1592 
1593  assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
1594  if (!WideAddRec.first) {
1595  // If use is a loop condition, try to promote the condition instead of
1596  // truncating the IV first.
1597  if (widenLoopCompare(DU))
1598  return nullptr;
1599 
1600  // We are here about to generate a truncate instruction that may hurt
1601  // performance because the scalar evolution expression computed earlier
1602  // in WideAddRec.first does not indicate a polynomial induction expression.
1603  // In that case, look at the operands of the use instruction to determine
1604  // if we can still widen the use instead of truncating its operand.
1605  if (widenWithVariantLoadUse(DU)) {
1606  widenWithVariantLoadUseCodegen(DU);
1607  return nullptr;
1608  }
1609 
1610  // This user does not evaluate to a recurrence after widening, so don't
1611  // follow it. Instead insert a Trunc to kill off the original use,
1612  // eventually isolating the original narrow IV so it can be removed.
1613  truncateIVUse(DU, DT, LI);
1614  return nullptr;
1615  }
1616  // Assume block terminators cannot evaluate to a recurrence. We can't to
1617  // insert a Trunc after a terminator if there happens to be a critical edge.
1618  assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1619  "SCEV is not expected to evaluate a block terminator");
1620 
1621  // Reuse the IV increment that SCEVExpander created as long as it dominates
1622  // NarrowUse.
1623  Instruction *WideUse = nullptr;
1624  if (WideAddRec.first == WideIncExpr &&
1625  Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1626  WideUse = WideInc;
1627  else {
1628  WideUse = cloneIVUser(DU, WideAddRec.first);
1629  if (!WideUse)
1630  return nullptr;
1631  }
1632  // Evaluation of WideAddRec ensured that the narrow expression could be
1633  // extended outside the loop without overflow. This suggests that the wide use
1634  // evaluates to the same expression as the extended narrow use, but doesn't
1635  // absolutely guarantee it. Hence the following failsafe check. In rare cases
1636  // where it fails, we simply throw away the newly created wide use.
1637  if (WideAddRec.first != SE->getSCEV(WideUse)) {
1638  LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
1639  << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
1640  << "\n");
1641  DeadInsts.emplace_back(WideUse);
1642  return nullptr;
1643  }
1644 
1645  ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1646  // Returning WideUse pushes it on the worklist.
1647  return WideUse;
1648 }
1649 
1650 /// Add eligible users of NarrowDef to NarrowIVUsers.
1651 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1652  const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1653  bool NonNegativeDef =
1654  SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1655  SE->getConstant(NarrowSCEV->getType(), 0));
1656  for (User *U : NarrowDef->users()) {
1657  Instruction *NarrowUser = cast<Instruction>(U);
1658 
1659  // Handle data flow merges and bizarre phi cycles.
1660  if (!Widened.insert(NarrowUser).second)
1661  continue;
1662 
1663  bool NonNegativeUse = false;
1664  if (!NonNegativeDef) {
1665  // We might have a control-dependent range information for this context.
1666  if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1667  NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1668  }
1669 
1670  NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1671  NonNegativeDef || NonNegativeUse);
1672  }
1673 }
1674 
1675 /// Process a single induction variable. First use the SCEVExpander to create a
1676 /// wide induction variable that evaluates to the same recurrence as the
1677 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1678 /// def-use chain. After widenIVUse has processed all interesting IV users, the
1679 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
1680 ///
1681 /// It would be simpler to delete uses as they are processed, but we must avoid
1682 /// invalidating SCEV expressions.
1683 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
1684  // Is this phi an induction variable?
1685  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
1686  if (!AddRec)
1687  return nullptr;
1688 
1689  // Widen the induction variable expression.
1690  const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
1691  ? SE->getSignExtendExpr(AddRec, WideType)
1692  : SE->getZeroExtendExpr(AddRec, WideType);
1693 
1694  assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
1695  "Expect the new IV expression to preserve its type");
1696 
1697  // Can the IV be extended outside the loop without overflow?
1698  AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
1699  if (!AddRec || AddRec->getLoop() != L)
1700  return nullptr;
1701 
1702  // An AddRec must have loop-invariant operands. Since this AddRec is
1703  // materialized by a loop header phi, the expression cannot have any post-loop
1704  // operands, so they must dominate the loop header.
1705  assert(
1706  SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
1707  SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
1708  "Loop header phi recurrence inputs do not dominate the loop");
1709 
1710  // Iterate over IV uses (including transitive ones) looking for IV increments
1711  // of the form 'add nsw %iv, <const>'. For each increment and each use of
1712  // the increment calculate control-dependent range information basing on
1713  // dominating conditions inside of the loop (e.g. a range check inside of the
1714  // loop). Calculated ranges are stored in PostIncRangeInfos map.
1715  //
1716  // Control-dependent range information is later used to prove that a narrow
1717  // definition is not negative (see pushNarrowIVUsers). It's difficult to do
1718  // this on demand because when pushNarrowIVUsers needs this information some
1719  // of the dominating conditions might be already widened.
1720  if (UsePostIncrementRanges)
1721  calculatePostIncRanges(OrigPhi);
1722 
1723  // The rewriter provides a value for the desired IV expression. This may
1724  // either find an existing phi or materialize a new one. Either way, we
1725  // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
1726  // of the phi-SCC dominates the loop entry.
1727  Instruction *InsertPt = &L->getHeader()->front();
1728  WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
1729 
1730  // Remembering the WideIV increment generated by SCEVExpander allows
1731  // widenIVUse to reuse it when widening the narrow IV's increment. We don't
1732  // employ a general reuse mechanism because the call above is the only call to
1733  // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
1734  if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1735  WideInc =
1736  cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
1737  WideIncExpr = SE->getSCEV(WideInc);
1738  // Propagate the debug location associated with the original loop increment
1739  // to the new (widened) increment.
1740  auto *OrigInc =
1741  cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1742  WideInc->setDebugLoc(OrigInc->getDebugLoc());
1743  }
1744 
1745  LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
1746  ++NumWidened;
1747 
1748  // Traverse the def-use chain using a worklist starting at the original IV.
1749  assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" );
1750 
1751  Widened.insert(OrigPhi);
1752  pushNarrowIVUsers(OrigPhi, WidePhi);
1753 
1754  while (!NarrowIVUsers.empty()) {
1755  NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
1756 
1757  // Process a def-use edge. This may replace the use, so don't hold a
1758  // use_iterator across it.
1759  Instruction *WideUse = widenIVUse(DU, Rewriter);
1760 
1761  // Follow all def-use edges from the previous narrow use.
1762  if (WideUse)
1763  pushNarrowIVUsers(DU.NarrowUse, WideUse);
1764 
1765  // widenIVUse may have removed the def-use edge.
1766  if (DU.NarrowDef->use_empty())
1767  DeadInsts.emplace_back(DU.NarrowDef);
1768  }
1769 
1770  // Attach any debug information to the new PHI. Since OrigPhi and WidePHI
1771  // evaluate the same recurrence, we can just copy the debug info over.
1773  llvm::findDbgValues(DbgValues, OrigPhi);
1774  auto *MDPhi = MetadataAsValue::get(WidePhi->getContext(),
1775  ValueAsMetadata::get(WidePhi));
1776  for (auto &DbgValue : DbgValues)
1777  DbgValue->setOperand(0, MDPhi);
1778  return WidePhi;
1779 }
1780 
1781 /// Calculates control-dependent range for the given def at the given context
1782 /// by looking at dominating conditions inside of the loop
1783 void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
1784  Instruction *NarrowUser) {
1785  using namespace llvm::PatternMatch;
1786 
1787  Value *NarrowDefLHS;
1788  const APInt *NarrowDefRHS;
1789  if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
1790  m_APInt(NarrowDefRHS))) ||
1791  !NarrowDefRHS->isNonNegative())
1792  return;
1793 
1794  auto UpdateRangeFromCondition = [&] (Value *Condition,
1795  bool TrueDest) {
1796  CmpInst::Predicate Pred;
1797  Value *CmpRHS;
1798  if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
1799  m_Value(CmpRHS))))
1800  return;
1801 
1803  TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
1804 
1805  auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
1806  auto CmpConstrainedLHSRange =
1807  ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
1808  auto NarrowDefRange =
1809  CmpConstrainedLHSRange.addWithNoSignedWrap(*NarrowDefRHS);
1810 
1811  updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
1812  };
1813 
1814  auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
1815  if (!HasGuards)
1816  return;
1817 
1818  for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
1819  Ctx->getParent()->rend())) {
1820  Value *C = nullptr;
1821  if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
1822  UpdateRangeFromCondition(C, /*TrueDest=*/true);
1823  }
1824  };
1825 
1826  UpdateRangeFromGuards(NarrowUser);
1827 
1828  BasicBlock *NarrowUserBB = NarrowUser->getParent();
1829  // If NarrowUserBB is statically unreachable asking dominator queries may
1830  // yield surprising results. (e.g. the block may not have a dom tree node)
1831  if (!DT->isReachableFromEntry(NarrowUserBB))
1832  return;
1833 
1834  for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
1835  L->contains(DTB->getBlock());
1836  DTB = DTB->getIDom()) {
1837  auto *BB = DTB->getBlock();
1838  auto *TI = BB->getTerminator();
1839  UpdateRangeFromGuards(TI);
1840 
1841  auto *BI = dyn_cast<BranchInst>(TI);
1842  if (!BI || !BI->isConditional())
1843  continue;
1844 
1845  auto *TrueSuccessor = BI->getSuccessor(0);
1846  auto *FalseSuccessor = BI->getSuccessor(1);
1847 
1848  auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
1849  return BBE.isSingleEdge() &&
1850  DT->dominates(BBE, NarrowUser->getParent());
1851  };
1852 
1853  if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
1854  UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
1855 
1856  if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
1857  UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
1858  }
1859 }
1860 
1861 /// Calculates PostIncRangeInfos map for the given IV
1862 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
1865  Worklist.push_back(OrigPhi);
1866  Visited.insert(OrigPhi);
1867 
1868  while (!Worklist.empty()) {
1869  Instruction *NarrowDef = Worklist.pop_back_val();
1870 
1871  for (Use &U : NarrowDef->uses()) {
1872  auto *NarrowUser = cast<Instruction>(U.getUser());
1873 
1874  // Don't go looking outside the current loop.
1875  auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1876  if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1877  continue;
1878 
1879  if (!Visited.insert(NarrowUser).second)
1880  continue;
1881 
1882  Worklist.push_back(NarrowUser);
1883 
1884  calculatePostIncRange(NarrowDef, NarrowUser);
1885  }
1886  }
1887 }
1888 
1889 //===----------------------------------------------------------------------===//
1890 // Live IV Reduction - Minimize IVs live across the loop.
1891 //===----------------------------------------------------------------------===//
1892 
1893 //===----------------------------------------------------------------------===//
1894 // Simplification of IV users based on SCEV evaluation.
1895 //===----------------------------------------------------------------------===//
1896 
1897 namespace {
1898 
1899 class IndVarSimplifyVisitor : public IVVisitor {
1900  ScalarEvolution *SE;
1901  const TargetTransformInfo *TTI;
1902  PHINode *IVPhi;
1903 
1904 public:
1905  WideIVInfo WI;
1906 
1907  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1908  const TargetTransformInfo *TTI,
1909  const DominatorTree *DTree)
1910  : SE(SCEV), TTI(TTI), IVPhi(IV) {
1911  DT = DTree;
1912  WI.NarrowIV = IVPhi;
1913  }
1914 
1915  // Implement the interface used by simplifyUsersOfIV.
1916  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1917 };
1918 
1919 } // end anonymous namespace
1920 
1921 /// Iteratively perform simplification on a worklist of IV users. Each
1922 /// successive simplification may push more users which may themselves be
1923 /// candidates for simplification.
1924 ///
1925 /// Sign/Zero extend elimination is interleaved with IV simplification.
1926 bool IndVarSimplify::simplifyAndExtend(Loop *L,
1927  SCEVExpander &Rewriter,
1928  LoopInfo *LI) {
1930 
1931  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1932  Intrinsic::getName(Intrinsic::experimental_guard));
1933  bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1934 
1935  SmallVector<PHINode*, 8> LoopPhis;
1936  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1937  LoopPhis.push_back(cast<PHINode>(I));
1938  }
1939  // Each round of simplification iterates through the SimplifyIVUsers worklist
1940  // for all current phis, then determines whether any IVs can be
1941  // widened. Widening adds new phis to LoopPhis, inducing another round of
1942  // simplification on the wide IVs.
1943  bool Changed = false;
1944  while (!LoopPhis.empty()) {
1945  // Evaluate as many IV expressions as possible before widening any IVs. This
1946  // forces SCEV to set no-wrap flags before evaluating sign/zero
1947  // extension. The first time SCEV attempts to normalize sign/zero extension,
1948  // the result becomes final. So for the most predictable results, we delay
1949  // evaluation of sign/zero extend evaluation until needed, and avoid running
1950  // other SCEV based analysis prior to simplifyAndExtend.
1951  do {
1952  PHINode *CurrIV = LoopPhis.pop_back_val();
1953 
1954  // Information about sign/zero extensions of CurrIV.
1955  IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1956 
1957  Changed |=
1958  simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor);
1959 
1960  if (Visitor.WI.WidestNativeType) {
1961  WideIVs.push_back(Visitor.WI);
1962  }
1963  } while(!LoopPhis.empty());
1964 
1965  for (; !WideIVs.empty(); WideIVs.pop_back()) {
1966  WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
1967  if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
1968  Changed = true;
1969  LoopPhis.push_back(WidePhi);
1970  }
1971  }
1972  }
1973  return Changed;
1974 }
1975 
1976 //===----------------------------------------------------------------------===//
1977 // linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
1978 //===----------------------------------------------------------------------===//
1979 
1980 /// Return true if this loop's backedge taken count expression can be safely and
1981 /// cheaply expanded into an instruction sequence that can be used by
1982 /// linearFunctionTestReplace.
1983 ///
1984 /// TODO: This fails for pointer-type loop counters with greater than one byte
1985 /// strides, consequently preventing LFTR from running. For the purpose of LFTR
1986 /// we could skip this check in the case that the LFTR loop counter (chosen by
1987 /// FindLoopCounter) is also pointer type. Instead, we could directly convert
1988 /// the loop test to an inequality test by checking the target data's alignment
1989 /// of element types (given that the initial pointer value originates from or is
1990 /// used by ABI constrained operation, as opposed to inttoptr/ptrtoint).
1991 /// However, we don't yet have a strong motivation for converting loop tests
1992 /// into inequality tests.
1994  SCEVExpander &Rewriter) {
1995  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
1996  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
1997  BackedgeTakenCount->isZero())
1998  return false;
1999 
2000  if (!L->getExitingBlock())
2001  return false;
2002 
2003  // Can't rewrite non-branch yet.
2004  if (!isa<BranchInst>(L->getExitingBlock()->getTerminator()))
2005  return false;
2006 
2007  if (Rewriter.isHighCostExpansion(BackedgeTakenCount, L))
2008  return false;
2009 
2010  return true;
2011 }
2012 
2013 /// Return the loop header phi IFF IncV adds a loop invariant value to the phi.
2015  Instruction *IncI = dyn_cast<Instruction>(IncV);
2016  if (!IncI)
2017  return nullptr;
2018 
2019  switch (IncI->getOpcode()) {
2020  case Instruction::Add:
2021  case Instruction::Sub:
2022  break;
2023  case Instruction::GetElementPtr:
2024  // An IV counter must preserve its type.
2025  if (IncI->getNumOperands() == 2)
2026  break;
2028  default:
2029  return nullptr;
2030  }
2031 
2032  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
2033  if (Phi && Phi->getParent() == L->getHeader()) {
2034  if (isLoopInvariant(IncI->getOperand(1), L, DT))
2035  return Phi;
2036  return nullptr;
2037  }
2038  if (IncI->getOpcode() == Instruction::GetElementPtr)
2039  return nullptr;
2040 
2041  // Allow add/sub to be commuted.
2042  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
2043  if (Phi && Phi->getParent() == L->getHeader()) {
2044  if (isLoopInvariant(IncI->getOperand(0), L, DT))
2045  return Phi;
2046  }
2047  return nullptr;
2048 }
2049 
2050 /// Return the compare guarding the loop latch, or NULL for unrecognized tests.
2052  assert(L->getExitingBlock() && "expected loop exit");
2053 
2054  BasicBlock *LatchBlock = L->getLoopLatch();
2055  // Don't bother with LFTR if the loop is not properly simplified.
2056  if (!LatchBlock)
2057  return nullptr;
2058 
2060  assert(BI && "expected exit branch");
2061 
2062  return dyn_cast<ICmpInst>(BI->getCondition());
2063 }
2064 
2065 /// linearFunctionTestReplace policy. Return true unless we can show that the
2066 /// current exit test is already sufficiently canonical.
2067 static bool needsLFTR(Loop *L, DominatorTree *DT) {
2068  // Do LFTR to simplify the exit condition to an ICMP.
2069  ICmpInst *Cond = getLoopTest(L);
2070  if (!Cond)
2071  return true;
2072 
2073  // Do LFTR to simplify the exit ICMP to EQ/NE
2074  ICmpInst::Predicate Pred = Cond->getPredicate();
2075  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
2076  return true;
2077 
2078  // Look for a loop invariant RHS
2079  Value *LHS = Cond->getOperand(0);
2080  Value *RHS = Cond->getOperand(1);
2081  if (!isLoopInvariant(RHS, L, DT)) {
2082  if (!isLoopInvariant(LHS, L, DT))
2083  return true;
2084  std::swap(LHS, RHS);
2085  }
2086  // Look for a simple IV counter LHS
2087  PHINode *Phi = dyn_cast<PHINode>(LHS);
2088  if (!Phi)
2089  Phi = getLoopPhiForCounter(LHS, L, DT);
2090 
2091  if (!Phi)
2092  return true;
2093 
2094  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
2095  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
2096  if (Idx < 0)
2097  return true;
2098 
2099  // Do LFTR if the exit condition's IV is *not* a simple counter.
2100  Value *IncV = Phi->getIncomingValue(Idx);
2101  return Phi != getLoopPhiForCounter(IncV, L, DT);
2102 }
2103 
2104 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
2105 /// down to checking that all operands are constant and listing instructions
2106 /// that may hide undef.
2108  unsigned Depth) {
2109  if (isa<Constant>(V))
2110  return !isa<UndefValue>(V);
2111 
2112  if (Depth >= 6)
2113  return false;
2114 
2115  // Conservatively handle non-constant non-instructions. For example, Arguments
2116  // may be undef.
2117  Instruction *I = dyn_cast<Instruction>(V);
2118  if (!I)
2119  return false;
2120 
2121  // Load and return values may be undef.
2122  if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
2123  return false;
2124 
2125  // Optimistically handle other instructions.
2126  for (Value *Op : I->operands()) {
2127  if (!Visited.insert(Op).second)
2128  continue;
2129  if (!hasConcreteDefImpl(Op, Visited, Depth+1))
2130  return false;
2131  }
2132  return true;
2133 }
2134 
2135 /// Return true if the given value is concrete. We must prove that undef can
2136 /// never reach it.
2137 ///
2138 /// TODO: If we decide that this is a good approach to checking for undef, we
2139 /// may factor it into a common location.
2140 static bool hasConcreteDef(Value *V) {
2141  SmallPtrSet<Value*, 8> Visited;
2142  Visited.insert(V);
2143  return hasConcreteDefImpl(V, Visited, 0);
2144 }
2145 
2146 /// Return true if this IV has any uses other than the (soon to be rewritten)
2147 /// loop exit test.
2148 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
2149  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
2150  Value *IncV = Phi->getIncomingValue(LatchIdx);
2151 
2152  for (User *U : Phi->users())
2153  if (U != Cond && U != IncV) return false;
2154 
2155  for (User *U : IncV->users())
2156  if (U != Cond && U != Phi) return false;
2157  return true;
2158 }
2159 
2160 /// Find an affine IV in canonical form.
2161 ///
2162 /// BECount may be an i8* pointer type. The pointer difference is already
2163 /// valid count without scaling the address stride, so it remains a pointer
2164 /// expression as far as SCEV is concerned.
2165 ///
2166 /// Currently only valid for LFTR. See the comments on hasConcreteDef below.
2167 ///
2168 /// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount
2169 ///
2170 /// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride.
2171 /// This is difficult in general for SCEV because of potential overflow. But we
2172 /// could at least handle constant BECounts.
2173 static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount,
2174  ScalarEvolution *SE, DominatorTree *DT) {
2175  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
2176 
2177  Value *Cond =
2178  cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition();
2179 
2180  // Loop over all of the PHI nodes, looking for a simple counter.
2181  PHINode *BestPhi = nullptr;
2182  const SCEV *BestInit = nullptr;
2183  BasicBlock *LatchBlock = L->getLoopLatch();
2184  assert(LatchBlock && "needsLFTR should guarantee a loop latch");
2185  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2186 
2187  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
2188  PHINode *Phi = cast<PHINode>(I);
2189  if (!SE->isSCEVable(Phi->getType()))
2190  continue;
2191 
2192  // Avoid comparing an integer IV against a pointer Limit.
2193  if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
2194  continue;
2195 
2196  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2197  if (!AR || AR->getLoop() != L || !AR->isAffine())
2198  continue;
2199 
2200  // AR may be a pointer type, while BECount is an integer type.
2201  // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
2202  // AR may not be a narrower type, or we may never exit.
2203  uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
2204  if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
2205  continue;
2206 
2207  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
2208  if (!Step || !Step->isOne())
2209  continue;
2210 
2211  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
2212  Value *IncV = Phi->getIncomingValue(LatchIdx);
2213  if (getLoopPhiForCounter(IncV, L, DT) != Phi)
2214  continue;
2215 
2216  // Avoid reusing a potentially undef value to compute other values that may
2217  // have originally had a concrete definition.
2218  if (!hasConcreteDef(Phi)) {
2219  // We explicitly allow unknown phis as long as they are already used by
2220  // the loop test. In this case we assume that performing LFTR could not
2221  // increase the number of undef users.
2222  if (ICmpInst *Cond = getLoopTest(L)) {
2223  if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L, DT) &&
2224  Phi != getLoopPhiForCounter(Cond->getOperand(1), L, DT)) {
2225  continue;
2226  }
2227  }
2228  }
2229  const SCEV *Init = AR->getStart();
2230 
2231  if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
2232  // Don't force a live loop counter if another IV can be used.
2233  if (AlmostDeadIV(Phi, LatchBlock, Cond))
2234  continue;
2235 
2236  // Prefer to count-from-zero. This is a more "canonical" counter form. It
2237  // also prefers integer to pointer IVs.
2238  if (BestInit->isZero() != Init->isZero()) {
2239  if (BestInit->isZero())
2240  continue;
2241  }
2242  // If two IVs both count from zero or both count from nonzero then the
2243  // narrower is likely a dead phi that has been widened. Use the wider phi
2244  // to allow the other to be eliminated.
2245  else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
2246  continue;
2247  }
2248  BestPhi = Phi;
2249  BestInit = Init;
2250  }
2251  return BestPhi;
2252 }
2253 
2254 /// Help linearFunctionTestReplace by generating a value that holds the RHS of
2255 /// the new loop test.
2256 static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
2257  SCEVExpander &Rewriter, ScalarEvolution *SE) {
2258  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2259  assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter");
2260  const SCEV *IVInit = AR->getStart();
2261 
2262  // IVInit may be a pointer while IVCount is an integer when FindLoopCounter
2263  // finds a valid pointer IV. Sign extend BECount in order to materialize a
2264  // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
2265  // the existing GEPs whenever possible.
2266  if (IndVar->getType()->isPointerTy() && !IVCount->getType()->isPointerTy()) {
2267  // IVOffset will be the new GEP offset that is interpreted by GEP as a
2268  // signed value. IVCount on the other hand represents the loop trip count,
2269  // which is an unsigned value. FindLoopCounter only allows induction
2270  // variables that have a positive unit stride of one. This means we don't
2271  // have to handle the case of negative offsets (yet) and just need to zero
2272  // extend IVCount.
2273  Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2274  const SCEV *IVOffset = SE->getTruncateOrZeroExtend(IVCount, OfsTy);
2275 
2276  // Expand the code for the iteration count.
2277  assert(SE->isLoopInvariant(IVOffset, L) &&
2278  "Computed iteration count is not loop invariant!");
2279  BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
2280  Value *GEPOffset = Rewriter.expandCodeFor(IVOffset, OfsTy, BI);
2281 
2282  Value *GEPBase = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
2283  assert(AR->getStart() == SE->getSCEV(GEPBase) && "bad loop counter");
2284  // We could handle pointer IVs other than i8*, but we need to compensate for
2285  // gep index scaling. See canExpandBackedgeTakenCount comments.
2287  cast<PointerType>(GEPBase->getType())
2288  ->getElementType())->isOne() &&
2289  "unit stride pointer IV must be i8*");
2290 
2291  IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
2292  return Builder.CreateGEP(nullptr, GEPBase, GEPOffset, "lftr.limit");
2293  } else {
2294  // In any other case, convert both IVInit and IVCount to integers before
2295  // comparing. This may result in SCEV expansion of pointers, but in practice
2296  // SCEV will fold the pointer arithmetic away as such:
2297  // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
2298  //
2299  // Valid Cases: (1) both integers is most common; (2) both may be pointers
2300  // for simple memset-style loops.
2301  //
2302  // IVInit integer and IVCount pointer would only occur if a canonical IV
2303  // were generated on top of case #2, which is not expected.
2304 
2305  const SCEV *IVLimit = nullptr;
2306  // For unit stride, IVCount = Start + BECount with 2's complement overflow.
2307  // For non-zero Start, compute IVCount here.
2308  if (AR->getStart()->isZero())
2309  IVLimit = IVCount;
2310  else {
2311  assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2312  const SCEV *IVInit = AR->getStart();
2313 
2314  // For integer IVs, truncate the IV before computing IVInit + BECount.
2315  if (SE->getTypeSizeInBits(IVInit->getType())
2316  > SE->getTypeSizeInBits(IVCount->getType()))
2317  IVInit = SE->getTruncateExpr(IVInit, IVCount->getType());
2318 
2319  IVLimit = SE->getAddExpr(IVInit, IVCount);
2320  }
2321  // Expand the code for the iteration count.
2322  BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
2323  IRBuilder<> Builder(BI);
2324  assert(SE->isLoopInvariant(IVLimit, L) &&
2325  "Computed iteration count is not loop invariant!");
2326  // Ensure that we generate the same type as IndVar, or a smaller integer
2327  // type. In the presence of null pointer values, we have an integer type
2328  // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2329  Type *LimitTy = IVCount->getType()->isPointerTy() ?
2330  IndVar->getType() : IVCount->getType();
2331  return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
2332  }
2333 }
2334 
2335 /// This method rewrites the exit condition of the loop to be a canonical !=
2336 /// comparison against the incremented loop induction variable. This pass is
2337 /// able to rewrite the exit tests of any loop where the SCEV analysis can
2338 /// determine a loop-invariant trip count of the loop, which is actually a much
2339 /// broader range than just linear tests.
2340 bool IndVarSimplify::
2341 linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
2342  PHINode *IndVar, SCEVExpander &Rewriter) {
2343  assert(canExpandBackedgeTakenCount(L, SE, Rewriter) && "precondition");
2344 
2345  // Initialize CmpIndVar and IVCount to their preincremented values.
2346  Value *CmpIndVar = IndVar;
2347  const SCEV *IVCount = BackedgeTakenCount;
2348 
2349  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
2350 
2351  // If the exiting block is the same as the backedge block, we prefer to
2352  // compare against the post-incremented value, otherwise we must compare
2353  // against the preincremented value.
2354  if (L->getExitingBlock() == L->getLoopLatch()) {
2355  // Add one to the "backedge-taken" count to get the trip count.
2356  // This addition may overflow, which is valid as long as the comparison is
2357  // truncated to BackedgeTakenCount->getType().
2358  IVCount = SE->getAddExpr(BackedgeTakenCount,
2359  SE->getOne(BackedgeTakenCount->getType()));
2360  // The BackedgeTaken expression contains the number of times that the
2361  // backedge branches to the loop header. This is one less than the
2362  // number of times the loop executes, so use the incremented indvar.
2363  CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
2364  }
2365 
2366  Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE);
2367  assert(ExitCnt->getType()->isPointerTy() ==
2368  IndVar->getType()->isPointerTy() &&
2369  "genLoopLimit missed a cast");
2370 
2371  // Insert a new icmp_ne or icmp_eq instruction before the branch.
2372  BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
2374  if (L->contains(BI->getSuccessor(0)))
2375  P = ICmpInst::ICMP_NE;
2376  else
2377  P = ICmpInst::ICMP_EQ;
2378 
2379  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
2380  << " LHS:" << *CmpIndVar << '\n'
2381  << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
2382  << "\n"
2383  << " RHS:\t" << *ExitCnt << "\n"
2384  << " IVCount:\t" << *IVCount << "\n");
2385 
2386  IRBuilder<> Builder(BI);
2387 
2388  // The new loop exit condition should reuse the debug location of the
2389  // original loop exit condition.
2390  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
2391  Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
2392 
2393  // LFTR can ignore IV overflow and truncate to the width of
2394  // BECount. This avoids materializing the add(zext(add)) expression.
2395  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
2396  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
2397  if (CmpIndVarSize > ExitCntSize) {
2398  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2399  const SCEV *ARStart = AR->getStart();
2400  const SCEV *ARStep = AR->getStepRecurrence(*SE);
2401  // For constant IVCount, avoid truncation.
2402  if (isa<SCEVConstant>(ARStart) && isa<SCEVConstant>(IVCount)) {
2403  const APInt &Start = cast<SCEVConstant>(ARStart)->getAPInt();
2404  APInt Count = cast<SCEVConstant>(IVCount)->getAPInt();
2405  // Note that the post-inc value of BackedgeTakenCount may have overflowed
2406  // above such that IVCount is now zero.
2407  if (IVCount != BackedgeTakenCount && Count == 0) {
2408  Count = APInt::getMaxValue(Count.getBitWidth()).zext(CmpIndVarSize);
2409  ++Count;
2410  }
2411  else
2412  Count = Count.zext(CmpIndVarSize);
2413  APInt NewLimit;
2414  if (cast<SCEVConstant>(ARStep)->getValue()->isNegative())
2415  NewLimit = Start - Count;
2416  else
2417  NewLimit = Start + Count;
2418  ExitCnt = ConstantInt::get(CmpIndVar->getType(), NewLimit);
2419 
2420  LLVM_DEBUG(dbgs() << " Widen RHS:\t" << *ExitCnt << "\n");
2421  } else {
2422  // We try to extend trip count first. If that doesn't work we truncate IV.
2423  // Zext(trunc(IV)) == IV implies equivalence of the following two:
2424  // Trunc(IV) == ExitCnt and IV == zext(ExitCnt). Similarly for sext. If
2425  // one of the two holds, extend the trip count, otherwise we truncate IV.
2426  bool Extended = false;
2427  const SCEV *IV = SE->getSCEV(CmpIndVar);
2428  const SCEV *ZExtTrunc =
2429  SE->getZeroExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2430  ExitCnt->getType()),
2431  CmpIndVar->getType());
2432 
2433  if (ZExtTrunc == IV) {
2434  Extended = true;
2435  ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
2436  "wide.trip.count");
2437  } else {
2438  const SCEV *SExtTrunc =
2439  SE->getSignExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2440  ExitCnt->getType()),
2441  CmpIndVar->getType());
2442  if (SExtTrunc == IV) {
2443  Extended = true;
2444  ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
2445  "wide.trip.count");
2446  }
2447  }
2448 
2449  if (!Extended)
2450  CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
2451  "lftr.wideiv");
2452  }
2453  }
2454  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
2455  Value *OrigCond = BI->getCondition();
2456  // It's tempting to use replaceAllUsesWith here to fully replace the old
2457  // comparison, but that's not immediately safe, since users of the old
2458  // comparison may not be dominated by the new comparison. Instead, just
2459  // update the branch to use the new comparison; in the common case this
2460  // will make old comparison dead.
2461  BI->setCondition(Cond);
2462  DeadInsts.push_back(OrigCond);
2463 
2464  ++NumLFTR;
2465  return true;
2466 }
2467 
2468 //===----------------------------------------------------------------------===//
2469 // sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
2470 //===----------------------------------------------------------------------===//
2471 
2472 /// If there's a single exit block, sink any loop-invariant values that
2473 /// were defined in the preheader but not used inside the loop into the
2474 /// exit block to reduce register pressure in the loop.
2475 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
2476  BasicBlock *ExitBlock = L->getExitBlock();
2477  if (!ExitBlock) return false;
2478 
2479  BasicBlock *Preheader = L->getLoopPreheader();
2480  if (!Preheader) return false;
2481 
2482  bool MadeAnyChanges = false;
2483  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
2484  BasicBlock::iterator I(Preheader->getTerminator());
2485  while (I != Preheader->begin()) {
2486  --I;
2487  // New instructions were inserted at the end of the preheader.
2488  if (isa<PHINode>(I))
2489  break;
2490 
2491  // Don't move instructions which might have side effects, since the side
2492  // effects need to complete before instructions inside the loop. Also don't
2493  // move instructions which might read memory, since the loop may modify
2494  // memory. Note that it's okay if the instruction might have undefined
2495  // behavior: LoopSimplify guarantees that the preheader dominates the exit
2496  // block.
2497  if (I->mayHaveSideEffects() || I->mayReadFromMemory())
2498  continue;
2499 
2500  // Skip debug info intrinsics.
2501  if (isa<DbgInfoIntrinsic>(I))
2502  continue;
2503 
2504  // Skip eh pad instructions.
2505  if (I->isEHPad())
2506  continue;
2507 
2508  // Don't sink alloca: we never want to sink static alloca's out of the
2509  // entry block, and correctly sinking dynamic alloca's requires
2510  // checks for stacksave/stackrestore intrinsics.
2511  // FIXME: Refactor this check somehow?
2512  if (isa<AllocaInst>(I))
2513  continue;
2514 
2515  // Determine if there is a use in or before the loop (direct or
2516  // otherwise).
2517  bool UsedInLoop = false;
2518  for (Use &U : I->uses()) {
2519  Instruction *User = cast<Instruction>(U.getUser());
2520  BasicBlock *UseBB = User->getParent();
2521  if (PHINode *P = dyn_cast<PHINode>(User)) {
2522  unsigned i =
2523  PHINode::getIncomingValueNumForOperand(U.getOperandNo());
2524  UseBB = P->getIncomingBlock(i);
2525  }
2526  if (UseBB == Preheader || L->contains(UseBB)) {
2527  UsedInLoop = true;
2528  break;
2529  }
2530  }
2531 
2532  // If there is, the def must remain in the preheader.
2533  if (UsedInLoop)
2534  continue;
2535 
2536  // Otherwise, sink it to the exit block.
2537  Instruction *ToMove = &*I;
2538  bool Done = false;
2539 
2540  if (I != Preheader->begin()) {
2541  // Skip debug info intrinsics.
2542  do {
2543  --I;
2544  } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
2545 
2546  if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
2547  Done = true;
2548  } else {
2549  Done = true;
2550  }
2551 
2552  MadeAnyChanges = true;
2553  ToMove->moveBefore(*ExitBlock, InsertPt);
2554  if (Done) break;
2555  InsertPt = ToMove->getIterator();
2556  }
2557 
2558  return MadeAnyChanges;
2559 }
2560 
2561 //===----------------------------------------------------------------------===//
2562 // IndVarSimplify driver. Manage several subpasses of IV simplification.
2563 //===----------------------------------------------------------------------===//
2564 
2565 bool IndVarSimplify::run(Loop *L) {
2566  // We need (and expect!) the incoming loop to be in LCSSA.
2567  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2568  "LCSSA required to run indvars!");
2569  bool Changed = false;
2570 
2571  // If LoopSimplify form is not available, stay out of trouble. Some notes:
2572  // - LSR currently only supports LoopSimplify-form loops. Indvars'
2573  // canonicalization can be a pessimization without LSR to "clean up"
2574  // afterwards.
2575  // - We depend on having a preheader; in particular,
2576  // Loop::getCanonicalInductionVariable only supports loops with preheaders,
2577  // and we're in trouble if we can't find the induction variable even when
2578  // we've manually inserted one.
2579  // - LFTR relies on having a single backedge.
2580  if (!L->isLoopSimplifyForm())
2581  return false;
2582 
2583  // If there are any floating-point recurrences, attempt to
2584  // transform them to use integer recurrences.
2585  Changed |= rewriteNonIntegerIVs(L);
2586 
2587  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
2588 
2589  // Create a rewriter object which we'll use to transform the code with.
2590  SCEVExpander Rewriter(*SE, DL, "indvars");
2591 #ifndef NDEBUG
2592  Rewriter.setDebugType(DEBUG_TYPE);
2593 #endif
2594 
2595  // Eliminate redundant IV users.
2596  //
2597  // Simplification works best when run before other consumers of SCEV. We
2598  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
2599  // other expressions involving loop IVs have been evaluated. This helps SCEV
2600  // set no-wrap flags before normalizing sign/zero extension.
2601  Rewriter.disableCanonicalMode();
2602  Changed |= simplifyAndExtend(L, Rewriter, LI);
2603 
2604  // Check to see if this loop has a computable loop-invariant execution count.
2605  // If so, this means that we can compute the final value of any expressions
2606  // that are recurrent in the loop, and substitute the exit values from the
2607  // loop into any instructions outside of the loop that use the final values of
2608  // the current expressions.
2609  //
2610  if (ReplaceExitValue != NeverRepl &&
2611  !isa<SCEVCouldNotCompute>(BackedgeTakenCount))
2612  Changed |= rewriteLoopExitValues(L, Rewriter);
2613 
2614  // Eliminate redundant IV cycles.
2615  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
2616 
2617  // If we have a trip count expression, rewrite the loop's exit condition
2618  // using it. We can currently only handle loops with a single exit.
2619  if (!DisableLFTR && canExpandBackedgeTakenCount(L, SE, Rewriter) &&
2620  needsLFTR(L, DT)) {
2621  PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT);
2622  if (IndVar) {
2623  // Check preconditions for proper SCEVExpander operation. SCEV does not
2624  // express SCEVExpander's dependencies, such as LoopSimplify. Instead any
2625  // pass that uses the SCEVExpander must do it. This does not work well for
2626  // loop passes because SCEVExpander makes assumptions about all loops,
2627  // while LoopPassManager only forces the current loop to be simplified.
2628  //
2629  // FIXME: SCEV expansion has no way to bail out, so the caller must
2630  // explicitly check any assumptions made by SCEV. Brittle.
2631  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
2632  if (!AR || AR->getLoop()->getLoopPreheader())
2633  Changed |= linearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
2634  Rewriter);
2635  }
2636  }
2637  // Clear the rewriter cache, because values that are in the rewriter's cache
2638  // can be deleted in the loop below, causing the AssertingVH in the cache to
2639  // trigger.
2640  Rewriter.clear();
2641 
2642  // Now that we're done iterating through lists, clean up any instructions
2643  // which are now dead.
2644  while (!DeadInsts.empty())
2645  if (Instruction *Inst =
2646  dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
2647  Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
2648 
2649  // The Rewriter may not be used from this point on.
2650 
2651  // Loop-invariant instructions in the preheader that aren't used in the
2652  // loop may be sunk below the loop to reduce register pressure.
2653  Changed |= sinkUnusedInvariants(L);
2654 
2655  // rewriteFirstIterationLoopExitValues does not rely on the computation of
2656  // trip count and therefore can further simplify exit values in addition to
2657  // rewriteLoopExitValues.
2658  Changed |= rewriteFirstIterationLoopExitValues(L);
2659 
2660  // Clean up dead instructions.
2661  Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
2662 
2663  // Check a post-condition.
2664  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2665  "Indvars did not preserve LCSSA!");
2666 
2667  // Verify that LFTR, and any other change have not interfered with SCEV's
2668  // ability to compute trip count.
2669 #ifndef NDEBUG
2670  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
2671  SE->forgetLoop(L);
2672  const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
2673  if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
2674  SE->getTypeSizeInBits(NewBECount->getType()))
2675  NewBECount = SE->getTruncateOrNoop(NewBECount,
2676  BackedgeTakenCount->getType());
2677  else
2678  BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
2679  NewBECount->getType());
2680  assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
2681  }
2682 #endif
2683 
2684  return Changed;
2685 }
2686 
2689  LPMUpdater &) {
2690  Function *F = L.getHeader()->getParent();
2691  const DataLayout &DL = F->getParent()->getDataLayout();
2692 
2693  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI);
2694  if (!IVS.run(&L))
2695  return PreservedAnalyses::all();
2696 
2697  auto PA = getLoopPassPreservedAnalyses();
2698  PA.preserveSet<CFGAnalyses>();
2699  return PA;
2700 }
2701 
2702 namespace {
2703 
2704 struct IndVarSimplifyLegacyPass : public LoopPass {
2705  static char ID; // Pass identification, replacement for typeid
2706 
2707  IndVarSimplifyLegacyPass() : LoopPass(ID) {
2709  }
2710 
2711  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
2712  if (skipLoop(L))
2713  return false;
2714 
2715  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2716  auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2717  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2718  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2719  auto *TLI = TLIP ? &TLIP->getTLI() : nullptr;
2720  auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2721  auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2722  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2723 
2724  IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
2725  return IVS.run(L);
2726  }
2727 
2728  void getAnalysisUsage(AnalysisUsage &AU) const override {
2729  AU.setPreservesCFG();
2731  }
2732 };
2733 
2734 } // end anonymous namespace
2735 
2737 
2738 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
2739  "Induction Variable Simplification", false, false)
2741 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
2742  "Induction Variable Simplification", false, false)
2743 
2745  return new IndVarSimplifyLegacyPass();
2746 }
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:81
uint64_t CallInst * C
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:68
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
bool hoistIVInc(Instruction *IncV, Instruction *InsertPos)
Utility for hoisting an IV increment.
Induction Variable Simplification
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:71
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1949
iterator_range< use_iterator > uses()
Definition: Value.h:355
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:225
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT)
Perform a quick domtree based check for loop invariance assuming that V is used within the loop...
MutableArrayRef< T > makeMutableArrayRef(T &OneElt)
Construct a MutableArrayRef from a single element.
Definition: ArrayRef.h:503
This class represents lattice values for constants.
Definition: AllocatorList.h:24
This header provides classes for managing a pipeline of passes over loops in LLVM IR...
Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
bool isRecursivelyLCSSAForm(DominatorTree &DT, const LoopInfo &LI) const
Return true if this Loop and all inner subloops are in LCSSA form.
Definition: LoopInfo.cpp:184
bool isLCSSAForm(DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:177
This class represents zero extension of integer types.
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
The main scalar evolution driver.
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:858
bool isZero() const
Return true if the expression is a constant zero.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:174
bool isHighCostExpansion(const SCEV *Expr, Loop *L, const Instruction *At=nullptr)
Return true for expressions that may incur non-trivial cost to evaluate at runtime.
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:652
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:705
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:662
uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
void setDebugType(const char *s)
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
findNearestCommonDominator - Find nearest common dominator basic block for basic block A and B...
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
BasicBlock * getSuccessor(unsigned i) const
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1663
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
STATISTIC(NumFunctions, "Total number of functions")
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
F(f)
static cl::opt< bool > UsePostIncrementRanges("indvars-post-increment-ranges", cl::Hidden, cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), cl::init(true))
This class represents a sign extension of integer types.
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:177
Hexagon Common GEP
Value * getCondition() const
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:138
static ConstantRange makeAllowedICmpRegion(CmpInst::Predicate Pred, const ConstantRange &Other)
Produce the smallest range such that all values that may satisfy the given predicate with any value c...
This defines the Use class.
static bool hasConcreteDef(Value *V)
Return true if the given value is concrete.
ReplaceExitVal
bool hasLoopInvariantOperands(const Instruction *I) const
Return true if all the operands of the specified instruction are loop invariant.
Definition: LoopInfo.cpp:64
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1509
INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars", "Induction Variable Simplification", false, false) INITIALIZE_PASS_END(IndVarSimplifyLegacyPass
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:269
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1069
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:221
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:657
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:48
static Value * genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L, SCEVExpander &Rewriter, ScalarEvolution *SE)
Help linearFunctionTestReplace by generating a value that holds the RHS of the new loop test...
Interface for visiting interesting IV users that are recognized but not simplified by this utility...
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:134
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
bool isSigned() const
Definition: InstrTypes.h:816
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:626
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE, etc.
Definition: InstrTypes.h:745
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:371
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:690
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:369
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:353
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal)
Convert APF to an integer, if possible.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:743
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:653
This file implements a class to represent arbitrary precision integral constant values and operations...
BlockT * getHeader() const
Definition: LoopInfo.h:100
Key
PAL metadata keys.
void findDbgValues(SmallVectorImpl< DbgValueInst *> &DbgValues, Value *V)
Finds the llvm.dbg.value intrinsics describing a value.
Definition: Local.cpp:1495
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:606
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:182
This node represents a polynomial recurrence on the trip count of the specified loop.
This instruction compares its operands according to the predicate given to the constructor.
static Instruction * getInsertPointForUses(Instruction *User, Value *Def, DominatorTree *DT, LoopInfo *LI)
Determine the insertion point for this user.
static cl::opt< bool > VerifyIndvars("verify-indvars", cl::Hidden, cl::desc("Verify the ScalarEvolution result after running indvars"))
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Examine each PHI in the given block and delete it if it is dead.
const SCEV * getSizeOfExpr(Type *IntTy, Type *AllocTy)
Return an expression for sizeof AllocTy that is type IntTy.
ConstantRange intersectWith(const ConstantRange &CR) const
Return the range that results from the intersection of this range with another range.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:126
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:151
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:429
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1659
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:291
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:145
static BinaryOperator * CreateAdd(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block...
Definition: IRBuilder.h:127
Value * getOperand(unsigned i) const
Definition: User.h:170
void initializeIndVarSimplifyLegacyPassPass(PassRegistry &)
void clearInsertPoint()
Clear the current insertion point.
static MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition: Metadata.cpp:106
void clear()
Erase the contents of the InsertedExpressions map so that users trying to expand the same expression ...
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:154
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt...
Definition: PatternMatch.h:176
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:217
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:308
static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE, SCEVExpander &Rewriter)
Return true if this loop&#39;s backedge taken count expression can be safely and cheaply expanded into an...
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
Conditional or Unconditional Branch instruction.
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl< Value *> &Visited, unsigned Depth)
Recursive helper for hasConcreteDef().
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:643
Value * getIncomingValueForBlock(const BasicBlock *BB) const
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:224
const Instruction & front() const
Definition: BasicBlock.h:281
#define H(x, y, z)
Definition: MD5.cpp:57
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:264
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE, const TargetTransformInfo *TTI)
Update information about the induction variable that is extended by this sign or zero extend operatio...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
Definition: Instruction.h:562
const SCEV * getAddExpr(SmallVectorImpl< const SCEV *> &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:502
Represent the analysis usage information of a pass.
This file declares a class to represent arbitrary precision floating point values and provide a varie...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1193
This instruction compares its operands according to the predicate given to the constructor.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:646
Utility class for integer operators which may exhibit overflow - Add, Sub, Mul, and Shl...
Definition: Operator.h:67
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values...
op_range operands()
Definition: User.h:238
self_iterator getIterator()
Definition: ilist_node.h:82
Class to represent integer types.
Definition: DerivedTypes.h:40
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:430
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:76
void getExitingBlocks(SmallVectorImpl< BlockT *> &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:35
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1415
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:160
static unsigned getIncomingValueNumForOperand(unsigned i)
size_t size() const
Definition: SmallVector.h:53
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
static ICmpInst * getLoopTest(Loop *L)
Return the compare guarding the loop latch, or NULL for unrecognized tests.
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:661
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1655
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:58
signed greater than
Definition: InstrTypes.h:673
#define DEBUG_TYPE
const APFloat & getValueAPF() const
Definition: Constants.h:303
Value * CreateGEP(Value *Ptr, ArrayRef< Value *> IdxList, const Twine &Name="")
Definition: IRBuilder.h:1458
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:650
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:110
Iterator for intrusive lists based on ilist_node.
unsigned getNumOperands() const
Definition: User.h:192
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
Type * getType() const
Return the LLVM type of this SCEV expression.
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:309
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
const SCEV * getTruncateExpr(const SCEV *Op, Type *Ty)
static ValueAsMetadata * get(Value *V)
Definition: Metadata.cpp:349
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:660
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:249
Module.h This file contains the declarations for the Module class.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:64
Provides information about what library functions are available for the current target.
This class represents a range of values.
Definition: ConstantRange.h:47
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU...
Definition: DataLayout.h:243
signed less than
Definition: InstrTypes.h:675
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:381
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:622
static PHINode * getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT)
Return the loop header phi IFF IncV adds a loop invariant value to the phi.
bool isConditional() const
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:286
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Value handle that asserts if the Value is deleted.
Definition: ValueHandle.h:238
static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI)
This IV user cannot be widen.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
Definition: PatternMatch.h:812
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:941
signed less or equal
Definition: InstrTypes.h:676
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:56
Class for arbitrary precision integers.
Definition: APInt.h:70
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), Instruction *InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
iterator_range< user_iterator > users()
Definition: Value.h:400
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property...
Definition: Operator.h:96
static cl::opt< ReplaceExitVal > ReplaceExitValue("replexitval", cl::Hidden, cl::init(OnlyCheapRepl), cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), clEnumValN(OnlyCheapRepl, "cheap", "only replace exit value when the cost is cheap"), clEnumValN(AlwaysRepl, "always", "always replace exit value whenever possible")))
This class uses information about analyze scalars to rewrite expressions in canonical form...
Represents analyses that only rely on functions&#39; control flow.
Definition: PassManager.h:115
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:530
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:618
LoopT * getParentLoop() const
Definition: LoopInfo.h:101
Virtual Register Rewriter
Definition: VirtRegMap.cpp:222
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:721
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:193
void emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:652
This class represents an analyzed expression in the program.
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:176
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:56
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:465
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:149
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
#define I(x, y, z)
Definition: MD5.cpp:58
user_iterator_impl< User > user_iterator
Definition: Value.h:369
bool mayReadFromMemory() const
Return true if this instruction may read memory.
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass&#39;s AnalysisUsage.
Definition: LoopUtils.cpp:132
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:654
static bool needsLFTR(Loop *L, DominatorTree *DT)
linearFunctionTestReplace policy.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:325
bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, SmallVectorImpl< WeakTrackingVH > &Dead, SCEVExpander &Rewriter, IVVisitor *V=nullptr)
simplifyUsersOfIV - Simplify instructions that use this induction variable by using ScalarEvolution t...
int getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info=OK_AnyValue, OperandValueKind Opd2Info=OK_AnyValue, OperandValueProperties Opd1PropInfo=OP_None, OperandValueProperties Opd2PropInfo=OP_None, ArrayRef< const Value *> Args=ArrayRef< const Value *>()) const
This is an approximation of reciprocal throughput of a math/logic op.
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:658
unsigned replaceCongruentIVs(Loop *L, const DominatorTree *DT, SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetTransformInfo *TTI=nullptr)
replace congruent phis with their most canonical representative.
const SCEV * getBackedgeTakenCount(const Loop *L)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
This class represents a cast from signed integer to floating point.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
Definition: Value.h:376
bool isOne() const
Return true if the expression is a constant one.
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:649
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:566
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction has no side ef...
Definition: Local.cpp:349
LLVM Value Representation.
Definition: Value.h:73
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:659
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:251
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond)
Return true if this IV has any uses other than the (soon to be rewritten) loop exit test...
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:87
void disableCanonicalMode()
Disable the behavior of expanding expressions in canonical form rather than in a more literal form...
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:573
void getUniqueExitBlocks(SmallVectorImpl< BlockT *> &ExitBlocks) const
Return all unique successor blocks of this loop.
Definition: LoopInfoImpl.h:100
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:413
A container for analyses that lazily runs them and caches their results.
const SCEV * getTruncateOrZeroExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
This pass exposes codegen information to IR-level passes.
This header defines various interfaces for pass management in LLVM.
void setIncomingValue(unsigned i, Value *V)
#define LLVM_DEBUG(X)
Definition: Debug.h:123
bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE)
Return true if the given expression is safe to expand in the sense that all materialized values are s...
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:156
static cl::opt< bool > DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), cl::desc("Disable Linear Function Test Replace optimization"))
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:651
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:50
bool use_empty() const
Definition: Value.h:323
Pass * createIndVarSimplifyPass()
signed greater or equal
Definition: InstrTypes.h:674
IntegerType * Int32Ty
const BasicBlock * getParent() const
Definition: Instruction.h:67
This class represents a constant integer value.
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property...
Definition: Operator.h:90
static PHINode * FindLoopCounter(Loop *L, const SCEV *BECount, ScalarEvolution *SE, DominatorTree *DT)
Find an affine IV in canonical form.