LLVM  10.0.0svn
IndVarSimplify.cpp
Go to the documentation of this file.
1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This transformation analyzes and transforms the induction variables (and
10 // computations derived from them) into simpler forms suitable for subsequent
11 // analysis and transformation.
12 //
13 // If the trip count of a loop is computable, this pass also makes the following
14 // changes:
15 // 1. The exit condition for the loop is canonicalized to compare the
16 // induction value against the exit value. This turns loops like:
17 // 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18 // 2. Any use outside of the loop of an expression derived from the indvar
19 // is changed to compute the derived value outside of the loop, eliminating
20 // the dependence on the exit value of the induction variable. If the only
21 // purpose of the loop is to compute the exit value of some derived
22 // expression, this transformation will make the loop dead.
23 //
24 //===----------------------------------------------------------------------===//
25 
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/APInt.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/ADT/None.h"
32 #include "llvm/ADT/Optional.h"
33 #include "llvm/ADT/STLExtras.h"
34 #include "llvm/ADT/SmallSet.h"
35 #include "llvm/ADT/SmallPtrSet.h"
36 #include "llvm/ADT/SmallVector.h"
37 #include "llvm/ADT/Statistic.h"
39 #include "llvm/Analysis/LoopInfo.h"
40 #include "llvm/Analysis/LoopPass.h"
48 #include "llvm/IR/BasicBlock.h"
49 #include "llvm/IR/Constant.h"
50 #include "llvm/IR/ConstantRange.h"
51 #include "llvm/IR/Constants.h"
52 #include "llvm/IR/DataLayout.h"
53 #include "llvm/IR/DerivedTypes.h"
54 #include "llvm/IR/Dominators.h"
55 #include "llvm/IR/Function.h"
56 #include "llvm/IR/IRBuilder.h"
57 #include "llvm/IR/InstrTypes.h"
58 #include "llvm/IR/Instruction.h"
59 #include "llvm/IR/Instructions.h"
60 #include "llvm/IR/IntrinsicInst.h"
61 #include "llvm/IR/Intrinsics.h"
62 #include "llvm/IR/Module.h"
63 #include "llvm/IR/Operator.h"
64 #include "llvm/IR/PassManager.h"
65 #include "llvm/IR/PatternMatch.h"
66 #include "llvm/IR/Type.h"
67 #include "llvm/IR/Use.h"
68 #include "llvm/IR/User.h"
69 #include "llvm/IR/Value.h"
70 #include "llvm/IR/ValueHandle.h"
71 #include "llvm/Pass.h"
72 #include "llvm/Support/Casting.h"
74 #include "llvm/Support/Compiler.h"
75 #include "llvm/Support/Debug.h"
79 #include "llvm/Transforms/Scalar.h"
84 #include <cassert>
85 #include <cstdint>
86 #include <utility>
87 
88 using namespace llvm;
89 
90 #define DEBUG_TYPE "indvars"
91 
92 STATISTIC(NumWidened , "Number of indvars widened");
93 STATISTIC(NumReplaced , "Number of exit values replaced");
94 STATISTIC(NumLFTR , "Number of loop exit tests replaced");
95 STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
96 STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
97 
98 // Trip count verification can be enabled by default under NDEBUG if we
99 // implement a strong expression equivalence checker in SCEV. Until then, we
100 // use the verify-indvars flag, which may assert in some cases.
102  "verify-indvars", cl::Hidden,
103  cl::desc("Verify the ScalarEvolution result after running indvars"));
104 
106 
108  "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
109  cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
110  cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
111  clEnumValN(OnlyCheapRepl, "cheap",
112  "only replace exit value when the cost is cheap"),
113  clEnumValN(NoHardUse, "noharduse",
114  "only replace exit values when loop def likely dead"),
115  clEnumValN(AlwaysRepl, "always",
116  "always replace exit value whenever possible")));
117 
119  "indvars-post-increment-ranges", cl::Hidden,
120  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
121  cl::init(true));
122 
123 static cl::opt<bool>
124 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
125  cl::desc("Disable Linear Function Test Replace optimization"));
126 
127 namespace {
128 
129 struct RewritePhi;
130 
131 class IndVarSimplify {
132  LoopInfo *LI;
133  ScalarEvolution *SE;
134  DominatorTree *DT;
135  const DataLayout &DL;
136  TargetLibraryInfo *TLI;
137  const TargetTransformInfo *TTI;
138 
140 
141  bool isValidRewrite(Value *FromVal, Value *ToVal);
142 
143  bool handleFloatingPointIV(Loop *L, PHINode *PH);
144  bool rewriteNonIntegerIVs(Loop *L);
145 
146  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
147  bool optimizeLoopExits(Loop *L);
148 
149  bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
150  bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
151  bool rewriteFirstIterationLoopExitValues(Loop *L);
152  bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const;
153 
154  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
155  const SCEV *ExitCount,
156  PHINode *IndVar, SCEVExpander &Rewriter);
157 
158  bool sinkUnusedInvariants(Loop *L);
159 
160 public:
161  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
162  const DataLayout &DL, TargetLibraryInfo *TLI,
163  TargetTransformInfo *TTI)
164  : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}
165 
166  bool run(Loop *L);
167 };
168 
169 } // end anonymous namespace
170 
171 /// Return true if the SCEV expansion generated by the rewriter can replace the
172 /// original value. SCEV guarantees that it produces the same value, but the way
173 /// it is produced may be illegal IR. Ideally, this function will only be
174 /// called for verification.
175 bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
176  // If an SCEV expression subsumed multiple pointers, its expansion could
177  // reassociate the GEP changing the base pointer. This is illegal because the
178  // final address produced by a GEP chain must be inbounds relative to its
179  // underlying object. Otherwise basic alias analysis, among other things,
180  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
181  // producing an expression involving multiple pointers. Until then, we must
182  // bail out here.
183  //
184  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
185  // because it understands lcssa phis while SCEV does not.
186  Value *FromPtr = FromVal;
187  Value *ToPtr = ToVal;
188  if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
189  FromPtr = GEP->getPointerOperand();
190  }
191  if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
192  ToPtr = GEP->getPointerOperand();
193  }
194  if (FromPtr != FromVal || ToPtr != ToVal) {
195  // Quickly check the common case
196  if (FromPtr == ToPtr)
197  return true;
198 
199  // SCEV may have rewritten an expression that produces the GEP's pointer
200  // operand. That's ok as long as the pointer operand has the same base
201  // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
202  // base of a recurrence. This handles the case in which SCEV expansion
203  // converts a pointer type recurrence into a nonrecurrent pointer base
204  // indexed by an integer recurrence.
205 
206  // If the GEP base pointer is a vector of pointers, abort.
207  if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
208  return false;
209 
210  const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
211  const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
212  if (FromBase == ToBase)
213  return true;
214 
215  LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase
216  << " != " << *ToBase << "\n");
217 
218  return false;
219  }
220  return true;
221 }
222 
223 /// Determine the insertion point for this user. By default, insert immediately
224 /// before the user. SCEVExpander or LICM will hoist loop invariants out of the
225 /// loop. For PHI nodes, there may be multiple uses, so compute the nearest
226 /// common dominator for the incoming blocks. A nullptr can be returned if no
227 /// viable location is found: it may happen if User is a PHI and Def only comes
228 /// to this PHI from unreachable blocks.
230  DominatorTree *DT, LoopInfo *LI) {
231  PHINode *PHI = dyn_cast<PHINode>(User);
232  if (!PHI)
233  return User;
234 
235  Instruction *InsertPt = nullptr;
236  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
237  if (PHI->getIncomingValue(i) != Def)
238  continue;
239 
240  BasicBlock *InsertBB = PHI->getIncomingBlock(i);
241 
242  if (!DT->isReachableFromEntry(InsertBB))
243  continue;
244 
245  if (!InsertPt) {
246  InsertPt = InsertBB->getTerminator();
247  continue;
248  }
249  InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
250  InsertPt = InsertBB->getTerminator();
251  }
252 
253  // If we have skipped all inputs, it means that Def only comes to Phi from
254  // unreachable blocks.
255  if (!InsertPt)
256  return nullptr;
257 
258  auto *DefI = dyn_cast<Instruction>(Def);
259  if (!DefI)
260  return InsertPt;
261 
262  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");
263 
264  auto *L = LI->getLoopFor(DefI->getParent());
265  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));
266 
267  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
268  if (LI->getLoopFor(DTN->getBlock()) == L)
269  return DTN->getBlock()->getTerminator();
270 
271  llvm_unreachable("DefI dominates InsertPt!");
272 }
273 
274 //===----------------------------------------------------------------------===//
275 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
276 //===----------------------------------------------------------------------===//
277 
278 /// Convert APF to an integer, if possible.
279 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
280  bool isExact = false;
281  // See if we can convert this to an int64_t
282  uint64_t UIntVal;
283  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
284  APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
285  !isExact)
286  return false;
287  IntVal = UIntVal;
288  return true;
289 }
290 
291 /// If the loop has floating induction variable then insert corresponding
292 /// integer induction variable if possible.
293 /// For example,
294 /// for(double i = 0; i < 10000; ++i)
295 /// bar(i)
296 /// is converted into
297 /// for(int i = 0; i < 10000; ++i)
298 /// bar((double)i);
299 bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
300  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
301  unsigned BackEdge = IncomingEdge^1;
302 
303  // Check incoming value.
304  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
305 
306  int64_t InitValue;
307  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
308  return false;
309 
310  // Check IV increment. Reject this PN if increment operation is not
311  // an add or increment value can not be represented by an integer.
312  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
313  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
314 
315  // If this is not an add of the PHI with a constantfp, or if the constant fp
316  // is not an integer, bail out.
317  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
318  int64_t IncValue;
319  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
320  !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
321  return false;
322 
323  // Check Incr uses. One user is PN and the other user is an exit condition
324  // used by the conditional terminator.
325  Value::user_iterator IncrUse = Incr->user_begin();
326  Instruction *U1 = cast<Instruction>(*IncrUse++);
327  if (IncrUse == Incr->user_end()) return false;
328  Instruction *U2 = cast<Instruction>(*IncrUse++);
329  if (IncrUse != Incr->user_end()) return false;
330 
331  // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
332  // only used by a branch, we can't transform it.
334  if (!Compare)
335  Compare = dyn_cast<FCmpInst>(U2);
336  if (!Compare || !Compare->hasOneUse() ||
337  !isa<BranchInst>(Compare->user_back()))
338  return false;
339 
340  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
341 
342  // We need to verify that the branch actually controls the iteration count
343  // of the loop. If not, the new IV can overflow and no one will notice.
344  // The branch block must be in the loop and one of the successors must be out
345  // of the loop.
346  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
347  if (!L->contains(TheBr->getParent()) ||
348  (L->contains(TheBr->getSuccessor(0)) &&
349  L->contains(TheBr->getSuccessor(1))))
350  return false;
351 
352  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
353  // transform it.
354  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
355  int64_t ExitValue;
356  if (ExitValueVal == nullptr ||
357  !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
358  return false;
359 
360  // Find new predicate for integer comparison.
362  switch (Compare->getPredicate()) {
363  default: return false; // Unknown comparison.
364  case CmpInst::FCMP_OEQ:
365  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
366  case CmpInst::FCMP_ONE:
367  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
368  case CmpInst::FCMP_OGT:
369  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
370  case CmpInst::FCMP_OGE:
371  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
372  case CmpInst::FCMP_OLT:
373  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
374  case CmpInst::FCMP_OLE:
375  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
376  }
377 
378  // We convert the floating point induction variable to a signed i32 value if
379  // we can. This is only safe if the comparison will not overflow in a way
380  // that won't be trapped by the integer equivalent operations. Check for this
381  // now.
382  // TODO: We could use i64 if it is native and the range requires it.
383 
384  // The start/stride/exit values must all fit in signed i32.
385  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
386  return false;
387 
388  // If not actually striding (add x, 0.0), avoid touching the code.
389  if (IncValue == 0)
390  return false;
391 
392  // Positive and negative strides have different safety conditions.
393  if (IncValue > 0) {
394  // If we have a positive stride, we require the init to be less than the
395  // exit value.
396  if (InitValue >= ExitValue)
397  return false;
398 
399  uint32_t Range = uint32_t(ExitValue-InitValue);
400  // Check for infinite loop, either:
401  // while (i <= Exit) or until (i > Exit)
402  if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
403  if (++Range == 0) return false; // Range overflows.
404  }
405 
406  unsigned Leftover = Range % uint32_t(IncValue);
407 
408  // If this is an equality comparison, we require that the strided value
409  // exactly land on the exit value, otherwise the IV condition will wrap
410  // around and do things the fp IV wouldn't.
411  if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
412  Leftover != 0)
413  return false;
414 
415  // If the stride would wrap around the i32 before exiting, we can't
416  // transform the IV.
417  if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
418  return false;
419  } else {
420  // If we have a negative stride, we require the init to be greater than the
421  // exit value.
422  if (InitValue <= ExitValue)
423  return false;
424 
425  uint32_t Range = uint32_t(InitValue-ExitValue);
426  // Check for infinite loop, either:
427  // while (i >= Exit) or until (i < Exit)
428  if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
429  if (++Range == 0) return false; // Range overflows.
430  }
431 
432  unsigned Leftover = Range % uint32_t(-IncValue);
433 
434  // If this is an equality comparison, we require that the strided value
435  // exactly land on the exit value, otherwise the IV condition will wrap
436  // around and do things the fp IV wouldn't.
437  if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
438  Leftover != 0)
439  return false;
440 
441  // If the stride would wrap around the i32 before exiting, we can't
442  // transform the IV.
443  if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
444  return false;
445  }
446 
448 
449  // Insert new integer induction variable.
450  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
451  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
452  PN->getIncomingBlock(IncomingEdge));
453 
454  Value *NewAdd =
455  BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
456  Incr->getName()+".int", Incr);
457  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
458 
459  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
460  ConstantInt::get(Int32Ty, ExitValue),
461  Compare->getName());
462 
463  // In the following deletions, PN may become dead and may be deleted.
464  // Use a WeakTrackingVH to observe whether this happens.
465  WeakTrackingVH WeakPH = PN;
466 
467  // Delete the old floating point exit comparison. The branch starts using the
468  // new comparison.
469  NewCompare->takeName(Compare);
470  Compare->replaceAllUsesWith(NewCompare);
472 
473  // Delete the old floating point increment.
474  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
476 
477  // If the FP induction variable still has uses, this is because something else
478  // in the loop uses its value. In order to canonicalize the induction
479  // variable, we chose to eliminate the IV and rewrite it in terms of an
480  // int->fp cast.
481  //
482  // We give preference to sitofp over uitofp because it is faster on most
483  // platforms.
484  if (WeakPH) {
485  Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
486  &*PN->getParent()->getFirstInsertionPt());
487  PN->replaceAllUsesWith(Conv);
489  }
490  return true;
491 }
492 
493 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
494  // First step. Check to see if there are any floating-point recurrences.
495  // If there are, change them into integer recurrences, permitting analysis by
496  // the SCEV routines.
497  BasicBlock *Header = L->getHeader();
498 
500  for (PHINode &PN : Header->phis())
501  PHIs.push_back(&PN);
502 
503  bool Changed = false;
504  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
505  if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
506  Changed |= handleFloatingPointIV(L, PN);
507 
508  // If the loop previously had floating-point IV, ScalarEvolution
509  // may not have been able to compute a trip count. Now that we've done some
510  // re-writing, the trip count may be computable.
511  if (Changed)
512  SE->forgetLoop(L);
513  return Changed;
514 }
515 
516 namespace {
517 
518 // Collect information about PHI nodes which can be transformed in
519 // rewriteLoopExitValues.
520 struct RewritePhi {
521  PHINode *PN;
522 
523  // Ith incoming value.
524  unsigned Ith;
525 
526  // Exit value after expansion.
527  Value *Val;
528 
529  // High Cost when expansion.
530  bool HighCost;
531 
532  RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
533  : PN(P), Ith(I), Val(V), HighCost(H) {}
534 };
535 
536 } // end anonymous namespace
537 
538 //===----------------------------------------------------------------------===//
539 // rewriteLoopExitValues - Optimize IV users outside the loop.
540 // As a side effect, reduces the amount of IV processing within the loop.
541 //===----------------------------------------------------------------------===//
542 
543 bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L, const Instruction *I) const {
546  Visited.insert(I);
547  WorkList.push_back(I);
548  while (!WorkList.empty()) {
549  const Instruction *Curr = WorkList.pop_back_val();
550  // This use is outside the loop, nothing to do.
551  if (!L->contains(Curr))
552  continue;
553  // Do we assume it is a "hard" use which will not be eliminated easily?
554  if (Curr->mayHaveSideEffects())
555  return true;
556  // Otherwise, add all its users to worklist.
557  for (auto U : Curr->users()) {
558  auto *UI = cast<Instruction>(U);
559  if (Visited.insert(UI).second)
560  WorkList.push_back(UI);
561  }
562  }
563  return false;
564 }
565 
566 /// Check to see if this loop has a computable loop-invariant execution count.
567 /// If so, this means that we can compute the final value of any expressions
568 /// that are recurrent in the loop, and substitute the exit values from the loop
569 /// into any instructions outside of the loop that use the final values of the
570 /// current expressions.
571 ///
572 /// This is mostly redundant with the regular IndVarSimplify activities that
573 /// happen later, except that it's more powerful in some cases, because it's
574 /// able to brute-force evaluate arbitrary instructions as long as they have
575 /// constant operands at the beginning of the loop.
576 bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
577  // Check a pre-condition.
578  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
579  "Indvars did not preserve LCSSA!");
580 
581  SmallVector<BasicBlock*, 8> ExitBlocks;
582  L->getUniqueExitBlocks(ExitBlocks);
583 
584  SmallVector<RewritePhi, 8> RewritePhiSet;
585  // Find all values that are computed inside the loop, but used outside of it.
586  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
587  // the exit blocks of the loop to find them.
588  for (BasicBlock *ExitBB : ExitBlocks) {
589  // If there are no PHI nodes in this exit block, then no values defined
590  // inside the loop are used on this path, skip it.
591  PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
592  if (!PN) continue;
593 
594  unsigned NumPreds = PN->getNumIncomingValues();
595 
596  // Iterate over all of the PHI nodes.
597  BasicBlock::iterator BBI = ExitBB->begin();
598  while ((PN = dyn_cast<PHINode>(BBI++))) {
599  if (PN->use_empty())
600  continue; // dead use, don't replace it
601 
602  if (!SE->isSCEVable(PN->getType()))
603  continue;
604 
605  // It's necessary to tell ScalarEvolution about this explicitly so that
606  // it can walk the def-use list and forget all SCEVs, as it may not be
607  // watching the PHI itself. Once the new exit value is in place, there
608  // may not be a def-use connection between the loop and every instruction
609  // which got a SCEVAddRecExpr for that loop.
610  SE->forgetValue(PN);
611 
612  // Iterate over all of the values in all the PHI nodes.
613  for (unsigned i = 0; i != NumPreds; ++i) {
614  // If the value being merged in is not integer or is not defined
615  // in the loop, skip it.
616  Value *InVal = PN->getIncomingValue(i);
617  if (!isa<Instruction>(InVal))
618  continue;
619 
620  // If this pred is for a subloop, not L itself, skip it.
621  if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
622  continue; // The Block is in a subloop, skip it.
623 
624  // Check that InVal is defined in the loop.
625  Instruction *Inst = cast<Instruction>(InVal);
626  if (!L->contains(Inst))
627  continue;
628 
629  // Okay, this instruction has a user outside of the current loop
630  // and varies predictably *inside* the loop. Evaluate the value it
631  // contains when the loop exits, if possible. We prefer to start with
632  // expressions which are true for all exits (so as to maximize
633  // expression reuse by the SCEVExpander), but resort to per-exit
634  // evaluation if that fails.
635  const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
636  if (isa<SCEVCouldNotCompute>(ExitValue) ||
637  !SE->isLoopInvariant(ExitValue, L) ||
638  !isSafeToExpand(ExitValue, *SE)) {
639  // TODO: This should probably be sunk into SCEV in some way; maybe a
640  // getSCEVForExit(SCEV*, L, ExitingBB)? It can be generalized for
641  // most SCEV expressions and other recurrence types (e.g. shift
642  // recurrences). Is there existing code we can reuse?
643  const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i));
644  if (isa<SCEVCouldNotCompute>(ExitCount))
645  continue;
646  if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst)))
647  if (AddRec->getLoop() == L)
648  ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE);
649  if (isa<SCEVCouldNotCompute>(ExitValue) ||
650  !SE->isLoopInvariant(ExitValue, L) ||
651  !isSafeToExpand(ExitValue, *SE))
652  continue;
653  }
654 
655  // Computing the value outside of the loop brings no benefit if it is
656  // definitely used inside the loop in a way which can not be optimized
657  // away. Avoid doing so unless we know we have a value which computes
658  // the ExitValue already. TODO: This should be merged into SCEV
659  // expander to leverage its knowledge of existing expressions.
660  if (ReplaceExitValue != AlwaysRepl &&
661  !isa<SCEVConstant>(ExitValue) && !isa<SCEVUnknown>(ExitValue) &&
662  hasHardUserWithinLoop(L, Inst))
663  continue;
664 
665  bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
666  Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
667 
668  LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
669  << '\n'
670  << " LoopVal = " << *Inst << "\n");
671 
672  if (!isValidRewrite(Inst, ExitVal)) {
673  DeadInsts.push_back(ExitVal);
674  continue;
675  }
676 
677 #ifndef NDEBUG
678  // If we reuse an instruction from a loop which is neither L nor one of
679  // its containing loops, we end up breaking LCSSA form for this loop by
680  // creating a new use of its instruction.
681  if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
682  if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
683  if (EVL != L)
684  assert(EVL->contains(L) && "LCSSA breach detected!");
685 #endif
686 
687  // Collect all the candidate PHINodes to be rewritten.
688  RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
689  }
690  }
691  }
692 
693  bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
694 
695  bool Changed = false;
696  // Transformation.
697  for (const RewritePhi &Phi : RewritePhiSet) {
698  PHINode *PN = Phi.PN;
699  Value *ExitVal = Phi.Val;
700 
701  // Only do the rewrite when the ExitValue can be expanded cheaply.
702  // If LoopCanBeDel is true, rewrite exit value aggressively.
703  if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
704  DeadInsts.push_back(ExitVal);
705  continue;
706  }
707 
708  Changed = true;
709  ++NumReplaced;
710  Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
711  PN->setIncomingValue(Phi.Ith, ExitVal);
712 
713  // If this instruction is dead now, delete it. Don't do it now to avoid
714  // invalidating iterators.
715  if (isInstructionTriviallyDead(Inst, TLI))
716  DeadInsts.push_back(Inst);
717 
718  // Replace PN with ExitVal if that is legal and does not break LCSSA.
719  if (PN->getNumIncomingValues() == 1 &&
720  LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
721  PN->replaceAllUsesWith(ExitVal);
722  PN->eraseFromParent();
723  }
724  }
725 
726  // The insertion point instruction may have been deleted; clear it out
727  // so that the rewriter doesn't trip over it later.
728  Rewriter.clearInsertPoint();
729  return Changed;
730 }
731 
732 //===---------------------------------------------------------------------===//
733 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
734 // they will exit at the first iteration.
735 //===---------------------------------------------------------------------===//
736 
737 /// Check to see if this loop has loop invariant conditions which lead to loop
738 /// exits. If so, we know that if the exit path is taken, it is at the first
739 /// loop iteration. This lets us predict exit values of PHI nodes that live in
740 /// loop header.
741 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
742  // Verify the input to the pass is already in LCSSA form.
743  assert(L->isLCSSAForm(*DT));
744 
745  SmallVector<BasicBlock *, 8> ExitBlocks;
746  L->getUniqueExitBlocks(ExitBlocks);
747 
748  bool MadeAnyChanges = false;
749  for (auto *ExitBB : ExitBlocks) {
750  // If there are no more PHI nodes in this exit block, then no more
751  // values defined inside the loop are used on this path.
752  for (PHINode &PN : ExitBB->phis()) {
753  for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
754  IncomingValIdx != E; ++IncomingValIdx) {
755  auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
756 
757  // Can we prove that the exit must run on the first iteration if it
758  // runs at all? (i.e. early exits are fine for our purposes, but
759  // traces which lead to this exit being taken on the 2nd iteration
760  // aren't.) Note that this is about whether the exit branch is
761  // executed, not about whether it is taken.
762  if (!L->getLoopLatch() ||
763  !DT->dominates(IncomingBB, L->getLoopLatch()))
764  continue;
765 
766  // Get condition that leads to the exit path.
767  auto *TermInst = IncomingBB->getTerminator();
768 
769  Value *Cond = nullptr;
770  if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
771  // Must be a conditional branch, otherwise the block
772  // should not be in the loop.
773  Cond = BI->getCondition();
774  } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
775  Cond = SI->getCondition();
776  else
777  continue;
778 
779  if (!L->isLoopInvariant(Cond))
780  continue;
781 
782  auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
783 
784  // Only deal with PHIs in the loop header.
785  if (!ExitVal || ExitVal->getParent() != L->getHeader())
786  continue;
787 
788  // If ExitVal is a PHI on the loop header, then we know its
789  // value along this exit because the exit can only be taken
790  // on the first iteration.
791  auto *LoopPreheader = L->getLoopPreheader();
792  assert(LoopPreheader && "Invalid loop");
793  int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
794  if (PreheaderIdx != -1) {
795  assert(ExitVal->getParent() == L->getHeader() &&
796  "ExitVal must be in loop header");
797  MadeAnyChanges = true;
798  PN.setIncomingValue(IncomingValIdx,
799  ExitVal->getIncomingValue(PreheaderIdx));
800  }
801  }
802  }
803  }
804  return MadeAnyChanges;
805 }
806 
807 /// Check whether it is possible to delete the loop after rewriting exit
808 /// value. If it is possible, ignore ReplaceExitValue and do rewriting
809 /// aggressively.
810 bool IndVarSimplify::canLoopBeDeleted(
811  Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
812  BasicBlock *Preheader = L->getLoopPreheader();
813  // If there is no preheader, the loop will not be deleted.
814  if (!Preheader)
815  return false;
816 
817  // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1.
818  // We obviate multiple ExitingBlocks case for simplicity.
819  // TODO: If we see testcase with multiple ExitingBlocks can be deleted
820  // after exit value rewriting, we can enhance the logic here.
821  SmallVector<BasicBlock *, 4> ExitingBlocks;
822  L->getExitingBlocks(ExitingBlocks);
823  SmallVector<BasicBlock *, 8> ExitBlocks;
824  L->getUniqueExitBlocks(ExitBlocks);
825  if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1)
826  return false;
827 
828  BasicBlock *ExitBlock = ExitBlocks[0];
829  BasicBlock::iterator BI = ExitBlock->begin();
830  while (PHINode *P = dyn_cast<PHINode>(BI)) {
831  Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);
832 
833  // If the Incoming value of P is found in RewritePhiSet, we know it
834  // could be rewritten to use a loop invariant value in transformation
835  // phase later. Skip it in the loop invariant check below.
836  bool found = false;
837  for (const RewritePhi &Phi : RewritePhiSet) {
838  unsigned i = Phi.Ith;
839  if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
840  found = true;
841  break;
842  }
843  }
844 
845  Instruction *I;
846  if (!found && (I = dyn_cast<Instruction>(Incoming)))
847  if (!L->hasLoopInvariantOperands(I))
848  return false;
849 
850  ++BI;
851  }
852 
853  for (auto *BB : L->blocks())
854  if (llvm::any_of(*BB, [](Instruction &I) {
855  return I.mayHaveSideEffects();
856  }))
857  return false;
858 
859  return true;
860 }
861 
862 //===----------------------------------------------------------------------===//
863 // IV Widening - Extend the width of an IV to cover its widest uses.
864 //===----------------------------------------------------------------------===//
865 
866 namespace {
867 
868 // Collect information about induction variables that are used by sign/zero
869 // extend operations. This information is recorded by CollectExtend and provides
870 // the input to WidenIV.
871 struct WideIVInfo {
872  PHINode *NarrowIV = nullptr;
873 
874  // Widest integer type created [sz]ext
875  Type *WidestNativeType = nullptr;
876 
877  // Was a sext user seen before a zext?
878  bool IsSigned = false;
879 };
880 
881 } // end anonymous namespace
882 
883 /// Update information about the induction variable that is extended by this
884 /// sign or zero extend operation. This is used to determine the final width of
885 /// the IV before actually widening it.
886 static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
887  const TargetTransformInfo *TTI) {
888  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
889  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
890  return;
891 
892  Type *Ty = Cast->getType();
893  uint64_t Width = SE->getTypeSizeInBits(Ty);
894  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
895  return;
896 
897  // Check that `Cast` actually extends the induction variable (we rely on this
898  // later). This takes care of cases where `Cast` is extending a truncation of
899  // the narrow induction variable, and thus can end up being narrower than the
900  // "narrow" induction variable.
901  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
902  if (NarrowIVWidth >= Width)
903  return;
904 
905  // Cast is either an sext or zext up to this point.
906  // We should not widen an indvar if arithmetics on the wider indvar are more
907  // expensive than those on the narrower indvar. We check only the cost of ADD
908  // because at least an ADD is required to increment the induction variable. We
909  // could compute more comprehensively the cost of all instructions on the
910  // induction variable when necessary.
911  if (TTI &&
914  Cast->getOperand(0)->getType())) {
915  return;
916  }
917 
918  if (!WI.WidestNativeType) {
919  WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
920  WI.IsSigned = IsSigned;
921  return;
922  }
923 
924  // We extend the IV to satisfy the sign of its first user, arbitrarily.
925  if (WI.IsSigned != IsSigned)
926  return;
927 
928  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
929  WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
930 }
931 
932 namespace {
933 
934 /// Record a link in the Narrow IV def-use chain along with the WideIV that
935 /// computes the same value as the Narrow IV def. This avoids caching Use*
936 /// pointers.
937 struct NarrowIVDefUse {
938  Instruction *NarrowDef = nullptr;
939  Instruction *NarrowUse = nullptr;
940  Instruction *WideDef = nullptr;
941 
942  // True if the narrow def is never negative. Tracking this information lets
943  // us use a sign extension instead of a zero extension or vice versa, when
944  // profitable and legal.
945  bool NeverNegative = false;
946 
947  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
948  bool NeverNegative)
949  : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
950  NeverNegative(NeverNegative) {}
951 };
952 
953 /// The goal of this transform is to remove sign and zero extends without
954 /// creating any new induction variables. To do this, it creates a new phi of
955 /// the wider type and redirects all users, either removing extends or inserting
956 /// truncs whenever we stop propagating the type.
957 class WidenIV {
958  // Parameters
959  PHINode *OrigPhi;
960  Type *WideType;
961 
962  // Context
963  LoopInfo *LI;
964  Loop *L;
965  ScalarEvolution *SE;
966  DominatorTree *DT;
967 
968  // Does the module have any calls to the llvm.experimental.guard intrinsic
969  // at all? If not we can avoid scanning instructions looking for guards.
970  bool HasGuards;
971 
972  // Result
973  PHINode *WidePhi = nullptr;
974  Instruction *WideInc = nullptr;
975  const SCEV *WideIncExpr = nullptr;
977 
979  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
980 
981  enum ExtendKind { ZeroExtended, SignExtended, Unknown };
982 
983  // A map tracking the kind of extension used to widen each narrow IV
984  // and narrow IV user.
985  // Key: pointer to a narrow IV or IV user.
986  // Value: the kind of extension used to widen this Instruction.
987  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;
988 
989  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;
990 
991  // A map with control-dependent ranges for post increment IV uses. The key is
992  // a pair of IV def and a use of this def denoting the context. The value is
993  // a ConstantRange representing possible values of the def at the given
994  // context.
995  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;
996 
997  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
998  Instruction *UseI) {
999  DefUserPair Key(Def, UseI);
1000  auto It = PostIncRangeInfos.find(Key);
1001  return It == PostIncRangeInfos.end()
1003  : Optional<ConstantRange>(It->second);
1004  }
1005 
1006  void calculatePostIncRanges(PHINode *OrigPhi);
1007  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);
1008 
1009  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
1010  DefUserPair Key(Def, UseI);
1011  auto It = PostIncRangeInfos.find(Key);
1012  if (It == PostIncRangeInfos.end())
1013  PostIncRangeInfos.insert({Key, R});
1014  else
1015  It->second = R.intersectWith(It->second);
1016  }
1017 
1018 public:
1019  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
1021  bool HasGuards)
1022  : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
1023  L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
1024  HasGuards(HasGuards), DeadInsts(DI) {
1025  assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
1026  ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
1027  }
1028 
1029  PHINode *createWideIV(SCEVExpander &Rewriter);
1030 
1031 protected:
1032  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
1033  Instruction *Use);
1034 
1035  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
1036  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
1037  const SCEVAddRecExpr *WideAR);
1038  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);
1039 
1040  ExtendKind getExtendKind(Instruction *I);
1041 
1042  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;
1043 
1044  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);
1045 
1046  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);
1047 
1048  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
1049  unsigned OpCode) const;
1050 
1051  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
1052 
1053  bool widenLoopCompare(NarrowIVDefUse DU);
1054  bool widenWithVariantLoadUse(NarrowIVDefUse DU);
1055  void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);
1056 
1057  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
1058 };
1059 
1060 } // end anonymous namespace
1061 
1062 Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
1063  bool IsSigned, Instruction *Use) {
1064  // Set the debug location and conservative insertion point.
1065  IRBuilder<> Builder(Use);
1066  // Hoist the insertion point into loop preheaders as far as possible.
1067  for (const Loop *L = LI->getLoopFor(Use->getParent());
1068  L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
1069  L = L->getParentLoop())
1071 
1072  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
1073  Builder.CreateZExt(NarrowOper, WideType);
1074 }
1075 
1076 /// Instantiate a wide operation to replace a narrow operation. This only needs
1077 /// to handle operations that can evaluation to SCEVAddRec. It can safely return
1078 /// 0 for any operation we decide not to clone.
1079 Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
1080  const SCEVAddRecExpr *WideAR) {
1081  unsigned Opcode = DU.NarrowUse->getOpcode();
1082  switch (Opcode) {
1083  default:
1084  return nullptr;
1085  case Instruction::Add:
1086  case Instruction::Mul:
1087  case Instruction::UDiv:
1088  case Instruction::Sub:
1089  return cloneArithmeticIVUser(DU, WideAR);
1090 
1091  case Instruction::And:
1092  case Instruction::Or:
1093  case Instruction::Xor:
1094  case Instruction::Shl:
1095  case Instruction::LShr:
1096  case Instruction::AShr:
1097  return cloneBitwiseIVUser(DU);
1098  }
1099 }
1100 
1101 Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
1102  Instruction *NarrowUse = DU.NarrowUse;
1103  Instruction *NarrowDef = DU.NarrowDef;
1104  Instruction *WideDef = DU.WideDef;
1105 
1106  LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");
1107 
1108  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
1109  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
1110  // invariant and will be folded or hoisted. If it actually comes from a
1111  // widened IV, it should be removed during a future call to widenIVUse.
1112  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
1113  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1114  ? WideDef
1115  : createExtendInst(NarrowUse->getOperand(0), WideType,
1116  IsSigned, NarrowUse);
1117  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1118  ? WideDef
1119  : createExtendInst(NarrowUse->getOperand(1), WideType,
1120  IsSigned, NarrowUse);
1121 
1122  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1123  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1124  NarrowBO->getName());
1125  IRBuilder<> Builder(NarrowUse);
1126  Builder.Insert(WideBO);
1127  WideBO->copyIRFlags(NarrowBO);
1128  return WideBO;
1129 }
1130 
1131 Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
1132  const SCEVAddRecExpr *WideAR) {
1133  Instruction *NarrowUse = DU.NarrowUse;
1134  Instruction *NarrowDef = DU.NarrowDef;
1135  Instruction *WideDef = DU.WideDef;
1136 
1137  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1138 
1139  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;
1140 
1141  // We're trying to find X such that
1142  //
1143  // Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
1144  //
1145  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
1146  // and check using SCEV if any of them are correct.
1147 
1148  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
1149  // correct solution to X.
1150  auto GuessNonIVOperand = [&](bool SignExt) {
1151  const SCEV *WideLHS;
1152  const SCEV *WideRHS;
1153 
1154  auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
1155  if (SignExt)
1156  return SE->getSignExtendExpr(S, Ty);
1157  return SE->getZeroExtendExpr(S, Ty);
1158  };
1159 
1160  if (IVOpIdx == 0) {
1161  WideLHS = SE->getSCEV(WideDef);
1162  const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
1163  WideRHS = GetExtend(NarrowRHS, WideType);
1164  } else {
1165  const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
1166  WideLHS = GetExtend(NarrowLHS, WideType);
1167  WideRHS = SE->getSCEV(WideDef);
1168  }
1169 
1170  // WideUse is "WideDef `op.wide` X" as described in the comment.
1171  const SCEV *WideUse = nullptr;
1172 
1173  switch (NarrowUse->getOpcode()) {
1174  default:
1175  llvm_unreachable("No other possibility!");
1176 
1177  case Instruction::Add:
1178  WideUse = SE->getAddExpr(WideLHS, WideRHS);
1179  break;
1180 
1181  case Instruction::Mul:
1182  WideUse = SE->getMulExpr(WideLHS, WideRHS);
1183  break;
1184 
1185  case Instruction::UDiv:
1186  WideUse = SE->getUDivExpr(WideLHS, WideRHS);
1187  break;
1188 
1189  case Instruction::Sub:
1190  WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
1191  break;
1192  }
1193 
1194  return WideUse == WideAR;
1195  };
1196 
1197  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
1198  if (!GuessNonIVOperand(SignExtend)) {
1199  SignExtend = !SignExtend;
1200  if (!GuessNonIVOperand(SignExtend))
1201  return nullptr;
1202  }
1203 
1204  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1205  ? WideDef
1206  : createExtendInst(NarrowUse->getOperand(0), WideType,
1207  SignExtend, NarrowUse);
1208  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1209  ? WideDef
1210  : createExtendInst(NarrowUse->getOperand(1), WideType,
1211  SignExtend, NarrowUse);
1212 
1213  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1214  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1215  NarrowBO->getName());
1216 
1217  IRBuilder<> Builder(NarrowUse);
1218  Builder.Insert(WideBO);
1219  WideBO->copyIRFlags(NarrowBO);
1220  return WideBO;
1221 }
1222 
1223 WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
1224  auto It = ExtendKindMap.find(I);
1225  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
1226  return It->second;
1227 }
1228 
1229 const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
1230  unsigned OpCode) const {
1231  if (OpCode == Instruction::Add)
1232  return SE->getAddExpr(LHS, RHS);
1233  if (OpCode == Instruction::Sub)
1234  return SE->getMinusSCEV(LHS, RHS);
1235  if (OpCode == Instruction::Mul)
1236  return SE->getMulExpr(LHS, RHS);
1237 
1238  llvm_unreachable("Unsupported opcode.");
1239 }
1240 
1241 /// No-wrap operations can transfer sign extension of their result to their
1242 /// operands. Generate the SCEV value for the widened operation without
1243 /// actually modifying the IR yet. If the expression after extending the
1244 /// operands is an AddRec for this loop, return the AddRec and the kind of
1245 /// extension used.
1246 WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
1247  // Handle the common case of add<nsw/nuw>
1248  const unsigned OpCode = DU.NarrowUse->getOpcode();
1249  // Only Add/Sub/Mul instructions supported yet.
1250  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1251  OpCode != Instruction::Mul)
1252  return {nullptr, Unknown};
1253 
1254  // One operand (NarrowDef) has already been extended to WideDef. Now determine
1255  // if extending the other will lead to a recurrence.
1256  const unsigned ExtendOperIdx =
1257  DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
1258  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
1259 
1260  const SCEV *ExtendOperExpr = nullptr;
1261  const OverflowingBinaryOperator *OBO =
1262  cast<OverflowingBinaryOperator>(DU.NarrowUse);
1263  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
1264  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1265  ExtendOperExpr = SE->getSignExtendExpr(
1266  SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
1267  else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1268  ExtendOperExpr = SE->getZeroExtendExpr(
1269  SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
1270  else
1271  return {nullptr, Unknown};
1272 
1273  // When creating this SCEV expr, don't apply the current operations NSW or NUW
1274  // flags. This instruction may be guarded by control flow that the no-wrap
1275  // behavior depends on. Non-control-equivalent instructions can be mapped to
1276  // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
1277  // semantics to those operations.
1278  const SCEV *lhs = SE->getSCEV(DU.WideDef);
1279  const SCEV *rhs = ExtendOperExpr;
1280 
1281  // Let's swap operands to the initial order for the case of non-commutative
1282  // operations, like SUB. See PR21014.
1283  if (ExtendOperIdx == 0)
1284  std::swap(lhs, rhs);
1285  const SCEVAddRecExpr *AddRec =
1286  dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
1287 
1288  if (!AddRec || AddRec->getLoop() != L)
1289  return {nullptr, Unknown};
1290 
1291  return {AddRec, ExtKind};
1292 }
1293 
1294 /// Is this instruction potentially interesting for further simplification after
1295 /// widening it's type? In other words, can the extend be safely hoisted out of
1296 /// the loop with SCEV reducing the value to a recurrence on the same loop. If
1297 /// so, return the extended recurrence and the kind of extension used. Otherwise
1298 /// return {nullptr, Unknown}.
1299 WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
1300  if (!SE->isSCEVable(DU.NarrowUse->getType()))
1301  return {nullptr, Unknown};
1302 
1303  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
1304  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
1305  SE->getTypeSizeInBits(WideType)) {
1306  // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
1307  // index. So don't follow this use.
1308  return {nullptr, Unknown};
1309  }
1310 
1311  const SCEV *WideExpr;
1312  ExtendKind ExtKind;
1313  if (DU.NeverNegative) {
1314  WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1315  if (isa<SCEVAddRecExpr>(WideExpr))
1316  ExtKind = SignExtended;
1317  else {
1318  WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1319  ExtKind = ZeroExtended;
1320  }
1321  } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
1322  WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1323  ExtKind = SignExtended;
1324  } else {
1325  WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1326  ExtKind = ZeroExtended;
1327  }
1328  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
1329  if (!AddRec || AddRec->getLoop() != L)
1330  return {nullptr, Unknown};
1331  return {AddRec, ExtKind};
1332 }
1333 
1334 /// This IV user cannot be widened. Replace this use of the original narrow IV
1335 /// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
1336 static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
1337  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1338  if (!InsertPt)
1339  return;
1340  LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
1341  << *DU.NarrowUse << "\n");
1342  IRBuilder<> Builder(InsertPt);
1343  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
1344  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
1345 }
1346 
1347 /// If the narrow use is a compare instruction, then widen the compare
1348 // (and possibly the other operand). The extend operation is hoisted into the
1349 // loop preheader as far as possible.
1350 bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
1351  ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
1352  if (!Cmp)
1353  return false;
1354 
1355  // We can legally widen the comparison in the following two cases:
1356  //
1357  // - The signedness of the IV extension and comparison match
1358  //
1359  // - The narrow IV is always positive (and thus its sign extension is equal
1360  // to its zero extension). For instance, let's say we're zero extending
1361  // %narrow for the following use
1362  //
1363  // icmp slt i32 %narrow, %val ... (A)
1364  //
1365  // and %narrow is always positive. Then
1366  //
1367  // (A) == icmp slt i32 sext(%narrow), sext(%val)
1368  // == icmp slt i32 zext(%narrow), sext(%val)
1369  bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
1370  if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
1371  return false;
1372 
1373  Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
1374  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
1375  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1376  assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
1377 
1378  // Widen the compare instruction.
1379  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1380  if (!InsertPt)
1381  return false;
1382  IRBuilder<> Builder(InsertPt);
1383  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1384 
1385  // Widen the other operand of the compare, if necessary.
1386  if (CastWidth < IVWidth) {
1387  Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
1388  DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
1389  }
1390  return true;
1391 }
1392 
1393 /// If the narrow use is an instruction whose two operands are the defining
1394 /// instruction of DU and a load instruction, then we have the following:
1395 /// if the load is hoisted outside the loop, then we do not reach this function
1396 /// as scalar evolution analysis works fine in widenIVUse with variables
1397 /// hoisted outside the loop and efficient code is subsequently generated by
1398 /// not emitting truncate instructions. But when the load is not hoisted
1399 /// (whether due to limitation in alias analysis or due to a true legality),
1400 /// then scalar evolution can not proceed with loop variant values and
1401 /// inefficient code is generated. This function handles the non-hoisted load
1402 /// special case by making the optimization generate the same type of code for
1403 /// hoisted and non-hoisted load (widen use and eliminate sign extend
1404 /// instruction). This special case is important especially when the induction
1405 /// variables are affecting addressing mode in code generation.
1406 bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
1407  Instruction *NarrowUse = DU.NarrowUse;
1408  Instruction *NarrowDef = DU.NarrowDef;
1409  Instruction *WideDef = DU.WideDef;
1410 
1411  // Handle the common case of add<nsw/nuw>
1412  const unsigned OpCode = NarrowUse->getOpcode();
1413  // Only Add/Sub/Mul instructions are supported.
1414  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1415  OpCode != Instruction::Mul)
1416  return false;
1417 
1418  // The operand that is not defined by NarrowDef of DU. Let's call it the
1419  // other operand.
1420  unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
1421  assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
1422  "bad DU");
1423 
1424  const SCEV *ExtendOperExpr = nullptr;
1425  const OverflowingBinaryOperator *OBO =
1426  cast<OverflowingBinaryOperator>(NarrowUse);
1427  ExtendKind ExtKind = getExtendKind(NarrowDef);
1428  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1429  ExtendOperExpr = SE->getSignExtendExpr(
1430  SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1431  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1432  ExtendOperExpr = SE->getZeroExtendExpr(
1433  SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1434  else
1435  return false;
1436 
1437  // We are interested in the other operand being a load instruction.
1438  // But, we should look into relaxing this restriction later on.
1439  auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx));
1440  if (I && I->getOpcode() != Instruction::Load)
1441  return false;
1442 
1443  // Verifying that Defining operand is an AddRec
1444  const SCEV *Op1 = SE->getSCEV(WideDef);
1445  const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
1446  if (!AddRecOp1 || AddRecOp1->getLoop() != L)
1447  return false;
1448  // Verifying that other operand is an Extend.
1449  if (ExtKind == SignExtended) {
1450  if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
1451  return false;
1452  } else {
1453  if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
1454  return false;
1455  }
1456 
1457  if (ExtKind == SignExtended) {
1458  for (Use &U : NarrowUse->uses()) {
1459  SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1460  if (!User || User->getType() != WideType)
1461  return false;
1462  }
1463  } else { // ExtKind == ZeroExtended
1464  for (Use &U : NarrowUse->uses()) {
1465  ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1466  if (!User || User->getType() != WideType)
1467  return false;
1468  }
1469  }
1470 
1471  return true;
1472 }
1473 
1474 /// Special Case for widening with variant Loads (see
1475 /// WidenIV::widenWithVariantLoadUse). This is the code generation part.
1476 void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
1477  Instruction *NarrowUse = DU.NarrowUse;
1478  Instruction *NarrowDef = DU.NarrowDef;
1479  Instruction *WideDef = DU.WideDef;
1480 
1481  ExtendKind ExtKind = getExtendKind(NarrowDef);
1482 
1483  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1484 
1485  // Generating a widening use instruction.
1486  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1487  ? WideDef
1488  : createExtendInst(NarrowUse->getOperand(0), WideType,
1489  ExtKind, NarrowUse);
1490  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1491  ? WideDef
1492  : createExtendInst(NarrowUse->getOperand(1), WideType,
1493  ExtKind, NarrowUse);
1494 
1495  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1496  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1497  NarrowBO->getName());
1498  IRBuilder<> Builder(NarrowUse);
1499  Builder.Insert(WideBO);
1500  WideBO->copyIRFlags(NarrowBO);
1501 
1502  if (ExtKind == SignExtended)
1503  ExtendKindMap[NarrowUse] = SignExtended;
1504  else
1505  ExtendKindMap[NarrowUse] = ZeroExtended;
1506 
1507  // Update the Use.
1508  if (ExtKind == SignExtended) {
1509  for (Use &U : NarrowUse->uses()) {
1510  SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1511  if (User && User->getType() == WideType) {
1512  LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1513  << *WideBO << "\n");
1514  ++NumElimExt;
1515  User->replaceAllUsesWith(WideBO);
1516  DeadInsts.emplace_back(User);
1517  }
1518  }
1519  } else { // ExtKind == ZeroExtended
1520  for (Use &U : NarrowUse->uses()) {
1521  ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1522  if (User && User->getType() == WideType) {
1523  LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1524  << *WideBO << "\n");
1525  ++NumElimExt;
1526  User->replaceAllUsesWith(WideBO);
1527  DeadInsts.emplace_back(User);
1528  }
1529  }
1530  }
1531 }
1532 
1533 /// Determine whether an individual user of the narrow IV can be widened. If so,
1534 /// return the wide clone of the user.
1535 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1536  assert(ExtendKindMap.count(DU.NarrowDef) &&
1537  "Should already know the kind of extension used to widen NarrowDef");
1538 
1539  // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1540  if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1541  if (LI->getLoopFor(UsePhi->getParent()) != L) {
1542  // For LCSSA phis, sink the truncate outside the loop.
1543  // After SimplifyCFG most loop exit targets have a single predecessor.
1544  // Otherwise fall back to a truncate within the loop.
1545  if (UsePhi->getNumOperands() != 1)
1546  truncateIVUse(DU, DT, LI);
1547  else {
1548  // Widening the PHI requires us to insert a trunc. The logical place
1549  // for this trunc is in the same BB as the PHI. This is not possible if
1550  // the BB is terminated by a catchswitch.
1551  if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1552  return nullptr;
1553 
1554  PHINode *WidePhi =
1555  PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1556  UsePhi);
1557  WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1558  IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
1559  Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1560  UsePhi->replaceAllUsesWith(Trunc);
1561  DeadInsts.emplace_back(UsePhi);
1562  LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
1563  << *WidePhi << "\n");
1564  }
1565  return nullptr;
1566  }
1567  }
1568 
1569  // This narrow use can be widened by a sext if it's non-negative or its narrow
1570  // def was widended by a sext. Same for zext.
1571  auto canWidenBySExt = [&]() {
1572  return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
1573  };
1574  auto canWidenByZExt = [&]() {
1575  return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
1576  };
1577 
1578  // Our raison d'etre! Eliminate sign and zero extension.
1579  if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
1580  (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1581  Value *NewDef = DU.WideDef;
1582  if (DU.NarrowUse->getType() != WideType) {
1583  unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1584  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1585  if (CastWidth < IVWidth) {
1586  // The cast isn't as wide as the IV, so insert a Trunc.
1587  IRBuilder<> Builder(DU.NarrowUse);
1588  NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1589  }
1590  else {
1591  // A wider extend was hidden behind a narrower one. This may induce
1592  // another round of IV widening in which the intermediate IV becomes
1593  // dead. It should be very rare.
1594  LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1595  << " not wide enough to subsume " << *DU.NarrowUse
1596  << "\n");
1597  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1598  NewDef = DU.NarrowUse;
1599  }
1600  }
1601  if (NewDef != DU.NarrowUse) {
1602  LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1603  << " replaced by " << *DU.WideDef << "\n");
1604  ++NumElimExt;
1605  DU.NarrowUse->replaceAllUsesWith(NewDef);
1606  DeadInsts.emplace_back(DU.NarrowUse);
1607  }
1608  // Now that the extend is gone, we want to expose it's uses for potential
1609  // further simplification. We don't need to directly inform SimplifyIVUsers
1610  // of the new users, because their parent IV will be processed later as a
1611  // new loop phi. If we preserved IVUsers analysis, we would also want to
1612  // push the uses of WideDef here.
1613 
1614  // No further widening is needed. The deceased [sz]ext had done it for us.
1615  return nullptr;
1616  }
1617 
1618  // Does this user itself evaluate to a recurrence after widening?
1619  WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1620  if (!WideAddRec.first)
1621  WideAddRec = getWideRecurrence(DU);
1622 
1623  assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
1624  if (!WideAddRec.first) {
1625  // If use is a loop condition, try to promote the condition instead of
1626  // truncating the IV first.
1627  if (widenLoopCompare(DU))
1628  return nullptr;
1629 
1630  // We are here about to generate a truncate instruction that may hurt
1631  // performance because the scalar evolution expression computed earlier
1632  // in WideAddRec.first does not indicate a polynomial induction expression.
1633  // In that case, look at the operands of the use instruction to determine
1634  // if we can still widen the use instead of truncating its operand.
1635  if (widenWithVariantLoadUse(DU)) {
1636  widenWithVariantLoadUseCodegen(DU);
1637  return nullptr;
1638  }
1639 
1640  // This user does not evaluate to a recurrence after widening, so don't
1641  // follow it. Instead insert a Trunc to kill off the original use,
1642  // eventually isolating the original narrow IV so it can be removed.
1643  truncateIVUse(DU, DT, LI);
1644  return nullptr;
1645  }
1646  // Assume block terminators cannot evaluate to a recurrence. We can't to
1647  // insert a Trunc after a terminator if there happens to be a critical edge.
1648  assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1649  "SCEV is not expected to evaluate a block terminator");
1650 
1651  // Reuse the IV increment that SCEVExpander created as long as it dominates
1652  // NarrowUse.
1653  Instruction *WideUse = nullptr;
1654  if (WideAddRec.first == WideIncExpr &&
1655  Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1656  WideUse = WideInc;
1657  else {
1658  WideUse = cloneIVUser(DU, WideAddRec.first);
1659  if (!WideUse)
1660  return nullptr;
1661  }
1662  // Evaluation of WideAddRec ensured that the narrow expression could be
1663  // extended outside the loop without overflow. This suggests that the wide use
1664  // evaluates to the same expression as the extended narrow use, but doesn't
1665  // absolutely guarantee it. Hence the following failsafe check. In rare cases
1666  // where it fails, we simply throw away the newly created wide use.
1667  if (WideAddRec.first != SE->getSCEV(WideUse)) {
1668  LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
1669  << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
1670  << "\n");
1671  DeadInsts.emplace_back(WideUse);
1672  return nullptr;
1673  }
1674 
1675  ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1676  // Returning WideUse pushes it on the worklist.
1677  return WideUse;
1678 }
1679 
1680 /// Add eligible users of NarrowDef to NarrowIVUsers.
1681 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1682  const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1683  bool NonNegativeDef =
1684  SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1685  SE->getConstant(NarrowSCEV->getType(), 0));
1686  for (User *U : NarrowDef->users()) {
1687  Instruction *NarrowUser = cast<Instruction>(U);
1688 
1689  // Handle data flow merges and bizarre phi cycles.
1690  if (!Widened.insert(NarrowUser).second)
1691  continue;
1692 
1693  bool NonNegativeUse = false;
1694  if (!NonNegativeDef) {
1695  // We might have a control-dependent range information for this context.
1696  if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1697  NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1698  }
1699 
1700  NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1701  NonNegativeDef || NonNegativeUse);
1702  }
1703 }
1704 
1705 /// Process a single induction variable. First use the SCEVExpander to create a
1706 /// wide induction variable that evaluates to the same recurrence as the
1707 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1708 /// def-use chain. After widenIVUse has processed all interesting IV users, the
1709 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
1710 ///
1711 /// It would be simpler to delete uses as they are processed, but we must avoid
1712 /// invalidating SCEV expressions.
1713 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
1714  // Is this phi an induction variable?
1715  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
1716  if (!AddRec)
1717  return nullptr;
1718 
1719  // Widen the induction variable expression.
1720  const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
1721  ? SE->getSignExtendExpr(AddRec, WideType)
1722  : SE->getZeroExtendExpr(AddRec, WideType);
1723 
1724  assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
1725  "Expect the new IV expression to preserve its type");
1726 
1727  // Can the IV be extended outside the loop without overflow?
1728  AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
1729  if (!AddRec || AddRec->getLoop() != L)
1730  return nullptr;
1731 
1732  // An AddRec must have loop-invariant operands. Since this AddRec is
1733  // materialized by a loop header phi, the expression cannot have any post-loop
1734  // operands, so they must dominate the loop header.
1735  assert(
1736  SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
1737  SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
1738  "Loop header phi recurrence inputs do not dominate the loop");
1739 
1740  // Iterate over IV uses (including transitive ones) looking for IV increments
1741  // of the form 'add nsw %iv, <const>'. For each increment and each use of
1742  // the increment calculate control-dependent range information basing on
1743  // dominating conditions inside of the loop (e.g. a range check inside of the
1744  // loop). Calculated ranges are stored in PostIncRangeInfos map.
1745  //
1746  // Control-dependent range information is later used to prove that a narrow
1747  // definition is not negative (see pushNarrowIVUsers). It's difficult to do
1748  // this on demand because when pushNarrowIVUsers needs this information some
1749  // of the dominating conditions might be already widened.
1750  if (UsePostIncrementRanges)
1751  calculatePostIncRanges(OrigPhi);
1752 
1753  // The rewriter provides a value for the desired IV expression. This may
1754  // either find an existing phi or materialize a new one. Either way, we
1755  // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
1756  // of the phi-SCC dominates the loop entry.
1757  Instruction *InsertPt = &L->getHeader()->front();
1758  WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
1759 
1760  // Remembering the WideIV increment generated by SCEVExpander allows
1761  // widenIVUse to reuse it when widening the narrow IV's increment. We don't
1762  // employ a general reuse mechanism because the call above is the only call to
1763  // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
1764  if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1765  WideInc =
1766  cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
1767  WideIncExpr = SE->getSCEV(WideInc);
1768  // Propagate the debug location associated with the original loop increment
1769  // to the new (widened) increment.
1770  auto *OrigInc =
1771  cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1772  WideInc->setDebugLoc(OrigInc->getDebugLoc());
1773  }
1774 
1775  LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
1776  ++NumWidened;
1777 
1778  // Traverse the def-use chain using a worklist starting at the original IV.
1779  assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" );
1780 
1781  Widened.insert(OrigPhi);
1782  pushNarrowIVUsers(OrigPhi, WidePhi);
1783 
1784  while (!NarrowIVUsers.empty()) {
1785  NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
1786 
1787  // Process a def-use edge. This may replace the use, so don't hold a
1788  // use_iterator across it.
1789  Instruction *WideUse = widenIVUse(DU, Rewriter);
1790 
1791  // Follow all def-use edges from the previous narrow use.
1792  if (WideUse)
1793  pushNarrowIVUsers(DU.NarrowUse, WideUse);
1794 
1795  // widenIVUse may have removed the def-use edge.
1796  if (DU.NarrowDef->use_empty())
1797  DeadInsts.emplace_back(DU.NarrowDef);
1798  }
1799 
1800  // Attach any debug information to the new PHI. Since OrigPhi and WidePHI
1801  // evaluate the same recurrence, we can just copy the debug info over.
1803  llvm::findDbgValues(DbgValues, OrigPhi);
1804  auto *MDPhi = MetadataAsValue::get(WidePhi->getContext(),
1805  ValueAsMetadata::get(WidePhi));
1806  for (auto &DbgValue : DbgValues)
1807  DbgValue->setOperand(0, MDPhi);
1808  return WidePhi;
1809 }
1810 
1811 /// Calculates control-dependent range for the given def at the given context
1812 /// by looking at dominating conditions inside of the loop
1813 void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
1814  Instruction *NarrowUser) {
1815  using namespace llvm::PatternMatch;
1816 
1817  Value *NarrowDefLHS;
1818  const APInt *NarrowDefRHS;
1819  if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
1820  m_APInt(NarrowDefRHS))) ||
1821  !NarrowDefRHS->isNonNegative())
1822  return;
1823 
1824  auto UpdateRangeFromCondition = [&] (Value *Condition,
1825  bool TrueDest) {
1826  CmpInst::Predicate Pred;
1827  Value *CmpRHS;
1828  if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
1829  m_Value(CmpRHS))))
1830  return;
1831 
1833  TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
1834 
1835  auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
1836  auto CmpConstrainedLHSRange =
1837  ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
1838  auto NarrowDefRange =
1839  CmpConstrainedLHSRange.addWithNoSignedWrap(*NarrowDefRHS);
1840 
1841  updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
1842  };
1843 
1844  auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
1845  if (!HasGuards)
1846  return;
1847 
1848  for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
1849  Ctx->getParent()->rend())) {
1850  Value *C = nullptr;
1851  if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
1852  UpdateRangeFromCondition(C, /*TrueDest=*/true);
1853  }
1854  };
1855 
1856  UpdateRangeFromGuards(NarrowUser);
1857 
1858  BasicBlock *NarrowUserBB = NarrowUser->getParent();
1859  // If NarrowUserBB is statically unreachable asking dominator queries may
1860  // yield surprising results. (e.g. the block may not have a dom tree node)
1861  if (!DT->isReachableFromEntry(NarrowUserBB))
1862  return;
1863 
1864  for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
1865  L->contains(DTB->getBlock());
1866  DTB = DTB->getIDom()) {
1867  auto *BB = DTB->getBlock();
1868  auto *TI = BB->getTerminator();
1869  UpdateRangeFromGuards(TI);
1870 
1871  auto *BI = dyn_cast<BranchInst>(TI);
1872  if (!BI || !BI->isConditional())
1873  continue;
1874 
1875  auto *TrueSuccessor = BI->getSuccessor(0);
1876  auto *FalseSuccessor = BI->getSuccessor(1);
1877 
1878  auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
1879  return BBE.isSingleEdge() &&
1880  DT->dominates(BBE, NarrowUser->getParent());
1881  };
1882 
1883  if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
1884  UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
1885 
1886  if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
1887  UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
1888  }
1889 }
1890 
1891 /// Calculates PostIncRangeInfos map for the given IV
1892 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
1895  Worklist.push_back(OrigPhi);
1896  Visited.insert(OrigPhi);
1897 
1898  while (!Worklist.empty()) {
1899  Instruction *NarrowDef = Worklist.pop_back_val();
1900 
1901  for (Use &U : NarrowDef->uses()) {
1902  auto *NarrowUser = cast<Instruction>(U.getUser());
1903 
1904  // Don't go looking outside the current loop.
1905  auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1906  if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1907  continue;
1908 
1909  if (!Visited.insert(NarrowUser).second)
1910  continue;
1911 
1912  Worklist.push_back(NarrowUser);
1913 
1914  calculatePostIncRange(NarrowDef, NarrowUser);
1915  }
1916  }
1917 }
1918 
1919 //===----------------------------------------------------------------------===//
1920 // Live IV Reduction - Minimize IVs live across the loop.
1921 //===----------------------------------------------------------------------===//
1922 
1923 //===----------------------------------------------------------------------===//
1924 // Simplification of IV users based on SCEV evaluation.
1925 //===----------------------------------------------------------------------===//
1926 
1927 namespace {
1928 
1929 class IndVarSimplifyVisitor : public IVVisitor {
1930  ScalarEvolution *SE;
1931  const TargetTransformInfo *TTI;
1932  PHINode *IVPhi;
1933 
1934 public:
1935  WideIVInfo WI;
1936 
1937  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1938  const TargetTransformInfo *TTI,
1939  const DominatorTree *DTree)
1940  : SE(SCEV), TTI(TTI), IVPhi(IV) {
1941  DT = DTree;
1942  WI.NarrowIV = IVPhi;
1943  }
1944 
1945  // Implement the interface used by simplifyUsersOfIV.
1946  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1947 };
1948 
1949 } // end anonymous namespace
1950 
1951 /// Iteratively perform simplification on a worklist of IV users. Each
1952 /// successive simplification may push more users which may themselves be
1953 /// candidates for simplification.
1954 ///
1955 /// Sign/Zero extend elimination is interleaved with IV simplification.
1956 bool IndVarSimplify::simplifyAndExtend(Loop *L,
1957  SCEVExpander &Rewriter,
1958  LoopInfo *LI) {
1960 
1961  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1962  Intrinsic::getName(Intrinsic::experimental_guard));
1963  bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1964 
1965  SmallVector<PHINode*, 8> LoopPhis;
1966  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1967  LoopPhis.push_back(cast<PHINode>(I));
1968  }
1969  // Each round of simplification iterates through the SimplifyIVUsers worklist
1970  // for all current phis, then determines whether any IVs can be
1971  // widened. Widening adds new phis to LoopPhis, inducing another round of
1972  // simplification on the wide IVs.
1973  bool Changed = false;
1974  while (!LoopPhis.empty()) {
1975  // Evaluate as many IV expressions as possible before widening any IVs. This
1976  // forces SCEV to set no-wrap flags before evaluating sign/zero
1977  // extension. The first time SCEV attempts to normalize sign/zero extension,
1978  // the result becomes final. So for the most predictable results, we delay
1979  // evaluation of sign/zero extend evaluation until needed, and avoid running
1980  // other SCEV based analysis prior to simplifyAndExtend.
1981  do {
1982  PHINode *CurrIV = LoopPhis.pop_back_val();
1983 
1984  // Information about sign/zero extensions of CurrIV.
1985  IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1986 
1987  Changed |=
1988  simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor);
1989 
1990  if (Visitor.WI.WidestNativeType) {
1991  WideIVs.push_back(Visitor.WI);
1992  }
1993  } while(!LoopPhis.empty());
1994 
1995  for (; !WideIVs.empty(); WideIVs.pop_back()) {
1996  WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
1997  if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
1998  Changed = true;
1999  LoopPhis.push_back(WidePhi);
2000  }
2001  }
2002  }
2003  return Changed;
2004 }
2005 
2006 //===----------------------------------------------------------------------===//
2007 // linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
2008 //===----------------------------------------------------------------------===//
2009 
2010 /// Given an Value which is hoped to be part of an add recurance in the given
2011 /// loop, return the associated Phi node if so. Otherwise, return null. Note
2012 /// that this is less general than SCEVs AddRec checking.
2014  Instruction *IncI = dyn_cast<Instruction>(IncV);
2015  if (!IncI)
2016  return nullptr;
2017 
2018  switch (IncI->getOpcode()) {
2019  case Instruction::Add:
2020  case Instruction::Sub:
2021  break;
2022  case Instruction::GetElementPtr:
2023  // An IV counter must preserve its type.
2024  if (IncI->getNumOperands() == 2)
2025  break;
2027  default:
2028  return nullptr;
2029  }
2030 
2031  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
2032  if (Phi && Phi->getParent() == L->getHeader()) {
2033  if (L->isLoopInvariant(IncI->getOperand(1)))
2034  return Phi;
2035  return nullptr;
2036  }
2037  if (IncI->getOpcode() == Instruction::GetElementPtr)
2038  return nullptr;
2039 
2040  // Allow add/sub to be commuted.
2041  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
2042  if (Phi && Phi->getParent() == L->getHeader()) {
2043  if (L->isLoopInvariant(IncI->getOperand(0)))
2044  return Phi;
2045  }
2046  return nullptr;
2047 }
2048 
2049 /// Whether the current loop exit test is based on this value. Currently this
2050 /// is limited to a direct use in the loop condition.
2051 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
2052  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2053  ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
2054  // TODO: Allow non-icmp loop test.
2055  if (!ICmp)
2056  return false;
2057 
2058  // TODO: Allow indirect use.
2059  return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
2060 }
2061 
2062 /// linearFunctionTestReplace policy. Return true unless we can show that the
2063 /// current exit test is already sufficiently canonical.
2064 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
2065  assert(L->getLoopLatch() && "Must be in simplified form");
2066 
2067  // Avoid converting a constant or loop invariant test back to a runtime
2068  // test. This is critical for when SCEV's cached ExitCount is less precise
2069  // than the current IR (such as after we've proven a particular exit is
2070  // actually dead and thus the BE count never reaches our ExitCount.)
2071  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2072  if (L->isLoopInvariant(BI->getCondition()))
2073  return false;
2074 
2075  // Do LFTR to simplify the exit condition to an ICMP.
2076  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
2077  if (!Cond)
2078  return true;
2079 
2080  // Do LFTR to simplify the exit ICMP to EQ/NE
2081  ICmpInst::Predicate Pred = Cond->getPredicate();
2082  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
2083  return true;
2084 
2085  // Look for a loop invariant RHS
2086  Value *LHS = Cond->getOperand(0);
2087  Value *RHS = Cond->getOperand(1);
2088  if (!L->isLoopInvariant(RHS)) {
2089  if (!L->isLoopInvariant(LHS))
2090  return true;
2091  std::swap(LHS, RHS);
2092  }
2093  // Look for a simple IV counter LHS
2094  PHINode *Phi = dyn_cast<PHINode>(LHS);
2095  if (!Phi)
2096  Phi = getLoopPhiForCounter(LHS, L);
2097 
2098  if (!Phi)
2099  return true;
2100 
2101  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
2102  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
2103  if (Idx < 0)
2104  return true;
2105 
2106  // Do LFTR if the exit condition's IV is *not* a simple counter.
2107  Value *IncV = Phi->getIncomingValue(Idx);
2108  return Phi != getLoopPhiForCounter(IncV, L);
2109 }
2110 
2111 /// Return true if undefined behavior would provable be executed on the path to
2112 /// OnPathTo if Root produced a posion result. Note that this doesn't say
2113 /// anything about whether OnPathTo is actually executed or whether Root is
2114 /// actually poison. This can be used to assess whether a new use of Root can
2115 /// be added at a location which is control equivalent with OnPathTo (such as
2116 /// immediately before it) without introducing UB which didn't previously
2117 /// exist. Note that a false result conveys no information.
2119  Instruction *OnPathTo,
2120  DominatorTree *DT) {
2121  // Basic approach is to assume Root is poison, propagate poison forward
2122  // through all users we can easily track, and then check whether any of those
2123  // users are provable UB and must execute before out exiting block might
2124  // exit.
2125 
2126  // The set of all recursive users we've visited (which are assumed to all be
2127  // poison because of said visit)
2128  SmallSet<const Value *, 16> KnownPoison;
2130  Worklist.push_back(Root);
2131  while (!Worklist.empty()) {
2132  const Instruction *I = Worklist.pop_back_val();
2133 
2134  // If we know this must trigger UB on a path leading our target.
2135  if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
2136  return true;
2137 
2138  // If we can't analyze propagation through this instruction, just skip it
2139  // and transitive users. Safe as false is a conservative result.
2140  if (!propagatesFullPoison(I) && I != Root)
2141  continue;
2142 
2143  if (KnownPoison.insert(I).second)
2144  for (const User *User : I->users())
2145  Worklist.push_back(cast<Instruction>(User));
2146  }
2147 
2148  // Might be non-UB, or might have a path we couldn't prove must execute on
2149  // way to exiting bb.
2150  return false;
2151 }
2152 
2153 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
2154 /// down to checking that all operands are constant and listing instructions
2155 /// that may hide undef.
2157  unsigned Depth) {
2158  if (isa<Constant>(V))
2159  return !isa<UndefValue>(V);
2160 
2161  if (Depth >= 6)
2162  return false;
2163 
2164  // Conservatively handle non-constant non-instructions. For example, Arguments
2165  // may be undef.
2166  Instruction *I = dyn_cast<Instruction>(V);
2167  if (!I)
2168  return false;
2169 
2170  // Load and return values may be undef.
2171  if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
2172  return false;
2173 
2174  // Optimistically handle other instructions.
2175  for (Value *Op : I->operands()) {
2176  if (!Visited.insert(Op).second)
2177  continue;
2178  if (!hasConcreteDefImpl(Op, Visited, Depth+1))
2179  return false;
2180  }
2181  return true;
2182 }
2183 
2184 /// Return true if the given value is concrete. We must prove that undef can
2185 /// never reach it.
2186 ///
2187 /// TODO: If we decide that this is a good approach to checking for undef, we
2188 /// may factor it into a common location.
2189 static bool hasConcreteDef(Value *V) {
2190  SmallPtrSet<Value*, 8> Visited;
2191  Visited.insert(V);
2192  return hasConcreteDefImpl(V, Visited, 0);
2193 }
2194 
2195 /// Return true if this IV has any uses other than the (soon to be rewritten)
2196 /// loop exit test.
2197 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
2198  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
2199  Value *IncV = Phi->getIncomingValue(LatchIdx);
2200 
2201  for (User *U : Phi->users())
2202  if (U != Cond && U != IncV) return false;
2203 
2204  for (User *U : IncV->users())
2205  if (U != Cond && U != Phi) return false;
2206  return true;
2207 }
2208 
2209 /// Return true if the given phi is a "counter" in L. A counter is an
2210 /// add recurance (of integer or pointer type) with an arbitrary start, and a
2211 /// step of 1. Note that L must have exactly one latch.
2212 static bool isLoopCounter(PHINode* Phi, Loop *L,
2213  ScalarEvolution *SE) {
2214  assert(Phi->getParent() == L->getHeader());
2215  assert(L->getLoopLatch());
2216 
2217  if (!SE->isSCEVable(Phi->getType()))
2218  return false;
2219 
2220  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2221  if (!AR || AR->getLoop() != L || !AR->isAffine())
2222  return false;
2223 
2224  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
2225  if (!Step || !Step->isOne())
2226  return false;
2227 
2228  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
2229  Value *IncV = Phi->getIncomingValue(LatchIdx);
2230  return (getLoopPhiForCounter(IncV, L) == Phi);
2231 }
2232 
2233 /// Search the loop header for a loop counter (anadd rec w/step of one)
2234 /// suitable for use by LFTR. If multiple counters are available, select the
2235 /// "best" one based profitable heuristics.
2236 ///
2237 /// BECount may be an i8* pointer type. The pointer difference is already
2238 /// valid count without scaling the address stride, so it remains a pointer
2239 /// expression as far as SCEV is concerned.
2240 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
2241  const SCEV *BECount,
2242  ScalarEvolution *SE, DominatorTree *DT) {
2243  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
2244 
2245  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
2246 
2247  // Loop over all of the PHI nodes, looking for a simple counter.
2248  PHINode *BestPhi = nullptr;
2249  const SCEV *BestInit = nullptr;
2250  BasicBlock *LatchBlock = L->getLoopLatch();
2251  assert(LatchBlock && "Must be in simplified form");
2252  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2253 
2254  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
2255  PHINode *Phi = cast<PHINode>(I);
2256  if (!isLoopCounter(Phi, L, SE))
2257  continue;
2258 
2259  // Avoid comparing an integer IV against a pointer Limit.
2260  if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
2261  continue;
2262 
2263  const auto *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2264 
2265  // AR may be a pointer type, while BECount is an integer type.
2266  // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
2267  // AR may not be a narrower type, or we may never exit.
2268  uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
2269  if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
2270  continue;
2271 
2272  // Avoid reusing a potentially undef value to compute other values that may
2273  // have originally had a concrete definition.
2274  if (!hasConcreteDef(Phi)) {
2275  // We explicitly allow unknown phis as long as they are already used by
2276  // the loop exit test. This is legal since performing LFTR could not
2277  // increase the number of undef users.
2278  Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
2279  if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
2280  !isLoopExitTestBasedOn(IncPhi, ExitingBB))
2281  continue;
2282  }
2283 
2284  // Avoid introducing undefined behavior due to poison which didn't exist in
2285  // the original program. (Annoyingly, the rules for poison and undef
2286  // propagation are distinct, so this does NOT cover the undef case above.)
2287  // We have to ensure that we don't introduce UB by introducing a use on an
2288  // iteration where said IV produces poison. Our strategy here differs for
2289  // pointers and integer IVs. For integers, we strip and reinfer as needed,
2290  // see code in linearFunctionTestReplace. For pointers, we restrict
2291  // transforms as there is no good way to reinfer inbounds once lost.
2292  if (!Phi->getType()->isIntegerTy() &&
2293  !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
2294  continue;
2295 
2296  const SCEV *Init = AR->getStart();
2297 
2298  if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
2299  // Don't force a live loop counter if another IV can be used.
2300  if (AlmostDeadIV(Phi, LatchBlock, Cond))
2301  continue;
2302 
2303  // Prefer to count-from-zero. This is a more "canonical" counter form. It
2304  // also prefers integer to pointer IVs.
2305  if (BestInit->isZero() != Init->isZero()) {
2306  if (BestInit->isZero())
2307  continue;
2308  }
2309  // If two IVs both count from zero or both count from nonzero then the
2310  // narrower is likely a dead phi that has been widened. Use the wider phi
2311  // to allow the other to be eliminated.
2312  else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
2313  continue;
2314  }
2315  BestPhi = Phi;
2316  BestInit = Init;
2317  }
2318  return BestPhi;
2319 }
2320 
2321 /// Insert an IR expression which computes the value held by the IV IndVar
2322 /// (which must be an loop counter w/unit stride) after the backedge of loop L
2323 /// is taken ExitCount times.
2324 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
2325  const SCEV *ExitCount, bool UsePostInc, Loop *L,
2326  SCEVExpander &Rewriter, ScalarEvolution *SE) {
2327  assert(isLoopCounter(IndVar, L, SE));
2328  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2329  const SCEV *IVInit = AR->getStart();
2330 
2331  // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
2332  // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
2333  // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
2334  // the existing GEPs whenever possible.
2335  if (IndVar->getType()->isPointerTy() &&
2336  !ExitCount->getType()->isPointerTy()) {
2337  // IVOffset will be the new GEP offset that is interpreted by GEP as a
2338  // signed value. ExitCount on the other hand represents the loop trip count,
2339  // which is an unsigned value. FindLoopCounter only allows induction
2340  // variables that have a positive unit stride of one. This means we don't
2341  // have to handle the case of negative offsets (yet) and just need to zero
2342  // extend ExitCount.
2343  Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2344  const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
2345  if (UsePostInc)
2346  IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));
2347 
2348  // Expand the code for the iteration count.
2349  assert(SE->isLoopInvariant(IVOffset, L) &&
2350  "Computed iteration count is not loop invariant!");
2351 
2352  // We could handle pointer IVs other than i8*, but we need to compensate for
2353  // gep index scaling.
2355  cast<PointerType>(IndVar->getType())
2356  ->getElementType())->isOne() &&
2357  "unit stride pointer IV must be i8*");
2358 
2359  const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
2360  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2361  return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
2362  } else {
2363  // In any other case, convert both IVInit and ExitCount to integers before
2364  // comparing. This may result in SCEV expansion of pointers, but in practice
2365  // SCEV will fold the pointer arithmetic away as such:
2366  // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
2367  //
2368  // Valid Cases: (1) both integers is most common; (2) both may be pointers
2369  // for simple memset-style loops.
2370  //
2371  // IVInit integer and ExitCount pointer would only occur if a canonical IV
2372  // were generated on top of case #2, which is not expected.
2373 
2374  assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2375  // For unit stride, IVCount = Start + ExitCount with 2's complement
2376  // overflow.
2377 
2378  // For integer IVs, truncate the IV before computing IVInit + BECount,
2379  // unless we know apriori that the limit must be a constant when evaluated
2380  // in the bitwidth of the IV. We prefer (potentially) keeping a truncate
2381  // of the IV in the loop over a (potentially) expensive expansion of the
2382  // widened exit count add(zext(add)) expression.
2383  if (SE->getTypeSizeInBits(IVInit->getType())
2384  > SE->getTypeSizeInBits(ExitCount->getType())) {
2385  if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
2386  ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
2387  else
2388  IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
2389  }
2390 
2391  const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);
2392 
2393  if (UsePostInc)
2394  IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
2395 
2396  // Expand the code for the iteration count.
2397  assert(SE->isLoopInvariant(IVLimit, L) &&
2398  "Computed iteration count is not loop invariant!");
2399  // Ensure that we generate the same type as IndVar, or a smaller integer
2400  // type. In the presence of null pointer values, we have an integer type
2401  // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2402  Type *LimitTy = ExitCount->getType()->isPointerTy() ?
2403  IndVar->getType() : ExitCount->getType();
2404  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2405  return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
2406  }
2407 }
2408 
2409 /// This method rewrites the exit condition of the loop to be a canonical !=
2410 /// comparison against the incremented loop induction variable. This pass is
2411 /// able to rewrite the exit tests of any loop where the SCEV analysis can
2412 /// determine a loop-invariant trip count of the loop, which is actually a much
2413 /// broader range than just linear tests.
2414 bool IndVarSimplify::
2415 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
2416  const SCEV *ExitCount,
2417  PHINode *IndVar, SCEVExpander &Rewriter) {
2418  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
2419  assert(isLoopCounter(IndVar, L, SE));
2420  Instruction * const IncVar =
2421  cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
2422 
2423  // Initialize CmpIndVar to the preincremented IV.
2424  Value *CmpIndVar = IndVar;
2425  bool UsePostInc = false;
2426 
2427  // If the exiting block is the same as the backedge block, we prefer to
2428  // compare against the post-incremented value, otherwise we must compare
2429  // against the preincremented value.
2430  if (ExitingBB == L->getLoopLatch()) {
2431  // For pointer IVs, we chose to not strip inbounds which requires us not
2432  // to add a potentially UB introducing use. We need to either a) show
2433  // the loop test we're modifying is already in post-inc form, or b) show
2434  // that adding a use must not introduce UB.
2435  bool SafeToPostInc =
2436  IndVar->getType()->isIntegerTy() ||
2437  isLoopExitTestBasedOn(IncVar, ExitingBB) ||
2438  mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
2439  if (SafeToPostInc) {
2440  UsePostInc = true;
2441  CmpIndVar = IncVar;
2442  }
2443  }
2444 
2445  // It may be necessary to drop nowrap flags on the incrementing instruction
2446  // if either LFTR moves from a pre-inc check to a post-inc check (in which
2447  // case the increment might have previously been poison on the last iteration
2448  // only) or if LFTR switches to a different IV that was previously dynamically
2449  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
2450  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
2451  // check), because the pre-inc addrec flags may be adopted from the original
2452  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
2453  // TODO: This handling is inaccurate for one case: If we switch to a
2454  // dynamically dead IV that wraps on the first loop iteration only, which is
2455  // not covered by the post-inc addrec. (If the new IV was not dynamically
2456  // dead, it could not be poison on the first iteration in the first place.)
2457  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
2458  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
2459  if (BO->hasNoUnsignedWrap())
2460  BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
2461  if (BO->hasNoSignedWrap())
2462  BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
2463  }
2464 
2465  Value *ExitCnt = genLoopLimit(
2466  IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
2467  assert(ExitCnt->getType()->isPointerTy() ==
2468  IndVar->getType()->isPointerTy() &&
2469  "genLoopLimit missed a cast");
2470 
2471  // Insert a new icmp_ne or icmp_eq instruction before the branch.
2472  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2474  if (L->contains(BI->getSuccessor(0)))
2475  P = ICmpInst::ICMP_NE;
2476  else
2477  P = ICmpInst::ICMP_EQ;
2478 
2479  IRBuilder<> Builder(BI);
2480 
2481  // The new loop exit condition should reuse the debug location of the
2482  // original loop exit condition.
2483  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
2484  Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
2485 
2486  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
2487  // avoid the expensive expansion of the limit expression in the wider type,
2488  // emit a truncate to narrow the IV to the ExitCount type. This is safe
2489  // since we know (from the exit count bitwidth), that we can't self-wrap in
2490  // the narrower type.
2491  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
2492  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
2493  if (CmpIndVarSize > ExitCntSize) {
2494  assert(!CmpIndVar->getType()->isPointerTy() &&
2495  !ExitCnt->getType()->isPointerTy());
2496 
2497  // Before resorting to actually inserting the truncate, use the same
2498  // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
2499  // the other side of the comparison instead. We still evaluate the limit
2500  // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
2501  // a truncate within in.
2502  bool Extended = false;
2503  const SCEV *IV = SE->getSCEV(CmpIndVar);
2504  const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2505  ExitCnt->getType());
2506  const SCEV *ZExtTrunc =
2507  SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
2508 
2509  if (ZExtTrunc == IV) {
2510  Extended = true;
2511  ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
2512  "wide.trip.count");
2513  } else {
2514  const SCEV *SExtTrunc =
2515  SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
2516  if (SExtTrunc == IV) {
2517  Extended = true;
2518  ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
2519  "wide.trip.count");
2520  }
2521  }
2522 
2523  if (Extended) {
2524  bool Discard;
2525  L->makeLoopInvariant(ExitCnt, Discard);
2526  } else
2527  CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
2528  "lftr.wideiv");
2529  }
2530  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
2531  << " LHS:" << *CmpIndVar << '\n'
2532  << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
2533  << "\n"
2534  << " RHS:\t" << *ExitCnt << "\n"
2535  << "ExitCount:\t" << *ExitCount << "\n"
2536  << " was: " << *BI->getCondition() << "\n");
2537 
2538  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
2539  Value *OrigCond = BI->getCondition();
2540  // It's tempting to use replaceAllUsesWith here to fully replace the old
2541  // comparison, but that's not immediately safe, since users of the old
2542  // comparison may not be dominated by the new comparison. Instead, just
2543  // update the branch to use the new comparison; in the common case this
2544  // will make old comparison dead.
2545  BI->setCondition(Cond);
2546  DeadInsts.push_back(OrigCond);
2547 
2548  ++NumLFTR;
2549  return true;
2550 }
2551 
2552 //===----------------------------------------------------------------------===//
2553 // sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
2554 //===----------------------------------------------------------------------===//
2555 
2556 /// If there's a single exit block, sink any loop-invariant values that
2557 /// were defined in the preheader but not used inside the loop into the
2558 /// exit block to reduce register pressure in the loop.
2559 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
2560  BasicBlock *ExitBlock = L->getExitBlock();
2561  if (!ExitBlock) return false;
2562 
2563  BasicBlock *Preheader = L->getLoopPreheader();
2564  if (!Preheader) return false;
2565 
2566  bool MadeAnyChanges = false;
2567  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
2568  BasicBlock::iterator I(Preheader->getTerminator());
2569  while (I != Preheader->begin()) {
2570  --I;
2571  // New instructions were inserted at the end of the preheader.
2572  if (isa<PHINode>(I))
2573  break;
2574 
2575  // Don't move instructions which might have side effects, since the side
2576  // effects need to complete before instructions inside the loop. Also don't
2577  // move instructions which might read memory, since the loop may modify
2578  // memory. Note that it's okay if the instruction might have undefined
2579  // behavior: LoopSimplify guarantees that the preheader dominates the exit
2580  // block.
2581  if (I->mayHaveSideEffects() || I->mayReadFromMemory())
2582  continue;
2583 
2584  // Skip debug info intrinsics.
2585  if (isa<DbgInfoIntrinsic>(I))
2586  continue;
2587 
2588  // Skip eh pad instructions.
2589  if (I->isEHPad())
2590  continue;
2591 
2592  // Don't sink alloca: we never want to sink static alloca's out of the
2593  // entry block, and correctly sinking dynamic alloca's requires
2594  // checks for stacksave/stackrestore intrinsics.
2595  // FIXME: Refactor this check somehow?
2596  if (isa<AllocaInst>(I))
2597  continue;
2598 
2599  // Determine if there is a use in or before the loop (direct or
2600  // otherwise).
2601  bool UsedInLoop = false;
2602  for (Use &U : I->uses()) {
2603  Instruction *User = cast<Instruction>(U.getUser());
2604  BasicBlock *UseBB = User->getParent();
2605  if (PHINode *P = dyn_cast<PHINode>(User)) {
2606  unsigned i =
2607  PHINode::getIncomingValueNumForOperand(U.getOperandNo());
2608  UseBB = P->getIncomingBlock(i);
2609  }
2610  if (UseBB == Preheader || L->contains(UseBB)) {
2611  UsedInLoop = true;
2612  break;
2613  }
2614  }
2615 
2616  // If there is, the def must remain in the preheader.
2617  if (UsedInLoop)
2618  continue;
2619 
2620  // Otherwise, sink it to the exit block.
2621  Instruction *ToMove = &*I;
2622  bool Done = false;
2623 
2624  if (I != Preheader->begin()) {
2625  // Skip debug info intrinsics.
2626  do {
2627  --I;
2628  } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
2629 
2630  if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
2631  Done = true;
2632  } else {
2633  Done = true;
2634  }
2635 
2636  MadeAnyChanges = true;
2637  ToMove->moveBefore(*ExitBlock, InsertPt);
2638  if (Done) break;
2639  InsertPt = ToMove->getIterator();
2640  }
2641 
2642  return MadeAnyChanges;
2643 }
2644 
2645 bool IndVarSimplify::optimizeLoopExits(Loop *L) {
2646  SmallVector<BasicBlock*, 16> ExitingBlocks;
2647  L->getExitingBlocks(ExitingBlocks);
2648 
2649  // Form an expression for the maximum exit count possible for this loop. We
2650  // merge the max and exact information to approximate a version of
2651  // getConstantMaxBackedgeTakenCount which isn't restricted to just constants.
2652  // TODO: factor this out as a version of getConstantMaxBackedgeTakenCount which
2653  // isn't guaranteed to return a constant.
2654  SmallVector<const SCEV*, 4> ExitCounts;
2655  const SCEV *MaxConstEC = SE->getConstantMaxBackedgeTakenCount(L);
2656  if (!isa<SCEVCouldNotCompute>(MaxConstEC))
2657  ExitCounts.push_back(MaxConstEC);
2658  for (BasicBlock *ExitingBB : ExitingBlocks) {
2659  const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2660  if (!isa<SCEVCouldNotCompute>(ExitCount)) {
2661  assert(DT->dominates(ExitingBB, L->getLoopLatch()) &&
2662  "We should only have known counts for exiting blocks that "
2663  "dominate latch!");
2664  ExitCounts.push_back(ExitCount);
2665  }
2666  }
2667  if (ExitCounts.empty())
2668  return false;
2669  const SCEV *MaxExitCount = SE->getUMinFromMismatchedTypes(ExitCounts);
2670 
2671  bool Changed = false;
2672  for (BasicBlock *ExitingBB : ExitingBlocks) {
2673  // If our exitting block exits multiple loops, we can only rewrite the
2674  // innermost one. Otherwise, we're changing how many times the innermost
2675  // loop runs before it exits.
2676  if (LI->getLoopFor(ExitingBB) != L)
2677  continue;
2678 
2679  // Can't rewrite non-branch yet.
2680  BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2681  if (!BI)
2682  continue;
2683 
2684  // If already constant, nothing to do.
2685  if (isa<Constant>(BI->getCondition()))
2686  continue;
2687 
2688  const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2689  if (isa<SCEVCouldNotCompute>(ExitCount))
2690  continue;
2691 
2692  // If we know we'd exit on the first iteration, rewrite the exit to
2693  // reflect this. This does not imply the loop must exit through this
2694  // exit; there may be an earlier one taken on the first iteration.
2695  // TODO: Given we know the backedge can't be taken, we should go ahead
2696  // and break it. Or at least, kill all the header phis and simplify.
2697  if (ExitCount->isZero()) {
2698  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
2699  auto *OldCond = BI->getCondition();
2700  auto *NewCond = ExitIfTrue ? ConstantInt::getTrue(OldCond->getType()) :
2701  ConstantInt::getFalse(OldCond->getType());
2702  BI->setCondition(NewCond);
2703  if (OldCond->use_empty())
2704  DeadInsts.push_back(OldCond);
2705  Changed = true;
2706  continue;
2707  }
2708 
2709  // If we end up with a pointer exit count, bail. Note that we can end up
2710  // with a pointer exit count for one exiting block, and not for another in
2711  // the same loop.
2712  if (!ExitCount->getType()->isIntegerTy() ||
2713  !MaxExitCount->getType()->isIntegerTy())
2714  continue;
2715 
2716  Type *WiderType =
2717  SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
2718  ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
2719  MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
2720  assert(MaxExitCount->getType() == ExitCount->getType());
2721 
2722  // Can we prove that some other exit must be taken strictly before this
2723  // one? TODO: handle cases where ule is known, and equality is covered
2724  // by a dominating exit
2725  if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
2726  MaxExitCount, ExitCount)) {
2727  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
2728  auto *OldCond = BI->getCondition();
2729  auto *NewCond = ExitIfTrue ? ConstantInt::getFalse(OldCond->getType()) :
2730  ConstantInt::getTrue(OldCond->getType());
2731  BI->setCondition(NewCond);
2732  if (OldCond->use_empty())
2733  DeadInsts.push_back(OldCond);
2734  Changed = true;
2735  continue;
2736  }
2737 
2738  // TODO: If we can prove that the exiting iteration is equal to the exit
2739  // count for this exit and that no previous exit oppurtunities exist within
2740  // the loop, then we can discharge all other exits. (May fall out of
2741  // previous TODO.)
2742 
2743  // TODO: If we can't prove any relation between our exit count and the
2744  // loops exit count, but taking this exit doesn't require actually running
2745  // the loop (i.e. no side effects, no computed values used in exit), then
2746  // we can replace the exit test with a loop invariant test which exits on
2747  // the first iteration.
2748  }
2749  return Changed;
2750 }
2751 
2752 //===----------------------------------------------------------------------===//
2753 // IndVarSimplify driver. Manage several subpasses of IV simplification.
2754 //===----------------------------------------------------------------------===//
2755 
2756 bool IndVarSimplify::run(Loop *L) {
2757  // We need (and expect!) the incoming loop to be in LCSSA.
2758  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2759  "LCSSA required to run indvars!");
2760  bool Changed = false;
2761 
2762  // If LoopSimplify form is not available, stay out of trouble. Some notes:
2763  // - LSR currently only supports LoopSimplify-form loops. Indvars'
2764  // canonicalization can be a pessimization without LSR to "clean up"
2765  // afterwards.
2766  // - We depend on having a preheader; in particular,
2767  // Loop::getCanonicalInductionVariable only supports loops with preheaders,
2768  // and we're in trouble if we can't find the induction variable even when
2769  // we've manually inserted one.
2770  // - LFTR relies on having a single backedge.
2771  if (!L->isLoopSimplifyForm())
2772  return false;
2773 
2774  // If there are any floating-point recurrences, attempt to
2775  // transform them to use integer recurrences.
2776  Changed |= rewriteNonIntegerIVs(L);
2777 
2778 #ifndef NDEBUG
2779  // Used below for a consistency check only
2780  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
2781 #endif
2782 
2783  // Create a rewriter object which we'll use to transform the code with.
2784  SCEVExpander Rewriter(*SE, DL, "indvars");
2785 #ifndef NDEBUG
2786  Rewriter.setDebugType(DEBUG_TYPE);
2787 #endif
2788 
2789  // Eliminate redundant IV users.
2790  //
2791  // Simplification works best when run before other consumers of SCEV. We
2792  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
2793  // other expressions involving loop IVs have been evaluated. This helps SCEV
2794  // set no-wrap flags before normalizing sign/zero extension.
2795  Rewriter.disableCanonicalMode();
2796  Changed |= simplifyAndExtend(L, Rewriter, LI);
2797 
2798  // Check to see if we can compute the final value of any expressions
2799  // that are recurrent in the loop, and substitute the exit values from the
2800  // loop into any instructions outside of the loop that use the final values
2801  // of the current expressions.
2802  if (ReplaceExitValue != NeverRepl)
2803  Changed |= rewriteLoopExitValues(L, Rewriter);
2804 
2805  // Eliminate redundant IV cycles.
2806  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
2807 
2808  Changed |= optimizeLoopExits(L);
2809 
2810  // If we have a trip count expression, rewrite the loop's exit condition
2811  // using it.
2812  if (!DisableLFTR) {
2813  SmallVector<BasicBlock*, 16> ExitingBlocks;
2814  L->getExitingBlocks(ExitingBlocks);
2815  for (BasicBlock *ExitingBB : ExitingBlocks) {
2816  // Can't rewrite non-branch yet.
2817  if (!isa<BranchInst>(ExitingBB->getTerminator()))
2818  continue;
2819 
2820  // If our exitting block exits multiple loops, we can only rewrite the
2821  // innermost one. Otherwise, we're changing how many times the innermost
2822  // loop runs before it exits.
2823  if (LI->getLoopFor(ExitingBB) != L)
2824  continue;
2825 
2826  if (!needsLFTR(L, ExitingBB))
2827  continue;
2828 
2829  const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2830  if (isa<SCEVCouldNotCompute>(ExitCount))
2831  continue;
2832 
2833  // This was handled above, but as we form SCEVs, we can sometimes refine
2834  // existing ones; this allows exit counts to be folded to zero which
2835  // weren't when optimizeLoopExits saw them. Arguably, we should iterate
2836  // until stable to handle cases like this better.
2837  if (ExitCount->isZero())
2838  continue;
2839 
2840  PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
2841  if (!IndVar)
2842  continue;
2843 
2844  // Avoid high cost expansions. Note: This heuristic is questionable in
2845  // that our definition of "high cost" is not exactly principled.
2846  if (Rewriter.isHighCostExpansion(ExitCount, L))
2847  continue;
2848 
2849  // Check preconditions for proper SCEVExpander operation. SCEV does not
2850  // express SCEVExpander's dependencies, such as LoopSimplify. Instead
2851  // any pass that uses the SCEVExpander must do it. This does not work
2852  // well for loop passes because SCEVExpander makes assumptions about
2853  // all loops, while LoopPassManager only forces the current loop to be
2854  // simplified.
2855  //
2856  // FIXME: SCEV expansion has no way to bail out, so the caller must
2857  // explicitly check any assumptions made by SCEV. Brittle.
2858  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
2859  if (!AR || AR->getLoop()->getLoopPreheader())
2860  Changed |= linearFunctionTestReplace(L, ExitingBB,
2861  ExitCount, IndVar,
2862  Rewriter);
2863  }
2864  }
2865  // Clear the rewriter cache, because values that are in the rewriter's cache
2866  // can be deleted in the loop below, causing the AssertingVH in the cache to
2867  // trigger.
2868  Rewriter.clear();
2869 
2870  // Now that we're done iterating through lists, clean up any instructions
2871  // which are now dead.
2872  while (!DeadInsts.empty())
2873  if (Instruction *Inst =
2874  dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
2875  Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
2876 
2877  // The Rewriter may not be used from this point on.
2878 
2879  // Loop-invariant instructions in the preheader that aren't used in the
2880  // loop may be sunk below the loop to reduce register pressure.
2881  Changed |= sinkUnusedInvariants(L);
2882 
2883  // rewriteFirstIterationLoopExitValues does not rely on the computation of
2884  // trip count and therefore can further simplify exit values in addition to
2885  // rewriteLoopExitValues.
2886  Changed |= rewriteFirstIterationLoopExitValues(L);
2887 
2888  // Clean up dead instructions.
2889  Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
2890 
2891  // Check a post-condition.
2892  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2893  "Indvars did not preserve LCSSA!");
2894 
2895  // Verify that LFTR, and any other change have not interfered with SCEV's
2896  // ability to compute trip count.
2897 #ifndef NDEBUG
2898  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
2899  SE->forgetLoop(L);
2900  const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
2901  if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
2902  SE->getTypeSizeInBits(NewBECount->getType()))
2903  NewBECount = SE->getTruncateOrNoop(NewBECount,
2904  BackedgeTakenCount->getType());
2905  else
2906  BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
2907  NewBECount->getType());
2908  assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
2909  }
2910 #endif
2911 
2912  return Changed;
2913 }
2914 
2917  LPMUpdater &) {
2918  Function *F = L.getHeader()->getParent();
2919  const DataLayout &DL = F->getParent()->getDataLayout();
2920 
2921  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI);
2922  if (!IVS.run(&L))
2923  return PreservedAnalyses::all();
2924 
2925  auto PA = getLoopPassPreservedAnalyses();
2926  PA.preserveSet<CFGAnalyses>();
2927  return PA;
2928 }
2929 
2930 namespace {
2931 
2932 struct IndVarSimplifyLegacyPass : public LoopPass {
2933  static char ID; // Pass identification, replacement for typeid
2934 
2935  IndVarSimplifyLegacyPass() : LoopPass(ID) {
2937  }
2938 
2939  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
2940  if (skipLoop(L))
2941  return false;
2942 
2943  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2944  auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2945  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2946  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2947  auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
2948  auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2949  auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2950  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2951 
2952  IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
2953  return IVS.run(L);
2954  }
2955 
2956  void getAnalysisUsage(AnalysisUsage &AU) const override {
2957  AU.setPreservesCFG();
2959  }
2960 };
2961 
2962 } // end anonymous namespace
2963 
2965 
2966 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
2967  "Induction Variable Simplification", false, false)
2969 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
2970  "Induction Variable Simplification", false, false)
2971 
2973  return new IndVarSimplifyLegacyPass();
2974 }
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:80
static cl::opt< ReplaceExitVal > ReplaceExitValue("replexitval", cl::Hidden, cl::init(OnlyCheapRepl), cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), clEnumValN(OnlyCheapRepl, "cheap", "only replace exit value when the cost is cheap"), clEnumValN(NoHardUse, "noharduse", "only replace exit values when loop def likely dead"), clEnumValN(AlwaysRepl, "always", "always replace exit value whenever possible")))
uint64_t CallInst * C
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:67
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
bool hoistIVInc(Instruction *IncV, Instruction *InsertPos)
Utility for hoisting an IV increment.
Induction Variable Simplification
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:616
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:641
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:70
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2212
static PHINode * getLoopPhiForCounter(Value *IncV, Loop *L)
Given an Value which is hoped to be part of an add recurance in the given loop, return the associated...
iterator_range< use_iterator > uses()
Definition: Value.h:374
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:211
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
MutableArrayRef< T > makeMutableArrayRef(T &OneElt)
Construct a MutableArrayRef from a single element.
Definition: ArrayRef.h:508
This class represents lattice values for constants.
Definition: AllocatorList.h:23
This header provides classes for managing a pipeline of passes over loops in LLVM IR...
Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
bool isRecursivelyLCSSAForm(DominatorTree &DT, const LoopInfo &LI) const
Return true if this Loop and all inner subloops are in LCSSA form.
Definition: LoopInfo.cpp:451
bool isLCSSAForm(DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:444
This class represents zero extension of integer types.
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
The main scalar evolution driver.
bool isZero() const
Return true if the expression is a constant zero.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:160
bool isHighCostExpansion(const SCEV *Expr, Loop *L, const Instruction *At=nullptr)
Return true for expressions that may incur non-trivial cost to evaluate at runtime.
unsigned less than
Definition: InstrTypes.h:757
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:738
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:733
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:748
uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
void setDebugType(const char *s)
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
findNearestCommonDominator - Find nearest common dominator basic block for basic block A and B...
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
BasicBlock * getSuccessor(unsigned i) const
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1882
STATISTIC(NumFunctions, "Total number of functions")
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
F(f)
static cl::opt< bool > UsePostIncrementRanges("indvars-post-increment-ranges", cl::Hidden, cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), cl::init(true))
This class represents a sign extension of integer types.
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:176
Hexagon Common GEP
Value * getCondition() const
static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root, Instruction *OnPathTo, DominatorTree *DT)
Return true if undefined behavior would provable be executed on the path to OnPathTo if Root produced...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:137
static ConstantRange makeAllowedICmpRegion(CmpInst::Predicate Pred, const ConstantRange &Other)
Produce the smallest range such that all values that may satisfy the given predicate with any value c...
This defines the Use class.
static bool hasConcreteDef(Value *V)
Return true if the given value is concrete.
ReplaceExitVal
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:299
bool hasLoopInvariantOperands(const Instruction *I) const
Return true if all the operands of the specified instruction are loop invariant.
Definition: LoopInfo.cpp:67
bool propagatesFullPoison(const Instruction *I)
Return true if this function can prove that I is guaranteed to yield full-poison (all bits poison) if...
static Value * genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, const SCEV *ExitCount, bool UsePostInc, Loop *L, SCEVExpander &Rewriter, ScalarEvolution *SE)
Insert an IR expression which computes the value held by the IV IndVar (which must be an loop counter...
INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars", "Induction Variable Simplification", false, false) INITIALIZE_PASS_END(IndVarSimplifyLegacyPass
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:268
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1084
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:195
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:743
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:47
Interface for visiting interesting IV users that are recognized but not simplified by this utility...
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:133
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
bool isSigned() const
Definition: InstrTypes.h:902
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:639
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE, etc.
Definition: InstrTypes.h:831
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:369
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:928
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:368
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:439
A Use represents the edge between a Value definition and its users.
Definition: Use.h:55
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal)
Convert APF to an integer, if possible.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:41
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:196
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:779
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:739
This file implements a class to represent arbitrary precision integral constant values and operations...
BlockT * getHeader() const
Definition: LoopInfo.h:105
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:102
Key
PAL metadata keys.
void findDbgValues(SmallVectorImpl< DbgValueInst *> &DbgValues, Value *V)
Finds the llvm.dbg.value intrinsics describing a value.
Definition: Local.cpp:1514
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:692
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:181
This node represents a polynomial recurrence on the trip count of the specified loop.
ConstantRange intersectWith(const ConstantRange &CR, PreferredRangeType Type=Smallest) const
Return the range that results from the intersection of this range with another range.
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB)
Whether the current loop exit test is based on this value.
This instruction compares its operands according to the predicate given to the constructor.
static Instruction * getInsertPointForUses(Instruction *User, Value *Def, DominatorTree *DT, LoopInfo *LI)
Determine the insertion point for this user.
static cl::opt< bool > VerifyIndvars("verify-indvars", cl::Hidden, cl::desc("Verify the ScalarEvolution result after running indvars"))
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Examine each PHI in the given block and delete it if it is dead.
const SCEV * getSizeOfExpr(Type *IntTy, Type *AllocTy)
Return an expression for sizeof AllocTy that is type IntTy.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:125
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:156
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:429
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1878
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:291
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
static BinaryOperator * CreateAdd(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block...
Definition: IRBuilder.h:132
Value * getOperand(unsigned i) const
Definition: User.h:169
void initializeIndVarSimplifyLegacyPassPass(PassRegistry &)
void clearInsertPoint()
Clear the current insertion point.
static MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition: Metadata.cpp:105
void clear()
Erase the contents of the InsertedExpressions map so that users trying to expand the same expression ...
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt...
Definition: PatternMatch.h:189
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:216
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:328
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
Conditional or Unconditional Branch instruction.
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl< Value *> &Visited, unsigned Depth)
Recursive helper for hasConcreteDef().
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:652
Value * getIncomingValueForBlock(const BasicBlock *BB) const
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:223
const Instruction & front() const
Definition: BasicBlock.h:280
#define H(x, y, z)
Definition: MD5.cpp:57
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:263
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE, const TargetTransformInfo *TTI)
Update information about the induction variable that is extended by this sign or zero extend operatio...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
Definition: Instruction.h:582
const SCEV * getAddExpr(SmallVectorImpl< const SCEV *> &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
bool mustTriggerUB(const Instruction *I, const SmallSet< const Value *, 16 > &KnownPoison)
Return true if the given instruction must trigger undefined behavior.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:576
Represent the analysis usage information of a pass.
This file declares a class to represent arbitrary precision floating point values and provide a varie...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1172
This instruction compares its operands according to the predicate given to the constructor.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:732
Utility class for integer operators which may exhibit overflow - Add, Sub, Mul, and Shl...
Definition: Operator.h:66
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values...
op_range operands()
Definition: User.h:237
self_iterator getIterator()
Definition: ilist_node.h:81
Class to represent integer types.
Definition: DerivedTypes.h:40
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn&#39;t already there.
Definition: SmallSet.h:180
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:440
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:75
void getExitingBlocks(SmallVectorImpl< BlockT *> &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:34
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1446
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
static unsigned getIncomingValueNumForOperand(unsigned i)
size_t size() const
Definition: SmallVector.h:52
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:747
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1874
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:61
signed greater than
Definition: InstrTypes.h:759
#define DEBUG_TYPE
const APFloat & getValueAPF() const
Definition: Constants.h:302
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:736
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:115
Iterator for intrusive lists based on ilist_node.
unsigned getNumOperands() const
Definition: User.h:191
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
Type * getType() const
Return the LLVM type of this SCEV expression.
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:308
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
static ValueAsMetadata * get(Value *V)
Definition: Metadata.cpp:348
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:746
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:248
Module.h This file contains the declarations for the Module class.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:63
Provides information about what library functions are available for the current target.
This class represents a range of values.
Definition: ConstantRange.h:47
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU...
Definition: DataLayout.h:255
signed less than
Definition: InstrTypes.h:761
static PHINode * FindLoopCounter(Loop *L, BasicBlock *ExitingBB, const SCEV *BECount, ScalarEvolution *SE, DominatorTree *DT)
Search the loop header for a loop counter (anadd rec w/step of one) suitable for use by LFTR...
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:374
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:653
bool isConditional() const
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:301
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:609
static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI)
This IV user cannot be widened.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
Definition: PatternMatch.h:894
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
signed less or equal
Definition: InstrTypes.h:762
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:55
Class for arbitrary precision integers.
Definition: APInt.h:69
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), Instruction *InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
iterator_range< user_iterator > users()
Definition: Value.h:419
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property...
Definition: Operator.h:95
This class uses information about analyze scalars to rewrite expressions in canonical form...
Represents analyses that only rely on functions&#39; control flow.
Definition: PassManager.h:114
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:627
LoopT * getParentLoop() const
Definition: LoopInfo.h:106
Virtual Register Rewriter
Definition: VirtRegMap.cpp:220
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:807
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:460
This class represents an analyzed expression in the program.
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:175
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:509
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:154
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
user_iterator_impl< User > user_iterator
Definition: Value.h:388
bool mayReadFromMemory() const
Return true if this instruction may read memory.
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass&#39;s AnalysisUsage.
Definition: LoopUtils.cpp:138
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:740
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
static bool isLoopCounter(PHINode *Phi, Loop *L, ScalarEvolution *SE)
Return true if the given phi is a "counter" in L.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:324
bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, SmallVectorImpl< WeakTrackingVH > &Dead, SCEVExpander &Rewriter, IVVisitor *V=nullptr)
simplifyUsersOfIV - Simplify instructions that use this induction variable by using ScalarEvolution t...
const SCEV * getTruncateOrZeroExtend(const SCEV *V, Type *Ty, unsigned Depth=0)
Return a SCEV corresponding to a conversion of the input value to the specified type.
int getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info=OK_AnyValue, OperandValueKind Opd2Info=OK_AnyValue, OperandValueProperties Opd1PropInfo=OP_None, OperandValueProperties Opd2PropInfo=OP_None, ArrayRef< const Value *> Args=ArrayRef< const Value *>()) const
This is an approximation of reciprocal throughput of a math/logic op.
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:744
void setCondition(Value *V)
unsigned replaceCongruentIVs(Loop *L, const DominatorTree *DT, SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetTransformInfo *TTI=nullptr)
replace congruent phis with their most canonical representative.
This class represents a cast from signed integer to floating point.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
Definition: Value.h:395
bool isOne() const
Return true if the expression is a constant one.
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:735
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:575
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction has no side ef...
Definition: Local.cpp:359
LLVM Value Representation.
Definition: Value.h:73
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:745
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:265
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond)
Return true if this IV has any uses other than the (soon to be rewritten) loop exit test...
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:86
void disableCanonicalMode()
Disable the behavior of expanding expressions in canonical form rather than in a more literal form...
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:593
void getUniqueExitBlocks(SmallVectorImpl< BlockT *> &ExitBlocks) const
Return all unique successor blocks of this loop.
Definition: LoopInfoImpl.h:115
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:432
A container for analyses that lazily runs them and caches their results.
This pass exposes codegen information to IR-level passes.
This header defines various interfaces for pass management in LLVM.
void setIncomingValue(unsigned i, Value *V)
#define LLVM_DEBUG(X)
Definition: Debug.h:122
bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE)
Return true if the given expression is safe to expand in the sense that all materialized values are s...
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:161
static cl::opt< bool > DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), cl::desc("Disable Linear Function Test Replace optimization"))
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:737
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB)
linearFunctionTestReplace policy.
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
bool use_empty() const
Definition: Value.h:342
Pass * createIndVarSimplifyPass()
signed greater or equal
Definition: InstrTypes.h:760
IntegerType * Int32Ty
const BasicBlock * getParent() const
Definition: Instruction.h:66
This class represents a constant integer value.
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property...
Definition: Operator.h:89
bool makeLoopInvariant(Value *V, bool &Changed, Instruction *InsertPt=nullptr, MemorySSAUpdater *MSSAU=nullptr) const
If the given value is an instruction inside of the loop and it can be hoisted, do so to make it trivi...
Definition: LoopInfo.cpp:71