LLVM  10.0.0svn
IndVarSimplify.cpp
Go to the documentation of this file.
1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This transformation analyzes and transforms the induction variables (and
10 // computations derived from them) into simpler forms suitable for subsequent
11 // analysis and transformation.
12 //
13 // If the trip count of a loop is computable, this pass also makes the following
14 // changes:
15 // 1. The exit condition for the loop is canonicalized to compare the
16 // induction value against the exit value. This turns loops like:
17 // 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18 // 2. Any use outside of the loop of an expression derived from the indvar
19 // is changed to compute the derived value outside of the loop, eliminating
20 // the dependence on the exit value of the induction variable. If the only
21 // purpose of the loop is to compute the exit value of some derived
22 // expression, this transformation will make the loop dead.
23 //
24 //===----------------------------------------------------------------------===//
25 
#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>
88 using namespace llvm;
89 
90 #define DEBUG_TYPE "indvars"
91 
92 STATISTIC(NumWidened , "Number of indvars widened");
93 STATISTIC(NumReplaced , "Number of exit values replaced");
94 STATISTIC(NumLFTR , "Number of loop exit tests replaced");
95 STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
96 STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
97 
98 // Trip count verification can be enabled by default under NDEBUG if we
99 // implement a strong expression equivalence checker in SCEV. Until then, we
100 // use the verify-indvars flag, which may assert in some cases.
102  "verify-indvars", cl::Hidden,
103  cl::desc("Verify the ScalarEvolution result after running indvars"));
104 
106 
108  "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
109  cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
110  cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
111  clEnumValN(OnlyCheapRepl, "cheap",
112  "only replace exit value when the cost is cheap"),
113  clEnumValN(NoHardUse, "noharduse",
114  "only replace exit values when loop def likely dead"),
115  clEnumValN(AlwaysRepl, "always",
116  "always replace exit value whenever possible")));
117 
119  "indvars-post-increment-ranges", cl::Hidden,
120  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
121  cl::init(true));
122 
123 static cl::opt<bool>
124 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
125  cl::desc("Disable Linear Function Test Replace optimization"));
126 
127 static cl::opt<bool>
128 LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(false),
129  cl::desc("Predicate conditions in read only loops"));
130 
131 
132 namespace {
133 
134 struct RewritePhi;
135 
136 class IndVarSimplify {
137  LoopInfo *LI;
138  ScalarEvolution *SE;
139  DominatorTree *DT;
140  const DataLayout &DL;
141  TargetLibraryInfo *TLI;
142  const TargetTransformInfo *TTI;
143 
145 
146  bool isValidRewrite(Value *FromVal, Value *ToVal);
147 
148  bool handleFloatingPointIV(Loop *L, PHINode *PH);
149  bool rewriteNonIntegerIVs(Loop *L);
150 
151  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
152  /// Try to eliminate loop exits based on analyzeable exit counts
153  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
154  /// Try to form loop invariant tests for loop exits by changing how many
155  /// iterations of the loop run when that is unobservable.
156  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
157 
158  bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
159  bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
160  bool rewriteFirstIterationLoopExitValues(Loop *L);
161  bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const;
162 
163  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
164  const SCEV *ExitCount,
165  PHINode *IndVar, SCEVExpander &Rewriter);
166 
167  bool sinkUnusedInvariants(Loop *L);
168 
169 public:
170  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
171  const DataLayout &DL, TargetLibraryInfo *TLI,
172  TargetTransformInfo *TTI)
173  : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}
174 
175  bool run(Loop *L);
176 };
177 
178 } // end anonymous namespace
179 
180 /// Return true if the SCEV expansion generated by the rewriter can replace the
181 /// original value. SCEV guarantees that it produces the same value, but the way
182 /// it is produced may be illegal IR. Ideally, this function will only be
183 /// called for verification.
184 bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
185  // If an SCEV expression subsumed multiple pointers, its expansion could
186  // reassociate the GEP changing the base pointer. This is illegal because the
187  // final address produced by a GEP chain must be inbounds relative to its
188  // underlying object. Otherwise basic alias analysis, among other things,
189  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
190  // producing an expression involving multiple pointers. Until then, we must
191  // bail out here.
192  //
193  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
194  // because it understands lcssa phis while SCEV does not.
195  Value *FromPtr = FromVal;
196  Value *ToPtr = ToVal;
197  if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
198  FromPtr = GEP->getPointerOperand();
199  }
200  if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
201  ToPtr = GEP->getPointerOperand();
202  }
203  if (FromPtr != FromVal || ToPtr != ToVal) {
204  // Quickly check the common case
205  if (FromPtr == ToPtr)
206  return true;
207 
208  // SCEV may have rewritten an expression that produces the GEP's pointer
209  // operand. That's ok as long as the pointer operand has the same base
210  // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
211  // base of a recurrence. This handles the case in which SCEV expansion
212  // converts a pointer type recurrence into a nonrecurrent pointer base
213  // indexed by an integer recurrence.
214 
215  // If the GEP base pointer is a vector of pointers, abort.
216  if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
217  return false;
218 
219  const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
220  const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
221  if (FromBase == ToBase)
222  return true;
223 
224  LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase
225  << " != " << *ToBase << "\n");
226 
227  return false;
228  }
229  return true;
230 }
231 
232 /// Determine the insertion point for this user. By default, insert immediately
233 /// before the user. SCEVExpander or LICM will hoist loop invariants out of the
234 /// loop. For PHI nodes, there may be multiple uses, so compute the nearest
235 /// common dominator for the incoming blocks. A nullptr can be returned if no
236 /// viable location is found: it may happen if User is a PHI and Def only comes
237 /// to this PHI from unreachable blocks.
239  DominatorTree *DT, LoopInfo *LI) {
240  PHINode *PHI = dyn_cast<PHINode>(User);
241  if (!PHI)
242  return User;
243 
244  Instruction *InsertPt = nullptr;
245  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
246  if (PHI->getIncomingValue(i) != Def)
247  continue;
248 
249  BasicBlock *InsertBB = PHI->getIncomingBlock(i);
250 
251  if (!DT->isReachableFromEntry(InsertBB))
252  continue;
253 
254  if (!InsertPt) {
255  InsertPt = InsertBB->getTerminator();
256  continue;
257  }
258  InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
259  InsertPt = InsertBB->getTerminator();
260  }
261 
262  // If we have skipped all inputs, it means that Def only comes to Phi from
263  // unreachable blocks.
264  if (!InsertPt)
265  return nullptr;
266 
267  auto *DefI = dyn_cast<Instruction>(Def);
268  if (!DefI)
269  return InsertPt;
270 
271  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");
272 
273  auto *L = LI->getLoopFor(DefI->getParent());
274  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));
275 
276  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
277  if (LI->getLoopFor(DTN->getBlock()) == L)
278  return DTN->getBlock()->getTerminator();
279 
280  llvm_unreachable("DefI dominates InsertPt!");
281 }
282 
283 //===----------------------------------------------------------------------===//
284 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
285 //===----------------------------------------------------------------------===//
286 
287 /// Convert APF to an integer, if possible.
288 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
289  bool isExact = false;
290  // See if we can convert this to an int64_t
291  uint64_t UIntVal;
292  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
293  APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
294  !isExact)
295  return false;
296  IntVal = UIntVal;
297  return true;
298 }
299 
300 /// If the loop has floating induction variable then insert corresponding
301 /// integer induction variable if possible.
302 /// For example,
303 /// for(double i = 0; i < 10000; ++i)
304 /// bar(i)
305 /// is converted into
306 /// for(int i = 0; i < 10000; ++i)
307 /// bar((double)i);
308 bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
309  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
310  unsigned BackEdge = IncomingEdge^1;
311 
312  // Check incoming value.
313  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
314 
315  int64_t InitValue;
316  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
317  return false;
318 
319  // Check IV increment. Reject this PN if increment operation is not
320  // an add or increment value can not be represented by an integer.
321  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
322  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
323 
324  // If this is not an add of the PHI with a constantfp, or if the constant fp
325  // is not an integer, bail out.
326  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
327  int64_t IncValue;
328  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
329  !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
330  return false;
331 
332  // Check Incr uses. One user is PN and the other user is an exit condition
333  // used by the conditional terminator.
334  Value::user_iterator IncrUse = Incr->user_begin();
335  Instruction *U1 = cast<Instruction>(*IncrUse++);
336  if (IncrUse == Incr->user_end()) return false;
337  Instruction *U2 = cast<Instruction>(*IncrUse++);
338  if (IncrUse != Incr->user_end()) return false;
339 
340  // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
341  // only used by a branch, we can't transform it.
343  if (!Compare)
344  Compare = dyn_cast<FCmpInst>(U2);
345  if (!Compare || !Compare->hasOneUse() ||
346  !isa<BranchInst>(Compare->user_back()))
347  return false;
348 
349  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
350 
351  // We need to verify that the branch actually controls the iteration count
352  // of the loop. If not, the new IV can overflow and no one will notice.
353  // The branch block must be in the loop and one of the successors must be out
354  // of the loop.
355  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
356  if (!L->contains(TheBr->getParent()) ||
357  (L->contains(TheBr->getSuccessor(0)) &&
358  L->contains(TheBr->getSuccessor(1))))
359  return false;
360 
361  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
362  // transform it.
363  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
364  int64_t ExitValue;
365  if (ExitValueVal == nullptr ||
366  !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
367  return false;
368 
369  // Find new predicate for integer comparison.
371  switch (Compare->getPredicate()) {
372  default: return false; // Unknown comparison.
373  case CmpInst::FCMP_OEQ:
374  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
375  case CmpInst::FCMP_ONE:
376  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
377  case CmpInst::FCMP_OGT:
378  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
379  case CmpInst::FCMP_OGE:
380  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
381  case CmpInst::FCMP_OLT:
382  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
383  case CmpInst::FCMP_OLE:
384  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
385  }
386 
387  // We convert the floating point induction variable to a signed i32 value if
388  // we can. This is only safe if the comparison will not overflow in a way
389  // that won't be trapped by the integer equivalent operations. Check for this
390  // now.
391  // TODO: We could use i64 if it is native and the range requires it.
392 
393  // The start/stride/exit values must all fit in signed i32.
394  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
395  return false;
396 
397  // If not actually striding (add x, 0.0), avoid touching the code.
398  if (IncValue == 0)
399  return false;
400 
401  // Positive and negative strides have different safety conditions.
402  if (IncValue > 0) {
403  // If we have a positive stride, we require the init to be less than the
404  // exit value.
405  if (InitValue >= ExitValue)
406  return false;
407 
408  uint32_t Range = uint32_t(ExitValue-InitValue);
409  // Check for infinite loop, either:
410  // while (i <= Exit) or until (i > Exit)
411  if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
412  if (++Range == 0) return false; // Range overflows.
413  }
414 
415  unsigned Leftover = Range % uint32_t(IncValue);
416 
417  // If this is an equality comparison, we require that the strided value
418  // exactly land on the exit value, otherwise the IV condition will wrap
419  // around and do things the fp IV wouldn't.
420  if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
421  Leftover != 0)
422  return false;
423 
424  // If the stride would wrap around the i32 before exiting, we can't
425  // transform the IV.
426  if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
427  return false;
428  } else {
429  // If we have a negative stride, we require the init to be greater than the
430  // exit value.
431  if (InitValue <= ExitValue)
432  return false;
433 
434  uint32_t Range = uint32_t(InitValue-ExitValue);
435  // Check for infinite loop, either:
436  // while (i >= Exit) or until (i < Exit)
437  if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
438  if (++Range == 0) return false; // Range overflows.
439  }
440 
441  unsigned Leftover = Range % uint32_t(-IncValue);
442 
443  // If this is an equality comparison, we require that the strided value
444  // exactly land on the exit value, otherwise the IV condition will wrap
445  // around and do things the fp IV wouldn't.
446  if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
447  Leftover != 0)
448  return false;
449 
450  // If the stride would wrap around the i32 before exiting, we can't
451  // transform the IV.
452  if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
453  return false;
454  }
455 
457 
458  // Insert new integer induction variable.
459  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
460  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
461  PN->getIncomingBlock(IncomingEdge));
462 
463  Value *NewAdd =
464  BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
465  Incr->getName()+".int", Incr);
466  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
467 
468  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
469  ConstantInt::get(Int32Ty, ExitValue),
470  Compare->getName());
471 
472  // In the following deletions, PN may become dead and may be deleted.
473  // Use a WeakTrackingVH to observe whether this happens.
474  WeakTrackingVH WeakPH = PN;
475 
476  // Delete the old floating point exit comparison. The branch starts using the
477  // new comparison.
478  NewCompare->takeName(Compare);
479  Compare->replaceAllUsesWith(NewCompare);
481 
482  // Delete the old floating point increment.
483  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
485 
486  // If the FP induction variable still has uses, this is because something else
487  // in the loop uses its value. In order to canonicalize the induction
488  // variable, we chose to eliminate the IV and rewrite it in terms of an
489  // int->fp cast.
490  //
491  // We give preference to sitofp over uitofp because it is faster on most
492  // platforms.
493  if (WeakPH) {
494  Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
495  &*PN->getParent()->getFirstInsertionPt());
496  PN->replaceAllUsesWith(Conv);
498  }
499  return true;
500 }
501 
502 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
503  // First step. Check to see if there are any floating-point recurrences.
504  // If there are, change them into integer recurrences, permitting analysis by
505  // the SCEV routines.
506  BasicBlock *Header = L->getHeader();
507 
509  for (PHINode &PN : Header->phis())
510  PHIs.push_back(&PN);
511 
512  bool Changed = false;
513  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
514  if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
515  Changed |= handleFloatingPointIV(L, PN);
516 
517  // If the loop previously had floating-point IV, ScalarEvolution
518  // may not have been able to compute a trip count. Now that we've done some
519  // re-writing, the trip count may be computable.
520  if (Changed)
521  SE->forgetLoop(L);
522  return Changed;
523 }
524 
525 namespace {
526 
527 // Collect information about PHI nodes which can be transformed in
528 // rewriteLoopExitValues.
529 struct RewritePhi {
530  PHINode *PN;
531 
532  // Ith incoming value.
533  unsigned Ith;
534 
535  // Exit value after expansion.
536  Value *Val;
537 
538  // High Cost when expansion.
539  bool HighCost;
540 
541  RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
542  : PN(P), Ith(I), Val(V), HighCost(H) {}
543 };
544 
545 } // end anonymous namespace
546 
547 //===----------------------------------------------------------------------===//
548 // rewriteLoopExitValues - Optimize IV users outside the loop.
549 // As a side effect, reduces the amount of IV processing within the loop.
550 //===----------------------------------------------------------------------===//
551 
552 bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L, const Instruction *I) const {
555  Visited.insert(I);
556  WorkList.push_back(I);
557  while (!WorkList.empty()) {
558  const Instruction *Curr = WorkList.pop_back_val();
559  // This use is outside the loop, nothing to do.
560  if (!L->contains(Curr))
561  continue;
562  // Do we assume it is a "hard" use which will not be eliminated easily?
563  if (Curr->mayHaveSideEffects())
564  return true;
565  // Otherwise, add all its users to worklist.
566  for (auto U : Curr->users()) {
567  auto *UI = cast<Instruction>(U);
568  if (Visited.insert(UI).second)
569  WorkList.push_back(UI);
570  }
571  }
572  return false;
573 }
574 
575 /// Check to see if this loop has a computable loop-invariant execution count.
576 /// If so, this means that we can compute the final value of any expressions
577 /// that are recurrent in the loop, and substitute the exit values from the loop
578 /// into any instructions outside of the loop that use the final values of the
579 /// current expressions.
580 ///
581 /// This is mostly redundant with the regular IndVarSimplify activities that
582 /// happen later, except that it's more powerful in some cases, because it's
583 /// able to brute-force evaluate arbitrary instructions as long as they have
584 /// constant operands at the beginning of the loop.
585 bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
586  // Check a pre-condition.
587  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
588  "Indvars did not preserve LCSSA!");
589 
590  SmallVector<BasicBlock*, 8> ExitBlocks;
591  L->getUniqueExitBlocks(ExitBlocks);
592 
593  SmallVector<RewritePhi, 8> RewritePhiSet;
594  // Find all values that are computed inside the loop, but used outside of it.
595  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
596  // the exit blocks of the loop to find them.
597  for (BasicBlock *ExitBB : ExitBlocks) {
598  // If there are no PHI nodes in this exit block, then no values defined
599  // inside the loop are used on this path, skip it.
600  PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
601  if (!PN) continue;
602 
603  unsigned NumPreds = PN->getNumIncomingValues();
604 
605  // Iterate over all of the PHI nodes.
606  BasicBlock::iterator BBI = ExitBB->begin();
607  while ((PN = dyn_cast<PHINode>(BBI++))) {
608  if (PN->use_empty())
609  continue; // dead use, don't replace it
610 
611  if (!SE->isSCEVable(PN->getType()))
612  continue;
613 
614  // It's necessary to tell ScalarEvolution about this explicitly so that
615  // it can walk the def-use list and forget all SCEVs, as it may not be
616  // watching the PHI itself. Once the new exit value is in place, there
617  // may not be a def-use connection between the loop and every instruction
618  // which got a SCEVAddRecExpr for that loop.
619  SE->forgetValue(PN);
620 
621  // Iterate over all of the values in all the PHI nodes.
622  for (unsigned i = 0; i != NumPreds; ++i) {
623  // If the value being merged in is not integer or is not defined
624  // in the loop, skip it.
625  Value *InVal = PN->getIncomingValue(i);
626  if (!isa<Instruction>(InVal))
627  continue;
628 
629  // If this pred is for a subloop, not L itself, skip it.
630  if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
631  continue; // The Block is in a subloop, skip it.
632 
633  // Check that InVal is defined in the loop.
634  Instruction *Inst = cast<Instruction>(InVal);
635  if (!L->contains(Inst))
636  continue;
637 
638  // Okay, this instruction has a user outside of the current loop
639  // and varies predictably *inside* the loop. Evaluate the value it
640  // contains when the loop exits, if possible. We prefer to start with
641  // expressions which are true for all exits (so as to maximize
642  // expression reuse by the SCEVExpander), but resort to per-exit
643  // evaluation if that fails.
644  const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
645  if (isa<SCEVCouldNotCompute>(ExitValue) ||
646  !SE->isLoopInvariant(ExitValue, L) ||
647  !isSafeToExpand(ExitValue, *SE)) {
648  // TODO: This should probably be sunk into SCEV in some way; maybe a
649  // getSCEVForExit(SCEV*, L, ExitingBB)? It can be generalized for
650  // most SCEV expressions and other recurrence types (e.g. shift
651  // recurrences). Is there existing code we can reuse?
652  const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i));
653  if (isa<SCEVCouldNotCompute>(ExitCount))
654  continue;
655  if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst)))
656  if (AddRec->getLoop() == L)
657  ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE);
658  if (isa<SCEVCouldNotCompute>(ExitValue) ||
659  !SE->isLoopInvariant(ExitValue, L) ||
660  !isSafeToExpand(ExitValue, *SE))
661  continue;
662  }
663 
664  // Computing the value outside of the loop brings no benefit if it is
665  // definitely used inside the loop in a way which can not be optimized
666  // away. Avoid doing so unless we know we have a value which computes
667  // the ExitValue already. TODO: This should be merged into SCEV
668  // expander to leverage its knowledge of existing expressions.
669  if (ReplaceExitValue != AlwaysRepl &&
670  !isa<SCEVConstant>(ExitValue) && !isa<SCEVUnknown>(ExitValue) &&
671  hasHardUserWithinLoop(L, Inst))
672  continue;
673 
674  bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
675  Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
676 
677  LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
678  << '\n'
679  << " LoopVal = " << *Inst << "\n");
680 
681  if (!isValidRewrite(Inst, ExitVal)) {
682  DeadInsts.push_back(ExitVal);
683  continue;
684  }
685 
686 #ifndef NDEBUG
687  // If we reuse an instruction from a loop which is neither L nor one of
688  // its containing loops, we end up breaking LCSSA form for this loop by
689  // creating a new use of its instruction.
690  if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
691  if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
692  if (EVL != L)
693  assert(EVL->contains(L) && "LCSSA breach detected!");
694 #endif
695 
696  // Collect all the candidate PHINodes to be rewritten.
697  RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
698  }
699  }
700  }
701 
702  bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
703 
704  bool Changed = false;
705  // Transformation.
706  for (const RewritePhi &Phi : RewritePhiSet) {
707  PHINode *PN = Phi.PN;
708  Value *ExitVal = Phi.Val;
709 
710  // Only do the rewrite when the ExitValue can be expanded cheaply.
711  // If LoopCanBeDel is true, rewrite exit value aggressively.
712  if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
713  DeadInsts.push_back(ExitVal);
714  continue;
715  }
716 
717  Changed = true;
718  ++NumReplaced;
719  Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
720  PN->setIncomingValue(Phi.Ith, ExitVal);
721 
722  // If this instruction is dead now, delete it. Don't do it now to avoid
723  // invalidating iterators.
724  if (isInstructionTriviallyDead(Inst, TLI))
725  DeadInsts.push_back(Inst);
726 
727  // Replace PN with ExitVal if that is legal and does not break LCSSA.
728  if (PN->getNumIncomingValues() == 1 &&
729  LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
730  PN->replaceAllUsesWith(ExitVal);
731  PN->eraseFromParent();
732  }
733  }
734 
735  // The insertion point instruction may have been deleted; clear it out
736  // so that the rewriter doesn't trip over it later.
737  Rewriter.clearInsertPoint();
738  return Changed;
739 }
740 
741 //===---------------------------------------------------------------------===//
742 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
743 // they will exit at the first iteration.
744 //===---------------------------------------------------------------------===//
745 
746 /// Check to see if this loop has loop invariant conditions which lead to loop
747 /// exits. If so, we know that if the exit path is taken, it is at the first
748 /// loop iteration. This lets us predict exit values of PHI nodes that live in
749 /// loop header.
750 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
751  // Verify the input to the pass is already in LCSSA form.
752  assert(L->isLCSSAForm(*DT));
753 
754  SmallVector<BasicBlock *, 8> ExitBlocks;
755  L->getUniqueExitBlocks(ExitBlocks);
756 
757  bool MadeAnyChanges = false;
758  for (auto *ExitBB : ExitBlocks) {
759  // If there are no more PHI nodes in this exit block, then no more
760  // values defined inside the loop are used on this path.
761  for (PHINode &PN : ExitBB->phis()) {
762  for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
763  IncomingValIdx != E; ++IncomingValIdx) {
764  auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
765 
766  // Can we prove that the exit must run on the first iteration if it
767  // runs at all? (i.e. early exits are fine for our purposes, but
768  // traces which lead to this exit being taken on the 2nd iteration
769  // aren't.) Note that this is about whether the exit branch is
770  // executed, not about whether it is taken.
771  if (!L->getLoopLatch() ||
772  !DT->dominates(IncomingBB, L->getLoopLatch()))
773  continue;
774 
775  // Get condition that leads to the exit path.
776  auto *TermInst = IncomingBB->getTerminator();
777 
778  Value *Cond = nullptr;
779  if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
780  // Must be a conditional branch, otherwise the block
781  // should not be in the loop.
782  Cond = BI->getCondition();
783  } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
784  Cond = SI->getCondition();
785  else
786  continue;
787 
788  if (!L->isLoopInvariant(Cond))
789  continue;
790 
791  auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
792 
793  // Only deal with PHIs in the loop header.
794  if (!ExitVal || ExitVal->getParent() != L->getHeader())
795  continue;
796 
797  // If ExitVal is a PHI on the loop header, then we know its
798  // value along this exit because the exit can only be taken
799  // on the first iteration.
800  auto *LoopPreheader = L->getLoopPreheader();
801  assert(LoopPreheader && "Invalid loop");
802  int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
803  if (PreheaderIdx != -1) {
804  assert(ExitVal->getParent() == L->getHeader() &&
805  "ExitVal must be in loop header");
806  MadeAnyChanges = true;
807  PN.setIncomingValue(IncomingValIdx,
808  ExitVal->getIncomingValue(PreheaderIdx));
809  }
810  }
811  }
812  }
813  return MadeAnyChanges;
814 }
815 
816 /// Check whether it is possible to delete the loop after rewriting exit
817 /// value. If it is possible, ignore ReplaceExitValue and do rewriting
818 /// aggressively.
819 bool IndVarSimplify::canLoopBeDeleted(
820  Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
821  BasicBlock *Preheader = L->getLoopPreheader();
822  // If there is no preheader, the loop will not be deleted.
823  if (!Preheader)
824  return false;
825 
826  // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1.
827  // We obviate multiple ExitingBlocks case for simplicity.
828  // TODO: If we see testcase with multiple ExitingBlocks can be deleted
829  // after exit value rewriting, we can enhance the logic here.
830  SmallVector<BasicBlock *, 4> ExitingBlocks;
831  L->getExitingBlocks(ExitingBlocks);
832  SmallVector<BasicBlock *, 8> ExitBlocks;
833  L->getUniqueExitBlocks(ExitBlocks);
834  if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1)
835  return false;
836 
837  BasicBlock *ExitBlock = ExitBlocks[0];
838  BasicBlock::iterator BI = ExitBlock->begin();
839  while (PHINode *P = dyn_cast<PHINode>(BI)) {
840  Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);
841 
842  // If the Incoming value of P is found in RewritePhiSet, we know it
843  // could be rewritten to use a loop invariant value in transformation
844  // phase later. Skip it in the loop invariant check below.
845  bool found = false;
846  for (const RewritePhi &Phi : RewritePhiSet) {
847  unsigned i = Phi.Ith;
848  if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
849  found = true;
850  break;
851  }
852  }
853 
854  Instruction *I;
855  if (!found && (I = dyn_cast<Instruction>(Incoming)))
856  if (!L->hasLoopInvariantOperands(I))
857  return false;
858 
859  ++BI;
860  }
861 
862  for (auto *BB : L->blocks())
863  if (llvm::any_of(*BB, [](Instruction &I) {
864  return I.mayHaveSideEffects();
865  }))
866  return false;
867 
868  return true;
869 }
870 
871 //===----------------------------------------------------------------------===//
872 // IV Widening - Extend the width of an IV to cover its widest uses.
873 //===----------------------------------------------------------------------===//
874 
875 namespace {
876 
877 // Collect information about induction variables that are used by sign/zero
878 // extend operations. This information is recorded by CollectExtend and provides
879 // the input to WidenIV.
struct WideIVInfo {
  // The narrow induction variable PHI whose extend users are being collected.
  PHINode *NarrowIV = nullptr;

  // Widest integer type created by a [sz]ext of the narrow IV.
  Type *WidestNativeType = nullptr;

  // Was a sext user seen before a zext?
  bool IsSigned = false;
};
889 
890 } // end anonymous namespace
891 
892 /// Update information about the induction variable that is extended by this
893 /// sign or zero extend operation. This is used to determine the final width of
894 /// the IV before actually widening it.
895 static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
896  const TargetTransformInfo *TTI) {
897  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
898  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
899  return;
900 
901  Type *Ty = Cast->getType();
902  uint64_t Width = SE->getTypeSizeInBits(Ty);
903  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
904  return;
905 
906  // Check that `Cast` actually extends the induction variable (we rely on this
907  // later). This takes care of cases where `Cast` is extending a truncation of
908  // the narrow induction variable, and thus can end up being narrower than the
909  // "narrow" induction variable.
910  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
911  if (NarrowIVWidth >= Width)
912  return;
913 
914  // Cast is either an sext or zext up to this point.
915  // We should not widen an indvar if arithmetics on the wider indvar are more
916  // expensive than those on the narrower indvar. We check only the cost of ADD
917  // because at least an ADD is required to increment the induction variable. We
918  // could compute more comprehensively the cost of all instructions on the
919  // induction variable when necessary.
920  if (TTI &&
923  Cast->getOperand(0)->getType())) {
924  return;
925  }
926 
927  if (!WI.WidestNativeType) {
928  WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
929  WI.IsSigned = IsSigned;
930  return;
931  }
932 
933  // We extend the IV to satisfy the sign of its first user, arbitrarily.
934  if (WI.IsSigned != IsSigned)
935  return;
936 
937  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
938  WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
939 }
940 
941 namespace {
942 
943 /// Record a link in the Narrow IV def-use chain along with the WideIV that
944 /// computes the same value as the Narrow IV def. This avoids caching Use*
945 /// pointers.
struct NarrowIVDefUse {
  // A def on the narrow IV's def-use chain.
  Instruction *NarrowDef = nullptr;
  // The user of NarrowDef currently being considered for widening.
  Instruction *NarrowUse = nullptr;
  // The wide instruction that computes the same value as NarrowDef.
  Instruction *WideDef = nullptr;

  // True if the narrow def is never negative. Tracking this information lets
  // us use a sign extension instead of a zero extension or vice versa, when
  // profitable and legal.
  bool NeverNegative = false;

  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
                 bool NeverNegative)
      : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
        NeverNegative(NeverNegative) {}
};
961 
962 /// The goal of this transform is to remove sign and zero extends without
963 /// creating any new induction variables. To do this, it creates a new phi of
964 /// the wider type and redirects all users, either removing extends or inserting
965 /// truncs whenever we stop propagating the type.
966 class WidenIV {
967  // Parameters
968  PHINode *OrigPhi;
969  Type *WideType;
970 
971  // Context
972  LoopInfo *LI;
973  Loop *L;
974  ScalarEvolution *SE;
975  DominatorTree *DT;
976 
977  // Does the module have any calls to the llvm.experimental.guard intrinsic
978  // at all? If not we can avoid scanning instructions looking for guards.
979  bool HasGuards;
980 
981  // Result
982  PHINode *WidePhi = nullptr;
983  Instruction *WideInc = nullptr;
984  const SCEV *WideIncExpr = nullptr;
986 
988  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
989 
990  enum ExtendKind { ZeroExtended, SignExtended, Unknown };
991 
992  // A map tracking the kind of extension used to widen each narrow IV
993  // and narrow IV user.
994  // Key: pointer to a narrow IV or IV user.
995  // Value: the kind of extension used to widen this Instruction.
996  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;
997 
998  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;
999 
1000  // A map with control-dependent ranges for post increment IV uses. The key is
1001  // a pair of IV def and a use of this def denoting the context. The value is
1002  // a ConstantRange representing possible values of the def at the given
1003  // context.
1004  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;
1005 
1006  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
1007  Instruction *UseI) {
1008  DefUserPair Key(Def, UseI);
1009  auto It = PostIncRangeInfos.find(Key);
1010  return It == PostIncRangeInfos.end()
1012  : Optional<ConstantRange>(It->second);
1013  }
1014 
1015  void calculatePostIncRanges(PHINode *OrigPhi);
1016  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);
1017 
1018  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
1019  DefUserPair Key(Def, UseI);
1020  auto It = PostIncRangeInfos.find(Key);
1021  if (It == PostIncRangeInfos.end())
1022  PostIncRangeInfos.insert({Key, R});
1023  else
1024  It->second = R.intersectWith(It->second);
1025  }
1026 
1027 public:
1028  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
1030  bool HasGuards)
1031  : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
1032  L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
1033  HasGuards(HasGuards), DeadInsts(DI) {
1034  assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
1035  ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
1036  }
1037 
1038  PHINode *createWideIV(SCEVExpander &Rewriter);
1039 
1040 protected:
1041  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
1042  Instruction *Use);
1043 
1044  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
1045  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
1046  const SCEVAddRecExpr *WideAR);
1047  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);
1048 
1049  ExtendKind getExtendKind(Instruction *I);
1050 
1051  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;
1052 
1053  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);
1054 
1055  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);
1056 
1057  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
1058  unsigned OpCode) const;
1059 
1060  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
1061 
1062  bool widenLoopCompare(NarrowIVDefUse DU);
1063  bool widenWithVariantLoadUse(NarrowIVDefUse DU);
1064  void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);
1065 
1066  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
1067 };
1068 
1069 } // end anonymous namespace
1070 
1071 Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
1072  bool IsSigned, Instruction *Use) {
1073  // Set the debug location and conservative insertion point.
1074  IRBuilder<> Builder(Use);
1075  // Hoist the insertion point into loop preheaders as far as possible.
1076  for (const Loop *L = LI->getLoopFor(Use->getParent());
1077  L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
1078  L = L->getParentLoop())
1080 
1081  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
1082  Builder.CreateZExt(NarrowOper, WideType);
1083 }
1084 
1085 /// Instantiate a wide operation to replace a narrow operation. This only needs
1086 /// to handle operations that can evaluation to SCEVAddRec. It can safely return
1087 /// 0 for any operation we decide not to clone.
1088 Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
1089  const SCEVAddRecExpr *WideAR) {
1090  unsigned Opcode = DU.NarrowUse->getOpcode();
1091  switch (Opcode) {
1092  default:
1093  return nullptr;
1094  case Instruction::Add:
1095  case Instruction::Mul:
1096  case Instruction::UDiv:
1097  case Instruction::Sub:
1098  return cloneArithmeticIVUser(DU, WideAR);
1099 
1100  case Instruction::And:
1101  case Instruction::Or:
1102  case Instruction::Xor:
1103  case Instruction::Shl:
1104  case Instruction::LShr:
1105  case Instruction::AShr:
1106  return cloneBitwiseIVUser(DU);
1107  }
1108 }
1109 
1110 Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
1111  Instruction *NarrowUse = DU.NarrowUse;
1112  Instruction *NarrowDef = DU.NarrowDef;
1113  Instruction *WideDef = DU.WideDef;
1114 
1115  LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");
1116 
1117  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
1118  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
1119  // invariant and will be folded or hoisted. If it actually comes from a
1120  // widened IV, it should be removed during a future call to widenIVUse.
1121  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
1122  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1123  ? WideDef
1124  : createExtendInst(NarrowUse->getOperand(0), WideType,
1125  IsSigned, NarrowUse);
1126  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1127  ? WideDef
1128  : createExtendInst(NarrowUse->getOperand(1), WideType,
1129  IsSigned, NarrowUse);
1130 
1131  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1132  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1133  NarrowBO->getName());
1134  IRBuilder<> Builder(NarrowUse);
1135  Builder.Insert(WideBO);
1136  WideBO->copyIRFlags(NarrowBO);
1137  return WideBO;
1138 }
1139 
Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
                                            const SCEVAddRecExpr *WideAR) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  // Which operand of the narrow user is the IV def (the one already widened)?
  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;

  // We're trying to find X such that
  //
  //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
  //
  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
  // and check using SCEV if any of them are correct.

  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
  // correct solution to X.
  auto GuessNonIVOperand = [&](bool SignExt) {
    const SCEV *WideLHS;
    const SCEV *WideRHS;

    auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
      if (SignExt)
        return SE->getSignExtendExpr(S, Ty);
      return SE->getZeroExtendExpr(S, Ty);
    };

    // The IV operand maps to the already-computed wide def; the other operand
    // is extended according to the current guess.
    if (IVOpIdx == 0) {
      WideLHS = SE->getSCEV(WideDef);
      const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
      WideRHS = GetExtend(NarrowRHS, WideType);
    } else {
      const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
      WideLHS = GetExtend(NarrowLHS, WideType);
      WideRHS = SE->getSCEV(WideDef);
    }

    // WideUse is "WideDef `op.wide` X" as described in the comment.
    const SCEV *WideUse = nullptr;

    switch (NarrowUse->getOpcode()) {
    default:
      llvm_unreachable("No other possibility!");

    case Instruction::Add:
      WideUse = SE->getAddExpr(WideLHS, WideRHS);
      break;

    case Instruction::Mul:
      WideUse = SE->getMulExpr(WideLHS, WideRHS);
      break;

    case Instruction::UDiv:
      WideUse = SE->getUDivExpr(WideLHS, WideRHS);
      break;

    case Instruction::Sub:
      WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
      break;
    }

    // SCEV expressions are uniqued, so pointer equality suffices here.
    return WideUse == WideAR;
  };

  // First try the extension kind recorded for the narrow def; if that guess
  // does not reproduce WideAR, try the opposite extension before giving up.
  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
  if (!GuessNonIVOperand(SignExtend)) {
    SignExtend = !SignExtend;
    if (!GuessNonIVOperand(SignExtend))
      return nullptr;
  }

  // Materialize the wide operands: the IV operand becomes WideDef, the other
  // operand gets an explicit extend with the kind that was verified above.
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      SignExtend, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      SignExtend, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());

  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}
1231 
1232 WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
1233  auto It = ExtendKindMap.find(I);
1234  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
1235  return It->second;
1236 }
1237 
1238 const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
1239  unsigned OpCode) const {
1240  if (OpCode == Instruction::Add)
1241  return SE->getAddExpr(LHS, RHS);
1242  if (OpCode == Instruction::Sub)
1243  return SE->getMinusSCEV(LHS, RHS);
1244  if (OpCode == Instruction::Mul)
1245  return SE->getMulExpr(LHS, RHS);
1246 
1247  llvm_unreachable("Unsupported opcode.");
1248 }
1249 
1250 /// No-wrap operations can transfer sign extension of their result to their
1251 /// operands. Generate the SCEV value for the widened operation without
1252 /// actually modifying the IR yet. If the expression after extending the
1253 /// operands is an AddRec for this loop, return the AddRec and the kind of
1254 /// extension used.
WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
  // Handle the common case of add<nsw/nuw>
  const unsigned OpCode = DU.NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions supported yet.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return {nullptr, Unknown};

  // One operand (NarrowDef) has already been extended to WideDef. Now determine
  // if extending the other will lead to a recurrence.
  const unsigned ExtendOperIdx =
      DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");

  // The non-IV operand may only be extended if the operation provably does
  // not wrap in the matching (signed/unsigned) sense; otherwise the widened
  // operation could compute a different value.
  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
    cast<OverflowingBinaryOperator>(DU.NarrowUse);
  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return {nullptr, Unknown};

  // When creating this SCEV expr, don't apply the current operations NSW or NUW
  // flags. This instruction may be guarded by control flow that the no-wrap
  // behavior depends on. Non-control-equivalent instructions can be mapped to
  // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
  // semantics to those operations.
  const SCEV *lhs = SE->getSCEV(DU.WideDef);
  const SCEV *rhs = ExtendOperExpr;

  // Let's swap operands to the initial order for the case of non-commutative
  // operations, like SUB. See PR21014.
  if (ExtendOperIdx == 0)
    std::swap(lhs, rhs);
  const SCEVAddRecExpr *AddRec =
      dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));

  // Only a recurrence on this loop is useful for widening.
  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};

  return {AddRec, ExtKind};
}
1302 
1303 /// Is this instruction potentially interesting for further simplification after
1304 /// widening it's type? In other words, can the extend be safely hoisted out of
1305 /// the loop with SCEV reducing the value to a recurrence on the same loop. If
1306 /// so, return the extended recurrence and the kind of extension used. Otherwise
1307 /// return {nullptr, Unknown}.
1308 WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
1309  if (!SE->isSCEVable(DU.NarrowUse->getType()))
1310  return {nullptr, Unknown};
1311 
1312  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
1313  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
1314  SE->getTypeSizeInBits(WideType)) {
1315  // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
1316  // index. So don't follow this use.
1317  return {nullptr, Unknown};
1318  }
1319 
1320  const SCEV *WideExpr;
1321  ExtendKind ExtKind;
1322  if (DU.NeverNegative) {
1323  WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1324  if (isa<SCEVAddRecExpr>(WideExpr))
1325  ExtKind = SignExtended;
1326  else {
1327  WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1328  ExtKind = ZeroExtended;
1329  }
1330  } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
1331  WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1332  ExtKind = SignExtended;
1333  } else {
1334  WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1335  ExtKind = ZeroExtended;
1336  }
1337  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
1338  if (!AddRec || AddRec->getLoop() != L)
1339  return {nullptr, Unknown};
1340  return {AddRec, ExtKind};
1341 }
1342 
1343 /// This IV user cannot be widened. Replace this use of the original narrow IV
1344 /// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
1345 static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
1346  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1347  if (!InsertPt)
1348  return;
1349  LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
1350  << *DU.NarrowUse << "\n");
1351  IRBuilder<> Builder(InsertPt);
1352  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
1353  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
1354 }
1355 
1356 /// If the narrow use is a compare instruction, then widen the compare
1357 // (and possibly the other operand). The extend operation is hoisted into the
1358 // loop preheader as far as possible.
bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
  // Only integer compares can be widened here.
  ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
  if (!Cmp)
    return false;

  // We can legally widen the comparison in the following two cases:
  //
  //  - The signedness of the IV extension and comparison match
  //
  //  - The narrow IV is always positive (and thus its sign extension is equal
  //    to its zero extension).  For instance, let's say we're zero extending
  //    %narrow for the following use
  //
  //      icmp slt i32 %narrow, %val   ... (A)
  //
  //    and %narrow is always positive.  Then
  //
  //      (A) == icmp slt i32 sext(%narrow), sext(%val)
  //          == icmp slt i32 zext(%narrow), sext(%val)
  bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
  if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
    return false;

  // The operand of the compare that is not the narrow IV def.
  Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
  assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");

  // Widen the compare instruction.
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return false;
  IRBuilder<> Builder(InsertPt);
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);

  // Widen the other operand of the compare, if necessary. The extension kind
  // follows the compare's signedness, which the check above proved is safe.
  if (CastWidth < IVWidth) {
    Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
    DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
  }
  return true;
}
1401 
1402 /// If the narrow use is an instruction whose two operands are the defining
1403 /// instruction of DU and a load instruction, then we have the following:
1404 /// if the load is hoisted outside the loop, then we do not reach this function
1405 /// as scalar evolution analysis works fine in widenIVUse with variables
1406 /// hoisted outside the loop and efficient code is subsequently generated by
1407 /// not emitting truncate instructions. But when the load is not hoisted
1408 /// (whether due to limitation in alias analysis or due to a true legality),
1409 /// then scalar evolution can not proceed with loop variant values and
1410 /// inefficient code is generated. This function handles the non-hoisted load
1411 /// special case by making the optimization generate the same type of code for
1412 /// hoisted and non-hoisted load (widen use and eliminate sign extend
1413 /// instruction). This special case is important especially when the induction
1414 /// variables are affecting addressing mode in code generation.
bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  // Handle the common case of add<nsw/nuw>
  const unsigned OpCode = NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return false;

  // The operand that is not defined by NarrowDef of DU. Let's call it the
  // other operand.
  unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
         "bad DU");

  // Extending the other operand is only legal if the operation carries the
  // no-wrap flag matching the narrow def's extension kind.
  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
    cast<OverflowingBinaryOperator>(NarrowUse);
  ExtendKind ExtKind = getExtendKind(NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
      SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
      SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return false;

  // We are interested in the other operand being a load instruction.
  // But, we should look into relaxing this restriction later on.
  // NOTE(review): a null `I` (e.g. a constant operand) passes this check —
  // only a non-load *instruction* is rejected; presumably intentional, since
  // non-instruction operands cannot be the unhoisted load this path targets.
  auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx));
  if (I && I->getOpcode() != Instruction::Load)
    return false;

  // Verifying that Defining operand is an AddRec
  const SCEV *Op1 = SE->getSCEV(WideDef);
  const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
  if (!AddRecOp1 || AddRecOp1->getLoop() != L)
    return false;
  // Verifying that other operand is an Extend.
  if (ExtKind == SignExtended) {
    if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
      return false;
  } else {
    if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
      return false;
  }

  // The transformation is only worthwhile if *every* user of NarrowUse is an
  // extend of the expected kind to the wide type; otherwise narrow uses
  // would remain after codegen.
  if (ExtKind == SignExtended) {
    for (Use &U : NarrowUse->uses()) {
      SExtInst *User = dyn_cast<SExtInst>(U.getUser());
      if (!User || User->getType() != WideType)
        return false;
    }
  } else { // ExtKind == ZeroExtended
    for (Use &U : NarrowUse->uses()) {
      ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
      if (!User || User->getType() != WideType)
        return false;
    }
  }

  return true;
}
1482 
1483 /// Special Case for widening with variant Loads (see
1484 /// WidenIV::widenWithVariantLoadUse). This is the code generation part.
void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  ExtendKind ExtKind = getExtendKind(NarrowDef);

  // NOTE(review): message matches cloneArithmeticIVUser's — looks copied;
  // harmless but potentially confusing in debug output.
  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  // Generating a widening use instruction. The IV operand becomes WideDef;
  // the other operand gets an explicit extend of the recorded kind.
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      ExtKind, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      ExtKind, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);

  // Record how the new wide value was produced so later widening of its
  // users can consult getExtendKind.
  if (ExtKind == SignExtended)
    ExtendKindMap[NarrowUse] = SignExtended;
  else
    ExtendKindMap[NarrowUse] = ZeroExtended;

  // Update the Use: every matching-kind extend of NarrowUse to the wide type
  // (widenWithVariantLoadUse verified they all are) is replaced by WideBO.
  if (ExtKind == SignExtended) {
    for (Use &U : NarrowUse->uses()) {
      SExtInst *User = dyn_cast<SExtInst>(U.getUser());
      if (User && User->getType() == WideType) {
        LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
                          << *WideBO << "\n");
        ++NumElimExt;
        User->replaceAllUsesWith(WideBO);
        DeadInsts.emplace_back(User);
      }
    }
  } else { // ExtKind == ZeroExtended
    for (Use &U : NarrowUse->uses()) {
      ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
      if (User && User->getType() == WideType) {
        LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
                          << *WideBO << "\n");
        ++NumElimExt;
        User->replaceAllUsesWith(WideBO);
        DeadInsts.emplace_back(User);
      }
    }
  }
}
1541 
1542 /// Determine whether an individual user of the narrow IV can be widened. If so,
1543 /// return the wide clone of the user.
1544 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1545  assert(ExtendKindMap.count(DU.NarrowDef) &&
1546  "Should already know the kind of extension used to widen NarrowDef");
1547 
1548  // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1549  if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1550  if (LI->getLoopFor(UsePhi->getParent()) != L) {
1551  // For LCSSA phis, sink the truncate outside the loop.
1552  // After SimplifyCFG most loop exit targets have a single predecessor.
1553  // Otherwise fall back to a truncate within the loop.
1554  if (UsePhi->getNumOperands() != 1)
1555  truncateIVUse(DU, DT, LI);
1556  else {
1557  // Widening the PHI requires us to insert a trunc. The logical place
1558  // for this trunc is in the same BB as the PHI. This is not possible if
1559  // the BB is terminated by a catchswitch.
1560  if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1561  return nullptr;
1562 
1563  PHINode *WidePhi =
1564  PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1565  UsePhi);
1566  WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1567  IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
1568  Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1569  UsePhi->replaceAllUsesWith(Trunc);
1570  DeadInsts.emplace_back(UsePhi);
1571  LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
1572  << *WidePhi << "\n");
1573  }
1574  return nullptr;
1575  }
1576  }
1577 
1578  // This narrow use can be widened by a sext if it's non-negative or its narrow
1579  // def was widended by a sext. Same for zext.
1580  auto canWidenBySExt = [&]() {
1581  return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
1582  };
1583  auto canWidenByZExt = [&]() {
1584  return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
1585  };
1586 
1587  // Our raison d'etre! Eliminate sign and zero extension.
1588  if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
1589  (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1590  Value *NewDef = DU.WideDef;
1591  if (DU.NarrowUse->getType() != WideType) {
1592  unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1593  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1594  if (CastWidth < IVWidth) {
1595  // The cast isn't as wide as the IV, so insert a Trunc.
1596  IRBuilder<> Builder(DU.NarrowUse);
1597  NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1598  }
1599  else {
1600  // A wider extend was hidden behind a narrower one. This may induce
1601  // another round of IV widening in which the intermediate IV becomes
1602  // dead. It should be very rare.
1603  LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1604  << " not wide enough to subsume " << *DU.NarrowUse
1605  << "\n");
1606  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1607  NewDef = DU.NarrowUse;
1608  }
1609  }
1610  if (NewDef != DU.NarrowUse) {
1611  LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1612  << " replaced by " << *DU.WideDef << "\n");
1613  ++NumElimExt;
1614  DU.NarrowUse->replaceAllUsesWith(NewDef);
1615  DeadInsts.emplace_back(DU.NarrowUse);
1616  }
1617  // Now that the extend is gone, we want to expose it's uses for potential
1618  // further simplification. We don't need to directly inform SimplifyIVUsers
1619  // of the new users, because their parent IV will be processed later as a
1620  // new loop phi. If we preserved IVUsers analysis, we would also want to
1621  // push the uses of WideDef here.
1622 
1623  // No further widening is needed. The deceased [sz]ext had done it for us.
1624  return nullptr;
1625  }
1626 
1627  // Does this user itself evaluate to a recurrence after widening?
1628  WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1629  if (!WideAddRec.first)
1630  WideAddRec = getWideRecurrence(DU);
1631 
1632  assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
1633  if (!WideAddRec.first) {
1634  // If use is a loop condition, try to promote the condition instead of
1635  // truncating the IV first.
1636  if (widenLoopCompare(DU))
1637  return nullptr;
1638 
1639  // We are here about to generate a truncate instruction that may hurt
1640  // performance because the scalar evolution expression computed earlier
1641  // in WideAddRec.first does not indicate a polynomial induction expression.
1642  // In that case, look at the operands of the use instruction to determine
1643  // if we can still widen the use instead of truncating its operand.
1644  if (widenWithVariantLoadUse(DU)) {
1645  widenWithVariantLoadUseCodegen(DU);
1646  return nullptr;
1647  }
1648 
1649  // This user does not evaluate to a recurrence after widening, so don't
1650  // follow it. Instead insert a Trunc to kill off the original use,
1651  // eventually isolating the original narrow IV so it can be removed.
1652  truncateIVUse(DU, DT, LI);
1653  return nullptr;
1654  }
1655  // Assume block terminators cannot evaluate to a recurrence. We can't to
1656  // insert a Trunc after a terminator if there happens to be a critical edge.
1657  assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1658  "SCEV is not expected to evaluate a block terminator");
1659 
1660  // Reuse the IV increment that SCEVExpander created as long as it dominates
1661  // NarrowUse.
1662  Instruction *WideUse = nullptr;
1663  if (WideAddRec.first == WideIncExpr &&
1664  Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1665  WideUse = WideInc;
1666  else {
1667  WideUse = cloneIVUser(DU, WideAddRec.first);
1668  if (!WideUse)
1669  return nullptr;
1670  }
1671  // Evaluation of WideAddRec ensured that the narrow expression could be
1672  // extended outside the loop without overflow. This suggests that the wide use
1673  // evaluates to the same expression as the extended narrow use, but doesn't
1674  // absolutely guarantee it. Hence the following failsafe check. In rare cases
1675  // where it fails, we simply throw away the newly created wide use.
1676  if (WideAddRec.first != SE->getSCEV(WideUse)) {
1677  LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
1678  << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
1679  << "\n");
1680  DeadInsts.emplace_back(WideUse);
1681  return nullptr;
1682  }
1683 
1684  // if we reached this point then we are going to replace
1685  // DU.NarrowUse with WideUse. Reattach DbgValue then.
1686  replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);
1687 
1688  ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1689  // Returning WideUse pushes it on the worklist.
1690  return WideUse;
1691 }
1692 
1693 /// Add eligible users of NarrowDef to NarrowIVUsers.
1694 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1695  const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1696  bool NonNegativeDef =
1697  SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1698  SE->getConstant(NarrowSCEV->getType(), 0));
1699  for (User *U : NarrowDef->users()) {
1700  Instruction *NarrowUser = cast<Instruction>(U);
1701 
1702  // Handle data flow merges and bizarre phi cycles.
1703  if (!Widened.insert(NarrowUser).second)
1704  continue;
1705 
1706  bool NonNegativeUse = false;
1707  if (!NonNegativeDef) {
1708  // We might have a control-dependent range information for this context.
1709  if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1710  NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1711  }
1712 
1713  NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1714  NonNegativeDef || NonNegativeUse);
1715  }
1716 }
1717 
/// Process a single induction variable. First use the SCEVExpander to create a
/// wide induction variable that evaluates to the same recurrence as the
/// original narrow IV. Then use a worklist to forward traverse the narrow IV's
/// def-use chain. After widenIVUse has processed all interesting IV users, the
/// narrow IV will be isolated for removal by DeleteDeadPHIs.
///
/// It would be simpler to delete uses as they are processed, but we must avoid
/// invalidating SCEV expressions.
///
/// Returns the new wide phi on success, or null if this IV cannot be widened
/// (not an affine AddRec in this loop, or extension would overflow).
PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
  // Is this phi an induction variable?
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
  if (!AddRec)
    return nullptr;

  // Widen the induction variable expression. The extension kind (sign vs.
  // zero) was decided when WideIVInfo was collected for this phi.
  const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
                               ? SE->getSignExtendExpr(AddRec, WideType)
                               : SE->getZeroExtendExpr(AddRec, WideType);

  assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
         "Expect the new IV expression to preserve its type");

  // Can the IV be extended outside the loop without overflow?  If extension
  // folded into the AddRec, SCEV proved no-wrap; otherwise give up.
  AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return nullptr;

  // An AddRec must have loop-invariant operands. Since this AddRec is
  // materialized by a loop header phi, the expression cannot have any post-loop
  // operands, so they must dominate the loop header.
  assert(
      SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
      SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
      "Loop header phi recurrence inputs do not dominate the loop");

  // Iterate over IV uses (including transitive ones) looking for IV increments
  // of the form 'add nsw %iv, <const>'. For each increment and each use of
  // the increment calculate control-dependent range information basing on
  // dominating conditions inside of the loop (e.g. a range check inside of the
  // loop). Calculated ranges are stored in PostIncRangeInfos map.
  //
  // Control-dependent range information is later used to prove that a narrow
  // definition is not negative (see pushNarrowIVUsers). It's difficult to do
  // this on demand because when pushNarrowIVUsers needs this information some
  // of the dominating conditions might be already widened.
  if (UsePostIncrementRanges)
    calculatePostIncRanges(OrigPhi);

  // The rewriter provides a value for the desired IV expression. This may
  // either find an existing phi or materialize a new one. Either way, we
  // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
  // of the phi-SCC dominates the loop entry.
  Instruction *InsertPt = &L->getHeader()->front();
  WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));

  // Remembering the WideIV increment generated by SCEVExpander allows
  // widenIVUse to reuse it when widening the narrow IV's increment. We don't
  // employ a general reuse mechanism because the call above is the only call to
  // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
  if (BasicBlock *LatchBlock = L->getLoopLatch()) {
    WideInc =
      cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
    WideIncExpr = SE->getSCEV(WideInc);
    // Propagate the debug location associated with the original loop increment
    // to the new (widened) increment.
    auto *OrigInc =
      cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
    WideInc->setDebugLoc(OrigInc->getDebugLoc());
  }

  LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
  ++NumWidened;

  // Traverse the def-use chain using a worklist starting at the original IV.
  assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" );

  Widened.insert(OrigPhi);
  pushNarrowIVUsers(OrigPhi, WidePhi);

  while (!NarrowIVUsers.empty()) {
    NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();

    // Process a def-use edge. This may replace the use, so don't hold a
    // use_iterator across it.
    Instruction *WideUse = widenIVUse(DU, Rewriter);

    // Follow all def-use edges from the previous narrow use.
    if (WideUse)
      pushNarrowIVUsers(DU.NarrowUse, WideUse);

    // widenIVUse may have removed the def-use edge.
    if (DU.NarrowDef->use_empty())
      DeadInsts.emplace_back(DU.NarrowDef);
  }

  // Attach any debug information to the new PHI.
  replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT);

  return WidePhi;
}
1818 
1819 /// Calculates control-dependent range for the given def at the given context
1820 /// by looking at dominating conditions inside of the loop
1821 void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
1822  Instruction *NarrowUser) {
1823  using namespace llvm::PatternMatch;
1824 
1825  Value *NarrowDefLHS;
1826  const APInt *NarrowDefRHS;
1827  if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
1828  m_APInt(NarrowDefRHS))) ||
1829  !NarrowDefRHS->isNonNegative())
1830  return;
1831 
1832  auto UpdateRangeFromCondition = [&] (Value *Condition,
1833  bool TrueDest) {
1834  CmpInst::Predicate Pred;
1835  Value *CmpRHS;
1836  if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
1837  m_Value(CmpRHS))))
1838  return;
1839 
1841  TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
1842 
1843  auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
1844  auto CmpConstrainedLHSRange =
1845  ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
1846  auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap(
1848 
1849  updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
1850  };
1851 
1852  auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
1853  if (!HasGuards)
1854  return;
1855 
1856  for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
1857  Ctx->getParent()->rend())) {
1858  Value *C = nullptr;
1859  if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
1860  UpdateRangeFromCondition(C, /*TrueDest=*/true);
1861  }
1862  };
1863 
1864  UpdateRangeFromGuards(NarrowUser);
1865 
1866  BasicBlock *NarrowUserBB = NarrowUser->getParent();
1867  // If NarrowUserBB is statically unreachable asking dominator queries may
1868  // yield surprising results. (e.g. the block may not have a dom tree node)
1869  if (!DT->isReachableFromEntry(NarrowUserBB))
1870  return;
1871 
1872  for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
1873  L->contains(DTB->getBlock());
1874  DTB = DTB->getIDom()) {
1875  auto *BB = DTB->getBlock();
1876  auto *TI = BB->getTerminator();
1877  UpdateRangeFromGuards(TI);
1878 
1879  auto *BI = dyn_cast<BranchInst>(TI);
1880  if (!BI || !BI->isConditional())
1881  continue;
1882 
1883  auto *TrueSuccessor = BI->getSuccessor(0);
1884  auto *FalseSuccessor = BI->getSuccessor(1);
1885 
1886  auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
1887  return BBE.isSingleEdge() &&
1888  DT->dominates(BBE, NarrowUser->getParent());
1889  };
1890 
1891  if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
1892  UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
1893 
1894  if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
1895  UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
1896  }
1897 }
1898 
1899 /// Calculates PostIncRangeInfos map for the given IV
1900 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
1903  Worklist.push_back(OrigPhi);
1904  Visited.insert(OrigPhi);
1905 
1906  while (!Worklist.empty()) {
1907  Instruction *NarrowDef = Worklist.pop_back_val();
1908 
1909  for (Use &U : NarrowDef->uses()) {
1910  auto *NarrowUser = cast<Instruction>(U.getUser());
1911 
1912  // Don't go looking outside the current loop.
1913  auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1914  if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1915  continue;
1916 
1917  if (!Visited.insert(NarrowUser).second)
1918  continue;
1919 
1920  Worklist.push_back(NarrowUser);
1921 
1922  calculatePostIncRange(NarrowDef, NarrowUser);
1923  }
1924  }
1925 }
1926 
1927 //===----------------------------------------------------------------------===//
1928 // Live IV Reduction - Minimize IVs live across the loop.
1929 //===----------------------------------------------------------------------===//
1930 
1931 //===----------------------------------------------------------------------===//
1932 // Simplification of IV users based on SCEV evaluation.
1933 //===----------------------------------------------------------------------===//
1934 
1935 namespace {
1936 
/// IVVisitor implementation used by simplifyUsersOfIV to collect information
/// about sign/zero extensions of an induction variable phi.  The collected
/// WideIVInfo (WI) drives the subsequent IV-widening decision.
class IndVarSimplifyVisitor : public IVVisitor {
  ScalarEvolution *SE;
  const TargetTransformInfo *TTI;
  PHINode *IVPhi;     // The narrow IV phi being analyzed.

public:
  WideIVInfo WI;      // Accumulated widening info; read by simplifyAndExtend.

  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
                        const TargetTransformInfo *TTI,
                        const DominatorTree *DTree)
    : SE(SCEV), TTI(TTI), IVPhi(IV) {
    // DT is a member inherited from the IVVisitor base class.
    DT = DTree;
    WI.NarrowIV = IVPhi;
  }

  // Implement the interface used by simplifyUsersOfIV.
  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
};
1956 
1957 } // end anonymous namespace
1958 
1959 /// Iteratively perform simplification on a worklist of IV users. Each
1960 /// successive simplification may push more users which may themselves be
1961 /// candidates for simplification.
1962 ///
1963 /// Sign/Zero extend elimination is interleaved with IV simplification.
1964 bool IndVarSimplify::simplifyAndExtend(Loop *L,
1965  SCEVExpander &Rewriter,
1966  LoopInfo *LI) {
1968 
1969  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1970  Intrinsic::getName(Intrinsic::experimental_guard));
1971  bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1972 
1973  SmallVector<PHINode*, 8> LoopPhis;
1974  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1975  LoopPhis.push_back(cast<PHINode>(I));
1976  }
1977  // Each round of simplification iterates through the SimplifyIVUsers worklist
1978  // for all current phis, then determines whether any IVs can be
1979  // widened. Widening adds new phis to LoopPhis, inducing another round of
1980  // simplification on the wide IVs.
1981  bool Changed = false;
1982  while (!LoopPhis.empty()) {
1983  // Evaluate as many IV expressions as possible before widening any IVs. This
1984  // forces SCEV to set no-wrap flags before evaluating sign/zero
1985  // extension. The first time SCEV attempts to normalize sign/zero extension,
1986  // the result becomes final. So for the most predictable results, we delay
1987  // evaluation of sign/zero extend evaluation until needed, and avoid running
1988  // other SCEV based analysis prior to simplifyAndExtend.
1989  do {
1990  PHINode *CurrIV = LoopPhis.pop_back_val();
1991 
1992  // Information about sign/zero extensions of CurrIV.
1993  IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1994 
1995  Changed |=
1996  simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor);
1997 
1998  if (Visitor.WI.WidestNativeType) {
1999  WideIVs.push_back(Visitor.WI);
2000  }
2001  } while(!LoopPhis.empty());
2002 
2003  for (; !WideIVs.empty(); WideIVs.pop_back()) {
2004  WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
2005  if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
2006  Changed = true;
2007  LoopPhis.push_back(WidePhi);
2008  }
2009  }
2010  }
2011  return Changed;
2012 }
2013 
2014 //===----------------------------------------------------------------------===//
2015 // linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
2016 //===----------------------------------------------------------------------===//
2017 
2018 /// Given an Value which is hoped to be part of an add recurance in the given
2019 /// loop, return the associated Phi node if so. Otherwise, return null. Note
2020 /// that this is less general than SCEVs AddRec checking.
2022  Instruction *IncI = dyn_cast<Instruction>(IncV);
2023  if (!IncI)
2024  return nullptr;
2025 
2026  switch (IncI->getOpcode()) {
2027  case Instruction::Add:
2028  case Instruction::Sub:
2029  break;
2030  case Instruction::GetElementPtr:
2031  // An IV counter must preserve its type.
2032  if (IncI->getNumOperands() == 2)
2033  break;
2035  default:
2036  return nullptr;
2037  }
2038 
2039  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
2040  if (Phi && Phi->getParent() == L->getHeader()) {
2041  if (L->isLoopInvariant(IncI->getOperand(1)))
2042  return Phi;
2043  return nullptr;
2044  }
2045  if (IncI->getOpcode() == Instruction::GetElementPtr)
2046  return nullptr;
2047 
2048  // Allow add/sub to be commuted.
2049  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
2050  if (Phi && Phi->getParent() == L->getHeader()) {
2051  if (L->isLoopInvariant(IncI->getOperand(0)))
2052  return Phi;
2053  }
2054  return nullptr;
2055 }
2056 
2057 /// Whether the current loop exit test is based on this value. Currently this
2058 /// is limited to a direct use in the loop condition.
2059 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
2060  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2061  ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
2062  // TODO: Allow non-icmp loop test.
2063  if (!ICmp)
2064  return false;
2065 
2066  // TODO: Allow indirect use.
2067  return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
2068 }
2069 
2070 /// linearFunctionTestReplace policy. Return true unless we can show that the
2071 /// current exit test is already sufficiently canonical.
2072 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
2073  assert(L->getLoopLatch() && "Must be in simplified form");
2074 
2075  // Avoid converting a constant or loop invariant test back to a runtime
2076  // test. This is critical for when SCEV's cached ExitCount is less precise
2077  // than the current IR (such as after we've proven a particular exit is
2078  // actually dead and thus the BE count never reaches our ExitCount.)
2079  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2080  if (L->isLoopInvariant(BI->getCondition()))
2081  return false;
2082 
2083  // Do LFTR to simplify the exit condition to an ICMP.
2084  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
2085  if (!Cond)
2086  return true;
2087 
2088  // Do LFTR to simplify the exit ICMP to EQ/NE
2089  ICmpInst::Predicate Pred = Cond->getPredicate();
2090  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
2091  return true;
2092 
2093  // Look for a loop invariant RHS
2094  Value *LHS = Cond->getOperand(0);
2095  Value *RHS = Cond->getOperand(1);
2096  if (!L->isLoopInvariant(RHS)) {
2097  if (!L->isLoopInvariant(LHS))
2098  return true;
2099  std::swap(LHS, RHS);
2100  }
2101  // Look for a simple IV counter LHS
2102  PHINode *Phi = dyn_cast<PHINode>(LHS);
2103  if (!Phi)
2104  Phi = getLoopPhiForCounter(LHS, L);
2105 
2106  if (!Phi)
2107  return true;
2108 
2109  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
2110  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
2111  if (Idx < 0)
2112  return true;
2113 
2114  // Do LFTR if the exit condition's IV is *not* a simple counter.
2115  Value *IncV = Phi->getIncomingValue(Idx);
2116  return Phi != getLoopPhiForCounter(IncV, L);
2117 }
2118 
2119 /// Return true if undefined behavior would provable be executed on the path to
2120 /// OnPathTo if Root produced a posion result. Note that this doesn't say
2121 /// anything about whether OnPathTo is actually executed or whether Root is
2122 /// actually poison. This can be used to assess whether a new use of Root can
2123 /// be added at a location which is control equivalent with OnPathTo (such as
2124 /// immediately before it) without introducing UB which didn't previously
2125 /// exist. Note that a false result conveys no information.
2127  Instruction *OnPathTo,
2128  DominatorTree *DT) {
2129  // Basic approach is to assume Root is poison, propagate poison forward
2130  // through all users we can easily track, and then check whether any of those
2131  // users are provable UB and must execute before out exiting block might
2132  // exit.
2133 
2134  // The set of all recursive users we've visited (which are assumed to all be
2135  // poison because of said visit)
2136  SmallSet<const Value *, 16> KnownPoison;
2138  Worklist.push_back(Root);
2139  while (!Worklist.empty()) {
2140  const Instruction *I = Worklist.pop_back_val();
2141 
2142  // If we know this must trigger UB on a path leading our target.
2143  if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
2144  return true;
2145 
2146  // If we can't analyze propagation through this instruction, just skip it
2147  // and transitive users. Safe as false is a conservative result.
2148  if (!propagatesFullPoison(I) && I != Root)
2149  continue;
2150 
2151  if (KnownPoison.insert(I).second)
2152  for (const User *User : I->users())
2153  Worklist.push_back(cast<Instruction>(User));
2154  }
2155 
2156  // Might be non-UB, or might have a path we couldn't prove must execute on
2157  // way to exiting bb.
2158  return false;
2159 }
2160 
2161 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
2162 /// down to checking that all operands are constant and listing instructions
2163 /// that may hide undef.
2165  unsigned Depth) {
2166  if (isa<Constant>(V))
2167  return !isa<UndefValue>(V);
2168 
2169  if (Depth >= 6)
2170  return false;
2171 
2172  // Conservatively handle non-constant non-instructions. For example, Arguments
2173  // may be undef.
2174  Instruction *I = dyn_cast<Instruction>(V);
2175  if (!I)
2176  return false;
2177 
2178  // Load and return values may be undef.
2179  if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
2180  return false;
2181 
2182  // Optimistically handle other instructions.
2183  for (Value *Op : I->operands()) {
2184  if (!Visited.insert(Op).second)
2185  continue;
2186  if (!hasConcreteDefImpl(Op, Visited, Depth+1))
2187  return false;
2188  }
2189  return true;
2190 }
2191 
2192 /// Return true if the given value is concrete. We must prove that undef can
2193 /// never reach it.
2194 ///
2195 /// TODO: If we decide that this is a good approach to checking for undef, we
2196 /// may factor it into a common location.
2197 static bool hasConcreteDef(Value *V) {
2198  SmallPtrSet<Value*, 8> Visited;
2199  Visited.insert(V);
2200  return hasConcreteDefImpl(V, Visited, 0);
2201 }
2202 
2203 /// Return true if this IV has any uses other than the (soon to be rewritten)
2204 /// loop exit test.
2205 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
2206  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
2207  Value *IncV = Phi->getIncomingValue(LatchIdx);
2208 
2209  for (User *U : Phi->users())
2210  if (U != Cond && U != IncV) return false;
2211 
2212  for (User *U : IncV->users())
2213  if (U != Cond && U != Phi) return false;
2214  return true;
2215 }
2216 
2217 /// Return true if the given phi is a "counter" in L. A counter is an
2218 /// add recurance (of integer or pointer type) with an arbitrary start, and a
2219 /// step of 1. Note that L must have exactly one latch.
2220 static bool isLoopCounter(PHINode* Phi, Loop *L,
2221  ScalarEvolution *SE) {
2222  assert(Phi->getParent() == L->getHeader());
2223  assert(L->getLoopLatch());
2224 
2225  if (!SE->isSCEVable(Phi->getType()))
2226  return false;
2227 
2228  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2229  if (!AR || AR->getLoop() != L || !AR->isAffine())
2230  return false;
2231 
2232  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
2233  if (!Step || !Step->isOne())
2234  return false;
2235 
2236  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
2237  Value *IncV = Phi->getIncomingValue(LatchIdx);
2238  return (getLoopPhiForCounter(IncV, L) == Phi);
2239 }
2240 
/// Search the loop header for a loop counter (an add rec w/ step of one)
/// suitable for use by LFTR. If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already a
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    // Avoid comparing an integer IV against a pointer Limit.
    if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
      continue;

    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test. This is legal since performing LFTR could not
      // increase the number of undef users.
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist in
    // the original program. (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case above.)
    // We have to ensure that we don't introduce UB by introducing a use on an
    // iteration where said IV produces poison. Our strategy here differs for
    // pointers and integer IVs. For integers, we strip and reinfer as needed,
    // see code in linearFunctionTestReplace. For pointers, we restrict
    // transforms as there is no good way to reinfer inbounds once lost.
    if (!Phi->getType()->isIntegerTy() &&
        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
      continue;

    const SCEV *Init = AR->getStart();

    if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
      // Don't force a live loop counter if another IV can be used.
      if (AlmostDeadIV(Phi, LatchBlock, Cond))
        continue;

      // Prefer to count-from-zero. This is a more "canonical" counter form. It
      // also prefers integer to pointer IVs.
      if (BestInit->isZero() != Init->isZero()) {
        if (BestInit->isZero())
          continue;
      }
      // If two IVs both count from zero or both count from nonzero then the
      // narrower is likely a dead phi that has been widened. Use the wider phi
      // to allow the other to be eliminated.
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
        continue;
    }
    BestPhi = Phi;
    BestInit = Init;
  }
  return BestPhi;
}
2328 
2329 /// Insert an IR expression which computes the value held by the IV IndVar
2330 /// (which must be an loop counter w/unit stride) after the backedge of loop L
2331 /// is taken ExitCount times.
2332 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
2333  const SCEV *ExitCount, bool UsePostInc, Loop *L,
2334  SCEVExpander &Rewriter, ScalarEvolution *SE) {
2335  assert(isLoopCounter(IndVar, L, SE));
2336  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2337  const SCEV *IVInit = AR->getStart();
2338 
2339  // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
2340  // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
2341  // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
2342  // the existing GEPs whenever possible.
2343  if (IndVar->getType()->isPointerTy() &&
2344  !ExitCount->getType()->isPointerTy()) {
2345  // IVOffset will be the new GEP offset that is interpreted by GEP as a
2346  // signed value. ExitCount on the other hand represents the loop trip count,
2347  // which is an unsigned value. FindLoopCounter only allows induction
2348  // variables that have a positive unit stride of one. This means we don't
2349  // have to handle the case of negative offsets (yet) and just need to zero
2350  // extend ExitCount.
2351  Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2352  const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
2353  if (UsePostInc)
2354  IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));
2355 
2356  // Expand the code for the iteration count.
2357  assert(SE->isLoopInvariant(IVOffset, L) &&
2358  "Computed iteration count is not loop invariant!");
2359 
2360  // We could handle pointer IVs other than i8*, but we need to compensate for
2361  // gep index scaling.
2363  cast<PointerType>(IndVar->getType())
2364  ->getElementType())->isOne() &&
2365  "unit stride pointer IV must be i8*");
2366 
2367  const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
2368  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2369  return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
2370  } else {
2371  // In any other case, convert both IVInit and ExitCount to integers before
2372  // comparing. This may result in SCEV expansion of pointers, but in practice
2373  // SCEV will fold the pointer arithmetic away as such:
2374  // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
2375  //
2376  // Valid Cases: (1) both integers is most common; (2) both may be pointers
2377  // for simple memset-style loops.
2378  //
2379  // IVInit integer and ExitCount pointer would only occur if a canonical IV
2380  // were generated on top of case #2, which is not expected.
2381 
2382  assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2383  // For unit stride, IVCount = Start + ExitCount with 2's complement
2384  // overflow.
2385 
2386  // For integer IVs, truncate the IV before computing IVInit + BECount,
2387  // unless we know apriori that the limit must be a constant when evaluated
2388  // in the bitwidth of the IV. We prefer (potentially) keeping a truncate
2389  // of the IV in the loop over a (potentially) expensive expansion of the
2390  // widened exit count add(zext(add)) expression.
2391  if (SE->getTypeSizeInBits(IVInit->getType())
2392  > SE->getTypeSizeInBits(ExitCount->getType())) {
2393  if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
2394  ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
2395  else
2396  IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
2397  }
2398 
2399  const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);
2400 
2401  if (UsePostInc)
2402  IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
2403 
2404  // Expand the code for the iteration count.
2405  assert(SE->isLoopInvariant(IVLimit, L) &&
2406  "Computed iteration count is not loop invariant!");
2407  // Ensure that we generate the same type as IndVar, or a smaller integer
2408  // type. In the presence of null pointer values, we have an integer type
2409  // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2410  Type *LimitTy = ExitCount->getType()->isPointerTy() ?
2411  IndVar->getType() : ExitCount->getType();
2412  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2413  return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
2414  }
2415 }
2416 
2417 /// This method rewrites the exit condition of the loop to be a canonical !=
2418 /// comparison against the incremented loop induction variable. This pass is
2419 /// able to rewrite the exit tests of any loop where the SCEV analysis can
2420 /// determine a loop-invariant trip count of the loop, which is actually a much
2421 /// broader range than just linear tests.
2422 bool IndVarSimplify::
2423 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
2424  const SCEV *ExitCount,
2425  PHINode *IndVar, SCEVExpander &Rewriter) {
2426  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
2427  assert(isLoopCounter(IndVar, L, SE));
2428  Instruction * const IncVar =
2429  cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
2430 
2431  // Initialize CmpIndVar to the preincremented IV.
2432  Value *CmpIndVar = IndVar;
2433  bool UsePostInc = false;
2434 
2435  // If the exiting block is the same as the backedge block, we prefer to
2436  // compare against the post-incremented value, otherwise we must compare
2437  // against the preincremented value.
2438  if (ExitingBB == L->getLoopLatch()) {
2439  // For pointer IVs, we chose to not strip inbounds which requires us not
2440  // to add a potentially UB introducing use. We need to either a) show
2441  // the loop test we're modifying is already in post-inc form, or b) show
2442  // that adding a use must not introduce UB.
2443  bool SafeToPostInc =
2444  IndVar->getType()->isIntegerTy() ||
2445  isLoopExitTestBasedOn(IncVar, ExitingBB) ||
2446  mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
2447  if (SafeToPostInc) {
2448  UsePostInc = true;
2449  CmpIndVar = IncVar;
2450  }
2451  }
2452 
2453  // It may be necessary to drop nowrap flags on the incrementing instruction
2454  // if either LFTR moves from a pre-inc check to a post-inc check (in which
2455  // case the increment might have previously been poison on the last iteration
2456  // only) or if LFTR switches to a different IV that was previously dynamically
2457  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
2458  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
2459  // check), because the pre-inc addrec flags may be adopted from the original
2460  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
2461  // TODO: This handling is inaccurate for one case: If we switch to a
2462  // dynamically dead IV that wraps on the first loop iteration only, which is
2463  // not covered by the post-inc addrec. (If the new IV was not dynamically
2464  // dead, it could not be poison on the first iteration in the first place.)
2465  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
2466  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
2467  if (BO->hasNoUnsignedWrap())
2468  BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
2469  if (BO->hasNoSignedWrap())
2470  BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
2471  }
2472 
2473  Value *ExitCnt = genLoopLimit(
2474  IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
2475  assert(ExitCnt->getType()->isPointerTy() ==
2476  IndVar->getType()->isPointerTy() &&
2477  "genLoopLimit missed a cast");
2478 
2479  // Insert a new icmp_ne or icmp_eq instruction before the branch.
2480  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2482  if (L->contains(BI->getSuccessor(0)))
2483  P = ICmpInst::ICMP_NE;
2484  else
2485  P = ICmpInst::ICMP_EQ;
2486 
2487  IRBuilder<> Builder(BI);
2488 
2489  // The new loop exit condition should reuse the debug location of the
2490  // original loop exit condition.
2491  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
2492  Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
2493 
2494  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
2495  // avoid the expensive expansion of the limit expression in the wider type,
2496  // emit a truncate to narrow the IV to the ExitCount type. This is safe
2497  // since we know (from the exit count bitwidth), that we can't self-wrap in
2498  // the narrower type.
2499  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
2500  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
2501  if (CmpIndVarSize > ExitCntSize) {
2502  assert(!CmpIndVar->getType()->isPointerTy() &&
2503  !ExitCnt->getType()->isPointerTy());
2504 
2505  // Before resorting to actually inserting the truncate, use the same
2506  // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
2507  // the other side of the comparison instead. We still evaluate the limit
2508  // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
2509  // a truncate within in.
2510  bool Extended = false;
2511  const SCEV *IV = SE->getSCEV(CmpIndVar);
2512  const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2513  ExitCnt->getType());
2514  const SCEV *ZExtTrunc =
2515  SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
2516 
2517  if (ZExtTrunc == IV) {
2518  Extended = true;
2519  ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
2520  "wide.trip.count");
2521  } else {
2522  const SCEV *SExtTrunc =
2523  SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
2524  if (SExtTrunc == IV) {
2525  Extended = true;
2526  ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
2527  "wide.trip.count");
2528  }
2529  }
2530 
2531  if (Extended) {
2532  bool Discard;
2533  L->makeLoopInvariant(ExitCnt, Discard);
2534  } else
2535  CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
2536  "lftr.wideiv");
2537  }
2538  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
2539  << " LHS:" << *CmpIndVar << '\n'
2540  << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
2541  << "\n"
2542  << " RHS:\t" << *ExitCnt << "\n"
2543  << "ExitCount:\t" << *ExitCount << "\n"
2544  << " was: " << *BI->getCondition() << "\n");
2545 
2546  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
2547  Value *OrigCond = BI->getCondition();
2548  // It's tempting to use replaceAllUsesWith here to fully replace the old
2549  // comparison, but that's not immediately safe, since users of the old
2550  // comparison may not be dominated by the new comparison. Instead, just
2551  // update the branch to use the new comparison; in the common case this
2552  // will make old comparison dead.
2553  BI->setCondition(Cond);
2554  DeadInsts.push_back(OrigCond);
2555 
2556  ++NumLFTR;
2557  return true;
2558 }
2559 
2560 //===----------------------------------------------------------------------===//
2561 // sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
2562 //===----------------------------------------------------------------------===//
2563 
/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
///
/// Returns true if any instruction was moved.
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
  // We can only sink past the loop if there is a unique exit block; with
  // multiple exits the sunk value would not dominate every exit.
  BasicBlock *ExitBlock = L->getExitBlock();
  if (!ExitBlock) return false;

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) return false;

  bool MadeAnyChanges = false;
  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
  BasicBlock::iterator I(Preheader->getTerminator());
  // Walk the preheader backwards from its terminator; sinking in reverse and
  // updating InsertPt after each move preserves the instructions' original
  // relative order in the exit block.
  while (I != Preheader->begin()) {
    --I;
    // New instructions were inserted at the end of the preheader.
    if (isa<PHINode>(I))
      break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before instructions inside the loop.  Also don't
    // move instructions which might read memory, since the loop may modify
    // memory. Note that it's okay if the instruction might have undefined
    // behavior: LoopSimplify guarantees that the preheader dominates the exit
    // block.
    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
      continue;

    // Skip debug info intrinsics.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip eh pad instructions.
    if (I->isEHPad())
      continue;

    // Don't sink alloca: we never want to sink static alloca's out of the
    // entry block, and correctly sinking dynamic alloca's requires
    // checks for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
    if (isa<AllocaInst>(I))
      continue;

    // Determine if there is a use in or before the loop (direct or
    // otherwise).
    bool UsedInLoop = false;
    for (Use &U : I->uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      BasicBlock *UseBB = User->getParent();
      // For PHI users the use really occurs in the predecessor block feeding
      // the PHI, not in the PHI's own block.
      if (PHINode *P = dyn_cast<PHINode>(User)) {
        unsigned i =
          PHINode::getIncomingValueNumForOperand(U.getOperandNo());
        UseBB = P->getIncomingBlock(i);
      }
      if (UseBB == Preheader || L->contains(UseBB)) {
        UsedInLoop = true;
        break;
      }
    }

    // If there is, the def must remain in the preheader.
    if (UsedInLoop)
      continue;

    // Otherwise, sink it to the exit block.
    Instruction *ToMove = &*I;
    bool Done = false;

    // Advance the iterator past the instruction we are about to move (and any
    // debug intrinsics) before moving it, so the iterator stays valid.
    if (I != Preheader->begin()) {
      // Skip debug info intrinsics.
      do {
        --I;
      } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());

      if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
        Done = true;
    } else {
      Done = true;
    }

    MadeAnyChanges = true;
    ToMove->moveBefore(*ExitBlock, InsertPt);
    if (Done) break;
    InsertPt = ToMove->getIterator();
  }

  return MadeAnyChanges;
}
2652 
2653 /// Return a symbolic upper bound for the backedge taken count of the loop.
2654 /// This is more general than getConstantMaxBackedgeTakenCount as it returns
2655 /// an arbitrary expression as opposed to only constants.
2656 /// TODO: Move into the ScalarEvolution class.
2658  DominatorTree &DT, Loop *L) {
2659  SmallVector<BasicBlock*, 16> ExitingBlocks;
2660  L->getExitingBlocks(ExitingBlocks);
2661 
2662  // Form an expression for the maximum exit count possible for this loop. We
2663  // merge the max and exact information to approximate a version of
2664  // getConstantMaxBackedgeTakenCount which isn't restricted to just constants.
2665  SmallVector<const SCEV*, 4> ExitCounts;
2666  const SCEV *MaxConstEC = SE.getConstantMaxBackedgeTakenCount(L);
2667  if (!isa<SCEVCouldNotCompute>(MaxConstEC))
2668  ExitCounts.push_back(MaxConstEC);
2669  for (BasicBlock *ExitingBB : ExitingBlocks) {
2670  const SCEV *ExitCount = SE.getExitCount(L, ExitingBB);
2671  if (!isa<SCEVCouldNotCompute>(ExitCount)) {
2672  assert(DT.dominates(ExitingBB, L->getLoopLatch()) &&
2673  "We should only have known counts for exiting blocks that "
2674  "dominate latch!");
2675  ExitCounts.push_back(ExitCount);
2676  }
2677  }
2678  if (ExitCounts.empty())
2679  return SE.getCouldNotCompute();
2680  return SE.getUMinFromMismatchedTypes(ExitCounts);
2681 }
2682 
2683 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
2684  SmallVector<BasicBlock*, 16> ExitingBlocks;
2685  L->getExitingBlocks(ExitingBlocks);
2686 
2687  // Remove all exits which aren't both rewriteable and analyzeable.
2688  auto NewEnd = llvm::remove_if(ExitingBlocks,
2689  [&](BasicBlock *ExitingBB) {
2690  // If our exitting block exits multiple loops, we can only rewrite the
2691  // innermost one. Otherwise, we're changing how many times the innermost
2692  // loop runs before it exits.
2693  if (LI->getLoopFor(ExitingBB) != L)
2694  return true;
2695 
2696  // Can't rewrite non-branch yet.
2697  BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2698  if (!BI)
2699  return true;
2700 
2701  // If already constant, nothing to do.
2702  if (isa<Constant>(BI->getCondition()))
2703  return true;
2704 
2705  const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2706  if (isa<SCEVCouldNotCompute>(ExitCount))
2707  return true;
2708  return false;
2709  });
2710  ExitingBlocks.erase(NewEnd, ExitingBlocks.end());
2711 
2712  if (ExitingBlocks.empty())
2713  return false;
2714 
2715  // Get a symbolic upper bound on the loop backedge taken count.
2716  const SCEV *MaxExitCount = getMaxBackedgeTakenCount(*SE, *DT, L);
2717  if (isa<SCEVCouldNotCompute>(MaxExitCount))
2718  return false;
2719 
2720  // Visit our exit blocks in order of dominance. We know from the fact that
2721  // all exits (left) are analyzeable that the must be a total dominance order
2722  // between them as each must dominate the latch. The visit order only
2723  // matters for the provably equal case.
2724  llvm::sort(ExitingBlocks,
2725  [&](BasicBlock *A, BasicBlock *B) {
2726  // std::sort sorts in ascending order, so we want the inverse of
2727  // the normal dominance relation.
2728  if (DT->properlyDominates(A, B)) return true;
2729  if (DT->properlyDominates(B, A)) return false;
2730  llvm_unreachable("expected total dominance order!");
2731  });
2732 #ifdef ASSERT
2733  for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
2734  assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
2735  }
2736 #endif
2737 
2738  auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) {
2739  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2740  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
2741  auto *OldCond = BI->getCondition();
2742  auto *NewCond = ConstantInt::get(OldCond->getType(),
2743  IsTaken ? ExitIfTrue : !ExitIfTrue);
2744  BI->setCondition(NewCond);
2745  if (OldCond->use_empty())
2746  DeadInsts.push_back(OldCond);
2747  };
2748 
2749  bool Changed = false;
2750  SmallSet<const SCEV*, 8> DominatingExitCounts;
2751  for (BasicBlock *ExitingBB : ExitingBlocks) {
2752  const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2753  assert(!isa<SCEVCouldNotCompute>(ExitCount) && "checked above");
2754 
2755  // If we know we'd exit on the first iteration, rewrite the exit to
2756  // reflect this. This does not imply the loop must exit through this
2757  // exit; there may be an earlier one taken on the first iteration.
2758  // TODO: Given we know the backedge can't be taken, we should go ahead
2759  // and break it. Or at least, kill all the header phis and simplify.
2760  if (ExitCount->isZero()) {
2761  FoldExit(ExitingBB, true);
2762  Changed = true;
2763  continue;
2764  }
2765 
2766  // If we end up with a pointer exit count, bail. Note that we can end up
2767  // with a pointer exit count for one exiting block, and not for another in
2768  // the same loop.
2769  if (!ExitCount->getType()->isIntegerTy() ||
2770  !MaxExitCount->getType()->isIntegerTy())
2771  continue;
2772 
2773  Type *WiderType =
2774  SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
2775  ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
2776  MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
2777  assert(MaxExitCount->getType() == ExitCount->getType());
2778 
2779  // Can we prove that some other exit must be taken strictly before this
2780  // one?
2781  if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
2782  MaxExitCount, ExitCount)) {
2783  FoldExit(ExitingBB, false);
2784  Changed = true;
2785  continue;
2786  }
2787 
2788  // As we run, keep track of which exit counts we've encountered. If we
2789  // find a duplicate, we've found an exit which would have exited on the
2790  // exiting iteration, but (from the visit order) strictly follows another
2791  // which does the same and is thus dead.
2792  if (!DominatingExitCounts.insert(ExitCount).second) {
2793  FoldExit(ExitingBB, false);
2794  Changed = true;
2795  continue;
2796  }
2797 
2798  // TODO: There might be another oppurtunity to leverage SCEV's reasoning
2799  // here. If we kept track of the min of dominanting exits so far, we could
2800  // discharge exits with EC >= MDEC. This is less powerful than the existing
2801  // transform (since later exits aren't considered), but potentially more
2802  // powerful for any case where SCEV can prove a >=u b, but neither a == b
2803  // or a >u b. Such a case is not currently known.
2804  }
2805  return Changed;
2806 }
2807 
2808 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
2809  SmallVector<BasicBlock*, 16> ExitingBlocks;
2810  L->getExitingBlocks(ExitingBlocks);
2811 
2812  bool Changed = false;
2813 
2814  // Finally, see if we can rewrite our exit conditions into a loop invariant
2815  // form. If we have a read-only loop, and we can tell that we must exit down
2816  // a path which does not need any of the values computed within the loop, we
2817  // can rewrite the loop to exit on the first iteration. Note that this
2818  // doesn't either a) tell us the loop exits on the first iteration (unless
2819  // *all* exits are predicateable) or b) tell us *which* exit might be taken.
2820  // This transformation looks a lot like a restricted form of dead loop
2821  // elimination, but restricted to read-only loops and without neccesssarily
2822  // needing to kill the loop entirely.
2823  if (!LoopPredication)
2824  return Changed;
2825 
2826  if (!SE->hasLoopInvariantBackedgeTakenCount(L))
2827  return Changed;
2828 
2829  // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
2830  // through *explicit* control flow. We have to eliminate the possibility of
2831  // implicit exits (see below) before we know it's truly exact.
2832  const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
2833  if (isa<SCEVCouldNotCompute>(ExactBTC) ||
2834  !SE->isLoopInvariant(ExactBTC, L) ||
2835  !isSafeToExpand(ExactBTC, *SE))
2836  return Changed;
2837 
2838  auto BadExit = [&](BasicBlock *ExitingBB) {
2839  // If our exiting block exits multiple loops, we can only rewrite the
2840  // innermost one. Otherwise, we're changing how many times the innermost
2841  // loop runs before it exits.
2842  if (LI->getLoopFor(ExitingBB) != L)
2843  return true;
2844 
2845  // Can't rewrite non-branch yet.
2846  BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2847  if (!BI)
2848  return true;
2849 
2850  // If already constant, nothing to do.
2851  if (isa<Constant>(BI->getCondition()))
2852  return true;
2853 
2854  // If the exit block has phis, we need to be able to compute the values
2855  // within the loop which contains them. This assumes trivially lcssa phis
2856  // have already been removed; TODO: generalize
2857  BasicBlock *ExitBlock =
2858  BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
2859  if (!ExitBlock->phis().empty())
2860  return true;
2861 
2862  const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2863  assert(!isa<SCEVCouldNotCompute>(ExactBTC) && "implied by having exact trip count");
2864  if (!SE->isLoopInvariant(ExitCount, L) ||
2865  !isSafeToExpand(ExitCount, *SE))
2866  return true;
2867 
2868  return false;
2869  };
2870 
2871  // If we have any exits which can't be predicated themselves, than we can't
2872  // predicate any exit which isn't guaranteed to execute before it. Consider
2873  // two exits (a) and (b) which would both exit on the same iteration. If we
2874  // can predicate (b), but not (a), and (a) preceeds (b) along some path, then
2875  // we could convert a loop from exiting through (a) to one exiting through
2876  // (b). Note that this problem exists only for exits with the same exit
2877  // count, and we could be more aggressive when exit counts are known inequal.
2878  llvm::sort(ExitingBlocks,
2879  [&](BasicBlock *A, BasicBlock *B) {
2880  // std::sort sorts in ascending order, so we want the inverse of
2881  // the normal dominance relation, plus a tie breaker for blocks
2882  // unordered by dominance.
2883  if (DT->properlyDominates(A, B)) return true;
2884  if (DT->properlyDominates(B, A)) return false;
2885  return A->getName() < B->getName();
2886  });
2887  // Check to see if our exit blocks are a total order (i.e. a linear chain of
2888  // exits before the backedge). If they aren't, reasoning about reachability
2889  // is complicated and we choose not to for now.
2890  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
2891  if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
2892  return Changed;
2893 
2894  // Given our sorted total order, we know that exit[j] must be evaluated
2895  // after all exit[i] such j > i.
2896  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
2897  if (BadExit(ExitingBlocks[i])) {
2898  ExitingBlocks.resize(i);
2899  break;
2900  }
2901 
2902  if (ExitingBlocks.empty())
2903  return Changed;
2904 
2905  // We rely on not being able to reach an exiting block on a later iteration
2906  // then it's statically compute exit count. The implementaton of
2907  // getExitCount currently has this invariant, but assert it here so that
2908  // breakage is obvious if this ever changes..
2909  assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
2910  return DT->dominates(ExitingBB, L->getLoopLatch());
2911  }));
2912 
2913  // At this point, ExitingBlocks consists of only those blocks which are
2914  // predicatable. Given that, we know we have at least one exit we can
2915  // predicate if the loop is doesn't have side effects and doesn't have any
2916  // implicit exits (because then our exact BTC isn't actually exact).
2917  // @Reviewers - As structured, this is O(I^2) for loop nests. Any
2918  // suggestions on how to improve this? I can obviously bail out for outer
2919  // loops, but that seems less than ideal. MemorySSA can find memory writes,
2920  // is that enough for *all* side effects?
2921  for (BasicBlock *BB : L->blocks())
2922  for (auto &I : *BB)
2923  // TODO:isGuaranteedToTransfer
2924  if (I.mayHaveSideEffects() || I.mayThrow())
2925  return Changed;
2926 
2927  // Finally, do the actual predication for all predicatable blocks. A couple
2928  // of notes here:
2929  // 1) We don't bother to constant fold dominated exits with identical exit
2930  // counts; that's simply a form of CSE/equality propagation and we leave
2931  // it for dedicated passes.
2932  // 2) We insert the comparison at the branch. Hoisting introduces additional
2933  // legality constraints and we leave that to dedicated logic. We want to
2934  // predicate even if we can't insert a loop invariant expression as
2935  // peeling or unrolling will likely reduce the cost of the otherwise loop
2936  // varying check.
2937  Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
2939  Value *ExactBTCV = nullptr; //lazy generated if needed
2940  for (BasicBlock *ExitingBB : ExitingBlocks) {
2941  const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2942 
2943  auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
2944  Value *NewCond;
2945  if (ExitCount == ExactBTC) {
2946  NewCond = L->contains(BI->getSuccessor(0)) ?
2947  B.getFalse() : B.getTrue();
2948  } else {
2949  Value *ECV = Rewriter.expandCodeFor(ExitCount);
2950  if (!ExactBTCV)
2951  ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
2952  Value *RHS = ExactBTCV;
2953  if (ECV->getType() != RHS->getType()) {
2954  Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
2955  ECV = B.CreateZExt(ECV, WiderTy);
2956  RHS = B.CreateZExt(RHS, WiderTy);
2957  }
2958  auto Pred = L->contains(BI->getSuccessor(0)) ?
2960  NewCond = B.CreateICmp(Pred, ECV, RHS);
2961  }
2962  Value *OldCond = BI->getCondition();
2963  BI->setCondition(NewCond);
2964  if (OldCond->use_empty())
2965  DeadInsts.push_back(OldCond);
2966  Changed = true;
2967  }
2968 
2969  return Changed;
2970 }
2971 
2972 //===----------------------------------------------------------------------===//
2973 // IndVarSimplify driver. Manage several subpasses of IV simplification.
2974 //===----------------------------------------------------------------------===//
2975 
/// Top-level driver: runs each IV-simplification subpass over loop L in a
/// fixed order.  Returns true if the IR was changed.
bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");
  bool Changed = false;

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
  //    and we're in trouble if we can't find the induction variable even when
  //    we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

#ifndef NDEBUG
  // Used below for a consistency check only
  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
#endif

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl)
    Changed |= rewriteLoopExitValues(L, Rewriter);

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);

  // Try to eliminate loop exits based on analyzeable exit counts
  Changed |= optimizeLoopExits(L, Rewriter);

  // Try to form loop invariant tests for loop exits by changing how many
  // iterations of the loop run when that is unobservable.
  Changed |= predicateLoopExits(L, Rewriter);

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one.  Otherwise, we're changing how many times the innermost
      // loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero which
      // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
      // until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions.  Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L))
        continue;

      // Check preconditions for proper SCEVExpander operation. SCEV does not
      // express SCEVExpander's dependencies, such as LoopSimplify. Instead
      // any pass that uses the SCEVExpander must do it. This does not work
      // well for loop passes because SCEVExpander makes assumptions about
      // all loops, while LoopPassManager only forces the current loop to be
      // simplified.
      //
      // FIXME: SCEV expansion has no way to bail out, so the caller must
      // explicitly check any assumptions made by SCEV. Brittle.
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
      if (!AR || AR->getLoop()->getLoopPreheader())
        Changed |= linearFunctionTestReplace(L, ExitingBB,
                                             ExitCount, IndVar,
                                             Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty())
    if (Instruction *Inst =
            dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
      Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI);

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  // Verify that LFTR, and any other change have not interfered with SCEV's
  // ability to compute trip count.  Truncate the wider of the two counts so
  // the comparison is performed at a common bitwidth.
#ifndef NDEBUG
  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
    SE->forgetLoop(L);
    const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
    if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
        SE->getTypeSizeInBits(NewBECount->getType()))
      NewBECount = SE->getTruncateOrNoop(NewBECount,
                                         BackedgeTakenCount->getType());
    else
      BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
                                                 NewBECount->getType());
    assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
  }
#endif

  return Changed;
}
3139 
3142  LPMUpdater &) {
3143  Function *F = L.getHeader()->getParent();
3144  const DataLayout &DL = F->getParent()->getDataLayout();
3145 
3146  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI);
3147  if (!IVS.run(&L))
3148  return PreservedAnalyses::all();
3149 
3150  auto PA = getLoopPassPreservedAnalyses();
3151  PA.preserveSet<CFGAnalyses>();
3152  return PA;
3153 }
3154 
3155 namespace {
3156 
3157 struct IndVarSimplifyLegacyPass : public LoopPass {
3158  static char ID; // Pass identification, replacement for typeid
3159 
3160  IndVarSimplifyLegacyPass() : LoopPass(ID) {
3162  }
3163 
3164  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
3165  if (skipLoop(L))
3166  return false;
3167 
3168  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
3169  auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
3170  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
3171  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
3172  auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
3173  auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
3174  auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
3175  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
3176 
3177  IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
3178  return IVS.run(L);
3179  }
3180 
3181  void getAnalysisUsage(AnalysisUsage &AU) const override {
3182  AU.setPreservesCFG();
3184  }
3185 };
3186 
3187 } // end anonymous namespace
3188 
3190 
3191 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
3192  "Induction Variable Simplification", false, false)
3194 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
3195  "Induction Variable Simplification", false, false)
3196 
3198  return new IndVarSimplifyLegacyPass();
3199 }
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:80
static cl::opt< ReplaceExitVal > ReplaceExitValue("replexitval", cl::Hidden, cl::init(OnlyCheapRepl), cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), clEnumValN(OnlyCheapRepl, "cheap", "only replace exit value when the cost is cheap"), clEnumValN(NoHardUse, "noharduse", "only replace exit values when loop def likely dead"), clEnumValN(AlwaysRepl, "always", "always replace exit value whenever possible")))
uint64_t CallInst * C
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:67
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
bool hoistIVInc(Instruction *IncV, Instruction *InsertPos)
Utility for hoisting an IV increment.
Induction Variable Simplification
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:641
static cl::opt< bool > LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(false), cl::desc("Predicate conditions in read only loops"))
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:70
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2211
static PHINode * getLoopPhiForCounter(Value *IncV, Loop *L)
Given a Value which is hoped to be part of an add recurrence in the given loop, return the associated...
iterator_range< use_iterator > uses()
Definition: Value.h:375
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:209
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
MutableArrayRef< T > makeMutableArrayRef(T &OneElt)
Construct a MutableArrayRef from a single element.
Definition: ArrayRef.h:508
This class represents lattice values for constants.
Definition: AllocatorList.h:23
This header provides classes for managing a pipeline of passes over loops in LLVM IR...
Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
bool isRecursivelyLCSSAForm(DominatorTree &DT, const LoopInfo &LI) const
Return true if this Loop and all inner subloops are in LCSSA form.
Definition: LoopInfo.cpp:455
bool isLCSSAForm(DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:448
This class represents zero extension of integer types.
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
The main scalar evolution driver.
bool isZero() const
Return true if the expression is a constant zero.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:160
bool isHighCostExpansion(const SCEV *Expr, Loop *L, const Instruction *At=nullptr)
Return true for expressions that may incur non-trivial cost to evaluate at runtime.
unsigned less than
Definition: InstrTypes.h:757
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:738
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:743
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:748
uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
void setDebugType(const char *s)
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
findNearestCommonDominator - Find nearest common dominator basic block for basic block A and B...
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1165
BasicBlock * getSuccessor(unsigned i) const
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1881
STATISTIC(NumFunctions, "Total number of functions")
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
F(f)
static cl::opt< bool > UsePostIncrementRanges("indvars-post-increment-ranges", cl::Hidden, cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), cl::init(true))
This class represents a sign extension of integer types.
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:181
Hexagon Common GEP
Value * getCondition() const
static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root, Instruction *OnPathTo, DominatorTree *DT)
Return true if undefined behavior would provably be executed on the path to OnPathTo if Root produced...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:144
static ConstantRange makeAllowedICmpRegion(CmpInst::Predicate Pred, const ConstantRange &Other)
Produce the smallest range such that all values that may satisfy the given predicate with any value c...
This defines the Use class.
static bool hasConcreteDef(Value *V)
Return true if the given value is concrete.
ReplaceExitVal
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:299
bool hasLoopInvariantOperands(const Instruction *I) const
Return true if all the operands of the specified instruction are loop invariant.
Definition: LoopInfo.cpp:67
bool propagatesFullPoison(const Instruction *I)
Return true if this function can prove that I is guaranteed to yield full-poison (all bits poison) if...
static Value * genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, const SCEV *ExitCount, bool UsePostInc, Loop *L, SCEVExpander &Rewriter, ScalarEvolution *SE)
Insert an IR expression which computes the value held by the IV IndVar (which must be an loop counter...
INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars", "Induction Variable Simplification", false, false) INITIALIZE_PASS_END(IndVarSimplifyLegacyPass
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:273
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1084
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:195
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:743
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:47
Interface for visiting interesting IV users that are recognized but not simplified by this utility...
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:140
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
bool isSigned() const
Definition: InstrTypes.h:902
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:640
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE, etc.
Definition: InstrTypes.h:831
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:369
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:928
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:368
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:439
A Use represents the edge between a Value definition and its users.
Definition: Use.h:55
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal)
Convert APF to an integer, if possible.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:41
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:197
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:779
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:739
This file implements a class to represent arbitrary precision integral constant values and operations...
BlockT * getHeader() const
Definition: LoopInfo.h:105
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:102
Key
PAL metadata keys.
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:692
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:246
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:181
This node represents a polynomial recurrence on the trip count of the specified loop.
ConstantRange intersectWith(const ConstantRange &CR, PreferredRangeType Type=Smallest) const
Return the range that results from the intersection of this range with another range.
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB)
Whether the current loop exit test is based on this value.
This instruction compares its operands according to the predicate given to the constructor.
static Instruction * getInsertPointForUses(Instruction *User, Value *Def, DominatorTree *DT, LoopInfo *LI)
Determine the insertion point for this user.
static cl::opt< bool > VerifyIndvars("verify-indvars", cl::Hidden, cl::desc("Verify the ScalarEvolution result after running indvars"))
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Examine each PHI in the given block and delete it if it is dead.
const SCEV * getSizeOfExpr(Type *IntTy, Type *AllocTy)
Return an expression for sizeof AllocTy that is type IntTy.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:125
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:156
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:429
bool replaceAllDbgUsesWith(Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT)
Point debug users of From to To or salvage them.
Definition: Local.cpp:1821
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1877
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:291
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
static BinaryOperator * CreateAdd(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block...
Definition: IRBuilder.h:132
Value * getOperand(unsigned i) const
Definition: User.h:169
void initializeIndVarSimplifyLegacyPassPass(PassRegistry &)
void clearInsertPoint()
Clear the current insertion point.
void clear()
Erase the contents of the InsertedExpressions map so that users trying to expand the same expression ...
const SCEV * getCouldNotCompute()
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:154
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt...
Definition: PatternMatch.h:194
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:223
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:328
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
void setInsertPoint(Instruction *IP)
Set the current insertion point.
Conditional or Unconditional Branch instruction.
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl< Value *> &Visited, unsigned Depth)
Recursive helper for hasConcreteDef().
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:652
Value * getIncomingValueForBlock(const BasicBlock *BB) const
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:224
const Instruction & front() const
Definition: BasicBlock.h:285
#define H(x, y, z)
Definition: MD5.cpp:57
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:263
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE, const TargetTransformInfo *TTI)
Update information about the induction variable that is extended by this sign or zero extend operatio...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
Definition: Instruction.h:582
const SCEV * getAddExpr(SmallVectorImpl< const SCEV *> &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
bool mayThrow() const
Return true if this instruction may throw an exception.
bool mustTriggerUB(const Instruction *I, const SmallSet< const Value *, 16 > &KnownPoison)
Return true if the given instruction must trigger undefined behavior.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:587
Represent the analysis usage information of a pass.
This file declares a class to represent arbitrary precision floating point values and provide a varie...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1172
This instruction compares its operands according to the predicate given to the constructor.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:732
Utility class for integer operators which may exhibit overflow - Add, Sub, Mul, and Shl...
Definition: Operator.h:66
constexpr double e
Definition: MathExtras.h:57
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values...
op_range operands()
Definition: User.h:237
self_iterator getIterator()
Definition: ilist_node.h:81
Class to represent integer types.
Definition: DerivedTypes.h:40
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn&#39;t already there.
Definition: SmallSet.h:180
auto remove_if(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Provide wrappers to std::remove_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1205
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:440
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:75
void getExitingBlocks(SmallVectorImpl< BlockT *> &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:34
const SCEV * getUMinFromMismatchedTypes(const SCEV *LHS, const SCEV *RHS)
Promote the operands to the wider of the types using zero-extension, and then perform a umin operatio...
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1446
iterator erase(const_iterator CI)
Definition: SmallVector.h:434
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:160
static unsigned getIncomingValueNumForOperand(unsigned i)
size_t size() const
Definition: SmallVector.h:52
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:747
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1873
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:61
signed greater than
Definition: InstrTypes.h:759
#define DEBUG_TYPE
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1095
const APFloat & getValueAPF() const
Definition: Constants.h:302
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:736
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:115
Iterator for intrusive lists based on ilist_node.
unsigned getNumOperands() const
Definition: User.h:191
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
Type * getType() const
Return the LLVM type of this SCEV expression.
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:345
const SCEV * getConstantMaxBackedgeTakenCount(const Loop *L)
When successful, this returns a SCEVConstant that is greater than or equal to (i.e.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:746
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:248
Module.h This file contains the declarations for the Module class.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:63
Provides information about what library functions are available for the current target.
This class represents a range of values.
Definition: ConstantRange.h:47
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU...
Definition: DataLayout.h:254
signed less than
Definition: InstrTypes.h:761
static PHINode * FindLoopCounter(Loop *L, BasicBlock *ExitingBB, const SCEV *BECount, ScalarEvolution *SE, DominatorTree *DT)
Search the loop header for a loop counter (an add rec w/step of one) suitable for use by LFTR...
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:374
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:653
bool isConditional() const
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:301
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI)
This IV user cannot be widened.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
Definition: PatternMatch.h:937
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
signed less or equal
Definition: InstrTypes.h:762
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:55
Class for arbitrary precision integers.
Definition: APInt.h:69
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), Instruction *InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
iterator_range< user_iterator > users()
Definition: Value.h:420
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property...
Definition: Operator.h:95
This class uses information about analyze scalars to rewrite expressions in canonical form...
Represents analyses that only rely on functions&#39; control flow.
Definition: PassManager.h:115
static const SCEV * getMaxBackedgeTakenCount(ScalarEvolution &SE, DominatorTree &DT, Loop *L)
Return a symbolic upper bound for the backedge taken count of the loop.
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:627
LoopT * getParentLoop() const
Definition: LoopInfo.h:106
Virtual Register Rewriter
Definition: VirtRegMap.cpp:220
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:807
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:464
This class represents an analyzed expression in the program.
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:180
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:509
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:154
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
user_iterator_impl< User > user_iterator
Definition: Value.h:389
bool mayReadFromMemory() const
Return true if this instruction may read memory.
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass&#39;s AnalysisUsage.
Definition: LoopUtils.cpp:138
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:740
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
static bool isLoopCounter(PHINode *Phi, Loop *L, ScalarEvolution *SE)
Return true if the given phi is a "counter" in L.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:329
bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, SmallVectorImpl< WeakTrackingVH > &Dead, SCEVExpander &Rewriter, IVVisitor *V=nullptr)
simplifyUsersOfIV - Simplify instructions that use this induction variable by using ScalarEvolution t...
const SCEV * getTruncateOrZeroExtend(const SCEV *V, Type *Ty, unsigned Depth=0)
Return a SCEV corresponding to a conversion of the input value to the specified type.
int getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info=OK_AnyValue, OperandValueKind Opd2Info=OK_AnyValue, OperandValueProperties Opd1PropInfo=OP_None, OperandValueProperties Opd2PropInfo=OP_None, ArrayRef< const Value *> Args=ArrayRef< const Value *>()) const
This is an approximation of reciprocal throughput of a math/logic op.
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:744
void setCondition(Value *V)
unsigned replaceCongruentIVs(Loop *L, const DominatorTree *DT, SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetTransformInfo *TTI=nullptr)
replace congruent phis with their most canonical representative.
This class represents a cast from signed integer to floating point.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
Definition: Value.h:396
bool isOne() const
Return true if the expression is a constant one.
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:735
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:575
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction has no side ef...
Definition: Local.cpp:359
LLVM Value Representation.
Definition: Value.h:74
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:745
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:273
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond)
Return true if this IV has any uses other than the (soon to be rewritten) loop exit test...
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:86
void disableCanonicalMode()
Disable the behavior of expanding expressions in canonical form rather than in a more literal form...
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:593
void getUniqueExitBlocks(SmallVectorImpl< BlockT *> &ExitBlocks) const
Return all unique successor blocks of this loop.
Definition: LoopInfoImpl.h:115
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:433
A container for analyses that lazily runs them and caches their results.
const SCEV * getExitCount(const Loop *L, BasicBlock *ExitingBlock)
Return the number of times the backedge executes before the given exit would be taken; if not exactly...
This pass exposes codegen information to IR-level passes.
This header defines various interfaces for pass management in LLVM.
void setIncomingValue(unsigned i, Value *V)
#define LLVM_DEBUG(X)
Definition: Debug.h:122
bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE)
Return true if the given expression is safe to expand in the sense that all materialized values are s...
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:161
static cl::opt< bool > DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), cl::desc("Disable Linear Function Test Replace optimization"))
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:737
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB)
linearFunctionTestReplace policy.
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
bool use_empty() const
Definition: Value.h:343
Pass * createIndVarSimplifyPass()
signed greater or equal
Definition: InstrTypes.h:760
IntegerType * Int32Ty
const BasicBlock * getParent() const
Definition: Instruction.h:66
This class represents a constant integer value.
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
void resize(size_type N)
Definition: SmallVector.h:344
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property...
Definition: Operator.h:89
bool makeLoopInvariant(Value *V, bool &Changed, Instruction *InsertPt=nullptr, MemorySSAUpdater *MSSAU=nullptr) const
If the given value is an instruction inside of the loop and it can be hoisted, do so to make it trivi...
Definition: LoopInfo.cpp:71