1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This transformation analyzes and transforms the induction variables (and
10 // computations derived from them) into simpler forms suitable for subsequent
11 // analysis and transformation.
12 //
13 // If the trip count of a loop is computable, this pass also makes the following
14 // changes:
15 // 1. The exit condition for the loop is canonicalized to compare the
16 // induction value against the exit value. This turns loops like:
17 // 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18 // 2. Any use outside of the loop of an expression derived from the indvar
19 // is changed to compute the derived value outside of the loop, eliminating
20 // the dependence on the exit value of the induction variable. If the only
21 // purpose of the loop is to compute the exit value of some derived
22 // expression, this transformation will make the loop dead.
23 //
24 //===----------------------------------------------------------------------===//
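//
// As an illustrative, hypothetical example of change (2): in a loop such as
//
//   int i = 0;
//   for (; i != n; ++i)
//     ;
//   use(i);   // the only use of the IV is its exit value
//
// the use of 'i' after the loop can be rewritten to 'use(n)' (here 'n' and
// 'use' are placeholder names), after which the loop computes nothing
// observable and becomes dead.
//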
25 
26 #include "llvm/Transforms/Scalar/IndVarSimplify.h"
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/Optional.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/ADT/SmallPtrSet.h"
32 #include "llvm/ADT/SmallSet.h"
33 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/ADT/Statistic.h"
36 #include "llvm/Analysis/LoopInfo.h"
37 #include "llvm/Analysis/LoopPass.h"
45 #include "llvm/IR/BasicBlock.h"
46 #include "llvm/IR/Constant.h"
47 #include "llvm/IR/ConstantRange.h"
48 #include "llvm/IR/Constants.h"
49 #include "llvm/IR/DataLayout.h"
50 #include "llvm/IR/DerivedTypes.h"
51 #include "llvm/IR/Dominators.h"
52 #include "llvm/IR/Function.h"
53 #include "llvm/IR/IRBuilder.h"
54 #include "llvm/IR/InstrTypes.h"
55 #include "llvm/IR/Instruction.h"
56 #include "llvm/IR/Instructions.h"
57 #include "llvm/IR/IntrinsicInst.h"
58 #include "llvm/IR/Intrinsics.h"
59 #include "llvm/IR/Module.h"
60 #include "llvm/IR/Operator.h"
61 #include "llvm/IR/PassManager.h"
62 #include "llvm/IR/PatternMatch.h"
63 #include "llvm/IR/Type.h"
64 #include "llvm/IR/Use.h"
65 #include "llvm/IR/User.h"
66 #include "llvm/IR/Value.h"
67 #include "llvm/IR/ValueHandle.h"
68 #include "llvm/InitializePasses.h"
69 #include "llvm/Pass.h"
70 #include "llvm/Support/Casting.h"
72 #include "llvm/Support/Compiler.h"
73 #include "llvm/Support/Debug.h"
76 #include "llvm/Transforms/Scalar.h"
82 #include <cassert>
83 #include <cstdint>
84 #include <utility>
85 
86 using namespace llvm;
87 using namespace PatternMatch;
88 
89 #define DEBUG_TYPE "indvars"
90 
91 STATISTIC(NumWidened , "Number of indvars widened");
92 STATISTIC(NumReplaced , "Number of exit values replaced");
93 STATISTIC(NumLFTR , "Number of loop exit tests replaced");
94 STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
95 STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
96 
97 // Trip count verification can be enabled by default under NDEBUG if we
98 // implement a strong expression equivalence checker in SCEV. Until then, we
99 // use the verify-indvars flag, which may assert in some cases.
100 static cl::opt<bool> VerifyIndvars(
101  "verify-indvars", cl::Hidden,
102  cl::desc("Verify the ScalarEvolution result after running indvars. Has no "
103  "effect in release builds. (Note: this adds additional SCEV "
104  "queries potentially changing the analysis result)"));
105 
106 static cl::opt<ReplaceExitVal> ReplaceExitValue(
107  "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
108  cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
109  cl::values(
110  clEnumValN(NeverRepl, "never", "never replace exit value"),
111  clEnumValN(OnlyCheapRepl, "cheap",
112  "only replace exit value when the cost is cheap"),
113  clEnumValN(
114  UnusedIndVarInLoop, "unusedindvarinloop",
115  "only replace exit value when it is an unused "
116  "induction variable in the loop and has cheap replacement cost"),
117  clEnumValN(NoHardUse, "noharduse",
118  "only replace exit values when loop def likely dead"),
119  clEnumValN(AlwaysRepl, "always",
120  "always replace exit value whenever possible")));
121 
122 static cl::opt<bool> UsePostIncrementRanges(
123  "indvars-post-increment-ranges", cl::Hidden,
124  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
125  cl::init(true));
126 
127 static cl::opt<bool>
128 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
129  cl::desc("Disable Linear Function Test Replace optimization"));
130 
131 static cl::opt<bool>
132 LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
133  cl::desc("Predicate conditions in read only loops"));
134 
135 static cl::opt<bool>
136 AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
137  cl::desc("Allow widening of indvars to eliminate s/zext"));
138 
139 namespace {
140 
141 class IndVarSimplify {
142  LoopInfo *LI;
143  ScalarEvolution *SE;
144  DominatorTree *DT;
145  const DataLayout &DL;
146  TargetLibraryInfo *TLI;
147  const TargetTransformInfo *TTI;
148  std::unique_ptr<MemorySSAUpdater> MSSAU;
149 
150  SmallVector<WeakTrackingVH, 16> DeadInsts;
151  bool WidenIndVars;
152 
153  bool handleFloatingPointIV(Loop *L, PHINode *PH);
154  bool rewriteNonIntegerIVs(Loop *L);
155 
156  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
157  /// Try to improve our exit conditions by converting condition from signed
158  /// to unsigned or rotating computation out of the loop.
159  /// (See inline comment about why this is duplicated from simplifyAndExtend)
160  bool canonicalizeExitCondition(Loop *L);
161  /// Try to eliminate loop exits based on analyzable exit counts
162  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
163  /// Try to form loop invariant tests for loop exits by changing how many
164  /// iterations of the loop run when that is unobservable.
165  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
166 
167  bool rewriteFirstIterationLoopExitValues(Loop *L);
168 
169  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
170  const SCEV *ExitCount,
171  PHINode *IndVar, SCEVExpander &Rewriter);
172 
173  bool sinkUnusedInvariants(Loop *L);
174 
175 public:
176  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
177  const DataLayout &DL, TargetLibraryInfo *TLI,
178  TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
179  : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
180  WidenIndVars(WidenIndVars) {
181  if (MSSA)
182  MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
183  }
184 
185  bool run(Loop *L);
186 };
187 
188 } // end anonymous namespace
189 
190 //===----------------------------------------------------------------------===//
191 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
192 //===----------------------------------------------------------------------===//
193 
194 /// Convert APF to an integer, if possible.
195 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
196  bool isExact = false;
197  // See if we can convert this to an int64_t
198  uint64_t UIntVal;
199  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
200  APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
201  !isExact)
202  return false;
203  IntVal = UIntVal;
204  return true;
205 }
206 
207 /// If the loop has a floating-point induction variable, then insert a
208 /// corresponding integer induction variable if possible.
209 /// For example,
210 /// for(double i = 0; i < 10000; ++i)
211 /// bar(i)
212 /// is converted into
213 /// for(int i = 0; i < 10000; ++i)
214 /// bar((double)i);
215 bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
216  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
217  unsigned BackEdge = IncomingEdge^1;
218 
219  // Check incoming value.
220  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
221 
222  int64_t InitValue;
223  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
224  return false;
225 
226  // Check IV increment. Reject this PN if the increment operation is not
227  // an add or the increment value cannot be represented by an integer.
228  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
229  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
230 
231  // If this is not an add of the PHI with a constantfp, or if the constant fp
232  // is not an integer, bail out.
233  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
234  int64_t IncValue;
235  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
236  !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
237  return false;
238 
239  // Check Incr uses. One user is PN and the other user is an exit condition
240  // used by the conditional terminator.
241  Value::user_iterator IncrUse = Incr->user_begin();
242  Instruction *U1 = cast<Instruction>(*IncrUse++);
243  if (IncrUse == Incr->user_end()) return false;
244  Instruction *U2 = cast<Instruction>(*IncrUse++);
245  if (IncrUse != Incr->user_end()) return false;
246 
247  // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
248  // only used by a branch, we can't transform it.
249  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
250  if (!Compare)
251  Compare = dyn_cast<FCmpInst>(U2);
252  if (!Compare || !Compare->hasOneUse() ||
253  !isa<BranchInst>(Compare->user_back()))
254  return false;
255 
256  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
257 
258  // We need to verify that the branch actually controls the iteration count
259  // of the loop. If not, the new IV can overflow and no one will notice.
260  // The branch block must be in the loop and one of the successors must be out
261  // of the loop.
262  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
263  if (!L->contains(TheBr->getParent()) ||
264  (L->contains(TheBr->getSuccessor(0)) &&
265  L->contains(TheBr->getSuccessor(1))))
266  return false;
267 
268  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
269  // transform it.
270  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
271  int64_t ExitValue;
272  if (ExitValueVal == nullptr ||
273  !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
274  return false;
275 
276  // Find new predicate for integer comparison.
277  ICmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
278  switch (Compare->getPredicate()) {
279  default: return false; // Unknown comparison.
280  case CmpInst::FCMP_OEQ:
281  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
282  case CmpInst::FCMP_ONE:
283  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
284  case CmpInst::FCMP_OGT:
285  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
286  case CmpInst::FCMP_OGE:
287  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
288  case CmpInst::FCMP_OLT:
289  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
290  case CmpInst::FCMP_OLE:
291  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
292  }
293 
294  // We convert the floating point induction variable to a signed i32 value if
295  // we can. This is only safe if the comparison will not overflow in a way
296  // that won't be trapped by the integer equivalent operations. Check for this
297  // now.
298  // TODO: We could use i64 if it is native and the range requires it.
299 
300  // The start/stride/exit values must all fit in signed i32.
301  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
302  return false;
303 
304  // If not actually striding (add x, 0.0), avoid touching the code.
305  if (IncValue == 0)
306  return false;
307 
308  // Positive and negative strides have different safety conditions.
309  if (IncValue > 0) {
310  // If we have a positive stride, we require the init to be less than the
311  // exit value.
312  if (InitValue >= ExitValue)
313  return false;
314 
315  uint32_t Range = uint32_t(ExitValue-InitValue);
316  // Check for infinite loop, either:
317  // while (i <= Exit) or until (i > Exit)
318  if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
319  if (++Range == 0) return false; // Range overflows.
320  }
321 
322  unsigned Leftover = Range % uint32_t(IncValue);
323 
324  // If this is an equality comparison, we require that the strided value
325  // exactly land on the exit value, otherwise the IV condition will wrap
326  // around and do things the fp IV wouldn't.
327  if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
328  Leftover != 0)
329  return false;
330 
331  // If the stride would wrap around the i32 before exiting, we can't
332  // transform the IV.
333  if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
334  return false;
335  } else {
336  // If we have a negative stride, we require the init to be greater than the
337  // exit value.
338  if (InitValue <= ExitValue)
339  return false;
340 
341  uint32_t Range = uint32_t(InitValue-ExitValue);
342  // Check for infinite loop, either:
343  // while (i >= Exit) or until (i < Exit)
344  if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
345  if (++Range == 0) return false; // Range overflows.
346  }
347 
348  unsigned Leftover = Range % uint32_t(-IncValue);
349 
350  // If this is an equality comparison, we require that the strided value
351  // exactly land on the exit value, otherwise the IV condition will wrap
352  // around and do things the fp IV wouldn't.
353  if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
354  Leftover != 0)
355  return false;
356 
357  // If the stride would wrap around the i32 before exiting, we can't
358  // transform the IV.
359  if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
360  return false;
361  }
362 
363  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
364 
365  // Insert new integer induction variable.
366  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
367  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
368  PN->getIncomingBlock(IncomingEdge));
369 
370  Value *NewAdd =
371  BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
372  Incr->getName()+".int", Incr);
373  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
374 
375  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
376  ConstantInt::get(Int32Ty, ExitValue),
377  Compare->getName());
378 
379  // In the following deletions, PN may become dead and may be deleted.
380  // Use a WeakTrackingVH to observe whether this happens.
381  WeakTrackingVH WeakPH = PN;
382 
383  // Delete the old floating point exit comparison. The branch starts using the
384  // new comparison.
385  NewCompare->takeName(Compare);
386  Compare->replaceAllUsesWith(NewCompare);
387  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());
388 
389  // Delete the old floating point increment.
390  Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
391  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());
392 
393  // If the FP induction variable still has uses, this is because something else
394  // in the loop uses its value. In order to canonicalize the induction
395  // variable, we chose to eliminate the IV and rewrite it in terms of an
396  // int->fp cast.
397  //
398  // We give preference to sitofp over uitofp because it is faster on most
399  // platforms.
400  if (WeakPH) {
401  Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
402  &*PN->getParent()->getFirstInsertionPt());
403  PN->replaceAllUsesWith(Conv);
404  RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
405  }
406  return true;
407 }
408 
409 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
410  // First step. Check to see if there are any floating-point recurrences.
411  // If there are, change them into integer recurrences, permitting analysis by
412  // the SCEV routines.
413  BasicBlock *Header = L->getHeader();
414 
415  SmallVector<WeakTrackingVH, 8> PHIs;
416  for (PHINode &PN : Header->phis())
417  PHIs.push_back(&PN);
418 
419  bool Changed = false;
420  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
421  if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
422  Changed |= handleFloatingPointIV(L, PN);
423 
424  // If the loop previously had a floating-point IV, ScalarEvolution
425  // may not have been able to compute a trip count. Now that we've done some
426  // re-writing, the trip count may be computable.
427  if (Changed)
428  SE->forgetLoop(L);
429  return Changed;
430 }
431 
432 //===---------------------------------------------------------------------===//
433 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
434 // they will exit at the first iteration.
435 //===---------------------------------------------------------------------===//
436 
437 /// Check to see if this loop has loop invariant conditions which lead to loop
438 /// exits. If so, we know that if the exit path is taken, it is at the first
439 /// loop iteration. This lets us predict exit values of PHI nodes that live in
440 /// loop header.
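///
/// A hypothetical illustration (names are placeholders, not from a real test):
///   header:
///     %iv = phi i32 [ %init, %preheader ], [ %iv.next, %latch ]
///     br i1 %inv.cond, label %exit, label %body   ; %inv.cond is loop-invariant
///   exit:
///     %lcssa = phi i32 [ %iv, %header ]
/// Because %inv.cond cannot change between iterations, this exit can only be
/// taken on the first iteration, so %lcssa can be rewritten to %init.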
441 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
442  // Verify the input to the pass is already in LCSSA form.
443  assert(L->isLCSSAForm(*DT));
444 
445  SmallVector<BasicBlock *, 8> ExitBlocks;
446  L->getUniqueExitBlocks(ExitBlocks);
447 
448  bool MadeAnyChanges = false;
449  for (auto *ExitBB : ExitBlocks) {
450  // If there are no more PHI nodes in this exit block, then no more
451  // values defined inside the loop are used on this path.
452  for (PHINode &PN : ExitBB->phis()) {
453  for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
454  IncomingValIdx != E; ++IncomingValIdx) {
455  auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
456 
457  // Can we prove that the exit must run on the first iteration if it
458  // runs at all? (i.e. early exits are fine for our purposes, but
459  // traces which lead to this exit being taken on the 2nd iteration
460  // aren't.) Note that this is about whether the exit branch is
461  // executed, not about whether it is taken.
462  if (!L->getLoopLatch() ||
463  !DT->dominates(IncomingBB, L->getLoopLatch()))
464  continue;
465 
466  // Get condition that leads to the exit path.
467  auto *TermInst = IncomingBB->getTerminator();
468 
469  Value *Cond = nullptr;
470  if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
471  // Must be a conditional branch, otherwise the block
472  // should not be in the loop.
473  Cond = BI->getCondition();
474  } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
475  Cond = SI->getCondition();
476  else
477  continue;
478 
479  if (!L->isLoopInvariant(Cond))
480  continue;
481 
482  auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
483 
484  // Only deal with PHIs in the loop header.
485  if (!ExitVal || ExitVal->getParent() != L->getHeader())
486  continue;
487 
488  // If ExitVal is a PHI on the loop header, then we know its
489  // value along this exit because the exit can only be taken
490  // on the first iteration.
491  auto *LoopPreheader = L->getLoopPreheader();
492  assert(LoopPreheader && "Invalid loop");
493  int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
494  if (PreheaderIdx != -1) {
495  assert(ExitVal->getParent() == L->getHeader() &&
496  "ExitVal must be in loop header");
497  MadeAnyChanges = true;
498  PN.setIncomingValue(IncomingValIdx,
499  ExitVal->getIncomingValue(PreheaderIdx));
500  SE->forgetValue(&PN);
501  }
502  }
503  }
504  }
505  return MadeAnyChanges;
506 }
507 
508 //===----------------------------------------------------------------------===//
509 // IV Widening - Extend the width of an IV to cover its widest uses.
510 //===----------------------------------------------------------------------===//
511 
512 /// Update information about the induction variable that is extended by this
513 /// sign or zero extend operation. This is used to determine the final width of
514 /// the IV before actually widening it.
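///
/// A hypothetical illustration (placeholder names): given
///   %iv  = phi i32 [ 0, %preheader ], [ %iv.next, %latch ]
///   %idx = sext i32 %iv to i64
/// this records i64 as a candidate widest native type and that the extension
/// was signed, so the widening step can later build a single i64 IV and drop
/// the sext.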
515 static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
516  ScalarEvolution *SE,
517  const TargetTransformInfo *TTI) {
518  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
519  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
520  return;
521 
522  Type *Ty = Cast->getType();
523  uint64_t Width = SE->getTypeSizeInBits(Ty);
524  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
525  return;
526 
527  // Check that `Cast` actually extends the induction variable (we rely on this
528  // later). This takes care of cases where `Cast` is extending a truncation of
529  // the narrow induction variable, and thus can end up being narrower than the
530  // "narrow" induction variable.
531  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
532  if (NarrowIVWidth >= Width)
533  return;
534 
535  // Cast is either an sext or zext up to this point.
536  // We should not widen an indvar if arithmetic on the wider indvar is more
537  // expensive than on the narrower indvar. We check only the cost of ADD
538  // because at least an ADD is required to increment the induction variable. We
539  // could compute more comprehensively the cost of all instructions on the
540  // induction variable when necessary.
541  if (TTI &&
542  TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
543  TTI->getArithmeticInstrCost(Instruction::Add,
544  Cast->getOperand(0)->getType())) {
545  return;
546  }
547 
548  if (!WI.WidestNativeType ||
549  SE->getTypeSizeInBits(WI.WidestNativeType) < Width) {
550  WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
551  WI.IsSigned = IsSigned;
552  return;
553  }
554 
555  // We extend the IV to satisfy the sign of its user(s), or 'signed'
556  // if there are multiple users with both sign- and zero extensions,
557  // in order not to introduce nondeterministic behaviour based on the
558  // unspecified order of a PHI node's users-iterator.
559  WI.IsSigned |= IsSigned;
560 }
561 
562 //===----------------------------------------------------------------------===//
563 // Live IV Reduction - Minimize IVs live across the loop.
564 //===----------------------------------------------------------------------===//
565 
566 //===----------------------------------------------------------------------===//
567 // Simplification of IV users based on SCEV evaluation.
568 //===----------------------------------------------------------------------===//
569 
570 namespace {
571 
572 class IndVarSimplifyVisitor : public IVVisitor {
573  ScalarEvolution *SE;
574  const TargetTransformInfo *TTI;
575  PHINode *IVPhi;
576 
577 public:
578  WideIVInfo WI;
579 
580  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
581  const TargetTransformInfo *TTI,
582  const DominatorTree *DTree)
583  : SE(SCEV), TTI(TTI), IVPhi(IV) {
584  DT = DTree;
585  WI.NarrowIV = IVPhi;
586  }
587 
588  // Implement the interface used by simplifyUsersOfIV.
589  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
590 };
591 
592 } // end anonymous namespace
593 
594 /// Iteratively perform simplification on a worklist of IV users. Each
595 /// successive simplification may push more users which may themselves be
596 /// candidates for simplification.
597 ///
598 /// Sign/Zero extend elimination is interleaved with IV simplification.
599 bool IndVarSimplify::simplifyAndExtend(Loop *L,
600  SCEVExpander &Rewriter,
601  LoopInfo *LI) {
602  SmallVector<WideIVInfo, 8> WideIVs;
603 
604  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
605  Intrinsic::getName(Intrinsic::experimental_guard));
606  bool HasGuards = GuardDecl && !GuardDecl->use_empty();
607 
608  SmallVector<PHINode *, 8> LoopPhis;
609  for (PHINode &PN : L->getHeader()->phis())
610  LoopPhis.push_back(&PN);
611 
612  // Each round of simplification iterates through the SimplifyIVUsers worklist
613  // for all current phis, then determines whether any IVs can be
614  // widened. Widening adds new phis to LoopPhis, inducing another round of
615  // simplification on the wide IVs.
616  bool Changed = false;
617  while (!LoopPhis.empty()) {
618  // Evaluate as many IV expressions as possible before widening any IVs. This
619  // forces SCEV to set no-wrap flags before evaluating sign/zero
620  // extension. The first time SCEV attempts to normalize sign/zero extension,
621  // the result becomes final. So for the most predictable results, we delay
622  // evaluation of sign/zero extend evaluation until needed, and avoid running
623  // other SCEV based analysis prior to simplifyAndExtend.
624  do {
625  PHINode *CurrIV = LoopPhis.pop_back_val();
626 
627  // Information about sign/zero extensions of CurrIV.
628  IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
629 
630  Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
631  &Visitor);
632 
633  if (Visitor.WI.WidestNativeType) {
634  WideIVs.push_back(Visitor.WI);
635  }
636  } while(!LoopPhis.empty());
637 
638  // Continue if we disallowed widening.
639  if (!WidenIndVars)
640  continue;
641 
642  for (; !WideIVs.empty(); WideIVs.pop_back()) {
643  unsigned ElimExt;
644  unsigned Widened;
645  if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
646  DT, DeadInsts, ElimExt, Widened,
647  HasGuards, UsePostIncrementRanges)) {
648  NumElimExt += ElimExt;
649  NumWidened += Widened;
650  Changed = true;
651  LoopPhis.push_back(WidePhi);
652  }
653  }
654  }
655  return Changed;
656 }
657 
658 //===----------------------------------------------------------------------===//
659 // linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
660 //===----------------------------------------------------------------------===//
661 
662 /// Given a Value which is hoped to be part of an add recurrence in the given
663 /// loop, return the associated Phi node if so. Otherwise, return null. Note
664 /// that this is less general than SCEV's AddRec checking.
665 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
666  Instruction *IncI = dyn_cast<Instruction>(IncV);
667  if (!IncI)
668  return nullptr;
669 
670  switch (IncI->getOpcode()) {
671  case Instruction::Add:
672  case Instruction::Sub:
673  break;
674  case Instruction::GetElementPtr:
675  // An IV counter must preserve its type.
676  if (IncI->getNumOperands() == 2)
677  break;
678  [[fallthrough]];
679  default:
680  return nullptr;
681  }
682 
683  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
684  if (Phi && Phi->getParent() == L->getHeader()) {
685  if (L->isLoopInvariant(IncI->getOperand(1)))
686  return Phi;
687  return nullptr;
688  }
689  if (IncI->getOpcode() == Instruction::GetElementPtr)
690  return nullptr;
691 
692  // Allow add/sub to be commuted.
693  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
694  if (Phi && Phi->getParent() == L->getHeader()) {
695  if (L->isLoopInvariant(IncI->getOperand(0)))
696  return Phi;
697  }
698  return nullptr;
699 }
700 
701 /// Whether the current loop exit test is based on this value. Currently this
702 /// is limited to a direct use in the loop condition.
703 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
704  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
705  ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
706  // TODO: Allow non-icmp loop test.
707  if (!ICmp)
708  return false;
709 
710  // TODO: Allow indirect use.
711  return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
712 }
713 
714 /// linearFunctionTestReplace policy. Return true unless we can show that the
715 /// current exit test is already sufficiently canonical.
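///
/// A hypothetical illustration (placeholder names): an exit test like
///   %cmp = icmp slt i32 %iv, %n
/// is not in canonical form and is a candidate for LFTR, whereas
///   %cmp = icmp ne i32 %iv.next, %limit
/// with %iv.next a simple counter increment and %limit loop-invariant already
/// matches what LFTR would produce, so this routine returns false for it.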
716 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
717  assert(L->getLoopLatch() && "Must be in simplified form");
718 
719  // Avoid converting a constant or loop invariant test back to a runtime
720  // test. This is critical for when SCEV's cached ExitCount is less precise
721  // than the current IR (such as after we've proven a particular exit is
722  // actually dead and thus the BE count never reaches our ExitCount.)
723  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
724  if (L->isLoopInvariant(BI->getCondition()))
725  return false;
726 
727  // Do LFTR to simplify the exit condition to an ICMP.
728  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
729  if (!Cond)
730  return true;
731 
732  // Do LFTR to simplify the exit ICMP to EQ/NE
733  ICmpInst::Predicate Pred = Cond->getPredicate();
734  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
735  return true;
736 
737  // Look for a loop invariant RHS
738  Value *LHS = Cond->getOperand(0);
739  Value *RHS = Cond->getOperand(1);
740  if (!L->isLoopInvariant(RHS)) {
741  if (!L->isLoopInvariant(LHS))
742  return true;
743  std::swap(LHS, RHS);
744  }
745  // Look for a simple IV counter LHS
746  PHINode *Phi = dyn_cast<PHINode>(LHS);
747  if (!Phi)
748  Phi = getLoopPhiForCounter(LHS, L);
749 
750  if (!Phi)
751  return true;
752 
753  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
754  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
755  if (Idx < 0)
756  return true;
757 
758  // Do LFTR if the exit condition's IV is *not* a simple counter.
759  Value *IncV = Phi->getIncomingValue(Idx);
760  return Phi != getLoopPhiForCounter(IncV, L);
761 }
762 
763 /// Return true if undefined behavior would provably be executed on the path to
764 /// OnPathTo if Root produced a poison result. Note that this doesn't say
765 /// anything about whether OnPathTo is actually executed or whether Root is
766 /// actually poison. This can be used to assess whether a new use of Root can
767 /// be added at a location which is control equivalent with OnPathTo (such as
768 /// immediately before it) without introducing UB which didn't previously
769 /// exist. Note that a false result conveys no information.
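///
/// A hypothetical illustration (placeholder names): if Root is %iv.next and
/// the instructions
///   %p = getelementptr i32, ptr %base, i64 %iv.next
///   store i32 0, ptr %p
/// dominate OnPathTo, then poison in %iv.next propagates to %p and storing
/// through a poison pointer is immediate UB, so the walk below may return
/// true for this (Root, OnPathTo) pair.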
770 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
771  Instruction *OnPathTo,
772  DominatorTree *DT) {
773  // Basic approach is to assume Root is poison, propagate poison forward
774  // through all users we can easily track, and then check whether any of those
775  // users are provably UB and must execute before our exiting block might
776  // exit.
777 
778  // The set of all recursive users we've visited (which are assumed to all be
779  // poison because of said visit)
780  SmallSet<const Value *, 16> KnownPoison;
781  SmallVector<const Instruction *, 16> Worklist;
782  Worklist.push_back(Root);
783  while (!Worklist.empty()) {
784  const Instruction *I = Worklist.pop_back_val();
785 
786  // If we know this must trigger UB on a path leading our target.
787  if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
788  return true;
789 
790  // If we can't analyze propagation through this instruction, just skip it
791  // and transitive users. Safe as false is a conservative result.
792  if (!propagatesPoison(cast<Operator>(I)) && I != Root)
793  continue;
794 
795  if (KnownPoison.insert(I).second)
796  for (const User *User : I->users())
797  Worklist.push_back(cast<Instruction>(User));
798  }
799 
800  // Might be non-UB, or might have a path we couldn't prove must execute on
801  // way to exiting bb.
802  return false;
803 }
804 
805 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
806 /// down to checking that all operands are constant and listing instructions
807 /// that may hide undef.
808 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
809  unsigned Depth) {
810  if (isa<Constant>(V))
811  return !isa<UndefValue>(V);
812 
813  if (Depth >= 6)
814  return false;
815 
816  // Conservatively handle non-constant non-instructions. For example, Arguments
817  // may be undef.
818  Instruction *I = dyn_cast<Instruction>(V);
819  if (!I)
820  return false;
821 
822  // Load and return values may be undef.
823  if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
824  return false;
825 
826  // Optimistically handle other instructions.
827  for (Value *Op : I->operands()) {
828  if (!Visited.insert(Op).second)
829  continue;
830  if (!hasConcreteDefImpl(Op, Visited, Depth+1))
831  return false;
832  }
833  return true;
834 }
835 
836 /// Return true if the given value is concrete. We must prove that undef can
837 /// never reach it.
838 ///
839 /// TODO: If we decide that this is a good approach to checking for undef, we
840 /// may factor it into a common location.
841 static bool hasConcreteDef(Value *V) {
842  SmallPtrSet<Value*, 8> Visited;
843  Visited.insert(V);
844  return hasConcreteDefImpl(V, Visited, 0);
845 }
846 
847 /// Return true if this IV has any uses other than the (soon to be rewritten)
848 /// loop exit test.
849 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
850  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
851  Value *IncV = Phi->getIncomingValue(LatchIdx);
852 
853  for (User *U : Phi->users())
854  if (U != Cond && U != IncV) return false;
855 
856  for (User *U : IncV->users())
857  if (U != Cond && U != Phi) return false;
858  return true;
859 }
860 
861 /// Return true if the given phi is a "counter" in L. A counter is an
862 /// add recurrence (of integer or pointer type) with an arbitrary start, and a
863 /// step of 1. Note that L must have exactly one latch.
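///
/// A hypothetical illustration (placeholder names): the header phi
///   %iv = phi i64 [ %start, %preheader ], [ %iv.next, %latch ]
///   %iv.next = add i64 %iv, 1
/// is a counter (arbitrary start, step 1), while a phi that steps by 4 or by
/// a loop-variant amount is not.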
864 static bool isLoopCounter(PHINode* Phi, Loop *L,
865  ScalarEvolution *SE) {
866  assert(Phi->getParent() == L->getHeader());
867  assert(L->getLoopLatch());
868 
869  if (!SE->isSCEVable(Phi->getType()))
870  return false;
871 
872  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
873  if (!AR || AR->getLoop() != L || !AR->isAffine())
874  return false;
875 
876  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
877  if (!Step || !Step->isOne())
878  return false;
879 
880  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
881  Value *IncV = Phi->getIncomingValue(LatchIdx);
882  return (getLoopPhiForCounter(IncV, L) == Phi &&
883  isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
884 }
885 
886 /// Search the loop header for a loop counter (an add rec w/ step of one)
887 /// suitable for use by LFTR. If multiple counters are available, select the
888 /// "best" one based on profitability heuristics.
889 ///
890 /// BECount may be an i8* pointer type. The pointer difference is already
891 /// a valid count without scaling the address stride, so it remains a pointer
892 /// expression as far as SCEV is concerned.
893 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
894  const SCEV *BECount,
895  ScalarEvolution *SE, DominatorTree *DT) {
896  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
897 
898  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
899 
900  // Loop over all of the PHI nodes, looking for a simple counter.
901  PHINode *BestPhi = nullptr;
902  const SCEV *BestInit = nullptr;
903  BasicBlock *LatchBlock = L->getLoopLatch();
904  assert(LatchBlock && "Must be in simplified form");
905  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
906 
907  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
908  PHINode *Phi = cast<PHINode>(I);
909  if (!isLoopCounter(Phi, L, SE))
910  continue;
911 
912  // Avoid comparing an integer IV against a pointer Limit.
913  if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
914  continue;
915 
916  const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
917 
918  // AR may be a pointer type, while BECount is an integer type.
919  // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
920  // AR may not be a narrower type, or we may never exit.
921  uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
922  if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
923  continue;
924 
925  // Avoid reusing a potentially undef value to compute other values that may
926  // have originally had a concrete definition.
927  if (!hasConcreteDef(Phi)) {
928  // We explicitly allow unknown phis as long as they are already used by
929  // the loop exit test. This is legal since performing LFTR could not
930  // increase the number of undef users.
931  Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
932  if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
933  !isLoopExitTestBasedOn(IncPhi, ExitingBB))
934  continue;
935  }
936 
937  // Avoid introducing undefined behavior due to poison which didn't exist in
938  // the original program. (Annoyingly, the rules for poison and undef
939  // propagation are distinct, so this does NOT cover the undef case above.)
940  // We have to ensure that we don't introduce UB by introducing a use on an
941  // iteration where said IV produces poison. Our strategy here differs for
942  // pointers and integer IVs. For integers, we strip and reinfer as needed,
943  // see code in linearFunctionTestReplace. For pointers, we restrict
944  // transforms as there is no good way to reinfer inbounds once lost.
945  if (!Phi->getType()->isIntegerTy() &&
946  !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
947  continue;
948 
949  const SCEV *Init = AR->getStart();
950 
951  if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
952  // Don't force a live loop counter if another IV can be used.
953  if (AlmostDeadIV(Phi, LatchBlock, Cond))
954  continue;
955 
956  // Prefer to count-from-zero. This is a more "canonical" counter form. It
957  // also prefers integer to pointer IVs.
958  if (BestInit->isZero() != Init->isZero()) {
959  if (BestInit->isZero())
960  continue;
961  }
962  // If two IVs both count from zero or both count from nonzero then the
963  // narrower is likely a dead phi that has been widened. Use the wider phi
964  // to allow the other to be eliminated.
965  else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
966  continue;
967  }
968  BestPhi = Phi;
969  BestInit = Init;
970  }
971  return BestPhi;
972 }
973 
974 /// Insert an IR expression which computes the value held by the IV IndVar
975 /// (which must be a loop counter w/ unit stride) after the backedge of loop L
976 /// is taken ExitCount times.
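///
/// As a sketch of the arithmetic performed below: for a unit-stride counter
/// starting at Start, the value after the backedge is taken ExitCount times is
///   Limit = Start + ExitCount          (compared against the pre-inc IV)
///   Limit = Start + ExitCount + 1      (compared against the post-inc IV)
/// computed with wrapping 2's complement arithmetic and materialized via SCEV
/// expansion.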
977 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
978  const SCEV *ExitCount, bool UsePostInc, Loop *L,
979  SCEVExpander &Rewriter, ScalarEvolution *SE) {
980  assert(isLoopCounter(IndVar, L, SE));
981  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
982  const SCEV *IVInit = AR->getStart();
983  assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
984 
985  // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
986  // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
987  // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
988  // the existing GEPs whenever possible.
989  if (IndVar->getType()->isPointerTy() &&
990  !ExitCount->getType()->isPointerTy()) {
991  // IVOffset will be the new GEP offset that is interpreted by GEP as a
992  // signed value. ExitCount on the other hand represents the loop trip count,
993  // which is an unsigned value. FindLoopCounter only allows induction
994  // variables that have a positive unit stride of one. This means we don't
995  // have to handle the case of negative offsets (yet) and just need to zero
996  // extend ExitCount.
997  Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
998  const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
999  if (UsePostInc)
1000  IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));
1001 
1002  // Expand the code for the iteration count.
1003  assert(SE->isLoopInvariant(IVOffset, L) &&
1004  "Computed iteration count is not loop invariant!");
1005 
1006  const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
1007  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1008  return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
1009  } else {
1010  // In any other case, convert both IVInit and ExitCount to integers before
1011  // comparing. This may result in SCEV expansion of pointers, but in practice
1012  // SCEV will fold the pointer arithmetic away as such:
1013  // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
1014  //
1015  // Valid Cases: (1) both integers is most common; (2) both may be pointers
1016  // for simple memset-style loops.
1017  //
1018  // IVInit integer and ExitCount pointer would only occur if a canonical IV
1019  // were generated on top of case #2, which is not expected.
1020 
1021  // For unit stride, IVCount = Start + ExitCount with 2's complement
1022  // overflow.
1023 
1024  // For integer IVs, truncate the IV before computing IVInit + BECount,
1025  // unless we know a priori that the limit must be a constant when evaluated
1026  // in the bitwidth of the IV. We prefer (potentially) keeping a truncate
1027  // of the IV in the loop over a (potentially) expensive expansion of the
1028  // widened exit count add(zext(add)) expression.
1029  if (SE->getTypeSizeInBits(IVInit->getType())
1030  > SE->getTypeSizeInBits(ExitCount->getType())) {
1031  if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
1032  ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
1033  else
1034  IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
1035  }
1036 
1037  const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);
1038 
1039  if (UsePostInc)
1040  IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
1041 
1042  // Expand the code for the iteration count.
1043  assert(SE->isLoopInvariant(IVLimit, L) &&
1044  "Computed iteration count is not loop invariant!");
1045  // Ensure that we generate the same type as IndVar, or a smaller integer
1046  // type. In the presence of null pointer values, we have an integer type
1047  // SCEV expression (IVInit) for a pointer type IV value (IndVar).
1048  Type *LimitTy = ExitCount->getType()->isPointerTy() ?
1049  IndVar->getType() : ExitCount->getType();
1050  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1051  return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
1052  }
1053 }
1054 
1055 /// This method rewrites the exit condition of the loop to be a canonical !=
1056 /// comparison against the incremented loop induction variable. This pass is
1057 /// able to rewrite the exit tests of any loop where the SCEV analysis can
1058 /// determine a loop-invariant trip count of the loop, which is actually a much
1059 /// broader range than just linear tests.
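///
/// A hypothetical illustration (placeholder names): given a computable trip
/// count, an exit test such as
///   %cmp = icmp slt i32 %iv, %n
///   br i1 %cmp, label %body, label %exit
/// is rewritten into an equality test against a precomputed limit, e.g.
///   %cmp = icmp ne i32 %iv.next, %limit
/// which later passes and analyses handle more easily.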
1060 bool IndVarSimplify::
1061 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
1062  const SCEV *ExitCount,
1063  PHINode *IndVar, SCEVExpander &Rewriter) {
1064  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
1065  assert(isLoopCounter(IndVar, L, SE));
1066  Instruction * const IncVar =
1067  cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
1068 
1069  // Initialize CmpIndVar to the preincremented IV.
1070  Value *CmpIndVar = IndVar;
1071  bool UsePostInc = false;
1072 
1073  // If the exiting block is the same as the backedge block, we prefer to
1074  // compare against the post-incremented value, otherwise we must compare
1075  // against the preincremented value.
1076  if (ExitingBB == L->getLoopLatch()) {
1077  // For pointer IVs, we chose to not strip inbounds which requires us not
1078  // to add a potentially UB introducing use. We need to either a) show
1079  // the loop test we're modifying is already in post-inc form, or b) show
1080  // that adding a use must not introduce UB.
1081  bool SafeToPostInc =
1082  IndVar->getType()->isIntegerTy() ||
1083  isLoopExitTestBasedOn(IncVar, ExitingBB) ||
1084  mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
1085  if (SafeToPostInc) {
1086  UsePostInc = true;
1087  CmpIndVar = IncVar;
1088  }
1089  }
1090 
1091  // It may be necessary to drop nowrap flags on the incrementing instruction
1092  // if either LFTR moves from a pre-inc check to a post-inc check (in which
1093  // case the increment might have previously been poison on the last iteration
1094  // only) or if LFTR switches to a different IV that was previously dynamically
1095  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
1096  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
1097  // check), because the pre-inc addrec flags may be adopted from the original
1098  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
1099  // TODO: This handling is inaccurate for one case: If we switch to a
1100  // dynamically dead IV that wraps on the first loop iteration only, which is
1101  // not covered by the post-inc addrec. (If the new IV was not dynamically
1102  // dead, it could not be poison on the first iteration in the first place.)
1103  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
1104  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
1105  if (BO->hasNoUnsignedWrap())
1106  BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
1107  if (BO->hasNoSignedWrap())
1108  BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
1109  }
1110 
1111  Value *ExitCnt = genLoopLimit(
1112  IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
1113  assert(ExitCnt->getType()->isPointerTy() ==
1114  IndVar->getType()->isPointerTy() &&
1115  "genLoopLimit missed a cast");
1116 
1117  // Insert a new icmp_ne or icmp_eq instruction before the branch.
1118  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1119  ICmpInst::Predicate P;
1120  if (L->contains(BI->getSuccessor(0)))
1121  P = ICmpInst::ICMP_NE;
1122  else
1123  P = ICmpInst::ICMP_EQ;
1124 
1125  IRBuilder<> Builder(BI);
1126 
1127  // The new loop exit condition should reuse the debug location of the
1128  // original loop exit condition.
1129  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
1130  Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
1131 
1132  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
1133  // avoid the expensive expansion of the limit expression in the wider type,
1134  // emit a truncate to narrow the IV to the ExitCount type. This is safe
1135  // since we know (from the exit count bitwidth), that we can't self-wrap in
1136  // the narrower type.
1137  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
1138  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
1139  if (CmpIndVarSize > ExitCntSize) {
1140  assert(!CmpIndVar->getType()->isPointerTy() &&
1141  !ExitCnt->getType()->isPointerTy());
1142 
1143  // Before resorting to actually inserting the truncate, use the same
1144  // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
1145  // the other side of the comparison instead. We still evaluate the limit
1146  // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
1147  // a truncate within in.
1148  bool Extended = false;
1149  const SCEV *IV = SE->getSCEV(CmpIndVar);
1150  const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
1151  ExitCnt->getType());
1152  const SCEV *ZExtTrunc =
1153  SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
1154 
1155  if (ZExtTrunc == IV) {
1156  Extended = true;
1157  ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
1158  "wide.trip.count");
1159  } else {
1160  const SCEV *SExtTrunc =
1161  SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
1162  if (SExtTrunc == IV) {
1163  Extended = true;
1164  ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
1165  "wide.trip.count");
1166  }
1167  }
1168 
1169  if (Extended) {
1170  bool Discard;
1171  L->makeLoopInvariant(ExitCnt, Discard);
1172  } else
1173  CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
1174  "lftr.wideiv");
1175  }
1176  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
1177  << " LHS:" << *CmpIndVar << '\n'
1178  << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
1179  << "\n"
1180  << " RHS:\t" << *ExitCnt << "\n"
1181  << "ExitCount:\t" << *ExitCount << "\n"
1182  << " was: " << *BI->getCondition() << "\n");
1183 
1184  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
1185  Value *OrigCond = BI->getCondition();
1186  // It's tempting to use replaceAllUsesWith here to fully replace the old
1187  // comparison, but that's not immediately safe, since users of the old
1188  // comparison may not be dominated by the new comparison. Instead, just
1189  // update the branch to use the new comparison; in the common case this
1190  // will make old comparison dead.
1191  BI->setCondition(Cond);
1192  DeadInsts.emplace_back(OrigCond);
1193 
1194  ++NumLFTR;
1195  return true;
1196 }
1197 
1198 //===----------------------------------------------------------------------===//
1199 // sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
1200 //===----------------------------------------------------------------------===//
1201 
1202 /// If there's a single exit block, sink any loop-invariant values that
1203 /// were defined in the preheader but not used inside the loop into the
1204 /// exit block to reduce register pressure in the loop.
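///
/// A hypothetical illustration (placeholder names): if the preheader computes
///   %addr = getelementptr i32, ptr %base, i64 %ofs
/// and %addr is only used after the loop, the GEP can be moved into the single
/// exit block so its value is not live across the loop body.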
1205 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
1206  BasicBlock *ExitBlock = L->getExitBlock();
1207  if (!ExitBlock) return false;
1208 
1209  BasicBlock *Preheader = L->getLoopPreheader();
1210  if (!Preheader) return false;
1211 
1212  bool MadeAnyChanges = false;
1213  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
1214  BasicBlock::iterator I(Preheader->getTerminator());
1215  while (I != Preheader->begin()) {
1216  --I;
1217  // New instructions were inserted at the end of the preheader.
1218  if (isa<PHINode>(I))
1219  break;
1220 
1221  // Don't move instructions which might have side effects, since the side
1222  // effects need to complete before instructions inside the loop. Also don't
1223  // move instructions which might read memory, since the loop may modify
1224  // memory. Note that it's okay if the instruction might have undefined
1225  // behavior: LoopSimplify guarantees that the preheader dominates the exit
1226  // block.
1227  if (I->mayHaveSideEffects() || I->mayReadFromMemory())
1228  continue;
1229 
1230  // Skip debug info intrinsics.
1231  if (isa<DbgInfoIntrinsic>(I))
1232  continue;
1233 
1234  // Skip eh pad instructions.
1235  if (I->isEHPad())
1236  continue;
1237 
1238  // Don't sink alloca: we never want to sink static alloca's out of the
1239  // entry block, and correctly sinking dynamic alloca's requires
1240  // checks for stacksave/stackrestore intrinsics.
1241  // FIXME: Refactor this check somehow?
1242  if (isa<AllocaInst>(I))
1243  continue;
1244 
1245  // Determine if there is a use in or before the loop (direct or
1246  // otherwise).
1247  bool UsedInLoop = false;
1248  for (Use &U : I->uses()) {
1249  Instruction *User = cast<Instruction>(U.getUser());
1250  BasicBlock *UseBB = User->getParent();
1251  if (PHINode *P = dyn_cast<PHINode>(User)) {
1252  unsigned i =
1253  PHINode::getIncomingValueNumForOperand(U.getOperandNo());
1254  UseBB = P->getIncomingBlock(i);
1255  }
1256  if (UseBB == Preheader || L->contains(UseBB)) {
1257  UsedInLoop = true;
1258  break;
1259  }
1260  }
1261 
1262  // If there is, the def must remain in the preheader.
1263  if (UsedInLoop)
1264  continue;
1265 
1266  // Otherwise, sink it to the exit block.
1267  Instruction *ToMove = &*I;
1268  bool Done = false;
1269 
1270  if (I != Preheader->begin()) {
1271  // Skip debug info intrinsics.
1272  do {
1273  --I;
1274  } while (I->isDebugOrPseudoInst() && I != Preheader->begin());
1275 
1276  if (I->isDebugOrPseudoInst() && I == Preheader->begin())
1277  Done = true;
1278  } else {
1279  Done = true;
1280  }
1281 
1282  MadeAnyChanges = true;
1283  ToMove->moveBefore(*ExitBlock, InsertPt);
1284  SE->forgetValue(ToMove);
1285  if (Done) break;
1286  InsertPt = ToMove->getIterator();
1287  }
1288 
1289  return MadeAnyChanges;
1290 }
1291 
1292 static void replaceExitCond(BranchInst *BI, Value *NewCond,
1293  SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1294  auto *OldCond = BI->getCondition();
1295  LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
1296  << " with " << *NewCond << "\n");
1297  BI->setCondition(NewCond);
1298  if (OldCond->use_empty())
1299  DeadInsts.emplace_back(OldCond);
1300 }
1301 
1302 static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
1303  SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1304  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1305  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1306  auto *OldCond = BI->getCondition();
1307  auto *NewCond =
1308  ConstantInt::get(OldCond->getType(), IsTaken ? ExitIfTrue : !ExitIfTrue);
1309  replaceExitCond(BI, NewCond, DeadInsts);
1310 }
1311 
1312 static void replaceLoopPHINodesWithPreheaderValues(
1313  LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1314  ScalarEvolution &SE) {
1315  assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
1316  auto *LoopPreheader = L->getLoopPreheader();
1317  auto *LoopHeader = L->getHeader();
1318  SmallVector<Instruction *> Worklist;
1319  for (auto &PN : LoopHeader->phis()) {
1320  auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
1321  for (User *U : PN.users())
1322  Worklist.push_back(cast<Instruction>(U));
1323  SE.forgetValue(&PN);
1324  PN.replaceAllUsesWith(PreheaderIncoming);
1325  DeadInsts.emplace_back(&PN);
1326  }
1327 
1328  // Replacing with the preheader value will often allow IV users to simplify
1329  // (especially if the preheader value is a constant).
1330  SmallPtrSet<Instruction *, 4> Visited;
1331  while (!Worklist.empty()) {
1332  auto *I = cast<Instruction>(Worklist.pop_back_val());
1333  if (!Visited.insert(I).second)
1334  continue;
1335 
1336  // Don't simplify instructions outside the loop.
1337  if (!L->contains(I))
1338  continue;
1339 
1340  Value *Res = simplifyInstruction(I, I->getModule()->getDataLayout());
1341  if (Res && LI->replacementPreservesLCSSAForm(I, Res)) {
1342  for (User *U : I->users())
1343  Worklist.push_back(cast<Instruction>(U));
1344  I->replaceAllUsesWith(Res);
1345  DeadInsts.emplace_back(I);
1346  }
1347  }
1348 }
1349 
1350 static void replaceWithInvariantCond(
1351  const Loop *L, BasicBlock *ExitingBB, ICmpInst::Predicate InvariantPred,
1352  const SCEV *InvariantLHS, const SCEV *InvariantRHS, SCEVExpander &Rewriter,
1353  SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1354  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1355  Rewriter.setInsertPoint(BI);
1356  auto *LHSV = Rewriter.expandCodeFor(InvariantLHS);
1357  auto *RHSV = Rewriter.expandCodeFor(InvariantRHS);
1358  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1359  if (ExitIfTrue)
1360  InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
1361  IRBuilder<> Builder(BI);
1362  auto *NewCond = Builder.CreateICmp(InvariantPred, LHSV, RHSV,
1363  BI->getCondition()->getName());
1364  replaceExitCond(BI, NewCond, DeadInsts);
1365 }
1366 
1367 static bool optimizeLoopExitWithUnknownExitCount(
1368  const Loop *L, BranchInst *BI, BasicBlock *ExitingBB,
1369  const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
1370  ScalarEvolution *SE, SCEVExpander &Rewriter,
1371  SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1372  ICmpInst::Predicate Pred;
1373  Value *LHS, *RHS;
1374  BasicBlock *TrueSucc, *FalseSucc;
1375  if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
1376  m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))
1377  return false;
1378 
1379  assert((L->contains(TrueSucc) != L->contains(FalseSucc)) &&
1380  "Not a loop exit!");
1381 
1382  // 'LHS pred RHS' should now mean that we stay in loop.
1383  if (L->contains(FalseSucc))
1384  Pred = CmpInst::getInversePredicate(Pred);
1385 
1386  // If we are proving loop exit, invert the predicate.
1387  if (Inverted)
1388  Pred = CmpInst::getInversePredicate(Pred);
1389 
1390  const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
1391  const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
1392  // Can we prove it to be trivially true?
1393  if (SE->isKnownPredicateAt(Pred, LHSS, RHSS, BI)) {
1394  foldExit(L, ExitingBB, Inverted, DeadInsts);
1395  return true;
1396  }
1397  // Further logic works for non-inverted condition only.
1398  if (Inverted)
1399  return false;
1400 
1401  auto *ARTy = LHSS->getType();
1402  auto *MaxIterTy = MaxIter->getType();
1403  // If possible, adjust types.
1404  if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
1405  MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
1406  else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
1407  const SCEV *MinusOne = SE->getMinusOne(ARTy);
1408  auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
1409  if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
1410  MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
1411  }
1412 
1413  if (SkipLastIter) {
1414  const SCEV *One = SE->getOne(MaxIter->getType());
1415  MaxIter = SE->getMinusSCEV(MaxIter, One);
1416  }
1417 
1418  // Check if there is a loop-invariant predicate equivalent to our check.
1419  auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
1420  L, BI, MaxIter);
1421  if (!LIP)
1422  return false;
1423 
1424  // Can we prove it to be trivially true?
1425  if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
1426  foldExit(L, ExitingBB, Inverted, DeadInsts);
1427  else
1428  replaceWithInvariantCond(L, ExitingBB, LIP->Pred, LIP->LHS, LIP->RHS,
1429  Rewriter, DeadInsts);
1430 
1431  return true;
1432 }
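A minimal sketch of the "stay in the loop" normalization above, assuming the exiting block terminates in a conditional branch over an integer compare; the helper name below is made up for this sketch.

  #include "llvm/Analysis/LoopInfo.h"
  #include "llvm/IR/Instructions.h"
  #include "llvm/Support/Casting.h"
  using namespace llvm;

  // Hypothetical helper: return the predicate that must hold for control to
  // remain in the loop, mirroring the normalization done above.
  ICmpInst::Predicate stayInLoopPredicate(const Loop *L, BranchInst *BI) {
    auto *Cmp = cast<ICmpInst>(BI->getCondition());
    ICmpInst::Predicate Pred = Cmp->getPredicate();
    // If the 'true' successor leaves the loop, the loop only continues when
    // the compare is false, so the "stay in loop" predicate is the inverse.
    if (!L->contains(BI->getSuccessor(0)))
      Pred = ICmpInst::getInversePredicate(Pred);
    return Pred;
  }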
1433 
1434 bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
1435  // Note: This is duplicating a particular part of SimplifyIndVars reasoning.
1436  // We need to duplicate it because given icmp zext(small-iv), C, IVUsers
1437  // never reaches the icmp since the zext doesn't fold to an AddRec unless
1438  // it already has flags. The alternative to this would be to extend the
1439  // set of "interesting" IV users to include the icmp, but doing that
1440  // regresses results in practice because it queries SCEVs before the trip
1441  // counts which rely on them, which results in SCEV caching sub-optimal
1442  // answers. The concern about caching sub-optimal results is why we only
1443  // query SCEVs of the loop invariant RHS here.
1444  SmallVector<BasicBlock*, 16> ExitingBlocks;
1445  L->getExitingBlocks(ExitingBlocks);
1446  bool Changed = false;
1447  for (auto *ExitingBB : ExitingBlocks) {
1448  auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1449  if (!BI)
1450  continue;
1451  assert(BI->isConditional() && "exit branch must be conditional");
1452 
1453  auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1454  if (!ICmp || !ICmp->hasOneUse())
1455  continue;
1456 
1457  auto *LHS = ICmp->getOperand(0);
1458  auto *RHS = ICmp->getOperand(1);
1459  // For the range reasoning, avoid computing SCEVs in the loop to avoid
1460  // poisoning the cache with sub-optimal results. For the must-execute case,
1461  // this is a necessary precondition for correctness.
1462  if (!L->isLoopInvariant(RHS)) {
1463  if (!L->isLoopInvariant(LHS))
1464  continue;
1465  // Same logic applies for the inverse case
1466  std::swap(LHS, RHS);
1467  }
1468 
1469  // Match (icmp signed-cond zext, RHS)
1470  Value *LHSOp = nullptr;
1471  if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
1472  continue;
1473 
1474  const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
1475  const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
1476  const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
1477  auto FullCR = ConstantRange::getFull(InnerBitWidth);
1478  FullCR = FullCR.zeroExtend(OuterBitWidth);
1479  auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
1480  if (FullCR.contains(RHSCR)) {
1481  // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
1482  // replace the signed condition with the unsigned version.
1483  ICmp->setPredicate(ICmp->getUnsignedPredicate());
1484  Changed = true;
1485  // Note: No SCEV invalidation needed. We've changed the predicate, but
1486  // have not changed exit counts, or the values produced by the compare.
1487  continue;
1488  }
1489  }
1490 
1491  // Now that we've canonicalized the condition to match the extend,
1492  // see if we can rotate the extend out of the loop.
1493  for (auto *ExitingBB : ExitingBlocks) {
1494  auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1495  if (!BI)
1496  continue;
1497  assert(BI->isConditional() && "exit branch must be conditional");
1498 
1499  auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1500  if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
1501  continue;
1502 
1503  bool Swapped = false;
1504  auto *LHS = ICmp->getOperand(0);
1505  auto *RHS = ICmp->getOperand(1);
1506  if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
1507  // Nothing to rotate
1508  continue;
1509  if (L->isLoopInvariant(LHS)) {
1510  // Same logic applies for the inverse case until we actually pick
1511  // which operand of the compare to update.
1512  Swapped = true;
1513  std::swap(LHS, RHS);
1514  }
1514  }
1515 
1516 
1517  // Match (icmp unsigned-cond zext, RHS)
1518  // TODO: Extend to handle corresponding sext/signed-cmp case
1519  // TODO: Extend to other invertible functions
1520  Value *LHSOp = nullptr;
1521  if (!match(LHS, m_ZExt(m_Value(LHSOp))))
1522  continue;
1523 
1524  // In general, we only rotate if we can do so without increasing the number
1525  // of instructions. The exception is when we have a zext(add-rec). The
1526  // reason for allowing this exception is that we know we need to get rid
1527  // of the zext for SCEV to be able to compute a trip count for said loops;
1528  // we consider the new trip count valuable enough to increase instruction
1529  // count by one.
1530  if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
1531  continue;
1532 
1533  // Given an icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS
1534  // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
1535  // when the zext is loop varying and RHS is loop invariant. This converts
1536  // loop varying work to loop-invariant work.
1537  auto doRotateTransform = [&]() {
1538  assert(ICmp->isUnsigned() && "must have proven unsigned already");
1539  auto *NewRHS =
1540  CastInst::Create(Instruction::Trunc, RHS, LHSOp->getType(), "",
1541  L->getLoopPreheader()->getTerminator());
1542  ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
1543  ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
1544  if (LHS->use_empty())
1545  DeadInsts.push_back(LHS);
1546  };
1547 
1548 
1549  const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
1550  const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
1551  const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
1552  auto FullCR = ConstantRange::getFull(InnerBitWidth);
1553  FullCR = FullCR.zeroExtend(OuterBitWidth);
1554  auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
1555  if (FullCR.contains(RHSCR)) {
1556  doRotateTransform();
1557  Changed = true;
1558  // Note, we are leaving SCEV in an unfortunately imprecise state here
1559  // as rotation tends to reveal information about trip counts not
1560  // previously visible.
1561  continue;
1562  }
1563  }
1564 
1565  return Changed;
1566 }
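Both loops above gate the rewrite on the same ConstantRange containment test: the transform only fires when every value the loop-invariant RHS can take lies inside the zero-extended image of the narrow (pre-extension) type. A standalone sketch with made-up bit widths and bounds:

  #include "llvm/ADT/APInt.h"
  #include "llvm/IR/ConstantRange.h"
  #include <cassert>
  using namespace llvm;

  void rangeCheckSketch() {
    const unsigned InnerBitWidth = 8, OuterBitWidth = 32;
    // Image of all i8 values after zero extension to i32: [0, 256).
    ConstantRange FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    // RHS known to lie in [0, 200): contained, so the compare can be
    // rewritten in terms of the narrow type.
    ConstantRange Narrow(APInt(OuterBitWidth, 0), APInt(OuterBitWidth, 200));
    // RHS that may reach 299: not contained, so the transform is skipped.
    ConstantRange Wide(APInt(OuterBitWidth, 0), APInt(OuterBitWidth, 300));
    bool NarrowFits = FullCR.contains(Narrow); // true
    bool WideFits = FullCR.contains(Wide);     // false
    assert(NarrowFits && !WideFits);
    (void)NarrowFits;
    (void)WideFits;
  }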
1567 
1568 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1569  SmallVector<BasicBlock*, 16> ExitingBlocks;
1570  L->getExitingBlocks(ExitingBlocks);
1571 
1572  // Remove all exits which aren't both rewriteable and guaranteed to
1573  // execute on every iteration.
1574  llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1575  // If our exiting block exits multiple loops, we can only rewrite the
1576  // innermost one. Otherwise, we're changing how many times the innermost
1577  // loop runs before it exits.
1578  if (LI->getLoopFor(ExitingBB) != L)
1579  return true;
1580 
1581  // Can't rewrite non-branch yet.
1582  BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1583  if (!BI)
1584  return true;
1585 
1586  // Likewise, the loop latch must be dominated by the exiting BB.
1587  if (!DT->dominates(ExitingBB, L->getLoopLatch()))
1588  return true;
1589 
1590  if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
1591  // If already constant, nothing to do. However, if this is an
1592  // unconditional exit, we can still replace header phis with their
1593  // preheader value.
1594  if (!L->contains(BI->getSuccessor(CI->isNullValue())))
1595  replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1596  return true;
1597  }
1598 
1599  return false;
1600  });
1601 
1602  if (ExitingBlocks.empty())
1603  return false;
1604 
1605  // Get a symbolic upper bound on the loop backedge taken count.
1606  const SCEV *MaxExitCount = SE->getSymbolicMaxBackedgeTakenCount(L);
1607  if (isa<SCEVCouldNotCompute>(MaxExitCount))
1608  return false;
1609 
1610  // Visit our exit blocks in order of dominance. Since all exits must
1611  // dominate the latch, we know there is a total dominance order
1612  // between them.
1613  llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1614  // std::sort sorts in ascending order, so we want the inverse of
1615  // the normal dominance relation.
1616  if (A == B) return false;
1617  if (DT->properlyDominates(A, B))
1618  return true;
1619  else {
1620  assert(DT->properlyDominates(B, A) &&
1621  "expected total dominance order!");
1622  return false;
1623  }
1624  });
1625 #ifdef ASSERT
1626  for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
1627  assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
1628  }
1629 #endif
1630 
1631  bool Changed = false;
1632  bool SkipLastIter = false;
1633  SmallSet<const SCEV*, 8> DominatingExitCounts;
1634  for (BasicBlock *ExitingBB : ExitingBlocks) {
1635  const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1636  if (isa<SCEVCouldNotCompute>(ExitCount)) {
1637  // Okay, we do not know the exit count here. Can we at least prove that it
1638  // will remain the same within iteration space?
1639  auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1640  auto OptimizeCond = [&](bool Inverted, bool SkipLastIter) {
1641  return optimizeLoopExitWithUnknownExitCount(
1642  L, BI, ExitingBB, MaxExitCount, Inverted, SkipLastIter, SE,
1643  Rewriter, DeadInsts);
1644  };
1645 
1646  // TODO: We might have proved that we can skip the last iteration for
1647  // this check. In this case, we only want to check the condition on the
1648  // pre-last iteration (MaxExitCount - 1). However, there is a nasty
1649  // corner case:
1650  //
1651  // for (i = len; i != 0; i--) { ... check (i ult X) ... }
1652  //
1653  // If we could not prove that len != 0, then we also could not prove that
1654  // (len - 1) is not UINT_MAX. If we simply query (len - 1), then
1655  // OptimizeCond will likely not prove anything for it, even if it could
1656  // prove the same fact for len.
1657  //
1658  // As a temporary solution, we query both last and pre-last iterations in
1659  // hope that we will be able to prove triviality for at least one of
1660  // them. We can stop querying MaxExitCount for this case once SCEV
1661  // understands that (MaxExitCount - 1) will not overflow here.
1662  if (OptimizeCond(false, false) || OptimizeCond(true, false))
1663  Changed = true;
1664  else if (SkipLastIter)
1665  if (OptimizeCond(false, true) || OptimizeCond(true, true))
1666  Changed = true;
1667  continue;
1668  }
1669 
1670  if (MaxExitCount == ExitCount)
1671  // If the loop has more than 1 iteration, all further checks will be
1672  // executed for one fewer iteration.
1673  SkipLastIter = true;
1674 
1675  // If we know we'd exit on the first iteration, rewrite the exit to
1676  // reflect this. This does not imply the loop must exit through this
1677  // exit; there may be an earlier one taken on the first iteration.
1678  // We know that the backedge can't be taken, so we replace all
1679  // the header PHIs with values coming from the preheader.
1680  if (ExitCount->isZero()) {
1681  foldExit(L, ExitingBB, true, DeadInsts);
1682  replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1683  Changed = true;
1684  continue;
1685  }
1686 
1687  assert(ExitCount->getType()->isIntegerTy() &&
1688  MaxExitCount->getType()->isIntegerTy() &&
1689  "Exit counts must be integers");
1690 
1691  Type *WiderType =
1692  SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
1693  ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
1694  MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
1695  assert(MaxExitCount->getType() == ExitCount->getType());
1696 
1697  // Can we prove that some other exit must be taken strictly before this
1698  // one?
1699  if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
1700  MaxExitCount, ExitCount)) {
1701  foldExit(L, ExitingBB, false, DeadInsts);
1702  Changed = true;
1703  continue;
1704  }
1705 
1706  // As we run, keep track of which exit counts we've encountered. If we
1707  // find a duplicate, we've found an exit which would have exited on the
1708  // exiting iteration, but (from the visit order) strictly follows another
1709  // which does the same and is thus dead.
1710  if (!DominatingExitCounts.insert(ExitCount).second) {
1711  foldExit(L, ExitingBB, false, DeadInsts);
1712  Changed = true;
1713  continue;
1714  }
1715 
1716  // TODO: There might be another opportunity to leverage SCEV's reasoning
1717  // here. If we kept track of the min of dominating exits so far, we could
1718  // discharge exits with EC >= MDEC. This is less powerful than the existing
1719  // transform (since later exits aren't considered), but potentially more
1720  // powerful for any case where SCEV can prove a >=u b, but neither a == b
1721  // nor a >u b. Such a case is not currently known.
1722  }
1723  return Changed;
1724 }
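The "last vs. pre-last iteration" comment above comes down to unsigned wraparound: if len cannot be proven nonzero, then len - 1 may be UINT_MAX, so nothing proven about len carries over to len - 1. A tiny worked example:

  #include <cassert>
  #include <climits>

  void skipLastIterPitfall() {
    unsigned Len = 0;           // suppose we could not prove Len != 0
    unsigned PreLast = Len - 1; // unsigned arithmetic wraps around
    assert(PreLast == UINT_MAX);
    (void)PreLast;
  }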
1725 
1726 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
1727  SmallVector<BasicBlock*, 16> ExitingBlocks;
1728  L->getExitingBlocks(ExitingBlocks);
1729 
1730  // Finally, see if we can rewrite our exit conditions into a loop invariant
1731  // form. If we have a read-only loop, and we can tell that we must exit down
1732  // a path which does not need any of the values computed within the loop, we
1733  // can rewrite the loop to exit on the first iteration. Note that this
1734  // neither a) tells us the loop exits on the first iteration (unless
1735  // *all* exits are predicateable) nor b) tells us *which* exit might be taken.
1736  // This transformation looks a lot like a restricted form of dead loop
1737  // elimination, but restricted to read-only loops and without necessarily
1738  // needing to kill the loop entirely.
1739  if (!LoopPredication)
1740  return false;
1741 
1742  // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
1743  // through *explicit* control flow. We have to eliminate the possibility of
1744  // implicit exits (see below) before we know it's truly exact.
1745  const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
1746  if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC))
1747  return false;
1748 
1749  assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
1750  assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");
1751 
1752  auto BadExit = [&](BasicBlock *ExitingBB) {
1753  // If our exiting block exits multiple loops, we can only rewrite the
1754  // innermost one. Otherwise, we're changing how many times the innermost
1755  // loop runs before it exits.
1756  if (LI->getLoopFor(ExitingBB) != L)
1757  return true;
1758 
1759  // Can't rewrite non-branch yet.
1760  BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1761  if (!BI)
1762  return true;
1763 
1764  // If already constant, nothing to do.
1765  if (isa<Constant>(BI->getCondition()))
1766  return true;
1767 
1768  // If the exit block has phis, we need to be able to compute the values
1769  // within the loop which contains them. This assumes trivial LCSSA phis
1770  // have already been removed; TODO: generalize
1771  BasicBlock *ExitBlock =
1772  BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
1773  if (!ExitBlock->phis().empty())
1774  return true;
1775 
1776  const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1777  if (isa<SCEVCouldNotCompute>(ExitCount) ||
1778  !Rewriter.isSafeToExpand(ExitCount))
1779  return true;
1780 
1781  assert(SE->isLoopInvariant(ExitCount, L) &&
1782  "Exit count must be loop invariant");
1783  assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
1784  return false;
1785  };
1786 
1787  // If we have any exits which can't be predicated themselves, then we can't
1788  // predicate any exit which isn't guaranteed to execute before it. Consider
1789  // two exits (a) and (b) which would both exit on the same iteration. If we
1790  // can predicate (b), but not (a), and (a) precedes (b) along some path, then
1791  // we could convert a loop from exiting through (a) to one exiting through
1792  // (b). Note that this problem exists only for exits with the same exit
1793  // count, and we could be more aggressive when exit counts are known to be unequal.
1794  llvm::sort(ExitingBlocks,
1795  [&](BasicBlock *A, BasicBlock *B) {
1796  // std::sort sorts in ascending order, so we want the inverse of
1797  // the normal dominance relation, plus a tie breaker for blocks
1798  // unordered by dominance.
1799  if (DT->properlyDominates(A, B)) return true;
1800  if (DT->properlyDominates(B, A)) return false;
1801  return A->getName() < B->getName();
1802  });
1803  // Check to see if our exit blocks are a total order (i.e. a linear chain of
1804  // exits before the backedge). If they aren't, reasoning about reachability
1805  // is complicated and we choose not to for now.
1806  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
1807  if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
1808  return false;
1809 
1810  // Given our sorted total order, we know that exit[j] must be evaluated
1811  // after all exit[i] such that j > i.
1812  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
1813  if (BadExit(ExitingBlocks[i])) {
1814  ExitingBlocks.resize(i);
1815  break;
1816  }
1817 
1818  if (ExitingBlocks.empty())
1819  return false;
1820 
1821  // We rely on not being able to reach an exiting block on a later iteration
1822  // than its statically computed exit count. The implementation of
1823  // getExitCount currently has this invariant, but assert it here so that
1824  // breakage is obvious if this ever changes.
1825  assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1826  return DT->dominates(ExitingBB, L->getLoopLatch());
1827  }));
1828 
1829  // At this point, ExitingBlocks consists of only those blocks which are
1830  // predicatable. Given that, we know we have at least one exit we can
1831  // predicate if the loop doesn't have side effects and doesn't have any
1832  // implicit exits (because then our exact BTC isn't actually exact).
1833  // @Reviewers - As structured, this is O(I^2) for loop nests. Any
1834  // suggestions on how to improve this? I can obviously bail out for outer
1835  // loops, but that seems less than ideal. MemorySSA can find memory writes,
1836  // is that enough for *all* side effects?
1837  for (BasicBlock *BB : L->blocks())
1838  for (auto &I : *BB)
1839  // TODO:isGuaranteedToTransfer
1840  if (I.mayHaveSideEffects())
1841  return false;
1842 
1843  bool Changed = false;
1844  // Finally, do the actual predication for all predicatable blocks. A couple
1845  // of notes here:
1846  // 1) We don't bother to constant fold dominated exits with identical exit
1847  // counts; that's simply a form of CSE/equality propagation and we leave
1848  // it for dedicated passes.
1849  // 2) We insert the comparison at the branch. Hoisting introduces additional
1850  // legality constraints and we leave that to dedicated logic. We want to
1851  // predicate even if we can't insert a loop invariant expression as
1852  // peeling or unrolling will likely reduce the cost of the otherwise loop
1853  // varying check.
1854  Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
1855  IRBuilder<> B(L->getLoopPreheader()->getTerminator());
1856  Value *ExactBTCV = nullptr; // Lazily generated if needed.
1857  for (BasicBlock *ExitingBB : ExitingBlocks) {
1858  const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1859 
1860  auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1861  Value *NewCond;
1862  if (ExitCount == ExactBTC) {
1863  NewCond = L->contains(BI->getSuccessor(0)) ?
1864  B.getFalse() : B.getTrue();
1865  } else {
1866  Value *ECV = Rewriter.expandCodeFor(ExitCount);
1867  if (!ExactBTCV)
1868  ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
1869  Value *RHS = ExactBTCV;
1870  if (ECV->getType() != RHS->getType()) {
1871  Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
1872  ECV = B.CreateZExt(ECV, WiderTy);
1873  RHS = B.CreateZExt(RHS, WiderTy);
1874  }
1875  auto Pred = L->contains(BI->getSuccessor(0)) ?
1876  ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1877  NewCond = B.CreateICmp(Pred, ECV, RHS);
1878  }
1879  Value *OldCond = BI->getCondition();
1880  BI->setCondition(NewCond);
1881  if (OldCond->use_empty())
1882  DeadInsts.emplace_back(OldCond);
1883  Changed = true;
1884  }
1885 
1886  return Changed;
1887 }
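A small model of the loop-invariant condition formed above; the helper below is hypothetical. Assuming the exact backedge-taken count is the minimum over the individual exit counts, a given exit can only be the one actually taken when its own count equals that minimum, so any other predicatable exit can be rewritten to never fire.

  #include <cstdint>

  // Models the ICMP_NE condition with the in-loop successor on the branch's
  // true edge: keep looping (never take this exit) whenever some other exit
  // must fire first.
  bool staysInLoop(uint64_t ExitCount, uint64_t ExactBTC) {
    return ExitCount != ExactBTC;
  }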
1888 
1889 //===----------------------------------------------------------------------===//
1890 // IndVarSimplify driver. Manage several subpasses of IV simplification.
1891 //===----------------------------------------------------------------------===//
1892 
1893 bool IndVarSimplify::run(Loop *L) {
1894  // We need (and expect!) the incoming loop to be in LCSSA.
1895  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
1896  "LCSSA required to run indvars!");
1897 
1898  // If LoopSimplify form is not available, stay out of trouble. Some notes:
1899  // - LSR currently only supports LoopSimplify-form loops. Indvars'
1900  // canonicalization can be a pessimization without LSR to "clean up"
1901  // afterwards.
1902  // - We depend on having a preheader; in particular,
1903  // Loop::getCanonicalInductionVariable only supports loops with preheaders,
1904  // and we're in trouble if we can't find the induction variable even when
1905  // we've manually inserted one.
1906  // - LFTR relies on having a single backedge.
1907  if (!L->isLoopSimplifyForm())
1908  return false;
1909 
1910 #ifndef NDEBUG
1911  // Used below for a consistency check only
1912  // Note: Since the result returned by ScalarEvolution may depend on the order
1913  // in which previous results are added to its cache, the call to
1914  // getBackedgeTakenCount() may change following SCEV queries.
1915  const SCEV *BackedgeTakenCount;
1916  if (VerifyIndvars)
1917  BackedgeTakenCount = SE->getBackedgeTakenCount(L);
1918 #endif
1919 
1920  bool Changed = false;
1921  // If there are any floating-point recurrences, attempt to
1922  // transform them to use integer recurrences.
1923  Changed |= rewriteNonIntegerIVs(L);
1924 
1925  // Create a rewriter object which we'll use to transform the code with.
1926  SCEVExpander Rewriter(*SE, DL, "indvars");
1927 #ifndef NDEBUG
1928  Rewriter.setDebugType(DEBUG_TYPE);
1929 #endif
1930 
1931  // Eliminate redundant IV users.
1932  //
1933  // Simplification works best when run before other consumers of SCEV. We
1934  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
1935  // other expressions involving loop IVs have been evaluated. This helps SCEV
1936  // set no-wrap flags before normalizing sign/zero extension.
1937  Rewriter.disableCanonicalMode();
1938  Changed |= simplifyAndExtend(L, Rewriter, LI);
1939 
1940  // Check to see if we can compute the final value of any expressions
1941  // that are recurrent in the loop, and substitute the exit values from the
1942  // loop into any instructions outside of the loop that use the final values
1943  // of the current expressions.
1944  if (ReplaceExitValue != NeverRepl) {
1945  if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
1946  ReplaceExitValue, DeadInsts)) {
1947  NumReplaced += Rewrites;
1948  Changed = true;
1949  }
1950  }
1951 
1952  // Eliminate redundant IV cycles.
1953  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);
1954 
1955  // Try to convert exit conditions to unsigned and rotate computation
1956  // out of the loop. Note: Handles invalidation internally if needed.
1957  Changed |= canonicalizeExitCondition(L);
1958 
1959  // Try to eliminate loop exits based on analyzable exit counts
1960  if (optimizeLoopExits(L, Rewriter)) {
1961  Changed = true;
1962  // Given we've changed exit counts, notify SCEV
1963  // Some nested loops may share the same folded exit basic block,
1964  // thus we need to notify the topmost loop.
1965  SE->forgetTopmostLoop(L);
1966  }
1967 
1968  // Try to form loop invariant tests for loop exits by changing how many
1969  // iterations of the loop run when that is unobservable.
1970  if (predicateLoopExits(L, Rewriter)) {
1971  Changed = true;
1972  // Given we've changed exit counts, notify SCEV
1973  SE->forgetLoop(L);
1974  }
1975 
1976  // If we have a trip count expression, rewrite the loop's exit condition
1977  // using it.
1978  if (!DisableLFTR) {
1979  BasicBlock *PreHeader = L->getLoopPreheader();
1980 
1981  SmallVector<BasicBlock*, 16> ExitingBlocks;
1982  L->getExitingBlocks(ExitingBlocks);
1983  for (BasicBlock *ExitingBB : ExitingBlocks) {
1984  // Can't rewrite non-branch yet.
1985  if (!isa<BranchInst>(ExitingBB->getTerminator()))
1986  continue;
1987 
1988  // If our exiting block exits multiple loops, we can only rewrite the
1989  // innermost one. Otherwise, we're changing how many times the innermost
1990  // loop runs before it exits.
1991  if (LI->getLoopFor(ExitingBB) != L)
1992  continue;
1993 
1994  if (!needsLFTR(L, ExitingBB))
1995  continue;
1996 
1997  const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1998  if (isa<SCEVCouldNotCompute>(ExitCount))
1999  continue;
2000 
2001  // This was handled above, but as we form SCEVs, we can sometimes refine
2002  // existing ones; this allows exit counts to be folded to zero which
2003  // weren't when optimizeLoopExits saw them. Arguably, we should iterate
2004  // until stable to handle cases like this better.
2005  if (ExitCount->isZero())
2006  continue;
2007 
2008  PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
2009  if (!IndVar)
2010  continue;
2011 
2012  // Avoid high cost expansions. Note: This heuristic is questionable in
2013  // that our definition of "high cost" is not exactly principled.
2014  if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
2015  TTI, PreHeader->getTerminator()))
2016  continue;
2017 
2018  // Check preconditions for proper SCEVExpander operation. SCEV does not
2019  // express SCEVExpander's dependencies, such as LoopSimplify. Instead
2020  // any pass that uses the SCEVExpander must do it. This does not work
2021  // well for loop passes because SCEVExpander makes assumptions about
2022  // all loops, while LoopPassManager only forces the current loop to be
2023  // simplified.
2024  //
2025  // FIXME: SCEV expansion has no way to bail out, so the caller must
2026  // explicitly check any assumptions made by SCEV. Brittle.
2027  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
2028  if (!AR || AR->getLoop()->getLoopPreheader())
2029  Changed |= linearFunctionTestReplace(L, ExitingBB,
2030  ExitCount, IndVar,
2031  Rewriter);
2032  }
2033  }
2034  // Clear the rewriter cache, because values that are in the rewriter's cache
2035  // can be deleted in the loop below, causing the AssertingVH in the cache to
2036  // trigger.
2037  Rewriter.clear();
2038 
2039  // Now that we're done iterating through lists, clean up any instructions
2040  // which are now dead.
2041  while (!DeadInsts.empty()) {
2042  Value *V = DeadInsts.pop_back_val();
2043 
2044  if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
2045  Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
2046  else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
2047  Changed |=
2048  RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
2049  }
2050 
2051  // The Rewriter may not be used from this point on.
2052 
2053  // Loop-invariant instructions in the preheader that aren't used in the
2054  // loop may be sunk below the loop to reduce register pressure.
2055  Changed |= sinkUnusedInvariants(L);
2056 
2057  // rewriteFirstIterationLoopExitValues does not rely on the computation of
2058  // trip count and therefore can further simplify exit values in addition to
2059  // rewriteLoopExitValues.
2060  Changed |= rewriteFirstIterationLoopExitValues(L);
2061 
2062  // Clean up dead instructions.
2063  Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());
2064 
2065  // Check a post-condition.
2066  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2067  "Indvars did not preserve LCSSA!");
2068 
2069  // Verify that LFTR and any other changes have not interfered with SCEV's
2070  // ability to compute trip count. We may have *changed* the exit count, but
2071  // only by reducing it.
2072 #ifndef NDEBUG
2073  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
2074  SE->forgetLoop(L);
2075  const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
2076  if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
2077  SE->getTypeSizeInBits(NewBECount->getType()))
2078  NewBECount = SE->getTruncateOrNoop(NewBECount,
2079  BackedgeTakenCount->getType());
2080  else
2081  BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
2082  NewBECount->getType());
2083  assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
2084  NewBECount) && "indvars must preserve SCEV");
2085  }
2086  if (VerifyMemorySSA && MSSAU)
2087  MSSAU->getMemorySSA()->verifyMemorySSA();
2088 #endif
2089 
2090  return Changed;
2091 }
2092 
2093 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2094  LoopStandardAnalysisResults &AR,
2095  LPMUpdater &) {
2096  Function *F = L.getHeader()->getParent();
2097  const DataLayout &DL = F->getParent()->getDataLayout();
2098 
2099  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
2100  WidenIndVars && AllowIVWidening);
2101  if (!IVS.run(&L))
2102  return PreservedAnalyses::all();
2103 
2104  auto PA = getLoopPassPreservedAnalyses();
2105  PA.preserveSet<CFGAnalyses>();
2106  if (AR.MSSA)
2107  PA.preserve<MemorySSAAnalysis>();
2108  return PA;
2109 }
2110 
2111 namespace {
2112 
2113 struct IndVarSimplifyLegacyPass : public LoopPass {
2114  static char ID; // Pass identification, replacement for typeid
2115 
2116  IndVarSimplifyLegacyPass() : LoopPass(ID) {
2117  initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
2118  }
2119 
2120  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
2121  if (skipLoop(L))
2122  return false;
2123 
2124  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2125  auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2126  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2127  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2128  auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
2129  auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2130  auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2131  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2132  auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
2133  MemorySSA *MSSA = nullptr;
2134  if (MSSAAnalysis)
2135  MSSA = &MSSAAnalysis->getMSSA();
2136 
2137  IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA, AllowIVWidening);
2138  return IVS.run(L);
2139  }
2140 
2141  void getAnalysisUsage(AnalysisUsage &AU) const override {
2142  AU.setPreservesCFG();
2143  AU.addPreserved<MemorySSAWrapperPass>();
2144  getLoopAnalysisUsage(AU);
2145  }
2146 };
2147 
2148 } // end anonymous namespace
2149 
2150 char IndVarSimplifyLegacyPass::ID = 0;
2151 
2152 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
2153  "Induction Variable Simplification", false, false)
2154 INITIALIZE_PASS_DEPENDENCY(LoopPass)
2155 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
2156  "Induction Variable Simplification", false, false)
2157 
2158 Pass *llvm::createIndVarSimplifyPass() {
2159  return new IndVarSimplifyLegacyPass();
2160 }
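A hedged usage sketch, not part of this file: scheduling the pass over a module through the legacy pass manager via the factory defined above. Under the new pass manager the same transformation runs as the "indvars" loop pass (for example, opt -passes=indvars).

  #include "llvm/IR/LegacyPassManager.h"
  #include "llvm/IR/Module.h"
  #include "llvm/Transforms/Scalar.h"

  void runIndVarSimplify(llvm::Module &M) {
    llvm::legacy::PassManager PM;
    PM.add(llvm::createIndVarSimplifyPass()); // pass manager takes ownership
    PM.run(M);
  }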
mustExecuteUBIfPoisonOnPathTo
static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root, Instruction *OnPathTo, DominatorTree *DT)
Return true if undefined behavior would provable be executed on the path to OnPathTo if Root produced...
Definition: IndVarSimplify.cpp:770
i
i
Definition: README.txt:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
llvm::CmpInst::FCMP_ULE
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:735
Int32Ty
IntegerType * Int32Ty
Definition: NVVMIntrRange.cpp:67
llvm::RecursivelyDeleteTriviallyDeadInstructions
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:519
llvm::rewriteLoopExitValues
int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed,...
Definition: LoopUtils.cpp:1260
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:60
MathExtras.h
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::createWideIV
PHINode * createWideIV(const WideIVInfo &WI, LoopInfo *LI, ScalarEvolution *SE, SCEVExpander &Rewriter, DominatorTree *DT, SmallVectorImpl< WeakTrackingVH > &DeadInsts, unsigned &NumElimExt, unsigned &NumWidened, bool HasGuards, bool UsePostIncrementRanges)
Widen Induction Variables - Extend the width of an IV to cover its widest uses.
Definition: SimplifyIndVar.cpp:2087
llvm::WideIVInfo::WidestNativeType
Type * WidestNativeType
Definition: SimplifyIndVar.h:68
llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:69
llvm::CmpInst::ICMP_EQ
@ ICMP_EQ
equal
Definition: InstrTypes.h:741
llvm::LoopBase::getExitBlock
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:107
Optional.h
llvm::WeakTrackingVH
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:204
llvm::ScalarEvolution::getEffectiveSCEVType
Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
Definition: ScalarEvolution.cpp:4345
ConvertToSInt
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal)
Convert APF to an integer, if possible.
Definition: IndVarSimplify.cpp:195
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::AArch64PACKey::ID
ID
Definition: AArch64BaseInfo.h:818
llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:720
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
PHI
Rewrite undef for PHI
Definition: AMDGPURewriteUndefForPHI.cpp:101
llvm::SCEVAddRecExpr::isAffine
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
Definition: ScalarEvolutionExpressions.h:370
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
IntrinsicInst.h
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:237
ScalarEvolutionExpander.h
Scalar.h
llvm::SCEVAddRecExpr::getStart
const SCEV * getStart() const
Definition: ScalarEvolutionExpressions.h:353
MemorySSAUpdater.h
llvm::Function
Definition: Function.h:60
llvm::OnlyCheapRepl
@ OnlyCheapRepl
Definition: LoopUtils.h:440
llvm::initializeIndVarSimplifyLegacyPassPass
void initializeIndVarSimplifyLegacyPassPass(PassRegistry &)
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Pass.h
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:139
llvm::SCEVExpander
This class uses information about analyze scalars to rewrite expressions in canonical form.
Definition: ScalarEvolutionExpander.h:50
Simplification
Induction Variable Simplification
Definition: IndVarSimplify.cpp:2156
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
Statistic.h
AlmostDeadIV
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond)
Return true if this IV has any uses other than the (soon to be rewritten) loop exit test.
Definition: IndVarSimplify.cpp:849
llvm::SCEVNAryExpr::hasNoUnsignedWrap
bool hasNoUnsignedWrap() const
Definition: ScalarEvolutionExpressions.h:217
llvm::CmpInst::FCMP_ONE
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:728
IndVarSimplify.h
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:173
llvm::Intrinsic::getName
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:942
llvm::IRBuilder<>
hasConcreteDefImpl
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl< Value * > &Visited, unsigned Depth)
Recursive helper for hasConcreteDef().
Definition: IndVarSimplify.cpp:808
llvm::CastInst::Create
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Definition: Instructions.cpp:3340
llvm::erase_if
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:1997
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:742
llvm::CmpInst::getInversePredicate
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:834
ValueTracking.h
Local.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::ScalarEvolution::getTruncateExpr
const SCEV * getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
Definition: ScalarEvolution.cpp:1214
llvm::getLoopAnalysisUsage
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:142
llvm::ScalarEvolution::getTypeSizeInBits
uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
Definition: ScalarEvolution.cpp:4335
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
llvm::PatternMatch::m_Br
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
Definition: PatternMatch.h:1731
llvm::Depth
@ Depth
Definition: SIMachineScheduler.h:36
llvm::NeverRepl
@ NeverRepl
Definition: LoopUtils.h:439
llvm::CmpInst::ICMP_SGT
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:747
ScalarEvolution.h
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
Module.h
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:51
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:136
llvm::ConstantFP::getValueAPF
const APFloat & getValueAPF() const
Definition: Constants.h:298
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
VerifyIndvars
static cl::opt< bool > VerifyIndvars("verify-indvars", cl::Hidden, cl::desc("Verify the ScalarEvolution result after running indvars. Has no " "effect in release builds. (Note: this adds additional SCEV " "queries potentially changing the analysis result)"))
Operator.h
llvm::CmpInst::ICMP_SLE
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:750
STLExtras.h
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::ScalarEvolution::getTruncateOrZeroExtend
const SCEV * getTruncateOrZeroExtend(const SCEV *V, Type *Ty, unsigned Depth=0)
Return a SCEV corresponding to a conversion of the input value to the specified type.
Definition: ScalarEvolution.cpp:4600
replaceExitCond
static void replaceExitCond(BranchInst *BI, Value *NewCond, SmallVectorImpl< WeakTrackingVH > &DeadInsts)
Definition: IndVarSimplify.cpp:1292
llvm::LoopStandardAnalysisResults::DT
DominatorTree & DT
Definition: LoopAnalysisManager.h:54
llvm::CmpInst::FCMP_OGT
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:724
llvm::PHINode::setIncomingValue
void setIncomingValue(unsigned i, Value *V)
Definition: Instructions.h:2798
getLoopPhiForCounter
static PHINode * getLoopPhiForCounter(Value *IncV, Loop *L)
Given an Value which is hoped to be part of an add recurance in the given loop, return the associated...
Definition: IndVarSimplify.cpp:665
Use.h
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:239
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::WideIVInfo
Collect information about induction variables that are used by sign/zero extend operations.
Definition: SimplifyIndVar.h:64
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::RecursivelyDeleteDeadPHINode
bool RecursivelyDeleteDeadPHINode(PHINode *PN, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is an effectively dead PHI node, due to being a def-use chain of single-use no...
Definition: Local.cpp:629
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::mustTriggerUB
bool mustTriggerUB(const Instruction *I, const SmallSet< const Value *, 16 > &KnownPoison)
Return true if the given instruction must trigger undefined behavior when I is executed with any oper...
Definition: ValueTracking.cpp:5655
llvm::CmpInst::FCMP_ULT
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:734
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::DominatorTree::dominates
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
Instruction.h
CommandLine.h
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
genLoopLimit
static Value * genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, const SCEV *ExitCount, bool UsePostInc, Loop *L, SCEVExpander &Rewriter, ScalarEvolution *SE)
Insert an IR expression which computes the value held by the IV IndVar (which must be an loop counter...
Definition: IndVarSimplify.cpp:977
llvm::Instruction::getOpcode
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:169
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1734
indvars
indvars
Definition: IndVarSimplify.cpp:2155
llvm::APFloat::convertToInteger
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1120
llvm::TargetTransformInfo::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
Definition: TargetTransformInfo.cpp:791
llvm::MemorySSAWrapperPass
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:984
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24
Rewriter
Virtual Register Rewriter
Definition: VirtRegMap.cpp:237
llvm::ScalarEvolution::getOne
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
Definition: ScalarEvolution.h:649
llvm::ScalarEvolution::getLoopInvariantExitCondDuringFirstIterations
Optional< LoopInvariantPredicate > getLoopInvariantExitCondDuringFirstIterations(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L, const Instruction *CtxI, const SCEV *MaxIter)
If the result of the predicate LHS Pred RHS is loop invariant with respect to L at given Context duri...
Definition: ScalarEvolution.cpp:10967
Constants.h
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::PHINode::getIncomingValue
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
Definition: Instructions.h:2795
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::User
Definition: User.h:44
Intrinsics.h
llvm::CmpInst::ICMP_ULE
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:746
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
LoopPredication
static cl::opt< bool > LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true), cl::desc("Predicate conditions in read only loops"))
InstrTypes.h
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:306
llvm::FCmpInst
This instruction compares its operands according to the predicate given to the constructor.
Definition: Instructions.h:1360
SI
@ SI
Definition: SIInstrInfo.cpp:7966
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::CmpInst::FCMP_UGE
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:733
llvm::NoHardUse
@ NoHardUse
Definition: LoopUtils.h:441
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:195
llvm::APFloatBase::opOK
@ opOK
Definition: APFloat.h:217
TargetLibraryInfo.h
llvm::LoopInfo::replacementPreservesLCSSAForm
bool replacementPreservesLCSSAForm(Instruction *From, Value *To)
Returns true if replacing From with To everywhere is guaranteed to preserve LCSSA form.
Definition: LoopInfo.h:1140
llvm::BasicBlock::getFirstInsertionPt
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:246
false
Definition: StackSlotColoring.cpp:141
llvm::PatternMatch::m_ZExt
CastClass_match< OpTy, Instruction::ZExt > m_ZExt(const OpTy &Op)
Matches ZExt.
Definition: PatternMatch.h:1629
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::PHINode::getIncomingValueForBlock
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Definition: Instructions.h:2884
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:188
llvm::Instruction
Definition: Instruction.h:42
llvm::BranchInst::setCondition
void setCondition(Value *V)
Definition: Instructions.h:3220
llvm::WideIVInfo::IsSigned
bool IsSigned
Definition: SimplifyIndVar.h:71
llvm::BasicBlock::phis
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:364
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::ConstantFP
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:257
APFloat.h
llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition: SmallVector.h:642
llvm::LoopBase::getExitingBlocks
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:33
llvm::WideIVInfo::NarrowIV
PHINode * NarrowIV
Definition: SimplifyIndVar.h:65
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:879
llvm::createIndVarSimplifyPass
Pass * createIndVarSimplifyPass()
Definition: IndVarSimplify.cpp:2158
LoopUtils.h
llvm::CmpInst::FCMP_OEQ
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:723
llvm::LPPassManager
Definition: LoopPass.h:76
SmallPtrSet.h
llvm::CmpInst::FCMP_OLT
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:726
llvm::BasicBlock::getModule
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:147
PatternMatch.h
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars", "Induction Variable Simplification", false, false) INITIALIZE_PASS_END(IndVarSimplifyLegacyPass
llvm::PHINode::getNumIncomingValues
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Definition: Instructions.h:2791
llvm::Value::use_empty
bool use_empty() const
Definition: Value.h:344
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Type.h
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
ReplaceExitValue
static cl::opt< ReplaceExitVal > ReplaceExitValue("replexitval", cl::Hidden, cl::init(OnlyCheapRepl), cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), clEnumValN(OnlyCheapRepl, "cheap", "only replace exit value when the cost is cheap"), clEnumValN(UnusedIndVarInLoop, "unusedindvarinloop", "only replace exit value when it is an unused " "induction variable in the loop and has cheap replacement cost"), clEnumValN(NoHardUse, "noharduse", "only replace exit values when loop def likely dead"), clEnumValN(AlwaysRepl, "always", "always replace exit value whenever possible")))
llvm::dxil::PointerTypeAnalysis::run
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
Definition: PointerTypeAnalysis.cpp:189
LoopInfo.h
llvm::BranchInst::getCondition
Value * getCondition() const
Definition: Instructions.h:3215
AllowIVWidening
static cl::opt< bool > AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true), cl::desc("Allow widening of indvars to eliminate s/zext"))
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:4442
llvm::PHINode::getIncomingValueNumForOperand
static unsigned getIncomingValueNumForOperand(unsigned i)
Definition: Instructions.h:2809
llvm::simplifyUsersOfIV
bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, const TargetTransformInfo *TTI, SmallVectorImpl< WeakTrackingVH > &Dead, SCEVExpander &Rewriter, IVVisitor *V=nullptr)
simplifyUsersOfIV - Simplify instructions that use this induction variable by using ScalarEvolution t...
Definition: SimplifyIndVar.cpp:961
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:210
llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1682
llvm::getLoopPassPreservedAnalyses
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Definition: LoopAnalysisManager.cpp:138
llvm::Loop::isLoopSimplifyForm
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to,...
Definition: LoopInfo.cpp:479
BasicBlock.h
llvm::cl::opt< bool >
llvm::APFloat
Definition: APFloat.h:716
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:75
FindLoopCounter
static PHINode * FindLoopCounter(Loop *L, BasicBlock *ExitingBB, const SCEV *BECount, ScalarEvolution *SE, DominatorTree *DT)
Search the loop header for a loop counter (anadd rec w/step of one) suitable for use by LFTR.
Definition: IndVarSimplify.cpp:893
hasConcreteDef
static bool hasConcreteDef(Value *V)
Return true if the given value is concrete.
Definition: IndVarSimplify.cpp:841
llvm::cl::values
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:705
llvm::ICmpInst
This instruction compares its operands according to the predicate given to the constructor.
Definition: Instructions.h:1186
uint64_t
llvm::LoopPass
Definition: LoopPass.h:28
llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition: LoopPassManager.h:262
llvm::LoopBase::getUniqueExitBlocks
void getUniqueExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all unique successor blocks of this loop.
Definition: LoopInfoImpl.h:142
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2849
isLoopExitTestBasedOn
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB)
Whether the current loop exit test is based on this value.
Definition: IndVarSimplify.cpp:703
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:53
llvm::MemorySSA
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:700
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:992
optimizeLoopExitWithUnknownExitCount
static bool optimizeLoopExitWithUnknownExitCount(const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter, bool Inverted, bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter, SmallVectorImpl< WeakTrackingVH > &DeadInsts)
Definition: IndVarSimplify.cpp:1367
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::succ_begin
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:99
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:447
llvm::PHINode::getBasicBlockIndex
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
Definition: Instructions.h:2877
llvm::SCEVCheapExpansionBudget
cl::opt< unsigned > SCEVCheapExpansionBudget
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:183
llvm::simplifyInstruction
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q, OptimizationRemarkEmitter *ORE=nullptr)
See if we can compute a simplified version of this instruction.
Definition: InstructionSimplify.cpp:6600
llvm::IVVisitor
Interface for visiting interesting IV users that are recognized but not simplified by this utility.
Definition: SimplifyIndVar.h:34
ArrayRef.h
llvm::ScalarEvolution::forgetValue
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may effect its v...
Definition: ScalarEvolution.cpp:8432
llvm::AlwaysRepl
@ AlwaysRepl
Definition: LoopUtils.h:443
visitIVCast
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE, const TargetTransformInfo *TTI)
Update information about the induction variable that is extended by this sign or zero extend operatio...
Definition: IndVarSimplify.cpp:515
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:232
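The preheader/latch accessors above return null when the loop lacks the canonical shape; a hedged sketch of the usual guard (helper name invented):

  #include "llvm/Analysis/LoopInfo.h"

  // Many IV rewrites only apply to loops with a unique preheader and latch.
  static bool hasSimpleLoopSkeleton(const llvm::Loop &L) {
    llvm::BasicBlock *Preheader = L.getLoopPreheader(); // unique entering block, or null
    llvm::BasicBlock *Latch = L.getLoopLatch();         // unique back-edge source, or null
    return Preheader && Latch;
  }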
IRBuilder.h
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
llvm::CmpInst::FCMP_OGE
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:725
llvm::IndVarSimplifyPass::run
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: IndVarSimplify.cpp:2093
iterator_range.h
llvm::CmpInst::BAD_ICMP_PREDICATE
@ BAD_ICMP_PREDICATE
Definition: InstrTypes.h:753
llvm::TTI
TargetTransformInfo TTI
Definition: TargetTransformInfo.h:168
llvm::Loop::isRecursivelyLCSSAForm
bool isRecursivelyLCSSAForm(const DominatorTree &DT, const LoopInfo &LI, bool IgnoreTokens=true) const
Return true if this Loop and all inner subloops are in LCSSA form.
Definition: LoopInfo.cpp:469
llvm::MemorySSAAnalysis
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:934
Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::CmpInst::ICMP_SLT
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:749
llvm::LoopInfo
Definition: LoopInfo.h:1108
DataLayout.h
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:138
llvm::logicalview::LVAttributeKind::Range
@ Range
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by a pass iff it does not add or remove basic blocks from the function or modify terminator instructions in any way.
Definition: Pass.cpp:265
llvm::CmpInst::ICMP_ULT
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:745
LoopPass.h
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::CFGAnalyses
Represents analyses that only rely on functions' control flow.
Definition: PassManager.h:113
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
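A sketch of the replace-then-queue idiom that several helpers in this index share (the DeadInsts worklist of WeakTrackingVH mirrors the parameter lists shown above; the helper itself is illustrative):

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/IR/Instruction.h"
  #include "llvm/IR/ValueHandle.h"

  // Point every use of Old at New and remember Old for later deletion.
  static void replaceAndQueue(llvm::Instruction *Old, llvm::Value *New,
                              llvm::SmallVectorImpl<llvm::WeakTrackingVH> &DeadInsts) {
    New->takeName(Old);           // keep the readable name on the surviving value
    Old->replaceAllUsesWith(New); // all users now reference New
    DeadInsts.emplace_back(Old);  // tracking handle; stays valid even if Old is deleted elsewhere
  }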
uint32_t
Compiler.h
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:994
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:82
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
ConstantRange.h
clEnumValN
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:680
llvm::LoopStandardAnalysisResults::LI
LoopInfo & LI
Definition: LoopAnalysisManager.h:55
llvm::Loop::makeLoopInvariant
bool makeLoopInvariant(Value *V, bool &Changed, Instruction *InsertPt=nullptr, MemorySSAUpdater *MSSAU=nullptr, ScalarEvolution *SE=nullptr) const
If the given value is an instruction inside of the loop and it can be hoisted, do so to make it trivially loop-invariant.
Definition: LoopInfo.cpp:70
llvm::CastInst
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:430
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
ValueHandle.h
llvm::Init
Definition: Record.h:281
llvm::DataLayout::isLegalInteger
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU.
Definition: DataLayout.h:266
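A sketch of the kind of target query an IV-widening heuristic might make (helper name and policy are illustrative only, not this pass's actual rule):

  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/Type.h"

  // Only consider widening to a width the target supports natively (e.g. i64 on x86-64).
  static bool isNativeIntegerWidth(const llvm::DataLayout &DL, llvm::Type *WideTy) {
    return WideTy->isIntegerTy() && DL.isLegalInteger(WideTy->getIntegerBitWidth());
  }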
llvm::ScalarEvolution::isLoopInvariant
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
Definition: ScalarEvolution.cpp:13637
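A minimal sketch of asking SCEV whether a value is invariant in a loop (helper name invented; note the isSCEVable guard, since not every type has a SCEV):

  #include "llvm/Analysis/LoopInfo.h"
  #include "llvm/Analysis/ScalarEvolution.h"

  // True if SCEV can model V and proves it does not change across iterations of L.
  static bool isInvariantInLoop(llvm::Value *V, const llvm::Loop *L,
                                llvm::ScalarEvolution &SE) {
    if (!SE.isSCEVable(V->getType()))
      return false;
    return SE.isLoopInvariant(SE.getSCEV(V), L);
  }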
llvm::tgtok::IntVal
@ IntVal
Definition: TGLexer.h:65
llvm::CmpInst::FCMP_UGT
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:732
Constant.h
llvm::APFloatBase::rmTowardZero
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:204
llvm::SCEVAddRecExpr::getLoop
const Loop * getLoop() const
Definition: ScalarEvolutionExpressions.h:354
llvm::ScalarEvolution::isKnownPredicateAt
bool isKnownPredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
Definition: ScalarEvolution.cpp:10782
llvm::ScalarEvolution::getMinusSCEV
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
Definition: ScalarEvolution.cpp:4550
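A hedged sketch combining this with isKnownPredicateAt from the entry above: proving MaxIter - ExitCount >= 0 at a context instruction (the helper and the signedness choice are illustrative, not this pass's exact query):

  #include "llvm/Analysis/ScalarEvolution.h"
  #include "llvm/IR/Instructions.h"

  // Is ExitCount known to be no larger than MaxIter when control reaches CtxI?
  static bool exitCountFitsInMaxIter(llvm::ScalarEvolution &SE, const llvm::SCEV *ExitCount,
                                     const llvm::SCEV *MaxIter, const llvm::Instruction *CtxI) {
    const llvm::SCEV *Diff = SE.getMinusSCEV(MaxIter, ExitCount);
    return SE.isKnownPredicateAt(llvm::ICmpInst::ICMP_SGE, Diff,
                                 SE.getZero(Diff->getType()), CtxI);
  }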
isLoopCounter
static bool isLoopCounter(PHINode *Phi, Loop *L, ScalarEvolution *SE)
Return true if the given phi is a "counter" in L.
Definition: IndVarSimplify.cpp:864
llvm::Value::user_iterator
user_iterator_impl< User > user_iterator
Definition: Value.h:390
llvm::LoopStandardAnalysisResults::TTI
TargetTransformInfo & TTI
Definition: LoopAnalysisManager.h:58
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:348
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::PHINode::Create
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will have.
Definition: Instructions.h:2741
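A minimal sketch of building a two-input phi at the top of a loop header; block and value names are placeholders supplied by the caller:

  #include "llvm/IR/BasicBlock.h"
  #include "llvm/IR/Instructions.h"

  // Create %iv = phi [ Start, %preheader ], [ Next, %latch ] in Header.
  static llvm::PHINode *buildTwoInputPhi(llvm::Type *Ty, llvm::Value *Start, llvm::Value *Next,
                                         llvm::BasicBlock *Preheader, llvm::BasicBlock *Latch,
                                         llvm::BasicBlock *Header) {
    llvm::PHINode *Phi =
        llvm::PHINode::Create(Ty, /*NumReservedValues=*/2, "iv", &Header->front());
    Phi->addIncoming(Start, Preheader); // value on entry to the loop
    Phi->addIncoming(Next, Latch);      // value carried around the back edge
    return Phi;
  }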
DisableLFTR
static cl::opt< bool > DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), cl::desc("Disable Linear Function Test Replace optimization"))
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:342
Casting.h
DEBUG_TYPE
#define DEBUG_TYPE
Definition: IndVarSimplify.cpp:89
Function.h
needsLFTR
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB)
linearFunctionTestReplace policy.
Definition: IndVarSimplify.cpp:716
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:105
llvm::ScalarEvolution::isSCEVable
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Definition: ScalarEvolution.cpp:4328
PassManager.h
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:226
llvm::SCEV::isOne
bool isOne() const
Return true if the expression is a constant one.
Definition: ScalarEvolution.cpp:441
foldExit
static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken, SmallVectorImpl< WeakTrackingVH > &DeadInsts)
Definition: IndVarSimplify.cpp:1302
llvm::CmpInst::ICMP_SGE
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:748
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:185
llvm::SmallSet::insert
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:178
llvm::LoopStandardAnalysisResults::SE
ScalarEvolution & SE
Definition: LoopAnalysisManager.h:56
replaceWithInvariantCond
static void replaceWithInvariantCond(const Loop *L, BasicBlock *ExitingBB, ICmpInst::Predicate InvariantPred, const SCEV *InvariantLHS, const SCEV *InvariantRHS, SCEVExpander &Rewriter, SmallVectorImpl< WeakTrackingVH > &DeadInsts)
Definition: IndVarSimplify.cpp:1350
llvm::DeleteDeadPHIs
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
Definition: BasicBlockUtils.cpp:163
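A short sketch of the clean-up idiom, assuming the caller has just rewritten uses in an exit block (helper name invented; TLI may be null):

  #include "llvm/Analysis/TargetLibraryInfo.h"
  #include "llvm/Transforms/Utils/BasicBlockUtils.h"

  // Remove any PHIs in ExitBB that became trivially dead; returns true if something was deleted.
  static bool cleanUpExitPHIs(llvm::BasicBlock *ExitBB, const llvm::TargetLibraryInfo *TLI) {
    return llvm::DeleteDeadPHIs(ExitBB, TLI);
  }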
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:433
ScalarEvolutionExpressions.h
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
MemorySSA.h
llvm::ScalarEvolution::getZeroExtendExpr
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
Definition: ScalarEvolution.cpp:1605
Instructions.h
llvm::User::getNumOperands
unsigned getNumOperands() const
Definition: User.h:191
llvm::ScalarEvolution::getMinusOne
const SCEV * getMinusOne(Type *Ty)
Return a SCEV for the constant -1 of a specific type.
Definition: ScalarEvolution.h:652
SmallVector.h
llvm::SCEVNAryExpr::hasNoSignedWrap
bool hasNoSignedWrap() const
Definition: ScalarEvolutionExpressions.h:221
User.h
llvm::logicalview::LVAttributeKind::Extended
@ Extended
llvm::SIToFPInst
This class represents a cast from signed integer to floating point.
Definition: Instructions.h:5045
llvm::PatternMatch::m_ICmp
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
Definition: PatternMatch.h:1394
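A sketch of the pattern-matching style used for exit tests: extract the predicate and operands of a conditional branch whose condition is an integer compare (helper name invented):

  #include "llvm/IR/Instructions.h"
  #include "llvm/IR/PatternMatch.h"

  // On success, Pred/LHS/RHS describe the compare feeding the conditional branch BI.
  static bool matchBranchCompare(llvm::BranchInst *BI, llvm::ICmpInst::Predicate &Pred,
                                 llvm::Value *&LHS, llvm::Value *&RHS) {
    using namespace llvm::PatternMatch;
    return BI->isConditional() &&
           match(BI->getCondition(), m_ICmp(Pred, m_Value(LHS), m_Value(RHS)));
  }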
Dominators.h
llvm::CmpInst::FCMP_UNE
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:736
llvm::CastInst::getOpcode
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:677
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::SmallVectorImpl::pop_back_val
T pop_back_val()
Definition: SmallVector.h:677
llvm::PHINode::getIncomingBlock
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Definition: Instructions.h:2815
TargetTransformInfo.h
llvm::LoopStandardAnalysisResults::MSSA
MemorySSA * MSSA
Definition: LoopAnalysisManager.h:61
llvm::PHINode
Definition: Instructions.h:2699
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed, or null if the block is not well formed.
Definition: BasicBlock.h:119
IV
static const uint32_t IV[8]
Definition: blake3_impl.h:85
llvm::CmpInst::FCMP_OLE
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:727
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
llvm::Loop::isLCSSAForm
bool isLCSSAForm(const DominatorTree &DT, bool IgnoreTokens=true) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:462
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:399
DerivedTypes.h
llvm::SmallPtrSetImpl< Value * >
llvm::SCEV::getType
Type * getType() const
Return the LLVM type of this SCEV expression.
Definition: ScalarEvolution.cpp:403
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::propagatesPoison
bool propagatesPoison(const Operator *I)
Return true if I yields poison or raises UB if any of its operands is poison.
Definition: ValueTracking.cpp:5548
llvm::ScalarEvolution::getAddExpr
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:2493
llvm::makeMutableArrayRef
MutableArrayRef< T > makeMutableArrayRef(T &OneElt)
Construct a MutableArrayRef from a single element.
Definition: ArrayRef.h:533
llvm::VerifyMemorySSA
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:91
CreateAdd
static BinaryOperator * CreateAdd(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
Definition: Reassociate.cpp:241
llvm::Value::takeName
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:381
llvm::PatternMatch::m_BasicBlock
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
Definition: PatternMatch.h:168
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:413
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3134
raw_ostream.h
BasicBlockUtils.h
Value.h
llvm::SCEV::isZero
bool isZero() const
Return true if the expression is a constant zero.
Definition: ScalarEvolution.cpp:435
llvm::LoopStandardAnalysisResults::TLI
TargetLibraryInfo & TLI
Definition: LoopAnalysisManager.h:57
InitializePasses.h
llvm::ScalarEvolution::getSCEVAtScope
const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
Definition: ScalarEvolution.cpp:9621
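A hedged sketch of an exit-value query in the spirit of exit-value replacement: evaluating a loop-defined expression at the parent loop's scope (null for the outermost scope) gives, when SCEV can compute it, the value the expression holds after L finishes. The helper is illustrative and does no legality or profitability checks:

  #include "llvm/Analysis/LoopInfo.h"
  #include "llvm/Analysis/ScalarEvolution.h"

  // May still return a loop-variant SCEV if no exit value can be computed.
  static const llvm::SCEV *exitValueOf(llvm::ScalarEvolution &SE, llvm::Value *V,
                                       const llvm::Loop *L) {
    return SE.getSCEVAtScope(SE.getSCEV(V), L->getParentLoop());
  }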
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
replaceLoopPHINodesWithPreheaderValues
static void replaceLoopPHINodesWithPreheaderValues(LoopInfo *LI, Loop *L, SmallVectorImpl< WeakTrackingVH > &DeadInsts, ScalarEvolution &SE)
Definition: IndVarSimplify.cpp:1312
llvm::BranchInst::isConditional
bool isConditional() const
Definition: Instructions.h:3213
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3227
llvm::SCEVAddRecExpr::getStepRecurrence
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Definition: ScalarEvolutionExpressions.h:360
SimplifyIndVar.h
llvm::Instruction::moveBefore
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:107
llvm::UnusedIndVarInLoop
@ UnusedIndVarInLoop
Definition: LoopUtils.h:442
llvm::CmpInst::FCMP_UEQ
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:731
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
UsePostIncrementRanges
static cl::opt< bool > UsePostIncrementRanges("indvars-post-increment-ranges", cl::Hidden, cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), cl::init(true))
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941
SmallSet.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
llvm::PoisonValue::get
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1732
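A sketch of one way PoisonValue can be used when a value is known never to be meaningfully read (illustrative only; not necessarily how this pass uses it): forward the uses of a provably dead PHI to poison, then erase it.

  #include "llvm/IR/Constants.h"
  #include "llvm/IR/Instructions.h"

  // Forward all uses of a dead PHI to poison, then delete it.
  static void replacePhiWithPoison(llvm::PHINode *PN) {
    PN->replaceAllUsesWith(llvm::PoisonValue::get(PN->getType()));
    PN->eraseFromParent();
  }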