LLVM  15.0.0git
IndVarSimplify.cpp
Go to the documentation of this file.
1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This transformation analyzes and transforms the induction variables (and
10 // computations derived from them) into simpler forms suitable for subsequent
11 // analysis and transformation.
12 //
13 // If the trip count of a loop is computable, this pass also makes the following
14 // changes:
15 // 1. The exit condition for the loop is canonicalized to compare the
16 // induction value against the exit value. This turns loops like:
17 // 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18 // 2. Any use outside of the loop of an expression derived from the indvar
19 // is changed to compute the derived value outside of the loop, eliminating
20 // the dependence on the exit value of the induction variable. If the only
21 // purpose of the loop is to compute the exit value of some derived
22 // expression, this transformation will make the loop dead.
23 //
24 //===----------------------------------------------------------------------===//
25 
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/Optional.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/ADT/SmallPtrSet.h"
32 #include "llvm/ADT/SmallSet.h"
33 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/ADT/Statistic.h"
36 #include "llvm/Analysis/LoopInfo.h"
37 #include "llvm/Analysis/LoopPass.h"
45 #include "llvm/IR/BasicBlock.h"
46 #include "llvm/IR/Constant.h"
47 #include "llvm/IR/ConstantRange.h"
48 #include "llvm/IR/Constants.h"
49 #include "llvm/IR/DataLayout.h"
50 #include "llvm/IR/DerivedTypes.h"
51 #include "llvm/IR/Dominators.h"
52 #include "llvm/IR/Function.h"
53 #include "llvm/IR/IRBuilder.h"
54 #include "llvm/IR/InstrTypes.h"
55 #include "llvm/IR/Instruction.h"
56 #include "llvm/IR/Instructions.h"
57 #include "llvm/IR/IntrinsicInst.h"
58 #include "llvm/IR/Intrinsics.h"
59 #include "llvm/IR/Module.h"
60 #include "llvm/IR/Operator.h"
61 #include "llvm/IR/PassManager.h"
62 #include "llvm/IR/PatternMatch.h"
63 #include "llvm/IR/Type.h"
64 #include "llvm/IR/Use.h"
65 #include "llvm/IR/User.h"
66 #include "llvm/IR/Value.h"
67 #include "llvm/IR/ValueHandle.h"
68 #include "llvm/InitializePasses.h"
69 #include "llvm/Pass.h"
70 #include "llvm/Support/Casting.h"
72 #include "llvm/Support/Compiler.h"
73 #include "llvm/Support/Debug.h"
76 #include "llvm/Transforms/Scalar.h"
82 #include <cassert>
83 #include <cstdint>
84 #include <utility>
85 
86 using namespace llvm;
87 using namespace PatternMatch;
88 
89 #define DEBUG_TYPE "indvars"
90 
91 STATISTIC(NumWidened , "Number of indvars widened");
92 STATISTIC(NumReplaced , "Number of exit values replaced");
93 STATISTIC(NumLFTR , "Number of loop exit tests replaced");
94 STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
95 STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
96 
97 // Trip count verification can be enabled by default under NDEBUG if we
98 // implement a strong expression equivalence checker in SCEV. Until then, we
99 // use the verify-indvars flag, which may assert in some cases.
101  "verify-indvars", cl::Hidden,
102  cl::desc("Verify the ScalarEvolution result after running indvars. Has no "
103  "effect in release builds. (Note: this adds additional SCEV "
104  "queries potentially changing the analysis result)"));
105 
107  "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
108  cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
109  cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
110  clEnumValN(OnlyCheapRepl, "cheap",
111  "only replace exit value when the cost is cheap"),
112  clEnumValN(NoHardUse, "noharduse",
113  "only replace exit values when loop def likely dead"),
114  clEnumValN(AlwaysRepl, "always",
115  "always replace exit value whenever possible")));
116 
118  "indvars-post-increment-ranges", cl::Hidden,
119  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
120  cl::init(true));
121 
122 static cl::opt<bool>
123 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
124  cl::desc("Disable Linear Function Test Replace optimization"));
125 
126 static cl::opt<bool>
127 LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
128  cl::desc("Predicate conditions in read only loops"));
129 
130 static cl::opt<bool>
131 AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
132  cl::desc("Allow widening of indvars to eliminate s/zext"));
133 
134 namespace {
135 
136 class IndVarSimplify {
137  LoopInfo *LI;
138  ScalarEvolution *SE;
139  DominatorTree *DT;
140  const DataLayout &DL;
141  TargetLibraryInfo *TLI;
142  const TargetTransformInfo *TTI;
143  std::unique_ptr<MemorySSAUpdater> MSSAU;
144 
146  bool WidenIndVars;
147 
148  bool handleFloatingPointIV(Loop *L, PHINode *PH);
149  bool rewriteNonIntegerIVs(Loop *L);
150 
151  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
152  /// Try to improve our exit conditions by converting condition from signed
153  /// to unsigned or rotating computation out of the loop.
154  /// (See inline comment about why this is duplicated from simplifyAndExtend)
155  bool canonicalizeExitCondition(Loop *L);
156  /// Try to eliminate loop exits based on analyzeable exit counts
157  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
158  /// Try to form loop invariant tests for loop exits by changing how many
159  /// iterations of the loop run when that is unobservable.
160  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
161 
162  bool rewriteFirstIterationLoopExitValues(Loop *L);
163 
164  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
165  const SCEV *ExitCount,
166  PHINode *IndVar, SCEVExpander &Rewriter);
167 
168  bool sinkUnusedInvariants(Loop *L);
169 
170 public:
171  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
172  const DataLayout &DL, TargetLibraryInfo *TLI,
173  TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
174  : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
175  WidenIndVars(WidenIndVars) {
176  if (MSSA)
177  MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
178  }
179 
180  bool run(Loop *L);
181 };
182 
183 } // end anonymous namespace
184 
185 //===----------------------------------------------------------------------===//
186 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
187 //===----------------------------------------------------------------------===//
188 
189 /// Convert APF to an integer, if possible.
190 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
191  bool isExact = false;
192  // See if we can convert this to an int64_t
193  uint64_t UIntVal;
194  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
195  APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
196  !isExact)
197  return false;
198  IntVal = UIntVal;
199  return true;
200 }
201 
202 /// If the loop has floating induction variable then insert corresponding
203 /// integer induction variable if possible.
204 /// For example,
205 /// for(double i = 0; i < 10000; ++i)
206 /// bar(i)
207 /// is converted into
208 /// for(int i = 0; i < 10000; ++i)
209 /// bar((double)i);
210 bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
211  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
212  unsigned BackEdge = IncomingEdge^1;
213 
214  // Check incoming value.
215  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
216 
217  int64_t InitValue;
218  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
219  return false;
220 
221  // Check IV increment. Reject this PN if increment operation is not
222  // an add or increment value can not be represented by an integer.
223  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
224  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
225 
226  // If this is not an add of the PHI with a constantfp, or if the constant fp
227  // is not an integer, bail out.
228  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
229  int64_t IncValue;
230  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
231  !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
232  return false;
233 
234  // Check Incr uses. One user is PN and the other user is an exit condition
235  // used by the conditional terminator.
236  Value::user_iterator IncrUse = Incr->user_begin();
237  Instruction *U1 = cast<Instruction>(*IncrUse++);
238  if (IncrUse == Incr->user_end()) return false;
239  Instruction *U2 = cast<Instruction>(*IncrUse++);
240  if (IncrUse != Incr->user_end()) return false;
241 
242  // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
243  // only used by a branch, we can't transform it.
244  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
245  if (!Compare)
246  Compare = dyn_cast<FCmpInst>(U2);
247  if (!Compare || !Compare->hasOneUse() ||
248  !isa<BranchInst>(Compare->user_back()))
249  return false;
250 
251  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
252 
253  // We need to verify that the branch actually controls the iteration count
254  // of the loop. If not, the new IV can overflow and no one will notice.
255  // The branch block must be in the loop and one of the successors must be out
256  // of the loop.
257  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
258  if (!L->contains(TheBr->getParent()) ||
259  (L->contains(TheBr->getSuccessor(0)) &&
260  L->contains(TheBr->getSuccessor(1))))
261  return false;
262 
263  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
264  // transform it.
265  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
266  int64_t ExitValue;
267  if (ExitValueVal == nullptr ||
268  !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
269  return false;
270 
271  // Find new predicate for integer comparison.
273  switch (Compare->getPredicate()) {
274  default: return false; // Unknown comparison.
275  case CmpInst::FCMP_OEQ:
276  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
277  case CmpInst::FCMP_ONE:
278  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
279  case CmpInst::FCMP_OGT:
280  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
281  case CmpInst::FCMP_OGE:
282  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
283  case CmpInst::FCMP_OLT:
284  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
285  case CmpInst::FCMP_OLE:
286  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
287  }
288 
289  // We convert the floating point induction variable to a signed i32 value if
290  // we can. This is only safe if the comparison will not overflow in a way
291  // that won't be trapped by the integer equivalent operations. Check for this
292  // now.
293  // TODO: We could use i64 if it is native and the range requires it.
294 
295  // The start/stride/exit values must all fit in signed i32.
296  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
297  return false;
298 
299  // If not actually striding (add x, 0.0), avoid touching the code.
300  if (IncValue == 0)
301  return false;
302 
303  // Positive and negative strides have different safety conditions.
304  if (IncValue > 0) {
305  // If we have a positive stride, we require the init to be less than the
306  // exit value.
307  if (InitValue >= ExitValue)
308  return false;
309 
310  uint32_t Range = uint32_t(ExitValue-InitValue);
311  // Check for infinite loop, either:
312  // while (i <= Exit) or until (i > Exit)
313  if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
314  if (++Range == 0) return false; // Range overflows.
315  }
316 
317  unsigned Leftover = Range % uint32_t(IncValue);
318 
319  // If this is an equality comparison, we require that the strided value
320  // exactly land on the exit value, otherwise the IV condition will wrap
321  // around and do things the fp IV wouldn't.
322  if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
323  Leftover != 0)
324  return false;
325 
326  // If the stride would wrap around the i32 before exiting, we can't
327  // transform the IV.
328  if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
329  return false;
330  } else {
331  // If we have a negative stride, we require the init to be greater than the
332  // exit value.
333  if (InitValue <= ExitValue)
334  return false;
335 
336  uint32_t Range = uint32_t(InitValue-ExitValue);
337  // Check for infinite loop, either:
338  // while (i >= Exit) or until (i < Exit)
339  if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
340  if (++Range == 0) return false; // Range overflows.
341  }
342 
343  unsigned Leftover = Range % uint32_t(-IncValue);
344 
345  // If this is an equality comparison, we require that the strided value
346  // exactly land on the exit value, otherwise the IV condition will wrap
347  // around and do things the fp IV wouldn't.
348  if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
349  Leftover != 0)
350  return false;
351 
352  // If the stride would wrap around the i32 before exiting, we can't
353  // transform the IV.
354  if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
355  return false;
356  }
357 
359 
360  // Insert new integer induction variable.
361  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
362  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
363  PN->getIncomingBlock(IncomingEdge));
364 
365  Value *NewAdd =
367  Incr->getName()+".int", Incr);
368  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
369 
370  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
371  ConstantInt::get(Int32Ty, ExitValue),
372  Compare->getName());
373 
374  // In the following deletions, PN may become dead and may be deleted.
375  // Use a WeakTrackingVH to observe whether this happens.
376  WeakTrackingVH WeakPH = PN;
377 
378  // Delete the old floating point exit comparison. The branch starts using the
379  // new comparison.
380  NewCompare->takeName(Compare);
381  Compare->replaceAllUsesWith(NewCompare);
383 
384  // Delete the old floating point increment.
385  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
386  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());
387 
388  // If the FP induction variable still has uses, this is because something else
389  // in the loop uses its value. In order to canonicalize the induction
390  // variable, we chose to eliminate the IV and rewrite it in terms of an
391  // int->fp cast.
392  //
393  // We give preference to sitofp over uitofp because it is faster on most
394  // platforms.
395  if (WeakPH) {
396  Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
397  &*PN->getParent()->getFirstInsertionPt());
398  PN->replaceAllUsesWith(Conv);
399  RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
400  }
401  return true;
402 }
403 
404 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
405  // First step. Check to see if there are any floating-point recurrences.
406  // If there are, change them into integer recurrences, permitting analysis by
407  // the SCEV routines.
408  BasicBlock *Header = L->getHeader();
409 
411  for (PHINode &PN : Header->phis())
412  PHIs.push_back(&PN);
413 
414  bool Changed = false;
415  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
416  if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
417  Changed |= handleFloatingPointIV(L, PN);
418 
419  // If the loop previously had floating-point IV, ScalarEvolution
420  // may not have been able to compute a trip count. Now that we've done some
421  // re-writing, the trip count may be computable.
422  if (Changed)
423  SE->forgetLoop(L);
424  return Changed;
425 }
426 
427 //===---------------------------------------------------------------------===//
428 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
429 // they will exit at the first iteration.
430 //===---------------------------------------------------------------------===//
431 
432 /// Check to see if this loop has loop invariant conditions which lead to loop
433 /// exits. If so, we know that if the exit path is taken, it is at the first
434 /// loop iteration. This lets us predict exit values of PHI nodes that live in
435 /// loop header.
436 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
437  // Verify the input to the pass is already in LCSSA form.
438  assert(L->isLCSSAForm(*DT));
439 
440  SmallVector<BasicBlock *, 8> ExitBlocks;
441  L->getUniqueExitBlocks(ExitBlocks);
442 
443  bool MadeAnyChanges = false;
444  for (auto *ExitBB : ExitBlocks) {
445  // If there are no more PHI nodes in this exit block, then no more
446  // values defined inside the loop are used on this path.
447  for (PHINode &PN : ExitBB->phis()) {
448  for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
449  IncomingValIdx != E; ++IncomingValIdx) {
450  auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
451 
452  // Can we prove that the exit must run on the first iteration if it
453  // runs at all? (i.e. early exits are fine for our purposes, but
454  // traces which lead to this exit being taken on the 2nd iteration
455  // aren't.) Note that this is about whether the exit branch is
456  // executed, not about whether it is taken.
457  if (!L->getLoopLatch() ||
458  !DT->dominates(IncomingBB, L->getLoopLatch()))
459  continue;
460 
461  // Get condition that leads to the exit path.
462  auto *TermInst = IncomingBB->getTerminator();
463 
464  Value *Cond = nullptr;
465  if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
466  // Must be a conditional branch, otherwise the block
467  // should not be in the loop.
468  Cond = BI->getCondition();
469  } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
470  Cond = SI->getCondition();
471  else
472  continue;
473 
474  if (!L->isLoopInvariant(Cond))
475  continue;
476 
477  auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
478 
479  // Only deal with PHIs in the loop header.
480  if (!ExitVal || ExitVal->getParent() != L->getHeader())
481  continue;
482 
483  // If ExitVal is a PHI on the loop header, then we know its
484  // value along this exit because the exit can only be taken
485  // on the first iteration.
486  auto *LoopPreheader = L->getLoopPreheader();
487  assert(LoopPreheader && "Invalid loop");
488  int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
489  if (PreheaderIdx != -1) {
490  assert(ExitVal->getParent() == L->getHeader() &&
491  "ExitVal must be in loop header");
492  MadeAnyChanges = true;
493  PN.setIncomingValue(IncomingValIdx,
494  ExitVal->getIncomingValue(PreheaderIdx));
495  SE->forgetValue(&PN);
496  }
497  }
498  }
499  }
500  return MadeAnyChanges;
501 }
502 
503 //===----------------------------------------------------------------------===//
504 // IV Widening - Extend the width of an IV to cover its widest uses.
505 //===----------------------------------------------------------------------===//
506 
507 /// Update information about the induction variable that is extended by this
508 /// sign or zero extend operation. This is used to determine the final width of
509 /// the IV before actually widening it.
510 static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
511  ScalarEvolution *SE,
512  const TargetTransformInfo *TTI) {
513  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
514  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
515  return;
516 
517  Type *Ty = Cast->getType();
518  uint64_t Width = SE->getTypeSizeInBits(Ty);
519  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
520  return;
521 
522  // Check that `Cast` actually extends the induction variable (we rely on this
523  // later). This takes care of cases where `Cast` is extending a truncation of
524  // the narrow induction variable, and thus can end up being narrower than the
525  // "narrow" induction variable.
526  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
527  if (NarrowIVWidth >= Width)
528  return;
529 
530  // Cast is either an sext or zext up to this point.
531  // We should not widen an indvar if arithmetics on the wider indvar are more
532  // expensive than those on the narrower indvar. We check only the cost of ADD
533  // because at least an ADD is required to increment the induction variable. We
534  // could compute more comprehensively the cost of all instructions on the
535  // induction variable when necessary.
536  if (TTI &&
539  Cast->getOperand(0)->getType())) {
540  return;
541  }
542 
543  if (!WI.WidestNativeType ||
546  WI.IsSigned = IsSigned;
547  return;
548  }
549 
550  // We extend the IV to satisfy the sign of its user(s), or 'signed'
551  // if there are multiple users with both sign- and zero extensions,
552  // in order not to introduce nondeterministic behaviour based on the
553  // unspecified order of a PHI nodes' users-iterator.
554  WI.IsSigned |= IsSigned;
555 }
556 
557 //===----------------------------------------------------------------------===//
558 // Live IV Reduction - Minimize IVs live across the loop.
559 //===----------------------------------------------------------------------===//
560 
561 //===----------------------------------------------------------------------===//
562 // Simplification of IV users based on SCEV evaluation.
563 //===----------------------------------------------------------------------===//
564 
565 namespace {
566 
567 class IndVarSimplifyVisitor : public IVVisitor {
568  ScalarEvolution *SE;
569  const TargetTransformInfo *TTI;
570  PHINode *IVPhi;
571 
572 public:
573  WideIVInfo WI;
574 
575  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
576  const TargetTransformInfo *TTI,
577  const DominatorTree *DTree)
578  : SE(SCEV), TTI(TTI), IVPhi(IV) {
579  DT = DTree;
580  WI.NarrowIV = IVPhi;
581  }
582 
583  // Implement the interface used by simplifyUsersOfIV.
584  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
585 };
586 
587 } // end anonymous namespace
588 
589 /// Iteratively perform simplification on a worklist of IV users. Each
590 /// successive simplification may push more users which may themselves be
591 /// candidates for simplification.
592 ///
593 /// Sign/Zero extend elimination is interleaved with IV simplification.
594 bool IndVarSimplify::simplifyAndExtend(Loop *L,
596  LoopInfo *LI) {
598 
599  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
600  Intrinsic::getName(Intrinsic::experimental_guard));
601  bool HasGuards = GuardDecl && !GuardDecl->use_empty();
602 
603  SmallVector<PHINode *, 8> LoopPhis;
604  for (PHINode &PN : L->getHeader()->phis())
605  LoopPhis.push_back(&PN);
606 
607  // Each round of simplification iterates through the SimplifyIVUsers worklist
608  // for all current phis, then determines whether any IVs can be
609  // widened. Widening adds new phis to LoopPhis, inducing another round of
610  // simplification on the wide IVs.
611  bool Changed = false;
612  while (!LoopPhis.empty()) {
613  // Evaluate as many IV expressions as possible before widening any IVs. This
614  // forces SCEV to set no-wrap flags before evaluating sign/zero
615  // extension. The first time SCEV attempts to normalize sign/zero extension,
616  // the result becomes final. So for the most predictable results, we delay
617  // evaluation of sign/zero extend evaluation until needed, and avoid running
618  // other SCEV based analysis prior to simplifyAndExtend.
619  do {
620  PHINode *CurrIV = LoopPhis.pop_back_val();
621 
622  // Information about sign/zero extensions of CurrIV.
623  IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
624 
625  Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
626  &Visitor);
627 
628  if (Visitor.WI.WidestNativeType) {
629  WideIVs.push_back(Visitor.WI);
630  }
631  } while(!LoopPhis.empty());
632 
633  // Continue if we disallowed widening.
634  if (!WidenIndVars)
635  continue;
636 
637  for (; !WideIVs.empty(); WideIVs.pop_back()) {
638  unsigned ElimExt;
639  unsigned Widened;
640  if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
641  DT, DeadInsts, ElimExt, Widened,
642  HasGuards, UsePostIncrementRanges)) {
643  NumElimExt += ElimExt;
644  NumWidened += Widened;
645  Changed = true;
646  LoopPhis.push_back(WidePhi);
647  }
648  }
649  }
650  return Changed;
651 }
652 
653 //===----------------------------------------------------------------------===//
654 // linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
655 //===----------------------------------------------------------------------===//
656 
657 /// Given an Value which is hoped to be part of an add recurance in the given
658 /// loop, return the associated Phi node if so. Otherwise, return null. Note
659 /// that this is less general than SCEVs AddRec checking.
661  Instruction *IncI = dyn_cast<Instruction>(IncV);
662  if (!IncI)
663  return nullptr;
664 
665  switch (IncI->getOpcode()) {
666  case Instruction::Add:
667  case Instruction::Sub:
668  break;
669  case Instruction::GetElementPtr:
670  // An IV counter must preserve its type.
671  if (IncI->getNumOperands() == 2)
672  break;
674  default:
675  return nullptr;
676  }
677 
678  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
679  if (Phi && Phi->getParent() == L->getHeader()) {
680  if (L->isLoopInvariant(IncI->getOperand(1)))
681  return Phi;
682  return nullptr;
683  }
684  if (IncI->getOpcode() == Instruction::GetElementPtr)
685  return nullptr;
686 
687  // Allow add/sub to be commuted.
688  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
689  if (Phi && Phi->getParent() == L->getHeader()) {
690  if (L->isLoopInvariant(IncI->getOperand(0)))
691  return Phi;
692  }
693  return nullptr;
694 }
695 
696 /// Whether the current loop exit test is based on this value. Currently this
697 /// is limited to a direct use in the loop condition.
698 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
699  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
700  ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
701  // TODO: Allow non-icmp loop test.
702  if (!ICmp)
703  return false;
704 
705  // TODO: Allow indirect use.
706  return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
707 }
708 
709 /// linearFunctionTestReplace policy. Return true unless we can show that the
710 /// current exit test is already sufficiently canonical.
711 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
712  assert(L->getLoopLatch() && "Must be in simplified form");
713 
714  // Avoid converting a constant or loop invariant test back to a runtime
715  // test. This is critical for when SCEV's cached ExitCount is less precise
716  // than the current IR (such as after we've proven a particular exit is
717  // actually dead and thus the BE count never reaches our ExitCount.)
718  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
719  if (L->isLoopInvariant(BI->getCondition()))
720  return false;
721 
722  // Do LFTR to simplify the exit condition to an ICMP.
723  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
724  if (!Cond)
725  return true;
726 
727  // Do LFTR to simplify the exit ICMP to EQ/NE
728  ICmpInst::Predicate Pred = Cond->getPredicate();
729  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
730  return true;
731 
732  // Look for a loop invariant RHS
733  Value *LHS = Cond->getOperand(0);
734  Value *RHS = Cond->getOperand(1);
735  if (!L->isLoopInvariant(RHS)) {
736  if (!L->isLoopInvariant(LHS))
737  return true;
738  std::swap(LHS, RHS);
739  }
740  // Look for a simple IV counter LHS
741  PHINode *Phi = dyn_cast<PHINode>(LHS);
742  if (!Phi)
743  Phi = getLoopPhiForCounter(LHS, L);
744 
745  if (!Phi)
746  return true;
747 
748  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
749  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
750  if (Idx < 0)
751  return true;
752 
753  // Do LFTR if the exit condition's IV is *not* a simple counter.
754  Value *IncV = Phi->getIncomingValue(Idx);
755  return Phi != getLoopPhiForCounter(IncV, L);
756 }
757 
758 /// Return true if undefined behavior would provable be executed on the path to
759 /// OnPathTo if Root produced a posion result. Note that this doesn't say
760 /// anything about whether OnPathTo is actually executed or whether Root is
761 /// actually poison. This can be used to assess whether a new use of Root can
762 /// be added at a location which is control equivalent with OnPathTo (such as
763 /// immediately before it) without introducing UB which didn't previously
764 /// exist. Note that a false result conveys no information.
766  Instruction *OnPathTo,
767  DominatorTree *DT) {
768  // Basic approach is to assume Root is poison, propagate poison forward
769  // through all users we can easily track, and then check whether any of those
770  // users are provable UB and must execute before out exiting block might
771  // exit.
772 
773  // The set of all recursive users we've visited (which are assumed to all be
774  // poison because of said visit)
775  SmallSet<const Value *, 16> KnownPoison;
777  Worklist.push_back(Root);
778  while (!Worklist.empty()) {
779  const Instruction *I = Worklist.pop_back_val();
780 
781  // If we know this must trigger UB on a path leading our target.
782  if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
783  return true;
784 
785  // If we can't analyze propagation through this instruction, just skip it
786  // and transitive users. Safe as false is a conservative result.
787  if (!propagatesPoison(cast<Operator>(I)) && I != Root)
788  continue;
789 
790  if (KnownPoison.insert(I).second)
791  for (const User *User : I->users())
792  Worklist.push_back(cast<Instruction>(User));
793  }
794 
795  // Might be non-UB, or might have a path we couldn't prove must execute on
796  // way to exiting bb.
797  return false;
798 }
799 
800 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
801 /// down to checking that all operands are constant and listing instructions
802 /// that may hide undef.
804  unsigned Depth) {
805  if (isa<Constant>(V))
806  return !isa<UndefValue>(V);
807 
808  if (Depth >= 6)
809  return false;
810 
811  // Conservatively handle non-constant non-instructions. For example, Arguments
812  // may be undef.
813  Instruction *I = dyn_cast<Instruction>(V);
814  if (!I)
815  return false;
816 
817  // Load and return values may be undef.
818  if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
819  return false;
820 
821  // Optimistically handle other instructions.
822  for (Value *Op : I->operands()) {
823  if (!Visited.insert(Op).second)
824  continue;
825  if (!hasConcreteDefImpl(Op, Visited, Depth+1))
826  return false;
827  }
828  return true;
829 }
830 
831 /// Return true if the given value is concrete. We must prove that undef can
832 /// never reach it.
833 ///
834 /// TODO: If we decide that this is a good approach to checking for undef, we
835 /// may factor it into a common location.
836 static bool hasConcreteDef(Value *V) {
837  SmallPtrSet<Value*, 8> Visited;
838  Visited.insert(V);
839  return hasConcreteDefImpl(V, Visited, 0);
840 }
841 
842 /// Return true if this IV has any uses other than the (soon to be rewritten)
843 /// loop exit test.
844 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
845  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
846  Value *IncV = Phi->getIncomingValue(LatchIdx);
847 
848  for (User *U : Phi->users())
849  if (U != Cond && U != IncV) return false;
850 
851  for (User *U : IncV->users())
852  if (U != Cond && U != Phi) return false;
853  return true;
854 }
855 
856 /// Return true if the given phi is a "counter" in L. A counter is an
857 /// add recurance (of integer or pointer type) with an arbitrary start, and a
858 /// step of 1. Note that L must have exactly one latch.
859 static bool isLoopCounter(PHINode* Phi, Loop *L,
860  ScalarEvolution *SE) {
861  assert(Phi->getParent() == L->getHeader());
862  assert(L->getLoopLatch());
863 
864  if (!SE->isSCEVable(Phi->getType()))
865  return false;
866 
867  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
868  if (!AR || AR->getLoop() != L || !AR->isAffine())
869  return false;
870 
871  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
872  if (!Step || !Step->isOne())
873  return false;
874 
875  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
876  Value *IncV = Phi->getIncomingValue(LatchIdx);
877  return (getLoopPhiForCounter(IncV, L) == Phi &&
878  isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
879 }
880 
/// Search the loop header for a loop counter (an add rec w/step of one)
/// suitable for use by LFTR. If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  // The condition of the exit test; an IV already feeding it is preferable.
  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  // Header phis are contiguous at the start of the block.
  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    // Avoid comparing an integer IV against a pointer Limit.
    if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
      continue;

    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test. This is legal since performing LFTR could not
      // increase the number of undef users.
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist in
    // the original program. (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case above.)
    // We have to ensure that we don't introduce UB by introducing a use on an
    // iteration where said IV produces poison. Our strategy here differs for
    // pointers and integer IVs. For integers, we strip and reinfer as needed,
    // see code in linearFunctionTestReplace. For pointers, we restrict
    // transforms as there is no good way to reinfer inbounds once lost.
    if (!Phi->getType()->isIntegerTy() &&
        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
      continue;

    const SCEV *Init = AR->getStart();

    // Only displace an already-chosen counter if it is live and this
    // candidate is a strictly better choice under the heuristics below.
    if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
      // Don't force a live loop counter if another IV can be used.
      if (AlmostDeadIV(Phi, LatchBlock, Cond))
        continue;

      // Prefer to count-from-zero. This is a more "canonical" counter form. It
      // also prefers integer to pointer IVs.
      if (BestInit->isZero() != Init->isZero()) {
        if (BestInit->isZero())
          continue;
      }
      // If two IVs both count from zero or both count from nonzero then the
      // narrower is likely a dead phi that has been widened. Use the wider phi
      // to allow the other to be eliminated.
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
        continue;
    }
    BestPhi = Phi;
    BestInit = Init;
  }
  return BestPhi;
}
968 
969 /// Insert an IR expression which computes the value held by the IV IndVar
970 /// (which must be an loop counter w/unit stride) after the backedge of loop L
971 /// is taken ExitCount times.
972 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
973  const SCEV *ExitCount, bool UsePostInc, Loop *L,
975  assert(isLoopCounter(IndVar, L, SE));
976  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
977  const SCEV *IVInit = AR->getStart();
978  assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
979 
980  // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
981  // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
982  // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
983  // the existing GEPs whenever possible.
984  if (IndVar->getType()->isPointerTy() &&
985  !ExitCount->getType()->isPointerTy()) {
986  // IVOffset will be the new GEP offset that is interpreted by GEP as a
987  // signed value. ExitCount on the other hand represents the loop trip count,
988  // which is an unsigned value. FindLoopCounter only allows induction
989  // variables that have a positive unit stride of one. This means we don't
990  // have to handle the case of negative offsets (yet) and just need to zero
991  // extend ExitCount.
992  Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
993  const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
994  if (UsePostInc)
995  IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));
996 
997  // Expand the code for the iteration count.
998  assert(SE->isLoopInvariant(IVOffset, L) &&
999  "Computed iteration count is not loop invariant!");
1000 
1001  const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
1002  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1003  return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
1004  } else {
1005  // In any other case, convert both IVInit and ExitCount to integers before
1006  // comparing. This may result in SCEV expansion of pointers, but in practice
1007  // SCEV will fold the pointer arithmetic away as such:
1008  // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
1009  //
1010  // Valid Cases: (1) both integers is most common; (2) both may be pointers
1011  // for simple memset-style loops.
1012  //
1013  // IVInit integer and ExitCount pointer would only occur if a canonical IV
1014  // were generated on top of case #2, which is not expected.
1015 
1016  // For unit stride, IVCount = Start + ExitCount with 2's complement
1017  // overflow.
1018 
1019  // For integer IVs, truncate the IV before computing IVInit + BECount,
1020  // unless we know apriori that the limit must be a constant when evaluated
1021  // in the bitwidth of the IV. We prefer (potentially) keeping a truncate
1022  // of the IV in the loop over a (potentially) expensive expansion of the
1023  // widened exit count add(zext(add)) expression.
1024  if (SE->getTypeSizeInBits(IVInit->getType())
1025  > SE->getTypeSizeInBits(ExitCount->getType())) {
1026  if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
1027  ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
1028  else
1029  IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
1030  }
1031 
1032  const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);
1033 
1034  if (UsePostInc)
1035  IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
1036 
1037  // Expand the code for the iteration count.
1038  assert(SE->isLoopInvariant(IVLimit, L) &&
1039  "Computed iteration count is not loop invariant!");
1040  // Ensure that we generate the same type as IndVar, or a smaller integer
1041  // type. In the presence of null pointer values, we have an integer type
1042  // SCEV expression (IVInit) for a pointer type IV value (IndVar).
1043  Type *LimitTy = ExitCount->getType()->isPointerTy() ?
1044  IndVar->getType() : ExitCount->getType();
1045  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1046  return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
1047  }
1048 }
1049 
1050 /// This method rewrites the exit condition of the loop to be a canonical !=
1051 /// comparison against the incremented loop induction variable. This pass is
1052 /// able to rewrite the exit tests of any loop where the SCEV analysis can
1053 /// determine a loop-invariant trip count of the loop, which is actually a much
1054 /// broader range than just linear tests.
1055 bool IndVarSimplify::
1056 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
1057  const SCEV *ExitCount,
1058  PHINode *IndVar, SCEVExpander &Rewriter) {
1059  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
1060  assert(isLoopCounter(IndVar, L, SE));
1061  Instruction * const IncVar =
1062  cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
1063 
1064  // Initialize CmpIndVar to the preincremented IV.
1065  Value *CmpIndVar = IndVar;
1066  bool UsePostInc = false;
1067 
1068  // If the exiting block is the same as the backedge block, we prefer to
1069  // compare against the post-incremented value, otherwise we must compare
1070  // against the preincremented value.
1071  if (ExitingBB == L->getLoopLatch()) {
1072  // For pointer IVs, we chose to not strip inbounds which requires us not
1073  // to add a potentially UB introducing use. We need to either a) show
1074  // the loop test we're modifying is already in post-inc form, or b) show
1075  // that adding a use must not introduce UB.
1076  bool SafeToPostInc =
1077  IndVar->getType()->isIntegerTy() ||
1078  isLoopExitTestBasedOn(IncVar, ExitingBB) ||
1079  mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
1080  if (SafeToPostInc) {
1081  UsePostInc = true;
1082  CmpIndVar = IncVar;
1083  }
1084  }
1085 
1086  // It may be necessary to drop nowrap flags on the incrementing instruction
1087  // if either LFTR moves from a pre-inc check to a post-inc check (in which
1088  // case the increment might have previously been poison on the last iteration
1089  // only) or if LFTR switches to a different IV that was previously dynamically
1090  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
1091  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
1092  // check), because the pre-inc addrec flags may be adopted from the original
1093  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
1094  // TODO: This handling is inaccurate for one case: If we switch to a
1095  // dynamically dead IV that wraps on the first loop iteration only, which is
1096  // not covered by the post-inc addrec. (If the new IV was not dynamically
1097  // dead, it could not be poison on the first iteration in the first place.)
1098  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
1099  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
1100  if (BO->hasNoUnsignedWrap())
1101  BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
1102  if (BO->hasNoSignedWrap())
1103  BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
1104  }
1105 
1106  Value *ExitCnt = genLoopLimit(
1107  IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
1108  assert(ExitCnt->getType()->isPointerTy() ==
1109  IndVar->getType()->isPointerTy() &&
1110  "genLoopLimit missed a cast");
1111 
1112  // Insert a new icmp_ne or icmp_eq instruction before the branch.
1113  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1115  if (L->contains(BI->getSuccessor(0)))
1116  P = ICmpInst::ICMP_NE;
1117  else
1118  P = ICmpInst::ICMP_EQ;
1119 
1120  IRBuilder<> Builder(BI);
1121 
1122  // The new loop exit condition should reuse the debug location of the
1123  // original loop exit condition.
1124  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
1125  Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
1126 
1127  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
1128  // avoid the expensive expansion of the limit expression in the wider type,
1129  // emit a truncate to narrow the IV to the ExitCount type. This is safe
1130  // since we know (from the exit count bitwidth), that we can't self-wrap in
1131  // the narrower type.
1132  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
1133  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
1134  if (CmpIndVarSize > ExitCntSize) {
1135  assert(!CmpIndVar->getType()->isPointerTy() &&
1136  !ExitCnt->getType()->isPointerTy());
1137 
1138  // Before resorting to actually inserting the truncate, use the same
1139  // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
1140  // the other side of the comparison instead. We still evaluate the limit
1141  // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
1142  // a truncate within in.
1143  bool Extended = false;
1144  const SCEV *IV = SE->getSCEV(CmpIndVar);
1145  const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
1146  ExitCnt->getType());
1147  const SCEV *ZExtTrunc =
1148  SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
1149 
1150  if (ZExtTrunc == IV) {
1151  Extended = true;
1152  ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
1153  "wide.trip.count");
1154  } else {
1155  const SCEV *SExtTrunc =
1156  SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
1157  if (SExtTrunc == IV) {
1158  Extended = true;
1159  ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
1160  "wide.trip.count");
1161  }
1162  }
1163 
1164  if (Extended) {
1165  bool Discard;
1166  L->makeLoopInvariant(ExitCnt, Discard);
1167  } else
1168  CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
1169  "lftr.wideiv");
1170  }
1171  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
1172  << " LHS:" << *CmpIndVar << '\n'
1173  << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
1174  << "\n"
1175  << " RHS:\t" << *ExitCnt << "\n"
1176  << "ExitCount:\t" << *ExitCount << "\n"
1177  << " was: " << *BI->getCondition() << "\n");
1178 
1179  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
1180  Value *OrigCond = BI->getCondition();
1181  // It's tempting to use replaceAllUsesWith here to fully replace the old
1182  // comparison, but that's not immediately safe, since users of the old
1183  // comparison may not be dominated by the new comparison. Instead, just
1184  // update the branch to use the new comparison; in the common case this
1185  // will make old comparison dead.
1186  BI->setCondition(Cond);
1187  DeadInsts.emplace_back(OrigCond);
1188 
1189  ++NumLFTR;
1190  return true;
1191 }
1192 
1193 //===----------------------------------------------------------------------===//
1194 // sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
1195 //===----------------------------------------------------------------------===//
1196 
/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
///
/// \returns true if any instruction was moved.
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
  // Requires a unique exit block so a sunk value dominates all loop exits.
  BasicBlock *ExitBlock = L->getExitBlock();
  if (!ExitBlock) return false;

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) return false;

  bool MadeAnyChanges = false;
  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
  // Walk the preheader backwards from its terminator; sinking candidates
  // accumulate at the end of the preheader.
  BasicBlock::iterator I(Preheader->getTerminator());
  while (I != Preheader->begin()) {
    --I;
    // New instructions were inserted at the end of the preheader.
    if (isa<PHINode>(I))
      break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before instructions inside the loop.  Also don't
    // move instructions which might read memory, since the loop may modify
    // memory. Note that it's okay if the instruction might have undefined
    // behavior: LoopSimplify guarantees that the preheader dominates the exit
    // block.
    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
      continue;

    // Skip debug info intrinsics.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip eh pad instructions.
    if (I->isEHPad())
      continue;

    // Don't sink alloca: we never want to sink static alloca's out of the
    // entry block, and correctly sinking dynamic alloca's requires
    // checks for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
    if (isa<AllocaInst>(I))
      continue;

    // Determine if there is a use in or before the loop (direct or
    // otherwise).
    bool UsedInLoop = false;
    for (Use &U : I->uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      BasicBlock *UseBB = User->getParent();
      // A phi use is attributed to its incoming block, not the phi's block.
      if (PHINode *P = dyn_cast<PHINode>(User)) {
        unsigned i =
          PHINode::getIncomingValueNumForOperand(U.getOperandNo());
        UseBB = P->getIncomingBlock(i);
      }
      if (UseBB == Preheader || L->contains(UseBB)) {
        UsedInLoop = true;
        break;
      }
    }

    // If there is, the def must remain in the preheader.
    if (UsedInLoop)
      continue;

    // Otherwise, sink it to the exit block.
    Instruction *ToMove = &*I;
    bool Done = false;

    // Advance the iterator BEFORE moving ToMove, so it stays valid; also skip
    // over debug/pseudo instructions while backing up.
    if (I != Preheader->begin()) {
      // Skip debug info intrinsics.
      do {
        --I;
      } while (I->isDebugOrPseudoInst() && I != Preheader->begin());

      // Nothing left to scan if only debug/pseudo instructions remain.
      if (I->isDebugOrPseudoInst() && I == Preheader->begin())
        Done = true;
    } else {
      Done = true;
    }

    MadeAnyChanges = true;
    ToMove->moveBefore(*ExitBlock, InsertPt);
    if (Done) break;
    // Keep sunk instructions in their original relative order.
    InsertPt = ToMove->getIterator();
  }

  return MadeAnyChanges;
}
1285 
1286 static void replaceExitCond(BranchInst *BI, Value *NewCond,
1287  SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1288  auto *OldCond = BI->getCondition();
1289  BI->setCondition(NewCond);
1290  if (OldCond->use_empty())
1291  DeadInsts.emplace_back(OldCond);
1292 }
1293 
1294 static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
1295  SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1296  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1297  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1298  auto *OldCond = BI->getCondition();
1299  auto *NewCond =
1300  ConstantInt::get(OldCond->getType(), IsTaken ? ExitIfTrue : !ExitIfTrue);
1301  replaceExitCond(BI, NewCond, DeadInsts);
1302 }
1303 
1305  Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1306  assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
1307  auto *LoopPreheader = L->getLoopPreheader();
1308  auto *LoopHeader = L->getHeader();
1309  for (auto &PN : LoopHeader->phis()) {
1310  auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
1311  PN.replaceAllUsesWith(PreheaderIncoming);
1312  DeadInsts.emplace_back(&PN);
1313  }
1314 }
1315 
1317  const Loop *L, BasicBlock *ExitingBB, ICmpInst::Predicate InvariantPred,
1318  const SCEV *InvariantLHS, const SCEV *InvariantRHS, SCEVExpander &Rewriter,
1319  SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1320  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1321  Rewriter.setInsertPoint(BI);
1322  auto *LHSV = Rewriter.expandCodeFor(InvariantLHS);
1323  auto *RHSV = Rewriter.expandCodeFor(InvariantRHS);
1324  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1325  if (ExitIfTrue)
1326  InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
1327  IRBuilder<> Builder(BI);
1328  auto *NewCond = Builder.CreateICmp(InvariantPred, LHSV, RHSV,
1329  BI->getCondition()->getName());
1330  replaceExitCond(BI, NewCond, DeadInsts);
1331 }
1332 
1334  const Loop *L, BranchInst *BI, BasicBlock *ExitingBB,
1335  const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
1337  SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1338  ICmpInst::Predicate Pred;
1339  Value *LHS, *RHS;
1340  BasicBlock *TrueSucc, *FalseSucc;
1341  if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
1342  m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))
1343  return false;
1344 
1345  assert((L->contains(TrueSucc) != L->contains(FalseSucc)) &&
1346  "Not a loop exit!");
1347 
1348  // 'LHS pred RHS' should now mean that we stay in loop.
1349  if (L->contains(FalseSucc))
1350  Pred = CmpInst::getInversePredicate(Pred);
1351 
1352  // If we are proving loop exit, invert the predicate.
1353  if (Inverted)
1354  Pred = CmpInst::getInversePredicate(Pred);
1355 
1356  const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
1357  const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
1358  // Can we prove it to be trivially true?
1359  if (SE->isKnownPredicateAt(Pred, LHSS, RHSS, BI)) {
1360  foldExit(L, ExitingBB, Inverted, DeadInsts);
1361  return true;
1362  }
1363  // Further logic works for non-inverted condition only.
1364  if (Inverted)
1365  return false;
1366 
1367  auto *ARTy = LHSS->getType();
1368  auto *MaxIterTy = MaxIter->getType();
1369  // If possible, adjust types.
1370  if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
1371  MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
1372  else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
1373  const SCEV *MinusOne = SE->getMinusOne(ARTy);
1374  auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
1375  if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
1376  MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
1377  }
1378 
1379  if (SkipLastIter) {
1380  const SCEV *One = SE->getOne(MaxIter->getType());
1381  MaxIter = SE->getMinusSCEV(MaxIter, One);
1382  }
1383 
1384  // Check if there is a loop-invariant predicate equivalent to our check.
1385  auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
1386  L, BI, MaxIter);
1387  if (!LIP)
1388  return false;
1389 
1390  // Can we prove it to be trivially true?
1391  if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
1392  foldExit(L, ExitingBB, Inverted, DeadInsts);
1393  else
1394  replaceWithInvariantCond(L, ExitingBB, LIP->Pred, LIP->LHS, LIP->RHS,
1395  Rewriter, DeadInsts);
1396 
1397  return true;
1398 }
1399 
1400 bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
1401  // Note: This is duplicating a particular part on SimplifyIndVars reasoning.
1402  // We need to duplicate it because given icmp zext(small-iv), C, IVUsers
1403  // never reaches the icmp since the zext doesn't fold to an AddRec unless
1404  // it already has flags. The alternative to this would be to extending the
1405  // set of "interesting" IV users to include the icmp, but doing that
1406  // regresses results in practice by querying SCEVs before trip counts which
1407  // rely on them which results in SCEV caching sub-optimal answers. The
1408  // concern about caching sub-optimal results is why we only query SCEVs of
1409  // the loop invariant RHS here.
1410  SmallVector<BasicBlock*, 16> ExitingBlocks;
1411  L->getExitingBlocks(ExitingBlocks);
1412  bool Changed = false;
1413  for (auto *ExitingBB : ExitingBlocks) {
1414  auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1415  if (!BI)
1416  continue;
1417  assert(BI->isConditional() && "exit branch must be conditional");
1418 
1419  auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1420  if (!ICmp || !ICmp->hasOneUse())
1421  continue;
1422 
1423  auto *LHS = ICmp->getOperand(0);
1424  auto *RHS = ICmp->getOperand(1);
1425  // For the range reasoning, avoid computing SCEVs in the loop to avoid
1426  // poisoning cache with sub-optimal results. For the must-execute case,
1427  // this is a neccessary precondition for correctness.
1428  if (!L->isLoopInvariant(RHS)) {
1429  if (!L->isLoopInvariant(LHS))
1430  continue;
1431  // Same logic applies for the inverse case
1432  std::swap(LHS, RHS);
1433  }
1434 
1435  // Match (icmp signed-cond zext, RHS)
1436  Value *LHSOp = nullptr;
1437  if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
1438  continue;
1439 
1440  const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
1441  const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
1442  const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
1443  auto FullCR = ConstantRange::getFull(InnerBitWidth);
1444  FullCR = FullCR.zeroExtend(OuterBitWidth);
1445  auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
1446  if (FullCR.contains(RHSCR)) {
1447  // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
1448  // replace the signed condition with the unsigned version.
1449  ICmp->setPredicate(ICmp->getUnsignedPredicate());
1450  Changed = true;
1451  // Note: No SCEV invalidation needed. We've changed the predicate, but
1452  // have not changed exit counts, or the values produced by the compare.
1453  continue;
1454  }
1455  }
1456 
1457  // Now that we've canonicalized the condition to match the extend,
1458  // see if we can rotate the extend out of the loop.
1459  for (auto *ExitingBB : ExitingBlocks) {
1460  auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1461  if (!BI)
1462  continue;
1463  assert(BI->isConditional() && "exit branch must be conditional");
1464 
1465  auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1466  if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
1467  continue;
1468 
1469  bool Swapped = false;
1470  auto *LHS = ICmp->getOperand(0);
1471  auto *RHS = ICmp->getOperand(1);
1472  if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
1473  // Nothing to rotate
1474  continue;
1475  if (L->isLoopInvariant(LHS)) {
1476  // Same logic applies for the inverse case until we actually pick
1477  // which operand of the compare to update.
1478  Swapped = true;
1479  std::swap(LHS, RHS);
1480  }
1482 
1483  // Match (icmp unsigned-cond zext, RHS)
1484  // TODO: Extend to handle corresponding sext/signed-cmp case
1485  // TODO: Extend to other invertible functions
1486  Value *LHSOp = nullptr;
1487  if (!match(LHS, m_ZExt(m_Value(LHSOp))))
1488  continue;
1489 
1490  // In general, we only rotate if we can do so without increasing the number
1491  // of instructions. The exception is when we have an zext(add-rec). The
1492  // reason for allowing this exception is that we know we need to get rid
1493  // of the zext for SCEV to be able to compute a trip count for said loops;
1494  // we consider the new trip count valuable enough to increase instruction
1495  // count by one.
1496  if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
1497  continue;
1498 
1499  // Given a icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS
1500  // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
1501  // when zext is loop varying and RHS is loop invariant. This converts
1502  // loop varying work to loop-invariant work.
1503  auto doRotateTransform = [&]() {
1504  assert(ICmp->isUnsigned() && "must have proven unsigned already");
1505  auto *NewRHS =
1506  CastInst::Create(Instruction::Trunc, RHS, LHSOp->getType(), "",
1508  ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
1509  ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
1510  if (LHS->use_empty())
1511  DeadInsts.push_back(LHS);
1512  };
1513 
1514 
1515  const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
1516  const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
1517  const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
1518  auto FullCR = ConstantRange::getFull(InnerBitWidth);
1519  FullCR = FullCR.zeroExtend(OuterBitWidth);
1520  auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
1521  if (FullCR.contains(RHSCR)) {
1522  doRotateTransform();
1523  Changed = true;
1524  // Note, we are leaving SCEV in an unfortunately imprecise case here
1525  // as rotation tends to reveal information about trip counts not
1526  // previously visible.
1527  continue;
1528  }
1529  }
1530 
1531  return Changed;
1532 }
1533 
1534 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1535  SmallVector<BasicBlock*, 16> ExitingBlocks;
1536  L->getExitingBlocks(ExitingBlocks);
1537 
1538  // Remove all exits which aren't both rewriteable and execute on every
1539  // iteration.
1540  llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1541  // If our exitting block exits multiple loops, we can only rewrite the
1542  // innermost one. Otherwise, we're changing how many times the innermost
1543  // loop runs before it exits.
1544  if (LI->getLoopFor(ExitingBB) != L)
1545  return true;
1546 
1547  // Can't rewrite non-branch yet.
1548  BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1549  if (!BI)
1550  return true;
1551 
1552  // If already constant, nothing to do.
1553  if (isa<Constant>(BI->getCondition()))
1554  return true;
1555 
1556  // Likewise, the loop latch must be dominated by the exiting BB.
1557  if (!DT->dominates(ExitingBB, L->getLoopLatch()))
1558  return true;
1559 
1560  return false;
1561  });
1562 
1563  if (ExitingBlocks.empty())
1564  return false;
1565 
1566  // Get a symbolic upper bound on the loop backedge taken count.
1567  const SCEV *MaxExitCount = SE->getSymbolicMaxBackedgeTakenCount(L);
1568  if (isa<SCEVCouldNotCompute>(MaxExitCount))
1569  return false;
1570 
1571  // Visit our exit blocks in order of dominance. We know from the fact that
1572  // all exits must dominate the latch, so there is a total dominance order
1573  // between them.
1574  llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1575  // std::sort sorts in ascending order, so we want the inverse of
1576  // the normal dominance relation.
1577  if (A == B) return false;
1578  if (DT->properlyDominates(A, B))
1579  return true;
1580  else {
1581  assert(DT->properlyDominates(B, A) &&
1582  "expected total dominance order!");
1583  return false;
1584  }
1585  });
1586 #ifdef ASSERT
1587  for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
1588  assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
1589  }
1590 #endif
1591 
1592  bool Changed = false;
1593  bool SkipLastIter = false;
1594  SmallSet<const SCEV*, 8> DominatingExitCounts;
1595  for (BasicBlock *ExitingBB : ExitingBlocks) {
1596  const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1597  if (isa<SCEVCouldNotCompute>(ExitCount)) {
1598  // Okay, we do not know the exit count here. Can we at least prove that it
1599  // will remain the same within iteration space?
1600  auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1601  auto OptimizeCond = [&](bool Inverted, bool SkipLastIter) {
1603  L, BI, ExitingBB, MaxExitCount, Inverted, SkipLastIter, SE,
1604  Rewriter, DeadInsts);
1605  };
1606 
1607  // TODO: We might have proved that we can skip the last iteration for
1608  // this check. In this case, we only want to check the condition on the
1609  // pre-last iteration (MaxExitCount - 1). However, there is a nasty
1610  // corner case:
1611  //
1612  // for (i = len; i != 0; i--) { ... check (i ult X) ... }
1613  //
1614  // If we could not prove that len != 0, then we also could not prove that
1615  // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
1616  // OptimizeCond will likely not prove anything for it, even if it could
1617  // prove the same fact for len.
1618  //
1619  // As a temporary solution, we query both last and pre-last iterations in
1620  // hope that we will be able to prove triviality for at least one of
1621  // them. We can stop querying MaxExitCount for this case once SCEV
1622  // understands that (MaxExitCount - 1) will not overflow here.
1623  if (OptimizeCond(false, false) || OptimizeCond(true, false))
1624  Changed = true;
1625  else if (SkipLastIter)
1626  if (OptimizeCond(false, true) || OptimizeCond(true, true))
1627  Changed = true;
1628  continue;
1629  }
1630 
1631  if (MaxExitCount == ExitCount)
1632  // If the loop has more than 1 iteration, all further checks will be
1633  // executed 1 iteration less.
1634  SkipLastIter = true;
1635 
1636  // If we know we'd exit on the first iteration, rewrite the exit to
1637  // reflect this. This does not imply the loop must exit through this
1638  // exit; there may be an earlier one taken on the first iteration.
1639  // We know that the backedge can't be taken, so we replace all
1640  // the header PHIs with values coming from the preheader.
1641  if (ExitCount->isZero()) {
1642  foldExit(L, ExitingBB, true, DeadInsts);
1644  Changed = true;
1645  continue;
1646  }
1647 
1648  assert(ExitCount->getType()->isIntegerTy() &&
1649  MaxExitCount->getType()->isIntegerTy() &&
1650  "Exit counts must be integers");
1651 
1652  Type *WiderType =
1653  SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
1654  ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
1655  MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
1656  assert(MaxExitCount->getType() == ExitCount->getType());
1657 
1658  // Can we prove that some other exit must be taken strictly before this
1659  // one?
1660  if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
1661  MaxExitCount, ExitCount)) {
1662  foldExit(L, ExitingBB, false, DeadInsts);
1663  Changed = true;
1664  continue;
1665  }
1666 
1667  // As we run, keep track of which exit counts we've encountered. If we
1668  // find a duplicate, we've found an exit which would have exited on the
1669  // exiting iteration, but (from the visit order) strictly follows another
1670  // which does the same and is thus dead.
1671  if (!DominatingExitCounts.insert(ExitCount).second) {
1672  foldExit(L, ExitingBB, false, DeadInsts);
1673  Changed = true;
1674  continue;
1675  }
1676 
1677  // TODO: There might be another oppurtunity to leverage SCEV's reasoning
1678  // here. If we kept track of the min of dominanting exits so far, we could
1679  // discharge exits with EC >= MDEC. This is less powerful than the existing
1680  // transform (since later exits aren't considered), but potentially more
1681  // powerful for any case where SCEV can prove a >=u b, but neither a == b
1682  // or a >u b. Such a case is not currently known.
1683  }
1684  return Changed;
1685 }
1686 
// Try to rewrite the condition of every analyzeable exit of a read-only loop
// into a loop-invariant comparison of that exit's exit count against the
// loop's exact backedge-taken count (or into a constant when the two counts
// are equal). Gated by the LoopPredication flag. Bails out unless: the exact
// BTC is computable and safe to expand, every kept exit is an innermost-loop
// conditional branch with a phi-free exit block and a computable exit count,
// the exits form a total dominance order, and no instruction in the loop may
// have side effects. Returns true if any branch condition was replaced.
1687 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
1688  SmallVector<BasicBlock*, 16> ExitingBlocks;
1689  L->getExitingBlocks(ExitingBlocks);
1690 
1691  // Finally, see if we can rewrite our exit conditions into a loop invariant
1692  // form. If we have a read-only loop, and we can tell that we must exit down
1693  // a path which does not need any of the values computed within the loop, we
1694  // can rewrite the loop to exit on the first iteration. Note that this
1695  // doesn't either a) tell us the loop exits on the first iteration (unless
1696  // *all* exits are predicateable) or b) tell us *which* exit might be taken.
1697  // This transformation looks a lot like a restricted form of dead loop
1698  // elimination, but restricted to read-only loops and without necessarily
1699  // needing to kill the loop entirely.
1700  if (!LoopPredication)
1701  return false;
1702 
1703  // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
1704  // through *explicit* control flow. We have to eliminate the possibility of
1705  // implicit exits (see below) before we know it's truly exact.
1706  const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
1707  if (isa<SCEVCouldNotCompute>(ExactBTC) || !isSafeToExpand(ExactBTC, *SE))
1708  return false;
1709 
1710  assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
1711  assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");
1712 
   // Predicate returning true for exits we must NOT rewrite; used below to
   // truncate ExitingBlocks at the first unpredicatable exit.
1713  auto BadExit = [&](BasicBlock *ExitingBB) {
1714  // If our exiting block exits multiple loops, we can only rewrite the
1715  // innermost one. Otherwise, we're changing how many times the innermost
1716  // loop runs before it exits.
1717  if (LI->getLoopFor(ExitingBB) != L)
1718  return true;
1719 
1720  // Can't rewrite non-branch yet.
1721  BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1722  if (!BI)
1723  return true;
1724 
1725  // If already constant, nothing to do.
1726  if (isa<Constant>(BI->getCondition()))
1727  return true;
1728 
1729  // If the exit block has phis, we need to be able to compute the values
1730  // within the loop which contains them. This assumes trivially lcssa phis
1731  // have already been removed; TODO: generalize
1732  BasicBlock *ExitBlock =
1733  BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
1734  if (!ExitBlock->phis().empty())
1735  return true;
1736 
1737  const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1738  if (isa<SCEVCouldNotCompute>(ExitCount) || !isSafeToExpand(ExitCount, *SE))
1739  return true;
1740 
1741  assert(SE->isLoopInvariant(ExitCount, L) &&
1742  "Exit count must be loop invariant");
1743  assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
1744  return false;
1745  };
1746 
1747  // If we have any exits which can't be predicated themselves, then we can't
1748  // predicate any exit which isn't guaranteed to execute before it. Consider
1749  // two exits (a) and (b) which would both exit on the same iteration. If we
1750  // can predicate (b), but not (a), and (a) precedes (b) along some path, then
1751  // we could convert a loop from exiting through (a) to one exiting through
1752  // (b). Note that this problem exists only for exits with the same exit
1753  // count, and we could be more aggressive when exit counts are known inequal.
1754  llvm::sort(ExitingBlocks,
1755  [&](BasicBlock *A, BasicBlock *B) {
1756  // std::sort sorts in ascending order, so we want the inverse of
1757  // the normal dominance relation, plus a tie breaker for blocks
1758  // unordered by dominance.
1759  if (DT->properlyDominates(A, B)) return true;
1760  if (DT->properlyDominates(B, A)) return false;
1761  return A->getName() < B->getName();
1762  });
1763  // Check to see if our exit blocks are a total order (i.e. a linear chain of
1764  // exits before the backedge). If they aren't, reasoning about reachability
1765  // is complicated and we choose not to for now.
1766  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
1767  if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
1768  return false;
1769 
1770  // Given our sorted total order, we know that exit[j] must be evaluated
1771  // after all exit[i] such that j > i.
1772  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
1773  if (BadExit(ExitingBlocks[i])) {
1774  ExitingBlocks.resize(i);
1775  break;
1776  }
1777 
1778  if (ExitingBlocks.empty())
1779  return false;
1780 
1781  // We rely on not being able to reach an exiting block on a later iteration
1782  // than its statically computed exit count. The implementation of
1783  // getExitCount currently has this invariant, but assert it here so that
1784  // breakage is obvious if this ever changes.
1785  assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1786  return DT->dominates(ExitingBB, L->getLoopLatch());
1787  }));
1788 
1789  // At this point, ExitingBlocks consists of only those blocks which are
1790  // predicatable. Given that, we know we have at least one exit we can
1791  // predicate if the loop doesn't have side effects and doesn't have any
1792  // implicit exits (because then our exact BTC isn't actually exact).
1793  // @Reviewers - As structured, this is O(I^2) for loop nests. Any
1794  // suggestions on how to improve this? I can obviously bail out for outer
1795  // loops, but that seems less than ideal. MemorySSA can find memory writes,
1796  // is that enough for *all* side effects?
1797  for (BasicBlock *BB : L->blocks())
1798  for (auto &I : *BB)
1799  // TODO:isGuaranteedToTransfer
1800  if (I.mayHaveSideEffects())
1801  return false;
1802 
1803  bool Changed = false;
1804  // Finally, do the actual predication for all predicatable blocks. A couple
1805  // of notes here:
1806  // 1) We don't bother to constant fold dominated exits with identical exit
1807  // counts; that's simply a form of CSE/equality propagation and we leave
1808  // it for dedicated passes.
1809  // 2) We insert the comparison at the branch. Hoisting introduces additional
1810  // legality constraints and we leave that to dedicated logic. We want to
1811  // predicate even if we can't insert a loop invariant expression as
1812  // peeling or unrolling will likely reduce the cost of the otherwise loop
1813  // varying check.
1814  Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
   // NOTE(review): the IRBuilder 'B' used below is never declared in this
   // listing — original line 1815 (presumably the IRBuilder declaration) was
   // dropped during extraction. Confirm against the upstream source.
1816  Value *ExactBTCV = nullptr; // Lazily generated if needed.
1817  for (BasicBlock *ExitingBB : ExitingBlocks) {
1818  const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1819 
1820  auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1821  Value *NewCond;
1822  if (ExitCount == ExactBTC) {
   // Exit count equals the exact BTC: the branch can be folded to the
   // constant that keeps control in the loop until the final iteration.
1823  NewCond = L->contains(BI->getSuccessor(0)) ?
1824  B.getFalse() : B.getTrue();
1825  } else {
   // Otherwise compare this exit's count against the (lazily expanded)
   // exact BTC, zero-extending both sides to a common width first.
1826  Value *ECV = Rewriter.expandCodeFor(ExitCount);
1827  if (!ExactBTCV)
1828  ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
1829  Value *RHS = ExactBTCV;
1830  if (ECV->getType() != RHS->getType()) {
1831  Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
1832  ECV = B.CreateZExt(ECV, WiderTy);
1833  RHS = B.CreateZExt(RHS, WiderTy);
1834  }
1835  auto Pred = L->contains(BI->getSuccessor(0)) ?
   // NOTE(review): the two arms of this ?: (the ICmp predicates chosen per
   // branch orientation, original line 1836) were dropped during extraction.
1837  NewCond = B.CreateICmp(Pred, ECV, RHS);
1838  }
1839  Value *OldCond = BI->getCondition();
1840  BI->setCondition(NewCond);
1841  if (OldCond->use_empty())
1842  DeadInsts.emplace_back(OldCond);
1843  Changed = true;
1844  }
1845 
1846  return Changed;
1847 }
1848 
1849 //===----------------------------------------------------------------------===//
1850 // IndVarSimplify driver. Manage several subpasses of IV simplification.
1851 //===----------------------------------------------------------------------===//
1852 
// Top-level driver for induction-variable simplification on loop L. Runs the
// subtransforms in sequence — non-integer IV rewriting, simplify-and-extend,
// exit-value rewriting, congruent-IV elimination, exit-condition
// canonicalization, exit optimization/predication, and (unless disabled)
// linear function test replacement — then performs dead-instruction cleanup
// and sinks unused preheader invariants. Requires LCSSA and LoopSimplify
// form. Returns true if any IR change was made.
1853 bool IndVarSimplify::run(Loop *L) {
1854  // We need (and expect!) the incoming loop to be in LCSSA.
1855  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
1856  "LCSSA required to run indvars!");
1857 
1858  // If LoopSimplify form is not available, stay out of trouble. Some notes:
1859  // - LSR currently only supports LoopSimplify-form loops. Indvars'
1860  // canonicalization can be a pessimization without LSR to "clean up"
1861  // afterwards.
1862  // - We depend on having a preheader; in particular,
1863  // Loop::getCanonicalInductionVariable only supports loops with preheaders,
1864  // and we're in trouble if we can't find the induction variable even when
1865  // we've manually inserted one.
1866  // - LFTR relies on having a single backedge.
1867  if (!L->isLoopSimplifyForm())
1868  return false;
1869 
1870 #ifndef NDEBUG
1871  // Used below for a consistency check only
1872  // Note: Since the result returned by ScalarEvolution may depend on the order
1873  // in which previous results are added to its cache, the call to
1874  // getBackedgeTakenCount() may change following SCEV queries.
1875  const SCEV *BackedgeTakenCount;
1876  if (VerifyIndvars)
1877  BackedgeTakenCount = SE->getBackedgeTakenCount(L);
1878 #endif
1879 
1880  bool Changed = false;
1881  // If there are any floating-point recurrences, attempt to
1882  // transform them to use integer recurrences.
1883  Changed |= rewriteNonIntegerIVs(L);
1884 
1885  // Create a rewriter object which we'll use to transform the code with.
1886  SCEVExpander Rewriter(*SE, DL, "indvars");
1887 #ifndef NDEBUG
1888  Rewriter.setDebugType(DEBUG_TYPE);
1889 #endif
1890 
1891  // Eliminate redundant IV users.
1892  //
1893  // Simplification works best when run before other consumers of SCEV. We
1894  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
1895  // other expressions involving loop IVs have been evaluated. This helps SCEV
1896  // set no-wrap flags before normalizing sign/zero extension.
1897  Rewriter.disableCanonicalMode();
1898  Changed |= simplifyAndExtend(L, Rewriter, LI);
1899 
1900  // Check to see if we can compute the final value of any expressions
1901  // that are recurrent in the loop, and substitute the exit values from the
1902  // loop into any instructions outside of the loop that use the final values
1903  // of the current expressions.
1904  if (ReplaceExitValue != NeverRepl) {
1905  if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
1906  ReplaceExitValue, DeadInsts)) {
1907  NumReplaced += Rewrites;
1908  Changed = true;
1909  }
1910  }
1911 
1912  // Eliminate redundant IV cycles.
1913  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);
1914 
1915  // Try to convert exit conditions to unsigned and rotate computation
1916  // out of the loop. Note: Handles invalidation internally if needed.
1917  Changed |= canonicalizeExitCondition(L);
1918 
1919  // Try to eliminate loop exits based on analyzeable exit counts
1920  if (optimizeLoopExits(L, Rewriter)) {
1921  Changed = true;
1922  // Given we've changed exit counts, notify SCEV
1923  // Some nested loops may share same folded exit basic block,
1924  // thus we need to notify top most loop.
1925  SE->forgetTopmostLoop(L);
1926  }
1927 
1928  // Try to form loop invariant tests for loop exits by changing how many
1929  // iterations of the loop run when that is unobservable.
1930  if (predicateLoopExits(L, Rewriter)) {
1931  Changed = true;
1932  // Given we've changed exit counts, notify SCEV
1933  SE->forgetLoop(L);
1934  }
1935 
1936  // If we have a trip count expression, rewrite the loop's exit condition
1937  // using it.
1938  if (!DisableLFTR) {
1939  BasicBlock *PreHeader = L->getLoopPreheader();
1940 
1941  SmallVector<BasicBlock*, 16> ExitingBlocks;
1942  L->getExitingBlocks(ExitingBlocks);
1943  for (BasicBlock *ExitingBB : ExitingBlocks) {
1944  // Can't rewrite non-branch yet.
1945  if (!isa<BranchInst>(ExitingBB->getTerminator()))
1946  continue;
1947 
1948  // If our exiting block exits multiple loops, we can only rewrite the
1949  // innermost one. Otherwise, we're changing how many times the innermost
1950  // loop runs before it exits.
1951  if (LI->getLoopFor(ExitingBB) != L)
1952  continue;
1953 
1954  if (!needsLFTR(L, ExitingBB))
1955  continue;
1956 
1957  const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1958  if (isa<SCEVCouldNotCompute>(ExitCount))
1959  continue;
1960 
1961  // This was handled above, but as we form SCEVs, we can sometimes refine
1962  // existing ones; this allows exit counts to be folded to zero which
1963  // weren't when optimizeLoopExits saw them. Arguably, we should iterate
1964  // until stable to handle cases like this better.
1965  if (ExitCount->isZero())
1966  continue;
1967 
1968  PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
1969  if (!IndVar)
1970  continue;
1971 
1972  // Avoid high cost expansions. Note: This heuristic is questionable in
1973  // that our definition of "high cost" is not exactly principled.
1974  if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
1975  TTI, PreHeader->getTerminator()))
1976  continue;
1977 
1978  // Check preconditions for proper SCEVExpander operation. SCEV does not
1979  // express SCEVExpander's dependencies, such as LoopSimplify. Instead
1980  // any pass that uses the SCEVExpander must do it. This does not work
1981  // well for loop passes because SCEVExpander makes assumptions about
1982  // all loops, while LoopPassManager only forces the current loop to be
1983  // simplified.
1984  //
1985  // FIXME: SCEV expansion has no way to bail out, so the caller must
1986  // explicitly check any assumptions made by SCEV. Brittle.
1987  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
1988  if (!AR || AR->getLoop()->getLoopPreheader())
1989  Changed |= linearFunctionTestReplace(L, ExitingBB,
1990  ExitCount, IndVar,
1991  Rewriter);
1992  }
1993  }
1994  // Clear the rewriter cache, because values that are in the rewriter's cache
1995  // can be deleted in the loop below, causing the AssertingVH in the cache to
1996  // trigger.
1997  Rewriter.clear();
1998 
1999  // Now that we're done iterating through lists, clean up any instructions
2000  // which are now dead.
2001  while (!DeadInsts.empty()) {
2002  Value *V = DeadInsts.pop_back_val();
2003 
2004  if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
2005  Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
2006  else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
2007  Changed |=
2008  RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
2009  }
2010 
2011  // The Rewriter may not be used from this point on.
2012 
2013  // Loop-invariant instructions in the preheader that aren't used in the
2014  // loop may be sunk below the loop to reduce register pressure.
2015  Changed |= sinkUnusedInvariants(L);
2016 
2017  // rewriteFirstIterationLoopExitValues does not rely on the computation of
2018  // trip count and therefore can further simplify exit values in addition to
2019  // rewriteLoopExitValues.
2020  Changed |= rewriteFirstIterationLoopExitValues(L);
2021 
2022  // Clean up dead instructions.
2023  Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());
2024 
2025  // Check a post-condition.
2026  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2027  "Indvars did not preserve LCSSA!");
2028 
2029  // Verify that LFTR, and any other change have not interfered with SCEV's
2030  // ability to compute trip count. We may have *changed* the exit count, but
2031  // only by reducing it.
2032 #ifndef NDEBUG
2033  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
2034  SE->forgetLoop(L);
2035  const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
   // Truncate the wider count to the narrower type so the two BE counts can
   // be compared on equal footing.
2036  if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
2037  SE->getTypeSizeInBits(NewBECount->getType()))
2038  NewBECount = SE->getTruncateOrNoop(NewBECount,
2039  BackedgeTakenCount->getType());
2040  else
2041  BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
2042  NewBECount->getType());
2043  assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
2044  NewBECount) && "indvars must preserve SCEV");
2045  }
2046  if (VerifyMemorySSA && MSSAU)
2047  MSSAU->getMemorySSA()->verifyMemorySSA();
2048 #endif
2049 
2050  return Changed;
2051 }
2052 
// New pass-manager entry point: constructs an IndVarSimplify driver from the
// standard loop analysis results and runs it on L; reports CFG-preserving
// analyses (plus MemorySSA, when available) as preserved on change.
// NOTE(review): the opening line(s) of this definition (original lines
// 2053-2054, presumably "PreservedAnalyses IndVarSimplifyPass::run(Loop &L,
// LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR," and the trailing
// "{") were dropped by the listing extraction — confirm against upstream.
2055  LPMUpdater &)
2056  Function *F = L.getHeader()->getParent();
2057  const DataLayout &DL = F->getParent()->getDataLayout();
2058 
2059  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
2060  WidenIndVars && AllowIVWidening);
2061  if (!IVS.run(&L))
2062  return PreservedAnalyses::all();
2063 
2064  auto PA = getLoopPassPreservedAnalyses();
2065  PA.preserveSet<CFGAnalyses>();
2066  if (AR.MSSA)
2067  PA.preserve<MemorySSAAnalysis>();
2068  return PA;
2069 }
2070 
// Legacy pass-manager wrapper: gathers the required/optional analyses and
// delegates the actual work to IndVarSimplify::run.
2071 namespace {
2072 
2073 struct IndVarSimplifyLegacyPass : public LoopPass {
2074  static char ID; // Pass identification, replacement for typeid
2075 
2076  IndVarSimplifyLegacyPass() : LoopPass(ID) {
   // NOTE(review): original line 2077 (presumably the
   // initializeIndVarSimplifyLegacyPassPass(...) call — see the index entry
   // for that symbol) was dropped by the listing extraction.
2078  }
2079 
2080  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
2081  if (skipLoop(L))
2082  return false;
2083 
   // Mandatory analyses: loop info, SCEV, dominators.
2084  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2085  auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2086  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   // Optional analyses: TLI, TTI, MemorySSA may each be absent (nullptr).
2087  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2088  auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
2089  auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2090  auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2091  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2092  auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
2093  MemorySSA *MSSA = nullptr;
2094  if (MSSAAnalysis)
2095  MSSA = &MSSAAnalysis->getMSSA();
2096 
2097  IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA, AllowIVWidening);
2098  return IVS.run(L);
2099  }
2100 
2101  void getAnalysisUsage(AnalysisUsage &AU) const override {
2102  AU.setPreservesCFG();
   // NOTE(review): original lines 2103-2104 (additional analysis-usage
   // declarations; likely including getLoopAnalysisUsage(AU) per the index
   // entry for that helper) were dropped by the listing extraction.
2105  }
2106 };
2107 
2108 } // end anonymous namespace
2109 
// Legacy pass registration and factory.
// NOTE(review): original line 2110 (presumably the definition of
// IndVarSimplifyLegacyPass::ID) was dropped by the listing extraction.
2111 
2112 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
2113  "Induction Variable Simplification", false, false)
// NOTE(review): original line 2114 (an INITIALIZE_PASS_DEPENDENCY(...) entry
// between BEGIN and END) was dropped by the listing extraction.
2115 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
2116  "Induction Variable Simplification", false, false)
2117 
// NOTE(review): original line 2118 (the signature of the factory function,
// presumably "Pass *llvm::createIndVarSimplifyPass() {") was dropped by the
// listing extraction; only its body remains below.
2119  return new IndVarSimplifyLegacyPass();
2120 }
mustExecuteUBIfPoisonOnPathTo
static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root, Instruction *OnPathTo, DominatorTree *DT)
Return true if undefined behavior would provably be executed on the path to OnPathTo if Root produced...
Definition: IndVarSimplify.cpp:765
i
i
Definition: README.txt:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
llvm::CmpInst::FCMP_ULE
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:734
Int32Ty
IntegerType * Int32Ty
Definition: NVVMIntrRange.cpp:67
llvm::RecursivelyDeleteTriviallyDeadInstructions
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:517
llvm::rewriteLoopExitValues
int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed,...
Definition: LoopUtils.cpp:1252
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:60
MathExtras.h
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::createWideIV
PHINode * createWideIV(const WideIVInfo &WI, LoopInfo *LI, ScalarEvolution *SE, SCEVExpander &Rewriter, DominatorTree *DT, SmallVectorImpl< WeakTrackingVH > &DeadInsts, unsigned &NumElimExt, unsigned &NumWidened, bool HasGuards, bool UsePostIncrementRanges)
Widen Induction Variables - Extend the width of an IV to cover its widest uses.
Definition: SimplifyIndVar.cpp:2043
llvm::WideIVInfo::WidestNativeType
Type * WidestNativeType
Definition: SimplifyIndVar.h:68
llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
Definition: Instruction.cpp:65
llvm::CmpInst::ICMP_EQ
@ ICMP_EQ
equal
Definition: InstrTypes.h:740
llvm::LoopBase::getExitBlock
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:81
Optional.h
llvm::WeakTrackingVH
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:204
llvm::ScalarEvolution::getEffectiveSCEVType
Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
Definition: ScalarEvolution.cpp:4310
ConvertToSInt
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal)
Convert APF to an integer, if possible.
Definition: IndVarSimplify.cpp:190
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::isSafeToExpand
bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE, bool CanonicalMode=true)
Return true if the given expression is safe to expand in the sense that all materialized values are s...
Definition: ScalarEvolutionExpander.cpp:2617
llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:719
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
llvm::SCEVAddRecExpr::isAffine
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
Definition: ScalarEvolutionExpressions.h:370
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
IntrinsicInst.h
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:218
ScalarEvolutionExpander.h
Scalar.h
llvm::SCEVAddRecExpr::getStart
const SCEV * getStart() const
Definition: ScalarEvolutionExpressions.h:353
MemorySSAUpdater.h
llvm::Function
Definition: Function.h:60
llvm::OnlyCheapRepl
@ OnlyCheapRepl
Definition: LoopUtils.h:436
llvm::initializeIndVarSimplifyLegacyPassPass
void initializeIndVarSimplifyLegacyPassPass(PassRegistry &)
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Pass.h
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
llvm::SCEVExpander
This class uses information about analyze scalars to rewrite expressions in canonical form.
Definition: ScalarEvolutionExpander.h:63
Simplification
Induction Variable Simplification
Definition: IndVarSimplify.cpp:2116
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
Statistic.h
AlmostDeadIV
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond)
Return true if this IV has any uses other than the (soon to be rewritten) loop exit test.
Definition: IndVarSimplify.cpp:844
llvm::SCEVNAryExpr::hasNoUnsignedWrap
bool hasNoUnsignedWrap() const
Definition: ScalarEvolutionExpressions.h:217
llvm::CmpInst::FCMP_ONE
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:727
IndVarSimplify.h
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:167
llvm::Intrinsic::getName
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:879
llvm::IRBuilder<>
hasConcreteDefImpl
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl< Value * > &Visited, unsigned Depth)
Recursive helper for hasConcreteDef().
Definition: IndVarSimplify.cpp:803
llvm::CastInst::Create
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Definition: Instructions.cpp:3180
llvm::erase_if
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:1797
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:741
llvm::CmpInst::getInversePredicate
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:833
ValueTracking.h
Local.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::ScalarEvolution::getTruncateExpr
const SCEV * getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
Definition: ScalarEvolution.cpp:1204
llvm::getLoopAnalysisUsage
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:144
llvm::ScalarEvolution::getTypeSizeInBits
uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
Definition: ScalarEvolution.cpp:4300
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
llvm::PatternMatch::m_Br
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
Definition: PatternMatch.h:1741
llvm::Depth
@ Depth
Definition: SIMachineScheduler.h:36
llvm::NeverRepl
@ NeverRepl
Definition: LoopUtils.h:436
llvm::CmpInst::ICMP_SGT
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:746
ScalarEvolution.h
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
Module.h
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:51
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:136
llvm::ConstantFP::getValueAPF
const APFloat & getValueAPF() const
Definition: Constants.h:297
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
VerifyIndvars
static cl::opt< bool > VerifyIndvars("verify-indvars", cl::Hidden, cl::desc("Verify the ScalarEvolution result after running indvars. Has no " "effect in release builds. (Note: this adds additional SCEV " "queries potentially changing the analysis result)"))
Operator.h
llvm::CmpInst::ICMP_SLE
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:749
STLExtras.h
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:654
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::ScalarEvolution::getTruncateOrZeroExtend
const SCEV * getTruncateOrZeroExtend(const SCEV *V, Type *Ty, unsigned Depth=0)
Return a SCEV corresponding to a conversion of the input value to the specified type.
Definition: ScalarEvolution.cpp:4574
replaceExitCond
static void replaceExitCond(BranchInst *BI, Value *NewCond, SmallVectorImpl< WeakTrackingVH > &DeadInsts)
Definition: IndVarSimplify.cpp:1286
llvm::LoopStandardAnalysisResults::DT
DominatorTree & DT
Definition: LoopAnalysisManager.h:54
llvm::CmpInst::FCMP_OGT
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:723
llvm::PHINode::setIncomingValue
void setIncomingValue(unsigned i, Value *V)
Definition: Instructions.h:2763
getLoopPhiForCounter
static PHINode * getLoopPhiForCounter(Value *IncV, Loop *L)
Given an Value which is hoped to be part of an add recurance in the given loop, return the associated...
Definition: IndVarSimplify.cpp:660
Use.h
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:239
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::WideIVInfo
Collect information about induction variables that are used by sign/zero extend operations.
Definition: SimplifyIndVar.h:64
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::RecursivelyDeleteDeadPHINode
bool RecursivelyDeleteDeadPHINode(PHINode *PN, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is an effectively dead PHI node, due to being a def-use chain of single-use no...
Definition: Local.cpp:627
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::mustTriggerUB
bool mustTriggerUB(const Instruction *I, const SmallSet< const Value *, 16 > &KnownPoison)
Return true if the given instruction must trigger undefined behavior when I is executed with any oper...
Definition: ValueTracking.cpp:5461
llvm::CmpInst::FCMP_ULT
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:733
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::DominatorTree::dominates
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
Instruction.h
CommandLine.h
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
genLoopLimit
static Value * genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, const SCEV *ExitCount, bool UsePostInc, Loop *L, SCEVExpander &Rewriter, ScalarEvolution *SE)
Insert an IR expression which computes the value held by the IV IndVar (which must be an loop counter...
Definition: IndVarSimplify.cpp:972
llvm::Instruction::getOpcode
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:157
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1607
indvars
indvars
Definition: IndVarSimplify.cpp:2115
llvm::APFloat::convertToInteger
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1104
llvm::MemorySSAWrapperPass
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:998
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
Rewriter
Virtual Register Rewriter
Definition: VirtRegMap.cpp:237
llvm::ScalarEvolution::getOne
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
Definition: ScalarEvolution.h:645
llvm::ScalarEvolution::getLoopInvariantExitCondDuringFirstIterations
Optional< LoopInvariantPredicate > getLoopInvariantExitCondDuringFirstIterations(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L, const Instruction *CtxI, const SCEV *MaxIter)
If the result of the predicate LHS Pred RHS is loop invariant with respect to L at given Context duri...
Definition: ScalarEvolution.cpp:10583
Constants.h
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::PHINode::getIncomingValue
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
Definition: Instructions.h:2760
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::User
Definition: User.h:44
Intrinsics.h
llvm::CmpInst::ICMP_ULE
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:745
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
LoopPredication
static cl::opt< bool > LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true), cl::desc("Predicate conditions in read only loops"))
InstrTypes.h
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:297
llvm::FCmpInst
This instruction compares its operands according to the predicate given to the constructor.
Definition: Instructions.h:1360
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::CmpInst::FCMP_UGE
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:732
llvm::NoHardUse
@ NoHardUse
Definition: LoopUtils.h:436
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
llvm::APFloatBase::opOK
@ opOK
Definition: APFloat.h:206
TargetLibraryInfo.h
llvm::BasicBlock::getFirstInsertionPt
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:246
false
Definition: StackSlotColoring.cpp:141
llvm::PatternMatch::m_ZExt
CastClass_match< OpTy, Instruction::ZExt > m_ZExt(const OpTy &Op)
Matches ZExt.
Definition: PatternMatch.h:1639
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::PHINode::getIncomingValueForBlock
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Definition: Instructions.h:2849
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:171
llvm::Instruction
Definition: Instruction.h:42
llvm::BranchInst::setCondition
void setCondition(Value *V)
Definition: Instructions.h:3185
llvm::WideIVInfo::IsSigned
bool IsSigned
Definition: SimplifyIndVar.h:71
llvm::BasicBlock::phis
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:355
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::ConstantFP
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:257
APFloat.h
llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition: SmallVector.h:619
llvm::LoopBase::getExitingBlocks
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:33
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1769
llvm::WideIVInfo::NarrowIV
PHINode * NarrowIV
Definition: SimplifyIndVar.h:65
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:919
llvm::createIndVarSimplifyPass
Pass * createIndVarSimplifyPass()
Definition: IndVarSimplify.cpp:2118
LoopUtils.h
llvm::CmpInst::FCMP_OEQ
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:722
llvm::LPPassManager
Definition: LoopPass.h:76
SmallPtrSet.h
llvm::CmpInst::FCMP_OLT
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:725
llvm::BasicBlock::getModule
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:147
PatternMatch.h
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars", "Induction Variable Simplification", false, false) INITIALIZE_PASS_END(IndVarSimplifyLegacyPass
llvm::PHINode::getNumIncomingValues
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Definition: Instructions.h:2756
llvm::Value::use_empty
bool use_empty() const
Definition: Value.h:344
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Type.h
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::dxil::PointerTypeAnalysis::run
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
Definition: PointerTypeAnalysis.cpp:101
LoopInfo.h
llvm::BranchInst::getCondition
Value * getCondition() const
Definition: Instructions.h:3180
AllowIVWidening
static cl::opt< bool > AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true), cl::desc("Allow widening of indvars to eliminate s/zext"))
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:4407
llvm::PHINode::getIncomingValueNumForOperand
static unsigned getIncomingValueNumForOperand(unsigned i)
Definition: Instructions.h:2774
llvm::simplifyUsersOfIV
bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, const TargetTransformInfo *TTI, SmallVectorImpl< WeakTrackingVH > &Dead, SCEVExpander &Rewriter, IVVisitor *V=nullptr)
simplifyUsersOfIV - Simplify instructions that use this induction variable by using ScalarEvolution t...
Definition: SimplifyIndVar.cpp:916
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:191
llvm::getLoopPassPreservedAnalyses
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Definition: LoopAnalysisManager.cpp:137
llvm::Loop::isLoopSimplifyForm
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to,...
Definition: LoopInfo.cpp:475
BasicBlock.h
llvm::cl::opt< bool >
llvm::APFloat
Definition: APFloat.h:700
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:75
FindLoopCounter
static PHINode * FindLoopCounter(Loop *L, BasicBlock *ExitingBB, const SCEV *BECount, ScalarEvolution *SE, DominatorTree *DT)
Search the loop header for a loop counter (anadd rec w/step of one) suitable for use by LFTR.
Definition: IndVarSimplify.cpp:888
hasConcreteDef
static bool hasConcreteDef(Value *V)
Return true if the given value is concrete.
Definition: IndVarSimplify.cpp:836
llvm::cl::values
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:685
llvm::isInt< 32 >
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:373
llvm::ICmpInst
This instruction compares its operands according to the predicate given to the constructor.
Definition: Instructions.h:1186
uint64_t
llvm::LoopPass
Definition: LoopPass.h:28
llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition: LoopPassManager.h:262
llvm::LoopBase::getUniqueExitBlocks
void getUniqueExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all unique successor blocks of this loop.
Definition: LoopInfoImpl.h:121
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2814
isLoopExitTestBasedOn
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB)
Whether the current loop exit test is based on this value.
Definition: IndVarSimplify.cpp:698
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::MemorySSA
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:714
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:970
optimizeLoopExitWithUnknownExitCount
static bool optimizeLoopExitWithUnknownExitCount(const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter, bool Inverted, bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter, SmallVectorImpl< WeakTrackingVH > &DeadInsts)
Definition: IndVarSimplify.cpp:1333
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::succ_begin
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:99
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
llvm::PHINode::getBasicBlockIndex
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
Definition: Instructions.h:2842
llvm::SCEVCheapExpansionBudget
cl::opt< unsigned > SCEVCheapExpansionBudget
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:166
llvm::IVVisitor
Interface for visiting interesting IV users that are recognized but not simplified by this utility.
Definition: SimplifyIndVar.h:34
ArrayRef.h
llvm::AlwaysRepl
@ AlwaysRepl
Definition: LoopUtils.h:436
visitIVCast
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE, const TargetTransformInfo *TTI)
Update information about the induction variable that is extended by this sign or zero extend operatio...
Definition: IndVarSimplify.cpp:510
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:215
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::CmpInst::FCMP_OGE
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:724
llvm::IndVarSimplifyPass::run
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: IndVarSimplify.cpp:2053
iterator_range.h
llvm::CmpInst::BAD_ICMP_PREDICATE
@ BAD_ICMP_PREDICATE
Definition: InstrTypes.h:752
llvm::TTI
TargetTransformInfo TTI
Definition: TargetTransformInfo.h:162
llvm::MemorySSAAnalysis
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:948
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::CmpInst::ICMP_SLT
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:748
Compare
QP Compare Ordered outs ins xscmpudp No builtin are required Or llvm fcmp order unorder compare DP QP Compare builtin are required DP Compare
Definition: README_P9.txt:309
llvm::LoopInfo
Definition: LoopInfo.h:1086
DataLayout.h
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:178
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:263
llvm::Loop::makeLoopInvariant
bool makeLoopInvariant(Value *V, bool &Changed, Instruction *InsertPt=nullptr, MemorySSAUpdater *MSSAU=nullptr) const
If the given value is an instruction inside of the loop and it can be hoisted, do so to make it trivi...
Definition: LoopInfo.cpp:70
llvm::CmpInst::ICMP_ULT
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:744
LoopPass.h
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::CFGAnalyses
Represents analyses that only rely on functions' control flow.
Definition: PassManager.h:113
if
if(llvm_vc STREQUAL "") set(fake_version_inc "$
Definition: CMakeLists.txt:14
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:529
uint32_t
Compiler.h
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:991
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:82
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
ConstantRange.h
clEnumValN
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:660
llvm::LoopStandardAnalysisResults::LI
LoopInfo & LI
Definition: LoopAnalysisManager.h:55
llvm::CastInst
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:429
llvm::SmallSet::insert
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:182
LLVM_FALLTHROUGH
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:280
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:305
ValueHandle.h
llvm::Init
Definition: Record.h:281
llvm::DataLayout::isLegalInteger
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU.
Definition: DataLayout.h:266
llvm::ScalarEvolution::isLoopInvariant
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
Definition: ScalarEvolution.cpp:13248
llvm::tgtok::IntVal
@ IntVal
Definition: TGLexer.h:64
llvm::CmpInst::FCMP_UGT
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:731
Constant.h
llvm::APFloatBase::rmTowardZero
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:193
llvm::SCEVAddRecExpr::getLoop
const Loop * getLoop() const
Definition: ScalarEvolutionExpressions.h:354
llvm::ScalarEvolution::isKnownPredicateAt
bool isKnownPredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
Definition: ScalarEvolution.cpp:10437
llvm::ScalarEvolution::getMinusSCEV
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
Definition: ScalarEvolution.cpp:4524
isLoopCounter
static bool isLoopCounter(PHINode *Phi, Loop *L, ScalarEvolution *SE)
Return true if the given phi is a "counter" in L.
Definition: IndVarSimplify.cpp:859
llvm::Loop::isLCSSAForm
bool isLCSSAForm(const DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:458
llvm::Value::user_iterator
user_iterator_impl< User > user_iterator
Definition: Value.h:390
replaceLoopPHINodesWithPreheaderValues
static void replaceLoopPHINodesWithPreheaderValues(Loop *L, SmallVectorImpl< WeakTrackingVH > &DeadInsts)
Definition: IndVarSimplify.cpp:1304
llvm::LoopStandardAnalysisResults::TTI
TargetTransformInfo & TTI
Definition: LoopAnalysisManager.h:58
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:341
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::PHINode::Create
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Definition: Instructions.h:2706
DisableLFTR
static cl::opt< bool > DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), cl::desc("Disable Linear Function Test Replace optimization"))
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:342
Casting.h
DEBUG_TYPE
#define DEBUG_TYPE
Definition: IndVarSimplify.cpp:89
Function.h
needsLFTR
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB)
linearFunctionTestReplace policy.
Definition: IndVarSimplify.cpp:711
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1552
llvm::ScalarEvolution::isSCEVable
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Definition: ScalarEvolution.cpp:4293
PassManager.h
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:222
llvm::SCEV::isOne
bool isOne() const
Return true if the expression is a constant one.
Definition: ScalarEvolution.cpp:431
llvm::Loop::isRecursivelyLCSSAForm
bool isRecursivelyLCSSAForm(const DominatorTree &DT, const LoopInfo &LI) const
Return true if this Loop and all inner subloops are in LCSSA form.
Definition: LoopInfo.cpp:465
foldExit
static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken, SmallVectorImpl< WeakTrackingVH > &DeadInsts)
Definition: IndVarSimplify.cpp:1294
llvm::CmpInst::ICMP_SGE
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:747
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:185
llvm::LoopStandardAnalysisResults::SE
ScalarEvolution & SE
Definition: LoopAnalysisManager.h:56
replaceWithInvariantCond
static void replaceWithInvariantCond(const Loop *L, BasicBlock *ExitingBB, ICmpInst::Predicate InvariantPred, const SCEV *InvariantLHS, const SCEV *InvariantRHS, SCEVExpander &Rewriter, SmallVectorImpl< WeakTrackingVH > &DeadInsts)
Definition: IndVarSimplify.cpp:1316
llvm::DeleteDeadPHIs
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
Definition: BasicBlockUtils.cpp:162
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:432
ReplaceExitValue
static cl::opt< ReplaceExitVal > ReplaceExitValue("replexitval", cl::Hidden, cl::init(OnlyCheapRepl), cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), clEnumValN(OnlyCheapRepl, "cheap", "only replace exit value when the cost is cheap"), clEnumValN(NoHardUse, "noharduse", "only replace exit values when loop def likely dead"), clEnumValN(AlwaysRepl, "always", "always replace exit value whenever possible")))
ScalarEvolutionExpressions.h
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
MemorySSA.h
llvm::ScalarEvolution::getZeroExtendExpr
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
Definition: ScalarEvolution.cpp:1595
Instructions.h
llvm::User::getNumOperands
unsigned getNumOperands() const
Definition: User.h:191
llvm::ScalarEvolution::getMinusOne
const SCEV * getMinusOne(Type *Ty)
Return a SCEV for the constant -1 of a specific type.
Definition: ScalarEvolution.h:648
SmallVector.h
llvm::SCEVNAryExpr::hasNoSignedWrap
bool hasNoSignedWrap() const
Definition: ScalarEvolutionExpressions.h:221
User.h
llvm::SIToFPInst
This class represents a cast from signed integer to floating point.
Definition: Instructions.h:5014
llvm::PatternMatch::m_ICmp
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
Definition: PatternMatch.h:1404
Dominators.h
llvm::CmpInst::FCMP_UNE
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:735
llvm::CastInst::getOpcode
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:676
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::PHINode::getIncomingBlock
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Definition: Instructions.h:2780
TargetTransformInfo.h
llvm::LoopStandardAnalysisResults::MSSA
MemorySSA * MSSA
Definition: LoopAnalysisManager.h:61
llvm::PHINode
Definition: Instructions.h:2664
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:119
IV
static const uint32_t IV[8]
Definition: blake3_impl.h:85
llvm::CmpInst::FCMP_OLE
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:726
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:398
DerivedTypes.h
llvm::SmallPtrSetImpl< Value * >
llvm::SCEV::getType
Type * getType() const
Return the LLVM type of this SCEV expression.
Definition: ScalarEvolution.cpp:393
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::propagatesPoison
bool propagatesPoison(const Operator *I)
Return true if I yields poison or raises UB if any of its operands is poison.
Definition: ValueTracking.cpp:5354
llvm::ScalarEvolution::getAddExpr
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:2454
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::makeMutableArrayRef
MutableArrayRef< T > makeMutableArrayRef(T &OneElt)
Construct a MutableArrayRef from a single element.
Definition: ArrayRef.h:533
llvm::VerifyMemorySSA
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:91
CreateAdd
static BinaryOperator * CreateAdd(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
Definition: Reassociate.cpp:231
llvm::Value::takeName
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:378
llvm::PatternMatch::m_BasicBlock
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
Definition: PatternMatch.h:160
llvm::TargetTransformInfo::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueKind Opd1Info=OK_AnyValue, OperandValueKind Opd2Info=OK_AnyValue, OperandValueProperties Opd1PropInfo=OP_None, OperandValueProperties Opd2PropInfo=OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
Definition: TargetTransformInfo.cpp:742
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:405
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3099
raw_ostream.h
BasicBlockUtils.h
Value.h
llvm::SCEV::isZero
bool isZero() const
Return true if the expression is a constant zero.
Definition: ScalarEvolution.cpp:425
llvm::LoopStandardAnalysisResults::TLI
TargetLibraryInfo & TLI
Definition: LoopAnalysisManager.h:57
InitializePasses.h
llvm::ScalarEvolution::getSCEVAtScope
const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
Definition: ScalarEvolution.cpp:9245
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
llvm::BranchInst::isConditional
bool isConditional() const
Definition: Instructions.h:3178
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3192
llvm::SCEVAddRecExpr::getStepRecurrence
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Definition: ScalarEvolutionExpressions.h:360
SimplifyIndVar.h
llvm::Instruction::moveBefore
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:96
llvm::CmpInst::FCMP_UEQ
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:730
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
UsePostIncrementRanges
static cl::opt< bool > UsePostIncrementRanges("indvars-post-increment-ranges", cl::Hidden, cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), cl::init(true))
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:927
SmallSet.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37