LLVM  14.0.0git
LoopPredication.cpp
Go to the documentation of this file.
1 //===-- LoopPredication.cpp - Guard based loop predication pass -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The LoopPredication pass tries to convert loop variant range checks to loop
10 // invariant by widening checks across loop iterations. For example, it will
11 // convert
12 //
13 // for (i = 0; i < n; i++) {
14 // guard(i < len);
15 // ...
16 // }
17 //
18 // to
19 //
20 // for (i = 0; i < n; i++) {
21 // guard(n - 1 < len);
22 // ...
23 // }
24 //
25 // After this transformation the condition of the guard is loop invariant, so
26 // loop-unswitch can later unswitch the loop by this condition which basically
27 // predicates the loop by the widened condition:
28 //
29 // if (n - 1 < len)
30 // for (i = 0; i < n; i++) {
31 // ...
32 // }
33 // else
34 // deoptimize
35 //
36 // It's tempting to rely on SCEV here, but it has proven to be problematic.
37 // Generally the facts SCEV provides about the increment step of add
38 // recurrences are true if the backedge of the loop is taken, which implicitly
39 // assumes that the guard doesn't fail. Using these facts to optimize the
40 // guard results in a circular logic where the guard is optimized under the
41 // assumption that it never fails.
42 //
43 // For example, in the loop below the induction variable will be marked as nuw
44 // basing on the guard. Basing on nuw the guard predicate will be considered
45 // monotonic. Given a monotonic condition it's tempting to replace the induction
46 // variable in the condition with its value on the last iteration. But this
47 // transformation is not correct, e.g. e = 4, b = 5 breaks the loop.
48 //
49 // for (int i = b; i != e; i++)
50 // guard(i u< len)
51 //
52 // One of the ways to reason about this problem is to use an inductive proof
53 // approach. Given the loop:
54 //
55 // if (B(0)) {
56 // do {
57 // I = PHI(0, I.INC)
58 // I.INC = I + Step
59 // guard(G(I));
60 // } while (B(I));
61 // }
62 //
63 // where B(x) and G(x) are predicates that map integers to booleans, we want a
64 // loop invariant expression M such the following program has the same semantics
65 // as the above:
66 //
67 // if (B(0)) {
68 // do {
69 // I = PHI(0, I.INC)
70 // I.INC = I + Step
71 // guard(G(0) && M);
72 // } while (B(I));
73 // }
74 //
75 // One solution for M is M = forall X . (G(X) && B(X)) => G(X + Step)
76 //
77 // Informal proof that the transformation above is correct:
78 //
79 // By the definition of guards we can rewrite the guard condition to:
80 // G(I) && G(0) && M
81 //
82 // Let's prove that for each iteration of the loop:
83 // G(0) && M => G(I)
84 // And the condition above can be simplified to G(Start) && M.
85 //
86 // Induction base.
87 // G(0) && M => G(0)
88 //
89 // Induction step. Assuming G(0) && M => G(I) on the subsequent
90 // iteration:
91 //
92 // B(I) is true because it's the backedge condition.
93 // G(I) is true because the backedge is guarded by this condition.
94 //
95 // So M = forall X . (G(X) && B(X)) => G(X + Step) implies G(I + Step).
96 //
97 // Note that we can use anything stronger than M, i.e. any condition which
98 // implies M.
99 //
100 // When S = 1 (i.e. forward iterating loop), the transformation is supported
101 // when:
102 // * The loop has a single latch with the condition of the form:
103 // B(X) = latchStart + X <pred> latchLimit,
104 // where <pred> is u<, u<=, s<, or s<=.
105 // * The guard condition is of the form
106 // G(X) = guardStart + X u< guardLimit
107 //
108 // For the ult latch comparison case M is:
109 // forall X . guardStart + X u< guardLimit && latchStart + X <u latchLimit =>
110 // guardStart + X + 1 u< guardLimit
111 //
112 // The only way the antecedent can be true and the consequent can be false is
113 // if
114 // X == guardLimit - 1 - guardStart
115 // (and guardLimit is non-zero, but we won't use this latter fact).
116 // If X == guardLimit - 1 - guardStart then the second half of the antecedent is
117 // latchStart + guardLimit - 1 - guardStart u< latchLimit
118 // and its negation is
119 // latchStart + guardLimit - 1 - guardStart u>= latchLimit
120 //
121 // In other words, if
122 // latchLimit u<= latchStart + guardLimit - 1 - guardStart
123 // then:
124 // (the ranges below are written in ConstantRange notation, where [A, B) is the
125 // set for (I = A; I != B; I++ /*maywrap*/) yield(I);)
126 //
127 // forall X . guardStart + X u< guardLimit &&
128 // latchStart + X u< latchLimit =>
129 // guardStart + X + 1 u< guardLimit
130 // == forall X . guardStart + X u< guardLimit &&
131 // latchStart + X u< latchStart + guardLimit - 1 - guardStart =>
132 // guardStart + X + 1 u< guardLimit
133 // == forall X . (guardStart + X) in [0, guardLimit) &&
134 // (latchStart + X) in [0, latchStart + guardLimit - 1 - guardStart) =>
135 // (guardStart + X + 1) in [0, guardLimit)
136 // == forall X . X in [-guardStart, guardLimit - guardStart) &&
137 // X in [-latchStart, guardLimit - 1 - guardStart) =>
138 // X in [-guardStart - 1, guardLimit - guardStart - 1)
139 // == true
140 //
141 // So the widened condition is:
142 // guardStart u< guardLimit &&
143 // latchStart + guardLimit - 1 - guardStart u>= latchLimit
144 // Similarly for ule condition the widened condition is:
145 // guardStart u< guardLimit &&
146 // latchStart + guardLimit - 1 - guardStart u> latchLimit
147 // For slt condition the widened condition is:
148 // guardStart u< guardLimit &&
149 // latchStart + guardLimit - 1 - guardStart s>= latchLimit
150 // For sle condition the widened condition is:
151 // guardStart u< guardLimit &&
152 // latchStart + guardLimit - 1 - guardStart s> latchLimit
153 //
154 // When S = -1 (i.e. reverse iterating loop), the transformation is supported
155 // when:
156 // * The loop has a single latch with the condition of the form:
157 // B(X) = X <pred> latchLimit, where <pred> is u>, u>=, s>, or s>=.
158 // * The guard condition is of the form
159 // G(X) = X - 1 u< guardLimit
160 //
161 // For the ugt latch comparison case M is:
162 // forall X. X-1 u< guardLimit and X u> latchLimit => X-2 u< guardLimit
163 //
164 // The only way the antecedent can be true and the consequent can be false is if
165 // X == 1.
166 // If X == 1 then the second half of the antecedent is
167 // 1 u> latchLimit, and its negation is latchLimit u>= 1.
168 //
169 // So the widened condition is:
170 // guardStart u< guardLimit && latchLimit u>= 1.
171 // Similarly for sgt condition the widened condition is:
172 // guardStart u< guardLimit && latchLimit s>= 1.
173 // For uge condition the widened condition is:
174 // guardStart u< guardLimit && latchLimit u> 1.
175 // For sge condition the widened condition is:
176 // guardStart u< guardLimit && latchLimit s> 1.
177 //===----------------------------------------------------------------------===//
178 
180 #include "llvm/ADT/Statistic.h"
184 #include "llvm/Analysis/LoopInfo.h"
185 #include "llvm/Analysis/LoopPass.h"
186 #include "llvm/Analysis/MemorySSA.h"
190 #include "llvm/IR/Function.h"
191 #include "llvm/IR/GlobalValue.h"
192 #include "llvm/IR/IntrinsicInst.h"
193 #include "llvm/IR/Module.h"
194 #include "llvm/IR/PatternMatch.h"
195 #include "llvm/InitializePasses.h"
196 #include "llvm/Pass.h"
198 #include "llvm/Support/Debug.h"
199 #include "llvm/Transforms/Scalar.h"
204 
205 #define DEBUG_TYPE "loop-predication"
206 
207 STATISTIC(TotalConsidered, "Number of guards considered");
208 STATISTIC(TotalWidened, "Number of checks widened");
209 
210 using namespace llvm;
211 
212 static cl::opt<bool> EnableIVTruncation("loop-predication-enable-iv-truncation",
213  cl::Hidden, cl::init(true));
214 
215 static cl::opt<bool> EnableCountDownLoop("loop-predication-enable-count-down-loop",
216  cl::Hidden, cl::init(true));
217 
218 static cl::opt<bool>
219  SkipProfitabilityChecks("loop-predication-skip-profitability-checks",
220  cl::Hidden, cl::init(false));
221 
222 // This is the scale factor for the latch probability. We use this during
223 // profitability analysis to find other exiting blocks that have a much higher
224 // probability of exiting the loop instead of loop exiting via latch.
225 // This value should be greater than 1 for a sane profitability check.
227  "loop-predication-latch-probability-scale", cl::Hidden, cl::init(2.0),
228  cl::desc("scale factor for the latch probability. Value should be greater "
229  "than 1. Lower values are ignored"));
230 
232  "loop-predication-predicate-widenable-branches-to-deopt", cl::Hidden,
233  cl::desc("Whether or not we should predicate guards "
234  "expressed as widenable branches to deoptimize blocks"),
235  cl::init(true));
236 
237 namespace {
238 /// Represents an induction variable check:
239 /// icmp Pred, <induction variable>, <loop invariant limit>
240 struct LoopICmp {
241  ICmpInst::Predicate Pred;
242  const SCEVAddRecExpr *IV;
243  const SCEV *Limit;
244  LoopICmp(ICmpInst::Predicate Pred, const SCEVAddRecExpr *IV,
245  const SCEV *Limit)
246  : Pred(Pred), IV(IV), Limit(Limit) {}
247  LoopICmp() {}
248  void dump() {
249  dbgs() << "LoopICmp Pred = " << Pred << ", IV = " << *IV
250  << ", Limit = " << *Limit << "\n";
251  }
252 };
253 
254 class LoopPredication {
255  AliasAnalysis *AA;
256  DominatorTree *DT;
257  ScalarEvolution *SE;
258  LoopInfo *LI;
259  MemorySSAUpdater *MSSAU;
260 
261  Loop *L;
262  const DataLayout *DL;
263  BasicBlock *Preheader;
264  LoopICmp LatchCheck;
265 
266  bool isSupportedStep(const SCEV* Step);
267  Optional<LoopICmp> parseLoopICmp(ICmpInst *ICI);
268  Optional<LoopICmp> parseLoopLatchICmp();
269 
270  /// Return an insertion point suitable for inserting a safe to speculate
271  /// instruction whose only user will be 'User' which has operands 'Ops'. A
272  /// trivial result would be the at the User itself, but we try to return a
273  /// loop invariant location if possible.
274  Instruction *findInsertPt(Instruction *User, ArrayRef<Value*> Ops);
275  /// Same as above, *except* that this uses the SCEV definition of invariant
276  /// which is that an expression *can be made* invariant via SCEVExpander.
277  /// Thus, this version is only suitable for finding an insert point to be be
278  /// passed to SCEVExpander!
279  Instruction *findInsertPt(Instruction *User, ArrayRef<const SCEV*> Ops);
280 
281  /// Return true if the value is known to produce a single fixed value across
282  /// all iterations on which it executes. Note that this does not imply
283  /// speculation safety. That must be established separately.
284  bool isLoopInvariantValue(const SCEV* S);
285 
286  Value *expandCheck(SCEVExpander &Expander, Instruction *Guard,
287  ICmpInst::Predicate Pred, const SCEV *LHS,
288  const SCEV *RHS);
289 
290  Optional<Value *> widenICmpRangeCheck(ICmpInst *ICI, SCEVExpander &Expander,
291  Instruction *Guard);
292  Optional<Value *> widenICmpRangeCheckIncrementingLoop(LoopICmp LatchCheck,
293  LoopICmp RangeCheck,
294  SCEVExpander &Expander,
295  Instruction *Guard);
296  Optional<Value *> widenICmpRangeCheckDecrementingLoop(LoopICmp LatchCheck,
297  LoopICmp RangeCheck,
298  SCEVExpander &Expander,
299  Instruction *Guard);
300  unsigned collectChecks(SmallVectorImpl<Value *> &Checks, Value *Condition,
301  SCEVExpander &Expander, Instruction *Guard);
302  bool widenGuardConditions(IntrinsicInst *II, SCEVExpander &Expander);
303  bool widenWidenableBranchGuardConditions(BranchInst *Guard, SCEVExpander &Expander);
304  // If the loop always exits through another block in the loop, we should not
305  // predicate based on the latch check. For example, the latch check can be a
306  // very coarse grained check and there can be more fine grained exit checks
307  // within the loop.
308  bool isLoopProfitableToPredicate();
309 
310  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
311 
312 public:
314  LoopInfo *LI, MemorySSAUpdater *MSSAU)
315  : AA(AA), DT(DT), SE(SE), LI(LI), MSSAU(MSSAU){};
316  bool runOnLoop(Loop *L);
317 };
318 
319 class LoopPredicationLegacyPass : public LoopPass {
320 public:
321  static char ID;
322  LoopPredicationLegacyPass() : LoopPass(ID) {
324  }
325 
326  void getAnalysisUsage(AnalysisUsage &AU) const override {
330  }
331 
332  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
333  if (skipLoop(L))
334  return false;
335  auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
336  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
337  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
338  auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>();
339  std::unique_ptr<MemorySSAUpdater> MSSAU;
340  if (MSSAWP)
341  MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAWP->getMSSA());
342  auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
343  LoopPredication LP(AA, DT, SE, LI, MSSAU ? MSSAU.get() : nullptr);
344  return LP.runOnLoop(L);
345  }
346 };
347 
349 } // end namespace
350 
351 INITIALIZE_PASS_BEGIN(LoopPredicationLegacyPass, "loop-predication",
352  "Loop predication", false, false)
355 INITIALIZE_PASS_END(LoopPredicationLegacyPass, "loop-predication",
357 
359  return new LoopPredicationLegacyPass();
360 }
361 
364  LPMUpdater &U) {
365  std::unique_ptr<MemorySSAUpdater> MSSAU;
366  if (AR.MSSA)
367  MSSAU = std::make_unique<MemorySSAUpdater>(AR.MSSA);
368  LoopPredication LP(&AR.AA, &AR.DT, &AR.SE, &AR.LI,
369  MSSAU ? MSSAU.get() : nullptr);
370  if (!LP.runOnLoop(&L))
371  return PreservedAnalyses::all();
372 
373  auto PA = getLoopPassPreservedAnalyses();
374  if (AR.MSSA)
375  PA.preserve<MemorySSAAnalysis>();
376  return PA;
377 }
378 
380 LoopPredication::parseLoopICmp(ICmpInst *ICI) {
381  auto Pred = ICI->getPredicate();
382  auto *LHS = ICI->getOperand(0);
383  auto *RHS = ICI->getOperand(1);
384 
385  const SCEV *LHSS = SE->getSCEV(LHS);
386  if (isa<SCEVCouldNotCompute>(LHSS))
387  return None;
388  const SCEV *RHSS = SE->getSCEV(RHS);
389  if (isa<SCEVCouldNotCompute>(RHSS))
390  return None;
391 
392  // Canonicalize RHS to be loop invariant bound, LHS - a loop computable IV
393  if (SE->isLoopInvariant(LHSS, L)) {
394  std::swap(LHS, RHS);
395  std::swap(LHSS, RHSS);
396  Pred = ICmpInst::getSwappedPredicate(Pred);
397  }
398 
399  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHSS);
400  if (!AR || AR->getLoop() != L)
401  return None;
402 
403  return LoopICmp(Pred, AR, RHSS);
404 }
405 
406 Value *LoopPredication::expandCheck(SCEVExpander &Expander,
407  Instruction *Guard,
408  ICmpInst::Predicate Pred, const SCEV *LHS,
409  const SCEV *RHS) {
410  Type *Ty = LHS->getType();
411  assert(Ty == RHS->getType() && "expandCheck operands have different types?");
412 
413  if (SE->isLoopInvariant(LHS, L) && SE->isLoopInvariant(RHS, L)) {
414  IRBuilder<> Builder(Guard);
415  if (SE->isLoopEntryGuardedByCond(L, Pred, LHS, RHS))
416  return Builder.getTrue();
417  if (SE->isLoopEntryGuardedByCond(L, ICmpInst::getInversePredicate(Pred),
418  LHS, RHS))
419  return Builder.getFalse();
420  }
421 
422  Value *LHSV = Expander.expandCodeFor(LHS, Ty, findInsertPt(Guard, {LHS}));
423  Value *RHSV = Expander.expandCodeFor(RHS, Ty, findInsertPt(Guard, {RHS}));
424  IRBuilder<> Builder(findInsertPt(Guard, {LHSV, RHSV}));
425  return Builder.CreateICmp(Pred, LHSV, RHSV);
426 }
427 
428 
429 // Returns true if its safe to truncate the IV to RangeCheckType.
430 // When the IV type is wider than the range operand type, we can still do loop
431 // predication, by generating SCEVs for the range and latch that are of the
432 // same type. We achieve this by generating a SCEV truncate expression for the
433 // latch IV. This is done iff truncation of the IV is a safe operation,
434 // without loss of information.
435 // Another way to achieve this is by generating a wider type SCEV for the
436 // range check operand, however, this needs a more involved check that
437 // operands do not overflow. This can lead to loss of information when the
438 // range operand is of the form: add i32 %offset, %iv. We need to prove that
439 // sext(x + y) is same as sext(x) + sext(y).
440 // This function returns true if we can safely represent the IV type in
441 // the RangeCheckType without loss of information.
443  ScalarEvolution &SE,
444  const LoopICmp LatchCheck,
445  Type *RangeCheckType) {
446  if (!EnableIVTruncation)
447  return false;
448  assert(DL.getTypeSizeInBits(LatchCheck.IV->getType()).getFixedSize() >
449  DL.getTypeSizeInBits(RangeCheckType).getFixedSize() &&
450  "Expected latch check IV type to be larger than range check operand "
451  "type!");
452  // The start and end values of the IV should be known. This is to guarantee
453  // that truncating the wide type will not lose information.
454  auto *Limit = dyn_cast<SCEVConstant>(LatchCheck.Limit);
455  auto *Start = dyn_cast<SCEVConstant>(LatchCheck.IV->getStart());
456  if (!Limit || !Start)
457  return false;
458  // This check makes sure that the IV does not change sign during loop
459  // iterations. Consider latchType = i64, LatchStart = 5, Pred = ICMP_SGE,
460  // LatchEnd = 2, rangeCheckType = i32. If it's not a monotonic predicate, the
461  // IV wraps around, and the truncation of the IV would lose the range of
462  // iterations between 2^32 and 2^64.
463  if (!SE.getMonotonicPredicateType(LatchCheck.IV, LatchCheck.Pred))
464  return false;
465  // The active bits should be less than the bits in the RangeCheckType. This
466  // guarantees that truncating the latch check to RangeCheckType is a safe
467  // operation.
468  auto RangeCheckTypeBitSize =
469  DL.getTypeSizeInBits(RangeCheckType).getFixedSize();
470  return Start->getAPInt().getActiveBits() < RangeCheckTypeBitSize &&
471  Limit->getAPInt().getActiveBits() < RangeCheckTypeBitSize;
472 }
473 
474 
475 // Return an LoopICmp describing a latch check equivlent to LatchCheck but with
476 // the requested type if safe to do so. May involve the use of a new IV.
478  ScalarEvolution &SE,
479  const LoopICmp LatchCheck,
480  Type *RangeCheckType) {
481 
482  auto *LatchType = LatchCheck.IV->getType();
483  if (RangeCheckType == LatchType)
484  return LatchCheck;
485  // For now, bail out if latch type is narrower than range type.
486  if (DL.getTypeSizeInBits(LatchType).getFixedSize() <
487  DL.getTypeSizeInBits(RangeCheckType).getFixedSize())
488  return None;
489  if (!isSafeToTruncateWideIVType(DL, SE, LatchCheck, RangeCheckType))
490  return None;
491  // We can now safely identify the truncated version of the IV and limit for
492  // RangeCheckType.
493  LoopICmp NewLatchCheck;
494  NewLatchCheck.Pred = LatchCheck.Pred;
495  NewLatchCheck.IV = dyn_cast<SCEVAddRecExpr>(
496  SE.getTruncateExpr(LatchCheck.IV, RangeCheckType));
497  if (!NewLatchCheck.IV)
498  return None;
499  NewLatchCheck.Limit = SE.getTruncateExpr(LatchCheck.Limit, RangeCheckType);
500  LLVM_DEBUG(dbgs() << "IV of type: " << *LatchType
501  << "can be represented as range check type:"
502  << *RangeCheckType << "\n");
503  LLVM_DEBUG(dbgs() << "LatchCheck.IV: " << *NewLatchCheck.IV << "\n");
504  LLVM_DEBUG(dbgs() << "LatchCheck.Limit: " << *NewLatchCheck.Limit << "\n");
505  return NewLatchCheck;
506 }
507 
508 bool LoopPredication::isSupportedStep(const SCEV* Step) {
509  return Step->isOne() || (Step->isAllOnesValue() && EnableCountDownLoop);
510 }
511 
512 Instruction *LoopPredication::findInsertPt(Instruction *Use,
513  ArrayRef<Value*> Ops) {
514  for (Value *Op : Ops)
515  if (!L->isLoopInvariant(Op))
516  return Use;
517  return Preheader->getTerminator();
518 }
519 
520 Instruction *LoopPredication::findInsertPt(Instruction *Use,
521  ArrayRef<const SCEV*> Ops) {
522  // Subtlety: SCEV considers things to be invariant if the value produced is
523  // the same across iterations. This is not the same as being able to
524  // evaluate outside the loop, which is what we actually need here.
525  for (const SCEV *Op : Ops)
526  if (!SE->isLoopInvariant(Op, L) ||
527  !isSafeToExpandAt(Op, Preheader->getTerminator(), *SE))
528  return Use;
529  return Preheader->getTerminator();
530 }
531 
532 bool LoopPredication::isLoopInvariantValue(const SCEV* S) {
533  // Handling expressions which produce invariant results, but *haven't* yet
534  // been removed from the loop serves two important purposes.
535  // 1) Most importantly, it resolves a pass ordering cycle which would
536  // otherwise need us to iteration licm, loop-predication, and either
537  // loop-unswitch or loop-peeling to make progress on examples with lots of
538  // predicable range checks in a row. (Since, in the general case, we can't
539  // hoist the length checks until the dominating checks have been discharged
540  // as we can't prove doing so is safe.)
541  // 2) As a nice side effect, this exposes the value of peeling or unswitching
542  // much more obviously in the IR. Otherwise, the cost modeling for other
543  // transforms would end up needing to duplicate all of this logic to model a
544  // check which becomes predictable based on a modeled peel or unswitch.
545  //
546  // The cost of doing so in the worst case is an extra fill from the stack in
547  // the loop to materialize the loop invariant test value instead of checking
548  // against the original IV which is presumable in a register inside the loop.
549  // Such cases are presumably rare, and hint at missing oppurtunities for
550  // other passes.
551 
552  if (SE->isLoopInvariant(S, L))
553  // Note: This the SCEV variant, so the original Value* may be within the
554  // loop even though SCEV has proven it is loop invariant.
555  return true;
556 
557  // Handle a particular important case which SCEV doesn't yet know about which
558  // shows up in range checks on arrays with immutable lengths.
559  // TODO: This should be sunk inside SCEV.
560  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S))
561  if (const auto *LI = dyn_cast<LoadInst>(U->getValue()))
562  if (LI->isUnordered() && L->hasLoopInvariantOperands(LI))
563  if (AA->pointsToConstantMemory(LI->getOperand(0)) ||
564  LI->hasMetadata(LLVMContext::MD_invariant_load))
565  return true;
566  return false;
567 }
568 
569 Optional<Value *> LoopPredication::widenICmpRangeCheckIncrementingLoop(
570  LoopICmp LatchCheck, LoopICmp RangeCheck,
571  SCEVExpander &Expander, Instruction *Guard) {
572  auto *Ty = RangeCheck.IV->getType();
573  // Generate the widened condition for the forward loop:
574  // guardStart u< guardLimit &&
575  // latchLimit <pred> guardLimit - 1 - guardStart + latchStart
576  // where <pred> depends on the latch condition predicate. See the file
577  // header comment for the reasoning.
578  // guardLimit - guardStart + latchStart - 1
579  const SCEV *GuardStart = RangeCheck.IV->getStart();
580  const SCEV *GuardLimit = RangeCheck.Limit;
581  const SCEV *LatchStart = LatchCheck.IV->getStart();
582  const SCEV *LatchLimit = LatchCheck.Limit;
583  // Subtlety: We need all the values to be *invariant* across all iterations,
584  // but we only need to check expansion safety for those which *aren't*
585  // already guaranteed to dominate the guard.
586  if (!isLoopInvariantValue(GuardStart) ||
587  !isLoopInvariantValue(GuardLimit) ||
588  !isLoopInvariantValue(LatchStart) ||
589  !isLoopInvariantValue(LatchLimit)) {
590  LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
591  return None;
592  }
593  if (!isSafeToExpandAt(LatchStart, Guard, *SE) ||
594  !isSafeToExpandAt(LatchLimit, Guard, *SE)) {
595  LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
596  return None;
597  }
598 
599  // guardLimit - guardStart + latchStart - 1
600  const SCEV *RHS =
601  SE->getAddExpr(SE->getMinusSCEV(GuardLimit, GuardStart),
602  SE->getMinusSCEV(LatchStart, SE->getOne(Ty)));
603  auto LimitCheckPred =
605 
606  LLVM_DEBUG(dbgs() << "LHS: " << *LatchLimit << "\n");
607  LLVM_DEBUG(dbgs() << "RHS: " << *RHS << "\n");
608  LLVM_DEBUG(dbgs() << "Pred: " << LimitCheckPred << "\n");
609 
610  auto *LimitCheck =
611  expandCheck(Expander, Guard, LimitCheckPred, LatchLimit, RHS);
612  auto *FirstIterationCheck = expandCheck(Expander, Guard, RangeCheck.Pred,
613  GuardStart, GuardLimit);
614  IRBuilder<> Builder(findInsertPt(Guard, {FirstIterationCheck, LimitCheck}));
615  return Builder.CreateAnd(FirstIterationCheck, LimitCheck);
616 }
617 
618 Optional<Value *> LoopPredication::widenICmpRangeCheckDecrementingLoop(
619  LoopICmp LatchCheck, LoopICmp RangeCheck,
620  SCEVExpander &Expander, Instruction *Guard) {
621  auto *Ty = RangeCheck.IV->getType();
622  const SCEV *GuardStart = RangeCheck.IV->getStart();
623  const SCEV *GuardLimit = RangeCheck.Limit;
624  const SCEV *LatchStart = LatchCheck.IV->getStart();
625  const SCEV *LatchLimit = LatchCheck.Limit;
626  // Subtlety: We need all the values to be *invariant* across all iterations,
627  // but we only need to check expansion safety for those which *aren't*
628  // already guaranteed to dominate the guard.
629  if (!isLoopInvariantValue(GuardStart) ||
630  !isLoopInvariantValue(GuardLimit) ||
631  !isLoopInvariantValue(LatchStart) ||
632  !isLoopInvariantValue(LatchLimit)) {
633  LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
634  return None;
635  }
636  if (!isSafeToExpandAt(LatchStart, Guard, *SE) ||
637  !isSafeToExpandAt(LatchLimit, Guard, *SE)) {
638  LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
639  return None;
640  }
641  // The decrement of the latch check IV should be the same as the
642  // rangeCheckIV.
643  auto *PostDecLatchCheckIV = LatchCheck.IV->getPostIncExpr(*SE);
644  if (RangeCheck.IV != PostDecLatchCheckIV) {
645  LLVM_DEBUG(dbgs() << "Not the same. PostDecLatchCheckIV: "
646  << *PostDecLatchCheckIV
647  << " and RangeCheckIV: " << *RangeCheck.IV << "\n");
648  return None;
649  }
650 
651  // Generate the widened condition for CountDownLoop:
652  // guardStart u< guardLimit &&
653  // latchLimit <pred> 1.
654  // See the header comment for reasoning of the checks.
655  auto LimitCheckPred =
657  auto *FirstIterationCheck = expandCheck(Expander, Guard,
659  GuardStart, GuardLimit);
660  auto *LimitCheck = expandCheck(Expander, Guard, LimitCheckPred, LatchLimit,
661  SE->getOne(Ty));
662  IRBuilder<> Builder(findInsertPt(Guard, {FirstIterationCheck, LimitCheck}));
663  return Builder.CreateAnd(FirstIterationCheck, LimitCheck);
664 }
665 
667  LoopICmp& RC) {
668  // LFTR canonicalizes checks to the ICMP_NE/EQ form; normalize back to the
669  // ULT/UGE form for ease of handling by our caller.
670  if (ICmpInst::isEquality(RC.Pred) &&
671  RC.IV->getStepRecurrence(*SE)->isOne() &&
672  SE->isKnownPredicate(ICmpInst::ICMP_ULE, RC.IV->getStart(), RC.Limit))
673  RC.Pred = RC.Pred == ICmpInst::ICMP_NE ?
675 }
676 
677 
678 /// If ICI can be widened to a loop invariant condition emits the loop
679 /// invariant condition in the loop preheader and return it, otherwise
680 /// returns None.
681 Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI,
682  SCEVExpander &Expander,
683  Instruction *Guard) {
684  LLVM_DEBUG(dbgs() << "Analyzing ICmpInst condition:\n");
685  LLVM_DEBUG(ICI->dump());
686 
687  // parseLoopStructure guarantees that the latch condition is:
688  // ++i <pred> latchLimit, where <pred> is u<, u<=, s<, or s<=.
689  // We are looking for the range checks of the form:
690  // i u< guardLimit
691  auto RangeCheck = parseLoopICmp(ICI);
692  if (!RangeCheck) {
693  LLVM_DEBUG(dbgs() << "Failed to parse the loop latch condition!\n");
694  return None;
695  }
696  LLVM_DEBUG(dbgs() << "Guard check:\n");
697  LLVM_DEBUG(RangeCheck->dump());
698  if (RangeCheck->Pred != ICmpInst::ICMP_ULT) {
699  LLVM_DEBUG(dbgs() << "Unsupported range check predicate("
700  << RangeCheck->Pred << ")!\n");
701  return None;
702  }
703  auto *RangeCheckIV = RangeCheck->IV;
704  if (!RangeCheckIV->isAffine()) {
705  LLVM_DEBUG(dbgs() << "Range check IV is not affine!\n");
706  return None;
707  }
708  auto *Step = RangeCheckIV->getStepRecurrence(*SE);
709  // We cannot just compare with latch IV step because the latch and range IVs
710  // may have different types.
711  if (!isSupportedStep(Step)) {
712  LLVM_DEBUG(dbgs() << "Range check and latch have IVs different steps!\n");
713  return None;
714  }
715  auto *Ty = RangeCheckIV->getType();
716  auto CurrLatchCheckOpt = generateLoopLatchCheck(*DL, *SE, LatchCheck, Ty);
717  if (!CurrLatchCheckOpt) {
718  LLVM_DEBUG(dbgs() << "Failed to generate a loop latch check "
719  "corresponding to range type: "
720  << *Ty << "\n");
721  return None;
722  }
723 
724  LoopICmp CurrLatchCheck = *CurrLatchCheckOpt;
725  // At this point, the range and latch step should have the same type, but need
726  // not have the same value (we support both 1 and -1 steps).
727  assert(Step->getType() ==
728  CurrLatchCheck.IV->getStepRecurrence(*SE)->getType() &&
729  "Range and latch steps should be of same type!");
730  if (Step != CurrLatchCheck.IV->getStepRecurrence(*SE)) {
731  LLVM_DEBUG(dbgs() << "Range and latch have different step values!\n");
732  return None;
733  }
734 
735  if (Step->isOne())
736  return widenICmpRangeCheckIncrementingLoop(CurrLatchCheck, *RangeCheck,
737  Expander, Guard);
738  else {
739  assert(Step->isAllOnesValue() && "Step should be -1!");
740  return widenICmpRangeCheckDecrementingLoop(CurrLatchCheck, *RangeCheck,
741  Expander, Guard);
742  }
743 }
744 
745 unsigned LoopPredication::collectChecks(SmallVectorImpl<Value *> &Checks,
746  Value *Condition,
747  SCEVExpander &Expander,
748  Instruction *Guard) {
749  unsigned NumWidened = 0;
750  // The guard condition is expected to be in form of:
751  // cond1 && cond2 && cond3 ...
752  // Iterate over subconditions looking for icmp conditions which can be
753  // widened across loop iterations. Widening these conditions remember the
754  // resulting list of subconditions in Checks vector.
755  SmallVector<Value *, 4> Worklist(1, Condition);
756  SmallPtrSet<Value *, 4> Visited;
757  Value *WideableCond = nullptr;
758  do {
759  Value *Condition = Worklist.pop_back_val();
760  if (!Visited.insert(Condition).second)
761  continue;
762 
763  Value *LHS, *RHS;
764  using namespace llvm::PatternMatch;
765  if (match(Condition, m_And(m_Value(LHS), m_Value(RHS)))) {
766  Worklist.push_back(LHS);
767  Worklist.push_back(RHS);
768  continue;
769  }
770 
771  if (match(Condition,
772  m_Intrinsic<Intrinsic::experimental_widenable_condition>())) {
773  // Pick any, we don't care which
774  WideableCond = Condition;
775  continue;
776  }
777 
778  if (ICmpInst *ICI = dyn_cast<ICmpInst>(Condition)) {
779  if (auto NewRangeCheck = widenICmpRangeCheck(ICI, Expander,
780  Guard)) {
781  Checks.push_back(NewRangeCheck.getValue());
782  NumWidened++;
783  continue;
784  }
785  }
786 
787  // Save the condition as is if we can't widen it
788  Checks.push_back(Condition);
789  } while (!Worklist.empty());
790  // At the moment, our matching logic for wideable conditions implicitly
791  // assumes we preserve the form: (br (and Cond, WC())). FIXME
792  // Note that if there were multiple calls to wideable condition in the
793  // traversal, we only need to keep one, and which one is arbitrary.
794  if (WideableCond)
795  Checks.push_back(WideableCond);
796  return NumWidened;
797 }
798 
799 bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
800  SCEVExpander &Expander) {
801  LLVM_DEBUG(dbgs() << "Processing guard:\n");
802  LLVM_DEBUG(Guard->dump());
803 
804  TotalConsidered++;
806  unsigned NumWidened = collectChecks(Checks, Guard->getOperand(0), Expander,
807  Guard);
808  if (NumWidened == 0)
809  return false;
810 
811  TotalWidened += NumWidened;
812 
813  // Emit the new guard condition
814  IRBuilder<> Builder(findInsertPt(Guard, Checks));
815  Value *AllChecks = Builder.CreateAnd(Checks);
816  auto *OldCond = Guard->getOperand(0);
817  Guard->setOperand(0, AllChecks);
818  RecursivelyDeleteTriviallyDeadInstructions(OldCond, nullptr /* TLI */, MSSAU);
819 
820  LLVM_DEBUG(dbgs() << "Widened checks = " << NumWidened << "\n");
821  return true;
822 }
823 
824 bool LoopPredication::widenWidenableBranchGuardConditions(
825  BranchInst *BI, SCEVExpander &Expander) {
826  assert(isGuardAsWidenableBranch(BI) && "Must be!");
827  LLVM_DEBUG(dbgs() << "Processing guard:\n");
828  LLVM_DEBUG(BI->dump());
829 
830  TotalConsidered++;
832  unsigned NumWidened = collectChecks(Checks, BI->getCondition(),
833  Expander, BI);
834  if (NumWidened == 0)
835  return false;
836 
837  TotalWidened += NumWidened;
838 
839  // Emit the new guard condition
840  IRBuilder<> Builder(findInsertPt(BI, Checks));
841  Value *AllChecks = Builder.CreateAnd(Checks);
842  auto *OldCond = BI->getCondition();
843  BI->setCondition(AllChecks);
844  RecursivelyDeleteTriviallyDeadInstructions(OldCond, nullptr /* TLI */, MSSAU);
846  "Stopped being a guard after transform?");
847 
848  LLVM_DEBUG(dbgs() << "Widened checks = " << NumWidened << "\n");
849  return true;
850 }
851 
852 Optional<LoopICmp> LoopPredication::parseLoopLatchICmp() {
853  using namespace PatternMatch;
854 
855  BasicBlock *LoopLatch = L->getLoopLatch();
856  if (!LoopLatch) {
857  LLVM_DEBUG(dbgs() << "The loop doesn't have a single latch!\n");
858  return None;
859  }
860 
861  auto *BI = dyn_cast<BranchInst>(LoopLatch->getTerminator());
862  if (!BI || !BI->isConditional()) {
863  LLVM_DEBUG(dbgs() << "Failed to match the latch terminator!\n");
864  return None;
865  }
866  BasicBlock *TrueDest = BI->getSuccessor(0);
867  assert(
868  (TrueDest == L->getHeader() || BI->getSuccessor(1) == L->getHeader()) &&
869  "One of the latch's destinations must be the header");
870 
871  auto *ICI = dyn_cast<ICmpInst>(BI->getCondition());
872  if (!ICI) {
873  LLVM_DEBUG(dbgs() << "Failed to match the latch condition!\n");
874  return None;
875  }
876  auto Result = parseLoopICmp(ICI);
877  if (!Result) {
878  LLVM_DEBUG(dbgs() << "Failed to parse the loop latch condition!\n");
879  return None;
880  }
881 
882  if (TrueDest != L->getHeader())
884 
885  // Check affine first, so if it's not we don't try to compute the step
886  // recurrence.
887  if (!Result->IV->isAffine()) {
888  LLVM_DEBUG(dbgs() << "The induction variable is not affine!\n");
889  return None;
890  }
891 
892  auto *Step = Result->IV->getStepRecurrence(*SE);
893  if (!isSupportedStep(Step)) {
894  LLVM_DEBUG(dbgs() << "Unsupported loop stride(" << *Step << ")!\n");
895  return None;
896  }
897 
898  auto IsUnsupportedPredicate = [](const SCEV *Step, ICmpInst::Predicate Pred) {
899  if (Step->isOne()) {
900  return Pred != ICmpInst::ICMP_ULT && Pred != ICmpInst::ICMP_SLT &&
901  Pred != ICmpInst::ICMP_ULE && Pred != ICmpInst::ICMP_SLE;
902  } else {
903  assert(Step->isAllOnesValue() && "Step should be -1!");
904  return Pred != ICmpInst::ICMP_UGT && Pred != ICmpInst::ICMP_SGT &&
905  Pred != ICmpInst::ICMP_UGE && Pred != ICmpInst::ICMP_SGE;
906  }
907  };
908 
909  normalizePredicate(SE, L, *Result);
910  if (IsUnsupportedPredicate(Step, Result->Pred)) {
911  LLVM_DEBUG(dbgs() << "Unsupported loop latch predicate(" << Result->Pred
912  << ")!\n");
913  return None;
914  }
915 
916  return Result;
917 }
918 
919 
920 bool LoopPredication::isLoopProfitableToPredicate() {
922  return true;
923 
925  L->getExitEdges(ExitEdges);
926  // If there is only one exiting edge in the loop, it is always profitable to
927  // predicate the loop.
928  if (ExitEdges.size() == 1)
929  return true;
930 
931  // Calculate the exiting probabilities of all exiting edges from the loop,
932  // starting with the LatchExitProbability.
933  // Heuristic for profitability: If any of the exiting blocks' probability of
934  // exiting the loop is larger than exiting through the latch block, it's not
935  // profitable to predicate the loop.
936  auto *LatchBlock = L->getLoopLatch();
937  assert(LatchBlock && "Should have a single latch at this point!");
938  auto *LatchTerm = LatchBlock->getTerminator();
939  assert(LatchTerm->getNumSuccessors() == 2 &&
940  "expected to be an exiting block with 2 succs!");
941  unsigned LatchBrExitIdx =
942  LatchTerm->getSuccessor(0) == L->getHeader() ? 1 : 0;
943  // We compute branch probabilities without BPI. We do not rely on BPI since
944  // Loop predication is usually run in an LPM and BPI is only preserved
945  // lossily within loop pass managers, while BPI has an inherent notion of
946  // being complete for an entire function.
947 
948  // If the latch exits into a deoptimize or an unreachable block, do not
949  // predicate on that latch check.
950  auto *LatchExitBlock = LatchTerm->getSuccessor(LatchBrExitIdx);
951  if (isa<UnreachableInst>(LatchTerm) ||
952  LatchExitBlock->getTerminatingDeoptimizeCall())
953  return false;
954 
955  auto IsValidProfileData = [](MDNode *ProfileData, const Instruction *Term) {
956  if (!ProfileData || !ProfileData->getOperand(0))
957  return false;
958  if (MDString *MDS = dyn_cast<MDString>(ProfileData->getOperand(0)))
959  if (!MDS->getString().equals("branch_weights"))
960  return false;
961  if (ProfileData->getNumOperands() != 1 + Term->getNumSuccessors())
962  return false;
963  return true;
964  };
965  MDNode *LatchProfileData = LatchTerm->getMetadata(LLVMContext::MD_prof);
966  // Latch terminator has no valid profile data, so nothing to check
967  // profitability on.
968  if (!IsValidProfileData(LatchProfileData, LatchTerm))
969  return true;
970 
971  auto ComputeBranchProbability =
972  [&](const BasicBlock *ExitingBlock,
973  const BasicBlock *ExitBlock) -> BranchProbability {
974  auto *Term = ExitingBlock->getTerminator();
975  MDNode *ProfileData = Term->getMetadata(LLVMContext::MD_prof);
976  unsigned NumSucc = Term->getNumSuccessors();
977  if (IsValidProfileData(ProfileData, Term)) {
978  uint64_t Numerator = 0, Denominator = 0, ProfVal = 0;
979  for (unsigned i = 0; i < NumSucc; i++) {
980  ConstantInt *CI =
981  mdconst::extract<ConstantInt>(ProfileData->getOperand(i + 1));
982  ProfVal = CI->getValue().getZExtValue();
983  if (Term->getSuccessor(i) == ExitBlock)
984  Numerator += ProfVal;
985  Denominator += ProfVal;
986  }
987  return BranchProbability::getBranchProbability(Numerator, Denominator);
988  } else {
989  assert(LatchBlock != ExitingBlock &&
990  "Latch term should always have profile data!");
991  // No profile data, so we choose the weight as 1/num_of_succ(Src)
992  return BranchProbability::getBranchProbability(1, NumSucc);
993  }
994  };
995 
996  BranchProbability LatchExitProbability =
997  ComputeBranchProbability(LatchBlock, LatchExitBlock);
998 
999  // Protect against degenerate inputs provided by the user. Providing a value
1000  // less than one, can invert the definition of profitable loop predication.
1001  float ScaleFactor = LatchExitProbabilityScale;
1002  if (ScaleFactor < 1) {
1003  LLVM_DEBUG(
1004  dbgs()
1005  << "Ignored user setting for loop-predication-latch-probability-scale: "
1006  << LatchExitProbabilityScale << "\n");
1007  LLVM_DEBUG(dbgs() << "The value is set to 1.0\n");
1008  ScaleFactor = 1.0;
1009  }
1010  const auto LatchProbabilityThreshold = LatchExitProbability * ScaleFactor;
1011 
1012  for (const auto &ExitEdge : ExitEdges) {
1013  BranchProbability ExitingBlockProbability =
1014  ComputeBranchProbability(ExitEdge.first, ExitEdge.second);
1015  // Some exiting edge has higher probability than the latch exiting edge.
1016  // No longer profitable to predicate.
1017  if (ExitingBlockProbability > LatchProbabilityThreshold)
1018  return false;
1019  }
1020 
1021  // We have concluded that the most probable way to exit from the
1022  // loop is through the latch (or there's no profile information and all
1023  // exits are equally likely).
1024  return true;
1025 }
1026 
1027 /// If we can (cheaply) find a widenable branch which controls entry into the
1028 /// loop, return it.
1030  // Walk back through any unconditional executed blocks and see if we can find
1031  // a widenable condition which seems to control execution of this loop. Note
1032  // that we predict that maythrow calls are likely untaken and thus that it's
1033  // profitable to widen a branch before a maythrow call with a condition
1034  // afterwards even though that may cause the slow path to run in a case where
1035  // it wouldn't have otherwise.
1036  BasicBlock *BB = L->getLoopPreheader();
1037  if (!BB)
1038  return nullptr;
1039  do {
1040  if (BasicBlock *Pred = BB->getSinglePredecessor())
1041  if (BB == Pred->getSingleSuccessor()) {
1042  BB = Pred;
1043  continue;
1044  }
1045  break;
1046  } while (true);
1047 
1048  if (BasicBlock *Pred = BB->getSinglePredecessor()) {
1049  auto *Term = Pred->getTerminator();
1050 
1051  Value *Cond, *WC;
1052  BasicBlock *IfTrueBB, *IfFalseBB;
1053  if (parseWidenableBranch(Term, Cond, WC, IfTrueBB, IfFalseBB) &&
1054  IfTrueBB == BB)
1055  return cast<BranchInst>(Term);
1056  }
1057  return nullptr;
1058 }
1059 
1060 /// Return the minimum of all analyzeable exit counts. This is an upper bound
1061 /// on the actual exit count. If there are not at least two analyzeable exits,
1062 /// returns SCEVCouldNotCompute.
1064  DominatorTree &DT,
1065  Loop *L) {
1066  SmallVector<BasicBlock *, 16> ExitingBlocks;
1067  L->getExitingBlocks(ExitingBlocks);
1068 
1069  SmallVector<const SCEV *, 4> ExitCounts;
1070  for (BasicBlock *ExitingBB : ExitingBlocks) {
1071  const SCEV *ExitCount = SE.getExitCount(L, ExitingBB);
1072  if (isa<SCEVCouldNotCompute>(ExitCount))
1073  continue;
1074  assert(DT.dominates(ExitingBB, L->getLoopLatch()) &&
1075  "We should only have known counts for exiting blocks that "
1076  "dominate latch!");
1077  ExitCounts.push_back(ExitCount);
1078  }
1079  if (ExitCounts.size() < 2)
1080  return SE.getCouldNotCompute();
1081  return SE.getUMinFromMismatchedTypes(ExitCounts);
1082 }
1083 
1084 /// This implements an analogous, but entirely distinct transform from the main
1085 /// loop predication transform. This one is phrased in terms of using a
1086 /// widenable branch *outside* the loop to allow us to simplify loop exits in a
1087 /// following loop. This is close in spirit to the IndVarSimplify transform
1088 /// of the same name, but is materially different widening loosens legality
1089 /// sharply.
1090 bool LoopPredication::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
1091  // The transformation performed here aims to widen a widenable condition
1092  // above the loop such that all analyzeable exit leading to deopt are dead.
1093  // It assumes that the latch is the dominant exit for profitability and that
1094  // exits branching to deoptimizing blocks are rarely taken. It relies on the
1095  // semantics of widenable expressions for legality. (i.e. being able to fall
1096  // down the widenable path spuriously allows us to ignore exit order,
1097  // unanalyzeable exits, side effects, exceptional exits, and other challenges
1098  // which restrict the applicability of the non-WC based version of this
1099  // transform in IndVarSimplify.)
1100  //
1101  // NOTE ON POISON/UNDEF - We're hoisting an expression above guards which may
1102  // imply flags on the expression being hoisted and inserting new uses (flags
1103  // are only correct for current uses). The result is that we may be
1104  // inserting a branch on the value which can be either poison or undef. In
1105  // this case, the branch can legally go either way; we just need to avoid
1106  // introducing UB. This is achieved through the use of the freeze
1107  // instruction.
1108 
1109  SmallVector<BasicBlock *, 16> ExitingBlocks;
1110  L->getExitingBlocks(ExitingBlocks);
1111 
1112  if (ExitingBlocks.empty())
1113  return false; // Nothing to do.
1114 
1115  auto *Latch = L->getLoopLatch();
1116  if (!Latch)
1117  return false;
1118 
1119  auto *WidenableBR = FindWidenableTerminatorAboveLoop(L, *LI);
1120  if (!WidenableBR)
1121  return false;
1122 
1123  const SCEV *LatchEC = SE->getExitCount(L, Latch);
1124  if (isa<SCEVCouldNotCompute>(LatchEC))
1125  return false; // profitability - want hot exit in analyzeable set
1126 
1127  // At this point, we have found an analyzeable latch, and a widenable
1128  // condition above the loop. If we have a widenable exit within the loop
1129  // (for which we can't compute exit counts), drop the ability to further
1130  // widen so that we gain ability to analyze it's exit count and perform this
1131  // transform. TODO: It'd be nice to know for sure the exit became
1132  // analyzeable after dropping widenability.
1133  bool ChangedLoop = false;
1134 
1135  for (auto *ExitingBB : ExitingBlocks) {
1136  if (LI->getLoopFor(ExitingBB) != L)
1137  continue;
1138 
1139  auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1140  if (!BI)
1141  continue;
1142 
1143  Use *Cond, *WC;
1144  BasicBlock *IfTrueBB, *IfFalseBB;
1145  if (parseWidenableBranch(BI, Cond, WC, IfTrueBB, IfFalseBB) &&
1146  L->contains(IfTrueBB)) {
1147  WC->set(ConstantInt::getTrue(IfTrueBB->getContext()));
1148  ChangedLoop = true;
1149  }
1150  }
1151  if (ChangedLoop)
1152  SE->forgetLoop(L);
1153 
1154  // The use of umin(all analyzeable exits) instead of latch is subtle, but
1155  // important for profitability. We may have a loop which hasn't been fully
1156  // canonicalized just yet. If the exit we chose to widen is provably never
1157  // taken, we want the widened form to *also* be provably never taken. We
1158  // can't guarantee this as a current unanalyzeable exit may later become
1159  // analyzeable, but we can at least avoid the obvious cases.
1160  const SCEV *MinEC = getMinAnalyzeableBackedgeTakenCount(*SE, *DT, L);
1161  if (isa<SCEVCouldNotCompute>(MinEC) || MinEC->getType()->isPointerTy() ||
1162  !SE->isLoopInvariant(MinEC, L) ||
1163  !isSafeToExpandAt(MinEC, WidenableBR, *SE))
1164  return ChangedLoop;
1165 
1166  // Subtlety: We need to avoid inserting additional uses of the WC. We know
1167  // that it can only have one transitive use at the moment, and thus moving
1168  // that use to just before the branch and inserting code before it and then
1169  // modifying the operand is legal.
1170  auto *IP = cast<Instruction>(WidenableBR->getCondition());
1171  // Here we unconditionally modify the IR, so after this point we should return
1172  // only `true`!
1173  IP->moveBefore(WidenableBR);
1174  if (MSSAU)
1175  if (auto *MUD = MSSAU->getMemorySSA()->getMemoryAccess(IP))
1176  MSSAU->moveToPlace(MUD, WidenableBR->getParent(),
1178  Rewriter.setInsertPoint(IP);
1179  IRBuilder<> B(IP);
1180 
1181  bool InvalidateLoop = false;
1182  Value *MinECV = nullptr; // lazily generated if needed
1183  for (BasicBlock *ExitingBB : ExitingBlocks) {
1184  // If our exiting block exits multiple loops, we can only rewrite the
1185  // innermost one. Otherwise, we're changing how many times the innermost
1186  // loop runs before it exits.
1187  if (LI->getLoopFor(ExitingBB) != L)
1188  continue;
1189 
1190  // Can't rewrite non-branch yet.
1191  auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1192  if (!BI)
1193  continue;
1194 
1195  // If already constant, nothing to do.
1196  if (isa<Constant>(BI->getCondition()))
1197  continue;
1198 
1199  const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1200  if (isa<SCEVCouldNotCompute>(ExitCount) ||
1201  ExitCount->getType()->isPointerTy() ||
1202  !isSafeToExpandAt(ExitCount, WidenableBR, *SE))
1203  continue;
1204 
1205  const bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1206  BasicBlock *ExitBB = BI->getSuccessor(ExitIfTrue ? 0 : 1);
1207  if (!ExitBB->getPostdominatingDeoptimizeCall())
1208  continue;
1209 
1210  /// Here we can be fairly sure that executing this exit will most likely
1211  /// lead to executing llvm.experimental.deoptimize.
1212  /// This is a profitability heuristic, not a legality constraint.
1213 
1214  // If we found a widenable exit condition, do two things:
1215  // 1) fold the widened exit test into the widenable condition
1216  // 2) fold the branch to untaken - avoids infinite looping
1217 
1218  Value *ECV = Rewriter.expandCodeFor(ExitCount);
1219  if (!MinECV)
1220  MinECV = Rewriter.expandCodeFor(MinEC);
1221  Value *RHS = MinECV;
1222  if (ECV->getType() != RHS->getType()) {
1223  Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
1224  ECV = B.CreateZExt(ECV, WiderTy);
1225  RHS = B.CreateZExt(RHS, WiderTy);
1226  }
1227  assert(!Latch || DT->dominates(ExitingBB, Latch));
1228  Value *NewCond = B.CreateICmp(ICmpInst::ICMP_UGT, ECV, RHS);
1229  // Freeze poison or undef to an arbitrary bit pattern to ensure we can
1230  // branch without introducing UB. See NOTE ON POISON/UNDEF above for
1231  // context.
1232  NewCond = B.CreateFreeze(NewCond);
1233 
1234  widenWidenableBranch(WidenableBR, NewCond);
1235 
1236  Value *OldCond = BI->getCondition();
1237  BI->setCondition(ConstantInt::get(OldCond->getType(), !ExitIfTrue));
1238  InvalidateLoop = true;
1239  }
1240 
1241  if (InvalidateLoop)
1242  // We just mutated a bunch of loop exits changing there exit counts
1243  // widely. We need to force recomputation of the exit counts given these
1244  // changes. Note that all of the inserted exits are never taken, and
1245  // should be removed next time the CFG is modified.
1246  SE->forgetLoop(L);
1247 
1248  // Always return `true` since we have moved the WidenableBR's condition.
1249  return true;
1250 }
1251 
1252 bool LoopPredication::runOnLoop(Loop *Loop) {
1253  L = Loop;
1254 
1255  LLVM_DEBUG(dbgs() << "Analyzing ");
1256  LLVM_DEBUG(L->dump());
1257 
1258  Module *M = L->getHeader()->getModule();
1259 
1260  // There is nothing to do if the module doesn't use guards
1261  auto *GuardDecl =
1262  M->getFunction(Intrinsic::getName(Intrinsic::experimental_guard));
1263  bool HasIntrinsicGuards = GuardDecl && !GuardDecl->use_empty();
1264  auto *WCDecl = M->getFunction(
1265  Intrinsic::getName(Intrinsic::experimental_widenable_condition));
1266  bool HasWidenableConditions =
1267  PredicateWidenableBranchGuards && WCDecl && !WCDecl->use_empty();
1268  if (!HasIntrinsicGuards && !HasWidenableConditions)
1269  return false;
1270 
1271  DL = &M->getDataLayout();
1272 
1273  Preheader = L->getLoopPreheader();
1274  if (!Preheader)
1275  return false;
1276 
1277  auto LatchCheckOpt = parseLoopLatchICmp();
1278  if (!LatchCheckOpt)
1279  return false;
1280  LatchCheck = *LatchCheckOpt;
1281 
1282  LLVM_DEBUG(dbgs() << "Latch check:\n");
1283  LLVM_DEBUG(LatchCheck.dump());
1284 
1285  if (!isLoopProfitableToPredicate()) {
1286  LLVM_DEBUG(dbgs() << "Loop not profitable to predicate!\n");
1287  return false;
1288  }
1289  // Collect all the guards into a vector and process later, so as not
1290  // to invalidate the instruction iterator.
1292  SmallVector<BranchInst *, 4> GuardsAsWidenableBranches;
1293  for (const auto BB : L->blocks()) {
1294  for (auto &I : *BB)
1295  if (isGuard(&I))
1296  Guards.push_back(cast<IntrinsicInst>(&I));
1298  isGuardAsWidenableBranch(BB->getTerminator()))
1299  GuardsAsWidenableBranches.push_back(
1300  cast<BranchInst>(BB->getTerminator()));
1301  }
1302 
1303  SCEVExpander Expander(*SE, *DL, "loop-predication");
1304  bool Changed = false;
1305  for (auto *Guard : Guards)
1306  Changed |= widenGuardConditions(Guard, Expander);
1307  for (auto *Guard : GuardsAsWidenableBranches)
1308  Changed |= widenWidenableBranchGuardConditions(Guard, Expander);
1309  Changed |= predicateLoopExits(L, Expander);
1310 
1311  if (MSSAU && VerifyMemorySSA)
1312  MSSAU->getMemorySSA()->verifyMemorySSA();
1313  return Changed;
1314 }
i
i
Definition: README.txt:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::RecursivelyDeleteTriviallyDeadInstructions
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:523
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:64
llvm::CmpInst::getSwappedPredicate
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:851
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
generateLoopLatchCheck
static Optional< LoopICmp > generateLoopLatchCheck(const DataLayout &DL, ScalarEvolution &SE, const LoopICmp LatchCheck, Type *RangeCheckType)
Definition: LoopPredication.cpp:477
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:721
IntrinsicInst.h
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:217
llvm::SCEV::isAllOnesValue
bool isAllOnesValue() const
Return true if the expression is a constant all-ones value.
Definition: ScalarEvolution.cpp:419
ScalarEvolutionExpander.h
Scalar.h
MemorySSAUpdater.h
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::Value::dump
void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:4813
Pass.h
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
llvm::SCEVExpander
This class uses information about analyze scalars to rewrite expressions in canonical form.
Definition: ScalarEvolutionExpander.h:65
llvm::ConstantInt::getValue
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:133
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
llvm::Intrinsic::getName
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:879
llvm::IRBuilder<>
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:460
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:743
llvm::CmpInst::getInversePredicate
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:835
Local.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::ScalarEvolution::getTruncateExpr
const SCEV * getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
Definition: ScalarEvolution.cpp:1184
llvm::getLoopAnalysisUsage
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:149
llvm::SCEVExpander::expandCodeFor
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
Definition: ScalarEvolutionExpander.h:277
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::CmpInst::getFlippedStrictnessPredicate
Predicate getFlippedStrictnessPredicate() const
For predicate of kind "is X or equal to 0" returns the predicate "is X".
Definition: InstrTypes.h:917
llvm::CmpInst::ICMP_SGT
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:748
ScalarEvolution.h
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::ICmpInst::isEquality
bool isEquality() const
Return true if this predicate is either EQ or NE.
Definition: Instructions.h:1280
Module.h
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:52
llvm::LoopBase::getExitEdges
void getExitEdges(SmallVectorImpl< Edge > &ExitEdges) const
Return all pairs of (inside_block,outside_block).
Definition: LoopInfoImpl.h:148
llvm::Optional
Definition: APInt.h:33
llvm::BranchProbability::getBranchProbability
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
Definition: BranchProbability.cpp:53
llvm::SmallPtrSet< Value *, 4 >
llvm::CmpInst::ICMP_SLE
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:751
getMinAnalyzeableBackedgeTakenCount
static const SCEV * getMinAnalyzeableBackedgeTakenCount(ScalarEvolution &SE, DominatorTree &DT, Loop *L)
Return the minimum of all analyzeable exit counts.
Definition: LoopPredication.cpp:1063
llvm::dump
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Definition: SparseBitVector.h:876
llvm::LoopStandardAnalysisResults::DT
DominatorTree & DT
Definition: LoopAnalysisManager.h:55
llvm::LoopPredicationPass::run
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LoopPredication.cpp:362
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
loop
Analysis the ScalarEvolution expression for r is< loop > Outside the loop
Definition: README.txt:8
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::MDNode::getNumOperands
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1143
AliasAnalysis.h
llvm::isGuard
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....
Definition: GuardUtils.cpp:18
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::DominatorTree::dominates
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:115
CommandLine.h
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::initializeLoopPredicationLegacyPassPass
void initializeLoopPredicationLegacyPassPass(PassRegistry &)
GlobalValue.h
EnableIVTruncation
static cl::opt< bool > EnableIVTruncation("loop-predication-enable-iv-truncation", cl::Hidden, cl::init(true))
llvm::MemorySSAWrapperPass
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:981
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
Rewriter
Virtual Register Rewriter
Definition: VirtRegMap.cpp:237
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::AAResults
Definition: AliasAnalysis.h:508
llvm::BranchProbabilityInfoWrapperPass
Legacy analysis pass which computes BranchProbabilityInfo.
Definition: BranchProbabilityInfo.h:440
llvm::User
Definition: User.h:44
llvm::CmpInst::ICMP_ULE
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:747
LoopPredication
static cl::opt< bool > LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true), cl::desc("Predicate conditions in read only loops"))
GuardUtils.h
llvm::ScalarEvolution::getUMinFromMismatchedTypes
const SCEV * getUMinFromMismatchedTypes(const SCEV *LHS, const SCEV *RHS)
Promote the operands to the wider of the types using zero-extension, and then perform a umin operatio...
Definition: ScalarEvolution.cpp:4377
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
IP
Definition: NVPTXLowerArgs.cpp:166
false
Definition: StackSlotColoring.cpp:142
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
SkipProfitabilityChecks
static cl::opt< bool > SkipProfitabilityChecks("loop-predication-skip-profitability-checks", cl::Hidden, cl::init(false))
llvm::ScalarEvolution::isKnownPredicate
bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
Definition: ScalarEvolution.cpp:9994
llvm::Instruction
Definition: Instruction.h:45
llvm::BranchInst::setCondition
void setCondition(Value *V)
Definition: Instructions.h:3169
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1460
llvm::LoopBase::getExitingBlocks
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:34
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:925
LoopUtils.h
llvm::LPPassManager
Definition: LoopPass.h:75
llvm::BasicBlock::getModule
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:148
PatternMatch.h
llvm::None
const NoneType None
Definition: None.h:23
llvm::BasicBlock::getPostdominatingDeoptimizeCall
const CallInst * getPostdominatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize that is present either in current ...
Definition: BasicBlock.cpp:204
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
LoopInfo.h
llvm::BranchInst::getCondition
Value * getCondition() const
Definition: Instructions.h:3164
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(LoopPredicationLegacyPass, "loop-predication", "Loop predication", false, false) INITIALIZE_PASS_END(LoopPredicationLegacyPass
llvm::MDNode::getOperand
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1137
llvm::getLoopPassPreservedAnalyses
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Definition: LoopAnalysisManager.cpp:140
llvm::cl::opt< bool >
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:77
STATISTIC
STATISTIC(TotalConsidered, "Number of guards considered")
BranchProbabilityInfo.h
llvm::M68kBeads::Term
@ Term
Definition: M68kBaseInfo.h:71
llvm::ICmpInst
This instruction compares its operands according to the predicate given to the constructor.
Definition: Instructions.h:1185
uint64_t
llvm::LoopPass
Definition: LoopPass.h:27
llvm::MemorySSAUpdater
Definition: MemorySSAUpdater.h:56
llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition: LoopPassManager.h:263
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::succ_begin
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:99
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:167
llvm::PatternMatch::m_And
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1100
llvm::ScalarEvolution::getMonotonicPredicateType
Optional< MonotonicPredicateType > getMonotonicPredicateType(const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred)
If, for all loop invariant X, the predicate "LHS `Pred` X" is monotonically increasing or decreasing,...
Definition: ScalarEvolution.cpp:10053
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:216
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
llvm::widenWidenableBranch
void widenWidenableBranch(BranchInst *WidenableBR, Value *NewCond)
Given a branch we know is widenable (defined per Analysis/GuardUtils.h), widen it such that condition...
Definition: GuardUtils.cpp:82
llvm::CmpInst::ICMP_UGE
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:745
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
predication
loop predication
Definition: LoopPredication.cpp:355
llvm::MDNode
Metadata node.
Definition: Metadata.h:906
llvm::MemorySSAAnalysis
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:931
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:650
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::User::setOperand
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
llvm::CmpInst::ICMP_SLT
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:750
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1083
llvm::parseWidenableBranch
bool parseWidenableBranch(const User *U, Value *&Condition, Value *&WidenableCondition, BasicBlock *&IfTrueBB, BasicBlock *&IfFalseBB)
If U is widenable branch looking like: cond = ...
Definition: GuardUtils.cpp:44
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:179
llvm::CmpInst::ICMP_ULT
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:746
LoopPass.h
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::BranchProbability
Definition: BranchProbability.h:30
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::LoopStandardAnalysisResults::LI
LoopInfo & LI
Definition: LoopAnalysisManager.h:56
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::Loop::hasLoopInvariantOperands
bool hasLoopInvariantOperands(const Instruction *I) const
Return true if all the operands of the specified instruction are loop invariant.
Definition: LoopInfo.cpp:70
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:152
normalizePredicate
static void normalizePredicate(ScalarEvolution *SE, Loop *L, LoopICmp &RC)
Definition: LoopPredication.cpp:666
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:36
llvm::SCEVAddRecExpr::getLoop
const Loop * getLoop() const
Definition: ScalarEvolutionExpressions.h:364
llvm::ConstantInt::getTrue
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:873
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:325
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::isGuardAsWidenableBranch
bool isGuardAsWidenableBranch(const User *U)
Returns true iff U has semantics of a guard expressed in a form of a widenable conditional branch to ...
Definition: GuardUtils.cpp:29
llvm::MemorySSA::BeforeTerminator
@ BeforeTerminator
Definition: MemorySSA.h:793
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:352
Function.h
llvm::SCEVUnknown
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
Definition: ScalarEvolutionExpressions.h:530
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::SCEV::isOne
bool isOne() const
Return true if the expression is a constant one.
Definition: ScalarEvolution.cpp:413
llvm::ScalarEvolution::getCouldNotCompute
const SCEV * getCouldNotCompute()
Definition: ScalarEvolution.cpp:4015
llvm::Loop::dump
void dump() const
Definition: LoopInfo.cpp:670
llvm::CmpInst::ICMP_SGE
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:749
llvm::LoopStandardAnalysisResults::SE
ScalarEvolution & SE
Definition: LoopAnalysisManager.h:57
llvm::LoopStandardAnalysisResults::AA
AAResults & AA
Definition: LoopAnalysisManager.h:53
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:45
isSafeToTruncateWideIVType
static bool isSafeToTruncateWideIVType(const DataLayout &DL, ScalarEvolution &SE, const LoopICmp LatchCheck, Type *RangeCheckType)
Definition: LoopPredication.cpp:442
GuardUtils.h
ScalarEvolutionExpressions.h
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
MemorySSA.h
llvm::isSafeToExpandAt
bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint, ScalarEvolution &SE)
Return true if the given expression is safe to expand in the sense that all materialized values are d...
Definition: ScalarEvolutionExpander.cpp:2687
llvm::createLoopPredicationPass
Pass * createLoopPredicationPass()
Definition: LoopPredication.cpp:358
PredicateWidenableBranchGuards
static cl::opt< bool > PredicateWidenableBranchGuards("loop-predication-predicate-widenable-branches-to-deopt", cl::Hidden, cl::desc("Whether or not we should predicate guards " "expressed as widenable branches to deoptimize blocks"), cl::init(true))
llvm::CmpInst::ICMP_UGT
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:744
llvm::CmpInst::getPredicate
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:811
llvm::LoopStandardAnalysisResults::MSSA
MemorySSA * MSSA
Definition: LoopAnalysisManager.h:62
llvm::PatternMatch
Definition: PatternMatch.h:47
llvm::SmallVectorImpl< Value * >
llvm::SCEV::getType
Type * getType() const
Return the LLVM type of this SCEV expression.
Definition: ScalarEvolution.cpp:377
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::VerifyMemorySSA
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:91
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:412
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3083
LoopPredication.h
FindWidenableTerminatorAboveLoop
static BranchInst * FindWidenableTerminatorAboveLoop(Loop *L, LoopInfo &LI)
If we can (cheaply) find a widenable branch which controls entry into the loop, return it.
Definition: LoopPredication.cpp:1029
llvm::MDString
A single uniqued string.
Definition: Metadata.h:611
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::ScalarEvolution::getExitCount
const SCEV * getExitCount(const Loop *L, const BasicBlock *ExitingBlock, ExitCountKind Kind=Exact)
Return the number of times the backedge executes before the given exit would be taken; if not exactly...
Definition: ScalarEvolution.cpp:7473
Debug.h
LatchExitProbabilityScale
static cl::opt< float > LatchExitProbabilityScale("loop-predication-latch-probability-scale", cl::Hidden, cl::init(2.0), cl::desc("scale factor for the latch probability. Value should be greater " "than 1. Lower values are ignored"))
llvm::BranchInst::isConditional
bool isConditional() const
Definition: Instructions.h:3162
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3176
EnableCountDownLoop
static cl::opt< bool > EnableCountDownLoop("loop-predication-enable-count-down-loop", cl::Hidden, cl::init(true))
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38