// Extracted from LLVM (22.0.0git) doxygen: llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
1//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This transformation analyzes and transforms the induction variables (and
10// computations derived from them) into simpler forms suitable for subsequent
11// analysis and transformation.
12//
13// If the trip count of a loop is computable, this pass also makes the following
14// changes:
15// 1. The exit condition for the loop is canonicalized to compare the
16// induction value against the exit value. This turns loops like:
17// 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18// 2. Any use outside of the loop of an expression derived from the indvar
19// is changed to compute the derived value outside of the loop, eliminating
20// the dependence on the exit value of the induction variable. If the only
21// purpose of the loop is to compute the exit value of some derived
22// expression, this transformation will make the loop dead.
23//
24//===----------------------------------------------------------------------===//
25
27#include "llvm/ADT/APFloat.h"
28#include "llvm/ADT/ArrayRef.h"
29#include "llvm/ADT/STLExtras.h"
32#include "llvm/ADT/Statistic.h"
44#include "llvm/IR/BasicBlock.h"
45#include "llvm/IR/Constant.h"
47#include "llvm/IR/Constants.h"
48#include "llvm/IR/DataLayout.h"
50#include "llvm/IR/Dominators.h"
51#include "llvm/IR/Function.h"
52#include "llvm/IR/IRBuilder.h"
53#include "llvm/IR/InstrTypes.h"
54#include "llvm/IR/Instruction.h"
57#include "llvm/IR/Intrinsics.h"
58#include "llvm/IR/PassManager.h"
60#include "llvm/IR/Type.h"
61#include "llvm/IR/Use.h"
62#include "llvm/IR/User.h"
63#include "llvm/IR/Value.h"
64#include "llvm/IR/ValueHandle.h"
67#include "llvm/Support/Debug.h"
76#include <cassert>
77#include <cstdint>
78#include <utility>
79
80using namespace llvm;
81using namespace PatternMatch;
82using namespace SCEVPatternMatch;
83
84#define DEBUG_TYPE "indvars"
85
// Pass counters reported under -stats; each is incremented by the
// corresponding transformation implemented below (IV widening, exit-value
// replacement, linear function test replace, extend elimination, and
// congruent-IV elimination).
86STATISTIC(NumWidened , "Number of indvars widened");
87STATISTIC(NumReplaced , "Number of exit values replaced");
88STATISTIC(NumLFTR , "Number of loop exit tests replaced");
89STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
90STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
91
// Command-line knobs controlling this pass.
// NOTE(review): the opening declaration lines of several options (the
// cl::opt variable names — original lines 92, 95, 97, 99, 108, 121) appear
// to have been dropped by the doxygen extraction; verify against upstream
// before compiling.
93 "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
94 cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
96 clEnumValN(NeverRepl, "never", "never replace exit value"),
98 "only replace exit value when the cost is cheap"),
100 UnusedIndVarInLoop, "unusedindvarinloop",
101 "only replace exit value when it is an unused "
102 "induction variable in the loop and has cheap replacement cost"),
103 clEnumValN(NoHardUse, "noharduse",
104 "only replace exit values when loop def likely dead"),
105 clEnumValN(AlwaysRepl, "always",
106 "always replace exit value whenever possible")));
107
// Controls whether widening may use post-increment control-dependent ranges
// (see simplifyAndExtend/createWideIV usage below).
109 "indvars-post-increment-ranges", cl::Hidden,
110 cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
111 cl::init(true));
112
// Kill switch for the linear-function-test-replace transformation.
113static cl::opt<bool>
114DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
115 cl::desc("Disable Linear Function Test Replace optimization"));
116
// Enables predication of exit conditions in read-only loops.
117static cl::opt<bool>
118LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
119 cl::desc("Predicate conditions in read only loops"))ORIG;
120
122 "indvars-predicate-loop-traps", cl::Hidden, cl::init(true),
123 cl::desc("Predicate conditions that trap in loops with only local writes"));
124
// Master switch for IV widening (see the WidenIndVars member below).
125static cl::opt<bool>
126AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
127 cl::desc("Allow widening of indvars to eliminate s/zext"));
129namespace {
130
131class IndVarSimplify {
 // Analyses and context supplied by the pass driver; not owned.
132 LoopInfo *LI;
133 ScalarEvolution *SE;
134 DominatorTree *DT;
135 const DataLayout &DL;
 // NOTE(review): member declarations for TLI/TTI (original lines 136-137)
 // and, presumably, the DeadInsts worklist (original line 140) appear to
 // have been dropped by extraction — the constructor initializer list and
 // simplifyAndExtend reference them. Verify against upstream.
138 std::unique_ptr<MemorySSAUpdater> MSSAU;
139
 // Whether IV widening is enabled for this run (driven by the
 // -indvars-widen-indvars option at the caller).
141 bool WidenIndVars;
142
 // Set when a simplification exposes an opportunity for loop unswitching;
 // exposed to the pass manager via runUnswitching().
143 bool RunUnswitching = false;
144
145 bool handleFloatingPointIV(Loop *L, PHINode *PH);
146 bool rewriteNonIntegerIVs(Loop *L);
147
148 bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
149 /// Try to improve our exit conditions by converting condition from signed
150 /// to unsigned or rotating computation out of the loop.
151 /// (See inline comment about why this is duplicated from simplifyAndExtend)
152 bool canonicalizeExitCondition(Loop *L);
153 /// Try to eliminate loop exits based on analyzeable exit counts
154 bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
155 /// Try to form loop invariant tests for loop exits by changing how many
156 /// iterations of the loop run when that is unobservable.
157 bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
158
159 bool rewriteFirstIterationLoopExitValues(Loop *L);
160
161 bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
162 const SCEV *ExitCount,
163 PHINode *IndVar, SCEVExpander &Rewriter);
164
165public:
166 IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
167 const DataLayout &DL, TargetLibraryInfo *TLI,
168 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
169 : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
170 WidenIndVars(WidenIndVars) {
 // Only build a MemorySSA updater when MemorySSA is actually available.
171 if (MSSA)
172 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
173 }
174
 // Run the whole IndVarSimplify pipeline on L; returns true if IR changed.
175 bool run(Loop *L);
176
 // True when run() left conditions worth handing to loop unswitching.
177 bool runUnswitching() const { return RunUnswitching; }
178};
179
180} // end anonymous namespace
181
182//===----------------------------------------------------------------------===//
183// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
184//===----------------------------------------------------------------------===//
185
186/// Convert APF to an integer, if possible.
187static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
188 bool isExact = false;
189 // See if we can convert this to an int64_t
190 uint64_t UIntVal;
191 if (APF.convertToInteger(MutableArrayRef(UIntVal), 64, true,
192 APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
193 !isExact)
194 return false;
195 IntVal = UIntVal;
196 return true;
197}
198
199/// If the loop has floating induction variable then insert corresponding
200/// integer induction variable if possible.
201/// For example,
202/// for(double i = 0; i < 10000; ++i)
203/// bar(i)
204/// is converted into
205/// for(int i = 0; i < 10000; ++i)
206/// bar((double)i);
207bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
 // L->contains(...) yields 0/1, so IncomingEdge indexes the preheader input
 // and BackEdge (its complement) indexes the latch input of the 2-entry PHI.
208 unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
209 unsigned BackEdge = IncomingEdge^1;
210
211 // Check incoming value.
212 auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
213
214 int64_t InitValue;
215 if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
216 return false;
217
218 // Check IV increment. Reject this PN if increment operation is not
219 // an add or increment value can not be represented by an integer.
220 auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
221 if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
222
223 // If this is not an add of the PHI with a constantfp, or if the constant fp
224 // is not an integer, bail out.
225 ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
226 int64_t IncValue;
227 if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
228 !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
229 return false;
230
231 // Check Incr uses. One user is PN and the other user is an exit condition
232 // used by the conditional terminator.
233 Value::user_iterator IncrUse = Incr->user_begin();
234 Instruction *U1 = cast<Instruction>(*IncrUse++);
235 if (IncrUse == Incr->user_end()) return false;
236 Instruction *U2 = cast<Instruction>(*IncrUse++);
237 if (IncrUse != Incr->user_end()) return false;
238
239 // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
240 // only used by a branch, we can't transform it.
241 FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
 // NOTE(review): the fallback assignment trying the second user (original
 // line 243, presumably `Compare = dyn_cast<FCmpInst>(U2);`) appears to have
 // been dropped by extraction — verify against upstream.
242 if (!Compare)
244 if (!Compare || !Compare->hasOneUse() ||
245 !isa<BranchInst>(Compare->user_back()))
246 return false;
247
248 BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
249
250 // We need to verify that the branch actually controls the iteration count
251 // of the loop. If not, the new IV can overflow and no one will notice.
252 // The branch block must be in the loop and one of the successors must be out
253 // of the loop.
254 assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
255 if (!L->contains(TheBr->getParent()) ||
256 (L->contains(TheBr->getSuccessor(0)) &&
257 L->contains(TheBr->getSuccessor(1))))
258 return false;
259
260 // If it isn't a comparison with an integer-as-fp (the exit value), we can't
261 // transform it.
262 ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
263 int64_t ExitValue;
264 if (ExitValueVal == nullptr ||
265 !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
266 return false;
267
268 // Find new predicate for integer comparison.
 // NOTE(review): the declaration of NewPred (original line 269) and the
 // ordered-predicate case labels (FCMP_OEQ/ONE/OGT/OGE/OLT/OLE on original
 // lines 272/274/276/278/280/282, which fall through to the unordered cases
 // below) appear to have been dropped by extraction — verify upstream.
270 switch (Compare->getPredicate()) {
271 default: return false; // Unknown comparison.
273 case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
275 case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
277 case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
279 case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
281 case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
283 case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
284 }
285
286 // We convert the floating point induction variable to a signed i32 value if
287 // we can. This is only safe if the comparison will not overflow in a way
288 // that won't be trapped by the integer equivalent operations. Check for this
289 // now.
290 // TODO: We could use i64 if it is native and the range requires it.
291
292 // The start/stride/exit values must all fit in signed i32.
293 if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
294 return false;
295
296 // If not actually striding (add x, 0.0), avoid touching the code.
297 if (IncValue == 0)
298 return false;
299
300 // Positive and negative strides have different safety conditions.
301 if (IncValue > 0) {
302 // If we have a positive stride, we require the init to be less than the
303 // exit value.
304 if (InitValue >= ExitValue)
305 return false;
306
307 uint32_t Range = uint32_t(ExitValue-InitValue);
308 // Check for infinite loop, either:
309 // while (i <= Exit) or until (i > Exit)
310 if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
311 if (++Range == 0) return false; // Range overflows.
312 }
313
314 unsigned Leftover = Range % uint32_t(IncValue);
315
316 // If this is an equality comparison, we require that the strided value
317 // exactly land on the exit value, otherwise the IV condition will wrap
318 // around and do things the fp IV wouldn't.
319 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
320 Leftover != 0)
321 return false;
322
323 // If the stride would wrap around the i32 before exiting, we can't
324 // transform the IV.
325 if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
326 return false;
327 } else {
328 // If we have a negative stride, we require the init to be greater than the
329 // exit value.
330 if (InitValue <= ExitValue)
331 return false;
332
333 uint32_t Range = uint32_t(InitValue-ExitValue);
334 // Check for infinite loop, either:
335 // while (i >= Exit) or until (i < Exit)
336 if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
337 if (++Range == 0) return false; // Range overflows.
338 }
339
340 unsigned Leftover = Range % uint32_t(-IncValue);
341
342 // If this is an equality comparison, we require that the strided value
343 // exactly land on the exit value, otherwise the IV condition will wrap
344 // around and do things the fp IV wouldn't.
345 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
346 Leftover != 0)
347 return false;
348
349 // If the stride would wrap around the i32 before exiting, we can't
350 // transform the IV.
351 if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
352 return false;
353 }
354
355 IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
356
357 // Insert new integer induction variable.
358 PHINode *NewPHI =
359 PHINode::Create(Int32Ty, 2, PN->getName() + ".int", PN->getIterator());
360 NewPHI->addIncoming(ConstantInt::getSigned(Int32Ty, InitValue),
361 PN->getIncomingBlock(IncomingEdge));
362 NewPHI->setDebugLoc(PN->getDebugLoc());
363
364 Instruction *NewAdd = BinaryOperator::CreateAdd(
365 NewPHI, ConstantInt::getSigned(Int32Ty, IncValue),
366 Incr->getName() + ".int", Incr->getIterator());
367 NewAdd->setDebugLoc(Incr->getDebugLoc());
368 NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
369
370 ICmpInst *NewCompare = new ICmpInst(
371 TheBr->getIterator(), NewPred, NewAdd,
372 ConstantInt::getSigned(Int32Ty, ExitValue), Compare->getName());
373 NewCompare->setDebugLoc(Compare->getDebugLoc());
374
375 // In the following deletions, PN may become dead and may be deleted.
376 // Use a WeakTrackingVH to observe whether this happens.
377 WeakTrackingVH WeakPH = PN;
378
379 // Delete the old floating point exit comparison. The branch starts using the
380 // new comparison.
381 NewCompare->takeName(Compare);
382 Compare->replaceAllUsesWith(NewCompare);
383 RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());
384
385 // Delete the old floating point increment.
386 Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
387 RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());
388
389 // If the FP induction variable still has uses, this is because something else
390 // in the loop uses its value. In order to canonicalize the induction
391 // variable, we chose to eliminate the IV and rewrite it in terms of an
392 // int->fp cast.
393 //
394 // We give preference to sitofp over uitofp because it is faster on most
395 // platforms.
396 if (WeakPH) {
397 Instruction *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
398 PN->getParent()->getFirstInsertionPt());
399 Conv->setDebugLoc(PN->getDebugLoc());
400 PN->replaceAllUsesWith(Conv);
401 RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
402 }
403 return true;
404}
405
406bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
407 // First step. Check to see if there are any floating-point recurrences.
408 // If there are, change them into integer recurrences, permitting analysis by
409 // the SCEV routines.
410 BasicBlock *Header = L->getHeader();
411
 // NOTE(review): the declaration of the PHIs worklist (original line 412,
 // presumably a SmallVector of WeakTrackingVH collecting Header's phis)
 // appears to have been dropped by extraction — verify against upstream.
413
 // Handles may be nulled out if a PHI is deleted while rewriting, hence the
 // dyn_cast_or_null below.
414 bool Changed = false;
415 for (WeakTrackingVH &PHI : PHIs)
416 if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHI))
417 Changed |= handleFloatingPointIV(L, PN);
418
419 // If the loop previously had floating-point IV, ScalarEvolution
420 // may not have been able to compute a trip count. Now that we've done some
421 // re-writing, the trip count may be computable.
422 if (Changed)
423 SE->forgetLoop(L);
424 return Changed;
425}
426
427//===---------------------------------------------------------------------===//
428// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
429// they will exit at the first iteration.
430//===---------------------------------------------------------------------===//
431
432/// Check to see if this loop has loop invariant conditions which lead to loop
433/// exits. If so, we know that if the exit path is taken, it is at the first
434/// loop iteration. This lets us predict exit values of PHI nodes that live in
435/// loop header.
436bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
437 // Verify the input to the pass is already in LCSSA form.
438 assert(L->isLCSSAForm(*DT));
439
440 SmallVector<BasicBlock *, 8> ExitBlocks;
441 L->getUniqueExitBlocks(ExitBlocks);
442
443 bool MadeAnyChanges = false;
444 for (auto *ExitBB : ExitBlocks) {
445 // If there are no more PHI nodes in this exit block, then no more
446 // values defined inside the loop are used on this path.
447 for (PHINode &PN : ExitBB->phis()) {
448 for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
449 IncomingValIdx != E; ++IncomingValIdx) {
450 auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
451
452 // Can we prove that the exit must run on the first iteration if it
453 // runs at all? (i.e. early exits are fine for our purposes, but
454 // traces which lead to this exit being taken on the 2nd iteration
455 // aren't.) Note that this is about whether the exit branch is
456 // executed, not about whether it is taken.
457 if (!L->getLoopLatch() ||
458 !DT->dominates(IncomingBB, L->getLoopLatch()))
459 continue;
460
461 // Get condition that leads to the exit path.
462 auto *TermInst = IncomingBB->getTerminator();
463
464 Value *Cond = nullptr;
465 if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
466 // Must be a conditional branch, otherwise the block
467 // should not be in the loop.
468 Cond = BI->getCondition();
469 } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
470 Cond = SI->getCondition();
471 else
472 continue;
473
474 if (!L->isLoopInvariant(Cond))
475 continue;
476
477 auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
478
479 // Only deal with PHIs in the loop header.
480 if (!ExitVal || ExitVal->getParent() != L->getHeader())
481 continue;
482
483 // If ExitVal is a PHI on the loop header, then we know its
484 // value along this exit because the exit can only be taken
485 // on the first iteration.
486 auto *LoopPreheader = L->getLoopPreheader();
487 assert(LoopPreheader && "Invalid loop");
488 int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
489 if (PreheaderIdx != -1) {
490 assert(ExitVal->getParent() == L->getHeader() &&
491 "ExitVal must be in loop header");
492 MadeAnyChanges = true;
493 PN.setIncomingValue(IncomingValIdx,
494 ExitVal->getIncomingValue(PreheaderIdx));
495 SE->forgetValue(&PN);
496 }
497 }
498 }
499 }
500 return MadeAnyChanges;
501}
502
503//===----------------------------------------------------------------------===//
504// IV Widening - Extend the width of an IV to cover its widest uses.
505//===----------------------------------------------------------------------===//
506
507/// Update information about the induction variable that is extended by this
508/// sign or zero extend operation. This is used to determine the final width of
509/// the IV before actually widening it.
510static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
511 ScalarEvolution *SE,
512 const TargetTransformInfo *TTI) {
 // Only sext/zext casts are of interest; anything else is ignored.
513 bool IsSigned = Cast->getOpcode() == Instruction::SExt;
514 if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
515 return;
516
 // Never widen to a type the target cannot hold in a legal integer register.
517 Type *Ty = Cast->getType();
518 uint64_t Width = SE->getTypeSizeInBits(Ty);
519 if (!Cast->getDataLayout().isLegalInteger(Width))
520 return;
521
522 // Check that `Cast` actually extends the induction variable (we rely on this
523 // later). This takes care of cases where `Cast` is extending a truncation of
524 // the narrow induction variable, and thus can end up being narrower than the
525 // "narrow" induction variable.
526 uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
527 if (NarrowIVWidth >= Width)
528 return;
529
530 // Cast is either an sext or zext up to this point.
531 // We should not widen an indvar if arithmetics on the wider indvar are more
532 // expensive than those on the narrower indvar. We check only the cost of ADD
533 // because at least an ADD is required to increment the induction variable. We
534 // could compute more comprehensively the cost of all instructions on the
535 // induction variable when necessary.
536 if (TTI &&
537 TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
538 TTI->getArithmeticInstrCost(Instruction::Add,
539 Cast->getOperand(0)->getType())) {
540 return;
541 }
542
 // First qualifying cast, or a wider one than previously recorded: adopt it.
 // NOTE(review): the assignment recording the new widest type (original line
 // 545, presumably `WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);`)
 // appears to have been dropped by extraction — verify against upstream.
543 if (!WI.WidestNativeType ||
544 Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
546 WI.IsSigned = IsSigned;
547 return;
548 }
549
550 // We extend the IV to satisfy the sign of its user(s), or 'signed'
551 // if there are multiple users with both sign- and zero extensions,
552 // in order not to introduce nondeterministic behaviour based on the
553 // unspecified order of a PHI nodes' users-iterator.
554 WI.IsSigned |= IsSigned;
555}
556
557//===----------------------------------------------------------------------===//
558// Live IV Reduction - Minimize IVs live across the loop.
559//===----------------------------------------------------------------------===//
560
561//===----------------------------------------------------------------------===//
562// Simplification of IV users based on SCEV evaluation.
563//===----------------------------------------------------------------------===//
564
565namespace {
566
567class IndVarSimplifyVisitor : public IVVisitor {
568 ScalarEvolution *SE;
569 const TargetTransformInfo *TTI;
570 PHINode *IVPhi;
571
572public:
573 WideIVInfo WI;
574
575 IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
576 const TargetTransformInfo *TTI,
577 const DominatorTree *DTree)
578 : SE(SCEV), TTI(TTI), IVPhi(IV) {
579 DT = DTree;
580 WI.NarrowIV = IVPhi;
581 }
582
583 // Implement the interface used by simplifyUsersOfIV.
584 void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
585};
586
587} // end anonymous namespace
588
589/// Iteratively perform simplification on a worklist of IV users. Each
590/// successive simplification may push more users which may themselves be
591/// candidates for simplification.
592///
593/// Sign/Zero extend elimination is interleaved with IV simplification.
594bool IndVarSimplify::simplifyAndExtend(Loop *L,
595 SCEVExpander &Rewriter,
596 LoopInfo *LI) {
 // NOTE(review): the declaration of the WideIVs worklist (original line 597)
 // appears to have been dropped by extraction — verify against upstream.
598
 // Guards constrain IV ranges; detect whether any experimental_guard
 // intrinsics are in use so widening can exploit them.
599 auto *GuardDecl = Intrinsic::getDeclarationIfExists(
600 L->getBlocks()[0]->getModule(), Intrinsic::experimental_guard);
601 bool HasGuards = GuardDecl && !GuardDecl->use_empty();
602
 // NOTE(review): the opening of the LoopPhis worklist declaration (original
 // line 603, seeding it with the header's phis) appears to have been dropped
 // by extraction — verify against upstream.
604 llvm::make_pointer_range(L->getHeader()->phis()));
605
606 // Each round of simplification iterates through the SimplifyIVUsers worklist
607 // for all current phis, then determines whether any IVs can be
608 // widened. Widening adds new phis to LoopPhis, inducing another round of
609 // simplification on the wide IVs.
610 bool Changed = false;
611 while (!LoopPhis.empty()) {
612 // Evaluate as many IV expressions as possible before widening any IVs. This
613 // forces SCEV to set no-wrap flags before evaluating sign/zero
614 // extension. The first time SCEV attempts to normalize sign/zero extension,
615 // the result becomes final. So for the most predictable results, we delay
616 // evaluation of sign/zero extend evaluation until needed, and avoid running
617 // other SCEV based analysis prior to simplifyAndExtend.
618 do {
619 PHINode *CurrIV = LoopPhis.pop_back_val();
620
621 // Information about sign/zero extensions of CurrIV.
622 IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
623
 // simplifyUsersOfIV reports (changed, unswitch-opportunity) as a pair.
624 const auto &[C, U] = simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts,
625 Rewriter, &Visitor);
626
627 Changed |= C;
628 RunUnswitching |= U;
629 if (Visitor.WI.WidestNativeType) {
630 WideIVs.push_back(Visitor.WI);
631 }
632 } while(!LoopPhis.empty());
633
634 // Continue if we disallowed widening.
635 if (!WidenIndVars)
636 continue;
637
 // Widen each candidate; a successful widening re-seeds LoopPhis with the
 // wide PHI so its users get another simplification round.
638 for (; !WideIVs.empty(); WideIVs.pop_back()) {
639 unsigned ElimExt;
640 unsigned Widened;
641 if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
642 DT, DeadInsts, ElimExt, Widened,
643 HasGuards, UsePostIncrementRanges)) {
644 NumElimExt += ElimExt;
645 NumWidened += Widened;
646 Changed = true;
647 LoopPhis.push_back(WidePhi);
648 }
649 }
650 }
651 return Changed;
652}
653
654//===----------------------------------------------------------------------===//
655// linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
656//===----------------------------------------------------------------------===//
657
658/// Given an Value which is hoped to be part of an add recurance in the given
659/// loop, return the associated Phi node if so. Otherwise, return null. Note
660/// that this is less general than SCEVs AddRec checking.
 // NOTE(review): the function signature and the initialization of IncI
 // (original lines 661-662, presumably
 // `static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {` followed by
 // `Instruction *IncI = dyn_cast<Instruction>(IncV);`) appear to have been
 // dropped by extraction — verify against upstream.
663 if (!IncI)
664 return nullptr;
665
 // Only add/sub (possibly commuted) or a 2-operand GEP can form the
 // increment of a counter we recognize.
666 switch (IncI->getOpcode()) {
667 case Instruction::Add:
668 case Instruction::Sub:
669 break;
670 case Instruction::GetElementPtr:
671 // An IV counter must preserve its type.
672 if (IncI->getNumOperands() == 2)
673 break;
674 [[fallthrough]];
675 default:
676 return nullptr;
677 }
678
 // Canonical shape: phi on operand 0, loop-invariant step on operand 1.
679 PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
680 if (Phi && Phi->getParent() == L->getHeader()) {
681 if (L->isLoopInvariant(IncI->getOperand(1)))
682 return Phi;
683 return nullptr;
684 }
 // GEPs are not commutative, so stop here for them.
685 if (IncI->getOpcode() == Instruction::GetElementPtr)
686 return nullptr;
687
688 // Allow add/sub to be commuted.
689 Phi = dyn_cast<PHINode>(IncI->getOperand(1));
690 if (Phi && Phi->getParent() == L->getHeader()) {
691 if (L->isLoopInvariant(IncI->getOperand(0)))
692 return Phi;
693 }
694 return nullptr;
695}
696
697/// Whether the current loop exit test is based on this value. Currently this
698/// is limited to a direct use in the loop condition.
699static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
700 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
 // NOTE(review): the declaration of ICmp (original line 701, presumably
 // `ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());`) appears to
 // have been dropped by extraction — verify against upstream.
702 // TODO: Allow non-icmp loop test.
703 if (!ICmp)
704 return false;
705
706 // TODO: Allow indirect use.
707 return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
708}
709
710/// linearFunctionTestReplace policy. Return true unless we can show that the
711/// current exit test is already sufficiently canonical.
712static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
713 assert(L->getLoopLatch() && "Must be in simplified form");
714
715 // Avoid converting a constant or loop invariant test back to a runtime
716 // test. This is critical for when SCEV's cached ExitCount is less precise
717 // than the current IR (such as after we've proven a particular exit is
718 // actually dead and thus the BE count never reaches our ExitCount.)
719 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
720 if (L->isLoopInvariant(BI->getCondition()))
721 return false;
722
723 // Do LFTR to simplify the exit condition to an ICMP.
 // NOTE(review): the declaration of Cond (original line 724, presumably
 // `ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());`) appears to
 // have been dropped by extraction — verify against upstream.
725 if (!Cond)
726 return true;
727
728 // Do LFTR to simplify the exit ICMP to EQ/NE
729 ICmpInst::Predicate Pred = Cond->getPredicate();
730 if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
731 return true;
732
733 // Look for a loop invariant RHS
734 Value *LHS = Cond->getOperand(0);
735 Value *RHS = Cond->getOperand(1);
736 if (!L->isLoopInvariant(RHS)) {
737 if (!L->isLoopInvariant(LHS))
738 return true;
739 std::swap(LHS, RHS);
740 }
741 // Look for a simple IV counter LHS
 // NOTE(review): the declaration of Phi (original line 742, presumably
 // `PHINode *Phi = dyn_cast<PHINode>(LHS);`) appears to have been dropped by
 // extraction — verify against upstream.
743 if (!Phi)
744 Phi = getLoopPhiForCounter(LHS, L);
745
746 if (!Phi)
747 return true;
748
749 // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
750 int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
751 if (Idx < 0)
752 return true;
753
754 // Do LFTR if the exit condition's IV is *not* a simple counter.
755 Value *IncV = Phi->getIncomingValue(Idx);
756 return Phi != getLoopPhiForCounter(IncV, L);
757}
758
759/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
760/// down to checking that all operands are constant and listing instructions
761/// that may hide undef.
 // NOTE(review): the first line of the signature (original line 762,
 // presumably `static bool hasConcreteDefImpl(Value *V,
 // SmallPtrSetImpl<Value *> &Visited,`) appears to have been dropped by
 // extraction — verify against upstream.
763 unsigned Depth) {
 // Constants are concrete unless they are (or contain) undef.
764 if (isa<Constant>(V))
765 return !isa<UndefValue>(V);
766
 // Bound the recursion conservatively.
767 if (Depth >= 6)
768 return false;
769
770 // Conservatively handle non-constant non-instructions. For example, Arguments
771 // may be undef.
 // NOTE(review): the declaration of I (original line 772, presumably
 // `Instruction *I = dyn_cast<Instruction>(V);`) appears to have been
 // dropped by extraction — verify against upstream.
773 if (!I)
774 return false;
775
776 // Load and return values may be undef.
777 if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
778 return false;
779
780 // Optimistically handle other instructions.
781 for (Value *Op : I->operands()) {
 // Skip operands already examined on this walk.
782 if (!Visited.insert(Op).second)
783 continue;
784 if (!hasConcreteDefImpl(Op, Visited, Depth+1))
785 return false;
786 }
787 return true;
788}
789
790/// Return true if the given value is concrete. We must prove that undef can
791/// never reach it.
792///
793/// TODO: If we decide that this is a good approach to checking for undef, we
794/// may factor it into a common location.
795static bool hasConcreteDef(Value *V) {
 // NOTE(review): the declaration of Visited (original line 796, presumably
 // `SmallPtrSet<Value *, 8> Visited;`) appears to have been dropped by
 // extraction — verify against upstream.
797 Visited.insert(V);
798 return hasConcreteDefImpl(V, Visited, 0);
799}
800
801/// Return true if the given phi is a "counter" in L. A counter is an
802/// add recurance (of integer or pointer type) with an arbitrary start, and a
803/// step of 1. Note that L must have exactly one latch.
804static bool isLoopCounter(PHINode* Phi, Loop *L,
805 ScalarEvolution *SE) {
806 assert(Phi->getParent() == L->getHeader());
807 assert(L->getLoopLatch());
808
809 if (!SE->isSCEVable(Phi->getType()))
810 return false;
811
812 const SCEV *S = SE->getSCEV(Phi);
 // NOTE(review): the check on S (original line 813, presumably a pattern
 // match requiring an affine add-recurrence in L with unit step) appears to
 // have been dropped by extraction — verify against upstream.
814 return false;
815
 // Require that the latch increment feeds back into this same phi and is
 // itself an add recurrence.
816 int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
817 Value *IncV = Phi->getIncomingValue(LatchIdx);
818 return (getLoopPhiForCounter(IncV, L) == Phi &&
819 isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
820}
821
822/// Search the loop header for a loop counter (anadd rec w/step of one)
823/// suitable for use by LFTR. If multiple counters are available, select the
824/// "best" one based profitable heuristics.
825///
826/// BECount may be an i8* pointer type. The pointer difference is already
827/// valid count without scaling the address stride, so it remains a pointer
828/// expression as far as SCEV is concerned.
829static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
830 const SCEV *BECount,
 // NOTE(review): the rest of the parameter list (original line 831,
 // presumably `ScalarEvolution *SE, DominatorTree *DT) {`) appears to have
 // been dropped by extraction — verify against upstream.
832 uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
833
834 Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
835
836 // Loop over all of the PHI nodes, looking for a simple counter.
837 PHINode *BestPhi = nullptr;
838 const SCEV *BestInit = nullptr;
839 BasicBlock *LatchBlock = L->getLoopLatch();
840 assert(LatchBlock && "Must be in simplified form");
841 const DataLayout &DL = L->getHeader()->getDataLayout();
842
843 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
844 PHINode *Phi = cast<PHINode>(I);
845 if (!isLoopCounter(Phi, L, SE))
846 continue;
847
848 const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
849
850 // AR may be a pointer type, while BECount is an integer type.
851 // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
852 // AR may not be a narrower type, or we may never exit.
853 uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
854 if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
855 continue;
856
857 // Avoid reusing a potentially undef value to compute other values that may
858 // have originally had a concrete definition.
859 if (!hasConcreteDef(Phi)) {
860 // We explicitly allow unknown phis as long as they are already used by
861 // the loop exit test. This is legal since performing LFTR could not
862 // increase the number of undef users.
863 Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
864 if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
865 !isLoopExitTestBasedOn(IncPhi, ExitingBB))
866 continue;
867 }
868
869 // Avoid introducing undefined behavior due to poison which didn't exist in
870 // the original program. (Annoyingly, the rules for poison and undef
871 // propagation are distinct, so this does NOT cover the undef case above.)
872 // We have to ensure that we don't introduce UB by introducing a use on an
873 // iteration where said IV produces poison. Our strategy here differs for
874 // pointers and integer IVs. For integers, we strip and reinfer as needed,
875 // see code in linearFunctionTestReplace. For pointers, we restrict
876 // transforms as there is no good way to reinfer inbounds once lost.
877 if (!Phi->getType()->isIntegerTy() &&
878 !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
879 continue;
880
881 const SCEV *Init = AR->getStart();
882
883 if (BestPhi && !isAlmostDeadIV(BestPhi, LatchBlock, Cond)) {
884 // Don't force a live loop counter if another IV can be used.
885 if (isAlmostDeadIV(Phi, LatchBlock, Cond))
886 continue;
887
888 // Prefer to count-from-zero. This is a more "canonical" counter form. It
889 // also prefers integer to pointer IVs.
890 if (BestInit->isZero() != Init->isZero()) {
891 if (BestInit->isZero())
892 continue;
893 }
894 // If two IVs both count from zero or both count from nonzero then the
895 // narrower is likely a dead phi that has been widened. Use the wider phi
896 // to allow the other to be eliminated.
897 else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
898 continue;
899 }
 // This phi is the best candidate seen so far.
900 BestPhi = Phi;
901 BestInit = Init;
902 }
903 return BestPhi;
904}
905
906/// Insert an IR expression which computes the value held by the IV IndVar
907/// (which must be an loop counter w/unit stride) after the backedge of loop L
908/// is taken ExitCount times.
909static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
910 const SCEV *ExitCount, bool UsePostInc, Loop *L,
911 SCEVExpander &Rewriter, ScalarEvolution *SE) {
912 assert(isLoopCounter(IndVar, L, SE));
913 assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
914 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
915 assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
916
917 // For integer IVs, truncate the IV before computing the limit unless we
918 // know apriori that the limit must be a constant when evaluated in the
919 // bitwidth of the IV. We prefer (potentially) keeping a truncate of the
920 // IV in the loop over a (potentially) expensive expansion of the widened
921 // exit count add(zext(add)) expression.
922 if (IndVar->getType()->isIntegerTy() &&
923 SE->getTypeSizeInBits(AR->getType()) >
924 SE->getTypeSizeInBits(ExitCount->getType())) {
925 const SCEV *IVInit = AR->getStart();
926 if (!isa<SCEVConstant>(IVInit) || !isa<SCEVConstant>(ExitCount))
927 AR = cast<SCEVAddRecExpr>(SE->getTruncateExpr(AR, ExitCount->getType()));
928 }
929
930 const SCEVAddRecExpr *ARBase = UsePostInc ? AR->getPostIncExpr(*SE) : AR;
931 const SCEV *IVLimit = ARBase->evaluateAtIteration(ExitCount, *SE);
932 assert(SE->isLoopInvariant(IVLimit, L) &&
933 "Computed iteration count is not loop invariant!");
934 return Rewriter.expandCodeFor(IVLimit, ARBase->getType(),
935 ExitingBB->getTerminator());
936}
937
/// This method rewrites the exit condition of the loop to be a canonical !=
/// comparison against the incremented loop induction variable. This pass is
/// able to rewrite the exit tests of any loop where the SCEV analysis can
/// determine a loop-invariant trip count of the loop, which is actually a much
/// broader range than just linear tests.
///
/// \p ExitCount is the iteration count for the exit through \p ExitingBB and
/// \p IndVar must satisfy isLoopCounter. Always returns true; once this point
/// is reached the rewrite is performed unconditionally.
bool IndVarSimplify::
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                          const SCEV *ExitCount,
                          PHINode *IndVar, SCEVExpander &Rewriter) {
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
  assert(isLoopCounter(IndVar, L, SE));
  // The latch-incoming value of the counter phi, i.e. the post-increment IV.
  Instruction * const IncVar =
      cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));

  // Initialize CmpIndVar to the preincremented IV.
  Value *CmpIndVar = IndVar;
  bool UsePostInc = false;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  if (ExitingBB == L->getLoopLatch()) {
    // For pointer IVs, we chose to not strip inbounds which requires us not
    // to add a potentially UB introducing use. We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
    bool SafeToPostInc =
        IndVar->getType()->isIntegerTy() ||
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
    if (SafeToPostInc) {
      UsePostInc = true;
      CmpIndVar = IncVar;
    }
  }

  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last iteration
  // only) or if LFTR switches to a different IV that was previously dynamically
  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
  // check), because the pre-inc addrec flags may be adopted from the original
  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which is
  // not covered by the post-inc addrec. (If the new IV was not dynamically
  // dead, it could not be poison on the first iteration in the first place.)
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }

  Value *ExitCnt = genLoopLimit(
      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  assert(ExitCnt->getType()->isPointerTy() ==
             IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  // If the "true" successor stays in the loop, keep looping while the IV
  // differs from the limit (NE); otherwise exit once it equals the limit (EQ).
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst::Predicate P;
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());

  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type. This is safe
  // since we know (from the exit count bitwidth), that we can't self-wrap in
  // the narrower type.
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
    // the other side of the comparison instead. We still evaluate the limit
    // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
    // a truncate within it.
    bool Extended = false;
    const SCEV *IV = SE->getSCEV(CmpIndVar);
    const SCEV *TruncatedIV = SE->getTruncateExpr(IV, ExitCnt->getType());
    const SCEV *ZExtTrunc =
        SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());

    if (ZExtTrunc == IV) {
      Extended = true;
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
                                   "wide.trip.count");
    } else {
      const SCEV *SExtTrunc =
          SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
      if (SExtTrunc == IV) {
        Extended = true;
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
                                     "wide.trip.count");
      }
    }

    if (Extended) {
      bool Discard;
      L->makeLoopInvariant(ExitCnt, Discard);
    } else
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                      "lftr.wideiv");
  }
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                    << "      LHS:" << *CmpIndVar << '\n'
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
                    << "\n"
                    << "      RHS:\t" << *ExitCnt << "\n"
                    << "ExitCount:\t" << *ExitCount << "\n"
                    << "  was: " << *BI->getCondition() << "\n");

  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.emplace_back(OrigCond);

  ++NumLFTR;
  return true;
}
1079
// Rewrite the condition of the loop-exiting branch BI to NewCond; the previous
// condition is queued into DeadInsts once it has no remaining uses.
static void replaceExitCond(BranchInst *BI, Value *NewCond,
  auto *OldCond = BI->getCondition();
  LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
                    << " with " << *NewCond << "\n");
  BI->setCondition(NewCond);
  if (OldCond->use_empty())
    DeadInsts.emplace_back(OldCond);
}
1089
1090static Constant *createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB,
1091 bool IsTaken) {
1092 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1093 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1094 auto *OldCond = BI->getCondition();
1095 return ConstantInt::get(OldCond->getType(),
1096 IsTaken ? ExitIfTrue : !ExitIfTrue);
1097}
1098
// Force the exit through ExitingBB to be always taken (IsTaken) or never
// taken (!IsTaken) by replacing the exit branch's condition with a constant.
static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  auto *NewCond = createFoldedExitCond(L, ExitingBB, IsTaken);
  replaceExitCond(BI, NewCond, DeadInsts);
}
1105
// Replace every header phi of L with its preheader incoming value (valid when
// the backedge is known to never be taken), queue the phis into DeadInsts, and
// then iteratively simplify the in-loop users that become foldable as a
// result.
    LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
    ScalarEvolution &SE) {
  assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
  auto *LoopPreheader = L->getLoopPreheader();
  auto *LoopHeader = L->getHeader();
  for (auto &PN : LoopHeader->phis()) {
    auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
    for (User *U : PN.users())
      Worklist.push_back(cast<Instruction>(U));
    // Invalidate cached SCEV results before rewriting the phi.
    SE.forgetValue(&PN);
    PN.replaceAllUsesWith(PreheaderIncoming);
    DeadInsts.emplace_back(&PN);
  }

  // Replacing with the preheader value will often allow IV users to simplify
  // (especially if the preheader value is a constant).
  while (!Worklist.empty()) {
    auto *I = cast<Instruction>(Worklist.pop_back_val());
    if (!Visited.insert(I).second)
      continue;

    // Don't simplify instructions outside the loop.
    if (!L->contains(I))
      continue;

    Value *Res = simplifyInstruction(I, I->getDataLayout());
    if (Res && LI->replacementPreservesLCSSAForm(I, Res)) {
      // The user simplified away; push its own users so they get a chance to
      // fold too, then queue it for deletion.
      for (User *U : I->users())
        Worklist.push_back(cast<Instruction>(U));
      I->replaceAllUsesWith(Res);
      DeadInsts.emplace_back(I);
    }
  }
}
1143
// Expand the loop-invariant predicate LIP at the preheader terminator and
// build an icmp computing the replacement exit condition for ExitingBB. The
// predicate is inverted when the branch exits the loop on "true", so the new
// condition has the same stay-in-loop polarity as the old one.
static Value *
                    SCEVExpander &Rewriter) {
  ICmpInst::Predicate InvariantPred = LIP.Pred;
  BasicBlock *Preheader = L->getLoopPreheader();
  assert(Preheader && "Preheader doesn't exist");
  Rewriter.setInsertPoint(Preheader->getTerminator());
  auto *LHSV = Rewriter.expandCodeFor(LIP.LHS);
  auto *RHSV = Rewriter.expandCodeFor(LIP.RHS);
  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
  if (ExitIfTrue)
    InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
  IRBuilder<> Builder(Preheader->getTerminator());
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  // Reuse the old condition's name for readability of the new IR.
  return Builder.CreateICmp(InvariantPred, LHSV, RHSV,
                            BI->getCondition()->getName());
}
1162
// Try to replace the exit test ICmp (with Inverted giving its stay-in-loop
// polarity) by either a constant-folded exit condition or a loop-invariant
// check that is equivalent during the first MaxIter iterations (one fewer if
// SkipLastIter). Returns std::nullopt if no such replacement was found.
static std::optional<Value *>
createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB,
                  const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
                  ScalarEvolution *SE, SCEVExpander &Rewriter) {
  CmpPredicate Pred = ICmp->getCmpPredicate();
  Value *LHS = ICmp->getOperand(0);
  Value *RHS = ICmp->getOperand(1);

  // 'LHS pred RHS' should now mean that we stay in loop.
  auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (Inverted)

  const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
  const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
  // Can we prove it to be trivially true or false?
  if (auto EV = SE->evaluatePredicateAt(Pred, LHSS, RHSS, BI))
    return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ !*EV);

  auto *ARTy = LHSS->getType();
  auto *MaxIterTy = MaxIter->getType();
  // If possible, adjust types.
  if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
    MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
  else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
    // Truncating MaxIter is only sound if it provably fits in the narrower
    // type; otherwise leave the types mismatched.
    const SCEV *MinusOne = SE->getMinusOne(ARTy);
    const SCEV *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
    if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
      MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
  }

  if (SkipLastIter) {
    // Semantically skip last iter is "subtract 1, do not bother about unsigned
    // wrap". getLoopInvariantExitCondDuringFirstIterations knows how to deal
    // with umin in a smart way, but umin(a, b) - 1 will likely not simplify.
    // So we manually construct umin(a - 1, b - 1).
    if (auto *UMin = dyn_cast<SCEVUMinExpr>(MaxIter)) {
      for (const SCEV *Op : UMin->operands())
        Elements.push_back(SE->getMinusSCEV(Op, SE->getOne(Op->getType())));
      MaxIter = SE->getUMinFromMismatchedTypes(Elements);
    } else
      MaxIter = SE->getMinusSCEV(MaxIter, SE->getOne(MaxIter->getType()));
  }

  // Check if there is a loop-invariant predicate equivalent to our check.
  auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
                                                               L, BI, MaxIter);
  if (!LIP)
    return std::nullopt;

  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
    return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ false);
  else
    return createInvariantCond(L, ExitingBB, *LIP, Rewriter);
}
1220
// For an exit whose exact count is unknown, walk the and/or tree of the exit
// branch's condition, and try to replace each single-use leaf icmp with a
// condition that is invariant over the first MaxIter iterations. Returns true
// if any condition was replaced.
    const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter,
    bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter,
  assert(
      (L->contains(BI->getSuccessor(0)) != L->contains(BI->getSuccessor(1))) &&
      "Not a loop exit!");

  // For branch that stays in loop by TRUE condition, go through AND. For branch
  // that stays in loop by FALSE condition, go through OR. Both gives the
  // similar logic: "stay in loop iff all conditions are true(false)".
  bool Inverted = L->contains(BI->getSuccessor(1));
  SmallVector<ICmpInst *, 4> LeafConditions;
  SmallVector<Value *, 4> Worklist;
  Value *OldCond = BI->getCondition();
  Visited.insert(OldCond);
  Worklist.push_back(OldCond);

  // Peel one level of logical and/or (matching the stay-in-loop polarity) and
  // push the unseen operands; returns false if V is not such an operation.
  auto GoThrough = [&](Value *V) {
    Value *LHS = nullptr, *RHS = nullptr;
    if (Inverted) {
      if (!match(V, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
        return false;
    } else {
      if (!match(V, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
        return false;
    }
    if (Visited.insert(LHS).second)
      Worklist.push_back(LHS);
    if (Visited.insert(RHS).second)
      Worklist.push_back(RHS);
    return true;
  };

  do {
    Value *Curr = Worklist.pop_back_val();
    // Go through AND/OR conditions. Collect leaf ICMPs. We only care about
    // those with one use, to avoid instruction duplication.
    if (Curr->hasOneUse())
      if (!GoThrough(Curr))
        if (auto *ICmp = dyn_cast<ICmpInst>(Curr))
          LeafConditions.push_back(ICmp);
  } while (!Worklist.empty());

  // If the current basic block has the same exit count as the whole loop, and
  // it consists of multiple icmp's, try to collect all icmp's that give exact
  // same exit count. For all other icmp's, we could use one less iteration,
  // because their value on the last iteration doesn't really matter.
  SmallPtrSet<ICmpInst *, 4> ICmpsFailingOnLastIter;
  if (!SkipLastIter && LeafConditions.size() > 1 &&
      SE->getExitCount(L, ExitingBB,
          MaxIter)
    for (auto *ICmp : LeafConditions) {
      auto EL = SE->computeExitLimitFromCond(L, ICmp, Inverted,
                                             /*ControlsExit*/ false);
      const SCEV *ExitMax = EL.SymbolicMaxNotTaken;
      if (isa<SCEVCouldNotCompute>(ExitMax))
        continue;
      // They could be of different types (specifically this happens after
      // IV widening).
      auto *WiderType =
          SE->getWiderType(ExitMax->getType(), MaxIter->getType());
      const SCEV *WideExitMax = SE->getNoopOrZeroExtend(ExitMax, WiderType);
      const SCEV *WideMaxIter = SE->getNoopOrZeroExtend(MaxIter, WiderType);
      if (WideExitMax == WideMaxIter)
        ICmpsFailingOnLastIter.insert(ICmp);
    }

  bool Changed = false;
  for (auto *OldCond : LeafConditions) {
    // Skip last iteration for this icmp under one of two conditions:
    // - We do it for all conditions;
    // - There is another ICmp that would fail on last iter, so this one doesn't
    //   really matter.
    bool OptimisticSkipLastIter = SkipLastIter;
    if (!OptimisticSkipLastIter) {
      if (ICmpsFailingOnLastIter.size() > 1)
        OptimisticSkipLastIter = true;
      else if (ICmpsFailingOnLastIter.size() == 1)
        OptimisticSkipLastIter = !ICmpsFailingOnLastIter.count(OldCond);
    }
    if (auto Replaced =
            createReplacement(OldCond, L, ExitingBB, MaxIter, Inverted,
                              OptimisticSkipLastIter, SE, Rewriter)) {
      Changed = true;
      auto *NewCond = *Replaced;
      if (auto *NCI = dyn_cast<Instruction>(NewCond)) {
        NCI->setName(OldCond->getName() + ".first_iter");
      }
      LLVM_DEBUG(dbgs() << "Unknown exit count: Replacing " << *OldCond
                        << " with " << *NewCond << "\n");
      assert(OldCond->hasOneUse() && "Must be!");
      OldCond->replaceAllUsesWith(NewCond);
      DeadInsts.push_back(OldCond);
      // Make sure we no longer consider this condition as failing on last
      // iteration.
      ICmpsFailingOnLastIter.erase(OldCond);
    }
  }
  return Changed;
}
1324
/// Canonicalize exit compares involving zero-extended values: first rewrite
/// signed comparisons of a zext'd value against a loop-invariant operand into
/// their unsigned form when the invariant operand's range permits, then try
/// to rotate the zext out of the loop by truncating the invariant side in the
/// preheader instead. Returns true if any compare was changed.
bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
  // Note: This is duplicating a particular part on SimplifyIndVars reasoning.
  // We need to duplicate it because given icmp zext(small-iv), C, IVUsers
  // never reaches the icmp since the zext doesn't fold to an AddRec unless
  // it already has flags. The alternative to this would be to extending the
  // set of "interesting" IV users to include the icmp, but doing that
  // regresses results in practice by querying SCEVs before trip counts which
  // rely on them which results in SCEV caching sub-optimal answers. The
  // concern about caching sub-optimal results is why we only query SCEVs of
  // the loop invariant RHS here.
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  bool Changed = false;
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse())
      continue;

    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    // For the range reasoning, avoid computing SCEVs in the loop to avoid
    // poisoning cache with sub-optimal results. For the must-execute case,
    // this is a necessary precondition for correctness.
    if (!L->isLoopInvariant(RHS)) {
      if (!L->isLoopInvariant(LHS))
        continue;
      // Same logic applies for the inverse case
      std::swap(LHS, RHS);
    }

    // Match (icmp signed-cond zext, RHS)
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
      continue;

    // If the invariant operand's unsigned range fits inside zext(full range
    // of the inner type), both operands are known non-negative in the outer
    // type, so the signed compare behaves like the unsigned one.
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
      // replace the signed condition with the unsigned version.
      ICmp->setPredicate(ICmp->getUnsignedPredicate());
      Changed = true;
      // Note: No SCEV invalidation needed. We've changed the predicate, but
      // have not changed exit counts, or the values produced by the compare.
      continue;
    }
  }

  // Now that we've canonicalized the condition to match the extend,
  // see if we can rotate the extend out of the loop.
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
      continue;

    bool Swapped = false;
    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
      // Nothing to rotate
      continue;
    if (L->isLoopInvariant(LHS)) {
      // Same logic applies for the inverse case until we actually pick
      // which operand of the compare to update.
      Swapped = true;
      std::swap(LHS, RHS);
    }
    assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));

    // Match (icmp unsigned-cond zext, RHS)
    // TODO: Extend to handle corresponding sext/signed-cmp case
    // TODO: Extend to other invertible functions
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))))
      continue;

    // In general, we only rotate if we can do so without increasing the number
    // of instructions. The exception is when we have an zext(add-rec). The
    // reason for allowing this exception is that we know we need to get rid
    // of the zext for SCEV to be able to compute a trip count for said loops;
    // we consider the new trip count valuable enough to increase instruction
    // count by one.
    if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
      continue;

    // Given a icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS
    // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
    // when zext is loop varying and RHS is loop invariant. This converts
    // loop varying work to loop-invariant work.
    auto doRotateTransform = [&]() {
      assert(ICmp->isUnsigned() && "must have proven unsigned already");
      auto *NewRHS = CastInst::Create(
          Instruction::Trunc, RHS, LHSOp->getType(), "",
          L->getLoopPreheader()->getTerminator()->getIterator());
      // NewRHS is an operation that has been hoisted out of the loop, and
      // therefore should have a dropped location.
      NewRHS->setDebugLoc(DebugLoc::getDropped());
      ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
      ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
      // Samesign flag cannot be preserved after narrowing the compare.
      ICmp->setSameSign(false);
      if (LHS->use_empty())
        DeadInsts.push_back(LHS);
    };

    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      doRotateTransform();
      Changed = true;
      // Note, we are leaving SCEV in an unfortunately imprecise case here
      // as rotation tends to reveal information about trip counts not
      // previously visible.
      continue;
    }
  }

  return Changed;
}
1460
1461bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1462 SmallVector<BasicBlock*, 16> ExitingBlocks;
1463 L->getExitingBlocks(ExitingBlocks);
1464
1465 // Remove all exits which aren't both rewriteable and execute on every
1466 // iteration.
1467 llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1468 // If our exitting block exits multiple loops, we can only rewrite the
1469 // innermost one. Otherwise, we're changing how many times the innermost
1470 // loop runs before it exits.
1471 if (LI->getLoopFor(ExitingBB) != L)
1472 return true;
1473
1474 // Can't rewrite non-branch yet.
1475 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1476 if (!BI)
1477 return true;
1478
1479 // Likewise, the loop latch must be dominated by the exiting BB.
1480 if (!DT->dominates(ExitingBB, L->getLoopLatch()))
1481 return true;
1482
1483 if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
1484 // If already constant, nothing to do. However, if this is an
1485 // unconditional exit, we can still replace header phis with their
1486 // preheader value.
1487 if (!L->contains(BI->getSuccessor(CI->isNullValue())))
1488 replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1489 return true;
1490 }
1491
1492 return false;
1493 });
1494
1495 if (ExitingBlocks.empty())
1496 return false;
1497
1498 // Get a symbolic upper bound on the loop backedge taken count.
1499 const SCEV *MaxBECount = SE->getSymbolicMaxBackedgeTakenCount(L);
1500 if (isa<SCEVCouldNotCompute>(MaxBECount))
1501 return false;
1502
1503 // Visit our exit blocks in order of dominance. We know from the fact that
1504 // all exits must dominate the latch, so there is a total dominance order
1505 // between them.
1506 llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1507 // std::sort sorts in ascending order, so we want the inverse of
1508 // the normal dominance relation.
1509 if (A == B) return false;
1510 if (DT->properlyDominates(A, B))
1511 return true;
1512 else {
1513 assert(DT->properlyDominates(B, A) &&
1514 "expected total dominance order!");
1515 return false;
1516 }
1517 });
1518#ifdef ASSERT
1519 for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
1520 assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
1521 }
1522#endif
1523
1524 bool Changed = false;
1525 bool SkipLastIter = false;
1526 const SCEV *CurrMaxExit = SE->getCouldNotCompute();
1527 auto UpdateSkipLastIter = [&](const SCEV *MaxExitCount) {
1528 if (SkipLastIter || isa<SCEVCouldNotCompute>(MaxExitCount))
1529 return;
1530 if (isa<SCEVCouldNotCompute>(CurrMaxExit))
1531 CurrMaxExit = MaxExitCount;
1532 else
1533 CurrMaxExit = SE->getUMinFromMismatchedTypes(CurrMaxExit, MaxExitCount);
1534 // If the loop has more than 1 iteration, all further checks will be
1535 // executed 1 iteration less.
1536 if (CurrMaxExit == MaxBECount)
1537 SkipLastIter = true;
1538 };
1539 SmallPtrSet<const SCEV *, 8> DominatingExactExitCounts;
1540 for (BasicBlock *ExitingBB : ExitingBlocks) {
1541 const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBB);
1542 const SCEV *MaxExitCount = SE->getExitCount(
1543 L, ExitingBB, ScalarEvolution::ExitCountKind::SymbolicMaximum);
1544 if (isa<SCEVCouldNotCompute>(ExactExitCount)) {
1545 // Okay, we do not know the exit count here. Can we at least prove that it
1546 // will remain the same within iteration space?
1547 auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1548 auto OptimizeCond = [&](bool SkipLastIter) {
1549 return optimizeLoopExitWithUnknownExitCount(L, BI, ExitingBB,
1550 MaxBECount, SkipLastIter,
1551 SE, Rewriter, DeadInsts);
1552 };
1553
1554 // TODO: We might have proved that we can skip the last iteration for
1555 // this check. In this case, we only want to check the condition on the
1556 // pre-last iteration (MaxBECount - 1). However, there is a nasty
1557 // corner case:
1558 //
1559 // for (i = len; i != 0; i--) { ... check (i ult X) ... }
1560 //
1561 // If we could not prove that len != 0, then we also could not prove that
1562 // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
1563 // OptimizeCond will likely not prove anything for it, even if it could
1564 // prove the same fact for len.
1565 //
1566 // As a temporary solution, we query both last and pre-last iterations in
1567 // hope that we will be able to prove triviality for at least one of
1568 // them. We can stop querying MaxBECount for this case once SCEV
1569 // understands that (MaxBECount - 1) will not overflow here.
1570 if (OptimizeCond(false))
1571 Changed = true;
1572 else if (SkipLastIter && OptimizeCond(true))
1573 Changed = true;
1574 UpdateSkipLastIter(MaxExitCount);
1575 continue;
1576 }
1577
1578 UpdateSkipLastIter(ExactExitCount);
1579
1580 // If we know we'd exit on the first iteration, rewrite the exit to
1581 // reflect this. This does not imply the loop must exit through this
1582 // exit; there may be an earlier one taken on the first iteration.
1583 // We know that the backedge can't be taken, so we replace all
1584 // the header PHIs with values coming from the preheader.
1585 if (ExactExitCount->isZero()) {
1586 foldExit(L, ExitingBB, true, DeadInsts);
1587 replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1588 Changed = true;
1589 continue;
1590 }
1591
1592 assert(ExactExitCount->getType()->isIntegerTy() &&
1593 MaxBECount->getType()->isIntegerTy() &&
1594 "Exit counts must be integers");
1595
1596 Type *WiderType =
1597 SE->getWiderType(MaxBECount->getType(), ExactExitCount->getType());
1598 ExactExitCount = SE->getNoopOrZeroExtend(ExactExitCount, WiderType);
1599 MaxBECount = SE->getNoopOrZeroExtend(MaxBECount, WiderType);
1600 assert(MaxBECount->getType() == ExactExitCount->getType());
1601
1602 // Can we prove that some other exit must be taken strictly before this
1603 // one?
1604 if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, MaxBECount,
1605 ExactExitCount)) {
1606 foldExit(L, ExitingBB, false, DeadInsts);
1607 Changed = true;
1608 continue;
1609 }
1610
1611 // As we run, keep track of which exit counts we've encountered. If we
1612 // find a duplicate, we've found an exit which would have exited on the
1613 // exiting iteration, but (from the visit order) strictly follows another
1614 // which does the same and is thus dead.
1615 if (!DominatingExactExitCounts.insert(ExactExitCount).second) {
1616 foldExit(L, ExitingBB, false, DeadInsts);
1617 Changed = true;
1618 continue;
1619 }
1620
1621 // TODO: There might be another oppurtunity to leverage SCEV's reasoning
1622 // here. If we kept track of the min of dominanting exits so far, we could
1623 // discharge exits with EC >= MDEC. This is less powerful than the existing
1624 // transform (since later exits aren't considered), but potentially more
1625 // powerful for any case where SCEV can prove a >=u b, but neither a == b
1626 // or a >u b. Such a case is not currently known.
1627 }
1628 return Changed;
1629}
1630
1631static bool crashingBBWithoutEffect(const BasicBlock &BB) {
1632 return llvm::all_of(BB, [](const Instruction &I) {
1633 // TODO: for now this is overly restrictive, to make sure nothing in this
1634 // BB can depend on the loop body.
1635 // It's not enough to check for !I.mayHaveSideEffects(), because e.g. a
1636 // load does not have a side effect, but we could have
1637 // %a = load ptr, ptr %ptr
1638 // %b = load i32, ptr %a
1639 // Now if the loop stored a non-nullptr to %a, we could cause a nullptr
1640 // dereference by skipping over loop iterations.
1641 if (const auto *CB = dyn_cast<CallBase>(&I)) {
1642 if (CB->onlyAccessesInaccessibleMemory())
1643 return true;
1644 }
1645 return isa<UnreachableInst>(I);
1646 });
1647}
1648
1649bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
1650 SmallVector<BasicBlock*, 16> ExitingBlocks;
1651 L->getExitingBlocks(ExitingBlocks);
1652
1653 // Finally, see if we can rewrite our exit conditions into a loop invariant
1654 // form. If we have a read-only loop, and we can tell that we must exit down
1655 // a path which does not need any of the values computed within the loop, we
1656 // can rewrite the loop to exit on the first iteration. Note that this
1657 // doesn't either a) tell us the loop exits on the first iteration (unless
1658 // *all* exits are predicateable) or b) tell us *which* exit might be taken.
1659 // This transformation looks a lot like a restricted form of dead loop
1660 // elimination, but restricted to read-only loops and without neccesssarily
1661 // needing to kill the loop entirely.
1662 if (!LoopPredication)
1663 return false;
1664
1665 // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
1666 // through *explicit* control flow. We have to eliminate the possibility of
1667 // implicit exits (see below) before we know it's truly exact.
1668 const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
1669 if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC))
1670 return false;
1671
1672 assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
1673 assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");
1674
1675 auto BadExit = [&](BasicBlock *ExitingBB) {
1676 // If our exiting block exits multiple loops, we can only rewrite the
1677 // innermost one. Otherwise, we're changing how many times the innermost
1678 // loop runs before it exits.
1679 if (LI->getLoopFor(ExitingBB) != L)
1680 return true;
1681
1682 // Can't rewrite non-branch yet.
1683 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1684 if (!BI)
1685 return true;
1686
1687 // If already constant, nothing to do.
1688 if (isa<Constant>(BI->getCondition()))
1689 return true;
1690
1691 // If the exit block has phis, we need to be able to compute the values
1692 // within the loop which contains them. This assumes trivially lcssa phis
1693 // have already been removed; TODO: generalize
1694 BasicBlock *ExitBlock =
1695 BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
1696 if (!ExitBlock->phis().empty())
1697 return true;
1698
1699 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1700 if (isa<SCEVCouldNotCompute>(ExitCount) ||
1701 !Rewriter.isSafeToExpand(ExitCount))
1702 return true;
1703
1704 assert(SE->isLoopInvariant(ExitCount, L) &&
1705 "Exit count must be loop invariant");
1706 assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
1707 return false;
1708 };
1709
1710 // Make sure all exits dominate the latch. This means there is a linear chain
1711 // of exits. We check this before sorting so we have a total order.
1712 BasicBlock *Latch = L->getLoopLatch();
1713 for (BasicBlock *ExitingBB : ExitingBlocks)
1714 if (!DT->dominates(ExitingBB, Latch))
1715 return false;
1716
1717 // If we have any exits which can't be predicated themselves, than we can't
1718 // predicate any exit which isn't guaranteed to execute before it. Consider
1719 // two exits (a) and (b) which would both exit on the same iteration. If we
1720 // can predicate (b), but not (a), and (a) preceeds (b) along some path, then
1721 // we could convert a loop from exiting through (a) to one exiting through
1722 // (b). Note that this problem exists only for exits with the same exit
1723 // count, and we could be more aggressive when exit counts are known inequal.
1724 llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1725 // llvm::sort sorts in ascending order, so we want the inverse of
1726 // the normal dominance relation.
1727 if (A == B)
1728 return false;
1729 if (DT->properlyDominates(A, B))
1730 return true;
1731 if (DT->properlyDominates(B, A))
1732 return false;
1733 llvm_unreachable("Should have total dominance order");
1734 });
1735
1736 // Make sure our exit blocks are really a total order (i.e. a linear chain of
1737 // exits before the backedge).
1738 for (unsigned i = 1; i < ExitingBlocks.size(); i++)
1739 assert(DT->dominates(ExitingBlocks[i - 1], ExitingBlocks[i]) &&
1740 "Not sorted by dominance");
1741
1742 // Given our sorted total order, we know that exit[j] must be evaluated
1743 // after all exit[i] such j > i.
1744 for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
1745 if (BadExit(ExitingBlocks[i])) {
1746 ExitingBlocks.resize(i);
1747 break;
1748 }
1749
1750 if (ExitingBlocks.empty())
1751 return false;
1752
1753 // At this point, ExitingBlocks consists of only those blocks which are
1754 // predicatable. Given that, we know we have at least one exit we can
1755 // predicate if the loop is doesn't have side effects and doesn't have any
1756 // implicit exits (because then our exact BTC isn't actually exact).
1757 // @Reviewers - As structured, this is O(I^2) for loop nests. Any
1758 // suggestions on how to improve this? I can obviously bail out for outer
1759 // loops, but that seems less than ideal. MemorySSA can find memory writes,
1760 // is that enough for *all* side effects?
1761 bool HasThreadLocalSideEffects = false;
1762 for (BasicBlock *BB : L->blocks())
1763 for (auto &I : *BB)
1764 // TODO:isGuaranteedToTransfer
1765 if (I.mayHaveSideEffects()) {
1767 return false;
1768 HasThreadLocalSideEffects = true;
1769 if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
1770 // Simple stores cannot be observed by other threads.
1771 // If HasThreadLocalSideEffects is set, we check
1772 // crashingBBWithoutEffect to make sure that the crashing BB cannot
1773 // observe them either.
1774 if (!SI->isSimple())
1775 return false;
1776 } else {
1777 return false;
1778 }
1779 }
1780
1781 bool Changed = false;
1782 // Finally, do the actual predication for all predicatable blocks. A couple
1783 // of notes here:
1784 // 1) We don't bother to constant fold dominated exits with identical exit
1785 // counts; that's simply a form of CSE/equality propagation and we leave
1786 // it for dedicated passes.
1787 // 2) We insert the comparison at the branch. Hoisting introduces additional
1788 // legality constraints and we leave that to dedicated logic. We want to
1789 // predicate even if we can't insert a loop invariant expression as
1790 // peeling or unrolling will likely reduce the cost of the otherwise loop
1791 // varying check.
1792 Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
1793 IRBuilder<> B(L->getLoopPreheader()->getTerminator());
1794 Value *ExactBTCV = nullptr; // Lazily generated if needed.
1795 for (BasicBlock *ExitingBB : ExitingBlocks) {
1796 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1797
1798 auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1799 if (HasThreadLocalSideEffects) {
1800 const BasicBlock *Unreachable = nullptr;
1801 for (const BasicBlock *Succ : BI->successors()) {
1802 if (isa<UnreachableInst>(Succ->getTerminator()))
1803 Unreachable = Succ;
1804 }
1805 // Exit BB which have one branch back into the loop and another one to
1806 // a trap can still be optimized, because local side effects cannot
1807 // be observed in the exit case (the trap). We could be smarter about
1808 // this, but for now lets pattern match common cases that directly trap.
1809 if (Unreachable == nullptr || !crashingBBWithoutEffect(*Unreachable))
1810 return Changed;
1811 }
1812 Value *NewCond;
1813 if (ExitCount == ExactBTC) {
1814 NewCond = L->contains(BI->getSuccessor(0)) ?
1815 B.getFalse() : B.getTrue();
1816 } else {
1817 Value *ECV = Rewriter.expandCodeFor(ExitCount);
1818 if (!ExactBTCV)
1819 ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
1820 Value *RHS = ExactBTCV;
1821 if (ECV->getType() != RHS->getType()) {
1822 Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
1823 ECV = B.CreateZExt(ECV, WiderTy);
1824 RHS = B.CreateZExt(RHS, WiderTy);
1825 }
1826 auto Pred = L->contains(BI->getSuccessor(0)) ?
1827 ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1828 NewCond = B.CreateICmp(Pred, ECV, RHS);
1829 }
1830 Value *OldCond = BI->getCondition();
1831 BI->setCondition(NewCond);
1832 if (OldCond->use_empty())
1833 DeadInsts.emplace_back(OldCond);
1834 Changed = true;
1835 RunUnswitching = true;
1836 }
1837
1838 return Changed;
1839}
1840
1841//===----------------------------------------------------------------------===//
1842// IndVarSimplify driver. Manage several subpasses of IV simplification.
1843//===----------------------------------------------------------------------===//
1844
/// Top-level driver: runs the full sequence of induction-variable
/// simplifications on loop \p L (float->int IV rewriting, IV user
/// simplification, exit-value rewriting, congruent-IV elimination, exit
/// canonicalization/predication, and LFTR). Returns true if the IR changed.
/// The ordering of the steps below is deliberate; each stage may invalidate
/// or refine SCEV results consumed by later stages.
bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
  //    and we're in trouble if we can't find the induction variable even when
  //    we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

  bool Changed = false;
  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl) {
    if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
                                             ReplaceExitValue, DeadInsts)) {
      NumReplaced += Rewrites;
      Changed = true;
    }
  }

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);

  // Try to convert exit conditions to unsigned and rotate computation
  // out of the loop. Note: Handles invalidation internally if needed.
  Changed |= canonicalizeExitCondition(L);

  // Try to eliminate loop exits based on analyzeable exit counts
  if (optimizeLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    // Some nested loops may share same folded exit basic block,
    // thus we need to notify top most loop.
    SE->forgetTopmostLoop(L);
  }

  // Try to form loop invariant tests for loop exits by changing how many
  // iterations of the loop run when that is unobservable.
  if (predicateLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    SE->forgetLoop(L);
  }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    BasicBlock *PreHeader = L->getLoopPreheader();

    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one. Otherwise, we're changing how many times the innermost
      // loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      // Skip exits whose test is already in the form LFTR would produce.
      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero which
      // weren't when optimizeLoopExits saw them. Arguably, we should iterate
      // until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      // Find an existing phi usable as the LFTR counter for this exit.
      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions. Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
                                       TTI, PreHeader->getTerminator()))
        continue;

      if (!Rewriter.isSafeToExpand(ExitCount))
        continue;

      Changed |= linearFunctionTestReplace(L, ExitingBB,
                                           ExitCount, IndVar,
                                           Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty()) {
    Value *V = DeadInsts.pop_back_val();

    if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
      Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
    else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
      Changed |=
          RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
  }

  // The Rewriter may not be used from this point on.

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");
  if (VerifyMemorySSA && MSSAU)
    MSSAU->getMemorySSA()->verifyMemorySSA();

  return Changed;
}
2003
2006 LPMUpdater &) {
2007 Function *F = L.getHeader()->getParent();
2008 const DataLayout &DL = F->getDataLayout();
2009
2010 IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
2011 WidenIndVars && AllowIVWidening);
2012 if (!IVS.run(&L))
2013 return PreservedAnalyses::all();
2014
2015 auto PA = getLoopPassPreservedAnalyses();
2016 PA.preserveSet<CFGAnalyses>();
2017 if (IVS.runUnswitching()) {
2019 PA.preserve<ShouldRunExtraSimpleLoopUnswitch>();
2020 }
2021
2022 if (AR.MSSA)
2023 PA.preserve<MemorySSAAnalysis>();
2024 return PA;
2025}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Rewrite undef for PHI
This file declares a class to represent arbitrary precision floating point values and provide a varie...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define DEBUG_TYPE
This header defines various interfaces for pass management in LLVM.
This defines the Use class.
static Value * genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, const SCEV *ExitCount, bool UsePostInc, Loop *L, SCEVExpander &Rewriter, ScalarEvolution *SE)
Insert an IR expression which computes the value held by the IV IndVar (which must be an loop counter...
static void replaceExitCond(BranchInst *BI, Value *NewCond, SmallVectorImpl< WeakTrackingVH > &DeadInsts)
static cl::opt< bool > DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), cl::desc("Disable Linear Function Test Replace optimization"))
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB)
Whether the current loop exit test is based on this value.
static cl::opt< ReplaceExitVal > ReplaceExitValue("replexitval", cl::Hidden, cl::init(OnlyCheapRepl), cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), clEnumValN(OnlyCheapRepl, "cheap", "only replace exit value when the cost is cheap"), clEnumValN(UnusedIndVarInLoop, "unusedindvarinloop", "only replace exit value when it is an unused " "induction variable in the loop and has cheap replacement cost"), clEnumValN(NoHardUse, "noharduse", "only replace exit values when loop def likely dead"), clEnumValN(AlwaysRepl, "always", "always replace exit value whenever possible")))
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE, const TargetTransformInfo *TTI)
Update information about the induction variable that is extended by this sign or zero extend operatio...
static void replaceLoopPHINodesWithPreheaderValues(LoopInfo *LI, Loop *L, SmallVectorImpl< WeakTrackingVH > &DeadInsts, ScalarEvolution &SE)
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB)
linearFunctionTestReplace policy.
static bool optimizeLoopExitWithUnknownExitCount(const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter, bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter, SmallVectorImpl< WeakTrackingVH > &DeadInsts)
static Value * createInvariantCond(const Loop *L, BasicBlock *ExitingBB, const ScalarEvolution::LoopInvariantPredicate &LIP, SCEVExpander &Rewriter)
static bool isLoopCounter(PHINode *Phi, Loop *L, ScalarEvolution *SE)
Return true if the given phi is a "counter" in L.
static std::optional< Value * > createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB, const SCEV *MaxIter, bool Inverted, bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter)
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl< Value * > &Visited, unsigned Depth)
Recursive helper for hasConcreteDef().
static bool hasConcreteDef(Value *V)
Return true if the given value is concrete.
static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken, SmallVectorImpl< WeakTrackingVH > &DeadInsts)
static PHINode * getLoopPhiForCounter(Value *IncV, Loop *L)
Given an Value which is hoped to be part of an add recurance in the given loop, return the associated...
static Constant * createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB, bool IsTaken)
static cl::opt< bool > LoopPredicationTraps("indvars-predicate-loop-traps", cl::Hidden, cl::init(true), cl::desc("Predicate conditions that trap in loops with only local writes"))
static cl::opt< bool > UsePostIncrementRanges("indvars-post-increment-ranges", cl::Hidden, cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), cl::init(true))
static PHINode * FindLoopCounter(Loop *L, BasicBlock *ExitingBB, const SCEV *BECount, ScalarEvolution *SE, DominatorTree *DT)
Search the loop header for a loop counter (anadd rec w/step of one) suitable for use by LFTR.
static cl::opt< bool > AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true), cl::desc("Allow widening of indvars to eliminate s/zext"))
static bool crashingBBWithoutEffect(const BasicBlock &BB)
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal)
Convert APF to an integer, if possible.
static cl::opt< bool > LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true), cl::desc("Predicate conditions in read only loops"))
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
Virtual Register Rewriter
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1314
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:528
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
void setCondition(Value *V)
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:448
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition InstrTypes.h:610
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:679
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:682
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:691
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:680
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:681
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:690
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:684
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:687
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:688
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:683
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:692
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:689
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
const APFloat & getValueAPF() const
Definition Constants.h:320
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:131
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU.
Definition DataLayout.h:229
static DebugLoc getDropped()
Definition DebugLoc.h:164
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:165
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This instruction compares its operands according to the predicate given to the constructor.
CmpPredicate getCmpPredicate() const
CmpPredicate getInverseCmpPredicate() const
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
bool replacementPreservesLCSSAForm(Instruction *From, Value *To)
Returns true if replacing From with To everywhere is guaranteed to preserve LCSSA form.
Definition LoopInfo.h:441
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
An analysis that produces MemorySSA for a function.
Definition MemorySSA.h:936
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition MemorySSA.h:702
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:299
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
This node represents a polynomial recurrence on the trip count of the specified loop.
LLVM_ABI const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
LLVM_ABI const SCEVAddRecExpr * getPostIncExpr(ScalarEvolution &SE) const
Return an expression representing the value of this expression one iteration of the loop ahead.
This class uses information about analyze scalars to rewrite expressions in canonical form.
This class represents an analyzed expression in the program.
LLVM_ABI bool isOne() const
Return true if the expression is a constant one.
LLVM_ABI bool isZero() const
Return true if the expression is a constant zero.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI std::optional< LoopInvariantPredicate > getLoopInvariantExitCondDuringFirstIterations(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L, const Instruction *CtxI, const SCEV *MaxIter)
If the result of the predicate LHS Pred RHS is loop invariant with respect to L at given Context duri...
LLVM_ABI Type * getWiderType(Type *Ty1, Type *Ty2) const
LLVM_ABI bool isLoopEntryGuardedByCond(const Loop *L, CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test whether entry to the loop is protected by a conditional between LHS and RHS.
LLVM_ABI const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI ExitLimit computeExitLimitFromCond(const Loop *L, Value *ExitCond, bool ExitIfTrue, bool ControlsOnlyExit, bool AllowPredicates=false)
Compute the number of times the backedge of the specified loop will execute if its exit condition wer...
LLVM_ABI uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
LLVM_ABI std::optional< bool > evaluatePredicateAt(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI)
Check whether the condition described by Pred, LHS, and RHS is true or false in the given Context.
LLVM_ABI void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
ConstantRange getUnsignedRange(const SCEV *S)
Determine the unsigned range for a particular SCEV.
LLVM_ABI void forgetTopmostLoop(const Loop *L)
LLVM_ABI void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may effect its v...
LLVM_ABI const SCEV * getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
const SCEV * getMinusOne(Type *Ty)
Return a SCEV for the constant -1 of a specific type.
LLVM_ABI const SCEV * getNoopOrZeroExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
LLVM_ABI const SCEV * getUMinFromMismatchedTypes(const SCEV *LHS, const SCEV *RHS, bool Sequential=false)
Promote the operands to the wider of the types using zero-extension, and then perform a umin operatio...
LLVM_ABI const SCEV * getCouldNotCompute()
LLVM_ABI const SCEV * getExitCount(const Loop *L, const BasicBlock *ExitingBlock, ExitCountKind Kind=Exact)
Return the number of times the backedge executes before the given exit would be taken; if not exactly...
LLVM_ABI const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
@ SymbolicMaximum
An expression which provides an upper bound on the exact trip count.
LLVM_ABI const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)
Try to apply information from loop guards for L to Expr.
LLVM_ABI bool isKnownPredicateAt(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
const SCEV * getSymbolicMaxBackedgeTakenCount(const Loop *L)
When successful, this returns a SCEV that is greater than or equal to (i.e.
size_type size() const
Definition SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
iterator_range< user_iterator > users()
Definition Value.h:426
bool use_empty() const
Definition Value.h:346
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1099
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:396
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
Changed
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getDeclarationIfExists(const Module *M, ID id)
Look up the Function declaration of the intrinsic id in the Module M and return it if it exists.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
cst_pred_ty< is_one > m_scev_One()
Match an integer 1.
specificloop_ty m_SpecificLoop(const Loop *L)
SCEVAffineAddRec_match< Op0_t, Op1_t, class_match< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
bool match(const SCEV *S, const Pattern &P)
class_match< const SCEV > m_SCEV()
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root, Instruction *OnPathTo, DominatorTree *DT)
Return true if undefined behavior would provable be executed on the path to OnPathTo if Root produced...
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:533
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
Definition InstrProf.h:296
PHINode * createWideIV(const WideIVInfo &WI, LoopInfo *LI, ScalarEvolution *SE, SCEVExpander &Rewriter, DominatorTree *DT, SmallVectorImpl< WeakTrackingVH > &DeadInsts, unsigned &NumElimExt, unsigned &NumWidened, bool HasGuards, bool UsePostIncrementRanges)
Widen Induction Variables - Extend the width of an IV to cover its widest uses.
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
LLVM_ABI bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1622
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI cl::opt< unsigned > SCEVCheapExpansionBudget
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
std::pair< bool, bool > simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, const TargetTransformInfo *TTI, SmallVectorImpl< WeakTrackingVH > &Dead, SCEVExpander &Rewriter, IVVisitor *V=nullptr)
simplifyUsersOfIV - Simplify instructions that use this induction variable by using ScalarEvolution t...
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
TargetTransformInfo TTI
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition MemorySSA.cpp:84
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2120
LLVM_ABI bool isAlmostDeadIV(PHINode *IV, BasicBlock *LatchBlock, Value *Cond)
Return true if the induction variable IV in a Loop whose latch is LatchBlock would become dead if the...
LLVM_ABI int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed,...
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:363
LLVM_ABI bool RecursivelyDeleteDeadPHINode(PHINode *PN, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is an effectively dead PHI node, due to being a def-use chain of single-use no...
Definition Local.cpp:641
@ UnusedIndVarInLoop
Definition LoopUtils.h:554
@ OnlyCheapRepl
Definition LoopUtils.h:552
@ NeverRepl
Definition LoopUtils.h:551
@ NoHardUse
Definition LoopUtils.h:553
@ AlwaysRepl
Definition LoopUtils.h:555
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
A marker analysis to determine if SimpleLoopUnswitch should run again on a given loop.
Collect information about induction variables that are used by sign/zero extend operations.