LLVM 19.0.0git
IndVarSimplify.cpp
Go to the documentation of this file.
1//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This transformation analyzes and transforms the induction variables (and
10// computations derived from them) into simpler forms suitable for subsequent
11// analysis and transformation.
12//
13// If the trip count of a loop is computable, this pass also makes the following
14// changes:
15// 1. The exit condition for the loop is canonicalized to compare the
16// induction value against the exit value. This turns loops like:
17// 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18// 2. Any use outside of the loop of an expression derived from the indvar
19// is changed to compute the derived value outside of the loop, eliminating
20// the dependence on the exit value of the induction variable. If the only
21// purpose of the loop is to compute the exit value of some derived
22// expression, this transformation will make the loop dead.
23//
24//===----------------------------------------------------------------------===//
25
27#include "llvm/ADT/APFloat.h"
28#include "llvm/ADT/ArrayRef.h"
29#include "llvm/ADT/STLExtras.h"
31#include "llvm/ADT/SmallSet.h"
33#include "llvm/ADT/Statistic.h"
44#include "llvm/IR/BasicBlock.h"
45#include "llvm/IR/Constant.h"
47#include "llvm/IR/Constants.h"
48#include "llvm/IR/DataLayout.h"
50#include "llvm/IR/Dominators.h"
51#include "llvm/IR/Function.h"
52#include "llvm/IR/IRBuilder.h"
53#include "llvm/IR/InstrTypes.h"
54#include "llvm/IR/Instruction.h"
57#include "llvm/IR/Intrinsics.h"
58#include "llvm/IR/Module.h"
59#include "llvm/IR/Operator.h"
60#include "llvm/IR/PassManager.h"
62#include "llvm/IR/Type.h"
63#include "llvm/IR/Use.h"
64#include "llvm/IR/User.h"
65#include "llvm/IR/Value.h"
66#include "llvm/IR/ValueHandle.h"
70#include "llvm/Support/Debug.h"
78#include <cassert>
79#include <cstdint>
80#include <utility>
81
82using namespace llvm;
83using namespace PatternMatch;
84
85#define DEBUG_TYPE "indvars"
86
87STATISTIC(NumWidened , "Number of indvars widened");
88STATISTIC(NumReplaced , "Number of exit values replaced");
89STATISTIC(NumLFTR , "Number of loop exit tests replaced");
90STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
91STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
92
94 "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
95 cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
97 clEnumValN(NeverRepl, "never", "never replace exit value"),
99 "only replace exit value when the cost is cheap"),
101 UnusedIndVarInLoop, "unusedindvarinloop",
102 "only replace exit value when it is an unused "
103 "induction variable in the loop and has cheap replacement cost"),
104 clEnumValN(NoHardUse, "noharduse",
105 "only replace exit values when loop def likely dead"),
106 clEnumValN(AlwaysRepl, "always",
107 "always replace exit value whenever possible")));
108
110 "indvars-post-increment-ranges", cl::Hidden,
111 cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
112 cl::init(true));
113
114static cl::opt<bool>
115DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
116 cl::desc("Disable Linear Function Test Replace optimization"));
117
118static cl::opt<bool>
119LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
120 cl::desc("Predicate conditions in read only loops"));
121
122static cl::opt<bool>
123AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
124 cl::desc("Allow widening of indvars to eliminate s/zext"));
125
126namespace {
127
// Worker that performs induction-variable simplification on a single loop.
// It caches the analyses it consults; run(Loop *) is the only entry point.
// NOTE(review): the doc scrape dropped member-declaration lines 133-134 and
// 137 here — presumably the TLI/TTI members (the constructor initializer list
// below assigns both) and the DeadInsts worklist used by simplifyAndExtend;
// confirm against the upstream file.
128class IndVarSimplify {
129 LoopInfo *LI;
130 ScalarEvolution *SE;
131 DominatorTree *DT;
132 const DataLayout &DL;
135 std::unique_ptr<MemorySSAUpdater> MSSAU;
136
// Whether IV widening (s/zext elimination) is allowed for this run.
138 bool WidenIndVars;
139
// Rewrite a floating-point IV as an integer IV when provably equivalent.
140 bool handleFloatingPointIV(Loop *L, PHINode *PH);
141 bool rewriteNonIntegerIVs(Loop *L);
142
// Worklist-driven simplification of IV users, interleaved with IV widening.
143 bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
144 /// Try to improve our exit conditions by converting condition from signed
145 /// to unsigned or rotating computation out of the loop.
146 /// (See inline comment about why this is duplicated from simplifyAndExtend)
147 bool canonicalizeExitCondition(Loop *L);
148 /// Try to eliminate loop exits based on analyzeable exit counts
149 bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
150 /// Try to form loop invariant tests for loop exits by changing how many
151 /// iterations of the loop run when that is unobservable.
152 bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
153
// Replace exit-block PHIs whose exit can only be taken on the first iteration.
154 bool rewriteFirstIterationLoopExitValues(Loop *L);
155
// LFTR: canonicalize the exit test of ExitingBB to an eq/ne compare of
// IndVar against a computed loop limit.
156 bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
157 const SCEV *ExitCount,
158 PHINode *IndVar, SCEVExpander &Rewriter);
159
// Sink loop-invariant instructions that are unused inside the loop.
160 bool sinkUnusedInvariants(Loop *L);
161
162public:
163 IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
164 const DataLayout &DL, TargetLibraryInfo *TLI,
165 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
166 : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
167 WidenIndVars(WidenIndVars) {
// Only build a MemorySSA updater when the caller actually provides MemorySSA.
168 if (MSSA)
169 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
170 }
171
172 bool run(Loop *L);
173};
174
175} // end anonymous namespace
176
177//===----------------------------------------------------------------------===//
178// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
179//===----------------------------------------------------------------------===//
180
181/// Convert APF to an integer, if possible.
182static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
183 bool isExact = false;
184 // See if we can convert this to an int64_t
185 uint64_t UIntVal;
186 if (APF.convertToInteger(MutableArrayRef(UIntVal), 64, true,
187 APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
188 !isExact)
189 return false;
190 IntVal = UIntVal;
191 return true;
192}
193
194/// If the loop has floating induction variable then insert corresponding
195/// integer induction variable if possible.
196/// For example,
197/// for(double i = 0; i < 10000; ++i)
198/// bar(i)
199/// is converted into
200/// for(int i = 0; i < 10000; ++i)
201/// bar((double)i);
// Rewrites a floating-point IV `PN` of the form {init, +, inc} with an
// fcmp-controlled exit into an equivalent signed-i32 integer IV, deleting the
// old FP compare/increment and (if still used) replacing PN with a sitofp of
// the new counter. Returns true if the rewrite happened.
202bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
// L->contains(...) yields 0/1, so IncomingEdge indexes the preheader edge and
// BackEdge (its XOR with 1) the latch edge of this two-entry PHI.
203 unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
204 unsigned BackEdge = IncomingEdge^1;
205
206 // Check incoming value.
207 auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
208
209 int64_t InitValue;
210 if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
211 return false;
212
213 // Check IV increment. Reject this PN if increment operation is not
214 // an add or increment value can not be represented by an integer.
215 auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
216 if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
217
218 // If this is not an add of the PHI with a constantfp, or if the constant fp
219 // is not an integer, bail out.
220 ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
221 int64_t IncValue;
222 if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
223 !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
224 return false;
225
226 // Check Incr uses. One user is PN and the other user is an exit condition
227 // used by the conditional terminator.
228 Value::user_iterator IncrUse = Incr->user_begin();
229 Instruction *U1 = cast<Instruction>(*IncrUse++);
230 if (IncrUse == Incr->user_end()) return false;
231 Instruction *U2 = cast<Instruction>(*IncrUse++);
232 if (IncrUse != Incr->user_end()) return false;
233
234 // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
235 // only used by a branch, we can't transform it.
236 FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
237 if (!Compare)
238 Compare = dyn_cast<FCmpInst>(U2);
239 if (!Compare || !Compare->hasOneUse() ||
240 !isa<BranchInst>(Compare->user_back()))
241 return false;
242
243 BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
244
245 // We need to verify that the branch actually controls the iteration count
246 // of the loop. If not, the new IV can overflow and no one will notice.
247 // The branch block must be in the loop and one of the successors must be out
248 // of the loop.
249 assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
250 if (!L->contains(TheBr->getParent()) ||
251 (L->contains(TheBr->getSuccessor(0)) &&
252 L->contains(TheBr->getSuccessor(1))))
253 return false;
254
255 // If it isn't a comparison with an integer-as-fp (the exit value), we can't
256 // transform it.
257 ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
258 int64_t ExitValue;
259 if (ExitValueVal == nullptr ||
260 !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
261 return false;
262
263 // Find new predicate for integer comparison.
// NOTE(review): the scrape dropped line 264 (presumably the declaration of
// NewPred, a CmpInst::Predicate assigned in the switch below) and the odd
// lines 267-277 (presumably the ordered FCMP_O* case labels paired with the
// unordered ones kept below) — confirm against the upstream file.
265 switch (Compare->getPredicate()) {
266 default: return false; // Unknown comparison.
268 case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
270 case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
272 case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
274 case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
276 case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
278 case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
279 }
280
281 // We convert the floating point induction variable to a signed i32 value if
282 // we can. This is only safe if the comparison will not overflow in a way
283 // that won't be trapped by the integer equivalent operations. Check for this
284 // now.
285 // TODO: We could use i64 if it is native and the range requires it.
286
287 // The start/stride/exit values must all fit in signed i32.
288 if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
289 return false;
290
291 // If not actually striding (add x, 0.0), avoid touching the code.
292 if (IncValue == 0)
293 return false;
294
295 // Positive and negative strides have different safety conditions.
296 if (IncValue > 0) {
297 // If we have a positive stride, we require the init to be less than the
298 // exit value.
299 if (InitValue >= ExitValue)
300 return false;
301
302 uint32_t Range = uint32_t(ExitValue-InitValue);
303 // Check for infinite loop, either:
304 // while (i <= Exit) or until (i > Exit)
305 if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
306 if (++Range == 0) return false; // Range overflows.
307 }
308
309 unsigned Leftover = Range % uint32_t(IncValue);
310
311 // If this is an equality comparison, we require that the strided value
312 // exactly land on the exit value, otherwise the IV condition will wrap
313 // around and do things the fp IV wouldn't.
314 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
315 Leftover != 0)
316 return false;
317
318 // If the stride would wrap around the i32 before exiting, we can't
319 // transform the IV.
320 if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
321 return false;
322 } else {
323 // If we have a negative stride, we require the init to be greater than the
324 // exit value.
325 if (InitValue <= ExitValue)
326 return false;
327
328 uint32_t Range = uint32_t(InitValue-ExitValue);
329 // Check for infinite loop, either:
330 // while (i >= Exit) or until (i < Exit)
331 if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
332 if (++Range == 0) return false; // Range overflows.
333 }
334
335 unsigned Leftover = Range % uint32_t(-IncValue);
336
337 // If this is an equality comparison, we require that the strided value
338 // exactly land on the exit value, otherwise the IV condition will wrap
339 // around and do things the fp IV wouldn't.
340 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
341 Leftover != 0)
342 return false;
343
344 // If the stride would wrap around the i32 before exiting, we can't
345 // transform the IV.
346 if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
347 return false;
348 }
349
// NOTE(review): line 350 was dropped by the scrape — presumably the
// declaration of Int32Ty (an i32 Type* used by all the constructors below).
351
352 // Insert new integer induction variable.
353 PHINode *NewPHI =
354 PHINode::Create(Int32Ty, 2, PN->getName() + ".int", PN->getIterator());
355 NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
356 PN->getIncomingBlock(IncomingEdge));
357
358 Value *NewAdd =
359 BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
360 Incr->getName() + ".int", Incr->getIterator());
361 NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
362
363 ICmpInst *NewCompare =
364 new ICmpInst(TheBr->getIterator(), NewPred, NewAdd,
365 ConstantInt::get(Int32Ty, ExitValue), Compare->getName());
366
367 // In the following deletions, PN may become dead and may be deleted.
368 // Use a WeakTrackingVH to observe whether this happens.
369 WeakTrackingVH WeakPH = PN;
370
371 // Delete the old floating point exit comparison. The branch starts using the
372 // new comparison.
373 NewCompare->takeName(Compare);
374 Compare->replaceAllUsesWith(NewCompare);
375 RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());
376
377 // Delete the old floating point increment.
378 Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
379 RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());
380
381 // If the FP induction variable still has uses, this is because something else
382 // in the loop uses its value. In order to canonicalize the induction
383 // variable, we chose to eliminate the IV and rewrite it in terms of an
384 // int->fp cast.
385 //
386 // We give preference to sitofp over uitofp because it is faster on most
387 // platforms.
388 if (WeakPH) {
// NOTE(review): the continuation line 390 of this SIToFPInst constructor call
// (presumably the insertion-point argument) was dropped by the scrape.
389 Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
391 PN->replaceAllUsesWith(Conv);
392 RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
393 }
394 return true;
395}
396
// Convert every floating-point header PHI that handleFloatingPointIV accepts
// into an integer IV. Returns true if any PHI was rewritten.
397bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
398 // First step. Check to see if there are any floating-point recurrences.
399 // If there are, change them into integer recurrences, permitting analysis by
400 // the SCEV routines.
401 BasicBlock *Header = L->getHeader();
402
// NOTE(review): line 403 was dropped by the scrape — presumably the
// declaration of PHIs, a SmallVector<WeakTrackingVH, N> snapshotting the
// header PHIs so deletions during rewriting are observed safely.
404 for (PHINode &PN : Header->phis())
405 PHIs.push_back(&PN);
406
407 bool Changed = false;
// dyn_cast_or_null: a handle may have been nulled if a prior rewrite
// deleted the PHI it tracked.
408 for (WeakTrackingVH &PHI : PHIs)
409 if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHI))
410 Changed |= handleFloatingPointIV(L, PN);
411
412 // If the loop previously had floating-point IV, ScalarEvolution
413 // may not have been able to compute a trip count. Now that we've done some
414 // re-writing, the trip count may be computable.
415 if (Changed)
416 SE->forgetLoop(L);
417 return Changed;
418}
419
420//===---------------------------------------------------------------------===//
421// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
422// they will exit at the first iteration.
423//===---------------------------------------------------------------------===//
424
425/// Check to see if this loop has loop invariant conditions which lead to loop
426/// exits. If so, we know that if the exit path is taken, it is at the first
427/// loop iteration. This lets us predict exit values of PHI nodes that live in
428/// loop header.
// For each exit-block LCSSA PHI fed by a loop-invariant-condition exit that
// must execute on iteration one (its block dominates the latch), forward the
// header PHI's preheader value directly. Returns true on any change.
429bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
430 // Verify the input to the pass is already in LCSSA form.
431 assert(L->isLCSSAForm(*DT));
432
// NOTE(review): line 433 was dropped by the scrape — presumably the
// declaration of ExitBlocks, a SmallVector<BasicBlock *, N> filled next.
434 L->getUniqueExitBlocks(ExitBlocks);
435
436 bool MadeAnyChanges = false;
437 for (auto *ExitBB : ExitBlocks) {
438 // If there are no more PHI nodes in this exit block, then no more
439 // values defined inside the loop are used on this path.
440 for (PHINode &PN : ExitBB->phis()) {
441 for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
442 IncomingValIdx != E; ++IncomingValIdx) {
443 auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
444
445 // Can we prove that the exit must run on the first iteration if it
446 // runs at all? (i.e. early exits are fine for our purposes, but
447 // traces which lead to this exit being taken on the 2nd iteration
448 // aren't.) Note that this is about whether the exit branch is
449 // executed, not about whether it is taken.
450 if (!L->getLoopLatch() ||
451 !DT->dominates(IncomingBB, L->getLoopLatch()))
452 continue;
453
454 // Get condition that leads to the exit path.
455 auto *TermInst = IncomingBB->getTerminator();
456
457 Value *Cond = nullptr;
458 if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
459 // Must be a conditional branch, otherwise the block
460 // should not be in the loop.
461 Cond = BI->getCondition();
462 } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
463 Cond = SI->getCondition();
464 else
465 continue;
466
// A loop-variant condition could first become true on a later iteration,
// so the "taken only on iteration one" reasoning would not hold.
467 if (!L->isLoopInvariant(Cond))
468 continue;
469
470 auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
471
472 // Only deal with PHIs in the loop header.
473 if (!ExitVal || ExitVal->getParent() != L->getHeader())
474 continue;
475
476 // If ExitVal is a PHI on the loop header, then we know its
477 // value along this exit because the exit can only be taken
478 // on the first iteration.
479 auto *LoopPreheader = L->getLoopPreheader();
480 assert(LoopPreheader && "Invalid loop");
481 int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
482 if (PreheaderIdx != -1) {
483 assert(ExitVal->getParent() == L->getHeader() &&
484 "ExitVal must be in loop header");
485 MadeAnyChanges = true;
486 PN.setIncomingValue(IncomingValIdx,
487 ExitVal->getIncomingValue(PreheaderIdx));
// Invalidate cached SCEV for the rewritten PHI.
488 SE->forgetValue(&PN);
489 }
490 }
491 }
492 }
493 return MadeAnyChanges;
494}
495
496//===----------------------------------------------------------------------===//
497// IV Widening - Extend the width of an IV to cover its widest uses.
498//===----------------------------------------------------------------------===//
499
500/// Update information about the induction variable that is extended by this
501/// sign or zero extend operation. This is used to determine the final width of
502/// the IV before actually widening it.
// Record in WI the widest legal integer type that sign/zero-extends of the
// narrow IV are cast to, plus the signedness to use for the widened IV.
// Non-extension casts, illegal widths, and costlier-ADD types are ignored.
503static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
504 ScalarEvolution *SE,
505 const TargetTransformInfo *TTI) {
506 bool IsSigned = Cast->getOpcode() == Instruction::SExt;
507 if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
508 return;
509
510 Type *Ty = Cast->getType();
511 uint64_t Width = SE->getTypeSizeInBits(Ty);
// Only widen to types the target natively supports.
512 if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
513 return;
514
515 // Check that `Cast` actually extends the induction variable (we rely on this
516 // later). This takes care of cases where `Cast` is extending a truncation of
517 // the narrow induction variable, and thus can end up being narrower than the
518 // "narrow" induction variable.
519 uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
520 if (NarrowIVWidth >= Width)
521 return;
522
523 // Cast is either an sext or zext up to this point.
524 // We should not widen an indvar if arithmetics on the wider indvar are more
525 // expensive than those on the narrower indvar. We check only the cost of ADD
526 // because at least an ADD is required to increment the induction variable. We
527 // could compute more comprehensively the cost of all instructions on the
528 // induction variable when necessary.
529 if (TTI &&
530 TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
531 TTI->getArithmeticInstrCost(Instruction::Add,
532 Cast->getOperand(0)->getType())) {
533 return;
534 }
535
// First (or wider) extension seen: adopt this type and signedness outright.
// NOTE(review): line 538, presumably the assignment of WI.WidestNativeType
// to Ty inside this branch, was dropped by the scrape — confirm upstream.
536 if (!WI.WidestNativeType ||
537 Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
539 WI.IsSigned = IsSigned;
540 return;
541 }
542
543 // We extend the IV to satisfy the sign of its user(s), or 'signed'
544 // if there are multiple users with both sign- and zero extensions,
545 // in order not to introduce nondeterministic behaviour based on the
546 // unspecified order of a PHI nodes' users-iterator.
547 WI.IsSigned |= IsSigned;
548}
549
550//===----------------------------------------------------------------------===//
551// Live IV Reduction - Minimize IVs live across the loop.
552//===----------------------------------------------------------------------===//
553
554//===----------------------------------------------------------------------===//
555// Simplification of IV users based on SCEV evaluation.
556//===----------------------------------------------------------------------===//
557
558namespace {
559
// IVVisitor adapter used by simplifyUsersOfIV: funnels every cast of the
// narrow IV into visitIVCast so WI accumulates the widest extension type.
// NOTE(review): the scrape dropped lines 562 and 569 here — presumably the
// TTI member declaration and the TTI constructor parameter (the initializer
// list below reads TTI(TTI)); confirm against the upstream file.
560class IndVarSimplifyVisitor : public IVVisitor {
561 ScalarEvolution *SE;
563 PHINode *IVPhi;
564
565public:
// Aggregated widening info for IVPhi, read by the caller after the visit.
566 WideIVInfo WI;
567
568 IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
570 const DominatorTree *DTree)
571 : SE(SCEV), TTI(TTI), IVPhi(IV) {
// DT is inherited from the IVVisitor base (assigned, not initialized).
572 DT = DTree;
573 WI.NarrowIV = IVPhi;
574 }
575
576 // Implement the interface used by simplifyUsersOfIV.
577 void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
578};
579
580} // end anonymous namespace
581
582/// Iteratively perform simplification on a worklist of IV users. Each
583/// successive simplification may push more users which may themselves be
584/// candidates for simplification.
585///
586/// Sign/Zero extend elimination is interleaved with IV simplification.
// Repeatedly simplify all IV users of the header PHIs, then widen any IVs
// whose extensions were recorded; widened PHIs re-enter the worklist for
// another simplification round. Returns true if anything changed.
587bool IndVarSimplify::simplifyAndExtend(Loop *L,
588 SCEVExpander &Rewriter,
589 LoopInfo *LI) {
// NOTE(review): the scrape dropped lines 590 and 596 — presumably the
// declarations of WideIVs (SmallVector<WideIVInfo, N>) and LoopPhis
// (SmallVector<PHINode *, N>) used throughout the body; confirm upstream.
591
// Guards affect how widening reasons about control-dependent ranges; detect
// whether the experimental_guard intrinsic is used anywhere in this module.
592 auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
593 Intrinsic::getName(Intrinsic::experimental_guard));
594 bool HasGuards = GuardDecl && !GuardDecl->use_empty();
595
597 for (PHINode &PN : L->getHeader()->phis())
598 LoopPhis.push_back(&PN);
599
600 // Each round of simplification iterates through the SimplifyIVUsers worklist
601 // for all current phis, then determines whether any IVs can be
602 // widened. Widening adds new phis to LoopPhis, inducing another round of
603 // simplification on the wide IVs.
604 bool Changed = false;
605 while (!LoopPhis.empty()) {
606 // Evaluate as many IV expressions as possible before widening any IVs. This
607 // forces SCEV to set no-wrap flags before evaluating sign/zero
608 // extension. The first time SCEV attempts to normalize sign/zero extension,
609 // the result becomes final. So for the most predictable results, we delay
610 // evaluation of sign/zero extend evaluation until needed, and avoid running
611 // other SCEV based analysis prior to simplifyAndExtend.
612 do {
613 PHINode *CurrIV = LoopPhis.pop_back_val();
614
615 // Information about sign/zero extensions of CurrIV.
616 IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
617
618 Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
619 &Visitor);
620
// A recorded WidestNativeType means at least one profitable extension exists.
621 if (Visitor.WI.WidestNativeType) {
622 WideIVs.push_back(Visitor.WI);
623 }
624 } while(!LoopPhis.empty());
625
626 // Continue if we disallowed widening.
627 if (!WidenIndVars)
628 continue;
629
630 for (; !WideIVs.empty(); WideIVs.pop_back()) {
631 unsigned ElimExt;
632 unsigned Widened;
633 if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
634 DT, DeadInsts, ElimExt, Widened,
635 HasGuards, UsePostIncrementRanges)) {
// Bump pass statistics and queue the new wide PHI for re-simplification.
636 NumElimExt += ElimExt;
637 NumWidened += Widened;
638 Changed = true;
639 LoopPhis.push_back(WidePhi);
640 }
641 }
642 }
643 return Changed;
644}
645
646//===----------------------------------------------------------------------===//
647// linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
648//===----------------------------------------------------------------------===//
649
650/// Given an Value which is hoped to be part of an add recurance in the given
651/// loop, return the associated Phi node if so. Otherwise, return null. Note
652/// that this is less general than SCEVs AddRec checking.
// NOTE(review): the scrape dropped line 653 — the signature of this static
// helper (per its doc comment and callers, presumably
// `static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {`). The body
// below is intact: it pattern-matches IncV as `phi op invariant` (add/sub,
// either operand order, or a 2-operand GEP with the phi first) where the phi
// lives in L's header, returning that phi or nullptr.
654 Instruction *IncI = dyn_cast<Instruction>(IncV);
655 if (!IncI)
656 return nullptr;
657
658 switch (IncI->getOpcode()) {
659 case Instruction::Add:
660 case Instruction::Sub:
661 break;
662 case Instruction::GetElementPtr:
663 // An IV counter must preserve its type.
664 if (IncI->getNumOperands() == 2)
665 break;
666 [[fallthrough]];
667 default:
668 return nullptr;
669 }
670
671 PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
672 if (Phi && Phi->getParent() == L->getHeader()) {
673 if (L->isLoopInvariant(IncI->getOperand(1)))
674 return Phi;
675 return nullptr;
676 }
// GEPs are not commutative: the pointer operand must be the phi.
677 if (IncI->getOpcode() == Instruction::GetElementPtr)
678 return nullptr;
679
680 // Allow add/sub to be commuted.
681 Phi = dyn_cast<PHINode>(IncI->getOperand(1));
682 if (Phi && Phi->getParent() == L->getHeader()) {
683 if (L->isLoopInvariant(IncI->getOperand(0)))
684 return Phi;
685 }
686 return nullptr;
687}
688
689/// Whether the current loop exit test is based on this value. Currently this
690/// is limited to a direct use in the loop condition.
691static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
692 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
693 ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
694 // TODO: Allow non-icmp loop test.
695 if (!ICmp)
696 return false;
697
698 // TODO: Allow indirect use.
699 return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
700}
701
702/// linearFunctionTestReplace policy. Return true unless we can show that the
703/// current exit test is already sufficiently canonical.
704static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
705 assert(L->getLoopLatch() && "Must be in simplified form");
706
707 // Avoid converting a constant or loop invariant test back to a runtime
708 // test. This is critical for when SCEV's cached ExitCount is less precise
709 // than the current IR (such as after we've proven a particular exit is
710 // actually dead and thus the BE count never reaches our ExitCount.)
711 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
712 if (L->isLoopInvariant(BI->getCondition()))
713 return false;
714
715 // Do LFTR to simplify the exit condition to an ICMP.
716 ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
717 if (!Cond)
718 return true;
719
720 // Do LFTR to simplify the exit ICMP to EQ/NE
721 ICmpInst::Predicate Pred = Cond->getPredicate();
722 if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
723 return true;
724
725 // Look for a loop invariant RHS
726 Value *LHS = Cond->getOperand(0);
727 Value *RHS = Cond->getOperand(1);
728 if (!L->isLoopInvariant(RHS)) {
729 if (!L->isLoopInvariant(LHS))
730 return true;
731 std::swap(LHS, RHS);
732 }
733 // Look for a simple IV counter LHS
734 PHINode *Phi = dyn_cast<PHINode>(LHS);
735 if (!Phi)
736 Phi = getLoopPhiForCounter(LHS, L);
737
738 if (!Phi)
739 return true;
740
741 // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
742 int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
743 if (Idx < 0)
744 return true;
745
746 // Do LFTR if the exit condition's IV is *not* a simple counter.
747 Value *IncV = Phi->getIncomingValue(Idx);
748 return Phi != getLoopPhiForCounter(IncV, L);
749}
750
751/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
752/// down to checking that all operands are constant and listing instructions
753/// that may hide undef.
// NOTE(review): the scrape dropped line 754, the first line of this helper's
// signature — per its recursive call below, presumably
// `static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,`.
// The body proves (conservatively, to depth 6) that undef cannot reach V:
// constants must not be undef; loads/calls/arguments may hide undef; other
// instructions are concrete iff all their (unvisited) operands are.
755 unsigned Depth) {
756 if (isa<Constant>(V))
757 return !isa<UndefValue>(V);
758
// Bound the recursion; beyond this we give up and assume possibly-undef.
759 if (Depth >= 6)
760 return false;
761
762 // Conservatively handle non-constant non-instructions. For example, Arguments
763 // may be undef.
764 Instruction *I = dyn_cast<Instruction>(V);
765 if (!I)
766 return false;
767
768 // Load and return values may be undef.
769 if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
770 return false;
771
772 // Optimistically handle other instructions.
773 for (Value *Op : I->operands()) {
// Visited set breaks cycles through PHIs and avoids re-walking shared defs.
774 if (!Visited.insert(Op).second)
775 continue;
776 if (!hasConcreteDefImpl(Op, Visited, Depth+1))
777 return false;
778 }
779 return true;
780}
781
782/// Return true if the given value is concrete. We must prove that undef can
783/// never reach it.
784///
785/// TODO: If we decide that this is a good approach to checking for undef, we
786/// may factor it into a common location.
// Entry point for the recursive undef check above; seeds the visited set
// with V itself so cycles back to V terminate.
// NOTE(review): line 788, presumably the declaration of Visited
// (a SmallPtrSet<Value*, N>), was dropped by the scrape.
787static bool hasConcreteDef(Value *V) {
789 Visited.insert(V);
790 return hasConcreteDefImpl(V, Visited, 0);
791}
792
793/// Return true if the given phi is a "counter" in L. A counter is an
794/// add recurance (of integer or pointer type) with an arbitrary start, and a
795/// step of 1. Note that L must have exactly one latch.
796static bool isLoopCounter(PHINode* Phi, Loop *L,
797 ScalarEvolution *SE) {
798 assert(Phi->getParent() == L->getHeader());
799 assert(L->getLoopLatch());
800
801 if (!SE->isSCEVable(Phi->getType()))
802 return false;
803
804 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
805 if (!AR || AR->getLoop() != L || !AR->isAffine())
806 return false;
807
808 const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
809 if (!Step || !Step->isOne())
810 return false;
811
812 int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
813 Value *IncV = Phi->getIncomingValue(LatchIdx);
814 return (getLoopPhiForCounter(IncV, L) == Phi &&
815 isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
816}
817
818/// Search the loop header for a loop counter (anadd rec w/step of one)
819/// suitable for use by LFTR. If multiple counters are available, select the
820/// "best" one based profitable heuristics.
821///
822/// BECount may be an i8* pointer type. The pointer difference is already
823/// valid count without scaling the address stride, so it remains a pointer
824/// expression as far as SCEV is concerned.
// Scan the header PHIs for the best unit-stride counter usable by LFTR:
// wide enough for BECount, legal-width, provably-concrete (or already in the
// exit test), poison-safe, preferring live, count-from-zero, and wider PHIs.
// NOTE(review): the scrape dropped line 827, the tail of this signature —
// per the uses of SE and DT below, presumably
// `ScalarEvolution *SE, DominatorTree *DT) {`.
825static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
826 const SCEV *BECount,
828 uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
829
830 Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
831
832 // Loop over all of the PHI nodes, looking for a simple counter.
833 PHINode *BestPhi = nullptr;
834 const SCEV *BestInit = nullptr;
835 BasicBlock *LatchBlock = L->getLoopLatch();
836 assert(LatchBlock && "Must be in simplified form");
837 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
838
839 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
840 PHINode *Phi = cast<PHINode>(I);
841 if (!isLoopCounter(Phi, L, SE))
842 continue;
843
// Safe cast: isLoopCounter already proved the SCEV is an add-recurrence.
844 const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
845
846 // AR may be a pointer type, while BECount is an integer type.
847 // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
848 // AR may not be a narrower type, or we may never exit.
849 uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
850 if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
851 continue;
852
853 // Avoid reusing a potentially undef value to compute other values that may
854 // have originally had a concrete definition.
855 if (!hasConcreteDef(Phi)) {
856 // We explicitly allow unknown phis as long as they are already used by
857 // the loop exit test. This is legal since performing LFTR could not
858 // increase the number of undef users.
859 Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
860 if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
861 !isLoopExitTestBasedOn(IncPhi, ExitingBB))
862 continue;
863 }
864
865 // Avoid introducing undefined behavior due to poison which didn't exist in
866 // the original program. (Annoyingly, the rules for poison and undef
867 // propagation are distinct, so this does NOT cover the undef case above.)
868 // We have to ensure that we don't introduce UB by introducing a use on an
869 // iteration where said IV produces poison. Our strategy here differs for
870 // pointers and integer IVs. For integers, we strip and reinfer as needed,
871 // see code in linearFunctionTestReplace. For pointers, we restrict
872 // transforms as there is no good way to reinfer inbounds once lost.
873 if (!Phi->getType()->isIntegerTy() &&
874 !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
875 continue;
876
877 const SCEV *Init = AR->getStart();
878
879 if (BestPhi && !isAlmostDeadIV(BestPhi, LatchBlock, Cond)) {
880 // Don't force a live loop counter if another IV can be used.
881 if (isAlmostDeadIV(Phi, LatchBlock, Cond))
882 continue;
883
884 // Prefer to count-from-zero. This is a more "canonical" counter form. It
885 // also prefers integer to pointer IVs.
886 if (BestInit->isZero() != Init->isZero()) {
887 if (BestInit->isZero())
888 continue;
889 }
890 // If two IVs both count from zero or both count from nonzero then the
891 // narrower is likely a dead phi that has been widened. Use the wider phi
892 // to allow the other to be eliminated.
893 else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
894 continue;
895 }
896 BestPhi = Phi;
897 BestInit = Init;
898 }
899 return BestPhi;
900}
901
902/// Insert an IR expression which computes the value held by the IV IndVar
903/// (which must be an loop counter w/unit stride) after the backedge of loop L
904/// is taken ExitCount times.
905static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
906 const SCEV *ExitCount, bool UsePostInc, Loop *L,
907 SCEVExpander &Rewriter, ScalarEvolution *SE) {
908 assert(isLoopCounter(IndVar, L, SE));
909 assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
910 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
911 assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
912
913 // For integer IVs, truncate the IV before computing the limit unless we
914 // know apriori that the limit must be a constant when evaluated in the
915 // bitwidth of the IV. We prefer (potentially) keeping a truncate of the
916 // IV in the loop over a (potentially) expensive expansion of the widened
917 // exit count add(zext(add)) expression.
918 if (IndVar->getType()->isIntegerTy() &&
919 SE->getTypeSizeInBits(AR->getType()) >
920 SE->getTypeSizeInBits(ExitCount->getType())) {
921 const SCEV *IVInit = AR->getStart();
922 if (!isa<SCEVConstant>(IVInit) || !isa<SCEVConstant>(ExitCount))
923 AR = cast<SCEVAddRecExpr>(SE->getTruncateExpr(AR, ExitCount->getType()));
924 }
925
926 const SCEVAddRecExpr *ARBase = UsePostInc ? AR->getPostIncExpr(*SE) : AR;
927 const SCEV *IVLimit = ARBase->evaluateAtIteration(ExitCount, *SE);
928 assert(SE->isLoopInvariant(IVLimit, L) &&
929 "Computed iteration count is not loop invariant!");
930 return Rewriter.expandCodeFor(IVLimit, ARBase->getType(),
931 ExitingBB->getTerminator());
932}
933
934/// This method rewrites the exit condition of the loop to be a canonical !=
935/// comparison against the incremented loop induction variable. This pass is
936/// able to rewrite the exit tests of any loop where the SCEV analysis can
937/// determine a loop-invariant trip count of the loop, which is actually a much
938/// broader range than just linear tests.
939bool IndVarSimplify::
940linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
941 const SCEV *ExitCount,
942 PHINode *IndVar, SCEVExpander &Rewriter) {
943 assert(L->getLoopLatch() && "Loop no longer in simplified form?");
944 assert(isLoopCounter(IndVar, L, SE));
945 Instruction * const IncVar =
946 cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
947
948 // Initialize CmpIndVar to the preincremented IV.
949 Value *CmpIndVar = IndVar;
950 bool UsePostInc = false;
951
952 // If the exiting block is the same as the backedge block, we prefer to
953 // compare against the post-incremented value, otherwise we must compare
954 // against the preincremented value.
955 if (ExitingBB == L->getLoopLatch()) {
956 // For pointer IVs, we chose to not strip inbounds which requires us not
957 // to add a potentially UB introducing use. We need to either a) show
958 // the loop test we're modifying is already in post-inc form, or b) show
959 // that adding a use must not introduce UB.
960 bool SafeToPostInc =
961 IndVar->getType()->isIntegerTy() ||
962 isLoopExitTestBasedOn(IncVar, ExitingBB) ||
963 mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
964 if (SafeToPostInc) {
965 UsePostInc = true;
966 CmpIndVar = IncVar;
967 }
968 }
969
970 // It may be necessary to drop nowrap flags on the incrementing instruction
971 // if either LFTR moves from a pre-inc check to a post-inc check (in which
972 // case the increment might have previously been poison on the last iteration
973 // only) or if LFTR switches to a different IV that was previously dynamically
974 // dead (and as such may be arbitrarily poison). We remove any nowrap flags
975 // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
976 // check), because the pre-inc addrec flags may be adopted from the original
977 // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
978 // TODO: This handling is inaccurate for one case: If we switch to a
979 // dynamically dead IV that wraps on the first loop iteration only, which is
980 // not covered by the post-inc addrec. (If the new IV was not dynamically
981 // dead, it could not be poison on the first iteration in the first place.)
982 if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
983 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
984 if (BO->hasNoUnsignedWrap())
985 BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
986 if (BO->hasNoSignedWrap())
987 BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
988 }
989
990 Value *ExitCnt = genLoopLimit(
991 IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
992 assert(ExitCnt->getType()->isPointerTy() ==
993 IndVar->getType()->isPointerTy() &&
994 "genLoopLimit missed a cast");
995
996 // Insert a new icmp_ne or icmp_eq instruction before the branch.
997 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
999 if (L->contains(BI->getSuccessor(0)))
1000 P = ICmpInst::ICMP_NE;
1001 else
1002 P = ICmpInst::ICMP_EQ;
1003
1004 IRBuilder<> Builder(BI);
1005
1006 // The new loop exit condition should reuse the debug location of the
1007 // original loop exit condition.
1008 if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
1009 Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
1010
1011 // For integer IVs, if we evaluated the limit in the narrower bitwidth to
1012 // avoid the expensive expansion of the limit expression in the wider type,
1013 // emit a truncate to narrow the IV to the ExitCount type. This is safe
1014 // since we know (from the exit count bitwidth), that we can't self-wrap in
1015 // the narrower type.
1016 unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
1017 unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
1018 if (CmpIndVarSize > ExitCntSize) {
1019 assert(!CmpIndVar->getType()->isPointerTy() &&
1020 !ExitCnt->getType()->isPointerTy());
1021
1022 // Before resorting to actually inserting the truncate, use the same
1023 // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
1024 // the other side of the comparison instead. We still evaluate the limit
1025 // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
1026 // a truncate within in.
1027 bool Extended = false;
1028 const SCEV *IV = SE->getSCEV(CmpIndVar);
1029 const SCEV *TruncatedIV = SE->getTruncateExpr(IV, ExitCnt->getType());
1030 const SCEV *ZExtTrunc =
1031 SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
1032
1033 if (ZExtTrunc == IV) {
1034 Extended = true;
1035 ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
1036 "wide.trip.count");
1037 } else {
1038 const SCEV *SExtTrunc =
1039 SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
1040 if (SExtTrunc == IV) {
1041 Extended = true;
1042 ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
1043 "wide.trip.count");
1044 }
1045 }
1046
1047 if (Extended) {
1048 bool Discard;
1049 L->makeLoopInvariant(ExitCnt, Discard);
1050 } else
1051 CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
1052 "lftr.wideiv");
1053 }
1054 LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
1055 << " LHS:" << *CmpIndVar << '\n'
1056 << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
1057 << "\n"
1058 << " RHS:\t" << *ExitCnt << "\n"
1059 << "ExitCount:\t" << *ExitCount << "\n"
1060 << " was: " << *BI->getCondition() << "\n");
1061
1062 Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
1063 Value *OrigCond = BI->getCondition();
1064 // It's tempting to use replaceAllUsesWith here to fully replace the old
1065 // comparison, but that's not immediately safe, since users of the old
1066 // comparison may not be dominated by the new comparison. Instead, just
1067 // update the branch to use the new comparison; in the common case this
1068 // will make old comparison dead.
1069 BI->setCondition(Cond);
1070 DeadInsts.emplace_back(OrigCond);
1071
1072 ++NumLFTR;
1073 return true;
1074}
1075
1076//===----------------------------------------------------------------------===//
1077// sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
1078//===----------------------------------------------------------------------===//
1079
1080/// If there's a single exit block, sink any loop-invariant values that
1081/// were defined in the preheader but not used inside the loop into the
1082/// exit block to reduce register pressure in the loop.
1083bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
1084 BasicBlock *ExitBlock = L->getExitBlock();
1085 if (!ExitBlock) return false;
1086
1087 BasicBlock *Preheader = L->getLoopPreheader();
1088 if (!Preheader) return false;
1089
1090 bool MadeAnyChanges = false;
1091 BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
1092 BasicBlock::iterator I(Preheader->getTerminator());
1093 while (I != Preheader->begin()) {
1094 --I;
1095 // New instructions were inserted at the end of the preheader.
1096 if (isa<PHINode>(I))
1097 break;
1098
1099 // Don't move instructions which might have side effects, since the side
1100 // effects need to complete before instructions inside the loop. Also don't
1101 // move instructions which might read memory, since the loop may modify
1102 // memory. Note that it's okay if the instruction might have undefined
1103 // behavior: LoopSimplify guarantees that the preheader dominates the exit
1104 // block.
1105 if (I->mayHaveSideEffects() || I->mayReadFromMemory())
1106 continue;
1107
1108 // Skip debug info intrinsics.
1109 if (isa<DbgInfoIntrinsic>(I))
1110 continue;
1111
1112 // Skip eh pad instructions.
1113 if (I->isEHPad())
1114 continue;
1115
1116 // Don't sink alloca: we never want to sink static alloca's out of the
1117 // entry block, and correctly sinking dynamic alloca's requires
1118 // checks for stacksave/stackrestore intrinsics.
1119 // FIXME: Refactor this check somehow?
1120 if (isa<AllocaInst>(I))
1121 continue;
1122
1123 // Determine if there is a use in or before the loop (direct or
1124 // otherwise).
1125 bool UsedInLoop = false;
1126 for (Use &U : I->uses()) {
1127 Instruction *User = cast<Instruction>(U.getUser());
1128 BasicBlock *UseBB = User->getParent();
1129 if (PHINode *P = dyn_cast<PHINode>(User)) {
1130 unsigned i =
1132 UseBB = P->getIncomingBlock(i);
1133 }
1134 if (UseBB == Preheader || L->contains(UseBB)) {
1135 UsedInLoop = true;
1136 break;
1137 }
1138 }
1139
1140 // If there is, the def must remain in the preheader.
1141 if (UsedInLoop)
1142 continue;
1143
1144 // Otherwise, sink it to the exit block.
1145 Instruction *ToMove = &*I;
1146 bool Done = false;
1147
1148 if (I != Preheader->begin()) {
1149 // Skip debug info intrinsics.
1150 do {
1151 --I;
1152 } while (I->isDebugOrPseudoInst() && I != Preheader->begin());
1153
1154 if (I->isDebugOrPseudoInst() && I == Preheader->begin())
1155 Done = true;
1156 } else {
1157 Done = true;
1158 }
1159
1160 MadeAnyChanges = true;
1161 ToMove->moveBefore(*ExitBlock, InsertPt);
1162 SE->forgetValue(ToMove);
1163 if (Done) break;
1164 InsertPt = ToMove->getIterator();
1165 }
1166
1167 return MadeAnyChanges;
1168}
1169
1170static void replaceExitCond(BranchInst *BI, Value *NewCond,
1172 auto *OldCond = BI->getCondition();
1173 LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
1174 << " with " << *NewCond << "\n");
1175 BI->setCondition(NewCond);
1176 if (OldCond->use_empty())
1177 DeadInsts.emplace_back(OldCond);
1178}
1179
1180static Constant *createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB,
1181 bool IsTaken) {
1182 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1183 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1184 auto *OldCond = BI->getCondition();
1185 return ConstantInt::get(OldCond->getType(),
1186 IsTaken ? ExitIfTrue : !ExitIfTrue);
1187}
1188
1189static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
1191 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1192 auto *NewCond = createFoldedExitCond(L, ExitingBB, IsTaken);
1193 replaceExitCond(BI, NewCond, DeadInsts);
1194}
1195
1197 LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1198 ScalarEvolution &SE) {
1199 assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
1200 auto *LoopPreheader = L->getLoopPreheader();
1201 auto *LoopHeader = L->getHeader();
1203 for (auto &PN : LoopHeader->phis()) {
1204 auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
1205 for (User *U : PN.users())
1206 Worklist.push_back(cast<Instruction>(U));
1207 SE.forgetValue(&PN);
1208 PN.replaceAllUsesWith(PreheaderIncoming);
1209 DeadInsts.emplace_back(&PN);
1210 }
1211
1212 // Replacing with the preheader value will often allow IV users to simplify
1213 // (especially if the preheader value is a constant).
1215 while (!Worklist.empty()) {
1216 auto *I = cast<Instruction>(Worklist.pop_back_val());
1217 if (!Visited.insert(I).second)
1218 continue;
1219
1220 // Don't simplify instructions outside the loop.
1221 if (!L->contains(I))
1222 continue;
1223
1224 Value *Res = simplifyInstruction(I, I->getModule()->getDataLayout());
1225 if (Res && LI->replacementPreservesLCSSAForm(I, Res)) {
1226 for (User *U : I->users())
1227 Worklist.push_back(cast<Instruction>(U));
1228 I->replaceAllUsesWith(Res);
1229 DeadInsts.emplace_back(I);
1230 }
1231 }
1232}
1233
1234static Value *
1237 SCEVExpander &Rewriter) {
1238 ICmpInst::Predicate InvariantPred = LIP.Pred;
1239 BasicBlock *Preheader = L->getLoopPreheader();
1240 assert(Preheader && "Preheader doesn't exist");
1241 Rewriter.setInsertPoint(Preheader->getTerminator());
1242 auto *LHSV = Rewriter.expandCodeFor(LIP.LHS);
1243 auto *RHSV = Rewriter.expandCodeFor(LIP.RHS);
1244 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1245 if (ExitIfTrue)
1246 InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
1247 IRBuilder<> Builder(Preheader->getTerminator());
1248 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1249 return Builder.CreateICmp(InvariantPred, LHSV, RHSV,
1250 BI->getCondition()->getName());
1251}
1252
1253static std::optional<Value *>
1254createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB,
1255 const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
1256 ScalarEvolution *SE, SCEVExpander &Rewriter) {
1257 ICmpInst::Predicate Pred = ICmp->getPredicate();
1258 Value *LHS = ICmp->getOperand(0);
1259 Value *RHS = ICmp->getOperand(1);
1260
1261 // 'LHS pred RHS' should now mean that we stay in loop.
1262 auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1263 if (Inverted)
1264 Pred = CmpInst::getInversePredicate(Pred);
1265
1266 const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
1267 const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
1268 // Can we prove it to be trivially true or false?
1269 if (auto EV = SE->evaluatePredicateAt(Pred, LHSS, RHSS, BI))
1270 return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ !*EV);
1271
1272 auto *ARTy = LHSS->getType();
1273 auto *MaxIterTy = MaxIter->getType();
1274 // If possible, adjust types.
1275 if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
1276 MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
1277 else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
1278 const SCEV *MinusOne = SE->getMinusOne(ARTy);
1279 auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
1280 if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
1281 MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
1282 }
1283
1284 if (SkipLastIter) {
1285 // Semantically skip last iter is "subtract 1, do not bother about unsigned
1286 // wrap". getLoopInvariantExitCondDuringFirstIterations knows how to deal
1287 // with umin in a smart way, but umin(a, b) - 1 will likely not simplify.
1288 // So we manually construct umin(a - 1, b - 1).
1290 if (auto *UMin = dyn_cast<SCEVUMinExpr>(MaxIter)) {
1291 for (auto *Op : UMin->operands())
1292 Elements.push_back(SE->getMinusSCEV(Op, SE->getOne(Op->getType())));
1293 MaxIter = SE->getUMinFromMismatchedTypes(Elements);
1294 } else
1295 MaxIter = SE->getMinusSCEV(MaxIter, SE->getOne(MaxIter->getType()));
1296 }
1297
1298 // Check if there is a loop-invariant predicate equivalent to our check.
1299 auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
1300 L, BI, MaxIter);
1301 if (!LIP)
1302 return std::nullopt;
1303
1304 // Can we prove it to be trivially true?
1305 if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
1306 return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ false);
1307 else
1308 return createInvariantCond(L, ExitingBB, *LIP, Rewriter);
1309}
1310
1312 const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter,
1313 bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter,
1315 assert(
1316 (L->contains(BI->getSuccessor(0)) != L->contains(BI->getSuccessor(1))) &&
1317 "Not a loop exit!");
1318
1319 // For branch that stays in loop by TRUE condition, go through AND. For branch
1320 // that stays in loop by FALSE condition, go through OR. Both gives the
1321 // similar logic: "stay in loop iff all conditions are true(false)".
1322 bool Inverted = L->contains(BI->getSuccessor(1));
1323 SmallVector<ICmpInst *, 4> LeafConditions;
1324 SmallVector<Value *, 4> Worklist;
1326 Value *OldCond = BI->getCondition();
1327 Visited.insert(OldCond);
1328 Worklist.push_back(OldCond);
1329
1330 auto GoThrough = [&](Value *V) {
1331 Value *LHS = nullptr, *RHS = nullptr;
1332 if (Inverted) {
1333 if (!match(V, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
1334 return false;
1335 } else {
1336 if (!match(V, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
1337 return false;
1338 }
1339 if (Visited.insert(LHS).second)
1340 Worklist.push_back(LHS);
1341 if (Visited.insert(RHS).second)
1342 Worklist.push_back(RHS);
1343 return true;
1344 };
1345
1346 do {
1347 Value *Curr = Worklist.pop_back_val();
1348 // Go through AND/OR conditions. Collect leaf ICMPs. We only care about
1349 // those with one use, to avoid instruction duplication.
1350 if (Curr->hasOneUse())
1351 if (!GoThrough(Curr))
1352 if (auto *ICmp = dyn_cast<ICmpInst>(Curr))
1353 LeafConditions.push_back(ICmp);
1354 } while (!Worklist.empty());
1355
1356 // If the current basic block has the same exit count as the whole loop, and
1357 // it consists of multiple icmp's, try to collect all icmp's that give exact
1358 // same exit count. For all other icmp's, we could use one less iteration,
1359 // because their value on the last iteration doesn't really matter.
1360 SmallPtrSet<ICmpInst *, 4> ICmpsFailingOnLastIter;
1361 if (!SkipLastIter && LeafConditions.size() > 1 &&
1362 SE->getExitCount(L, ExitingBB,
1363 ScalarEvolution::ExitCountKind::SymbolicMaximum) ==
1364 MaxIter)
1365 for (auto *ICmp : LeafConditions) {
1366 auto EL = SE->computeExitLimitFromCond(L, ICmp, Inverted,
1367 /*ControlsExit*/ false);
1368 auto *ExitMax = EL.SymbolicMaxNotTaken;
1369 if (isa<SCEVCouldNotCompute>(ExitMax))
1370 continue;
1371 // They could be of different types (specifically this happens after
1372 // IV widening).
1373 auto *WiderType =
1374 SE->getWiderType(ExitMax->getType(), MaxIter->getType());
1375 auto *WideExitMax = SE->getNoopOrZeroExtend(ExitMax, WiderType);
1376 auto *WideMaxIter = SE->getNoopOrZeroExtend(MaxIter, WiderType);
1377 if (WideExitMax == WideMaxIter)
1378 ICmpsFailingOnLastIter.insert(ICmp);
1379 }
1380
1381 bool Changed = false;
1382 for (auto *OldCond : LeafConditions) {
1383 // Skip last iteration for this icmp under one of two conditions:
1384 // - We do it for all conditions;
1385 // - There is another ICmp that would fail on last iter, so this one doesn't
1386 // really matter.
1387 bool OptimisticSkipLastIter = SkipLastIter;
1388 if (!OptimisticSkipLastIter) {
1389 if (ICmpsFailingOnLastIter.size() > 1)
1390 OptimisticSkipLastIter = true;
1391 else if (ICmpsFailingOnLastIter.size() == 1)
1392 OptimisticSkipLastIter = !ICmpsFailingOnLastIter.count(OldCond);
1393 }
1394 if (auto Replaced =
1395 createReplacement(OldCond, L, ExitingBB, MaxIter, Inverted,
1396 OptimisticSkipLastIter, SE, Rewriter)) {
1397 Changed = true;
1398 auto *NewCond = *Replaced;
1399 if (auto *NCI = dyn_cast<Instruction>(NewCond)) {
1400 NCI->setName(OldCond->getName() + ".first_iter");
1401 }
1402 LLVM_DEBUG(dbgs() << "Unknown exit count: Replacing " << *OldCond
1403 << " with " << *NewCond << "\n");
1404 assert(OldCond->hasOneUse() && "Must be!");
1405 OldCond->replaceAllUsesWith(NewCond);
1406 DeadInsts.push_back(OldCond);
1407 // Make sure we no longer consider this condition as failing on last
1408 // iteration.
1409 ICmpsFailingOnLastIter.erase(OldCond);
1410 }
1411 }
1412 return Changed;
1413}
1414
/// Canonicalize loop exit conditions in two passes: first rewrite signed
/// compares of a zext'd value to their unsigned form (when the invariant
/// side's range proves signedness is irrelevant), then try to rotate the
/// zext out of the loop-varying side by truncating the invariant side.
/// \returns true if any exit compare was changed.
bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
  // Note: This is duplicating a particular part on SimplifyIndVars reasoning.
  // We need to duplicate it because given icmp zext(small-iv), C, IVUsers
  // never reaches the icmp since the zext doesn't fold to an AddRec unless
  // it already has flags.  The alternative to this would be to extending the
  // set of "interesting" IV users to include the icmp, but doing that
  // regresses results in practice by querying SCEVs before trip counts which
  // rely on them which results in SCEV caching sub-optimal answers.  The
  // concern about caching sub-optimal results is why we only query SCEVs of
  // the loop invariant RHS here.
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  bool Changed = false;
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    // Only touch compares whose sole user is the exit branch.
    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse())
      continue;

    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    // For the range reasoning, avoid computing SCEVs in the loop to avoid
    // poisoning cache with sub-optimal results.  For the must-execute case,
    // this is a necessary precondition for correctness.
    if (!L->isLoopInvariant(RHS)) {
      if (!L->isLoopInvariant(LHS))
        continue;
      // Same logic applies for the inverse case
      std::swap(LHS, RHS);
    }

    // Match (icmp signed-cond zext, RHS)
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
      continue;

    // If the invariant side's unsigned range fits inside the zero-extended
    // range of the narrow type, the sign bit can never matter and the signed
    // predicate is equivalent to the unsigned one.
    const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
      // replace the signed condition with the unsigned version.
      ICmp->setPredicate(ICmp->getUnsignedPredicate());
      Changed = true;
      // Note: No SCEV invalidation needed.  We've changed the predicate, but
      // have not changed exit counts, or the values produced by the compare.
      continue;
    }
  }

  // Now that we've canonicalized the condition to match the extend,
  // see if we can rotate the extend out of the loop.
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
      continue;

    bool Swapped = false;
    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
      // Nothing to rotate
      continue;
    if (L->isLoopInvariant(LHS)) {
      // Same logic applies for the inverse case until we actually pick
      // which operand of the compare to update.
      Swapped = true;
      std::swap(LHS, RHS);
    }
    assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));

    // Match (icmp unsigned-cond zext, RHS)
    // TODO: Extend to handle corresponding sext/signed-cmp case
    // TODO: Extend to other invertible functions
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))))
      continue;

    // In general, we only rotate if we can do so without increasing the number
    // of instructions.  The exception is when we have a zext(add-rec).  The
    // reason for allowing this exception is that we know we need to get rid
    // of the zext for SCEV to be able to compute a trip count for said loops;
    // we consider the new trip count valuable enough to increase instruction
    // count by one.
    if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
      continue;

    // Given a icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS
    // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
    // when zext is loop varying and RHS is loop invariant.  This converts
    // loop varying work to loop-invariant work.
    auto doRotateTransform = [&]() {
      assert(ICmp->isUnsigned() && "must have proven unsigned already");
      auto *NewRHS = CastInst::Create(
          Instruction::Trunc, RHS, LHSOp->getType(), "",
          L->getLoopPreheader()->getTerminator()->getIterator());
      ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
      ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
      if (LHS->use_empty())
        DeadInsts.push_back(LHS);
    };


    // The rotation is only lossless when trunc(RHS) round-trips, i.e. RHS's
    // unsigned range fits in the narrow type.
    const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      doRotateTransform();
      Changed = true;
      // Note, we are leaving SCEV in an unfortunately imprecise case here
      // as rotation tends to reveal information about trip counts not
      // previously visible.
      continue;
    }
  }

  return Changed;
}
1548
1549bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1550 SmallVector<BasicBlock*, 16> ExitingBlocks;
1551 L->getExitingBlocks(ExitingBlocks);
1552
1553 // Remove all exits which aren't both rewriteable and execute on every
1554 // iteration.
1555 llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1556 // If our exitting block exits multiple loops, we can only rewrite the
1557 // innermost one. Otherwise, we're changing how many times the innermost
1558 // loop runs before it exits.
1559 if (LI->getLoopFor(ExitingBB) != L)
1560 return true;
1561
1562 // Can't rewrite non-branch yet.
1563 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1564 if (!BI)
1565 return true;
1566
1567 // Likewise, the loop latch must be dominated by the exiting BB.
1568 if (!DT->dominates(ExitingBB, L->getLoopLatch()))
1569 return true;
1570
1571 if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
1572 // If already constant, nothing to do. However, if this is an
1573 // unconditional exit, we can still replace header phis with their
1574 // preheader value.
1575 if (!L->contains(BI->getSuccessor(CI->isNullValue())))
1576 replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1577 return true;
1578 }
1579
1580 return false;
1581 });
1582
1583 if (ExitingBlocks.empty())
1584 return false;
1585
1586 // Get a symbolic upper bound on the loop backedge taken count.
1587 const SCEV *MaxBECount = SE->getSymbolicMaxBackedgeTakenCount(L);
1588 if (isa<SCEVCouldNotCompute>(MaxBECount))
1589 return false;
1590
1591 // Visit our exit blocks in order of dominance. We know from the fact that
1592 // all exits must dominate the latch, so there is a total dominance order
1593 // between them.
1594 llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1595 // std::sort sorts in ascending order, so we want the inverse of
1596 // the normal dominance relation.
1597 if (A == B) return false;
1598 if (DT->properlyDominates(A, B))
1599 return true;
1600 else {
1601 assert(DT->properlyDominates(B, A) &&
1602 "expected total dominance order!");
1603 return false;
1604 }
1605 });
1606#ifdef ASSERT
1607 for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
1608 assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
1609 }
1610#endif
1611
1612 bool Changed = false;
1613 bool SkipLastIter = false;
1614 const SCEV *CurrMaxExit = SE->getCouldNotCompute();
1615 auto UpdateSkipLastIter = [&](const SCEV *MaxExitCount) {
1616 if (SkipLastIter || isa<SCEVCouldNotCompute>(MaxExitCount))
1617 return;
1618 if (isa<SCEVCouldNotCompute>(CurrMaxExit))
1619 CurrMaxExit = MaxExitCount;
1620 else
1621 CurrMaxExit = SE->getUMinFromMismatchedTypes(CurrMaxExit, MaxExitCount);
1622 // If the loop has more than 1 iteration, all further checks will be
1623 // executed 1 iteration less.
1624 if (CurrMaxExit == MaxBECount)
1625 SkipLastIter = true;
1626 };
1627 SmallSet<const SCEV *, 8> DominatingExactExitCounts;
1628 for (BasicBlock *ExitingBB : ExitingBlocks) {
1629 const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBB);
1630 const SCEV *MaxExitCount = SE->getExitCount(
1631 L, ExitingBB, ScalarEvolution::ExitCountKind::SymbolicMaximum);
1632 if (isa<SCEVCouldNotCompute>(ExactExitCount)) {
1633 // Okay, we do not know the exit count here. Can we at least prove that it
1634 // will remain the same within iteration space?
1635 auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1636 auto OptimizeCond = [&](bool SkipLastIter) {
1637 return optimizeLoopExitWithUnknownExitCount(L, BI, ExitingBB,
1638 MaxBECount, SkipLastIter,
1639 SE, Rewriter, DeadInsts);
1640 };
1641
1642 // TODO: We might have proved that we can skip the last iteration for
1643 // this check. In this case, we only want to check the condition on the
1644 // pre-last iteration (MaxBECount - 1). However, there is a nasty
1645 // corner case:
1646 //
1647 // for (i = len; i != 0; i--) { ... check (i ult X) ... }
1648 //
1649 // If we could not prove that len != 0, then we also could not prove that
1650 // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
1651 // OptimizeCond will likely not prove anything for it, even if it could
1652 // prove the same fact for len.
1653 //
1654 // As a temporary solution, we query both last and pre-last iterations in
1655 // hope that we will be able to prove triviality for at least one of
1656 // them. We can stop querying MaxBECount for this case once SCEV
1657 // understands that (MaxBECount - 1) will not overflow here.
1658 if (OptimizeCond(false))
1659 Changed = true;
1660 else if (SkipLastIter && OptimizeCond(true))
1661 Changed = true;
1662 UpdateSkipLastIter(MaxExitCount);
1663 continue;
1664 }
1665
1666 UpdateSkipLastIter(ExactExitCount);
1667
1668 // If we know we'd exit on the first iteration, rewrite the exit to
1669 // reflect this. This does not imply the loop must exit through this
1670 // exit; there may be an earlier one taken on the first iteration.
1671 // We know that the backedge can't be taken, so we replace all
1672 // the header PHIs with values coming from the preheader.
1673 if (ExactExitCount->isZero()) {
1674 foldExit(L, ExitingBB, true, DeadInsts);
1675 replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1676 Changed = true;
1677 continue;
1678 }
1679
1680 assert(ExactExitCount->getType()->isIntegerTy() &&
1681 MaxBECount->getType()->isIntegerTy() &&
1682 "Exit counts must be integers");
1683
1684 Type *WiderType =
1685 SE->getWiderType(MaxBECount->getType(), ExactExitCount->getType());
1686 ExactExitCount = SE->getNoopOrZeroExtend(ExactExitCount, WiderType);
1687 MaxBECount = SE->getNoopOrZeroExtend(MaxBECount, WiderType);
1688 assert(MaxBECount->getType() == ExactExitCount->getType());
1689
1690 // Can we prove that some other exit must be taken strictly before this
1691 // one?
1692 if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, MaxBECount,
1693 ExactExitCount)) {
1694 foldExit(L, ExitingBB, false, DeadInsts);
1695 Changed = true;
1696 continue;
1697 }
1698
1699 // As we run, keep track of which exit counts we've encountered. If we
1700 // find a duplicate, we've found an exit which would have exited on the
1701 // exiting iteration, but (from the visit order) strictly follows another
1702 // which does the same and is thus dead.
1703 if (!DominatingExactExitCounts.insert(ExactExitCount).second) {
1704 foldExit(L, ExitingBB, false, DeadInsts);
1705 Changed = true;
1706 continue;
1707 }
1708
1709 // TODO: There might be another oppurtunity to leverage SCEV's reasoning
1710 // here. If we kept track of the min of dominanting exits so far, we could
1711 // discharge exits with EC >= MDEC. This is less powerful than the existing
1712 // transform (since later exits aren't considered), but potentially more
1713 // powerful for any case where SCEV can prove a >=u b, but neither a == b
1714 // or a >u b. Such a case is not currently known.
1715 }
1716 return Changed;
1717}
1718
/// Try to rewrite the condition of each predicatable exiting block of \p L
/// into a loop-invariant comparison against the exact backedge-taken count.
/// This may change *which* exit is taken on the exiting iteration, but never
/// *how many* iterations run, so it is restricted to read-only loops with no
/// implicit exits. Returns true if any exit condition was rewritten.
bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Finally, see if we can rewrite our exit conditions into a loop invariant
  // form. If we have a read-only loop, and we can tell that we must exit down
  // a path which does not need any of the values computed within the loop, we
  // can rewrite the loop to exit on the first iteration. Note that this
  // doesn't either a) tell us the loop exits on the first iteration (unless
  // *all* exits are predicateable) or b) tell us *which* exit might be taken.
  // This transformation looks a lot like a restricted form of dead loop
  // elimination, but restricted to read-only loops and without necessarily
  // needing to kill the loop entirely.
  if (!LoopPredication)
    return false;

  // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
  // through *explicit* control flow. We have to eliminate the possibility of
  // implicit exits (see below) before we know it's truly exact.
  const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC))
    return false;

  assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
  assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");

  // Returns true if the given exiting block cannot be predicated (wrong loop
  // level, non-branch terminator, already-constant condition, phis in the
  // exit block, or an uncomputable/unexpandable exit count).
  auto BadExit = [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one. Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    // If the exit block has phis, we need to be able to compute the values
    // within the loop which contains them. This assumes trivially lcssa phis
    // have already been removed; TODO: generalize
    BasicBlock *ExitBlock =
      BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
    if (!ExitBlock->phis().empty())
      return true;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount) ||
        !Rewriter.isSafeToExpand(ExitCount))
      return true;

    assert(SE->isLoopInvariant(ExitCount, L) &&
           "Exit count must be loop invariant");
    assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
    return false;
  };

  // If we have any exits which can't be predicated themselves, then we can't
  // predicate any exit which isn't guaranteed to execute before it. Consider
  // two exits (a) and (b) which would both exit on the same iteration. If we
  // can predicate (b), but not (a), and (a) precedes (b) along some path, then
  // we could convert a loop from exiting through (a) to one exiting through
  // (b). Note that this problem exists only for exits with the same exit
  // count, and we could be more aggressive when exit counts are known inequal.
  llvm::sort(ExitingBlocks,
             [&](BasicBlock *A, BasicBlock *B) {
               // std::sort sorts in ascending order, so we want the inverse of
               // the normal dominance relation, plus a tie breaker for blocks
               // unordered by dominance.
               if (DT->properlyDominates(A, B)) return true;
               if (DT->properlyDominates(B, A)) return false;
               return A->getName() < B->getName();
             });
  // Check to see if our exit blocks are a total order (i.e. a linear chain of
  // exits before the backedge). If they aren't, reasoning about reachability
  // is complicated and we choose not to for now.
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
    if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
      return false;

  // Given our sorted total order, we know that exit[j] must be evaluated
  // after all exit[i] such that j > i. Truncate the list at the first exit
  // which can't be predicated; everything after it is unusable.
  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
    if (BadExit(ExitingBlocks[i])) {
      ExitingBlocks.resize(i);
      break;
    }

  if (ExitingBlocks.empty())
    return false;

  // We rely on not being able to reach an exiting block on a later iteration
  // than its statically computed exit count. The implementation of
  // getExitCount currently has this invariant, but assert it here so that
  // breakage is obvious if this ever changes.
  assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
        return DT->dominates(ExitingBB, L->getLoopLatch());
      }));

  // At this point, ExitingBlocks consists of only those blocks which are
  // predicatable. Given that, we know we have at least one exit we can
  // predicate if the loop doesn't have side effects and doesn't have any
  // implicit exits (because then our exact BTC isn't actually exact).
  // @Reviewers - As structured, this is O(I^2) for loop nests. Any
  // suggestions on how to improve this? I can obviously bail out for outer
  // loops, but that seems less than ideal. MemorySSA can find memory writes,
  // is that enough for *all* side effects?
  for (BasicBlock *BB : L->blocks())
    for (auto &I : *BB)
      // TODO:isGuaranteedToTransfer
      if (I.mayHaveSideEffects())
        return false;

  bool Changed = false;
  // Finally, do the actual predication for all predicatable blocks. A couple
  // of notes here:
  // 1) We don't bother to constant fold dominated exits with identical exit
  //    counts; that's simply a form of CSE/equality propagation and we leave
  //    it for dedicated passes.
  // 2) We insert the comparison at the branch. Hoisting introduces additional
  //    legality constraints and we leave that to dedicated logic. We want to
  //    predicate even if we can't insert a loop invariant expression as
  //    peeling or unrolling will likely reduce the cost of the otherwise loop
  //    varying check.
  Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
  IRBuilder<> B(L->getLoopPreheader()->getTerminator());
  Value *ExactBTCV = nullptr; // Lazily generated if needed.
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);

    auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
    Value *NewCond;
    if (ExitCount == ExactBTC) {
      // The exit count is identical (as a SCEV) to the exact BTC, so the
      // comparison "ExitCount == ExactBTC" folds to a constant taken-exit.
      NewCond = L->contains(BI->getSuccessor(0)) ?
        B.getFalse() : B.getTrue();
    } else {
      // Materialize both counts in the preheader and compare them there;
      // zero-extend to a common width if their types differ.
      Value *ECV = Rewriter.expandCodeFor(ExitCount);
      if (!ExactBTCV)
        ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
      Value *RHS = ExactBTCV;
      if (ECV->getType() != RHS->getType()) {
        Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
        ECV = B.CreateZExt(ECV, WiderTy);
        RHS = B.CreateZExt(RHS, WiderTy);
      }
      auto Pred = L->contains(BI->getSuccessor(0)) ?
        ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
      NewCond = B.CreateICmp(Pred, ECV, RHS);
    }
    Value *OldCond = BI->getCondition();
    BI->setCondition(NewCond);
    if (OldCond->use_empty())
      DeadInsts.emplace_back(OldCond);
    Changed = true;
  }

  return Changed;
}
1881
1882//===----------------------------------------------------------------------===//
1883// IndVarSimplify driver. Manage several subpasses of IV simplification.
1884//===----------------------------------------------------------------------===//
1885
/// Top-level driver: run the sequence of induction-variable simplifications
/// on \p L (non-integer IV rewriting, IV-user simplification, exit-value
/// rewriting, congruent-IV elimination, exit-condition canonicalization and
/// optimization, exit predication, and LFTR), then clean up dead code.
/// Returns true if the loop was changed. The ordering of the sub-passes is
/// deliberate; see the comments at each step.
bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //   - LSR currently only supports LoopSimplify-form loops. Indvars'
  //     canonicalization can be a pessimization without LSR to "clean up"
  //     afterwards.
  //   - We depend on having a preheader; in particular,
  //     Loop::getCanonicalInductionVariable only supports loops with preheaders,
  //     and we're in trouble if we can't find the induction variable even when
  //     we've manually inserted one.
  //   - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

  bool Changed = false;
  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl) {
    if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
                                             ReplaceExitValue, DeadInsts)) {
      NumReplaced += Rewrites;
      Changed = true;
    }
  }

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);

  // Try to convert exit conditions to unsigned and rotate computation
  // out of the loop. Note: Handles invalidation internally if needed.
  Changed |= canonicalizeExitCondition(L);

  // Try to eliminate loop exits based on analyzeable exit counts
  if (optimizeLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV.
    // Some nested loops may share same folded exit basic block,
    // thus we need to notify top most loop.
    SE->forgetTopmostLoop(L);
  }

  // Try to form loop invariant tests for loop exits by changing how many
  // iterations of the loop run when that is unobservable.
  if (predicateLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV.
    SE->forgetLoop(L);
  }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    BasicBlock *PreHeader = L->getLoopPreheader();

    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one. Otherwise, we're changing how many times the innermost
      // loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero which
      // weren't when optimizeLoopExits saw them. Arguably, we should iterate
      // until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions. Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
                                       TTI, PreHeader->getTerminator()))
        continue;

      if (!Rewriter.isSafeToExpand(ExitCount))
        continue;

      Changed |= linearFunctionTestReplace(L, ExitingBB,
                                           ExitCount, IndVar,
                                           Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty()) {
    Value *V = DeadInsts.pop_back_val();

    if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
      Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
    else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
      Changed |=
          RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
  }

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");
  if (VerifyMemorySSA && MSSAU)
    MSSAU->getMemorySSA()->verifyMemorySSA();

  return Changed;
}
2048
                                          LPMUpdater &) {
  // NOTE(review): the first lines of this signature (function name and the
  // leading parameters) were lost in extraction — confirm against the full
  // file before relying on this view.
  Function *F = L.getHeader()->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();

  // Run the transformation driver. IV widening is enabled only when both the
  // pass parameter (WidenIndVars) and the cl::opt (AllowIVWidening) allow it.
  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
                     WidenIndVars && AllowIVWidening);
  if (!IVS.run(&L))
    return PreservedAnalyses::all();

  // The pass does not modify the CFG shape; preserve CFG analyses, and
  // MemorySSA when it was available (the driver keeps it updated via MSSAU).
  auto PA = getLoopPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  if (AR.MSSA)
    PA.preserve<MemorySSAAnalysis>();
  return PA;
}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Rewrite undef for PHI
This file declares a class to represent arbitrary precision floating point values and provide a varie...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:693
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
static Value * genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, const SCEV *ExitCount, bool UsePostInc, Loop *L, SCEVExpander &Rewriter, ScalarEvolution *SE)
Insert an IR expression which computes the value held by the IV IndVar (which must be an loop counter...
static void replaceExitCond(BranchInst *BI, Value *NewCond, SmallVectorImpl< WeakTrackingVH > &DeadInsts)
static cl::opt< bool > DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), cl::desc("Disable Linear Function Test Replace optimization"))
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB)
Whether the current loop exit test is based on this value.
static cl::opt< ReplaceExitVal > ReplaceExitValue("replexitval", cl::Hidden, cl::init(OnlyCheapRepl), cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), clEnumValN(OnlyCheapRepl, "cheap", "only replace exit value when the cost is cheap"), clEnumValN(UnusedIndVarInLoop, "unusedindvarinloop", "only replace exit value when it is an unused " "induction variable in the loop and has cheap replacement cost"), clEnumValN(NoHardUse, "noharduse", "only replace exit values when loop def likely dead"), clEnumValN(AlwaysRepl, "always", "always replace exit value whenever possible")))
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE, const TargetTransformInfo *TTI)
Update information about the induction variable that is extended by this sign or zero extend operatio...
static void replaceLoopPHINodesWithPreheaderValues(LoopInfo *LI, Loop *L, SmallVectorImpl< WeakTrackingVH > &DeadInsts, ScalarEvolution &SE)
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB)
linearFunctionTestReplace policy.
static bool optimizeLoopExitWithUnknownExitCount(const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter, bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter, SmallVectorImpl< WeakTrackingVH > &DeadInsts)
static Value * createInvariantCond(const Loop *L, BasicBlock *ExitingBB, const ScalarEvolution::LoopInvariantPredicate &LIP, SCEVExpander &Rewriter)
static bool isLoopCounter(PHINode *Phi, Loop *L, ScalarEvolution *SE)
Return true if the given phi is a "counter" in L.
static std::optional< Value * > createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB, const SCEV *MaxIter, bool Inverted, bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter)
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl< Value * > &Visited, unsigned Depth)
Recursive helper for hasConcreteDef().
static bool hasConcreteDef(Value *V)
Return true if the given value is concrete.
static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken, SmallVectorImpl< WeakTrackingVH > &DeadInsts)
static PHINode * getLoopPhiForCounter(Value *IncV, Loop *L)
Given an Value which is hoped to be part of an add recurance in the given loop, return the associated...
static Constant * createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB, bool IsTaken)
static cl::opt< bool > UsePostIncrementRanges("indvars-post-increment-ranges", cl::Hidden, cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), cl::init(true))
#define DEBUG_TYPE
static PHINode * FindLoopCounter(Loop *L, BasicBlock *ExitingBB, const SCEV *BECount, ScalarEvolution *SE, DominatorTree *DT)
Search the loop header for a loop counter (anadd rec w/step of one) suitable for use by LFTR.
static cl::opt< bool > AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true), cl::desc("Allow widening of indvars to eliminate s/zext"))
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal)
Convert APF to an integer, if possible.
static cl::opt< bool > LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true), cl::desc("Predicate conditions in read only loops"))
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Module.h This file contains the declarations for the Module class.
IntegerType * Int32Ty
#define P(N)
if(VerifyEach)
This header defines various interfaces for pass management in LLVM.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This pass exposes codegen information to IR-level passes.
This defines the Use class.
Virtual Register Rewriter
Definition: VirtRegMap.cpp:237
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition: blake3_impl.h:78
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1185
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:348
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:429
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:498
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:396
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:164
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:220
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:276
Conditional or Unconditional Branch instruction.
void setCondition(Value *V)
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:70
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:579
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:908
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name, BasicBlock::iterator InsertBefore)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:965
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:968
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:994
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:995
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:971
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:980
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:969
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:970
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:992
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:979
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:973
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:976
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:990
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:977
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:972
@ ICMP_EQ
equal
Definition: InstrTypes.h:986
@ ICMP_NE
not equal
Definition: InstrTypes.h:987
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:993
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:981
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:978
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:1090
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:1066
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:267
const APFloat & getValueAPF() const
Definition: Constants.h:310
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU.
Definition: DataLayout.h:260
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
This instruction compares its operands according to the predicate given to the constructor.
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2334
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2649
Interface for visiting interesting IV users that are recognized but not simplified by this utility.
virtual void visitCast(CastInst *Cast)=0
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:80
const BasicBlock * getParent() const
Definition: Instruction.h:151
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:251
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Class to represent integer types.
Definition: DerivedTypes.h:40
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
bool replacementPreservesLCSSAForm(Instruction *From, Value *To)
Returns true if replacing From with To everywhere is guaranteed to preserve LCSSA form.
Definition: LoopInfo.h:439
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:923
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:700
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:287
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
static unsigned getIncomingValueNumForOperand(unsigned i)
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1827
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:115
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const SCEVAddRecExpr * getPostIncExpr(ScalarEvolution &SE) const
Return an expression representing the value of this expression one iteration of the loop ahead.
This class uses information about analyze scalars to rewrite expressions in canonical form.
This class represents an analyzed expression in the program.
bool isOne() const
Return true if the expression is a constant one.
bool isZero() const
Return true if the expression is a constant zero.
Type * getType() const
Return the LLVM type of this SCEV expression.
This class represents a cast from signed integer to floating point.
The main scalar evolution driver.
Type * getWiderType(Type *Ty1, Type *Ty2) const
const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
ExitLimit computeExitLimitFromCond(const Loop *L, Value *ExitCond, bool ExitIfTrue, bool ControlsOnlyExit, bool AllowPredicates=false)
Compute the number of times the backedge of the specified loop will execute if its exit condition wer...
uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
bool isKnownPredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
std::optional< bool > evaluatePredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI)
Check whether the condition described by Pred, LHS, and RHS is true or false in the given Context.
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may affect its value, or the values of any instructions that use it.
const SCEV * getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
const SCEV * getMinusOne(Type *Ty)
Return a SCEV for the constant -1 of a specific type.
const SCEV * getNoopOrZeroExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
const SCEV * getUMinFromMismatchedTypes(const SCEV *LHS, const SCEV *RHS, bool Sequential=false)
Promote the operands to the wider of the types using zero-extension, and then perform a umin operatio...
const SCEV * getExitCount(const Loop *L, const BasicBlock *ExitingBlock, ExitCountKind Kind=Exact)
Return the number of times the backedge executes before the given exit would be taken; if not exactly...
std::optional< LoopInvariantPredicate > getLoopInvariantExitCondDuringFirstIterations(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L, const Instruction *CtxI, const SCEV *MaxIter)
If the result of the predicate LHS Pred RHS is loop invariant with respect to L at given Context duri...
size_type size() const
Definition: SmallPtrSet.h:94
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:321
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:356
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:255
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
iterator_range< user_iterator > users()
Definition: Value.h:421
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
user_iterator_impl< User > user_iterator
Definition: Value.h:390
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:204
self_iterator getIterator()
Definition: ilist_node.h:109
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:1017
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:718
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root, Instruction *OnPathTo, DominatorTree *DT)
Return true if undefined behavior would provable be executed on the path to OnPathTo if Root produced...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1731
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:533
@ Done
Definition: Threading.h:61
PHINode * createWideIV(const WideIVInfo &WI, LoopInfo *LI, ScalarEvolution *SE, SCEVExpander &Rewriter, DominatorTree *DT, SmallVectorImpl< WeakTrackingVH > &DeadInsts, unsigned &NumElimExt, unsigned &NumWidened, bool HasGuards, bool UsePostIncrementRanges)
Widen Induction Variables - Extend the width of an IV to cover its widest uses.
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:99
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1656
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
cl::opt< unsigned > SCEVCheapExpansionBudget
bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, const TargetTransformInfo *TTI, SmallVectorImpl< WeakTrackingVH > &Dead, SCEVExpander &Rewriter, IVVisitor *V=nullptr)
simplifyUsersOfIV - Simplify instructions that use this induction variable by using ScalarEvolution t...
TargetTransformInfo TTI
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:83
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2060
bool isAlmostDeadIV(PHINode *IV, BasicBlock *LatchBlock, Value *Cond)
Return true if the induction variable IV in a Loop whose latch is LatchBlock would become dead if the...
Definition: LoopUtils.cpp:469
int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed,...
Definition: LoopUtils.cpp:1416
bool RecursivelyDeleteDeadPHINode(PHINode *PN, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is an effectively dead PHI node, due to being a def-use chain of single-use no...
Definition: Local.cpp:644
@ UnusedIndVarInLoop
Definition: LoopUtils.h:465
@ OnlyCheapRepl
Definition: LoopUtils.h:463
@ NeverRepl
Definition: LoopUtils.h:462
@ NoHardUse
Definition: LoopUtils.h:464
@ AlwaysRepl
Definition: LoopUtils.h:466
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Collect information about induction variables that are used by sign/zero extend operations.
PHINode * NarrowIV