LLVM 17.0.0git
IndVarSimplify.cpp
Go to the documentation of this file.
1//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This transformation analyzes and transforms the induction variables (and
10// computations derived from them) into simpler forms suitable for subsequent
11// analysis and transformation.
12//
13// If the trip count of a loop is computable, this pass also makes the following
14// changes:
15// 1. The exit condition for the loop is canonicalized to compare the
16// induction value against the exit value. This turns loops like:
17// 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18// 2. Any use outside of the loop of an expression derived from the indvar
19// is changed to compute the derived value outside of the loop, eliminating
20// the dependence on the exit value of the induction variable. If the only
21// purpose of the loop is to compute the exit value of some derived
22// expression, this transformation will make the loop dead.
23//
24//===----------------------------------------------------------------------===//
25
27#include "llvm/ADT/APFloat.h"
28#include "llvm/ADT/ArrayRef.h"
29#include "llvm/ADT/STLExtras.h"
31#include "llvm/ADT/SmallSet.h"
33#include "llvm/ADT/Statistic.h"
44#include "llvm/IR/BasicBlock.h"
45#include "llvm/IR/Constant.h"
47#include "llvm/IR/Constants.h"
48#include "llvm/IR/DataLayout.h"
50#include "llvm/IR/Dominators.h"
51#include "llvm/IR/Function.h"
52#include "llvm/IR/IRBuilder.h"
53#include "llvm/IR/InstrTypes.h"
54#include "llvm/IR/Instruction.h"
57#include "llvm/IR/Intrinsics.h"
58#include "llvm/IR/Module.h"
59#include "llvm/IR/Operator.h"
60#include "llvm/IR/PassManager.h"
62#include "llvm/IR/Type.h"
63#include "llvm/IR/Use.h"
64#include "llvm/IR/User.h"
65#include "llvm/IR/Value.h"
66#include "llvm/IR/ValueHandle.h"
68#include "llvm/Pass.h"
72#include "llvm/Support/Debug.h"
81#include <cassert>
82#include <cstdint>
83#include <utility>
84
85using namespace llvm;
86using namespace PatternMatch;
87
88#define DEBUG_TYPE "indvars"
89
// Transformation counters for this pass, reported via -stats.
STATISTIC(NumWidened , "Number of indvars widened");
STATISTIC(NumReplaced , "Number of exit values replaced");
STATISTIC(NumLFTR , "Number of loop exit tests replaced");
STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
95
97 "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
98 cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
100 clEnumValN(NeverRepl, "never", "never replace exit value"),
101 clEnumValN(OnlyCheapRepl, "cheap",
102 "only replace exit value when the cost is cheap"),
104 UnusedIndVarInLoop, "unusedindvarinloop",
105 "only replace exit value when it is an unused "
106 "induction variable in the loop and has cheap replacement cost"),
107 clEnumValN(NoHardUse, "noharduse",
108 "only replace exit values when loop def likely dead"),
109 clEnumValN(AlwaysRepl, "always",
110 "always replace exit value whenever possible")));
111
113 "indvars-post-increment-ranges", cl::Hidden,
114 cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
115 cl::init(true));
116
// Command-line switches that enable/disable individual sub-transformations of
// this pass; all are hidden developer options.
static cl::opt<bool>
DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
            cl::desc("Disable Linear Function Test Replace optimization"));

static cl::opt<bool>
LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                cl::desc("Predicate conditions in read only loops"));

static cl::opt<bool>
AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
                cl::desc("Allow widening of indvars to eliminate s/zext"));
128
129namespace {
130
class IndVarSimplify {
  // Analyses supplied by the driver; not owned by this class.
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  // Created lazily in the constructor when a MemorySSA analysis is provided.
  std::unique_ptr<MemorySSAUpdater> MSSAU;

  // Whether IV widening is permitted for this run (see AllowIVWidening flag).
  bool WidenIndVars;

  // Replace a floating-point IV rooted at PH with an equivalent integer IV.
  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  // Convert all convertible non-integer (floating-point) IVs in L.
  bool rewriteNonIntegerIVs(Loop *L);

  // Simplify IV users and widen IVs; see definition below for details.
  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  /// Try to improve our exit conditions by converting condition from signed
  /// to unsigned or rotating computation out of the loop.
  /// (See inline comment about why this is duplicated from simplifyAndExtend)
  bool canonicalizeExitCondition(Loop *L);
  /// Try to eliminate loop exits based on analyzeable exit counts
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
  /// Try to form loop invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  // Rewrite exit-block phis whose controlling exit is provably taken on the
  // first iteration (see definition below).
  bool rewriteFirstIterationLoopExitValues(Loop *L);

  // Canonicalize the exit test of ExitingBB against ExitCount using IndVar.
  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  // NOTE(review): defined elsewhere in the file; presumably sinks unused
  // loop-invariant computations out of the loop — confirm against full file.
  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
        WidenIndVars(WidenIndVars) {
    if (MSSA)
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  }

  /// Run the enabled transformations on loop L; returns true if IR changed.
  bool run(Loop *L);
};
177
178} // end anonymous namespace
179
180//===----------------------------------------------------------------------===//
181// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
182//===----------------------------------------------------------------------===//
183
184/// Convert APF to an integer, if possible.
185static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
186 bool isExact = false;
187 // See if we can convert this to an int64_t
188 uint64_t UIntVal;
189 if (APF.convertToInteger(MutableArrayRef(UIntVal), 64, true,
190 APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
191 !isExact)
192 return false;
193 IntVal = UIntVal;
194 return true;
195}
196
197/// If the loop has floating induction variable then insert corresponding
198/// integer induction variable if possible.
199/// For example,
200/// for(double i = 0; i < 10000; ++i)
201/// bar(i)
202/// is converted into
203/// for(int i = 0; i < 10000; ++i)
204/// bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  // L->contains(...) yields 0/1, so IncomingEdge indexes the preheader operand
  // of PN and BackEdge (its complement) indexes the latch operand.
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check IV increment. Reject this PN if increment operation is not
  // an add or increment value can not be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator. Exactly two users are required.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop. If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.
  // NOTE(review): this chunk shows only the unordered FCMP cases and the
  // declaration of NewPred is not visible here; confirm against the full file
  // that the ordered variants (FCMP_OEQ etc.) map to the same predicates.
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can. This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations. Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  // NOTE(review): Int32Ty (the i32 type for the replacement IV) is declared on
  // a line not visible in this chunk — confirm against the full file.

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
      BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                                Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison. The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value. In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
  }
  return true;
}
398
bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step. Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  // Snapshot the header phis before transforming: handleFloatingPointIV may
  // delete phis, which is why the worklist entries are re-checked with
  // dyn_cast_or_null below. (The declaration of the PHIs worklist is on a
  // line not visible in this chunk — presumably a vector of weak handles.)
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}
421
422//===---------------------------------------------------------------------===//
423// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
424// they will exit at the first iteration.
425//===---------------------------------------------------------------------===//
426
427/// Check to see if this loop has loop invariant conditions which lead to loop
428/// exits. If so, we know that if the exit path is taken, it is at the first
429/// loop iteration. This lets us predict exit values of PHI nodes that live in
430/// loop header.
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  // Collect the unique exit blocks of L. (The declaration of ExitBlocks is on
  // a line not visible in this chunk.)
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all? (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.) Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        // A loop-invariant condition means the exit decision does not depend
        // on the iteration, so if taken at all it is taken on iteration one.
        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          // Forward the preheader (first-iteration) value directly to the
          // exit phi, then invalidate SCEV's cached info for it.
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
          SE->forgetValue(&PN);
        }
      }
    }
  }
  return MadeAnyChanges;
}
497
498//===----------------------------------------------------------------------===//
499// IV Widening - Extend the width of an IV to cover its widest uses.
500//===----------------------------------------------------------------------===//
501
502/// Update information about the induction variable that is extended by this
503/// sign or zero extend operation. This is used to determine the final width of
504/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
                        ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  // Only sign- and zero-extends of the IV are interesting here.
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  // Only widen to a type the target natively supports.
  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later). This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetics on the wider indvar are more
  // expensive than those on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable. We
  // could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  // First (or wider) candidate seen: adopt its signedness.
  // NOTE(review): the recording of the new widest type into
  // WI.WidestNativeType is on a line not visible in this chunk — confirm.
  if (!WI.WidestNativeType ||
      Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its user(s), or 'signed'
  // if there are multiple users with both sign- and zero extensions,
  // in order not to introduce nondeterministic behaviour based on the
  // unspecified order of a PHI nodes' users-iterator.
  WI.IsSigned |= IsSigned;
}
551
552//===----------------------------------------------------------------------===//
553// Live IV Reduction - Minimize IVs live across the loop.
554//===----------------------------------------------------------------------===//
555
556//===----------------------------------------------------------------------===//
557// Simplification of IV users based on SCEV evaluation.
558//===----------------------------------------------------------------------===//
559
560namespace {
561
// IVVisitor implementation that records, for one narrow IV phi, the widest
// legal extension type observed among its users (via visitIVCast).
class IndVarSimplifyVisitor : public IVVisitor {
  ScalarEvolution *SE;
  PHINode *IVPhi; // The narrow induction variable being analyzed.

public:
  // Accumulated widening info; consulted by the caller after simplification.
  WideIVInfo WI;

  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
                        const DominatorTree *DTree)
      : SE(SCEV), TTI(TTI), IVPhi(IV) {
    // DT is a member of the IVVisitor base class.
    DT = DTree;
    WI.NarrowIV = IVPhi;
  }

  // Implement the interface used by simplifyUsersOfIV.
  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
};
581
582} // end anonymous namespace
583
584/// Iteratively perform simplification on a worklist of IV users. Each
585/// successive simplification may push more users which may themselves be
586/// candidates for simplification.
587///
588/// Sign/Zero extend elimination is interleaved with IV simplification.
bool IndVarSimplify::simplifyAndExtend(Loop *L,
                                       SCEVExpander &Rewriter,
                                       LoopInfo *LI) {

  // Widening must be careful around llvm.experimental.guard intrinsics; note
  // whether any exist in the module and are actually used.
  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
      Intrinsic::getName(Intrinsic::experimental_guard));
  bool HasGuards = GuardDecl && !GuardDecl->use_empty();

  // Seed the worklist with every phi in the loop header. (The declarations of
  // the LoopPhis/WideIVs worklists are on lines not visible in this chunk.)
  for (PHINode &PN : L->getHeader()->phis())
    LoopPhis.push_back(&PN);

  // Each round of simplification iterates through the SimplifyIVUsers worklist
  // for all current phis, then determines whether any IVs can be
  // widened. Widening adds new phis to LoopPhis, inducing another round of
  // simplification on the wide IVs.
  bool Changed = false;
  while (!LoopPhis.empty()) {
    // Evaluate as many IV expressions as possible before widening any IVs. This
    // forces SCEV to set no-wrap flags before evaluating sign/zero
    // extension. The first time SCEV attempts to normalize sign/zero extension,
    // the result becomes final. So for the most predictable results, we delay
    // evaluation of sign/zero extend evaluation until needed, and avoid running
    // other SCEV based analysis prior to simplifyAndExtend.
    do {
      PHINode *CurrIV = LoopPhis.pop_back_val();

      // Information about sign/zero extensions of CurrIV.
      IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);

      Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
                                   &Visitor);

      // Remember any IV that has a legal wider type to widen to afterwards.
      if (Visitor.WI.WidestNativeType) {
        WideIVs.push_back(Visitor.WI);
      }
    } while(!LoopPhis.empty());

    // Continue if we disallowed widening.
    if (!WidenIndVars)
      continue;

    for (; !WideIVs.empty(); WideIVs.pop_back()) {
      unsigned ElimExt;
      unsigned Widened;
      if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
                                          DT, DeadInsts, ElimExt, Widened,
                                          HasGuards, UsePostIncrementRanges)) {
        NumElimExt += ElimExt;
        NumWidened += Widened;
        Changed = true;
        // New wide phis get another simplification round.
        LoopPhis.push_back(WidePhi);
      }
    }
  }
  return Changed;
}
647
648//===----------------------------------------------------------------------===//
649// linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
650//===----------------------------------------------------------------------===//
651
/// Given a Value which is hoped to be part of an add recurrence in the given
/// loop, return the associated Phi node if so. Otherwise, return null. Note
/// that this is less general than SCEV's AddRec checking.
  // (Function signature is on a line not visible in this chunk; takes the
  // candidate increment value IncV and the loop L.)
  Instruction *IncI = dyn_cast<Instruction>(IncV);
  if (!IncI)
    return nullptr;

  // Only add/sub (either operand order) or two-operand GEPs can be the
  // increment of a simple counter.
  switch (IncI->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
    break;
  case Instruction::GetElementPtr:
    // An IV counter must preserve its type.
    if (IncI->getNumOperands() == 2)
      break;
    [[fallthrough]];
  default:
    return nullptr;
  }

  // Canonical form: phi in operand 0, loop-invariant step in operand 1.
  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(1)))
      return Phi;
    return nullptr;
  }
  // A GEP's base cannot be commuted into operand 1.
  if (IncI->getOpcode() == Instruction::GetElementPtr)
    return nullptr;

  // Allow add/sub to be commuted.
  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(0)))
      return Phi;
  }
  return nullptr;
}
690
691/// Whether the current loop exit test is based on this value. Currently this
692/// is limited to a direct use in the loop condition.
693static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
694 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
695 ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
696 // TODO: Allow non-icmp loop test.
697 if (!ICmp)
698 return false;
699
700 // TODO: Allow indirect use.
701 return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
702}
703
704/// linearFunctionTestReplace policy. Return true unless we can show that the
705/// current exit test is already sufficiently canonical.
706static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
707 assert(L->getLoopLatch() && "Must be in simplified form");
708
709 // Avoid converting a constant or loop invariant test back to a runtime
710 // test. This is critical for when SCEV's cached ExitCount is less precise
711 // than the current IR (such as after we've proven a particular exit is
712 // actually dead and thus the BE count never reaches our ExitCount.)
713 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
714 if (L->isLoopInvariant(BI->getCondition()))
715 return false;
716
717 // Do LFTR to simplify the exit condition to an ICMP.
718 ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
719 if (!Cond)
720 return true;
721
722 // Do LFTR to simplify the exit ICMP to EQ/NE
723 ICmpInst::Predicate Pred = Cond->getPredicate();
724 if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
725 return true;
726
727 // Look for a loop invariant RHS
728 Value *LHS = Cond->getOperand(0);
729 Value *RHS = Cond->getOperand(1);
730 if (!L->isLoopInvariant(RHS)) {
731 if (!L->isLoopInvariant(LHS))
732 return true;
733 std::swap(LHS, RHS);
734 }
735 // Look for a simple IV counter LHS
736 PHINode *Phi = dyn_cast<PHINode>(LHS);
737 if (!Phi)
738 Phi = getLoopPhiForCounter(LHS, L);
739
740 if (!Phi)
741 return true;
742
743 // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
744 int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
745 if (Idx < 0)
746 return true;
747
748 // Do LFTR if the exit condition's IV is *not* a simple counter.
749 Value *IncV = Phi->getIncomingValue(Idx);
750 return Phi != getLoopPhiForCounter(IncV, L);
751}
752
753/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
754/// down to checking that all operands are constant and listing instructions
755/// that may hide undef.
                               unsigned Depth) {
  // Constants are concrete unless they are (or contain) undef.
  if (isa<Constant>(V))
    return !isa<UndefValue>(V);

  // Bound the recursion to keep this check cheap.
  if (Depth >= 6)
    return false;

  // Conservatively handle non-constant non-instructions. For example, Arguments
  // may be undef.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I)
    return false;

  // Load and return values may be undef.
  if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
    return false;

  // Optimistically handle other instructions.
  for (Value *Op : I->operands()) {
    // Visited set prevents exponential blowup on shared subexpressions.
    if (!Visited.insert(Op).second)
      continue;
    if (!hasConcreteDefImpl(Op, Visited, Depth+1))
      return false;
  }
  return true;
}
783
784/// Return true if the given value is concrete. We must prove that undef can
785/// never reach it.
786///
787/// TODO: If we decide that this is a good approach to checking for undef, we
788/// may factor it into a common location.
static bool hasConcreteDef(Value *V) {
  // Seed the visited set with V itself, then recurse over its operands.
  // (The declaration of Visited is on a line not visible in this chunk.)
  Visited.insert(V);
  return hasConcreteDefImpl(V, Visited, 0);
}
794
/// Return true if the given phi is a "counter" in L. A counter is an
/// add recurrence (of integer or pointer type) with an arbitrary start, and a
/// step of 1. Note that L must have exactly one latch.
798static bool isLoopCounter(PHINode* Phi, Loop *L,
799 ScalarEvolution *SE) {
800 assert(Phi->getParent() == L->getHeader());
801 assert(L->getLoopLatch());
802
803 if (!SE->isSCEVable(Phi->getType()))
804 return false;
805
806 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
807 if (!AR || AR->getLoop() != L || !AR->isAffine())
808 return false;
809
810 const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
811 if (!Step || !Step->isOne())
812 return false;
813
814 int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
815 Value *IncV = Phi->getIncomingValue(LatchIdx);
816 return (getLoopPhiForCounter(IncV, L) == Phi &&
817 isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
818}
819
/// Search the loop header for a loop counter (an add rec w/step of one)
/// suitable for use by LFTR. If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already a
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
  // (Remainder of the signature — the ScalarEvolution/DominatorTree
  // parameters — is on a line not visible in this chunk.)
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    // isLoopCounter guarantees the SCEV is an add recurrence.
    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test. This is legal since performing LFTR could not
      // increase the number of undef users.
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist in
    // the original program. (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case above.)
    // We have to ensure that we don't introduce UB by introducing a use on an
    // iteration where said IV produces poison. Our strategy here differs for
    // pointers and integer IVs. For integers, we strip and reinfer as needed,
    // see code in linearFunctionTestReplace. For pointers, we restrict
    // transforms as there is no good way to reinfer inbounds once lost.
    if (!Phi->getType()->isIntegerTy() &&
        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
      continue;

    const SCEV *Init = AR->getStart();

    if (BestPhi && !isAlmostDeadIV(BestPhi, LatchBlock, Cond)) {
      // Don't force a live loop counter if another IV can be used.
      if (isAlmostDeadIV(Phi, LatchBlock, Cond))
        continue;

      // Prefer to count-from-zero. This is a more "canonical" counter form. It
      // also prefers integer to pointer IVs.
      if (BestInit->isZero() != Init->isZero()) {
        if (BestInit->isZero())
          continue;
      }
      // If two IVs both count from zero or both count from nonzero then the
      // narrower is likely a dead phi that has been widened. Use the wider phi
      // to allow the other to be eliminated.
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
        continue;
    }
    BestPhi = Phi;
    BestInit = Init;
  }
  return BestPhi;
}
903
904/// Insert an IR expression which computes the value held by the IV IndVar
905/// (which must be an loop counter w/unit stride) after the backedge of loop L
906/// is taken ExitCount times.
907static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
908 const SCEV *ExitCount, bool UsePostInc, Loop *L,
909 SCEVExpander &Rewriter, ScalarEvolution *SE) {
910 assert(isLoopCounter(IndVar, L, SE));
911 assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
912 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
913 assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
914
915 // For integer IVs, truncate the IV before computing the limit unless we
916 // know apriori that the limit must be a constant when evaluated in the
917 // bitwidth of the IV. We prefer (potentially) keeping a truncate of the
918 // IV in the loop over a (potentially) expensive expansion of the widened
919 // exit count add(zext(add)) expression.
920 if (IndVar->getType()->isIntegerTy() &&
921 SE->getTypeSizeInBits(AR->getType()) >
922 SE->getTypeSizeInBits(ExitCount->getType())) {
923 const SCEV *IVInit = AR->getStart();
924 if (!isa<SCEVConstant>(IVInit) || !isa<SCEVConstant>(ExitCount))
925 AR = cast<SCEVAddRecExpr>(SE->getTruncateExpr(AR, ExitCount->getType()));
926 }
927
928 const SCEVAddRecExpr *ARBase = UsePostInc ? AR->getPostIncExpr(*SE) : AR;
929 const SCEV *IVLimit = ARBase->evaluateAtIteration(ExitCount, *SE);
930 assert(SE->isLoopInvariant(IVLimit, L) &&
931 "Computed iteration count is not loop invariant!");
932 return Rewriter.expandCodeFor(IVLimit, ARBase->getType(),
933 ExitingBB->getTerminator());
934}
935
936/// This method rewrites the exit condition of the loop to be a canonical !=
937/// comparison against the incremented loop induction variable. This pass is
938/// able to rewrite the exit tests of any loop where the SCEV analysis can
939/// determine a loop-invariant trip count of the loop, which is actually a much
940/// broader range than just linear tests.
941bool IndVarSimplify::
942linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
943 const SCEV *ExitCount,
944 PHINode *IndVar, SCEVExpander &Rewriter) {
945 assert(L->getLoopLatch() && "Loop no longer in simplified form?");
946 assert(isLoopCounter(IndVar, L, SE));
947 Instruction * const IncVar =
948 cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
949
950 // Initialize CmpIndVar to the preincremented IV.
951 Value *CmpIndVar = IndVar;
952 bool UsePostInc = false;
953
954 // If the exiting block is the same as the backedge block, we prefer to
955 // compare against the post-incremented value, otherwise we must compare
956 // against the preincremented value.
957 if (ExitingBB == L->getLoopLatch()) {
958 // For pointer IVs, we chose to not strip inbounds which requires us not
959 // to add a potentially UB introducing use. We need to either a) show
960 // the loop test we're modifying is already in post-inc form, or b) show
961 // that adding a use must not introduce UB.
962 bool SafeToPostInc =
963 IndVar->getType()->isIntegerTy() ||
964 isLoopExitTestBasedOn(IncVar, ExitingBB) ||
965 mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
966 if (SafeToPostInc) {
967 UsePostInc = true;
968 CmpIndVar = IncVar;
969 }
970 }
971
972 // It may be necessary to drop nowrap flags on the incrementing instruction
973 // if either LFTR moves from a pre-inc check to a post-inc check (in which
974 // case the increment might have previously been poison on the last iteration
975 // only) or if LFTR switches to a different IV that was previously dynamically
976 // dead (and as such may be arbitrarily poison). We remove any nowrap flags
977 // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
978 // check), because the pre-inc addrec flags may be adopted from the original
979 // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
980 // TODO: This handling is inaccurate for one case: If we switch to a
981 // dynamically dead IV that wraps on the first loop iteration only, which is
982 // not covered by the post-inc addrec. (If the new IV was not dynamically
983 // dead, it could not be poison on the first iteration in the first place.)
984 if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
985 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
986 if (BO->hasNoUnsignedWrap())
987 BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
988 if (BO->hasNoSignedWrap())
989 BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
990 }
991
992 Value *ExitCnt = genLoopLimit(
993 IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
994 assert(ExitCnt->getType()->isPointerTy() ==
995 IndVar->getType()->isPointerTy() &&
996 "genLoopLimit missed a cast");
997
998 // Insert a new icmp_ne or icmp_eq instruction before the branch.
999 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1001 if (L->contains(BI->getSuccessor(0)))
1002 P = ICmpInst::ICMP_NE;
1003 else
1004 P = ICmpInst::ICMP_EQ;
1005
1006 IRBuilder<> Builder(BI);
1007
1008 // The new loop exit condition should reuse the debug location of the
1009 // original loop exit condition.
1010 if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
1011 Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
1012
1013 // For integer IVs, if we evaluated the limit in the narrower bitwidth to
1014 // avoid the expensive expansion of the limit expression in the wider type,
1015 // emit a truncate to narrow the IV to the ExitCount type. This is safe
1016 // since we know (from the exit count bitwidth), that we can't self-wrap in
1017 // the narrower type.
1018 unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
1019 unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
1020 if (CmpIndVarSize > ExitCntSize) {
1021 assert(!CmpIndVar->getType()->isPointerTy() &&
1022 !ExitCnt->getType()->isPointerTy());
1023
1024 // Before resorting to actually inserting the truncate, use the same
1025 // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
1026 // the other side of the comparison instead. We still evaluate the limit
1027 // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
1028 // a truncate within in.
1029 bool Extended = false;
1030 const SCEV *IV = SE->getSCEV(CmpIndVar);
1031 const SCEV *TruncatedIV = SE->getTruncateExpr(IV, ExitCnt->getType());
1032 const SCEV *ZExtTrunc =
1033 SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
1034
1035 if (ZExtTrunc == IV) {
1036 Extended = true;
1037 ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
1038 "wide.trip.count");
1039 } else {
1040 const SCEV *SExtTrunc =
1041 SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
1042 if (SExtTrunc == IV) {
1043 Extended = true;
1044 ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
1045 "wide.trip.count");
1046 }
1047 }
1048
1049 if (Extended) {
1050 bool Discard;
1051 L->makeLoopInvariant(ExitCnt, Discard);
1052 } else
1053 CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
1054 "lftr.wideiv");
1055 }
1056 LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
1057 << " LHS:" << *CmpIndVar << '\n'
1058 << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
1059 << "\n"
1060 << " RHS:\t" << *ExitCnt << "\n"
1061 << "ExitCount:\t" << *ExitCount << "\n"
1062 << " was: " << *BI->getCondition() << "\n");
1063
1064 Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
1065 Value *OrigCond = BI->getCondition();
1066 // It's tempting to use replaceAllUsesWith here to fully replace the old
1067 // comparison, but that's not immediately safe, since users of the old
1068 // comparison may not be dominated by the new comparison. Instead, just
1069 // update the branch to use the new comparison; in the common case this
1070 // will make old comparison dead.
1071 BI->setCondition(Cond);
1072 DeadInsts.emplace_back(OrigCond);
1073
1074 ++NumLFTR;
1075 return true;
1076}
1077
1078//===----------------------------------------------------------------------===//
1079// sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
1080//===----------------------------------------------------------------------===//
1081
1082/// If there's a single exit block, sink any loop-invariant values that
1083/// were defined in the preheader but not used inside the loop into the
1084/// exit block to reduce register pressure in the loop.
1085bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
1086 BasicBlock *ExitBlock = L->getExitBlock();
1087 if (!ExitBlock) return false;
1088
1089 BasicBlock *Preheader = L->getLoopPreheader();
1090 if (!Preheader) return false;
1091
1092 bool MadeAnyChanges = false;
1093 BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
1094 BasicBlock::iterator I(Preheader->getTerminator());
1095 while (I != Preheader->begin()) {
1096 --I;
1097 // New instructions were inserted at the end of the preheader.
1098 if (isa<PHINode>(I))
1099 break;
1100
1101 // Don't move instructions which might have side effects, since the side
1102 // effects need to complete before instructions inside the loop. Also don't
1103 // move instructions which might read memory, since the loop may modify
1104 // memory. Note that it's okay if the instruction might have undefined
1105 // behavior: LoopSimplify guarantees that the preheader dominates the exit
1106 // block.
1107 if (I->mayHaveSideEffects() || I->mayReadFromMemory())
1108 continue;
1109
1110 // Skip debug info intrinsics.
1111 if (isa<DbgInfoIntrinsic>(I))
1112 continue;
1113
1114 // Skip eh pad instructions.
1115 if (I->isEHPad())
1116 continue;
1117
1118 // Don't sink alloca: we never want to sink static alloca's out of the
1119 // entry block, and correctly sinking dynamic alloca's requires
1120 // checks for stacksave/stackrestore intrinsics.
1121 // FIXME: Refactor this check somehow?
1122 if (isa<AllocaInst>(I))
1123 continue;
1124
1125 // Determine if there is a use in or before the loop (direct or
1126 // otherwise).
1127 bool UsedInLoop = false;
1128 for (Use &U : I->uses()) {
1129 Instruction *User = cast<Instruction>(U.getUser());
1130 BasicBlock *UseBB = User->getParent();
1131 if (PHINode *P = dyn_cast<PHINode>(User)) {
1132 unsigned i =
1134 UseBB = P->getIncomingBlock(i);
1135 }
1136 if (UseBB == Preheader || L->contains(UseBB)) {
1137 UsedInLoop = true;
1138 break;
1139 }
1140 }
1141
1142 // If there is, the def must remain in the preheader.
1143 if (UsedInLoop)
1144 continue;
1145
1146 // Otherwise, sink it to the exit block.
1147 Instruction *ToMove = &*I;
1148 bool Done = false;
1149
1150 if (I != Preheader->begin()) {
1151 // Skip debug info intrinsics.
1152 do {
1153 --I;
1154 } while (I->isDebugOrPseudoInst() && I != Preheader->begin());
1155
1156 if (I->isDebugOrPseudoInst() && I == Preheader->begin())
1157 Done = true;
1158 } else {
1159 Done = true;
1160 }
1161
1162 MadeAnyChanges = true;
1163 ToMove->moveBefore(*ExitBlock, InsertPt);
1164 SE->forgetValue(ToMove);
1165 if (Done) break;
1166 InsertPt = ToMove->getIterator();
1167 }
1168
1169 return MadeAnyChanges;
1170}
1171
1172static void replaceExitCond(BranchInst *BI, Value *NewCond,
1174 auto *OldCond = BI->getCondition();
1175 LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
1176 << " with " << *NewCond << "\n");
1177 BI->setCondition(NewCond);
1178 if (OldCond->use_empty())
1179 DeadInsts.emplace_back(OldCond);
1180}
1181
1182static Constant *createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB,
1183 bool IsTaken) {
1184 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1185 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1186 auto *OldCond = BI->getCondition();
1187 return ConstantInt::get(OldCond->getType(),
1188 IsTaken ? ExitIfTrue : !ExitIfTrue);
1189}
1190
1191static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
1193 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1194 auto *NewCond = createFoldedExitCond(L, ExitingBB, IsTaken);
1195 replaceExitCond(BI, NewCond, DeadInsts);
1196}
1197
1199 LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1200 ScalarEvolution &SE) {
1201 assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
1202 auto *LoopPreheader = L->getLoopPreheader();
1203 auto *LoopHeader = L->getHeader();
1205 for (auto &PN : LoopHeader->phis()) {
1206 auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
1207 for (User *U : PN.users())
1208 Worklist.push_back(cast<Instruction>(U));
1209 SE.forgetValue(&PN);
1210 PN.replaceAllUsesWith(PreheaderIncoming);
1211 DeadInsts.emplace_back(&PN);
1212 }
1213
1214 // Replacing with the preheader value will often allow IV users to simplify
1215 // (especially if the preheader value is a constant).
1217 while (!Worklist.empty()) {
1218 auto *I = cast<Instruction>(Worklist.pop_back_val());
1219 if (!Visited.insert(I).second)
1220 continue;
1221
1222 // Don't simplify instructions outside the loop.
1223 if (!L->contains(I))
1224 continue;
1225
1226 Value *Res = simplifyInstruction(I, I->getModule()->getDataLayout());
1227 if (Res && LI->replacementPreservesLCSSAForm(I, Res)) {
1228 for (User *U : I->users())
1229 Worklist.push_back(cast<Instruction>(U));
1230 I->replaceAllUsesWith(Res);
1231 DeadInsts.emplace_back(I);
1232 }
1233 }
1234}
1235
1236static Value *
1239 SCEVExpander &Rewriter) {
1240 ICmpInst::Predicate InvariantPred = LIP.Pred;
1241 BasicBlock *Preheader = L->getLoopPreheader();
1242 assert(Preheader && "Preheader doesn't exist");
1243 Rewriter.setInsertPoint(Preheader->getTerminator());
1244 auto *LHSV = Rewriter.expandCodeFor(LIP.LHS);
1245 auto *RHSV = Rewriter.expandCodeFor(LIP.RHS);
1246 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1247 if (ExitIfTrue)
1248 InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
1249 IRBuilder<> Builder(Preheader->getTerminator());
1250 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1251 return Builder.CreateICmp(InvariantPred, LHSV, RHSV,
1252 BI->getCondition()->getName());
1253}
1254
1255static std::optional<Value *>
1256createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB,
1257 const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
1258 ScalarEvolution *SE, SCEVExpander &Rewriter) {
1259 ICmpInst::Predicate Pred = ICmp->getPredicate();
1260 Value *LHS = ICmp->getOperand(0);
1261 Value *RHS = ICmp->getOperand(1);
1262
1263 // 'LHS pred RHS' should now mean that we stay in loop.
1264 auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1265 if (Inverted)
1266 Pred = CmpInst::getInversePredicate(Pred);
1267
1268 const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
1269 const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
1270 // Can we prove it to be trivially true or false?
1271 if (auto EV = SE->evaluatePredicateAt(Pred, LHSS, RHSS, BI))
1272 return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ !*EV);
1273
1274 auto *ARTy = LHSS->getType();
1275 auto *MaxIterTy = MaxIter->getType();
1276 // If possible, adjust types.
1277 if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
1278 MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
1279 else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
1280 const SCEV *MinusOne = SE->getMinusOne(ARTy);
1281 auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
1282 if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
1283 MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
1284 }
1285
1286 if (SkipLastIter) {
1287 // Semantically skip last iter is "subtract 1, do not bother about unsigned
1288 // wrap". getLoopInvariantExitCondDuringFirstIterations knows how to deal
1289 // with umin in a smart way, but umin(a, b) - 1 will likely not simplify.
1290 // So we manually construct umin(a - 1, b - 1).
1292 if (auto *UMin = dyn_cast<SCEVUMinExpr>(MaxIter)) {
1293 for (auto *Op : UMin->operands())
1294 Elements.push_back(SE->getMinusSCEV(Op, SE->getOne(Op->getType())));
1295 MaxIter = SE->getUMinFromMismatchedTypes(Elements);
1296 } else
1297 MaxIter = SE->getMinusSCEV(MaxIter, SE->getOne(MaxIter->getType()));
1298 }
1299
1300 // Check if there is a loop-invariant predicate equivalent to our check.
1301 auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
1302 L, BI, MaxIter);
1303 if (!LIP)
1304 return std::nullopt;
1305
1306 // Can we prove it to be trivially true?
1307 if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
1308 return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ false);
1309 else
1310 return createInvariantCond(L, ExitingBB, *LIP, Rewriter);
1311}
1312
1314 const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter,
1315 bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter,
1317 assert(
1318 (L->contains(BI->getSuccessor(0)) != L->contains(BI->getSuccessor(1))) &&
1319 "Not a loop exit!");
1320
1321 // For branch that stays in loop by TRUE condition, go through AND. For branch
1322 // that stays in loop by FALSE condition, go through OR. Both gives the
1323 // similar logic: "stay in loop iff all conditions are true(false)".
1324 bool Inverted = L->contains(BI->getSuccessor(1));
1325 SmallVector<ICmpInst *, 4> LeafConditions;
1326 SmallVector<Value *, 4> Worklist;
1328 Value *OldCond = BI->getCondition();
1329 Visited.insert(OldCond);
1330 Worklist.push_back(OldCond);
1331
1332 auto GoThrough = [&](Value *V) {
1333 Value *LHS = nullptr, *RHS = nullptr;
1334 if (Inverted) {
1335 if (!match(V, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
1336 return false;
1337 } else {
1338 if (!match(V, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
1339 return false;
1340 }
1341 if (Visited.insert(LHS).second)
1342 Worklist.push_back(LHS);
1343 if (Visited.insert(RHS).second)
1344 Worklist.push_back(RHS);
1345 return true;
1346 };
1347
1348 do {
1349 Value *Curr = Worklist.pop_back_val();
1350 // Go through AND/OR conditions. Collect leaf ICMPs. We only care about
1351 // those with one use, to avoid instruction duplication.
1352 if (Curr->hasOneUse())
1353 if (!GoThrough(Curr))
1354 if (auto *ICmp = dyn_cast<ICmpInst>(Curr))
1355 LeafConditions.push_back(ICmp);
1356 } while (!Worklist.empty());
1357
1358 // If the current basic block has the same exit count as the whole loop, and
1359 // it consists of multiple icmp's, try to collect all icmp's that give exact
1360 // same exit count. For all other icmp's, we could use one less iteration,
1361 // because their value on the last iteration doesn't really matter.
1362 SmallPtrSet<ICmpInst *, 4> ICmpsFailingOnLastIter;
1363 if (!SkipLastIter && LeafConditions.size() > 1 &&
1364 SE->getExitCount(L, ExitingBB,
1365 ScalarEvolution::ExitCountKind::SymbolicMaximum) ==
1366 MaxIter)
1367 for (auto *ICmp : LeafConditions) {
1368 auto EL = SE->computeExitLimitFromCond(L, ICmp, Inverted,
1369 /*ControlsExit*/ false);
1370 auto *ExitMax = EL.SymbolicMaxNotTaken;
1371 if (isa<SCEVCouldNotCompute>(ExitMax))
1372 continue;
1373 // They could be of different types (specifically this happens after
1374 // IV widening).
1375 auto *WiderType =
1376 SE->getWiderType(ExitMax->getType(), MaxIter->getType());
1377 auto *WideExitMax = SE->getNoopOrZeroExtend(ExitMax, WiderType);
1378 auto *WideMaxIter = SE->getNoopOrZeroExtend(MaxIter, WiderType);
1379 if (WideExitMax == WideMaxIter)
1380 ICmpsFailingOnLastIter.insert(ICmp);
1381 }
1382
1383 bool Changed = false;
1384 for (auto *OldCond : LeafConditions) {
1385 // Skip last iteration for this icmp under one of two conditions:
1386 // - We do it for all conditions;
1387 // - There is another ICmp that would fail on last iter, so this one doesn't
1388 // really matter.
1389 bool OptimisticSkipLastIter = SkipLastIter;
1390 if (!OptimisticSkipLastIter) {
1391 if (ICmpsFailingOnLastIter.size() > 1)
1392 OptimisticSkipLastIter = true;
1393 else if (ICmpsFailingOnLastIter.size() == 1)
1394 OptimisticSkipLastIter = !ICmpsFailingOnLastIter.count(OldCond);
1395 }
1396 if (auto Replaced =
1397 createReplacement(OldCond, L, ExitingBB, MaxIter, Inverted,
1398 OptimisticSkipLastIter, SE, Rewriter)) {
1399 Changed = true;
1400 auto *NewCond = *Replaced;
1401 if (auto *NCI = dyn_cast<Instruction>(NewCond)) {
1402 NCI->setName(OldCond->getName() + ".first_iter");
1403 }
1404 LLVM_DEBUG(dbgs() << "Unknown exit count: Replacing " << *OldCond
1405 << " with " << *NewCond << "\n");
1406 assert(OldCond->hasOneUse() && "Must be!");
1407 OldCond->replaceAllUsesWith(NewCond);
1408 DeadInsts.push_back(OldCond);
1409 // Make sure we no longer consider this condition as failing on last
1410 // iteration.
1411 ICmpsFailingOnLastIter.erase(OldCond);
1412 }
1413 }
1414 return Changed;
1415}
1416
bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
  // Note: This is duplicating a particular part on SimplifyIndVars reasoning.
  // We need to duplicate it because given icmp zext(small-iv), C, IVUsers
  // never reaches the icmp since the zext doesn't fold to an AddRec unless
  // it already has flags. The alternative to this would be extending the
  // set of "interesting" IV users to include the icmp, but doing that
  // regresses results in practice by querying SCEVs before trip counts which
  // rely on them which results in SCEV caching sub-optimal answers. The
  // concern about caching sub-optimal results is why we only query SCEVs of
  // the loop invariant RHS here.
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  bool Changed = false;
  // Pass 1: turn "icmp signed-pred (zext X), RHS" into the equivalent
  // unsigned compare when RHS provably fits in the zero-extended range.
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse())
      continue;

    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    // For the range reasoning, avoid computing SCEVs in the loop to avoid
    // poisoning cache with sub-optimal results. For the must-execute case,
    // this is a necessary precondition for correctness.
    if (!L->isLoopInvariant(RHS)) {
      if (!L->isLoopInvariant(LHS))
        continue;
      // Same logic applies for the inverse case
      std::swap(LHS, RHS);
    }

    // Match (icmp signed-cond zext, RHS)
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
      continue;

    // If RHS's unsigned range lies within zext(full range of the narrow
    // type), the sign bit of the wide value can never be set, so signed and
    // unsigned predicates agree.
    const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
      // replace the signed condition with the unsigned version.
      ICmp->setPredicate(ICmp->getUnsignedPredicate());
      Changed = true;
      // Note: No SCEV invalidation needed. We've changed the predicate, but
      // have not changed exit counts, or the values produced by the compare.
      continue;
    }
  }

  // Now that we've canonicalized the condition to match the extend,
  // see if we can rotate the extend out of the loop.
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
      continue;

    bool Swapped = false;
    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
      // Nothing to rotate
      continue;
    if (L->isLoopInvariant(LHS)) {
      // Same logic applies for the inverse case until we actually pick
      // which operand of the compare to update.
      Swapped = true;
      std::swap(LHS, RHS);
    }
    assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));

    // Match (icmp unsigned-cond zext, RHS)
    // TODO: Extend to handle corresponding sext/signed-cmp case
    // TODO: Extend to other invertible functions
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))))
      continue;

    // In general, we only rotate if we can do so without increasing the number
    // of instructions. The exception is when we have a zext(add-rec). The
    // reason for allowing this exception is that we know we need to get rid
    // of the zext for SCEV to be able to compute a trip count for said loops;
    // we consider the new trip count valuable enough to increase instruction
    // count by one.
    if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
      continue;

    // Given an icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS
    // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
    // when zext is loop varying and RHS is loop invariant. This converts
    // loop varying work to loop-invariant work.
    auto doRotateTransform = [&]() {
      assert(ICmp->isUnsigned() && "must have proven unsigned already");
      // The trunc is loop invariant, so it goes in the preheader.
      auto *NewRHS =
        CastInst::Create(Instruction::Trunc, RHS, LHSOp->getType(), "",
                         L->getLoopPreheader()->getTerminator());
      ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
      ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
      if (LHS->use_empty())
        DeadInsts.push_back(LHS);
    };


    // Rotation is only sound if RHS is within the zero-extended range of the
    // narrow type, i.e. zext(trunc(RHS)) == RHS.
    const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      doRotateTransform();
      Changed = true;
      // Note, we are leaving SCEV in an unfortunately imprecise case here
      // as rotation tends to reveal information about trip counts not
      // previously visible.
      continue;
    }
  }

  return Changed;
}
1550
1551bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1552 SmallVector<BasicBlock*, 16> ExitingBlocks;
1553 L->getExitingBlocks(ExitingBlocks);
1554
1555 // Remove all exits which aren't both rewriteable and execute on every
1556 // iteration.
1557 llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1558 // If our exitting block exits multiple loops, we can only rewrite the
1559 // innermost one. Otherwise, we're changing how many times the innermost
1560 // loop runs before it exits.
1561 if (LI->getLoopFor(ExitingBB) != L)
1562 return true;
1563
1564 // Can't rewrite non-branch yet.
1565 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1566 if (!BI)
1567 return true;
1568
1569 // Likewise, the loop latch must be dominated by the exiting BB.
1570 if (!DT->dominates(ExitingBB, L->getLoopLatch()))
1571 return true;
1572
1573 if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
1574 // If already constant, nothing to do. However, if this is an
1575 // unconditional exit, we can still replace header phis with their
1576 // preheader value.
1577 if (!L->contains(BI->getSuccessor(CI->isNullValue())))
1578 replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1579 return true;
1580 }
1581
1582 return false;
1583 });
1584
1585 if (ExitingBlocks.empty())
1586 return false;
1587
1588 // Get a symbolic upper bound on the loop backedge taken count.
1589 const SCEV *MaxBECount = SE->getSymbolicMaxBackedgeTakenCount(L);
1590 if (isa<SCEVCouldNotCompute>(MaxBECount))
1591 return false;
1592
1593 // Visit our exit blocks in order of dominance. We know from the fact that
1594 // all exits must dominate the latch, so there is a total dominance order
1595 // between them.
1596 llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1597 // std::sort sorts in ascending order, so we want the inverse of
1598 // the normal dominance relation.
1599 if (A == B) return false;
1600 if (DT->properlyDominates(A, B))
1601 return true;
1602 else {
1603 assert(DT->properlyDominates(B, A) &&
1604 "expected total dominance order!");
1605 return false;
1606 }
1607 });
1608#ifdef ASSERT
1609 for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
1610 assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
1611 }
1612#endif
1613
1614 bool Changed = false;
1615 bool SkipLastIter = false;
1616 const SCEV *CurrMaxExit = SE->getCouldNotCompute();
1617 auto UpdateSkipLastIter = [&](const SCEV *MaxExitCount) {
1618 if (SkipLastIter || isa<SCEVCouldNotCompute>(MaxExitCount))
1619 return;
1620 if (isa<SCEVCouldNotCompute>(CurrMaxExit))
1621 CurrMaxExit = MaxExitCount;
1622 else
1623 CurrMaxExit = SE->getUMinFromMismatchedTypes(CurrMaxExit, MaxExitCount);
1624 // If the loop has more than 1 iteration, all further checks will be
1625 // executed 1 iteration less.
1626 if (CurrMaxExit == MaxBECount)
1627 SkipLastIter = true;
1628 };
1629 SmallSet<const SCEV *, 8> DominatingExactExitCounts;
1630 for (BasicBlock *ExitingBB : ExitingBlocks) {
1631 const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBB);
1632 const SCEV *MaxExitCount = SE->getExitCount(
1633 L, ExitingBB, ScalarEvolution::ExitCountKind::SymbolicMaximum);
1634 if (isa<SCEVCouldNotCompute>(ExactExitCount)) {
1635 // Okay, we do not know the exit count here. Can we at least prove that it
1636 // will remain the same within iteration space?
1637 auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1638 auto OptimizeCond = [&](bool SkipLastIter) {
1639 return optimizeLoopExitWithUnknownExitCount(L, BI, ExitingBB,
1640 MaxBECount, SkipLastIter,
1641 SE, Rewriter, DeadInsts);
1642 };
1643
1644 // TODO: We might have proved that we can skip the last iteration for
1645 // this check. In this case, we only want to check the condition on the
1646 // pre-last iteration (MaxBECount - 1). However, there is a nasty
1647 // corner case:
1648 //
1649 // for (i = len; i != 0; i--) { ... check (i ult X) ... }
1650 //
1651 // If we could not prove that len != 0, then we also could not prove that
1652 // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
1653 // OptimizeCond will likely not prove anything for it, even if it could
1654 // prove the same fact for len.
1655 //
1656 // As a temporary solution, we query both last and pre-last iterations in
1657 // hope that we will be able to prove triviality for at least one of
1658 // them. We can stop querying MaxBECount for this case once SCEV
1659 // understands that (MaxBECount - 1) will not overflow here.
1660 if (OptimizeCond(false))
1661 Changed = true;
1662 else if (SkipLastIter && OptimizeCond(true))
1663 Changed = true;
1664 UpdateSkipLastIter(MaxExitCount);
1665 continue;
1666 }
1667
1668 UpdateSkipLastIter(ExactExitCount);
1669
1670 // If we know we'd exit on the first iteration, rewrite the exit to
1671 // reflect this. This does not imply the loop must exit through this
1672 // exit; there may be an earlier one taken on the first iteration.
1673 // We know that the backedge can't be taken, so we replace all
1674 // the header PHIs with values coming from the preheader.
1675 if (ExactExitCount->isZero()) {
1676 foldExit(L, ExitingBB, true, DeadInsts);
1677 replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1678 Changed = true;
1679 continue;
1680 }
1681
1682 assert(ExactExitCount->getType()->isIntegerTy() &&
1683 MaxBECount->getType()->isIntegerTy() &&
1684 "Exit counts must be integers");
1685
1686 Type *WiderType =
1687 SE->getWiderType(MaxBECount->getType(), ExactExitCount->getType());
1688 ExactExitCount = SE->getNoopOrZeroExtend(ExactExitCount, WiderType);
1689 MaxBECount = SE->getNoopOrZeroExtend(MaxBECount, WiderType);
1690 assert(MaxBECount->getType() == ExactExitCount->getType());
1691
1692 // Can we prove that some other exit must be taken strictly before this
1693 // one?
1694 if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, MaxBECount,
1695 ExactExitCount)) {
1696 foldExit(L, ExitingBB, false, DeadInsts);
1697 Changed = true;
1698 continue;
1699 }
1700
1701 // As we run, keep track of which exit counts we've encountered. If we
1702 // find a duplicate, we've found an exit which would have exited on the
1703 // exiting iteration, but (from the visit order) strictly follows another
1704 // which does the same and is thus dead.
1705 if (!DominatingExactExitCounts.insert(ExactExitCount).second) {
1706 foldExit(L, ExitingBB, false, DeadInsts);
1707 Changed = true;
1708 continue;
1709 }
1710
    // TODO: There might be another opportunity to leverage SCEV's reasoning
    // here. If we kept track of the min of dominating exits so far, we could
    // discharge exits with EC >= MDEC. This is less powerful than the existing
    // transform (since later exits aren't considered), but potentially more
    // powerful for any case where SCEV can prove a >=u b, but neither a == b
    // nor a >u b. Such a case is not currently known.
1717 }
1718 return Changed;
1719}
1720
/// Try to rewrite the exit conditions of a read-only loop into loop-invariant
/// form by comparing each exit's exact exit count against the loop's exact
/// backedge-taken count. This may change *which* exit is taken (but not the
/// iteration on which the loop exits), which is only legal because the loop
/// is proven side-effect free below.
///
/// \param L        The loop whose exits we try to predicate.
/// \param Rewriter SCEV expander used to materialize exit counts in the
///                 preheader.
/// \returns true if any exit branch condition was rewritten.
bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Finally, see if we can rewrite our exit conditions into a loop invariant
  // form. If we have a read-only loop, and we can tell that we must exit down
  // a path which does not need any of the values computed within the loop, we
  // can rewrite the loop to exit on the first iteration. Note that this
  // doesn't either a) tell us the loop exits on the first iteration (unless
  // *all* exits are predicateable) or b) tell us *which* exit might be taken.
  // This transformation looks a lot like a restricted form of dead loop
  // elimination, but restricted to read-only loops and without necessarily
  // needing to kill the loop entirely.
  if (!LoopPredication)
    return false;

  // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
  // through *explicit* control flow. We have to eliminate the possibility of
  // implicit exits (see below) before we know it's truly exact.
  const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC))
    return false;

  assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
  assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");

  // Returns true when an exiting block cannot be predicated: it exits an
  // outer loop, has a non-branch or already-constant terminator, feeds phis
  // in the exit block, or has an exit count we can't compute/expand.
  auto BadExit = [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one. Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    // If the exit block has phis, we need to be able to compute the values
    // within the loop which contains them. This assumes trivially lcssa phis
    // have already been removed; TODO: generalize
    BasicBlock *ExitBlock =
      BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
    if (!ExitBlock->phis().empty())
      return true;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount) ||
        !Rewriter.isSafeToExpand(ExitCount))
      return true;

    assert(SE->isLoopInvariant(ExitCount, L) &&
           "Exit count must be loop invariant");
    assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
    return false;
  };

  // If we have any exits which can't be predicated themselves, then we can't
  // predicate any exit which isn't guaranteed to execute before it. Consider
  // two exits (a) and (b) which would both exit on the same iteration. If we
  // can predicate (b), but not (a), and (a) precedes (b) along some path, then
  // we could convert a loop from exiting through (a) to one exiting through
  // (b). Note that this problem exists only for exits with the same exit
  // count, and we could be more aggressive when exit counts are known inequal.
  llvm::sort(ExitingBlocks,
            [&](BasicBlock *A, BasicBlock *B) {
              // std::sort sorts in ascending order, so we want the inverse of
              // the normal dominance relation, plus a tie breaker for blocks
              // unordered by dominance.
              if (DT->properlyDominates(A, B)) return true;
              if (DT->properlyDominates(B, A)) return false;
              return A->getName() < B->getName();
            });
  // Check to see if our exit blocks are a total order (i.e. a linear chain of
  // exits before the backedge). If they aren't, reasoning about reachability
  // is complicated and we choose not to for now.
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
    if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
      return false;

  // Given our sorted total order, we know that exit[j] must be evaluated
  // after all exit[i] such j > i.
  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
    if (BadExit(ExitingBlocks[i])) {
      // Keep only the dominating prefix of predicatable exits.
      ExitingBlocks.resize(i);
      break;
    }

  if (ExitingBlocks.empty())
    return false;

  // We rely on not being able to reach an exiting block on a later iteration
  // than its statically computed exit count. The implementation of
  // getExitCount currently has this invariant, but assert it here so that
  // breakage is obvious if this ever changes.
  assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
        return DT->dominates(ExitingBB, L->getLoopLatch());
      }));

  // At this point, ExitingBlocks consists of only those blocks which are
  // predicatable. Given that, we know we have at least one exit we can
  // predicate if the loop doesn't have side effects and doesn't have any
  // implicit exits (because then our exact BTC isn't actually exact).
  // @Reviewers - As structured, this is O(I^2) for loop nests. Any
  // suggestions on how to improve this? I can obviously bail out for outer
  // loops, but that seems less than ideal. MemorySSA can find memory writes,
  // is that enough for *all* side effects?
  for (BasicBlock *BB : L->blocks())
    for (auto &I : *BB)
      // TODO:isGuaranteedToTransfer
      if (I.mayHaveSideEffects())
        return false;

  bool Changed = false;
  // Finally, do the actual predication for all predicatable blocks. A couple
  // of notes here:
  // 1) We don't bother to constant fold dominated exits with identical exit
  //    counts; that's simply a form of CSE/equality propagation and we leave
  //    it for dedicated passes.
  // 2) We insert the comparison at the branch. Hoisting introduces additional
  //    legality constraints and we leave that to dedicated logic. We want to
  //    predicate even if we can't insert a loop invariant expression as
  //    peeling or unrolling will likely reduce the cost of the otherwise loop
  //    varying check.
  Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
  IRBuilder<> B(L->getLoopPreheader()->getTerminator());
  Value *ExactBTCV = nullptr; // Lazily generated if needed.
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);

    auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
    Value *NewCond;
    if (ExitCount == ExactBTC) {
      // This exit's count equals the BTC: the loop must exit through it (or
      // an earlier exit on the same iteration), so fold to "always exit".
      NewCond = L->contains(BI->getSuccessor(0)) ?
        B.getFalse() : B.getTrue();
    } else {
      // Replace the loop-varying condition with "iteration == this exit's
      // count", both expanded in the preheader (zext to a common width if
      // the two counts disagree on type).
      Value *ECV = Rewriter.expandCodeFor(ExitCount);
      if (!ExactBTCV)
        ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
      Value *RHS = ExactBTCV;
      if (ECV->getType() != RHS->getType()) {
        Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
        ECV = B.CreateZExt(ECV, WiderTy);
        RHS = B.CreateZExt(RHS, WiderTy);
      }
      auto Pred = L->contains(BI->getSuccessor(0)) ?
        ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
      NewCond = B.CreateICmp(Pred, ECV, RHS);
    }
    Value *OldCond = BI->getCondition();
    BI->setCondition(NewCond);
    if (OldCond->use_empty())
      DeadInsts.emplace_back(OldCond);
    Changed = true;
  }

  return Changed;
}
1883
1884//===----------------------------------------------------------------------===//
1885// IndVarSimplify driver. Manage several subpasses of IV simplification.
1886//===----------------------------------------------------------------------===//
1887
/// Top-level driver: run the IV-simplification sub-passes on loop \p L in a
/// fixed order (FP-IV rewriting, simplify/extend, exit-value rewriting,
/// congruent-IV elimination, exit-condition canonicalization, exit
/// optimization/predication, and finally LFTR), then clean up dead code.
///
/// \returns true if any transformation changed the IR.
bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
  //    and we're in trouble if we can't find the induction variable even when
  //    we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

  bool Changed = false;
  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl) {
    if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
                                             ReplaceExitValue, DeadInsts)) {
      NumReplaced += Rewrites;
      Changed = true;
    }
  }

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);

  // Try to convert exit conditions to unsigned and rotate computation
  // out of the loop. Note: Handles invalidation internally if needed.
  Changed |= canonicalizeExitCondition(L);

  // Try to eliminate loop exits based on analyzeable exit counts
  if (optimizeLoopExits(L, Rewriter))  {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    // Some nested loops may share same folded exit basic block,
    // thus we need to notify top most loop.
    SE->forgetTopmostLoop(L);
  }

  // Try to form loop invariant tests for loop exits by changing how many
  // iterations of the loop run when that is unobservable.
  if (predicateLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    SE->forgetLoop(L);
  }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    BasicBlock *PreHeader = L->getLoopPreheader();

    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one. Otherwise, we're changing how many times the innermost
      // loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero which
      // weren't when optimizeLoopExits saw them. Arguably, we should iterate
      // until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions. Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
                                       TTI, PreHeader->getTerminator()))
        continue;

      // Check preconditions for proper SCEVExpander operation. SCEV does not
      // express SCEVExpander's dependencies, such as LoopSimplify. Instead
      // any pass that uses the SCEVExpander must do it. This does not work
      // well for loop passes because SCEVExpander makes assumptions about
      // all loops, while LoopPassManager only forces the current loop to be
      // simplified.
      //
      // FIXME: SCEV expansion has no way to bail out, so the caller must
      // explicitly check any assumptions made by SCEV. Brittle.
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
      if (!AR || AR->getLoop()->getLoopPreheader())
        Changed |= linearFunctionTestReplace(L, ExitingBB,
                                             ExitCount, IndVar,
                                             Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty()) {
    Value *V = DeadInsts.pop_back_val();

    if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
      Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
    else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
      Changed |=
          RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
  }

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");
  if (VerifyMemorySSA && MSSAU)
    MSSAU->getMemorySSA()->verifyMemorySSA();

  return Changed;
}
2058
2061 LPMUpdater &) {
2062 Function *F = L.getHeader()->getParent();
2063 const DataLayout &DL = F->getParent()->getDataLayout();
2064
2065 IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
2066 WidenIndVars && AllowIVWidening);
2067 if (!IVS.run(&L))
2068 return PreservedAnalyses::all();
2069
2070 auto PA = getLoopPassPreservedAnalyses();
2071 PA.preserveSet<CFGAnalyses>();
2072 if (AR.MSSA)
2073 PA.preserve<MemorySSAAnalysis>();
2074 return PA;
2075}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Rewrite undef for PHI
This file declares a class to represent arbitrary precision floating point values and provide a varie...
assume Assume Builder
SmallVector< MachineOperand, 4 > Cond
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:678
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
static Value * genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, const SCEV *ExitCount, bool UsePostInc, Loop *L, SCEVExpander &Rewriter, ScalarEvolution *SE)
Insert an IR expression which computes the value held by the IV IndVar (which must be an loop counter...
static void replaceExitCond(BranchInst *BI, Value *NewCond, SmallVectorImpl< WeakTrackingVH > &DeadInsts)
static cl::opt< bool > DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), cl::desc("Disable Linear Function Test Replace optimization"))
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB)
Whether the current loop exit test is based on this value.
static cl::opt< ReplaceExitVal > ReplaceExitValue("replexitval", cl::Hidden, cl::init(OnlyCheapRepl), cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), clEnumValN(OnlyCheapRepl, "cheap", "only replace exit value when the cost is cheap"), clEnumValN(UnusedIndVarInLoop, "unusedindvarinloop", "only replace exit value when it is an unused " "induction variable in the loop and has cheap replacement cost"), clEnumValN(NoHardUse, "noharduse", "only replace exit values when loop def likely dead"), clEnumValN(AlwaysRepl, "always", "always replace exit value whenever possible")))
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE, const TargetTransformInfo *TTI)
Update information about the induction variable that is extended by this sign or zero extend operatio...
static void replaceLoopPHINodesWithPreheaderValues(LoopInfo *LI, Loop *L, SmallVectorImpl< WeakTrackingVH > &DeadInsts, ScalarEvolution &SE)
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB)
linearFunctionTestReplace policy.
static bool optimizeLoopExitWithUnknownExitCount(const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter, bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter, SmallVectorImpl< WeakTrackingVH > &DeadInsts)
static Value * createInvariantCond(const Loop *L, BasicBlock *ExitingBB, const ScalarEvolution::LoopInvariantPredicate &LIP, SCEVExpander &Rewriter)
static bool isLoopCounter(PHINode *Phi, Loop *L, ScalarEvolution *SE)
Return true if the given phi is a "counter" in L.
static std::optional< Value * > createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB, const SCEV *MaxIter, bool Inverted, bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter)
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl< Value * > &Visited, unsigned Depth)
Recursive helper for hasConcreteDef().
static bool hasConcreteDef(Value *V)
Return true if the given value is concrete.
static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken, SmallVectorImpl< WeakTrackingVH > &DeadInsts)
static PHINode * getLoopPhiForCounter(Value *IncV, Loop *L)
Given an Value which is hoped to be part of an add recurance in the given loop, return the associated...
static Constant * createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB, bool IsTaken)
static cl::opt< bool > UsePostIncrementRanges("indvars-post-increment-ranges", cl::Hidden, cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), cl::init(true))
#define DEBUG_TYPE
static PHINode * FindLoopCounter(Loop *L, BasicBlock *ExitingBB, const SCEV *BECount, ScalarEvolution *SE, DominatorTree *DT)
Search the loop header for a loop counter (anadd rec w/step of one) suitable for use by LFTR.
static cl::opt< bool > AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true), cl::desc("Allow widening of indvars to eliminate s/zext"))
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal)
Convert APF to an integer, if possible.
static cl::opt< bool > LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true), cl::desc("Predicate conditions in read only loops"))
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Module.h This file contains the declarations for the Module class.
IntegerType * Int32Ty
#define P(N)
if(VerifyEach)
This header defines various interfaces for pass management in LLVM.
@ SI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This pass exposes codegen information to IR-level passes.
This defines the Use class.
Virtual Register Rewriter
Definition: VirtRegMap.cpp:237
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition: blake3_impl.h:77
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1159
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:620
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:323
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:381
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:254
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:127
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:146
Conditional or Unconditional Branch instruction.
void setCondition(Value *V)
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
Represents analyses that only rely on functions' control flow.
Definition: PassManager.h:113
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:428
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:675
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:718
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:721
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:747
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:748
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:724
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:733
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:722
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:723
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:745
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:732
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:726
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:729
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:743
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:730
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:725
@ ICMP_EQ
equal
Definition: InstrTypes.h:739
@ ICMP_NE
not equal
Definition: InstrTypes.h:740
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:746
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:734
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:731
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:832
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:808
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:260
const APFloat & getValueAPF() const
Definition: Constants.h:301
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:888
This is an important base class in LLVM.
Definition: Constant.h:41
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU.
Definition: DataLayout.h:260
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
This instruction compares its operands according to the predicate given to the constructor.
This instruction compares its operands according to the predicate given to the constructor.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2564
Interface for visiting interesting IV users that are recognized but not simplified by this utility.
virtual void visitCast(CastInst *Cast)=0
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:70
const BasicBlock * getParent() const
Definition: Instruction.h:90
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:168
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Class to represent integer types.
Definition: DerivedTypes.h:40
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:183
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:992
bool replacementPreservesLCSSAForm(Instruction *From, Value *To)
Returns true if replacing From with To everywhere is guaranteed to preserve LCSSA form.
Definition: LoopInfo.h:1140
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:936
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:700
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:398
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:305
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
void setIncomingValue(unsigned i, Value *V)
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
static unsigned getIncomingValueNumForOperand(unsigned i)
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1750
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const SCEVAddRecExpr * getPostIncExpr(ScalarEvolution &SE) const
Return an expression representing the value of this expression one iteration of the loop ahead.
This class uses information about analyzed scalars to rewrite expressions in canonical form.
This class represents an analyzed expression in the program.
bool isOne() const
Return true if the expression is a constant one.
bool isZero() const
Return true if the expression is a constant zero.
Type * getType() const
Return the LLVM type of this SCEV expression.
This class represents a cast from signed integer to floating point.
The main scalar evolution driver.
Type * getWiderType(Type *Ty1, Type *Ty2) const
const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
ExitLimit computeExitLimitFromCond(const Loop *L, Value *ExitCond, bool ExitIfTrue, bool ControlsOnlyExit, bool AllowPredicates=false)
Compute the number of times the backedge of the specified loop will execute if its exit condition wer...
uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
bool isKnownPredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
std::optional< bool > evaluatePredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI)
Check whether the condition described by Pred, LHS, and RHS is true or false in the given Context.
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may affect its v...
const SCEV * getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
const SCEV * getMinusOne(Type *Ty)
Return a SCEV for the constant -1 of a specific type.
const SCEV * getNoopOrZeroExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
const SCEV * getUMinFromMismatchedTypes(const SCEV *LHS, const SCEV *RHS, bool Sequential=false)
Promote the operands to the wider of the types using zero-extension, and then perform a umin operatio...
const SCEV * getExitCount(const Loop *L, const BasicBlock *ExitingBlock, ExitCountKind Kind=Exact)
Return the number of times the backedge executes before the given exit would be taken; if not exactly...
std::optional< LoopInvariantPredicate > getLoopInvariantExitCondDuringFirstIterations(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L, const Instruction *CtxI, const SCEV *MaxIter)
If the result of the predicate LHS Pred RHS is loop invariant with respect to L at given Context duri...
size_type size() const
Definition: SmallPtrSet.h:93
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:379
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941
void resize(size_type N)
Definition: SmallVector.h:642
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:258
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:231
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
iterator_range< user_iterator > users()
Definition: Value.h:421
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:996
user_iterator_impl< User > user_iterator
Definition: Value.h:390
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:204
self_iterator getIterator()
Definition: ilist_node.h:82
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:979
CastClass_match< OpTy, Instruction::ZExt > m_ZExt(const OpTy &Op)
Matches ZExt.
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:703
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root, Instruction *OnPathTo, DominatorTree *DT)
Return true if undefined behavior would provably be executed on the path to OnPathTo if Root produced...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1819
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:537
@ Done
Definition: Threading.h:61
PHINode * createWideIV(const WideIVInfo &WI, LoopInfo *LI, ScalarEvolution *SE, SCEVExpander &Rewriter, DominatorTree *DT, SmallVectorImpl< WeakTrackingVH > &DeadInsts, unsigned &NumElimExt, unsigned &NumWidened, bool HasGuards, bool UsePostIncrementRanges)
Widen Induction Variables - Extend the width of an IV to cover its widest uses.
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:99
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1744
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
cl::opt< unsigned > SCEVCheapExpansionBudget
bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, const TargetTransformInfo *TTI, SmallVectorImpl< WeakTrackingVH > &Dead, SCEVExpander &Rewriter, IVVisitor *V=nullptr)
simplifyUsersOfIV - Simplify instructions that use this induction variable by using ScalarEvolution t...
TargetTransformInfo TTI
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:89
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q, OptimizationRemarkEmitter *ORE=nullptr)
See if we can compute a simplified version of this instruction.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2113
bool isAlmostDeadIV(PHINode *IV, BasicBlock *LatchBlock, Value *Cond)
Return true if the induction variable IV in a Loop whose latch is LatchBlock would become dead if the...
Definition: LoopUtils.cpp:469
int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed,...
Definition: LoopUtils.cpp:1272
bool RecursivelyDeleteDeadPHINode(PHINode *PN, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is an effectively dead PHI node, due to being a def-use chain of single-use no...
Definition: Local.cpp:645
@ UnusedIndVarInLoop
Definition: LoopUtils.h:445
@ OnlyCheapRepl
Definition: LoopUtils.h:443
@ NeverRepl
Definition: LoopUtils.h:442
@ NoHardUse
Definition: LoopUtils.h:444
@ AlwaysRepl
Definition: LoopUtils.h:446
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Collect information about induction variables that are used by sign/zero extend operations.
PHINode * NarrowIV