LLVM 19.0.0git
IndVarSimplify.cpp
Go to the documentation of this file.
1//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This transformation analyzes and transforms the induction variables (and
10// computations derived from them) into simpler forms suitable for subsequent
11// analysis and transformation.
12//
13// If the trip count of a loop is computable, this pass also makes the following
14// changes:
15// 1. The exit condition for the loop is canonicalized to compare the
16// induction value against the exit value. This turns loops like:
17// 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18// 2. Any use outside of the loop of an expression derived from the indvar
19// is changed to compute the derived value outside of the loop, eliminating
20// the dependence on the exit value of the induction variable. If the only
21// purpose of the loop is to compute the exit value of some derived
22// expression, this transformation will make the loop dead.
23//
24//===----------------------------------------------------------------------===//
25
27#include "llvm/ADT/APFloat.h"
28#include "llvm/ADT/ArrayRef.h"
29#include "llvm/ADT/STLExtras.h"
31#include "llvm/ADT/SmallSet.h"
33#include "llvm/ADT/Statistic.h"
44#include "llvm/IR/BasicBlock.h"
45#include "llvm/IR/Constant.h"
47#include "llvm/IR/Constants.h"
48#include "llvm/IR/DataLayout.h"
50#include "llvm/IR/Dominators.h"
51#include "llvm/IR/Function.h"
52#include "llvm/IR/IRBuilder.h"
53#include "llvm/IR/InstrTypes.h"
54#include "llvm/IR/Instruction.h"
57#include "llvm/IR/Intrinsics.h"
58#include "llvm/IR/Module.h"
59#include "llvm/IR/Operator.h"
60#include "llvm/IR/PassManager.h"
62#include "llvm/IR/Type.h"
63#include "llvm/IR/Use.h"
64#include "llvm/IR/User.h"
65#include "llvm/IR/Value.h"
66#include "llvm/IR/ValueHandle.h"
70#include "llvm/Support/Debug.h"
79#include <cassert>
80#include <cstdint>
81#include <utility>
82
83using namespace llvm;
84using namespace PatternMatch;
85
86#define DEBUG_TYPE "indvars"
87
88STATISTIC(NumWidened , "Number of indvars widened");
89STATISTIC(NumReplaced , "Number of exit values replaced");
90STATISTIC(NumLFTR , "Number of loop exit tests replaced");
91STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
92STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
93
95 "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
96 cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
98 clEnumValN(NeverRepl, "never", "never replace exit value"),
100 "only replace exit value when the cost is cheap"),
102 UnusedIndVarInLoop, "unusedindvarinloop",
103 "only replace exit value when it is an unused "
104 "induction variable in the loop and has cheap replacement cost"),
105 clEnumValN(NoHardUse, "noharduse",
106 "only replace exit values when loop def likely dead"),
107 clEnumValN(AlwaysRepl, "always",
108 "always replace exit value whenever possible")));
109
111 "indvars-post-increment-ranges", cl::Hidden,
112 cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
113 cl::init(true));
114
115static cl::opt<bool>
116DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
117 cl::desc("Disable Linear Function Test Replace optimization"));
118
119static cl::opt<bool>
120LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
121 cl::desc("Predicate conditions in read only loops"));
122
123static cl::opt<bool>
124AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
125 cl::desc("Allow widening of indvars to eliminate s/zext"));
126
127namespace {
128
129class IndVarSimplify {
130 LoopInfo *LI;
131 ScalarEvolution *SE;
132 DominatorTree *DT;
133 const DataLayout &DL;
136 std::unique_ptr<MemorySSAUpdater> MSSAU;
137
139 bool WidenIndVars;
140
141 bool RunUnswitching = false;
142
143 bool handleFloatingPointIV(Loop *L, PHINode *PH);
144 bool rewriteNonIntegerIVs(Loop *L);
145
146 bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
147 /// Try to improve our exit conditions by converting condition from signed
148 /// to unsigned or rotating computation out of the loop.
149 /// (See inline comment about why this is duplicated from simplifyAndExtend)
150 bool canonicalizeExitCondition(Loop *L);
151 /// Try to eliminate loop exits based on analyzeable exit counts
152 bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
153 /// Try to form loop invariant tests for loop exits by changing how many
154 /// iterations of the loop run when that is unobservable.
155 bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
156
157 bool rewriteFirstIterationLoopExitValues(Loop *L);
158
159 bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
160 const SCEV *ExitCount,
161 PHINode *IndVar, SCEVExpander &Rewriter);
162
163 bool sinkUnusedInvariants(Loop *L);
164
165public:
166 IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
167 const DataLayout &DL, TargetLibraryInfo *TLI,
168 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
169 : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
170 WidenIndVars(WidenIndVars) {
171 if (MSSA)
172 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
173 }
174
175 bool run(Loop *L);
176
177 bool runUnswitching() const { return RunUnswitching; }
178};
179
180} // end anonymous namespace
181
182//===----------------------------------------------------------------------===//
183// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
184//===----------------------------------------------------------------------===//
185
186/// Convert APF to an integer, if possible.
187static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
188 bool isExact = false;
189 // See if we can convert this to an int64_t
190 uint64_t UIntVal;
191 if (APF.convertToInteger(MutableArrayRef(UIntVal), 64, true,
192 APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
193 !isExact)
194 return false;
195 IntVal = UIntVal;
196 return true;
197}
198
199/// If the loop has floating induction variable then insert corresponding
200/// integer induction variable if possible.
201/// For example,
202/// for(double i = 0; i < 10000; ++i)
203/// bar(i)
204/// is converted into
205/// for(int i = 0; i < 10000; ++i)
206/// bar((double)i);
207bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
208 unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
209 unsigned BackEdge = IncomingEdge^1;
210
211 // Check incoming value.
212 auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
213
214 int64_t InitValue;
215 if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
216 return false;
217
218 // Check IV increment. Reject this PN if increment operation is not
219 // an add or increment value can not be represented by an integer.
220 auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
221 if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
222
223 // If this is not an add of the PHI with a constantfp, or if the constant fp
224 // is not an integer, bail out.
225 ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
226 int64_t IncValue;
227 if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
228 !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
229 return false;
230
231 // Check Incr uses. One user is PN and the other user is an exit condition
232 // used by the conditional terminator.
233 Value::user_iterator IncrUse = Incr->user_begin();
234 Instruction *U1 = cast<Instruction>(*IncrUse++);
235 if (IncrUse == Incr->user_end()) return false;
236 Instruction *U2 = cast<Instruction>(*IncrUse++);
237 if (IncrUse != Incr->user_end()) return false;
238
239 // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
240 // only used by a branch, we can't transform it.
241 FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
242 if (!Compare)
243 Compare = dyn_cast<FCmpInst>(U2);
244 if (!Compare || !Compare->hasOneUse() ||
245 !isa<BranchInst>(Compare->user_back()))
246 return false;
247
248 BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
249
250 // We need to verify that the branch actually controls the iteration count
251 // of the loop. If not, the new IV can overflow and no one will notice.
252 // The branch block must be in the loop and one of the successors must be out
253 // of the loop.
254 assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
255 if (!L->contains(TheBr->getParent()) ||
256 (L->contains(TheBr->getSuccessor(0)) &&
257 L->contains(TheBr->getSuccessor(1))))
258 return false;
259
260 // If it isn't a comparison with an integer-as-fp (the exit value), we can't
261 // transform it.
262 ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
263 int64_t ExitValue;
264 if (ExitValueVal == nullptr ||
265 !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
266 return false;
267
268 // Find new predicate for integer comparison.
270 switch (Compare->getPredicate()) {
271 default: return false; // Unknown comparison.
273 case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
275 case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
277 case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
279 case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
281 case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
283 case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
284 }
285
286 // We convert the floating point induction variable to a signed i32 value if
287 // we can. This is only safe if the comparison will not overflow in a way
288 // that won't be trapped by the integer equivalent operations. Check for this
289 // now.
290 // TODO: We could use i64 if it is native and the range requires it.
291
292 // The start/stride/exit values must all fit in signed i32.
293 if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
294 return false;
295
296 // If not actually striding (add x, 0.0), avoid touching the code.
297 if (IncValue == 0)
298 return false;
299
300 // Positive and negative strides have different safety conditions.
301 if (IncValue > 0) {
302 // If we have a positive stride, we require the init to be less than the
303 // exit value.
304 if (InitValue >= ExitValue)
305 return false;
306
307 uint32_t Range = uint32_t(ExitValue-InitValue);
308 // Check for infinite loop, either:
309 // while (i <= Exit) or until (i > Exit)
310 if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
311 if (++Range == 0) return false; // Range overflows.
312 }
313
314 unsigned Leftover = Range % uint32_t(IncValue);
315
316 // If this is an equality comparison, we require that the strided value
317 // exactly land on the exit value, otherwise the IV condition will wrap
318 // around and do things the fp IV wouldn't.
319 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
320 Leftover != 0)
321 return false;
322
323 // If the stride would wrap around the i32 before exiting, we can't
324 // transform the IV.
325 if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
326 return false;
327 } else {
328 // If we have a negative stride, we require the init to be greater than the
329 // exit value.
330 if (InitValue <= ExitValue)
331 return false;
332
333 uint32_t Range = uint32_t(InitValue-ExitValue);
334 // Check for infinite loop, either:
335 // while (i >= Exit) or until (i < Exit)
336 if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
337 if (++Range == 0) return false; // Range overflows.
338 }
339
340 unsigned Leftover = Range % uint32_t(-IncValue);
341
342 // If this is an equality comparison, we require that the strided value
343 // exactly land on the exit value, otherwise the IV condition will wrap
344 // around and do things the fp IV wouldn't.
345 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
346 Leftover != 0)
347 return false;
348
349 // If the stride would wrap around the i32 before exiting, we can't
350 // transform the IV.
351 if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
352 return false;
353 }
354
356
357 // Insert new integer induction variable.
358 PHINode *NewPHI =
359 PHINode::Create(Int32Ty, 2, PN->getName() + ".int", PN->getIterator());
360 NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
361 PN->getIncomingBlock(IncomingEdge));
362
363 Value *NewAdd =
364 BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
365 Incr->getName() + ".int", Incr->getIterator());
366 NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
367
368 ICmpInst *NewCompare =
369 new ICmpInst(TheBr->getIterator(), NewPred, NewAdd,
370 ConstantInt::get(Int32Ty, ExitValue), Compare->getName());
371
372 // In the following deletions, PN may become dead and may be deleted.
373 // Use a WeakTrackingVH to observe whether this happens.
374 WeakTrackingVH WeakPH = PN;
375
376 // Delete the old floating point exit comparison. The branch starts using the
377 // new comparison.
378 NewCompare->takeName(Compare);
379 Compare->replaceAllUsesWith(NewCompare);
380 RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());
381
382 // Delete the old floating point increment.
383 Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
384 RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());
385
386 // If the FP induction variable still has uses, this is because something else
387 // in the loop uses its value. In order to canonicalize the induction
388 // variable, we chose to eliminate the IV and rewrite it in terms of an
389 // int->fp cast.
390 //
391 // We give preference to sitofp over uitofp because it is faster on most
392 // platforms.
393 if (WeakPH) {
394 Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
396 PN->replaceAllUsesWith(Conv);
397 RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
398 }
399 return true;
400}
401
402bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
403 // First step. Check to see if there are any floating-point recurrences.
404 // If there are, change them into integer recurrences, permitting analysis by
405 // the SCEV routines.
406 BasicBlock *Header = L->getHeader();
407
409 for (PHINode &PN : Header->phis())
410 PHIs.push_back(&PN);
411
412 bool Changed = false;
413 for (WeakTrackingVH &PHI : PHIs)
414 if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHI))
415 Changed |= handleFloatingPointIV(L, PN);
416
417 // If the loop previously had floating-point IV, ScalarEvolution
418 // may not have been able to compute a trip count. Now that we've done some
419 // re-writing, the trip count may be computable.
420 if (Changed)
421 SE->forgetLoop(L);
422 return Changed;
423}
424
425//===---------------------------------------------------------------------===//
426// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
427// they will exit at the first iteration.
428//===---------------------------------------------------------------------===//
429
430/// Check to see if this loop has loop invariant conditions which lead to loop
431/// exits. If so, we know that if the exit path is taken, it is at the first
432/// loop iteration. This lets us predict exit values of PHI nodes that live in
433/// loop header.
434bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
435 // Verify the input to the pass is already in LCSSA form.
436 assert(L->isLCSSAForm(*DT));
437
439 L->getUniqueExitBlocks(ExitBlocks);
440
441 bool MadeAnyChanges = false;
442 for (auto *ExitBB : ExitBlocks) {
443 // If there are no more PHI nodes in this exit block, then no more
444 // values defined inside the loop are used on this path.
445 for (PHINode &PN : ExitBB->phis()) {
446 for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
447 IncomingValIdx != E; ++IncomingValIdx) {
448 auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
449
450 // Can we prove that the exit must run on the first iteration if it
451 // runs at all? (i.e. early exits are fine for our purposes, but
452 // traces which lead to this exit being taken on the 2nd iteration
453 // aren't.) Note that this is about whether the exit branch is
454 // executed, not about whether it is taken.
455 if (!L->getLoopLatch() ||
456 !DT->dominates(IncomingBB, L->getLoopLatch()))
457 continue;
458
459 // Get condition that leads to the exit path.
460 auto *TermInst = IncomingBB->getTerminator();
461
462 Value *Cond = nullptr;
463 if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
464 // Must be a conditional branch, otherwise the block
465 // should not be in the loop.
466 Cond = BI->getCondition();
467 } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
468 Cond = SI->getCondition();
469 else
470 continue;
471
472 if (!L->isLoopInvariant(Cond))
473 continue;
474
475 auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
476
477 // Only deal with PHIs in the loop header.
478 if (!ExitVal || ExitVal->getParent() != L->getHeader())
479 continue;
480
481 // If ExitVal is a PHI on the loop header, then we know its
482 // value along this exit because the exit can only be taken
483 // on the first iteration.
484 auto *LoopPreheader = L->getLoopPreheader();
485 assert(LoopPreheader && "Invalid loop");
486 int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
487 if (PreheaderIdx != -1) {
488 assert(ExitVal->getParent() == L->getHeader() &&
489 "ExitVal must be in loop header");
490 MadeAnyChanges = true;
491 PN.setIncomingValue(IncomingValIdx,
492 ExitVal->getIncomingValue(PreheaderIdx));
493 SE->forgetValue(&PN);
494 }
495 }
496 }
497 }
498 return MadeAnyChanges;
499}
500
501//===----------------------------------------------------------------------===//
502// IV Widening - Extend the width of an IV to cover its widest uses.
503//===----------------------------------------------------------------------===//
504
505/// Update information about the induction variable that is extended by this
506/// sign or zero extend operation. This is used to determine the final width of
507/// the IV before actually widening it.
508static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
509 ScalarEvolution *SE,
510 const TargetTransformInfo *TTI) {
511 bool IsSigned = Cast->getOpcode() == Instruction::SExt;
512 if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
513 return;
514
515 Type *Ty = Cast->getType();
516 uint64_t Width = SE->getTypeSizeInBits(Ty);
517 if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
518 return;
519
520 // Check that `Cast` actually extends the induction variable (we rely on this
521 // later). This takes care of cases where `Cast` is extending a truncation of
522 // the narrow induction variable, and thus can end up being narrower than the
523 // "narrow" induction variable.
524 uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
525 if (NarrowIVWidth >= Width)
526 return;
527
528 // Cast is either an sext or zext up to this point.
529 // We should not widen an indvar if arithmetics on the wider indvar are more
530 // expensive than those on the narrower indvar. We check only the cost of ADD
531 // because at least an ADD is required to increment the induction variable. We
532 // could compute more comprehensively the cost of all instructions on the
533 // induction variable when necessary.
534 if (TTI &&
535 TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
536 TTI->getArithmeticInstrCost(Instruction::Add,
537 Cast->getOperand(0)->getType())) {
538 return;
539 }
540
541 if (!WI.WidestNativeType ||
542 Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
544 WI.IsSigned = IsSigned;
545 return;
546 }
547
548 // We extend the IV to satisfy the sign of its user(s), or 'signed'
549 // if there are multiple users with both sign- and zero extensions,
550 // in order not to introduce nondeterministic behaviour based on the
551 // unspecified order of a PHI nodes' users-iterator.
552 WI.IsSigned |= IsSigned;
553}
554
555//===----------------------------------------------------------------------===//
556// Live IV Reduction - Minimize IVs live across the loop.
557//===----------------------------------------------------------------------===//
558
559//===----------------------------------------------------------------------===//
560// Simplification of IV users based on SCEV evaluation.
561//===----------------------------------------------------------------------===//
562
563namespace {
564
565class IndVarSimplifyVisitor : public IVVisitor {
566 ScalarEvolution *SE;
568 PHINode *IVPhi;
569
570public:
571 WideIVInfo WI;
572
573 IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
575 const DominatorTree *DTree)
576 : SE(SCEV), TTI(TTI), IVPhi(IV) {
577 DT = DTree;
578 WI.NarrowIV = IVPhi;
579 }
580
581 // Implement the interface used by simplifyUsersOfIV.
582 void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
583};
584
585} // end anonymous namespace
586
587/// Iteratively perform simplification on a worklist of IV users. Each
588/// successive simplification may push more users which may themselves be
589/// candidates for simplification.
590///
591/// Sign/Zero extend elimination is interleaved with IV simplification.
592bool IndVarSimplify::simplifyAndExtend(Loop *L,
593 SCEVExpander &Rewriter,
594 LoopInfo *LI) {
596
597 auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
598 Intrinsic::getName(Intrinsic::experimental_guard));
599 bool HasGuards = GuardDecl && !GuardDecl->use_empty();
600
602 for (PHINode &PN : L->getHeader()->phis())
603 LoopPhis.push_back(&PN);
604
605 // Each round of simplification iterates through the SimplifyIVUsers worklist
606 // for all current phis, then determines whether any IVs can be
607 // widened. Widening adds new phis to LoopPhis, inducing another round of
608 // simplification on the wide IVs.
609 bool Changed = false;
610 while (!LoopPhis.empty()) {
611 // Evaluate as many IV expressions as possible before widening any IVs. This
612 // forces SCEV to set no-wrap flags before evaluating sign/zero
613 // extension. The first time SCEV attempts to normalize sign/zero extension,
614 // the result becomes final. So for the most predictable results, we delay
615 // evaluation of sign/zero extend evaluation until needed, and avoid running
616 // other SCEV based analysis prior to simplifyAndExtend.
617 do {
618 PHINode *CurrIV = LoopPhis.pop_back_val();
619
620 // Information about sign/zero extensions of CurrIV.
621 IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
622
623 const auto &[C, U] = simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts,
624 Rewriter, &Visitor);
625
626 Changed |= C;
627 RunUnswitching |= U;
628 if (Visitor.WI.WidestNativeType) {
629 WideIVs.push_back(Visitor.WI);
630 }
631 } while(!LoopPhis.empty());
632
633 // Continue if we disallowed widening.
634 if (!WidenIndVars)
635 continue;
636
637 for (; !WideIVs.empty(); WideIVs.pop_back()) {
638 unsigned ElimExt;
639 unsigned Widened;
640 if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
641 DT, DeadInsts, ElimExt, Widened,
642 HasGuards, UsePostIncrementRanges)) {
643 NumElimExt += ElimExt;
644 NumWidened += Widened;
645 Changed = true;
646 LoopPhis.push_back(WidePhi);
647 }
648 }
649 }
650 return Changed;
651}
652
653//===----------------------------------------------------------------------===//
654// linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
655//===----------------------------------------------------------------------===//
656
657/// Given an Value which is hoped to be part of an add recurance in the given
658/// loop, return the associated Phi node if so. Otherwise, return null. Note
659/// that this is less general than SCEVs AddRec checking.
661 Instruction *IncI = dyn_cast<Instruction>(IncV);
662 if (!IncI)
663 return nullptr;
664
665 switch (IncI->getOpcode()) {
666 case Instruction::Add:
667 case Instruction::Sub:
668 break;
669 case Instruction::GetElementPtr:
670 // An IV counter must preserve its type.
671 if (IncI->getNumOperands() == 2)
672 break;
673 [[fallthrough]];
674 default:
675 return nullptr;
676 }
677
678 PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
679 if (Phi && Phi->getParent() == L->getHeader()) {
680 if (L->isLoopInvariant(IncI->getOperand(1)))
681 return Phi;
682 return nullptr;
683 }
684 if (IncI->getOpcode() == Instruction::GetElementPtr)
685 return nullptr;
686
687 // Allow add/sub to be commuted.
688 Phi = dyn_cast<PHINode>(IncI->getOperand(1));
689 if (Phi && Phi->getParent() == L->getHeader()) {
690 if (L->isLoopInvariant(IncI->getOperand(0)))
691 return Phi;
692 }
693 return nullptr;
694}
695
696/// Whether the current loop exit test is based on this value. Currently this
697/// is limited to a direct use in the loop condition.
698static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
699 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
700 ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
701 // TODO: Allow non-icmp loop test.
702 if (!ICmp)
703 return false;
704
705 // TODO: Allow indirect use.
706 return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
707}
708
709/// linearFunctionTestReplace policy. Return true unless we can show that the
710/// current exit test is already sufficiently canonical.
711static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
712 assert(L->getLoopLatch() && "Must be in simplified form");
713
714 // Avoid converting a constant or loop invariant test back to a runtime
715 // test. This is critical for when SCEV's cached ExitCount is less precise
716 // than the current IR (such as after we've proven a particular exit is
717 // actually dead and thus the BE count never reaches our ExitCount.)
718 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
719 if (L->isLoopInvariant(BI->getCondition()))
720 return false;
721
722 // Do LFTR to simplify the exit condition to an ICMP.
723 ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
724 if (!Cond)
725 return true;
726
727 // Do LFTR to simplify the exit ICMP to EQ/NE
728 ICmpInst::Predicate Pred = Cond->getPredicate();
729 if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
730 return true;
731
732 // Look for a loop invariant RHS
733 Value *LHS = Cond->getOperand(0);
734 Value *RHS = Cond->getOperand(1);
735 if (!L->isLoopInvariant(RHS)) {
736 if (!L->isLoopInvariant(LHS))
737 return true;
738 std::swap(LHS, RHS);
739 }
740 // Look for a simple IV counter LHS
741 PHINode *Phi = dyn_cast<PHINode>(LHS);
742 if (!Phi)
743 Phi = getLoopPhiForCounter(LHS, L);
744
745 if (!Phi)
746 return true;
747
748 // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
749 int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
750 if (Idx < 0)
751 return true;
752
753 // Do LFTR if the exit condition's IV is *not* a simple counter.
754 Value *IncV = Phi->getIncomingValue(Idx);
755 return Phi != getLoopPhiForCounter(IncV, L);
756}
757
758/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
759/// down to checking that all operands are constant and listing instructions
760/// that may hide undef.
762 unsigned Depth) {
763 if (isa<Constant>(V))
764 return !isa<UndefValue>(V);
765
766 if (Depth >= 6)
767 return false;
768
769 // Conservatively handle non-constant non-instructions. For example, Arguments
770 // may be undef.
771 Instruction *I = dyn_cast<Instruction>(V);
772 if (!I)
773 return false;
774
775 // Load and return values may be undef.
776 if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
777 return false;
778
779 // Optimistically handle other instructions.
780 for (Value *Op : I->operands()) {
781 if (!Visited.insert(Op).second)
782 continue;
783 if (!hasConcreteDefImpl(Op, Visited, Depth+1))
784 return false;
785 }
786 return true;
787}
788
789/// Return true if the given value is concrete. We must prove that undef can
790/// never reach it.
791///
792/// TODO: If we decide that this is a good approach to checking for undef, we
793/// may factor it into a common location.
794static bool hasConcreteDef(Value *V) {
796 Visited.insert(V);
797 return hasConcreteDefImpl(V, Visited, 0);
798}
799
800/// Return true if the given phi is a "counter" in L. A counter is an
801/// add recurance (of integer or pointer type) with an arbitrary start, and a
802/// step of 1. Note that L must have exactly one latch.
803static bool isLoopCounter(PHINode* Phi, Loop *L,
804 ScalarEvolution *SE) {
805 assert(Phi->getParent() == L->getHeader());
806 assert(L->getLoopLatch());
807
808 if (!SE->isSCEVable(Phi->getType()))
809 return false;
810
811 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
812 if (!AR || AR->getLoop() != L || !AR->isAffine())
813 return false;
814
815 const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
816 if (!Step || !Step->isOne())
817 return false;
818
819 int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
820 Value *IncV = Phi->getIncomingValue(LatchIdx);
821 return (getLoopPhiForCounter(IncV, L) == Phi &&
822 isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
823}
824
825/// Search the loop header for a loop counter (anadd rec w/step of one)
826/// suitable for use by LFTR. If multiple counters are available, select the
827/// "best" one based profitable heuristics.
828///
829/// BECount may be an i8* pointer type. The pointer difference is already
830/// valid count without scaling the address stride, so it remains a pointer
831/// expression as far as SCEV is concerned.
832static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
833 const SCEV *BECount,
835 uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
836
837 Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
838
839 // Loop over all of the PHI nodes, looking for a simple counter.
840 PHINode *BestPhi = nullptr;
841 const SCEV *BestInit = nullptr;
842 BasicBlock *LatchBlock = L->getLoopLatch();
843 assert(LatchBlock && "Must be in simplified form");
844 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
845
846 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
847 PHINode *Phi = cast<PHINode>(I);
848 if (!isLoopCounter(Phi, L, SE))
849 continue;
850
851 const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
852
853 // AR may be a pointer type, while BECount is an integer type.
854 // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
855 // AR may not be a narrower type, or we may never exit.
856 uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
857 if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
858 continue;
859
860 // Avoid reusing a potentially undef value to compute other values that may
861 // have originally had a concrete definition.
862 if (!hasConcreteDef(Phi)) {
863 // We explicitly allow unknown phis as long as they are already used by
864 // the loop exit test. This is legal since performing LFTR could not
865 // increase the number of undef users.
866 Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
867 if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
868 !isLoopExitTestBasedOn(IncPhi, ExitingBB))
869 continue;
870 }
871
872 // Avoid introducing undefined behavior due to poison which didn't exist in
873 // the original program. (Annoyingly, the rules for poison and undef
874 // propagation are distinct, so this does NOT cover the undef case above.)
875 // We have to ensure that we don't introduce UB by introducing a use on an
876 // iteration where said IV produces poison. Our strategy here differs for
877 // pointers and integer IVs. For integers, we strip and reinfer as needed,
878 // see code in linearFunctionTestReplace. For pointers, we restrict
879 // transforms as there is no good way to reinfer inbounds once lost.
880 if (!Phi->getType()->isIntegerTy() &&
881 !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
882 continue;
883
884 const SCEV *Init = AR->getStart();
885
886 if (BestPhi && !isAlmostDeadIV(BestPhi, LatchBlock, Cond)) {
887 // Don't force a live loop counter if another IV can be used.
888 if (isAlmostDeadIV(Phi, LatchBlock, Cond))
889 continue;
890
891 // Prefer to count-from-zero. This is a more "canonical" counter form. It
892 // also prefers integer to pointer IVs.
893 if (BestInit->isZero() != Init->isZero()) {
894 if (BestInit->isZero())
895 continue;
896 }
897 // If two IVs both count from zero or both count from nonzero then the
898 // narrower is likely a dead phi that has been widened. Use the wider phi
899 // to allow the other to be eliminated.
900 else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
901 continue;
902 }
903 BestPhi = Phi;
904 BestInit = Init;
905 }
906 return BestPhi;
907}
908
909/// Insert an IR expression which computes the value held by the IV IndVar
910/// (which must be an loop counter w/unit stride) after the backedge of loop L
911/// is taken ExitCount times.
912static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
913 const SCEV *ExitCount, bool UsePostInc, Loop *L,
914 SCEVExpander &Rewriter, ScalarEvolution *SE) {
915 assert(isLoopCounter(IndVar, L, SE));
916 assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
917 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
918 assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
919
920 // For integer IVs, truncate the IV before computing the limit unless we
921 // know apriori that the limit must be a constant when evaluated in the
922 // bitwidth of the IV. We prefer (potentially) keeping a truncate of the
923 // IV in the loop over a (potentially) expensive expansion of the widened
924 // exit count add(zext(add)) expression.
925 if (IndVar->getType()->isIntegerTy() &&
926 SE->getTypeSizeInBits(AR->getType()) >
927 SE->getTypeSizeInBits(ExitCount->getType())) {
928 const SCEV *IVInit = AR->getStart();
929 if (!isa<SCEVConstant>(IVInit) || !isa<SCEVConstant>(ExitCount))
930 AR = cast<SCEVAddRecExpr>(SE->getTruncateExpr(AR, ExitCount->getType()));
931 }
932
933 const SCEVAddRecExpr *ARBase = UsePostInc ? AR->getPostIncExpr(*SE) : AR;
934 const SCEV *IVLimit = ARBase->evaluateAtIteration(ExitCount, *SE);
935 assert(SE->isLoopInvariant(IVLimit, L) &&
936 "Computed iteration count is not loop invariant!");
937 return Rewriter.expandCodeFor(IVLimit, ARBase->getType(),
938 ExitingBB->getTerminator());
939}
940
941/// This method rewrites the exit condition of the loop to be a canonical !=
942/// comparison against the incremented loop induction variable. This pass is
943/// able to rewrite the exit tests of any loop where the SCEV analysis can
944/// determine a loop-invariant trip count of the loop, which is actually a much
945/// broader range than just linear tests.
946bool IndVarSimplify::
947linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
948 const SCEV *ExitCount,
949 PHINode *IndVar, SCEVExpander &Rewriter) {
950 assert(L->getLoopLatch() && "Loop no longer in simplified form?");
951 assert(isLoopCounter(IndVar, L, SE));
952 Instruction * const IncVar =
953 cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
954
955 // Initialize CmpIndVar to the preincremented IV.
956 Value *CmpIndVar = IndVar;
957 bool UsePostInc = false;
958
959 // If the exiting block is the same as the backedge block, we prefer to
960 // compare against the post-incremented value, otherwise we must compare
961 // against the preincremented value.
962 if (ExitingBB == L->getLoopLatch()) {
963 // For pointer IVs, we chose to not strip inbounds which requires us not
964 // to add a potentially UB introducing use. We need to either a) show
965 // the loop test we're modifying is already in post-inc form, or b) show
966 // that adding a use must not introduce UB.
967 bool SafeToPostInc =
968 IndVar->getType()->isIntegerTy() ||
969 isLoopExitTestBasedOn(IncVar, ExitingBB) ||
970 mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
971 if (SafeToPostInc) {
972 UsePostInc = true;
973 CmpIndVar = IncVar;
974 }
975 }
976
977 // It may be necessary to drop nowrap flags on the incrementing instruction
978 // if either LFTR moves from a pre-inc check to a post-inc check (in which
979 // case the increment might have previously been poison on the last iteration
980 // only) or if LFTR switches to a different IV that was previously dynamically
981 // dead (and as such may be arbitrarily poison). We remove any nowrap flags
982 // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
983 // check), because the pre-inc addrec flags may be adopted from the original
984 // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
985 // TODO: This handling is inaccurate for one case: If we switch to a
986 // dynamically dead IV that wraps on the first loop iteration only, which is
987 // not covered by the post-inc addrec. (If the new IV was not dynamically
988 // dead, it could not be poison on the first iteration in the first place.)
989 if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
990 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
991 if (BO->hasNoUnsignedWrap())
992 BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
993 if (BO->hasNoSignedWrap())
994 BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
995 }
996
997 Value *ExitCnt = genLoopLimit(
998 IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
999 assert(ExitCnt->getType()->isPointerTy() ==
1000 IndVar->getType()->isPointerTy() &&
1001 "genLoopLimit missed a cast");
1002
1003 // Insert a new icmp_ne or icmp_eq instruction before the branch.
1004 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1006 if (L->contains(BI->getSuccessor(0)))
1007 P = ICmpInst::ICMP_NE;
1008 else
1009 P = ICmpInst::ICMP_EQ;
1010
1011 IRBuilder<> Builder(BI);
1012
1013 // The new loop exit condition should reuse the debug location of the
1014 // original loop exit condition.
1015 if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
1016 Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
1017
1018 // For integer IVs, if we evaluated the limit in the narrower bitwidth to
1019 // avoid the expensive expansion of the limit expression in the wider type,
1020 // emit a truncate to narrow the IV to the ExitCount type. This is safe
1021 // since we know (from the exit count bitwidth), that we can't self-wrap in
1022 // the narrower type.
1023 unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
1024 unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
1025 if (CmpIndVarSize > ExitCntSize) {
1026 assert(!CmpIndVar->getType()->isPointerTy() &&
1027 !ExitCnt->getType()->isPointerTy());
1028
1029 // Before resorting to actually inserting the truncate, use the same
1030 // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
1031 // the other side of the comparison instead. We still evaluate the limit
1032 // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
1033 // a truncate within in.
1034 bool Extended = false;
1035 const SCEV *IV = SE->getSCEV(CmpIndVar);
1036 const SCEV *TruncatedIV = SE->getTruncateExpr(IV, ExitCnt->getType());
1037 const SCEV *ZExtTrunc =
1038 SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
1039
1040 if (ZExtTrunc == IV) {
1041 Extended = true;
1042 ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
1043 "wide.trip.count");
1044 } else {
1045 const SCEV *SExtTrunc =
1046 SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
1047 if (SExtTrunc == IV) {
1048 Extended = true;
1049 ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
1050 "wide.trip.count");
1051 }
1052 }
1053
1054 if (Extended) {
1055 bool Discard;
1056 L->makeLoopInvariant(ExitCnt, Discard);
1057 } else
1058 CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
1059 "lftr.wideiv");
1060 }
1061 LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
1062 << " LHS:" << *CmpIndVar << '\n'
1063 << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
1064 << "\n"
1065 << " RHS:\t" << *ExitCnt << "\n"
1066 << "ExitCount:\t" << *ExitCount << "\n"
1067 << " was: " << *BI->getCondition() << "\n");
1068
1069 Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
1070 Value *OrigCond = BI->getCondition();
1071 // It's tempting to use replaceAllUsesWith here to fully replace the old
1072 // comparison, but that's not immediately safe, since users of the old
1073 // comparison may not be dominated by the new comparison. Instead, just
1074 // update the branch to use the new comparison; in the common case this
1075 // will make old comparison dead.
1076 BI->setCondition(Cond);
1077 DeadInsts.emplace_back(OrigCond);
1078
1079 ++NumLFTR;
1080 return true;
1081}
1082
1083//===----------------------------------------------------------------------===//
1084// sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
1085//===----------------------------------------------------------------------===//
1086
1087/// If there's a single exit block, sink any loop-invariant values that
1088/// were defined in the preheader but not used inside the loop into the
1089/// exit block to reduce register pressure in the loop.
1090bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
1091 BasicBlock *ExitBlock = L->getExitBlock();
1092 if (!ExitBlock) return false;
1093
1094 BasicBlock *Preheader = L->getLoopPreheader();
1095 if (!Preheader) return false;
1096
1097 bool MadeAnyChanges = false;
1098 BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
1099 BasicBlock::iterator I(Preheader->getTerminator());
1100 while (I != Preheader->begin()) {
1101 --I;
1102 // New instructions were inserted at the end of the preheader.
1103 if (isa<PHINode>(I))
1104 break;
1105
1106 // Don't move instructions which might have side effects, since the side
1107 // effects need to complete before instructions inside the loop. Also don't
1108 // move instructions which might read memory, since the loop may modify
1109 // memory. Note that it's okay if the instruction might have undefined
1110 // behavior: LoopSimplify guarantees that the preheader dominates the exit
1111 // block.
1112 if (I->mayHaveSideEffects() || I->mayReadFromMemory())
1113 continue;
1114
1115 // Skip debug info intrinsics.
1116 if (isa<DbgInfoIntrinsic>(I))
1117 continue;
1118
1119 // Skip eh pad instructions.
1120 if (I->isEHPad())
1121 continue;
1122
1123 // Don't sink alloca: we never want to sink static alloca's out of the
1124 // entry block, and correctly sinking dynamic alloca's requires
1125 // checks for stacksave/stackrestore intrinsics.
1126 // FIXME: Refactor this check somehow?
1127 if (isa<AllocaInst>(I))
1128 continue;
1129
1130 // Determine if there is a use in or before the loop (direct or
1131 // otherwise).
1132 bool UsedInLoop = false;
1133 for (Use &U : I->uses()) {
1134 Instruction *User = cast<Instruction>(U.getUser());
1135 BasicBlock *UseBB = User->getParent();
1136 if (PHINode *P = dyn_cast<PHINode>(User)) {
1137 unsigned i =
1139 UseBB = P->getIncomingBlock(i);
1140 }
1141 if (UseBB == Preheader || L->contains(UseBB)) {
1142 UsedInLoop = true;
1143 break;
1144 }
1145 }
1146
1147 // If there is, the def must remain in the preheader.
1148 if (UsedInLoop)
1149 continue;
1150
1151 // Otherwise, sink it to the exit block.
1152 Instruction *ToMove = &*I;
1153 bool Done = false;
1154
1155 if (I != Preheader->begin()) {
1156 // Skip debug info intrinsics.
1157 do {
1158 --I;
1159 } while (I->isDebugOrPseudoInst() && I != Preheader->begin());
1160
1161 if (I->isDebugOrPseudoInst() && I == Preheader->begin())
1162 Done = true;
1163 } else {
1164 Done = true;
1165 }
1166
1167 MadeAnyChanges = true;
1168 ToMove->moveBefore(*ExitBlock, InsertPt);
1169 SE->forgetValue(ToMove);
1170 if (Done) break;
1171 InsertPt = ToMove->getIterator();
1172 }
1173
1174 return MadeAnyChanges;
1175}
1176
1177static void replaceExitCond(BranchInst *BI, Value *NewCond,
1179 auto *OldCond = BI->getCondition();
1180 LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
1181 << " with " << *NewCond << "\n");
1182 BI->setCondition(NewCond);
1183 if (OldCond->use_empty())
1184 DeadInsts.emplace_back(OldCond);
1185}
1186
1187static Constant *createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB,
1188 bool IsTaken) {
1189 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1190 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1191 auto *OldCond = BI->getCondition();
1192 return ConstantInt::get(OldCond->getType(),
1193 IsTaken ? ExitIfTrue : !ExitIfTrue);
1194}
1195
1196static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
1198 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1199 auto *NewCond = createFoldedExitCond(L, ExitingBB, IsTaken);
1200 replaceExitCond(BI, NewCond, DeadInsts);
1201}
1202
1204 LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1205 ScalarEvolution &SE) {
1206 assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
1207 auto *LoopPreheader = L->getLoopPreheader();
1208 auto *LoopHeader = L->getHeader();
1210 for (auto &PN : LoopHeader->phis()) {
1211 auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
1212 for (User *U : PN.users())
1213 Worklist.push_back(cast<Instruction>(U));
1214 SE.forgetValue(&PN);
1215 PN.replaceAllUsesWith(PreheaderIncoming);
1216 DeadInsts.emplace_back(&PN);
1217 }
1218
1219 // Replacing with the preheader value will often allow IV users to simplify
1220 // (especially if the preheader value is a constant).
1222 while (!Worklist.empty()) {
1223 auto *I = cast<Instruction>(Worklist.pop_back_val());
1224 if (!Visited.insert(I).second)
1225 continue;
1226
1227 // Don't simplify instructions outside the loop.
1228 if (!L->contains(I))
1229 continue;
1230
1231 Value *Res = simplifyInstruction(I, I->getModule()->getDataLayout());
1232 if (Res && LI->replacementPreservesLCSSAForm(I, Res)) {
1233 for (User *U : I->users())
1234 Worklist.push_back(cast<Instruction>(U));
1235 I->replaceAllUsesWith(Res);
1236 DeadInsts.emplace_back(I);
1237 }
1238 }
1239}
1240
1241static Value *
1244 SCEVExpander &Rewriter) {
1245 ICmpInst::Predicate InvariantPred = LIP.Pred;
1246 BasicBlock *Preheader = L->getLoopPreheader();
1247 assert(Preheader && "Preheader doesn't exist");
1248 Rewriter.setInsertPoint(Preheader->getTerminator());
1249 auto *LHSV = Rewriter.expandCodeFor(LIP.LHS);
1250 auto *RHSV = Rewriter.expandCodeFor(LIP.RHS);
1251 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1252 if (ExitIfTrue)
1253 InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
1254 IRBuilder<> Builder(Preheader->getTerminator());
1255 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1256 return Builder.CreateICmp(InvariantPred, LHSV, RHSV,
1257 BI->getCondition()->getName());
1258}
1259
1260static std::optional<Value *>
1261createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB,
1262 const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
1263 ScalarEvolution *SE, SCEVExpander &Rewriter) {
1264 ICmpInst::Predicate Pred = ICmp->getPredicate();
1265 Value *LHS = ICmp->getOperand(0);
1266 Value *RHS = ICmp->getOperand(1);
1267
1268 // 'LHS pred RHS' should now mean that we stay in loop.
1269 auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1270 if (Inverted)
1271 Pred = CmpInst::getInversePredicate(Pred);
1272
1273 const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
1274 const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
1275 // Can we prove it to be trivially true or false?
1276 if (auto EV = SE->evaluatePredicateAt(Pred, LHSS, RHSS, BI))
1277 return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ !*EV);
1278
1279 auto *ARTy = LHSS->getType();
1280 auto *MaxIterTy = MaxIter->getType();
1281 // If possible, adjust types.
1282 if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
1283 MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
1284 else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
1285 const SCEV *MinusOne = SE->getMinusOne(ARTy);
1286 auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
1287 if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
1288 MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
1289 }
1290
1291 if (SkipLastIter) {
1292 // Semantically skip last iter is "subtract 1, do not bother about unsigned
1293 // wrap". getLoopInvariantExitCondDuringFirstIterations knows how to deal
1294 // with umin in a smart way, but umin(a, b) - 1 will likely not simplify.
1295 // So we manually construct umin(a - 1, b - 1).
1297 if (auto *UMin = dyn_cast<SCEVUMinExpr>(MaxIter)) {
1298 for (auto *Op : UMin->operands())
1299 Elements.push_back(SE->getMinusSCEV(Op, SE->getOne(Op->getType())));
1300 MaxIter = SE->getUMinFromMismatchedTypes(Elements);
1301 } else
1302 MaxIter = SE->getMinusSCEV(MaxIter, SE->getOne(MaxIter->getType()));
1303 }
1304
1305 // Check if there is a loop-invariant predicate equivalent to our check.
1306 auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
1307 L, BI, MaxIter);
1308 if (!LIP)
1309 return std::nullopt;
1310
1311 // Can we prove it to be trivially true?
1312 if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
1313 return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ false);
1314 else
1315 return createInvariantCond(L, ExitingBB, *LIP, Rewriter);
1316}
1317
1319 const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter,
1320 bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter,
1322 assert(
1323 (L->contains(BI->getSuccessor(0)) != L->contains(BI->getSuccessor(1))) &&
1324 "Not a loop exit!");
1325
1326 // For branch that stays in loop by TRUE condition, go through AND. For branch
1327 // that stays in loop by FALSE condition, go through OR. Both gives the
1328 // similar logic: "stay in loop iff all conditions are true(false)".
1329 bool Inverted = L->contains(BI->getSuccessor(1));
1330 SmallVector<ICmpInst *, 4> LeafConditions;
1331 SmallVector<Value *, 4> Worklist;
1333 Value *OldCond = BI->getCondition();
1334 Visited.insert(OldCond);
1335 Worklist.push_back(OldCond);
1336
1337 auto GoThrough = [&](Value *V) {
1338 Value *LHS = nullptr, *RHS = nullptr;
1339 if (Inverted) {
1340 if (!match(V, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
1341 return false;
1342 } else {
1343 if (!match(V, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
1344 return false;
1345 }
1346 if (Visited.insert(LHS).second)
1347 Worklist.push_back(LHS);
1348 if (Visited.insert(RHS).second)
1349 Worklist.push_back(RHS);
1350 return true;
1351 };
1352
1353 do {
1354 Value *Curr = Worklist.pop_back_val();
1355 // Go through AND/OR conditions. Collect leaf ICMPs. We only care about
1356 // those with one use, to avoid instruction duplication.
1357 if (Curr->hasOneUse())
1358 if (!GoThrough(Curr))
1359 if (auto *ICmp = dyn_cast<ICmpInst>(Curr))
1360 LeafConditions.push_back(ICmp);
1361 } while (!Worklist.empty());
1362
1363 // If the current basic block has the same exit count as the whole loop, and
1364 // it consists of multiple icmp's, try to collect all icmp's that give exact
1365 // same exit count. For all other icmp's, we could use one less iteration,
1366 // because their value on the last iteration doesn't really matter.
1367 SmallPtrSet<ICmpInst *, 4> ICmpsFailingOnLastIter;
1368 if (!SkipLastIter && LeafConditions.size() > 1 &&
1369 SE->getExitCount(L, ExitingBB,
1370 ScalarEvolution::ExitCountKind::SymbolicMaximum) ==
1371 MaxIter)
1372 for (auto *ICmp : LeafConditions) {
1373 auto EL = SE->computeExitLimitFromCond(L, ICmp, Inverted,
1374 /*ControlsExit*/ false);
1375 auto *ExitMax = EL.SymbolicMaxNotTaken;
1376 if (isa<SCEVCouldNotCompute>(ExitMax))
1377 continue;
1378 // They could be of different types (specifically this happens after
1379 // IV widening).
1380 auto *WiderType =
1381 SE->getWiderType(ExitMax->getType(), MaxIter->getType());
1382 auto *WideExitMax = SE->getNoopOrZeroExtend(ExitMax, WiderType);
1383 auto *WideMaxIter = SE->getNoopOrZeroExtend(MaxIter, WiderType);
1384 if (WideExitMax == WideMaxIter)
1385 ICmpsFailingOnLastIter.insert(ICmp);
1386 }
1387
1388 bool Changed = false;
1389 for (auto *OldCond : LeafConditions) {
1390 // Skip last iteration for this icmp under one of two conditions:
1391 // - We do it for all conditions;
1392 // - There is another ICmp that would fail on last iter, so this one doesn't
1393 // really matter.
1394 bool OptimisticSkipLastIter = SkipLastIter;
1395 if (!OptimisticSkipLastIter) {
1396 if (ICmpsFailingOnLastIter.size() > 1)
1397 OptimisticSkipLastIter = true;
1398 else if (ICmpsFailingOnLastIter.size() == 1)
1399 OptimisticSkipLastIter = !ICmpsFailingOnLastIter.count(OldCond);
1400 }
1401 if (auto Replaced =
1402 createReplacement(OldCond, L, ExitingBB, MaxIter, Inverted,
1403 OptimisticSkipLastIter, SE, Rewriter)) {
1404 Changed = true;
1405 auto *NewCond = *Replaced;
1406 if (auto *NCI = dyn_cast<Instruction>(NewCond)) {
1407 NCI->setName(OldCond->getName() + ".first_iter");
1408 }
1409 LLVM_DEBUG(dbgs() << "Unknown exit count: Replacing " << *OldCond
1410 << " with " << *NewCond << "\n");
1411 assert(OldCond->hasOneUse() && "Must be!");
1412 OldCond->replaceAllUsesWith(NewCond);
1413 DeadInsts.push_back(OldCond);
1414 // Make sure we no longer consider this condition as failing on last
1415 // iteration.
1416 ICmpsFailingOnLastIter.erase(OldCond);
1417 }
1418 }
1419 return Changed;
1420}
1421
1422bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
1423 // Note: This is duplicating a particular part on SimplifyIndVars reasoning.
1424 // We need to duplicate it because given icmp zext(small-iv), C, IVUsers
1425 // never reaches the icmp since the zext doesn't fold to an AddRec unless
1426 // it already has flags. The alternative to this would be to extending the
1427 // set of "interesting" IV users to include the icmp, but doing that
1428 // regresses results in practice by querying SCEVs before trip counts which
1429 // rely on them which results in SCEV caching sub-optimal answers. The
1430 // concern about caching sub-optimal results is why we only query SCEVs of
1431 // the loop invariant RHS here.
1432 SmallVector<BasicBlock*, 16> ExitingBlocks;
1433 L->getExitingBlocks(ExitingBlocks);
1434 bool Changed = false;
1435 for (auto *ExitingBB : ExitingBlocks) {
1436 auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1437 if (!BI)
1438 continue;
1439 assert(BI->isConditional() && "exit branch must be conditional");
1440
1441 auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1442 if (!ICmp || !ICmp->hasOneUse())
1443 continue;
1444
1445 auto *LHS = ICmp->getOperand(0);
1446 auto *RHS = ICmp->getOperand(1);
1447 // For the range reasoning, avoid computing SCEVs in the loop to avoid
1448 // poisoning cache with sub-optimal results. For the must-execute case,
1449 // this is a neccessary precondition for correctness.
1450 if (!L->isLoopInvariant(RHS)) {
1451 if (!L->isLoopInvariant(LHS))
1452 continue;
1453 // Same logic applies for the inverse case
1454 std::swap(LHS, RHS);
1455 }
1456
1457 // Match (icmp signed-cond zext, RHS)
1458 Value *LHSOp = nullptr;
1459 if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
1460 continue;
1461
1462 const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
1463 const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
1464 const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
1465 auto FullCR = ConstantRange::getFull(InnerBitWidth);
1466 FullCR = FullCR.zeroExtend(OuterBitWidth);
1467 auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
1468 if (FullCR.contains(RHSCR)) {
1469 // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
1470 // replace the signed condition with the unsigned version.
1471 ICmp->setPredicate(ICmp->getUnsignedPredicate());
1472 Changed = true;
1473 // Note: No SCEV invalidation needed. We've changed the predicate, but
1474 // have not changed exit counts, or the values produced by the compare.
1475 continue;
1476 }
1477 }
1478
1479 // Now that we've canonicalized the condition to match the extend,
1480 // see if we can rotate the extend out of the loop.
1481 for (auto *ExitingBB : ExitingBlocks) {
1482 auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1483 if (!BI)
1484 continue;
1485 assert(BI->isConditional() && "exit branch must be conditional");
1486
1487 auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1488 if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
1489 continue;
1490
1491 bool Swapped = false;
1492 auto *LHS = ICmp->getOperand(0);
1493 auto *RHS = ICmp->getOperand(1);
1494 if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
1495 // Nothing to rotate
1496 continue;
1497 if (L->isLoopInvariant(LHS)) {
1498 // Same logic applies for the inverse case until we actually pick
1499 // which operand of the compare to update.
1500 Swapped = true;
1501 std::swap(LHS, RHS);
1502 }
1503 assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));
1504
1505 // Match (icmp unsigned-cond zext, RHS)
1506 // TODO: Extend to handle corresponding sext/signed-cmp case
1507 // TODO: Extend to other invertible functions
1508 Value *LHSOp = nullptr;
1509 if (!match(LHS, m_ZExt(m_Value(LHSOp))))
1510 continue;
1511
1512 // In general, we only rotate if we can do so without increasing the number
1513 // of instructions. The exception is when we have an zext(add-rec). The
1514 // reason for allowing this exception is that we know we need to get rid
1515 // of the zext for SCEV to be able to compute a trip count for said loops;
1516 // we consider the new trip count valuable enough to increase instruction
1517 // count by one.
1518 if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
1519 continue;
1520
1521 // Given a icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS
1522 // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
1523 // when zext is loop varying and RHS is loop invariant. This converts
1524 // loop varying work to loop-invariant work.
1525 auto doRotateTransform = [&]() {
1526 assert(ICmp->isUnsigned() && "must have proven unsigned already");
1527 auto *NewRHS = CastInst::Create(
1528 Instruction::Trunc, RHS, LHSOp->getType(), "",
1529 L->getLoopPreheader()->getTerminator()->getIterator());
1530 ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
1531 ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
1532 if (LHS->use_empty())
1533 DeadInsts.push_back(LHS);
1534 };
1535
1536
1537 const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
1538 const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
1539 const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
1540 auto FullCR = ConstantRange::getFull(InnerBitWidth);
1541 FullCR = FullCR.zeroExtend(OuterBitWidth);
1542 auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
1543 if (FullCR.contains(RHSCR)) {
1544 doRotateTransform();
1545 Changed = true;
1546 // Note, we are leaving SCEV in an unfortunately imprecise case here
1547 // as rotation tends to reveal information about trip counts not
1548 // previously visible.
1549 continue;
1550 }
1551 }
1552
1553 return Changed;
1554}
1555
1556bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1557 SmallVector<BasicBlock*, 16> ExitingBlocks;
1558 L->getExitingBlocks(ExitingBlocks);
1559
1560 // Remove all exits which aren't both rewriteable and execute on every
1561 // iteration.
1562 llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1563 // If our exitting block exits multiple loops, we can only rewrite the
1564 // innermost one. Otherwise, we're changing how many times the innermost
1565 // loop runs before it exits.
1566 if (LI->getLoopFor(ExitingBB) != L)
1567 return true;
1568
1569 // Can't rewrite non-branch yet.
1570 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1571 if (!BI)
1572 return true;
1573
1574 // Likewise, the loop latch must be dominated by the exiting BB.
1575 if (!DT->dominates(ExitingBB, L->getLoopLatch()))
1576 return true;
1577
1578 if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
1579 // If already constant, nothing to do. However, if this is an
1580 // unconditional exit, we can still replace header phis with their
1581 // preheader value.
1582 if (!L->contains(BI->getSuccessor(CI->isNullValue())))
1583 replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1584 return true;
1585 }
1586
1587 return false;
1588 });
1589
1590 if (ExitingBlocks.empty())
1591 return false;
1592
1593 // Get a symbolic upper bound on the loop backedge taken count.
1594 const SCEV *MaxBECount = SE->getSymbolicMaxBackedgeTakenCount(L);
1595 if (isa<SCEVCouldNotCompute>(MaxBECount))
1596 return false;
1597
1598 // Visit our exit blocks in order of dominance. We know from the fact that
1599 // all exits must dominate the latch, so there is a total dominance order
1600 // between them.
1601 llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1602 // std::sort sorts in ascending order, so we want the inverse of
1603 // the normal dominance relation.
1604 if (A == B) return false;
1605 if (DT->properlyDominates(A, B))
1606 return true;
1607 else {
1608 assert(DT->properlyDominates(B, A) &&
1609 "expected total dominance order!");
1610 return false;
1611 }
1612 });
1613#ifdef ASSERT
1614 for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
1615 assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
1616 }
1617#endif
1618
1619 bool Changed = false;
1620 bool SkipLastIter = false;
1621 const SCEV *CurrMaxExit = SE->getCouldNotCompute();
1622 auto UpdateSkipLastIter = [&](const SCEV *MaxExitCount) {
1623 if (SkipLastIter || isa<SCEVCouldNotCompute>(MaxExitCount))
1624 return;
1625 if (isa<SCEVCouldNotCompute>(CurrMaxExit))
1626 CurrMaxExit = MaxExitCount;
1627 else
1628 CurrMaxExit = SE->getUMinFromMismatchedTypes(CurrMaxExit, MaxExitCount);
1629 // If the loop has more than 1 iteration, all further checks will be
1630 // executed 1 iteration less.
1631 if (CurrMaxExit == MaxBECount)
1632 SkipLastIter = true;
1633 };
1634 SmallSet<const SCEV *, 8> DominatingExactExitCounts;
1635 for (BasicBlock *ExitingBB : ExitingBlocks) {
1636 const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBB);
1637 const SCEV *MaxExitCount = SE->getExitCount(
1638 L, ExitingBB, ScalarEvolution::ExitCountKind::SymbolicMaximum);
1639 if (isa<SCEVCouldNotCompute>(ExactExitCount)) {
1640 // Okay, we do not know the exit count here. Can we at least prove that it
1641 // will remain the same within iteration space?
1642 auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1643 auto OptimizeCond = [&](bool SkipLastIter) {
1644 return optimizeLoopExitWithUnknownExitCount(L, BI, ExitingBB,
1645 MaxBECount, SkipLastIter,
1646 SE, Rewriter, DeadInsts);
1647 };
1648
1649 // TODO: We might have proved that we can skip the last iteration for
1650 // this check. In this case, we only want to check the condition on the
1651 // pre-last iteration (MaxBECount - 1). However, there is a nasty
1652 // corner case:
1653 //
1654 // for (i = len; i != 0; i--) { ... check (i ult X) ... }
1655 //
1656 // If we could not prove that len != 0, then we also could not prove that
1657 // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
1658 // OptimizeCond will likely not prove anything for it, even if it could
1659 // prove the same fact for len.
1660 //
1661 // As a temporary solution, we query both last and pre-last iterations in
1662 // hope that we will be able to prove triviality for at least one of
1663 // them. We can stop querying MaxBECount for this case once SCEV
1664 // understands that (MaxBECount - 1) will not overflow here.
1665 if (OptimizeCond(false))
1666 Changed = true;
1667 else if (SkipLastIter && OptimizeCond(true))
1668 Changed = true;
1669 UpdateSkipLastIter(MaxExitCount);
1670 continue;
1671 }
1672
1673 UpdateSkipLastIter(ExactExitCount);
1674
1675 // If we know we'd exit on the first iteration, rewrite the exit to
1676 // reflect this. This does not imply the loop must exit through this
1677 // exit; there may be an earlier one taken on the first iteration.
1678 // We know that the backedge can't be taken, so we replace all
1679 // the header PHIs with values coming from the preheader.
1680 if (ExactExitCount->isZero()) {
1681 foldExit(L, ExitingBB, true, DeadInsts);
1682 replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1683 Changed = true;
1684 continue;
1685 }
1686
1687 assert(ExactExitCount->getType()->isIntegerTy() &&
1688 MaxBECount->getType()->isIntegerTy() &&
1689 "Exit counts must be integers");
1690
1691 Type *WiderType =
1692 SE->getWiderType(MaxBECount->getType(), ExactExitCount->getType());
1693 ExactExitCount = SE->getNoopOrZeroExtend(ExactExitCount, WiderType);
1694 MaxBECount = SE->getNoopOrZeroExtend(MaxBECount, WiderType);
1695 assert(MaxBECount->getType() == ExactExitCount->getType());
1696
1697 // Can we prove that some other exit must be taken strictly before this
1698 // one?
1699 if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, MaxBECount,
1700 ExactExitCount)) {
1701 foldExit(L, ExitingBB, false, DeadInsts);
1702 Changed = true;
1703 continue;
1704 }
1705
1706 // As we run, keep track of which exit counts we've encountered. If we
1707 // find a duplicate, we've found an exit which would have exited on the
1708 // exiting iteration, but (from the visit order) strictly follows another
1709 // which does the same and is thus dead.
1710 if (!DominatingExactExitCounts.insert(ExactExitCount).second) {
1711 foldExit(L, ExitingBB, false, DeadInsts);
1712 Changed = true;
1713 continue;
1714 }
1715
1716 // TODO: There might be another oppurtunity to leverage SCEV's reasoning
1717 // here. If we kept track of the min of dominanting exits so far, we could
1718 // discharge exits with EC >= MDEC. This is less powerful than the existing
1719 // transform (since later exits aren't considered), but potentially more
1720 // powerful for any case where SCEV can prove a >=u b, but neither a == b
1721 // or a >u b. Such a case is not currently known.
1722 }
1723 return Changed;
1724}
1725
1726bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
1727 SmallVector<BasicBlock*, 16> ExitingBlocks;
1728 L->getExitingBlocks(ExitingBlocks);
1729
1730 // Finally, see if we can rewrite our exit conditions into a loop invariant
1731 // form. If we have a read-only loop, and we can tell that we must exit down
1732 // a path which does not need any of the values computed within the loop, we
1733 // can rewrite the loop to exit on the first iteration. Note that this
1734 // doesn't either a) tell us the loop exits on the first iteration (unless
1735 // *all* exits are predicateable) or b) tell us *which* exit might be taken.
1736 // This transformation looks a lot like a restricted form of dead loop
1737 // elimination, but restricted to read-only loops and without neccesssarily
1738 // needing to kill the loop entirely.
1739 if (!LoopPredication)
1740 return false;
1741
1742 // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
1743 // through *explicit* control flow. We have to eliminate the possibility of
1744 // implicit exits (see below) before we know it's truly exact.
1745 const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
1746 if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC))
1747 return false;
1748
1749 assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
1750 assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");
1751
1752 auto BadExit = [&](BasicBlock *ExitingBB) {
1753 // If our exiting block exits multiple loops, we can only rewrite the
1754 // innermost one. Otherwise, we're changing how many times the innermost
1755 // loop runs before it exits.
1756 if (LI->getLoopFor(ExitingBB) != L)
1757 return true;
1758
1759 // Can't rewrite non-branch yet.
1760 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1761 if (!BI)
1762 return true;
1763
1764 // If already constant, nothing to do.
1765 if (isa<Constant>(BI->getCondition()))
1766 return true;
1767
1768 // If the exit block has phis, we need to be able to compute the values
1769 // within the loop which contains them. This assumes trivially lcssa phis
1770 // have already been removed; TODO: generalize
1771 BasicBlock *ExitBlock =
1772 BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
1773 if (!ExitBlock->phis().empty())
1774 return true;
1775
1776 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1777 if (isa<SCEVCouldNotCompute>(ExitCount) ||
1778 !Rewriter.isSafeToExpand(ExitCount))
1779 return true;
1780
1781 assert(SE->isLoopInvariant(ExitCount, L) &&
1782 "Exit count must be loop invariant");
1783 assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
1784 return false;
1785 };
1786
1787 // If we have any exits which can't be predicated themselves, than we can't
1788 // predicate any exit which isn't guaranteed to execute before it. Consider
1789 // two exits (a) and (b) which would both exit on the same iteration. If we
1790 // can predicate (b), but not (a), and (a) preceeds (b) along some path, then
1791 // we could convert a loop from exiting through (a) to one exiting through
1792 // (b). Note that this problem exists only for exits with the same exit
1793 // count, and we could be more aggressive when exit counts are known inequal.
1794 llvm::sort(ExitingBlocks,
1795 [&](BasicBlock *A, BasicBlock *B) {
1796 // std::sort sorts in ascending order, so we want the inverse of
1797 // the normal dominance relation, plus a tie breaker for blocks
1798 // unordered by dominance.
1799 if (DT->properlyDominates(A, B)) return true;
1800 if (DT->properlyDominates(B, A)) return false;
1801 return A->getName() < B->getName();
1802 });
1803 // Check to see if our exit blocks are a total order (i.e. a linear chain of
1804 // exits before the backedge). If they aren't, reasoning about reachability
1805 // is complicated and we choose not to for now.
1806 for (unsigned i = 1; i < ExitingBlocks.size(); i++)
1807 if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
1808 return false;
1809
1810 // Given our sorted total order, we know that exit[j] must be evaluated
1811 // after all exit[i] such j > i.
1812 for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
1813 if (BadExit(ExitingBlocks[i])) {
1814 ExitingBlocks.resize(i);
1815 break;
1816 }
1817
1818 if (ExitingBlocks.empty())
1819 return false;
1820
1821 // We rely on not being able to reach an exiting block on a later iteration
1822 // then it's statically compute exit count. The implementaton of
1823 // getExitCount currently has this invariant, but assert it here so that
1824 // breakage is obvious if this ever changes..
1825 assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1826 return DT->dominates(ExitingBB, L->getLoopLatch());
1827 }));
1828
1829 // At this point, ExitingBlocks consists of only those blocks which are
1830 // predicatable. Given that, we know we have at least one exit we can
1831 // predicate if the loop is doesn't have side effects and doesn't have any
1832 // implicit exits (because then our exact BTC isn't actually exact).
1833 // @Reviewers - As structured, this is O(I^2) for loop nests. Any
1834 // suggestions on how to improve this? I can obviously bail out for outer
1835 // loops, but that seems less than ideal. MemorySSA can find memory writes,
1836 // is that enough for *all* side effects?
1837 for (BasicBlock *BB : L->blocks())
1838 for (auto &I : *BB)
1839 // TODO:isGuaranteedToTransfer
1840 if (I.mayHaveSideEffects())
1841 return false;
1842
1843 bool Changed = false;
1844 // Finally, do the actual predication for all predicatable blocks. A couple
1845 // of notes here:
1846 // 1) We don't bother to constant fold dominated exits with identical exit
1847 // counts; that's simply a form of CSE/equality propagation and we leave
1848 // it for dedicated passes.
1849 // 2) We insert the comparison at the branch. Hoisting introduces additional
1850 // legality constraints and we leave that to dedicated logic. We want to
1851 // predicate even if we can't insert a loop invariant expression as
1852 // peeling or unrolling will likely reduce the cost of the otherwise loop
1853 // varying check.
1854 Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
1855 IRBuilder<> B(L->getLoopPreheader()->getTerminator());
1856 Value *ExactBTCV = nullptr; // Lazily generated if needed.
1857 for (BasicBlock *ExitingBB : ExitingBlocks) {
1858 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1859
1860 auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1861 Value *NewCond;
1862 if (ExitCount == ExactBTC) {
1863 NewCond = L->contains(BI->getSuccessor(0)) ?
1864 B.getFalse() : B.getTrue();
1865 } else {
1866 Value *ECV = Rewriter.expandCodeFor(ExitCount);
1867 if (!ExactBTCV)
1868 ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
1869 Value *RHS = ExactBTCV;
1870 if (ECV->getType() != RHS->getType()) {
1871 Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
1872 ECV = B.CreateZExt(ECV, WiderTy);
1873 RHS = B.CreateZExt(RHS, WiderTy);
1874 }
1875 auto Pred = L->contains(BI->getSuccessor(0)) ?
1876 ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1877 NewCond = B.CreateICmp(Pred, ECV, RHS);
1878 }
1879 Value *OldCond = BI->getCondition();
1880 BI->setCondition(NewCond);
1881 if (OldCond->use_empty())
1882 DeadInsts.emplace_back(OldCond);
1883 Changed = true;
1884 RunUnswitching = true;
1885 }
1886
1887 return Changed;
1888}
1889
1890//===----------------------------------------------------------------------===//
1891// IndVarSimplify driver. Manage several subpasses of IV simplification.
1892//===----------------------------------------------------------------------===//
1893
1894bool IndVarSimplify::run(Loop *L) {
1895 // We need (and expect!) the incoming loop to be in LCSSA.
1896 assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
1897 "LCSSA required to run indvars!");
1898
1899 // If LoopSimplify form is not available, stay out of trouble. Some notes:
1900 // - LSR currently only supports LoopSimplify-form loops. Indvars'
1901 // canonicalization can be a pessimization without LSR to "clean up"
1902 // afterwards.
1903 // - We depend on having a preheader; in particular,
1904 // Loop::getCanonicalInductionVariable only supports loops with preheaders,
1905 // and we're in trouble if we can't find the induction variable even when
1906 // we've manually inserted one.
1907 // - LFTR relies on having a single backedge.
1908 if (!L->isLoopSimplifyForm())
1909 return false;
1910
1911 bool Changed = false;
1912 // If there are any floating-point recurrences, attempt to
1913 // transform them to use integer recurrences.
1914 Changed |= rewriteNonIntegerIVs(L);
1915
1916 // Create a rewriter object which we'll use to transform the code with.
1917 SCEVExpander Rewriter(*SE, DL, "indvars");
1918#ifndef NDEBUG
1919 Rewriter.setDebugType(DEBUG_TYPE);
1920#endif
1921
1922 // Eliminate redundant IV users.
1923 //
1924 // Simplification works best when run before other consumers of SCEV. We
1925 // attempt to avoid evaluating SCEVs for sign/zero extend operations until
1926 // other expressions involving loop IVs have been evaluated. This helps SCEV
1927 // set no-wrap flags before normalizing sign/zero extension.
1928 Rewriter.disableCanonicalMode();
1929 Changed |= simplifyAndExtend(L, Rewriter, LI);
1930
1931 // Check to see if we can compute the final value of any expressions
1932 // that are recurrent in the loop, and substitute the exit values from the
1933 // loop into any instructions outside of the loop that use the final values
1934 // of the current expressions.
1935 if (ReplaceExitValue != NeverRepl) {
1936 if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
1937 ReplaceExitValue, DeadInsts)) {
1938 NumReplaced += Rewrites;
1939 Changed = true;
1940 }
1941 }
1942
1943 // Eliminate redundant IV cycles.
1944 NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);
1945
1946 // Try to convert exit conditions to unsigned and rotate computation
1947 // out of the loop. Note: Handles invalidation internally if needed.
1948 Changed |= canonicalizeExitCondition(L);
1949
1950 // Try to eliminate loop exits based on analyzeable exit counts
1951 if (optimizeLoopExits(L, Rewriter)) {
1952 Changed = true;
1953 // Given we've changed exit counts, notify SCEV
1954 // Some nested loops may share same folded exit basic block,
1955 // thus we need to notify top most loop.
1956 SE->forgetTopmostLoop(L);
1957 }
1958
1959 // Try to form loop invariant tests for loop exits by changing how many
1960 // iterations of the loop run when that is unobservable.
1961 if (predicateLoopExits(L, Rewriter)) {
1962 Changed = true;
1963 // Given we've changed exit counts, notify SCEV
1964 SE->forgetLoop(L);
1965 }
1966
1967 // If we have a trip count expression, rewrite the loop's exit condition
1968 // using it.
1969 if (!DisableLFTR) {
1970 BasicBlock *PreHeader = L->getLoopPreheader();
1971
1972 SmallVector<BasicBlock*, 16> ExitingBlocks;
1973 L->getExitingBlocks(ExitingBlocks);
1974 for (BasicBlock *ExitingBB : ExitingBlocks) {
1975 // Can't rewrite non-branch yet.
1976 if (!isa<BranchInst>(ExitingBB->getTerminator()))
1977 continue;
1978
1979 // If our exitting block exits multiple loops, we can only rewrite the
1980 // innermost one. Otherwise, we're changing how many times the innermost
1981 // loop runs before it exits.
1982 if (LI->getLoopFor(ExitingBB) != L)
1983 continue;
1984
1985 if (!needsLFTR(L, ExitingBB))
1986 continue;
1987
1988 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1989 if (isa<SCEVCouldNotCompute>(ExitCount))
1990 continue;
1991
1992 // This was handled above, but as we form SCEVs, we can sometimes refine
1993 // existing ones; this allows exit counts to be folded to zero which
1994 // weren't when optimizeLoopExits saw them. Arguably, we should iterate
1995 // until stable to handle cases like this better.
1996 if (ExitCount->isZero())
1997 continue;
1998
1999 PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
2000 if (!IndVar)
2001 continue;
2002
2003 // Avoid high cost expansions. Note: This heuristic is questionable in
2004 // that our definition of "high cost" is not exactly principled.
2005 if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
2006 TTI, PreHeader->getTerminator()))
2007 continue;
2008
2009 if (!Rewriter.isSafeToExpand(ExitCount))
2010 continue;
2011
2012 Changed |= linearFunctionTestReplace(L, ExitingBB,
2013 ExitCount, IndVar,
2014 Rewriter);
2015 }
2016 }
2017 // Clear the rewriter cache, because values that are in the rewriter's cache
2018 // can be deleted in the loop below, causing the AssertingVH in the cache to
2019 // trigger.
2020 Rewriter.clear();
2021
2022 // Now that we're done iterating through lists, clean up any instructions
2023 // which are now dead.
2024 while (!DeadInsts.empty()) {
2025 Value *V = DeadInsts.pop_back_val();
2026
2027 if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
2028 Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
2029 else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
2030 Changed |=
2031 RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
2032 }
2033
2034 // The Rewriter may not be used from this point on.
2035
2036 // Loop-invariant instructions in the preheader that aren't used in the
2037 // loop may be sunk below the loop to reduce register pressure.
2038 Changed |= sinkUnusedInvariants(L);
2039
2040 // rewriteFirstIterationLoopExitValues does not rely on the computation of
2041 // trip count and therefore can further simplify exit values in addition to
2042 // rewriteLoopExitValues.
2043 Changed |= rewriteFirstIterationLoopExitValues(L);
2044
2045 // Clean up dead instructions.
2046 Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());
2047
2048 // Check a post-condition.
2049 assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2050 "Indvars did not preserve LCSSA!");
2051 if (VerifyMemorySSA && MSSAU)
2052 MSSAU->getMemorySSA()->verifyMemorySSA();
2053
2054 return Changed;
2055}
2056
2059 LPMUpdater &) {
2060 Function *F = L.getHeader()->getParent();
2061 const DataLayout &DL = F->getParent()->getDataLayout();
2062
2063 IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
2064 WidenIndVars && AllowIVWidening);
2065 if (!IVS.run(&L))
2066 return PreservedAnalyses::all();
2067
2068 auto PA = getLoopPassPreservedAnalyses();
2069 PA.preserveSet<CFGAnalyses>();
2070 if (IVS.runUnswitching()) {
2072 PA.preserve<ShouldRunExtraSimpleLoopUnswitch>();
2073 }
2074
2075 if (AR.MSSA)
2076 PA.preserve<MemorySSAAnalysis>();
2077 return PA;
2078}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Rewrite undef for PHI
This file declares a class to represent arbitrary precision floating point values and provide a varie...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:693
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
static Value * genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, const SCEV *ExitCount, bool UsePostInc, Loop *L, SCEVExpander &Rewriter, ScalarEvolution *SE)
Insert an IR expression which computes the value held by the IV IndVar (which must be an loop counter...
static void replaceExitCond(BranchInst *BI, Value *NewCond, SmallVectorImpl< WeakTrackingVH > &DeadInsts)
static cl::opt< bool > DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), cl::desc("Disable Linear Function Test Replace optimization"))
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB)
Whether the current loop exit test is based on this value.
static cl::opt< ReplaceExitVal > ReplaceExitValue("replexitval", cl::Hidden, cl::init(OnlyCheapRepl), cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), clEnumValN(OnlyCheapRepl, "cheap", "only replace exit value when the cost is cheap"), clEnumValN(UnusedIndVarInLoop, "unusedindvarinloop", "only replace exit value when it is an unused " "induction variable in the loop and has cheap replacement cost"), clEnumValN(NoHardUse, "noharduse", "only replace exit values when loop def likely dead"), clEnumValN(AlwaysRepl, "always", "always replace exit value whenever possible")))
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE, const TargetTransformInfo *TTI)
Update information about the induction variable that is extended by this sign or zero extend operatio...
static void replaceLoopPHINodesWithPreheaderValues(LoopInfo *LI, Loop *L, SmallVectorImpl< WeakTrackingVH > &DeadInsts, ScalarEvolution &SE)
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB)
linearFunctionTestReplace policy.
static bool optimizeLoopExitWithUnknownExitCount(const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter, bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter, SmallVectorImpl< WeakTrackingVH > &DeadInsts)
static Value * createInvariantCond(const Loop *L, BasicBlock *ExitingBB, const ScalarEvolution::LoopInvariantPredicate &LIP, SCEVExpander &Rewriter)
static bool isLoopCounter(PHINode *Phi, Loop *L, ScalarEvolution *SE)
Return true if the given phi is a "counter" in L.
static std::optional< Value * > createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB, const SCEV *MaxIter, bool Inverted, bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter)
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl< Value * > &Visited, unsigned Depth)
Recursive helper for hasConcreteDef().
static bool hasConcreteDef(Value *V)
Return true if the given value is concrete.
static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken, SmallVectorImpl< WeakTrackingVH > &DeadInsts)
static PHINode * getLoopPhiForCounter(Value *IncV, Loop *L)
Given an Value which is hoped to be part of an add recurance in the given loop, return the associated...
static Constant * createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB, bool IsTaken)
static cl::opt< bool > UsePostIncrementRanges("indvars-post-increment-ranges", cl::Hidden, cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), cl::init(true))
#define DEBUG_TYPE
static PHINode * FindLoopCounter(Loop *L, BasicBlock *ExitingBB, const SCEV *BECount, ScalarEvolution *SE, DominatorTree *DT)
Search the loop header for a loop counter (anadd rec w/step of one) suitable for use by LFTR.
static cl::opt< bool > AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true), cl::desc("Allow widening of indvars to eliminate s/zext"))
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal)
Convert APF to an integer, if possible.
static cl::opt< bool > LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true), cl::desc("Predicate conditions in read only loops"))
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Module.h This file contains the declarations for the Module class.
IntegerType * Int32Ty
#define P(N)
if(VerifyEach)
This header defines various interfaces for pass management in LLVM.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This pass exposes codegen information to IR-level passes.
This defines the Use class.
Virtual Register Rewriter
Definition: VirtRegMap.cpp:237
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition: blake3_impl.h:78
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1185
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:321
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:473
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:430
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:499
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:409
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:165
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:221
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:289
Conditional or Unconditional Branch instruction.
void setCondition(Value *V)
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:70
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:601
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:930
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name, BasicBlock::iterator InsertBefore)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:993
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:996
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:1022
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:1023
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:999
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:1008
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:997
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:998
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:1020
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:1007
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:1001
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:1004
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:1018
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:1005
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:1000
@ ICMP_EQ
equal
Definition: InstrTypes.h:1014
@ ICMP_NE
not equal
Definition: InstrTypes.h:1015
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:1021
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:1009
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:1006
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:1129
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:1105
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:268
const APFloat & getValueAPF() const
Definition: Constants.h:311
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU.
Definition: DataLayout.h:260
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
This instruction compares its operands according to the predicate given to the constructor.
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2351
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2666
Interface for visiting interesting IV users that are recognized but not simplified by this utility.
virtual void visitCast(CastInst *Cast)=0
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:83
const BasicBlock * getParent() const
Definition: Instruction.h:152
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:252
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Class to represent integer types.
Definition: DerivedTypes.h:40
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
bool replacementPreservesLCSSAForm(Instruction *From, Value *To)
Returns true if replacing From with To everywhere is guaranteed to preserve LCSSA form.
Definition: LoopInfo.h:439
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:923
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:700
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:293
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
static unsigned getIncomingValueNumForOperand(unsigned i)
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1827
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:115
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const SCEVAddRecExpr * getPostIncExpr(ScalarEvolution &SE) const
Return an expression representing the value of this expression one iteration of the loop ahead.
This class uses information about analyze scalars to rewrite expressions in canonical form.
This class represents an analyzed expression in the program.
bool isOne() const
Return true if the expression is a constant one.
bool isZero() const
Return true if the expression is a constant zero.
Type * getType() const
Return the LLVM type of this SCEV expression.
This class represents a cast from signed integer to floating point.
The main scalar evolution driver.
Type * getWiderType(Type *Ty1, Type *Ty2) const
const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
ExitLimit computeExitLimitFromCond(const Loop *L, Value *ExitCond, bool ExitIfTrue, bool ControlsOnlyExit, bool AllowPredicates=false)
Compute the number of times the backedge of the specified loop will execute if its exit condition wer...
uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
bool isKnownPredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
std::optional< bool > evaluatePredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI)
Check whether the condition described by Pred, LHS, and RHS is true or false in the given Context.
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may effect its v...
const SCEV * getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
const SCEV * getMinusOne(Type *Ty)
Return a SCEV for the constant -1 of a specific type.
const SCEV * getNoopOrZeroExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
const SCEV * getUMinFromMismatchedTypes(const SCEV *LHS, const SCEV *RHS, bool Sequential=false)
Promote the operands to the wider of the types using zero-extension, and then perform a umin operatio...
const SCEV * getExitCount(const Loop *L, const BasicBlock *ExitingBlock, ExitCountKind Kind=Exact)
Return the number of times the backedge executes before the given exit would be taken; if not exactly...
std::optional< LoopInvariantPredicate > getLoopInvariantExitCondDuringFirstIterations(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L, const Instruction *CtxI, const SCEV *MaxIter)
If the result of the predicate LHS Pred RHS is loop invariant with respect to L at given Context duri...
size_type size() const
Definition: SmallPtrSet.h:94
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:321
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:356
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:255
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
iterator_range< user_iterator > users()
Definition: Value.h:421
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
user_iterator_impl< User > user_iterator
Definition: Value.h:390
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:204
self_iterator getIterator()
Definition: ilist_node.h:109
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:1023
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:718
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root, Instruction *OnPathTo, DominatorTree *DT)
Return true if undefined behavior would provable be executed on the path to OnPathTo if Root produced...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:540
@ Done
Definition: Threading.h:61
PHINode * createWideIV(const WideIVInfo &WI, LoopInfo *LI, ScalarEvolution *SE, SCEVExpander &Rewriter, DominatorTree *DT, SmallVectorImpl< WeakTrackingVH > &DeadInsts, unsigned &NumElimExt, unsigned &NumWidened, bool HasGuards, bool UsePostIncrementRanges)
Widen Induction Variables - Extend the width of an IV to cover its widest uses.
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1647
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
cl::opt< unsigned > SCEVCheapExpansionBudget
std::pair< bool, bool > simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, const TargetTransformInfo *TTI, SmallVectorImpl< WeakTrackingVH > &Dead, SCEVExpander &Rewriter, IVVisitor *V=nullptr)
simplifyUsersOfIV - Simplify instructions that use this induction variable by using ScalarEvolution t...
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
TargetTransformInfo TTI
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:83
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2051
bool isAlmostDeadIV(PHINode *IV, BasicBlock *LatchBlock, Value *Cond)
Return true if the induction variable IV in a Loop whose latch is LatchBlock would become dead if the...
Definition: LoopUtils.cpp:469
int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed,...
Definition: LoopUtils.cpp:1416
bool RecursivelyDeleteDeadPHINode(PHINode *PN, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is an effectively dead PHI node, due to being a def-use chain of single-use no...
Definition: Local.cpp:651
@ UnusedIndVarInLoop
Definition: LoopUtils.h:465
@ OnlyCheapRepl
Definition: LoopUtils.h:463
@ NeverRepl
Definition: LoopUtils.h:462
@ NoHardUse
Definition: LoopUtils.h:464
@ AlwaysRepl
Definition: LoopUtils.h:466
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Collect information about induction variables that are used by sign/zero extend operations.
PHINode * NarrowIV