LLVM 17.0.0git
InductiveRangeCheckElimination.cpp
Go to the documentation of this file.
1//===- InductiveRangeCheckElimination.cpp - -------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The InductiveRangeCheckElimination pass splits a loop's iteration space into
10// three disjoint ranges. It does that in a way such that the loop running in
11// the middle loop provably does not need range checks. As an example, it will
12// convert
13//
14// len = < known positive >
15// for (i = 0; i < n; i++) {
16// if (0 <= i && i < len) {
17// do_something();
18// } else {
19// throw_out_of_bounds();
20// }
21// }
22//
23// to
24//
25// len = < known positive >
26// limit = smin(n, len)
27// // no first segment
28// for (i = 0; i < limit; i++) {
29// if (0 <= i && i < len) { // this check is fully redundant
30// do_something();
31// } else {
32// throw_out_of_bounds();
33// }
34// }
35// for (i = limit; i < n; i++) {
36// if (0 <= i && i < len) {
37// do_something();
38// } else {
39// throw_out_of_bounds();
40// }
41// }
42//
43//===----------------------------------------------------------------------===//
44
46#include "llvm/ADT/APInt.h"
47#include "llvm/ADT/ArrayRef.h"
51#include "llvm/ADT/StringRef.h"
52#include "llvm/ADT/Twine.h"
59#include "llvm/IR/BasicBlock.h"
60#include "llvm/IR/CFG.h"
61#include "llvm/IR/Constants.h"
63#include "llvm/IR/Dominators.h"
64#include "llvm/IR/Function.h"
65#include "llvm/IR/IRBuilder.h"
66#include "llvm/IR/InstrTypes.h"
68#include "llvm/IR/Metadata.h"
69#include "llvm/IR/Module.h"
71#include "llvm/IR/Type.h"
72#include "llvm/IR/Use.h"
73#include "llvm/IR/User.h"
74#include "llvm/IR/Value.h"
76#include "llvm/Pass.h"
81#include "llvm/Support/Debug.h"
90#include <algorithm>
91#include <cassert>
92#include <iterator>
93#include <limits>
94#include <optional>
95#include <utility>
96#include <vector>
97
98using namespace llvm;
99using namespace llvm::PatternMatch;
100
101static cl::opt<unsigned> LoopSizeCutoff("irce-loop-size-cutoff", cl::Hidden,
102 cl::init(64));
103
104static cl::opt<bool> PrintChangedLoops("irce-print-changed-loops", cl::Hidden,
105 cl::init(false));
106
107static cl::opt<bool> PrintRangeChecks("irce-print-range-checks", cl::Hidden,
108 cl::init(false));
109
110static cl::opt<bool> SkipProfitabilityChecks("irce-skip-profitability-checks",
111 cl::Hidden, cl::init(false));
112
113static cl::opt<unsigned> MinRuntimeIterations("irce-min-runtime-iterations",
114 cl::Hidden, cl::init(10));
115
116static cl::opt<bool> AllowUnsignedLatchCondition("irce-allow-unsigned-latch",
117 cl::Hidden, cl::init(true));
118
120 "irce-allow-narrow-latch", cl::Hidden, cl::init(true),
121 cl::desc("If set to true, IRCE may eliminate wide range checks in loops "
122 "with narrow latch condition."));
123
124static const char *ClonedLoopTag = "irce.loop.clone";
125
126#define DEBUG_TYPE "irce"
127
128namespace {
129
130/// An inductive range check is conditional branch in a loop with
131///
132/// 1. a very cold successor (i.e. the branch jumps to that successor very
133/// rarely)
134///
135/// and
136///
137/// 2. a condition that is provably true for some contiguous range of values
138/// taken by the containing loop's induction variable.
139///
140class InductiveRangeCheck {
141
142 const SCEV *Begin = nullptr;
143 const SCEV *Step = nullptr;
144 const SCEV *End = nullptr;
145 Use *CheckUse = nullptr;
146
147 static bool parseRangeCheckICmp(Loop *L, ICmpInst *ICI, ScalarEvolution &SE,
148 Value *&Index, Value *&Length,
149 bool &IsSigned);
150
151 static void
152 extractRangeChecksFromCond(Loop *L, ScalarEvolution &SE, Use &ConditionUse,
154 SmallPtrSetImpl<Value *> &Visited);
155
156public:
157 const SCEV *getBegin() const { return Begin; }
158 const SCEV *getStep() const { return Step; }
159 const SCEV *getEnd() const { return End; }
160
161 void print(raw_ostream &OS) const {
162 OS << "InductiveRangeCheck:\n";
163 OS << " Begin: ";
164 Begin->print(OS);
165 OS << " Step: ";
166 Step->print(OS);
167 OS << " End: ";
168 End->print(OS);
169 OS << "\n CheckUse: ";
170 getCheckUse()->getUser()->print(OS);
171 OS << " Operand: " << getCheckUse()->getOperandNo() << "\n";
172 }
173
175 void dump() {
176 print(dbgs());
177 }
178
179 Use *getCheckUse() const { return CheckUse; }
180
181 /// Represents an signed integer range [Range.getBegin(), Range.getEnd()). If
182 /// R.getEnd() le R.getBegin(), then R denotes the empty range.
183
184 class Range {
185 const SCEV *Begin;
186 const SCEV *End;
187
188 public:
189 Range(const SCEV *Begin, const SCEV *End) : Begin(Begin), End(End) {
190 assert(Begin->getType() == End->getType() && "ill-typed range!");
191 }
192
193 Type *getType() const { return Begin->getType(); }
194 const SCEV *getBegin() const { return Begin; }
195 const SCEV *getEnd() const { return End; }
196 bool isEmpty(ScalarEvolution &SE, bool IsSigned) const {
197 if (Begin == End)
198 return true;
199 if (IsSigned)
200 return SE.isKnownPredicate(ICmpInst::ICMP_SGE, Begin, End);
201 else
202 return SE.isKnownPredicate(ICmpInst::ICMP_UGE, Begin, End);
203 }
204 };
205
206 /// This is the value the condition of the branch needs to evaluate to for the
207 /// branch to take the hot successor (see (1) above).
208 bool getPassingDirection() { return true; }
209
210 /// Computes a range for the induction variable (IndVar) in which the range
211 /// check is redundant and can be constant-folded away. The induction
212 /// variable is not required to be the canonical {0,+,1} induction variable.
213 std::optional<Range> computeSafeIterationSpace(ScalarEvolution &SE,
214 const SCEVAddRecExpr *IndVar,
215 bool IsLatchSigned) const;
216
217 /// Parse out a set of inductive range checks from \p BI and append them to \p
218 /// Checks.
219 ///
220 /// NB! There may be conditions feeding into \p BI that aren't inductive range
221 /// checks, and hence don't end up in \p Checks.
222 static void
223 extractRangeChecksFromBranch(BranchInst *BI, Loop *L, ScalarEvolution &SE,
226};
227
228struct LoopStructure;
229
230class InductiveRangeCheckElimination {
231 ScalarEvolution &SE;
233 DominatorTree &DT;
234 LoopInfo &LI;
235
236 using GetBFIFunc =
238 GetBFIFunc GetBFI;
239
240 // Returns true if it is profitable to do a transform basing on estimation of
241 // number of iterations.
242 bool isProfitableToTransform(const Loop &L, LoopStructure &LS);
243
244public:
245 InductiveRangeCheckElimination(ScalarEvolution &SE,
247 LoopInfo &LI, GetBFIFunc GetBFI = std::nullopt)
248 : SE(SE), BPI(BPI), DT(DT), LI(LI), GetBFI(GetBFI) {}
249
250 bool run(Loop *L, function_ref<void(Loop *, bool)> LPMAddNewLoop);
251};
252
253class IRCELegacyPass : public FunctionPass {
254public:
255 static char ID;
256
257 IRCELegacyPass() : FunctionPass(ID) {
259 }
260
261 void getAnalysisUsage(AnalysisUsage &AU) const override {
269 }
270
271 bool runOnFunction(Function &F) override;
272};
273
274} // end anonymous namespace
275
276char IRCELegacyPass::ID = 0;
277
278INITIALIZE_PASS_BEGIN(IRCELegacyPass, "irce",
279 "Inductive range check elimination", false, false)
284INITIALIZE_PASS_END(IRCELegacyPass, "irce", "Inductive range check elimination",
286
287/// Parse a single ICmp instruction, `ICI`, into a range check. If `ICI` cannot
288/// be interpreted as a range check, return false and set `Index` and `Length`
289/// to `nullptr`. Otherwise set `Index` to the value being range checked, and
290/// set `Length` to the upper limit `Index` is being range checked.
291bool
292InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI,
294 Value *&Length, bool &IsSigned) {
295 auto IsLoopInvariant = [&SE, L](Value *V) {
296 return SE.isLoopInvariant(SE.getSCEV(V), L);
297 };
298
299 ICmpInst::Predicate Pred = ICI->getPredicate();
300 Value *LHS = ICI->getOperand(0);
301 Value *RHS = ICI->getOperand(1);
302
303 switch (Pred) {
304 default:
305 return false;
306
307 case ICmpInst::ICMP_SLE:
308 std::swap(LHS, RHS);
309 [[fallthrough]];
310 case ICmpInst::ICMP_SGE:
311 IsSigned = true;
312 if (match(RHS, m_ConstantInt<0>())) {
313 Index = LHS;
314 return true; // Lower.
315 }
316 return false;
317
318 case ICmpInst::ICMP_SLT:
319 std::swap(LHS, RHS);
320 [[fallthrough]];
321 case ICmpInst::ICMP_SGT:
322 IsSigned = true;
323 if (match(RHS, m_ConstantInt<-1>())) {
324 Index = LHS;
325 return true; // Lower.
326 }
327
328 if (IsLoopInvariant(LHS)) {
329 Index = RHS;
330 Length = LHS;
331 return true; // Upper.
332 }
333 return false;
334
335 case ICmpInst::ICMP_ULT:
336 std::swap(LHS, RHS);
337 [[fallthrough]];
338 case ICmpInst::ICMP_UGT:
339 IsSigned = false;
340 if (IsLoopInvariant(LHS)) {
341 Index = RHS;
342 Length = LHS;
343 return true; // Both lower and upper.
344 }
345 return false;
346 }
347
348 llvm_unreachable("default clause returns!");
349}
350
351void InductiveRangeCheck::extractRangeChecksFromCond(
352 Loop *L, ScalarEvolution &SE, Use &ConditionUse,
354 SmallPtrSetImpl<Value *> &Visited) {
355 Value *Condition = ConditionUse.get();
356 if (!Visited.insert(Condition).second)
357 return;
358
359 // TODO: Do the same for OR, XOR, NOT etc?
360 if (match(Condition, m_LogicalAnd(m_Value(), m_Value()))) {
361 extractRangeChecksFromCond(L, SE, cast<User>(Condition)->getOperandUse(0),
362 Checks, Visited);
363 extractRangeChecksFromCond(L, SE, cast<User>(Condition)->getOperandUse(1),
364 Checks, Visited);
365 return;
366 }
367
368 ICmpInst *ICI = dyn_cast<ICmpInst>(Condition);
369 if (!ICI)
370 return;
371
372 Value *Length = nullptr, *Index;
373 bool IsSigned;
374 if (!parseRangeCheckICmp(L, ICI, SE, Index, Length, IsSigned))
375 return;
376
377 const auto *IndexAddRec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Index));
378 bool IsAffineIndex =
379 IndexAddRec && (IndexAddRec->getLoop() == L) && IndexAddRec->isAffine();
380
381 if (!IsAffineIndex)
382 return;
383
384 const SCEV *End = nullptr;
385 // We strengthen "0 <= I" to "0 <= I < INT_SMAX" and "I < L" to "0 <= I < L".
386 // We can potentially do much better here.
387 if (Length)
388 End = SE.getSCEV(Length);
389 else {
390 // So far we can only reach this point for Signed range check. This may
391 // change in future. In this case we will need to pick Unsigned max for the
392 // unsigned range check.
393 unsigned BitWidth = cast<IntegerType>(IndexAddRec->getType())->getBitWidth();
395 End = SIntMax;
396 }
397
398 InductiveRangeCheck IRC;
399 IRC.End = End;
400 IRC.Begin = IndexAddRec->getStart();
401 IRC.Step = IndexAddRec->getStepRecurrence(SE);
402 IRC.CheckUse = &ConditionUse;
403 Checks.push_back(IRC);
404}
405
406void InductiveRangeCheck::extractRangeChecksFromBranch(
409 if (BI->isUnconditional() || BI->getParent() == L->getLoopLatch())
410 return;
411
412 BranchProbability LikelyTaken(15, 16);
413
414 if (!SkipProfitabilityChecks && BPI &&
415 BPI->getEdgeProbability(BI->getParent(), (unsigned)0) < LikelyTaken)
416 return;
417
419 InductiveRangeCheck::extractRangeChecksFromCond(L, SE, BI->getOperandUse(0),
420 Checks, Visited);
421}
422
423// Add metadata to the loop L to disable loop optimizations. Callers need to
424// confirm that optimizing loop L is not beneficial.
426 // We do not care about any existing loopID related metadata for L, since we
427 // are setting all loop metadata to false.
428 LLVMContext &Context = L.getHeader()->getContext();
429 // Reserve first location for self reference to the LoopID metadata node.
430 MDNode *Dummy = MDNode::get(Context, {});
431 MDNode *DisableUnroll = MDNode::get(
432 Context, {MDString::get(Context, "llvm.loop.unroll.disable")});
433 Metadata *FalseVal =
435 MDNode *DisableVectorize = MDNode::get(
436 Context,
437 {MDString::get(Context, "llvm.loop.vectorize.enable"), FalseVal});
438 MDNode *DisableLICMVersioning = MDNode::get(
439 Context, {MDString::get(Context, "llvm.loop.licm_versioning.disable")});
440 MDNode *DisableDistribution= MDNode::get(
441 Context,
442 {MDString::get(Context, "llvm.loop.distribute.enable"), FalseVal});
443 MDNode *NewLoopID =
444 MDNode::get(Context, {Dummy, DisableUnroll, DisableVectorize,
445 DisableLICMVersioning, DisableDistribution});
446 // Set operand 0 to refer to the loop id itself.
447 NewLoopID->replaceOperandWith(0, NewLoopID);
448 L.setLoopID(NewLoopID);
449}
450
451namespace {
452
453// Keeps track of the structure of a loop. This is similar to llvm::Loop,
454// except that it is more lightweight and can track the state of a loop through
455// changing and potentially invalid IR. This structure also formalizes the
456// kinds of loops we can deal with -- ones that have a single latch that is also
457// an exiting block *and* have a canonical induction variable.
458struct LoopStructure {
459 const char *Tag = "";
460
461 BasicBlock *Header = nullptr;
462 BasicBlock *Latch = nullptr;
463
464 // `Latch's terminator instruction is `LatchBr', and it's `LatchBrExitIdx'th
465 // successor is `LatchExit', the exit block of the loop.
466 BranchInst *LatchBr = nullptr;
467 BasicBlock *LatchExit = nullptr;
468 unsigned LatchBrExitIdx = std::numeric_limits<unsigned>::max();
469
470 // The loop represented by this instance of LoopStructure is semantically
471 // equivalent to:
472 //
473 // intN_ty inc = IndVarIncreasing ? 1 : -1;
474 // pred_ty predicate = IndVarIncreasing ? ICMP_SLT : ICMP_SGT;
475 //
476 // for (intN_ty iv = IndVarStart; predicate(iv, LoopExitAt); iv = IndVarBase)
477 // ... body ...
478
479 Value *IndVarBase = nullptr;
480 Value *IndVarStart = nullptr;
481 Value *IndVarStep = nullptr;
482 Value *LoopExitAt = nullptr;
483 bool IndVarIncreasing = false;
484 bool IsSignedPredicate = true;
485
486 LoopStructure() = default;
487
488 template <typename M> LoopStructure map(M Map) const {
489 LoopStructure Result;
490 Result.Tag = Tag;
491 Result.Header = cast<BasicBlock>(Map(Header));
492 Result.Latch = cast<BasicBlock>(Map(Latch));
493 Result.LatchBr = cast<BranchInst>(Map(LatchBr));
494 Result.LatchExit = cast<BasicBlock>(Map(LatchExit));
495 Result.LatchBrExitIdx = LatchBrExitIdx;
496 Result.IndVarBase = Map(IndVarBase);
497 Result.IndVarStart = Map(IndVarStart);
498 Result.IndVarStep = Map(IndVarStep);
499 Result.LoopExitAt = Map(LoopExitAt);
500 Result.IndVarIncreasing = IndVarIncreasing;
501 Result.IsSignedPredicate = IsSignedPredicate;
502 return Result;
503 }
504
505 static std::optional<LoopStructure> parseLoopStructure(ScalarEvolution &,
506 Loop &, const char *&);
507};
508
509/// This class is used to constrain loops to run within a given iteration space.
510/// The algorithm this class implements is given a Loop and a range [Begin,
511/// End). The algorithm then tries to break out a "main loop" out of the loop
512/// it is given in a way that the "main loop" runs with the induction variable
513/// in a subset of [Begin, End). The algorithm emits appropriate pre and post
514/// loops to run any remaining iterations. The pre loop runs any iterations in
515/// which the induction variable is < Begin, and the post loop runs any
516/// iterations in which the induction variable is >= End.
517class LoopConstrainer {
518 // The representation of a clone of the original loop we started out with.
519 struct ClonedLoop {
520 // The cloned blocks
521 std::vector<BasicBlock *> Blocks;
522
523 // `Map` maps values in the clonee into values in the cloned version
525
526 // An instance of `LoopStructure` for the cloned loop
527 LoopStructure Structure;
528 };
529
530 // Result of rewriting the range of a loop. See changeIterationSpaceEnd for
531 // more details on what these fields mean.
532 struct RewrittenRangeInfo {
533 BasicBlock *PseudoExit = nullptr;
534 BasicBlock *ExitSelector = nullptr;
535 std::vector<PHINode *> PHIValuesAtPseudoExit;
536 PHINode *IndVarEnd = nullptr;
537
538 RewrittenRangeInfo() = default;
539 };
540
541 // Calculated subranges we restrict the iteration space of the main loop to.
542 // See the implementation of `calculateSubRanges' for more details on how
543 // these fields are computed. `LowLimit` is std::nullopt if there is no
544 // restriction on low end of the restricted iteration space of the main loop.
545 // `HighLimit` is std::nullopt if there is no restriction on high end of the
546 // restricted iteration space of the main loop.
547
548 struct SubRanges {
549 std::optional<const SCEV *> LowLimit;
550 std::optional<const SCEV *> HighLimit;
551 };
552
553 // Compute a safe set of limits for the main loop to run in -- effectively the
554 // intersection of `Range' and the iteration space of the original loop.
555 // Return std::nullopt if unable to compute the set of subranges.
556 std::optional<SubRanges> calculateSubRanges(bool IsSignedPredicate) const;
557
558 // Clone `OriginalLoop' and return the result in CLResult. The IR after
559 // running `cloneLoop' is well formed except for the PHI nodes in CLResult --
560 // the PHI nodes say that there is an incoming edge from `OriginalPreheader`
561 // but there is no such edge.
562 void cloneLoop(ClonedLoop &CLResult, const char *Tag) const;
563
564 // Create the appropriate loop structure needed to describe a cloned copy of
565 // `Original`. The clone is described by `VM`.
566 Loop *createClonedLoopStructure(Loop *Original, Loop *Parent,
567 ValueToValueMapTy &VM, bool IsSubloop);
568
569 // Rewrite the iteration space of the loop denoted by (LS, Preheader). The
570 // iteration space of the rewritten loop ends at ExitLoopAt. The start of the
571 // iteration space is not changed. `ExitLoopAt' is assumed to be slt
572 // `OriginalHeaderCount'.
573 //
574 // If there are iterations left to execute, control is made to jump to
575 // `ContinuationBlock', otherwise they take the normal loop exit. The
576 // returned `RewrittenRangeInfo' object is populated as follows:
577 //
578 // .PseudoExit is a basic block that unconditionally branches to
579 // `ContinuationBlock'.
580 //
581 // .ExitSelector is a basic block that decides, on exit from the loop,
582 // whether to branch to the "true" exit or to `PseudoExit'.
583 //
584 // .PHIValuesAtPseudoExit are PHINodes in `PseudoExit' that compute the value
585 // for each PHINode in the loop header on taking the pseudo exit.
586 //
587 // After changeIterationSpaceEnd, `Preheader' is no longer a legitimate
588 // preheader because it is made to branch to the loop header only
589 // conditionally.
590 RewrittenRangeInfo
591 changeIterationSpaceEnd(const LoopStructure &LS, BasicBlock *Preheader,
592 Value *ExitLoopAt,
593 BasicBlock *ContinuationBlock) const;
594
595 // The loop denoted by `LS' has `OldPreheader' as its preheader. This
596 // function creates a new preheader for `LS' and returns it.
597 BasicBlock *createPreheader(const LoopStructure &LS, BasicBlock *OldPreheader,
598 const char *Tag) const;
599
600 // `ContinuationBlockAndPreheader' was the continuation block for some call to
601 // `changeIterationSpaceEnd' and is the preheader to the loop denoted by `LS'.
602 // This function rewrites the PHI nodes in `LS.Header' to start with the
603 // correct value.
604 void rewriteIncomingValuesForPHIs(
605 LoopStructure &LS, BasicBlock *ContinuationBlockAndPreheader,
606 const LoopConstrainer::RewrittenRangeInfo &RRI) const;
607
608 // Even though we do not preserve any passes at this time, we at least need to
609 // keep the parent loop structure consistent. The `LPPassManager' seems to
610 // verify this after running a loop pass. This function adds the list of
611 // blocks denoted by BBs to this loops parent loop if required.
612 void addToParentLoopIfNeeded(ArrayRef<BasicBlock *> BBs);
613
614 // Some global state.
615 Function &F;
616 LLVMContext &Ctx;
617 ScalarEvolution &SE;
618 DominatorTree &DT;
619 LoopInfo &LI;
620 function_ref<void(Loop *, bool)> LPMAddNewLoop;
621
622 // Information about the original loop we started out with.
623 Loop &OriginalLoop;
624
625 const SCEV *LatchTakenCount = nullptr;
626 BasicBlock *OriginalPreheader = nullptr;
627
628 // The preheader of the main loop. This may or may not be different from
629 // `OriginalPreheader'.
630 BasicBlock *MainLoopPreheader = nullptr;
631
632 // The range we need to run the main loop in.
633 InductiveRangeCheck::Range Range;
634
635 // The structure of the main loop (see comment at the beginning of this class
636 // for a definition)
637 LoopStructure MainLoopStructure;
638
639public:
640 LoopConstrainer(Loop &L, LoopInfo &LI,
641 function_ref<void(Loop *, bool)> LPMAddNewLoop,
642 const LoopStructure &LS, ScalarEvolution &SE,
643 DominatorTree &DT, InductiveRangeCheck::Range R)
644 : F(*L.getHeader()->getParent()), Ctx(L.getHeader()->getContext()),
645 SE(SE), DT(DT), LI(LI), LPMAddNewLoop(LPMAddNewLoop), OriginalLoop(L),
646 Range(R), MainLoopStructure(LS) {}
647
648 // Entry point for the algorithm. Returns true on success.
649 bool run();
650};
651
652} // end anonymous namespace
653
654/// Given a loop with an deccreasing induction variable, is it possible to
655/// safely calculate the bounds of a new loop using the given Predicate.
656static bool isSafeDecreasingBound(const SCEV *Start,
657 const SCEV *BoundSCEV, const SCEV *Step,
659 unsigned LatchBrExitIdx,
660 Loop *L, ScalarEvolution &SE) {
661 if (Pred != ICmpInst::ICMP_SLT && Pred != ICmpInst::ICMP_SGT &&
662 Pred != ICmpInst::ICMP_ULT && Pred != ICmpInst::ICMP_UGT)
663 return false;
664
665 if (!SE.isAvailableAtLoopEntry(BoundSCEV, L))
666 return false;
667
668 assert(SE.isKnownNegative(Step) && "expecting negative step");
669
670 LLVM_DEBUG(dbgs() << "irce: isSafeDecreasingBound with:\n");
671 LLVM_DEBUG(dbgs() << "irce: Start: " << *Start << "\n");
672 LLVM_DEBUG(dbgs() << "irce: Step: " << *Step << "\n");
673 LLVM_DEBUG(dbgs() << "irce: BoundSCEV: " << *BoundSCEV << "\n");
674 LLVM_DEBUG(dbgs() << "irce: Pred: " << Pred << "\n");
675 LLVM_DEBUG(dbgs() << "irce: LatchExitBrIdx: " << LatchBrExitIdx << "\n");
676
677 bool IsSigned = ICmpInst::isSigned(Pred);
678 // The predicate that we need to check that the induction variable lies
679 // within bounds.
680 ICmpInst::Predicate BoundPred =
682
683 if (LatchBrExitIdx == 1)
684 return SE.isLoopEntryGuardedByCond(L, BoundPred, Start, BoundSCEV);
685
686 assert(LatchBrExitIdx == 0 &&
687 "LatchBrExitIdx should be either 0 or 1");
688
689 const SCEV *StepPlusOne = SE.getAddExpr(Step, SE.getOne(Step->getType()));
690 unsigned BitWidth = cast<IntegerType>(BoundSCEV->getType())->getBitWidth();
691 APInt Min = IsSigned ? APInt::getSignedMinValue(BitWidth) :
693 const SCEV *Limit = SE.getMinusSCEV(SE.getConstant(Min), StepPlusOne);
694
695 const SCEV *MinusOne =
696 SE.getMinusSCEV(BoundSCEV, SE.getOne(BoundSCEV->getType()));
697
698 return SE.isLoopEntryGuardedByCond(L, BoundPred, Start, MinusOne) &&
699 SE.isLoopEntryGuardedByCond(L, BoundPred, BoundSCEV, Limit);
700
701}
702
703/// Given a loop with an increasing induction variable, is it possible to
704/// safely calculate the bounds of a new loop using the given Predicate.
705static bool isSafeIncreasingBound(const SCEV *Start,
706 const SCEV *BoundSCEV, const SCEV *Step,
708 unsigned LatchBrExitIdx,
709 Loop *L, ScalarEvolution &SE) {
710 if (Pred != ICmpInst::ICMP_SLT && Pred != ICmpInst::ICMP_SGT &&
711 Pred != ICmpInst::ICMP_ULT && Pred != ICmpInst::ICMP_UGT)
712 return false;
713
714 if (!SE.isAvailableAtLoopEntry(BoundSCEV, L))
715 return false;
716
717 LLVM_DEBUG(dbgs() << "irce: isSafeIncreasingBound with:\n");
718 LLVM_DEBUG(dbgs() << "irce: Start: " << *Start << "\n");
719 LLVM_DEBUG(dbgs() << "irce: Step: " << *Step << "\n");
720 LLVM_DEBUG(dbgs() << "irce: BoundSCEV: " << *BoundSCEV << "\n");
721 LLVM_DEBUG(dbgs() << "irce: Pred: " << Pred << "\n");
722 LLVM_DEBUG(dbgs() << "irce: LatchExitBrIdx: " << LatchBrExitIdx << "\n");
723
724 bool IsSigned = ICmpInst::isSigned(Pred);
725 // The predicate that we need to check that the induction variable lies
726 // within bounds.
727 ICmpInst::Predicate BoundPred =
729
730 if (LatchBrExitIdx == 1)
731 return SE.isLoopEntryGuardedByCond(L, BoundPred, Start, BoundSCEV);
732
733 assert(LatchBrExitIdx == 0 && "LatchBrExitIdx should be 0 or 1");
734
735 const SCEV *StepMinusOne =
736 SE.getMinusSCEV(Step, SE.getOne(Step->getType()));
737 unsigned BitWidth = cast<IntegerType>(BoundSCEV->getType())->getBitWidth();
738 APInt Max = IsSigned ? APInt::getSignedMaxValue(BitWidth) :
740 const SCEV *Limit = SE.getMinusSCEV(SE.getConstant(Max), StepMinusOne);
741
742 return (SE.isLoopEntryGuardedByCond(L, BoundPred, Start,
743 SE.getAddExpr(BoundSCEV, Step)) &&
744 SE.isLoopEntryGuardedByCond(L, BoundPred, BoundSCEV, Limit));
745}
746
747std::optional<LoopStructure>
748LoopStructure::parseLoopStructure(ScalarEvolution &SE, Loop &L,
749 const char *&FailureReason) {
750 if (!L.isLoopSimplifyForm()) {
751 FailureReason = "loop not in LoopSimplify form";
752 return std::nullopt;
753 }
754
755 BasicBlock *Latch = L.getLoopLatch();
756 assert(Latch && "Simplified loops only have one latch!");
757
758 if (Latch->getTerminator()->getMetadata(ClonedLoopTag)) {
759 FailureReason = "loop has already been cloned";
760 return std::nullopt;
761 }
762
763 if (!L.isLoopExiting(Latch)) {
764 FailureReason = "no loop latch";
765 return std::nullopt;
766 }
767
768 BasicBlock *Header = L.getHeader();
769 BasicBlock *Preheader = L.getLoopPreheader();
770 if (!Preheader) {
771 FailureReason = "no preheader";
772 return std::nullopt;
773 }
774
775 BranchInst *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
776 if (!LatchBr || LatchBr->isUnconditional()) {
777 FailureReason = "latch terminator not conditional branch";
778 return std::nullopt;
779 }
780
781 unsigned LatchBrExitIdx = LatchBr->getSuccessor(0) == Header ? 1 : 0;
782
783 ICmpInst *ICI = dyn_cast<ICmpInst>(LatchBr->getCondition());
784 if (!ICI || !isa<IntegerType>(ICI->getOperand(0)->getType())) {
785 FailureReason = "latch terminator branch not conditional on integral icmp";
786 return std::nullopt;
787 }
788
789 const SCEV *LatchCount = SE.getExitCount(&L, Latch);
790 if (isa<SCEVCouldNotCompute>(LatchCount)) {
791 FailureReason = "could not compute latch count";
792 return std::nullopt;
793 }
794
795 ICmpInst::Predicate Pred = ICI->getPredicate();
796 Value *LeftValue = ICI->getOperand(0);
797 const SCEV *LeftSCEV = SE.getSCEV(LeftValue);
798 IntegerType *IndVarTy = cast<IntegerType>(LeftValue->getType());
799
800 Value *RightValue = ICI->getOperand(1);
801 const SCEV *RightSCEV = SE.getSCEV(RightValue);
802
803 // We canonicalize `ICI` such that `LeftSCEV` is an add recurrence.
804 if (!isa<SCEVAddRecExpr>(LeftSCEV)) {
805 if (isa<SCEVAddRecExpr>(RightSCEV)) {
806 std::swap(LeftSCEV, RightSCEV);
807 std::swap(LeftValue, RightValue);
808 Pred = ICmpInst::getSwappedPredicate(Pred);
809 } else {
810 FailureReason = "no add recurrences in the icmp";
811 return std::nullopt;
812 }
813 }
814
815 auto HasNoSignedWrap = [&](const SCEVAddRecExpr *AR) {
816 if (AR->getNoWrapFlags(SCEV::FlagNSW))
817 return true;
818
819 IntegerType *Ty = cast<IntegerType>(AR->getType());
820 IntegerType *WideTy =
821 IntegerType::get(Ty->getContext(), Ty->getBitWidth() * 2);
822
823 const SCEVAddRecExpr *ExtendAfterOp =
824 dyn_cast<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
825 if (ExtendAfterOp) {
826 const SCEV *ExtendedStart = SE.getSignExtendExpr(AR->getStart(), WideTy);
827 const SCEV *ExtendedStep =
828 SE.getSignExtendExpr(AR->getStepRecurrence(SE), WideTy);
829
830 bool NoSignedWrap = ExtendAfterOp->getStart() == ExtendedStart &&
831 ExtendAfterOp->getStepRecurrence(SE) == ExtendedStep;
832
833 if (NoSignedWrap)
834 return true;
835 }
836
837 // We may have proved this when computing the sign extension above.
838 return AR->getNoWrapFlags(SCEV::FlagNSW) != SCEV::FlagAnyWrap;
839 };
840
841 // `ICI` is interpreted as taking the backedge if the *next* value of the
842 // induction variable satisfies some constraint.
843
844 const SCEVAddRecExpr *IndVarBase = cast<SCEVAddRecExpr>(LeftSCEV);
845 if (IndVarBase->getLoop() != &L) {
846 FailureReason = "LHS in cmp is not an AddRec for this loop";
847 return std::nullopt;
848 }
849 if (!IndVarBase->isAffine()) {
850 FailureReason = "LHS in icmp not induction variable";
851 return std::nullopt;
852 }
853 const SCEV* StepRec = IndVarBase->getStepRecurrence(SE);
854 if (!isa<SCEVConstant>(StepRec)) {
855 FailureReason = "LHS in icmp not induction variable";
856 return std::nullopt;
857 }
858 ConstantInt *StepCI = cast<SCEVConstant>(StepRec)->getValue();
859
860 if (ICI->isEquality() && !HasNoSignedWrap(IndVarBase)) {
861 FailureReason = "LHS in icmp needs nsw for equality predicates";
862 return std::nullopt;
863 }
864
865 assert(!StepCI->isZero() && "Zero step?");
866 bool IsIncreasing = !StepCI->isNegative();
867 bool IsSignedPredicate;
868 const SCEV *StartNext = IndVarBase->getStart();
869 const SCEV *Addend = SE.getNegativeSCEV(IndVarBase->getStepRecurrence(SE));
870 const SCEV *IndVarStart = SE.getAddExpr(StartNext, Addend);
871 const SCEV *Step = SE.getSCEV(StepCI);
872
873 const SCEV *FixedRightSCEV = nullptr;
874
875 // If RightValue resides within loop (but still being loop invariant),
876 // regenerate it as preheader.
877 if (auto *I = dyn_cast<Instruction>(RightValue))
878 if (L.contains(I->getParent()))
879 FixedRightSCEV = RightSCEV;
880
881 if (IsIncreasing) {
882 bool DecreasedRightValueByOne = false;
883 if (StepCI->isOne()) {
884 // Try to turn eq/ne predicates to those we can work with.
885 if (Pred == ICmpInst::ICMP_NE && LatchBrExitIdx == 1)
886 // while (++i != len) { while (++i < len) {
887 // ... ---> ...
888 // } }
889 // If both parts are known non-negative, it is profitable to use
890 // unsigned comparison in increasing loop. This allows us to make the
891 // comparison check against "RightSCEV + 1" more optimistic.
892 if (isKnownNonNegativeInLoop(IndVarStart, &L, SE) &&
893 isKnownNonNegativeInLoop(RightSCEV, &L, SE))
894 Pred = ICmpInst::ICMP_ULT;
895 else
896 Pred = ICmpInst::ICMP_SLT;
897 else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0) {
898 // while (true) { while (true) {
899 // if (++i == len) ---> if (++i > len - 1)
900 // break; break;
901 // ... ...
902 // } }
903 if (IndVarBase->getNoWrapFlags(SCEV::FlagNUW) &&
904 cannotBeMinInLoop(RightSCEV, &L, SE, /*Signed*/false)) {
905 Pred = ICmpInst::ICMP_UGT;
906 RightSCEV = SE.getMinusSCEV(RightSCEV,
907 SE.getOne(RightSCEV->getType()));
908 DecreasedRightValueByOne = true;
909 } else if (cannotBeMinInLoop(RightSCEV, &L, SE, /*Signed*/true)) {
910 Pred = ICmpInst::ICMP_SGT;
911 RightSCEV = SE.getMinusSCEV(RightSCEV,
912 SE.getOne(RightSCEV->getType()));
913 DecreasedRightValueByOne = true;
914 }
915 }
916 }
917
918 bool LTPred = (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_ULT);
919 bool GTPred = (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_UGT);
920 bool FoundExpectedPred =
921 (LTPred && LatchBrExitIdx == 1) || (GTPred && LatchBrExitIdx == 0);
922
923 if (!FoundExpectedPred) {
924 FailureReason = "expected icmp slt semantically, found something else";
925 return std::nullopt;
926 }
927
928 IsSignedPredicate = ICmpInst::isSigned(Pred);
929 if (!IsSignedPredicate && !AllowUnsignedLatchCondition) {
930 FailureReason = "unsigned latch conditions are explicitly prohibited";
931 return std::nullopt;
932 }
933
934 if (!isSafeIncreasingBound(IndVarStart, RightSCEV, Step, Pred,
935 LatchBrExitIdx, &L, SE)) {
936 FailureReason = "Unsafe loop bounds";
937 return std::nullopt;
938 }
939 if (LatchBrExitIdx == 0) {
940 // We need to increase the right value unless we have already decreased
941 // it virtually when we replaced EQ with SGT.
942 if (!DecreasedRightValueByOne)
943 FixedRightSCEV =
944 SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType()));
945 } else {
946 assert(!DecreasedRightValueByOne &&
947 "Right value can be decreased only for LatchBrExitIdx == 0!");
948 }
949 } else {
950 bool IncreasedRightValueByOne = false;
951 if (StepCI->isMinusOne()) {
952 // Try to turn eq/ne predicates to those we can work with.
953 if (Pred == ICmpInst::ICMP_NE && LatchBrExitIdx == 1)
954 // while (--i != len) { while (--i > len) {
955 // ... ---> ...
956 // } }
957 // We intentionally don't turn the predicate into UGT even if we know
958 // that both operands are non-negative, because it will only pessimize
959 // our check against "RightSCEV - 1".
960 Pred = ICmpInst::ICMP_SGT;
961 else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0) {
962 // while (true) { while (true) {
963 // if (--i == len) ---> if (--i < len + 1)
964 // break; break;
965 // ... ...
966 // } }
967 if (IndVarBase->getNoWrapFlags(SCEV::FlagNUW) &&
968 cannotBeMaxInLoop(RightSCEV, &L, SE, /* Signed */ false)) {
969 Pred = ICmpInst::ICMP_ULT;
970 RightSCEV = SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType()));
971 IncreasedRightValueByOne = true;
972 } else if (cannotBeMaxInLoop(RightSCEV, &L, SE, /* Signed */ true)) {
973 Pred = ICmpInst::ICMP_SLT;
974 RightSCEV = SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType()));
975 IncreasedRightValueByOne = true;
976 }
977 }
978 }
979
980 bool LTPred = (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_ULT);
981 bool GTPred = (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_UGT);
982
983 bool FoundExpectedPred =
984 (GTPred && LatchBrExitIdx == 1) || (LTPred && LatchBrExitIdx == 0);
985
986 if (!FoundExpectedPred) {
987 FailureReason = "expected icmp sgt semantically, found something else";
988 return std::nullopt;
989 }
990
991 IsSignedPredicate =
992 Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGT;
993
994 if (!IsSignedPredicate && !AllowUnsignedLatchCondition) {
995 FailureReason = "unsigned latch conditions are explicitly prohibited";
996 return std::nullopt;
997 }
998
999 if (!isSafeDecreasingBound(IndVarStart, RightSCEV, Step, Pred,
1000 LatchBrExitIdx, &L, SE)) {
1001 FailureReason = "Unsafe bounds";
1002 return std::nullopt;
1003 }
1004
1005 if (LatchBrExitIdx == 0) {
1006 // We need to decrease the right value unless we have already increased
1007 // it virtually when we replaced EQ with SLT.
1008 if (!IncreasedRightValueByOne)
1009 FixedRightSCEV =
1010 SE.getMinusSCEV(RightSCEV, SE.getOne(RightSCEV->getType()));
1011 } else {
1012 assert(!IncreasedRightValueByOne &&
1013 "Right value can be increased only for LatchBrExitIdx == 0!");
1014 }
1015 }
1016 BasicBlock *LatchExit = LatchBr->getSuccessor(LatchBrExitIdx);
1017
1018 assert(SE.getLoopDisposition(LatchCount, &L) ==
1020 "loop variant exit count doesn't make sense!");
1021
1022 assert(!L.contains(LatchExit) && "expected an exit block!");
1023 const DataLayout &DL = Preheader->getModule()->getDataLayout();
1024 SCEVExpander Expander(SE, DL, "irce");
1025 Instruction *Ins = Preheader->getTerminator();
1026
1027 if (FixedRightSCEV)
1028 RightValue =
1029 Expander.expandCodeFor(FixedRightSCEV, FixedRightSCEV->getType(), Ins);
1030
1031 Value *IndVarStartV = Expander.expandCodeFor(IndVarStart, IndVarTy, Ins);
1032 IndVarStartV->setName("indvar.start");
1033
1034 LoopStructure Result;
1035
1036 Result.Tag = "main";
1037 Result.Header = Header;
1038 Result.Latch = Latch;
1039 Result.LatchBr = LatchBr;
1040 Result.LatchExit = LatchExit;
1041 Result.LatchBrExitIdx = LatchBrExitIdx;
1042 Result.IndVarStart = IndVarStartV;
1043 Result.IndVarStep = StepCI;
1044 Result.IndVarBase = LeftValue;
1045 Result.IndVarIncreasing = IsIncreasing;
1046 Result.LoopExitAt = RightValue;
1047 Result.IsSignedPredicate = IsSignedPredicate;
1048
1049 FailureReason = nullptr;
1050
1051 return Result;
1052}
1053
1054/// If the type of \p S matches with \p Ty, return \p S. Otherwise, return
1055/// signed or unsigned extension of \p S to type \p Ty.
1056static const SCEV *NoopOrExtend(const SCEV *S, Type *Ty, ScalarEvolution &SE,
1057 bool Signed) {
1058 return Signed ? SE.getNoopOrSignExtend(S, Ty) : SE.getNoopOrZeroExtend(S, Ty);
1059}
1060
1061std::optional<LoopConstrainer::SubRanges>
1062LoopConstrainer::calculateSubRanges(bool IsSignedPredicate) const {
1063 IntegerType *Ty = cast<IntegerType>(LatchTakenCount->getType());
1064
1065 auto *RTy = cast<IntegerType>(Range.getType());
1066
1067 // We only support wide range checks and narrow latches.
1068 if (!AllowNarrowLatchCondition && RTy != Ty)
1069 return std::nullopt;
1070 if (RTy->getBitWidth() < Ty->getBitWidth())
1071 return std::nullopt;
1072
1073 LoopConstrainer::SubRanges Result;
1074
1075 // I think we can be more aggressive here and make this nuw / nsw if the
1076 // addition that feeds into the icmp for the latch's terminating branch is nuw
1077 // / nsw. In any case, a wrapping 2's complement addition is safe.
1078 const SCEV *Start = NoopOrExtend(SE.getSCEV(MainLoopStructure.IndVarStart),
1079 RTy, SE, IsSignedPredicate);
1080 const SCEV *End = NoopOrExtend(SE.getSCEV(MainLoopStructure.LoopExitAt), RTy,
1081 SE, IsSignedPredicate);
1082
1083 bool Increasing = MainLoopStructure.IndVarIncreasing;
1084
1085 // We compute `Smallest` and `Greatest` such that [Smallest, Greatest), or
1086 // [Smallest, GreatestSeen] is the range of values the induction variable
1087 // takes.
1088
1089 const SCEV *Smallest = nullptr, *Greatest = nullptr, *GreatestSeen = nullptr;
1090
1091 const SCEV *One = SE.getOne(RTy);
1092 if (Increasing) {
1093 Smallest = Start;
1094 Greatest = End;
1095 // No overflow, because the range [Smallest, GreatestSeen] is not empty.
1096 GreatestSeen = SE.getMinusSCEV(End, One);
1097 } else {
1098 // These two computations may sign-overflow. Here is why that is okay:
1099 //
1100 // We know that the induction variable does not sign-overflow on any
1101 // iteration except the last one, and it starts at `Start` and ends at
1102 // `End`, decrementing by one every time.
1103 //
1104 // * if `Smallest` sign-overflows we know `End` is `INT_SMAX`. Since the
1105 // induction variable is decreasing we know that that the smallest value
1106 // the loop body is actually executed with is `INT_SMIN` == `Smallest`.
1107 //
1108 // * if `Greatest` sign-overflows, we know it can only be `INT_SMIN`. In
1109 // that case, `Clamp` will always return `Smallest` and
1110 // [`Result.LowLimit`, `Result.HighLimit`) = [`Smallest`, `Smallest`)
1111 // will be an empty range. Returning an empty range is always safe.
1112
1113 Smallest = SE.getAddExpr(End, One);
1114 Greatest = SE.getAddExpr(Start, One);
1115 GreatestSeen = Start;
1116 }
1117
1118 auto Clamp = [this, Smallest, Greatest, IsSignedPredicate](const SCEV *S) {
1119 return IsSignedPredicate
1120 ? SE.getSMaxExpr(Smallest, SE.getSMinExpr(Greatest, S))
1121 : SE.getUMaxExpr(Smallest, SE.getUMinExpr(Greatest, S));
1122 };
1123
1124 // In some cases we can prove that we don't need a pre or post loop.
1125 ICmpInst::Predicate PredLE =
1126 IsSignedPredicate ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1127 ICmpInst::Predicate PredLT =
1128 IsSignedPredicate ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1129
1130 bool ProvablyNoPreloop =
1131 SE.isKnownPredicate(PredLE, Range.getBegin(), Smallest);
1132 if (!ProvablyNoPreloop)
1133 Result.LowLimit = Clamp(Range.getBegin());
1134
1135 bool ProvablyNoPostLoop =
1136 SE.isKnownPredicate(PredLT, GreatestSeen, Range.getEnd());
1137 if (!ProvablyNoPostLoop)
1138 Result.HighLimit = Clamp(Range.getEnd());
1139
1140 return Result;
1141}
1142
1143void LoopConstrainer::cloneLoop(LoopConstrainer::ClonedLoop &Result,
1144 const char *Tag) const {
1145 for (BasicBlock *BB : OriginalLoop.getBlocks()) {
1146 BasicBlock *Clone = CloneBasicBlock(BB, Result.Map, Twine(".") + Tag, &F);
1147 Result.Blocks.push_back(Clone);
1148 Result.Map[BB] = Clone;
1149 }
1150
1151 auto GetClonedValue = [&Result](Value *V) {
1152 assert(V && "null values not in domain!");
1153 auto It = Result.Map.find(V);
1154 if (It == Result.Map.end())
1155 return V;
1156 return static_cast<Value *>(It->second);
1157 };
1158
1159 auto *ClonedLatch =
1160 cast<BasicBlock>(GetClonedValue(OriginalLoop.getLoopLatch()));
1161 ClonedLatch->getTerminator()->setMetadata(ClonedLoopTag,
1162 MDNode::get(Ctx, {}));
1163
1164 Result.Structure = MainLoopStructure.map(GetClonedValue);
1165 Result.Structure.Tag = Tag;
1166
1167 for (unsigned i = 0, e = Result.Blocks.size(); i != e; ++i) {
1168 BasicBlock *ClonedBB = Result.Blocks[i];
1169 BasicBlock *OriginalBB = OriginalLoop.getBlocks()[i];
1170
1171 assert(Result.Map[OriginalBB] == ClonedBB && "invariant!");
1172
1173 for (Instruction &I : *ClonedBB)
1176
1177 // Exit blocks will now have one more predecessor and their PHI nodes need
1178 // to be edited to reflect that. No phi nodes need to be introduced because
1179 // the loop is in LCSSA.
1180
1181 for (auto *SBB : successors(OriginalBB)) {
1182 if (OriginalLoop.contains(SBB))
1183 continue; // not an exit block
1184
1185 for (PHINode &PN : SBB->phis()) {
1186 Value *OldIncoming = PN.getIncomingValueForBlock(OriginalBB);
1187 PN.addIncoming(GetClonedValue(OldIncoming), ClonedBB);
1188 SE.forgetValue(&PN);
1189 }
1190 }
1191 }
1192}
1193
1194LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
1195 const LoopStructure &LS, BasicBlock *Preheader, Value *ExitSubloopAt,
1196 BasicBlock *ContinuationBlock) const {
1197 // We start with a loop with a single latch:
1198 //
1199 // +--------------------+
1200 // | |
1201 // | preheader |
1202 // | |
1203 // +--------+-----------+
1204 // | ----------------\
1205 // | / |
1206 // +--------v----v------+ |
1207 // | | |
1208 // | header | |
1209 // | | |
1210 // +--------------------+ |
1211 // |
1212 // ..... |
1213 // |
1214 // +--------------------+ |
1215 // | | |
1216 // | latch >----------/
1217 // | |
1218 // +-------v------------+
1219 // |
1220 // |
1221 // | +--------------------+
1222 // | | |
1223 // +---> original exit |
1224 // | |
1225 // +--------------------+
1226 //
1227 // We change the control flow to look like
1228 //
1229 //
1230 // +--------------------+
1231 // | |
1232 // | preheader >-------------------------+
1233 // | | |
1234 // +--------v-----------+ |
1235 // | /-------------+ |
1236 // | / | |
1237 // +--------v--v--------+ | |
1238 // | | | |
1239 // | header | | +--------+ |
1240 // | | | | | |
1241 // +--------------------+ | | +-----v-----v-----------+
1242 // | | | |
1243 // | | | .pseudo.exit |
1244 // | | | |
1245 // | | +-----------v-----------+
1246 // | | |
1247 // ..... | | |
1248 // | | +--------v-------------+
1249 // +--------------------+ | | | |
1250 // | | | | | ContinuationBlock |
1251 // | latch >------+ | | |
1252 // | | | +----------------------+
1253 // +---------v----------+ |
1254 // | |
1255 // | |
1256 // | +---------------^-----+
1257 // | | |
1258 // +-----> .exit.selector |
1259 // | |
1260 // +----------v----------+
1261 // |
1262 // +--------------------+ |
1263 // | | |
1264 // | original exit <----+
1265 // | |
1266 // +--------------------+
1267
1268 RewrittenRangeInfo RRI;
1269
1270 BasicBlock *BBInsertLocation = LS.Latch->getNextNode();
1271 RRI.ExitSelector = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".exit.selector",
1272 &F, BBInsertLocation);
1273 RRI.PseudoExit = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".pseudo.exit", &F,
1274 BBInsertLocation);
1275
1276 BranchInst *PreheaderJump = cast<BranchInst>(Preheader->getTerminator());
1277 bool Increasing = LS.IndVarIncreasing;
1278 bool IsSignedPredicate = LS.IsSignedPredicate;
1279
1280 IRBuilder<> B(PreheaderJump);
1281 auto *RangeTy = Range.getBegin()->getType();
1282 auto NoopOrExt = [&](Value *V) {
1283 if (V->getType() == RangeTy)
1284 return V;
1285 return IsSignedPredicate ? B.CreateSExt(V, RangeTy, "wide." + V->getName())
1286 : B.CreateZExt(V, RangeTy, "wide." + V->getName());
1287 };
1288
1289 // EnterLoopCond - is it okay to start executing this `LS'?
1290 Value *EnterLoopCond = nullptr;
1291 auto Pred =
1292 Increasing
1293 ? (IsSignedPredicate ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT)
1294 : (IsSignedPredicate ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT);
1295 Value *IndVarStart = NoopOrExt(LS.IndVarStart);
1296 EnterLoopCond = B.CreateICmp(Pred, IndVarStart, ExitSubloopAt);
1297
1298 B.CreateCondBr(EnterLoopCond, LS.Header, RRI.PseudoExit);
1299 PreheaderJump->eraseFromParent();
1300
1301 LS.LatchBr->setSuccessor(LS.LatchBrExitIdx, RRI.ExitSelector);
1302 B.SetInsertPoint(LS.LatchBr);
1303 Value *IndVarBase = NoopOrExt(LS.IndVarBase);
1304 Value *TakeBackedgeLoopCond = B.CreateICmp(Pred, IndVarBase, ExitSubloopAt);
1305
1306 Value *CondForBranch = LS.LatchBrExitIdx == 1
1307 ? TakeBackedgeLoopCond
1308 : B.CreateNot(TakeBackedgeLoopCond);
1309
1310 LS.LatchBr->setCondition(CondForBranch);
1311
1312 B.SetInsertPoint(RRI.ExitSelector);
1313
1314 // IterationsLeft - are there any more iterations left, given the original
1315 // upper bound on the induction variable? If not, we branch to the "real"
1316 // exit.
1317 Value *LoopExitAt = NoopOrExt(LS.LoopExitAt);
1318 Value *IterationsLeft = B.CreateICmp(Pred, IndVarBase, LoopExitAt);
1319 B.CreateCondBr(IterationsLeft, RRI.PseudoExit, LS.LatchExit);
1320
1321 BranchInst *BranchToContinuation =
1322 BranchInst::Create(ContinuationBlock, RRI.PseudoExit);
1323
1324 // We emit PHI nodes into `RRI.PseudoExit' that compute the "latest" value of
1325 // each of the PHI nodes in the loop header. This feeds into the initial
1326 // value of the same PHI nodes if/when we continue execution.
1327 for (PHINode &PN : LS.Header->phis()) {
1328 PHINode *NewPHI = PHINode::Create(PN.getType(), 2, PN.getName() + ".copy",
1329 BranchToContinuation);
1330
1331 NewPHI->addIncoming(PN.getIncomingValueForBlock(Preheader), Preheader);
1332 NewPHI->addIncoming(PN.getIncomingValueForBlock(LS.Latch),
1333 RRI.ExitSelector);
1334 RRI.PHIValuesAtPseudoExit.push_back(NewPHI);
1335 }
1336
1337 RRI.IndVarEnd = PHINode::Create(IndVarBase->getType(), 2, "indvar.end",
1338 BranchToContinuation);
1339 RRI.IndVarEnd->addIncoming(IndVarStart, Preheader);
1340 RRI.IndVarEnd->addIncoming(IndVarBase, RRI.ExitSelector);
1341
1342 // The latch exit now has a branch from `RRI.ExitSelector' instead of
1343 // `LS.Latch'. The PHI nodes need to be updated to reflect that.
1344 LS.LatchExit->replacePhiUsesWith(LS.Latch, RRI.ExitSelector);
1345
1346 return RRI;
1347}
1348
1349void LoopConstrainer::rewriteIncomingValuesForPHIs(
1350 LoopStructure &LS, BasicBlock *ContinuationBlock,
1351 const LoopConstrainer::RewrittenRangeInfo &RRI) const {
1352 unsigned PHIIndex = 0;
1353 for (PHINode &PN : LS.Header->phis())
1354 PN.setIncomingValueForBlock(ContinuationBlock,
1355 RRI.PHIValuesAtPseudoExit[PHIIndex++]);
1356
1357 LS.IndVarStart = RRI.IndVarEnd;
1358}
1359
1360BasicBlock *LoopConstrainer::createPreheader(const LoopStructure &LS,
1361 BasicBlock *OldPreheader,
1362 const char *Tag) const {
1363 BasicBlock *Preheader = BasicBlock::Create(Ctx, Tag, &F, LS.Header);
1364 BranchInst::Create(LS.Header, Preheader);
1365
1366 LS.Header->replacePhiUsesWith(OldPreheader, Preheader);
1367
1368 return Preheader;
1369}
1370
1371void LoopConstrainer::addToParentLoopIfNeeded(ArrayRef<BasicBlock *> BBs) {
1372 Loop *ParentLoop = OriginalLoop.getParentLoop();
1373 if (!ParentLoop)
1374 return;
1375
1376 for (BasicBlock *BB : BBs)
1377 ParentLoop->addBasicBlockToLoop(BB, LI);
1378}
1379
1380Loop *LoopConstrainer::createClonedLoopStructure(Loop *Original, Loop *Parent,
1382 bool IsSubloop) {
1383 Loop &New = *LI.AllocateLoop();
1384 if (Parent)
1385 Parent->addChildLoop(&New);
1386 else
1387 LI.addTopLevelLoop(&New);
1388 LPMAddNewLoop(&New, IsSubloop);
1389
1390 // Add all of the blocks in Original to the new loop.
1391 for (auto *BB : Original->blocks())
1392 if (LI.getLoopFor(BB) == Original)
1393 New.addBasicBlockToLoop(cast<BasicBlock>(VM[BB]), LI);
1394
1395 // Add all of the subloops to the new loop.
1396 for (Loop *SubLoop : *Original)
1397 createClonedLoopStructure(SubLoop, &New, VM, /* IsSubloop */ true);
1398
1399 return &New;
1400}
1401
1402bool LoopConstrainer::run() {
1403 BasicBlock *Preheader = nullptr;
1404 LatchTakenCount = SE.getExitCount(&OriginalLoop, MainLoopStructure.Latch);
1405 Preheader = OriginalLoop.getLoopPreheader();
1406 assert(!isa<SCEVCouldNotCompute>(LatchTakenCount) && Preheader != nullptr &&
1407 "preconditions!");
1408
1409 OriginalPreheader = Preheader;
1410 MainLoopPreheader = Preheader;
1411
1412 bool IsSignedPredicate = MainLoopStructure.IsSignedPredicate;
1413 std::optional<SubRanges> MaybeSR = calculateSubRanges(IsSignedPredicate);
1414 if (!MaybeSR) {
1415 LLVM_DEBUG(dbgs() << "irce: could not compute subranges\n");
1416 return false;
1417 }
1418
1419 SubRanges SR = *MaybeSR;
1420 bool Increasing = MainLoopStructure.IndVarIncreasing;
1421 IntegerType *IVTy =
1422 cast<IntegerType>(Range.getBegin()->getType());
1423
1424 SCEVExpander Expander(SE, F.getParent()->getDataLayout(), "irce");
1425 Instruction *InsertPt = OriginalPreheader->getTerminator();
1426
1427 // It would have been better to make `PreLoop' and `PostLoop'
1428 // `std::optional<ClonedLoop>'s, but `ValueToValueMapTy' does not have a copy
1429 // constructor.
1430 ClonedLoop PreLoop, PostLoop;
1431 bool NeedsPreLoop =
1432 Increasing ? SR.LowLimit.has_value() : SR.HighLimit.has_value();
1433 bool NeedsPostLoop =
1434 Increasing ? SR.HighLimit.has_value() : SR.LowLimit.has_value();
1435
1436 Value *ExitPreLoopAt = nullptr;
1437 Value *ExitMainLoopAt = nullptr;
1438 const SCEVConstant *MinusOneS =
1439 cast<SCEVConstant>(SE.getConstant(IVTy, -1, true /* isSigned */));
1440
1441 if (NeedsPreLoop) {
1442 const SCEV *ExitPreLoopAtSCEV = nullptr;
1443
1444 if (Increasing)
1445 ExitPreLoopAtSCEV = *SR.LowLimit;
1446 else if (cannotBeMinInLoop(*SR.HighLimit, &OriginalLoop, SE,
1447 IsSignedPredicate))
1448 ExitPreLoopAtSCEV = SE.getAddExpr(*SR.HighLimit, MinusOneS);
1449 else {
1450 LLVM_DEBUG(dbgs() << "irce: could not prove no-overflow when computing "
1451 << "preloop exit limit. HighLimit = "
1452 << *(*SR.HighLimit) << "\n");
1453 return false;
1454 }
1455
1456 if (!Expander.isSafeToExpandAt(ExitPreLoopAtSCEV, InsertPt)) {
1457 LLVM_DEBUG(dbgs() << "irce: could not prove that it is safe to expand the"
1458 << " preloop exit limit " << *ExitPreLoopAtSCEV
1459 << " at block " << InsertPt->getParent()->getName()
1460 << "\n");
1461 return false;
1462 }
1463
1464 ExitPreLoopAt = Expander.expandCodeFor(ExitPreLoopAtSCEV, IVTy, InsertPt);
1465 ExitPreLoopAt->setName("exit.preloop.at");
1466 }
1467
1468 if (NeedsPostLoop) {
1469 const SCEV *ExitMainLoopAtSCEV = nullptr;
1470
1471 if (Increasing)
1472 ExitMainLoopAtSCEV = *SR.HighLimit;
1473 else if (cannotBeMinInLoop(*SR.LowLimit, &OriginalLoop, SE,
1474 IsSignedPredicate))
1475 ExitMainLoopAtSCEV = SE.getAddExpr(*SR.LowLimit, MinusOneS);
1476 else {
1477 LLVM_DEBUG(dbgs() << "irce: could not prove no-overflow when computing "
1478 << "mainloop exit limit. LowLimit = "
1479 << *(*SR.LowLimit) << "\n");
1480 return false;
1481 }
1482
1483 if (!Expander.isSafeToExpandAt(ExitMainLoopAtSCEV, InsertPt)) {
1484 LLVM_DEBUG(dbgs() << "irce: could not prove that it is safe to expand the"
1485 << " main loop exit limit " << *ExitMainLoopAtSCEV
1486 << " at block " << InsertPt->getParent()->getName()
1487 << "\n");
1488 return false;
1489 }
1490
1491 ExitMainLoopAt = Expander.expandCodeFor(ExitMainLoopAtSCEV, IVTy, InsertPt);
1492 ExitMainLoopAt->setName("exit.mainloop.at");
1493 }
1494
1495 // We clone these ahead of time so that we don't have to deal with changing
1496 // and temporarily invalid IR as we transform the loops.
1497 if (NeedsPreLoop)
1498 cloneLoop(PreLoop, "preloop");
1499 if (NeedsPostLoop)
1500 cloneLoop(PostLoop, "postloop");
1501
1502 RewrittenRangeInfo PreLoopRRI;
1503
1504 if (NeedsPreLoop) {
1505 Preheader->getTerminator()->replaceUsesOfWith(MainLoopStructure.Header,
1506 PreLoop.Structure.Header);
1507
1508 MainLoopPreheader =
1509 createPreheader(MainLoopStructure, Preheader, "mainloop");
1510 PreLoopRRI = changeIterationSpaceEnd(PreLoop.Structure, Preheader,
1511 ExitPreLoopAt, MainLoopPreheader);
1512 rewriteIncomingValuesForPHIs(MainLoopStructure, MainLoopPreheader,
1513 PreLoopRRI);
1514 }
1515
1516 BasicBlock *PostLoopPreheader = nullptr;
1517 RewrittenRangeInfo PostLoopRRI;
1518
1519 if (NeedsPostLoop) {
1520 PostLoopPreheader =
1521 createPreheader(PostLoop.Structure, Preheader, "postloop");
1522 PostLoopRRI = changeIterationSpaceEnd(MainLoopStructure, MainLoopPreheader,
1523 ExitMainLoopAt, PostLoopPreheader);
1524 rewriteIncomingValuesForPHIs(PostLoop.Structure, PostLoopPreheader,
1525 PostLoopRRI);
1526 }
1527
1528 BasicBlock *NewMainLoopPreheader =
1529 MainLoopPreheader != Preheader ? MainLoopPreheader : nullptr;
1530 BasicBlock *NewBlocks[] = {PostLoopPreheader, PreLoopRRI.PseudoExit,
1531 PreLoopRRI.ExitSelector, PostLoopRRI.PseudoExit,
1532 PostLoopRRI.ExitSelector, NewMainLoopPreheader};
1533
1534 // Some of the above may be nullptr, filter them out before passing to
1535 // addToParentLoopIfNeeded.
1536 auto NewBlocksEnd =
1537 std::remove(std::begin(NewBlocks), std::end(NewBlocks), nullptr);
1538
1539 addToParentLoopIfNeeded(ArrayRef(std::begin(NewBlocks), NewBlocksEnd));
1540
1541 DT.recalculate(F);
1542
1543 // We need to first add all the pre and post loop blocks into the loop
1544 // structures (as part of createClonedLoopStructure), and then update the
1545 // LCSSA form and LoopSimplifyForm. This is necessary for correctly updating
1546 // LI when LoopSimplifyForm is generated.
1547 Loop *PreL = nullptr, *PostL = nullptr;
1548 if (!PreLoop.Blocks.empty()) {
1549 PreL = createClonedLoopStructure(&OriginalLoop,
1550 OriginalLoop.getParentLoop(), PreLoop.Map,
1551 /* IsSubLoop */ false);
1552 }
1553
1554 if (!PostLoop.Blocks.empty()) {
1555 PostL =
1556 createClonedLoopStructure(&OriginalLoop, OriginalLoop.getParentLoop(),
1557 PostLoop.Map, /* IsSubLoop */ false);
1558 }
1559
1560 // This function canonicalizes the loop into Loop-Simplify and LCSSA forms.
1561 auto CanonicalizeLoop = [&] (Loop *L, bool IsOriginalLoop) {
1562 formLCSSARecursively(*L, DT, &LI, &SE);
1563 simplifyLoop(L, &DT, &LI, &SE, nullptr, nullptr, true);
1564 // Pre/post loops are slow paths, we do not need to perform any loop
1565 // optimizations on them.
1566 if (!IsOriginalLoop)
1568 };
1569 if (PreL)
1570 CanonicalizeLoop(PreL, false);
1571 if (PostL)
1572 CanonicalizeLoop(PostL, false);
1573 CanonicalizeLoop(&OriginalLoop, true);
1574
1575 return true;
1576}
1577
1578/// Computes and returns a range of values for the induction variable (IndVar)
1579/// in which the range check can be safely elided. If it cannot compute such a
1580/// range, returns std::nullopt.
1581std::optional<InductiveRangeCheck::Range>
1582InductiveRangeCheck::computeSafeIterationSpace(ScalarEvolution &SE,
1583 const SCEVAddRecExpr *IndVar,
1584 bool IsLatchSigned) const {
1585 // We can deal when types of latch check and range checks don't match in case
1586 // if latch check is more narrow.
1587 auto *IVType = dyn_cast<IntegerType>(IndVar->getType());
1588 auto *RCType = dyn_cast<IntegerType>(getBegin()->getType());
1589 // Do not work with pointer types.
1590 if (!IVType || !RCType)
1591 return std::nullopt;
1592 if (IVType->getBitWidth() > RCType->getBitWidth())
1593 return std::nullopt;
1594 // IndVar is of the form "A + B * I" (where "I" is the canonical induction
1595 // variable, that may or may not exist as a real llvm::Value in the loop) and
1596 // this inductive range check is a range check on the "C + D * I" ("C" is
1597 // getBegin() and "D" is getStep()). We rewrite the value being range
1598 // checked to "M + N * IndVar" where "N" = "D * B^(-1)" and "M" = "C - NA".
1599 //
1600 // The actual inequalities we solve are of the form
1601 //
1602 // 0 <= M + 1 * IndVar < L given L >= 0 (i.e. N == 1)
1603 //
1604 // Here L stands for upper limit of the safe iteration space.
1605 // The inequality is satisfied by (0 - M) <= IndVar < (L - M). To avoid
1606 // overflows when calculating (0 - M) and (L - M) we, depending on type of
1607 // IV's iteration space, limit the calculations by borders of the iteration
1608 // space. For example, if IndVar is unsigned, (0 - M) overflows for any M > 0.
1609 // If we figured out that "anything greater than (-M) is safe", we strengthen
1610 // this to "everything greater than 0 is safe", assuming that values between
1611 // -M and 0 just do not exist in unsigned iteration space, and we don't want
1612 // to deal with overflown values.
1613
1614 if (!IndVar->isAffine())
1615 return std::nullopt;
1616
1617 const SCEV *A = NoopOrExtend(IndVar->getStart(), RCType, SE, IsLatchSigned);
1618 const SCEVConstant *B = dyn_cast<SCEVConstant>(
1619 NoopOrExtend(IndVar->getStepRecurrence(SE), RCType, SE, IsLatchSigned));
1620 if (!B)
1621 return std::nullopt;
1622 assert(!B->isZero() && "Recurrence with zero step?");
1623
1624 const SCEV *C = getBegin();
1625 const SCEVConstant *D = dyn_cast<SCEVConstant>(getStep());
1626 if (D != B)
1627 return std::nullopt;
1628
1629 assert(!D->getValue()->isZero() && "Recurrence with zero step?");
1630 unsigned BitWidth = RCType->getBitWidth();
1631 const SCEV *SIntMax = SE.getConstant(APInt::getSignedMaxValue(BitWidth));
1632
1633 // Subtract Y from X so that it does not go through border of the IV
1634 // iteration space. Mathematically, it is equivalent to:
1635 //
1636 // ClampedSubtract(X, Y) = min(max(X - Y, INT_MIN), INT_MAX). [1]
1637 //
1638 // In [1], 'X - Y' is a mathematical subtraction (result is not bounded to
1639 // any width of bit grid). But after we take min/max, the result is
1640 // guaranteed to be within [INT_MIN, INT_MAX].
1641 //
1642 // In [1], INT_MAX and INT_MIN are respectively signed and unsigned max/min
1643 // values, depending on type of latch condition that defines IV iteration
1644 // space.
1645 auto ClampedSubtract = [&](const SCEV *X, const SCEV *Y) {
1646 // FIXME: The current implementation assumes that X is in [0, SINT_MAX].
1647 // This is required to ensure that SINT_MAX - X does not overflow signed and
1648 // that X - Y does not overflow unsigned if Y is negative. Can we lift this
1649 // restriction and make it work for negative X either?
1650 if (IsLatchSigned) {
1651 // X is a number from signed range, Y is interpreted as signed.
1652 // Even if Y is SINT_MAX, (X - Y) does not reach SINT_MIN. So the only
1653 // thing we should care about is that we didn't cross SINT_MAX.
1654 // So, if Y is positive, we subtract Y safely.
1655 // Rule 1: Y > 0 ---> Y.
1656 // If 0 <= -Y <= (SINT_MAX - X), we subtract Y safely.
1657 // Rule 2: Y >=s (X - SINT_MAX) ---> Y.
1658 // If 0 <= (SINT_MAX - X) < -Y, we can only subtract (X - SINT_MAX).
1659 // Rule 3: Y <s (X - SINT_MAX) ---> (X - SINT_MAX).
1660 // It gives us smax(Y, X - SINT_MAX) to subtract in all cases.
1661 const SCEV *XMinusSIntMax = SE.getMinusSCEV(X, SIntMax);
1662 return SE.getMinusSCEV(X, SE.getSMaxExpr(Y, XMinusSIntMax),
1664 } else
1665 // X is a number from unsigned range, Y is interpreted as signed.
1666 // Even if Y is SINT_MIN, (X - Y) does not reach UINT_MAX. So the only
1667 // thing we should care about is that we didn't cross zero.
1668 // So, if Y is negative, we subtract Y safely.
1669 // Rule 1: Y <s 0 ---> Y.
1670 // If 0 <= Y <= X, we subtract Y safely.
1671 // Rule 2: Y <=s X ---> Y.
1672 // If 0 <= X < Y, we should stop at 0 and can only subtract X.
1673 // Rule 3: Y >s X ---> X.
1674 // It gives us smin(X, Y) to subtract in all cases.
1675 return SE.getMinusSCEV(X, SE.getSMinExpr(X, Y), SCEV::FlagNUW);
1676 };
1677 const SCEV *M = SE.getMinusSCEV(C, A);
1678 const SCEV *Zero = SE.getZero(M->getType());
1679
1680 // This function returns SCEV equal to 1 if X is non-negative 0 otherwise.
1681 auto SCEVCheckNonNegative = [&](const SCEV *X) {
1682 const Loop *L = IndVar->getLoop();
1683 const SCEV *One = SE.getOne(X->getType());
1684 // Can we trivially prove that X is a non-negative or negative value?
1685 if (isKnownNonNegativeInLoop(X, L, SE))
1686 return One;
1687 else if (isKnownNegativeInLoop(X, L, SE))
1688 return Zero;
1689 // If not, we will have to figure it out during the execution.
1690 // Function smax(smin(X, 0), -1) + 1 equals to 1 if X >= 0 and 0 if X < 0.
1691 const SCEV *NegOne = SE.getNegativeSCEV(One);
1692 return SE.getAddExpr(SE.getSMaxExpr(SE.getSMinExpr(X, Zero), NegOne), One);
1693 };
1694 // FIXME: Current implementation of ClampedSubtract implicitly assumes that
1695 // X is non-negative (in sense of a signed value). We need to re-implement
1696 // this function in a way that it will correctly handle negative X as well.
1697 // We use it twice: for X = 0 everything is fine, but for X = getEnd() we can
1698 // end up with a negative X and produce wrong results. So currently we ensure
1699 // that if getEnd() is negative then both ends of the safe range are zero.
1700 // Note that this may pessimize elimination of unsigned range checks against
1701 // negative values.
1702 const SCEV *REnd = getEnd();
1703 const SCEV *EndIsNonNegative = SCEVCheckNonNegative(REnd);
1704
1705 const SCEV *Begin = SE.getMulExpr(ClampedSubtract(Zero, M), EndIsNonNegative);
1706 const SCEV *End = SE.getMulExpr(ClampedSubtract(REnd, M), EndIsNonNegative);
1707 return InductiveRangeCheck::Range(Begin, End);
1708}
1709
1710static std::optional<InductiveRangeCheck::Range>
1712 const std::optional<InductiveRangeCheck::Range> &R1,
1713 const InductiveRangeCheck::Range &R2) {
1714 if (R2.isEmpty(SE, /* IsSigned */ true))
1715 return std::nullopt;
1716 if (!R1)
1717 return R2;
1718 auto &R1Value = *R1;
1719 // We never return empty ranges from this function, and R1 is supposed to be
1720 // a result of intersection. Thus, R1 is never empty.
1721 assert(!R1Value.isEmpty(SE, /* IsSigned */ true) &&
1722 "We should never have empty R1!");
1723
1724 // TODO: we could widen the smaller range and have this work; but for now we
1725 // bail out to keep things simple.
1726 if (R1Value.getType() != R2.getType())
1727 return std::nullopt;
1728
1729 const SCEV *NewBegin = SE.getSMaxExpr(R1Value.getBegin(), R2.getBegin());
1730 const SCEV *NewEnd = SE.getSMinExpr(R1Value.getEnd(), R2.getEnd());
1731
1732 // If the resulting range is empty, just return std::nullopt.
1733 auto Ret = InductiveRangeCheck::Range(NewBegin, NewEnd);
1734 if (Ret.isEmpty(SE, /* IsSigned */ true))
1735 return std::nullopt;
1736 return Ret;
1737}
1738
1739static std::optional<InductiveRangeCheck::Range>
1741 const std::optional<InductiveRangeCheck::Range> &R1,
1742 const InductiveRangeCheck::Range &R2) {
1743 if (R2.isEmpty(SE, /* IsSigned */ false))
1744 return std::nullopt;
1745 if (!R1)
1746 return R2;
1747 auto &R1Value = *R1;
1748 // We never return empty ranges from this function, and R1 is supposed to be
1749 // a result of intersection. Thus, R1 is never empty.
1750 assert(!R1Value.isEmpty(SE, /* IsSigned */ false) &&
1751 "We should never have empty R1!");
1752
1753 // TODO: we could widen the smaller range and have this work; but for now we
1754 // bail out to keep things simple.
1755 if (R1Value.getType() != R2.getType())
1756 return std::nullopt;
1757
1758 const SCEV *NewBegin = SE.getUMaxExpr(R1Value.getBegin(), R2.getBegin());
1759 const SCEV *NewEnd = SE.getUMinExpr(R1Value.getEnd(), R2.getEnd());
1760
1761 // If the resulting range is empty, just return std::nullopt.
1762 auto Ret = InductiveRangeCheck::Range(NewBegin, NewEnd);
1763 if (Ret.isEmpty(SE, /* IsSigned */ false))
1764 return std::nullopt;
1765 return Ret;
1766}
1767
1769 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1770 LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
1771 // There are no loops in the function. Return before computing other expensive
1772 // analyses.
1773 if (LI.empty())
1774 return PreservedAnalyses::all();
1775 auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
1776 auto &BPI = AM.getResult<BranchProbabilityAnalysis>(F);
1777
1778 // Get BFI analysis result on demand. Please note that modification of
1779 // CFG invalidates this analysis and we should handle it.
1780 auto getBFI = [&F, &AM ]()->BlockFrequencyInfo & {
1782 };
1783 InductiveRangeCheckElimination IRCE(SE, &BPI, DT, LI, { getBFI });
1784
1785 bool Changed = false;
1786 {
1787 bool CFGChanged = false;
1788 for (const auto &L : LI) {
1789 CFGChanged |= simplifyLoop(L, &DT, &LI, &SE, nullptr, nullptr,
1790 /*PreserveLCSSA=*/false);
1791 Changed |= formLCSSARecursively(*L, DT, &LI, &SE);
1792 }
1793 Changed |= CFGChanged;
1794
1795 if (CFGChanged && !SkipProfitabilityChecks) {
1798 AM.invalidate(F, PA);
1799 }
1800 }
1801
1803 appendLoopsToWorklist(LI, Worklist);
1804 auto LPMAddNewLoop = [&Worklist](Loop *NL, bool IsSubloop) {
1805 if (!IsSubloop)
1806 appendLoopsToWorklist(*NL, Worklist);
1807 };
1808
1809 while (!Worklist.empty()) {
1810 Loop *L = Worklist.pop_back_val();
1811 if (IRCE.run(L, LPMAddNewLoop)) {
1812 Changed = true;
1816 AM.invalidate(F, PA);
1817 }
1818 }
1819 }
1820
1821 if (!Changed)
1822 return PreservedAnalyses::all();
1824}
1825
1826bool IRCELegacyPass::runOnFunction(Function &F) {
1827 if (skipFunction(F))
1828 return false;
1829
1830 ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1832 getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
1833 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1834 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1835 InductiveRangeCheckElimination IRCE(SE, &BPI, DT, LI);
1836
1837 bool Changed = false;
1838
1839 for (const auto &L : LI) {
1840 Changed |= simplifyLoop(L, &DT, &LI, &SE, nullptr, nullptr,
1841 /*PreserveLCSSA=*/false);
1842 Changed |= formLCSSARecursively(*L, DT, &LI, &SE);
1843 }
1844
1846 appendLoopsToWorklist(LI, Worklist);
1847 auto LPMAddNewLoop = [&](Loop *NL, bool IsSubloop) {
1848 if (!IsSubloop)
1849 appendLoopsToWorklist(*NL, Worklist);
1850 };
1851
1852 while (!Worklist.empty()) {
1853 Loop *L = Worklist.pop_back_val();
1854 Changed |= IRCE.run(L, LPMAddNewLoop);
1855 }
1856 return Changed;
1857}
1858
1859bool
1860InductiveRangeCheckElimination::isProfitableToTransform(const Loop &L,
1861 LoopStructure &LS) {
1863 return true;
1864 if (GetBFI) {
1865 BlockFrequencyInfo &BFI = (*GetBFI)();
1866 uint64_t hFreq = BFI.getBlockFreq(LS.Header).getFrequency();
1867 uint64_t phFreq = BFI.getBlockFreq(L.getLoopPreheader()).getFrequency();
1868 if (phFreq != 0 && hFreq != 0 && (hFreq / phFreq < MinRuntimeIterations)) {
1869 LLVM_DEBUG(dbgs() << "irce: could not prove profitability: "
1870 << "the estimated number of iterations basing on "
1871 "frequency info is " << (hFreq / phFreq) << "\n";);
1872 return false;
1873 }
1874 return true;
1875 }
1876
1877 if (!BPI)
1878 return true;
1879 BranchProbability ExitProbability =
1880 BPI->getEdgeProbability(LS.Latch, LS.LatchBrExitIdx);
1881 if (ExitProbability > BranchProbability(1, MinRuntimeIterations)) {
1882 LLVM_DEBUG(dbgs() << "irce: could not prove profitability: "
1883 << "the exit probability is too big " << ExitProbability
1884 << "\n";);
1885 return false;
1886 }
1887 return true;
1888}
1889
1890bool InductiveRangeCheckElimination::run(
1891 Loop *L, function_ref<void(Loop *, bool)> LPMAddNewLoop) {
1892 if (L->getBlocks().size() >= LoopSizeCutoff) {
1893 LLVM_DEBUG(dbgs() << "irce: giving up constraining loop, too large\n");
1894 return false;
1895 }
1896
1897 BasicBlock *Preheader = L->getLoopPreheader();
1898 if (!Preheader) {
1899 LLVM_DEBUG(dbgs() << "irce: loop has no preheader, leaving\n");
1900 return false;
1901 }
1902
1903 LLVMContext &Context = Preheader->getContext();
1905
1906 for (auto *BBI : L->getBlocks())
1907 if (BranchInst *TBI = dyn_cast<BranchInst>(BBI->getTerminator()))
1908 InductiveRangeCheck::extractRangeChecksFromBranch(TBI, L, SE, BPI,
1909 RangeChecks);
1910
1911 if (RangeChecks.empty())
1912 return false;
1913
1914 auto PrintRecognizedRangeChecks = [&](raw_ostream &OS) {
1915 OS << "irce: looking at loop "; L->print(OS);
1916 OS << "irce: loop has " << RangeChecks.size()
1917 << " inductive range checks: \n";
1918 for (InductiveRangeCheck &IRC : RangeChecks)
1919 IRC.print(OS);
1920 };
1921
1922 LLVM_DEBUG(PrintRecognizedRangeChecks(dbgs()));
1923
1924 if (PrintRangeChecks)
1925 PrintRecognizedRangeChecks(errs());
1926
1927 const char *FailureReason = nullptr;
1928 std::optional<LoopStructure> MaybeLoopStructure =
1929 LoopStructure::parseLoopStructure(SE, *L, FailureReason);
1930 if (!MaybeLoopStructure) {
1931 LLVM_DEBUG(dbgs() << "irce: could not parse loop structure: "
1932 << FailureReason << "\n";);
1933 return false;
1934 }
1935 LoopStructure LS = *MaybeLoopStructure;
1936 if (!isProfitableToTransform(*L, LS))
1937 return false;
1938 const SCEVAddRecExpr *IndVar =
1939 cast<SCEVAddRecExpr>(SE.getMinusSCEV(SE.getSCEV(LS.IndVarBase), SE.getSCEV(LS.IndVarStep)));
1940
1941 std::optional<InductiveRangeCheck::Range> SafeIterRange;
1942 Instruction *ExprInsertPt = Preheader->getTerminator();
1943
1944 SmallVector<InductiveRangeCheck, 4> RangeChecksToEliminate;
1945 // Basing on the type of latch predicate, we interpret the IV iteration range
1946 // as signed or unsigned range. We use different min/max functions (signed or
1947 // unsigned) when intersecting this range with safe iteration ranges implied
1948 // by range checks.
1949 auto IntersectRange =
1950 LS.IsSignedPredicate ? IntersectSignedRange : IntersectUnsignedRange;
1951
1952 IRBuilder<> B(ExprInsertPt);
1953 for (InductiveRangeCheck &IRC : RangeChecks) {
1954 auto Result = IRC.computeSafeIterationSpace(SE, IndVar,
1955 LS.IsSignedPredicate);
1956 if (Result) {
1957 auto MaybeSafeIterRange = IntersectRange(SE, SafeIterRange, *Result);
1958 if (MaybeSafeIterRange) {
1959 assert(!MaybeSafeIterRange->isEmpty(SE, LS.IsSignedPredicate) &&
1960 "We should never return empty ranges!");
1961 RangeChecksToEliminate.push_back(IRC);
1962 SafeIterRange = *MaybeSafeIterRange;
1963 }
1964 }
1965 }
1966
1967 if (!SafeIterRange)
1968 return false;
1969
1970 LoopConstrainer LC(*L, LI, LPMAddNewLoop, LS, SE, DT, *SafeIterRange);
1971 bool Changed = LC.run();
1972
1973 if (Changed) {
1974 auto PrintConstrainedLoopInfo = [L]() {
1975 dbgs() << "irce: in function ";
1976 dbgs() << L->getHeader()->getParent()->getName() << ": ";
1977 dbgs() << "constrained ";
1978 L->print(dbgs());
1979 };
1980
1981 LLVM_DEBUG(PrintConstrainedLoopInfo());
1982
1984 PrintConstrainedLoopInfo();
1985
1986 // Optimize away the now-redundant range checks.
1987
1988 for (InductiveRangeCheck &IRC : RangeChecksToEliminate) {
1989 ConstantInt *FoldedRangeCheck = IRC.getPassingDirection()
1990 ? ConstantInt::getTrue(Context)
1992 IRC.getCheckUse()->set(FoldedRangeCheck);
1993 }
1994 }
1995
1996 return Changed;
1997}
1998
2000 return new IRCELegacyPass();
2001}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file implements a class to represent arbitrary precision integral constant values and operations...
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:492
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define LLVM_DEBUG(X)
Definition: Debug.h:101
#define NL
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define check(cond)
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
static const SCEV * NoopOrExtend(const SCEV *S, Type *Ty, ScalarEvolution &SE, bool Signed)
If the type of S matches with Ty, return S.
static cl::opt< bool > PrintRangeChecks("irce-print-range-checks", cl::Hidden, cl::init(false))
static cl::opt< bool > AllowUnsignedLatchCondition("irce-allow-unsigned-latch", cl::Hidden, cl::init(true))
static const char * ClonedLoopTag
static cl::opt< unsigned > MinRuntimeIterations("irce-min-runtime-iterations", cl::Hidden, cl::init(10))
static cl::opt< unsigned > LoopSizeCutoff("irce-loop-size-cutoff", cl::Hidden, cl::init(64))
static std::optional< InductiveRangeCheck::Range > IntersectSignedRange(ScalarEvolution &SE, const std::optional< InductiveRangeCheck::Range > &R1, const InductiveRangeCheck::Range &R2)
static cl::opt< bool > AllowNarrowLatchCondition("irce-allow-narrow-latch", cl::Hidden, cl::init(true), cl::desc("If set to true, IRCE may eliminate wide range checks in loops " "with narrow latch condition."))
Inductive range check elimination
static cl::opt< bool > PrintChangedLoops("irce-print-changed-loops", cl::Hidden, cl::init(false))
static std::optional< InductiveRangeCheck::Range > IntersectUnsignedRange(ScalarEvolution &SE, const std::optional< InductiveRangeCheck::Range > &R1, const InductiveRangeCheck::Range &R2)
static cl::opt< bool > SkipProfitabilityChecks("irce-skip-profitability-checks", cl::Hidden, cl::init(false))
static bool isSafeDecreasingBound(const SCEV *Start, const SCEV *BoundSCEV, const SCEV *Step, ICmpInst::Predicate Pred, unsigned LatchBrExitIdx, Loop *L, ScalarEvolution &SE)
Given a loop with an deccreasing induction variable, is it possible to safely calculate the bounds of...
static void DisableAllLoopOptsOnLoop(Loop &L)
static bool isSafeIncreasingBound(const SCEV *Start, const SCEV *BoundSCEV, const SCEV *Step, ICmpInst::Predicate Pred, unsigned LatchBrExitIdx, Loop *L, ScalarEvolution &SE)
Given a loop with an increasing induction variable, is it possible to safely calculate the bounds of ...
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
This header provides classes for managing per-loop analyses.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define R2(n)
This file contains the declarations for metadata subclasses.
Module.h This file contains the declarations for the Module class.
LLVMContext & Context
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file provides a priority worklist.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:40
This defines the Use class.
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:75
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:186
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:189
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition: APInt.h:196
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:199
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:620
void invalidate(IRUnitT &IR, const PreservedAnalyses &PA)
Invalidate cached analyses for an IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:774
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:105
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:35
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:127
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:146
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Value * getCondition() const
Analysis pass which computes BranchProbabilityInfo.
Legacy analysis pass which computes BranchProbabilityInfo.
Analysis providing branch probability information.
BranchProbability getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const
Get an edge's probability, relative to other out-edges of the Src.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:718
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:747
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:741
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:745
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:743
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:808
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:419
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:209
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:203
bool isNegative() const
Definition: Constants.h:192
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:833
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:197
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:888
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:136
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:314
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
This instruction compares its operands according to the predicate given to the constructor.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2558
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
const BasicBlock * getParent() const
Definition: Instruction.h:90
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:275
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:82
Class to represent integer types.
Definition: DerivedTypes.h:40
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:331
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:72
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1268
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Definition: LoopInfoImpl.h:258
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:195
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
Definition: LoopInfo.h:412
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Definition: LoopInfo.h:114
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1293
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
Metadata node.
Definition: Metadata.h:943
void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
Definition: Metadata.cpp:968
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1399
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:497
Root of the metadata hierarchy.
Definition: Metadata.h:61
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:398
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:98
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
void abandon()
Mark an analysis as abandoned.
Definition: PassManager.h:206
bool empty() const
Determine if the PriorityWorklist is empty or not.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
This class represents a constant integer value.
This class uses information about analyze scalars to rewrite expressions in canonical form.
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const
This class represents an analyzed expression in the program.
void print(raw_ostream &OS) const
Print out the internal representation of this scalar to the specified stream.
Type * getType() const
Return the LLVM type of this SCEV expression.
Analysis pass that exposes the ScalarEvolution for a function.
The main scalar evolution driver.
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
const SCEV * getSMaxExpr(const SCEV *LHS, const SCEV *RHS)
bool isKnownNegative(const SCEV *S)
Test if the given expression is known to be negative.
const SCEV * getSMinExpr(const SCEV *LHS, const SCEV *RHS)
const SCEV * getUMaxExpr(const SCEV *LHS, const SCEV *RHS)
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
const SCEV * getConstant(ConstantInt *V)
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
LoopDisposition getLoopDisposition(const SCEV *S, const Loop *L)
Return the "disposition" of the given SCEV with respect to the given loop.
const SCEV * getUMinExpr(const SCEV *LHS, const SCEV *RHS, bool Sequential=false)
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may effect its v...
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
const SCEV * getNoopOrZeroExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
@ LoopInvariant
The SCEV is loop-invariant.
bool isAvailableAtLoopEntry(const SCEV *S, const Loop *L)
Determine if the SCEV can be evaluated at loop's entry.
const SCEV * getExitCount(const Loop *L, const BasicBlock *ExitingBlock, ExitCountKind Kind=Exact)
Return the number of times the backedge executes before the given exit would be taken; if not exactly...
const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
bool isLoopEntryGuardedByCond(const Loop *L, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test whether entry to the loop is protected by a conditional between LHS and RHS.
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
A version of PriorityWorklist that selects small size optimized data structures for the vector and ma...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static IntegerType * getInt1Ty(LLVMContext &C)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
Value * get() const
Definition: Use.h:66
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
const Use & getOperandUse(unsigned i) const
Definition: User.h:182
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:375
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
An efficient, type-erasing, non-owning reference to a callable.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Length
Definition: DWP.cpp:406
Pass * createInductiveRangeCheckEliminationPass()
auto successors(const MachineBasicBlock *BB)
bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition: LCSSA.cpp:410
bool cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, bool Signed)
Returns true if S is defined and never is equal to signed/unsigned max.
Definition: LoopUtils.cpp:1150
void initializeIRCELegacyPassPass(PassRegistry &)
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition: ValueMapper.h:89
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition: ValueMapper.h:71
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
Definition: ValueMapper.h:256
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
Definition: LoopUtils.cpp:1537
bool isKnownNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE)
Returns true if we can prove that S is defined and always negative in loop L.
Definition: LoopUtils.cpp:1125
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:184
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
bool cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, bool Signed)
Returns true if S is defined and never is equal to signed/unsigned min.
Definition: LoopUtils.cpp:1139
bool isKnownNonNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE)
Returns true if we can prove that S is defined and always non-negative in loop L.
Definition: LoopUtils.cpp:1132
static bool isProfitableToTransform(const Loop &L, const BranchInst *BI)
Loop * cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI, LPPassManager *LPM)
Recursively clone the specified loop and all of its children, mapping the blocks with the specified m...
Definition: LoopUtils.cpp:1554
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860