Go to the documentation of this file.
101 using namespace llvm;
124 cl::desc(
"If set to true, IRCE may eliminate wide range checks in loops "
125 "with narrow latch condition."));
129 #define DEBUG_TYPE "irce"
143 class InductiveRangeCheck {
145 const SCEV *Begin =
nullptr;
146 const SCEV *Step =
nullptr;
147 const SCEV *End =
nullptr;
148 Use *CheckUse =
nullptr;
160 const SCEV *getBegin()
const {
return Begin; }
161 const SCEV *getStep()
const {
return Step; }
162 const SCEV *getEnd()
const {
return End; }
165 OS <<
"InductiveRangeCheck:\n";
172 OS <<
"\n CheckUse: ";
173 getCheckUse()->getUser()->print(OS);
174 OS <<
" Operand: " << getCheckUse()->getOperandNo() <<
"\n";
182 Use *getCheckUse()
const {
return CheckUse; }
192 Range(
const SCEV *Begin,
const SCEV *End) : Begin(Begin), End(End) {
193 assert(Begin->
getType() == End->getType() &&
"ill-typed range!");
197 const SCEV *getBegin()
const {
return Begin; }
198 const SCEV *getEnd()
const {
return End; }
211 bool getPassingDirection() {
return true; }
218 bool IsLatchSigned)
const;
231 struct LoopStructure;
233 class InductiveRangeCheckElimination {
245 bool isProfitableToTransform(
const Loop &L, LoopStructure &
LS);
251 : SE(SE), BPI(BPI), DT(DT), LI(LI), GetBFI(GetBFI) {}
282 "Inductive range check elimination",
false,
false)
295 InductiveRangeCheck::parseRangeCheckICmp(
Loop *L,
ICmpInst *ICI,
297 Value *&Length,
bool &IsSigned) {
298 auto IsLoopInvariant = [&SE, L](
Value *V) {
299 return SE.isLoopInvariant(SE.getSCEV(V), L);
303 Value *LHS = ICI->getOperand(0);
304 Value *RHS = ICI->getOperand(1);
315 if (
match(RHS, m_ConstantInt<0>())) {
326 if (
match(RHS, m_ConstantInt<-1>())) {
331 if (IsLoopInvariant(LHS)) {
343 if (IsLoopInvariant(LHS)) {
354 void InductiveRangeCheck::extractRangeChecksFromCond(
358 Value *Condition = ConditionUse.
get();
359 if (!Visited.
insert(Condition).second)
364 extractRangeChecksFromCond(L, SE, cast<User>(Condition)->getOperandUse(0),
366 extractRangeChecksFromCond(L, SE, cast<User>(Condition)->getOperandUse(1),
371 ICmpInst *ICI = dyn_cast<ICmpInst>(Condition);
377 if (!parseRangeCheckICmp(L, ICI, SE,
Index, Length, IsSigned))
380 const auto *IndexAddRec = dyn_cast<SCEVAddRecExpr>(SE.
getSCEV(
Index));
382 IndexAddRec && (IndexAddRec->getLoop() == L) && IndexAddRec->isAffine();
387 const SCEV *End =
nullptr;
396 unsigned BitWidth = cast<IntegerType>(IndexAddRec->getType())->getBitWidth();
401 InductiveRangeCheck IRC;
403 IRC.Begin = IndexAddRec->getStart();
404 IRC.Step = IndexAddRec->getStepRecurrence(SE);
405 IRC.CheckUse = &ConditionUse;
406 Checks.push_back(IRC);
409 void InductiveRangeCheck::extractRangeChecksFromBranch(
422 InductiveRangeCheck::extractRangeChecksFromCond(L, SE, BI->
getOperandUse(0),
448 DisableLICMVersioning, DisableDistribution});
461 struct LoopStructure {
462 const char *
Tag =
"";
482 Value *IndVarBase =
nullptr;
483 Value *IndVarStart =
nullptr;
484 Value *IndVarStep =
nullptr;
485 Value *LoopExitAt =
nullptr;
486 bool IndVarIncreasing =
false;
487 bool IsSignedPredicate =
true;
489 LoopStructure() =
default;
491 template <
typename M> LoopStructure map(M Map)
const {
494 Result.Header = cast<BasicBlock>(
Map(Header));
495 Result.Latch = cast<BasicBlock>(
Map(Latch));
496 Result.LatchBr = cast<BranchInst>(
Map(LatchBr));
497 Result.LatchExit = cast<BasicBlock>(
Map(LatchExit));
498 Result.LatchBrExitIdx = LatchBrExitIdx;
503 Result.IndVarIncreasing = IndVarIncreasing;
504 Result.IsSignedPredicate = IsSignedPredicate;
520 class LoopConstrainer {
524 std::vector<BasicBlock *> Blocks;
530 LoopStructure Structure;
535 struct RewrittenRangeInfo {
538 std::vector<PHINode *> PHIValuesAtPseudoExit;
541 RewrittenRangeInfo() =
default;
565 void cloneLoop(ClonedLoop &CLResult,
const char *Tag)
const;
569 Loop *createClonedLoopStructure(
Loop *Original,
Loop *Parent,
594 changeIterationSpaceEnd(
const LoopStructure &
LS,
BasicBlock *Preheader,
601 const char *Tag)
const;
607 void rewriteIncomingValuesForPHIs(
608 LoopStructure &
LS,
BasicBlock *ContinuationBlockAndPreheader,
609 const LoopConstrainer::RewrittenRangeInfo &RRI)
const;
628 const SCEV *LatchTakenCount =
nullptr;
636 InductiveRangeCheck::Range Range;
640 LoopStructure MainLoopStructure;
647 :
F(*L.getHeader()->
getParent()), Ctx(L.getHeader()->getContext()),
648 SE(SE), DT(DT), LI(LI), LPMAddNewLoop(LPMAddNewLoop), OriginalLoop(L),
649 Range(
R), MainLoopStructure(
LS) {}
660 const SCEV *BoundSCEV,
const SCEV *Step,
662 unsigned LatchBrExitIdx,
679 LLVM_DEBUG(
dbgs() <<
"irce: LatchExitBrIdx: " << LatchBrExitIdx <<
"\n");
687 if (LatchBrExitIdx == 1)
690 assert(LatchBrExitIdx == 0 &&
691 "LatchBrExitIdx should be either 0 or 1");
694 unsigned BitWidth = cast<IntegerType>(BoundSCEV->
getType())->getBitWidth();
699 const SCEV *MinusOne =
710 const SCEV *BoundSCEV,
const SCEV *Step,
712 unsigned LatchBrExitIdx,
727 LLVM_DEBUG(
dbgs() <<
"irce: LatchExitBrIdx: " << LatchBrExitIdx <<
"\n");
735 if (LatchBrExitIdx == 1)
738 assert(LatchBrExitIdx == 0 &&
"LatchBrExitIdx should be 0 or 1");
740 const SCEV *StepMinusOne =
742 unsigned BitWidth = cast<IntegerType>(BoundSCEV->
getType())->getBitWidth();
754 const char *&FailureReason) {
756 FailureReason =
"loop not in LoopSimplify form";
761 assert(Latch &&
"Simplified loops only have one latch!");
764 FailureReason =
"loop has already been cloned";
769 FailureReason =
"no loop latch";
776 FailureReason =
"no preheader";
782 FailureReason =
"latch terminator not conditional branch";
786 unsigned LatchBrExitIdx = LatchBr->
getSuccessor(0) == Header ? 1 : 0;
790 FailureReason =
"latch terminator branch not conditional on integral icmp";
795 if (isa<SCEVCouldNotCompute>(LatchCount)) {
796 FailureReason =
"could not compute latch count";
809 if (!isa<SCEVAddRecExpr>(LeftSCEV)) {
810 if (isa<SCEVAddRecExpr>(RightSCEV)) {
815 FailureReason =
"no add recurrences in the icmp";
824 IntegerType *Ty = cast<IntegerType>(AR->getType());
832 const SCEV *ExtendedStep =
835 bool NoSignedWrap = ExtendAfterOp->
getStart() == ExtendedStart &&
849 const SCEVAddRecExpr *IndVarBase = cast<SCEVAddRecExpr>(LeftSCEV);
851 FailureReason =
"LHS in icmp not induction variable";
855 if (!isa<SCEVConstant>(StepRec)) {
856 FailureReason =
"LHS in icmp not induction variable";
861 if (ICI->
isEquality() && !HasNoSignedWrap(IndVarBase)) {
862 FailureReason =
"LHS in icmp needs nsw for equality predicates";
868 bool IsSignedPredicate;
874 const SCEV *FixedRightSCEV =
nullptr;
878 if (
auto *
I = dyn_cast<Instruction>(RightValue))
880 FixedRightSCEV = RightSCEV;
883 bool DecreasedRightValueByOne =
false;
884 if (StepCI->
isOne()) {
909 DecreasedRightValueByOne =
true;
914 DecreasedRightValueByOne =
true;
921 bool FoundExpectedPred =
922 (LTPred && LatchBrExitIdx == 1) || (GTPred && LatchBrExitIdx == 0);
924 if (!FoundExpectedPred) {
925 FailureReason =
"expected icmp slt semantically, found something else";
931 FailureReason =
"unsigned latch conditions are explicitly prohibited";
936 LatchBrExitIdx, &L, SE)) {
937 FailureReason =
"Unsafe loop bounds";
940 if (LatchBrExitIdx == 0) {
943 if (!DecreasedRightValueByOne)
947 assert(!DecreasedRightValueByOne &&
948 "Right value can be decreased only for LatchBrExitIdx == 0!");
951 bool IncreasedRightValueByOne =
false;
972 IncreasedRightValueByOne =
true;
976 IncreasedRightValueByOne =
true;
984 bool FoundExpectedPred =
985 (GTPred && LatchBrExitIdx == 1) || (LTPred && LatchBrExitIdx == 0);
987 if (!FoundExpectedPred) {
988 FailureReason =
"expected icmp sgt semantically, found something else";
996 FailureReason =
"unsigned latch conditions are explicitly prohibited";
1001 LatchBrExitIdx, &L, SE)) {
1002 FailureReason =
"Unsafe bounds";
1006 if (LatchBrExitIdx == 0) {
1009 if (!IncreasedRightValueByOne)
1013 assert(!IncreasedRightValueByOne &&
1014 "Right value can be increased only for LatchBrExitIdx == 0!");
1021 "loop variant exit count doesn't make sense!");
1030 Expander.expandCodeFor(FixedRightSCEV, FixedRightSCEV->
getType(),
Ins);
1032 Value *IndVarStartV = Expander.expandCodeFor(IndVarStart, IndVarTy,
Ins);
1033 IndVarStartV->
setName(
"indvar.start");
1040 Result.LatchBr = LatchBr;
1041 Result.LatchExit = LatchExit;
1042 Result.LatchBrExitIdx = LatchBrExitIdx;
1043 Result.IndVarStart = IndVarStartV;
1044 Result.IndVarStep = StepCI;
1045 Result.IndVarBase = LeftValue;
1046 Result.IndVarIncreasing = IsIncreasing;
1047 Result.LoopExitAt = RightValue;
1048 Result.IsSignedPredicate = IsSignedPredicate;
1050 FailureReason =
nullptr;
1063 LoopConstrainer::calculateSubRanges(
bool IsSignedPredicate)
const {
1064 IntegerType *Ty = cast<IntegerType>(LatchTakenCount->getType());
1066 auto *RTy = cast<IntegerType>(Range.getType());
1074 LoopConstrainer::SubRanges
Result;
1080 RTy, SE, IsSignedPredicate);
1082 SE, IsSignedPredicate);
1084 bool Increasing = MainLoopStructure.IndVarIncreasing;
1090 const SCEV *Smallest =
nullptr, *Greatest =
nullptr, *GreatestSeen =
nullptr;
1116 GreatestSeen = Start;
1119 auto Clamp = [
this, Smallest, Greatest, IsSignedPredicate](
const SCEV *
S) {
1120 return IsSignedPredicate
1131 bool ProvablyNoPreloop =
1133 if (!ProvablyNoPreloop)
1134 Result.LowLimit = Clamp(Range.getBegin());
1136 bool ProvablyNoPostLoop =
1138 if (!ProvablyNoPostLoop)
1139 Result.HighLimit = Clamp(Range.getEnd());
1145 const char *Tag)
const {
1148 Result.Blocks.push_back(Clone);
1153 assert(V &&
"null values not in domain!");
1154 auto It =
Result.Map.find(V);
1155 if (It ==
Result.Map.end())
1157 return static_cast<Value *
>(It->second);
1161 cast<BasicBlock>(GetClonedValue(OriginalLoop.getLoopLatch()));
1165 Result.Structure = MainLoopStructure.map(GetClonedValue);
1168 for (
unsigned i = 0,
e =
Result.Blocks.size();
i !=
e; ++
i) {
1170 BasicBlock *OriginalBB = OriginalLoop.getBlocks()[
i];
1172 assert(
Result.Map[OriginalBB] == ClonedBB &&
"invariant!");
1183 if (OriginalLoop.contains(SBB))
1187 Value *OldIncoming = PN.getIncomingValueForBlock(OriginalBB);
1188 PN.addIncoming(GetClonedValue(OldIncoming), ClonedBB);
1194 LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
1268 RewrittenRangeInfo RRI;
1270 BasicBlock *BBInsertLocation =
LS.Latch->getNextNode();
1272 &
F, BBInsertLocation);
1277 bool Increasing =
LS.IndVarIncreasing;
1278 bool IsSignedPredicate =
LS.IsSignedPredicate;
1281 auto *RangeTy = Range.getBegin()->getType();
1282 auto NoopOrExt = [&](
Value *V) {
1283 if (V->getType() == RangeTy)
1285 return IsSignedPredicate ?
B.CreateSExt(V, RangeTy,
"wide." + V->getName())
1286 :
B.CreateZExt(V, RangeTy,
"wide." + V->getName());
1290 Value *EnterLoopCond =
nullptr;
1295 Value *IndVarStart = NoopOrExt(
LS.IndVarStart);
1296 EnterLoopCond =
B.CreateICmp(Pred, IndVarStart, ExitSubloopAt);
1298 B.CreateCondBr(EnterLoopCond,
LS.Header, RRI.PseudoExit);
1301 LS.LatchBr->setSuccessor(
LS.LatchBrExitIdx, RRI.ExitSelector);
1302 B.SetInsertPoint(
LS.LatchBr);
1303 Value *IndVarBase = NoopOrExt(
LS.IndVarBase);
1304 Value *TakeBackedgeLoopCond =
B.CreateICmp(Pred, IndVarBase, ExitSubloopAt);
1306 Value *CondForBranch =
LS.LatchBrExitIdx == 1
1307 ? TakeBackedgeLoopCond
1308 :
B.CreateNot(TakeBackedgeLoopCond);
1310 LS.LatchBr->setCondition(CondForBranch);
1312 B.SetInsertPoint(RRI.ExitSelector);
1317 Value *LoopExitAt = NoopOrExt(
LS.LoopExitAt);
1318 Value *IterationsLeft =
B.CreateICmp(Pred, IndVarBase, LoopExitAt);
1319 B.CreateCondBr(IterationsLeft, RRI.PseudoExit,
LS.LatchExit);
1329 BranchToContinuation);
1331 NewPHI->
addIncoming(PN.getIncomingValueForBlock(Preheader), Preheader);
1334 RRI.PHIValuesAtPseudoExit.push_back(NewPHI);
1338 BranchToContinuation);
1339 RRI.IndVarEnd->addIncoming(IndVarStart, Preheader);
1340 RRI.IndVarEnd->addIncoming(IndVarBase, RRI.ExitSelector);
1344 LS.LatchExit->replacePhiUsesWith(
LS.Latch, RRI.ExitSelector);
1349 void LoopConstrainer::rewriteIncomingValuesForPHIs(
1351 const LoopConstrainer::RewrittenRangeInfo &RRI)
const {
1352 unsigned PHIIndex = 0;
1354 PN.setIncomingValueForBlock(ContinuationBlock,
1355 RRI.PHIValuesAtPseudoExit[PHIIndex++]);
1357 LS.IndVarStart = RRI.IndVarEnd;
1360 BasicBlock *LoopConstrainer::createPreheader(
const LoopStructure &
LS,
1362 const char *Tag)
const {
1366 LS.Header->replacePhiUsesWith(OldPreheader, Preheader);
1380 Loop *LoopConstrainer::createClonedLoopStructure(
Loop *Original,
Loop *Parent,
1383 Loop &
New = *LI.AllocateLoop();
1387 LI.addTopLevelLoop(&New);
1388 LPMAddNewLoop(&New, IsSubloop);
1391 for (
auto *
BB : Original->
blocks())
1392 if (LI.getLoopFor(
BB) == Original)
1393 New.addBasicBlockToLoop(cast<BasicBlock>(VM[
BB]), LI);
1396 for (
Loop *SubLoop : *Original)
1397 createClonedLoopStructure(SubLoop, &New, VM,
true);
1402 bool LoopConstrainer::run() {
1404 LatchTakenCount = SE.
getExitCount(&OriginalLoop, MainLoopStructure.Latch);
1405 Preheader = OriginalLoop.getLoopPreheader();
1406 assert(!isa<SCEVCouldNotCompute>(LatchTakenCount) && Preheader !=
nullptr &&
1409 OriginalPreheader = Preheader;
1410 MainLoopPreheader = Preheader;
1412 bool IsSignedPredicate = MainLoopStructure.IsSignedPredicate;
1420 bool Increasing = MainLoopStructure.IndVarIncreasing;
1422 cast<IntegerType>(Range.getBegin()->getType());
1424 SCEVExpander Expander(SE,
F.getParent()->getDataLayout(),
"irce");
1425 Instruction *InsertPt = OriginalPreheader->getTerminator();
1430 ClonedLoop PreLoop, PostLoop;
1432 Increasing ? SR.LowLimit.hasValue() : SR.HighLimit.hasValue();
1433 bool NeedsPostLoop =
1434 Increasing ? SR.HighLimit.hasValue() : SR.LowLimit.hasValue();
1436 Value *ExitPreLoopAt =
nullptr;
1437 Value *ExitMainLoopAt =
nullptr;
1439 cast<SCEVConstant>(SE.
getConstant(IVTy, -1,
true ));
1442 const SCEV *ExitPreLoopAtSCEV =
nullptr;
1445 ExitPreLoopAtSCEV = *SR.LowLimit;
1448 ExitPreLoopAtSCEV = SE.
getAddExpr(*SR.HighLimit, MinusOneS);
1450 LLVM_DEBUG(
dbgs() <<
"irce: could not prove no-overflow when computing "
1451 <<
"preloop exit limit. HighLimit = "
1452 << *(*SR.HighLimit) <<
"\n");
1457 LLVM_DEBUG(
dbgs() <<
"irce: could not prove that it is safe to expand the"
1458 <<
" preloop exit limit " << *ExitPreLoopAtSCEV
1464 ExitPreLoopAt = Expander.expandCodeFor(ExitPreLoopAtSCEV, IVTy, InsertPt);
1465 ExitPreLoopAt->
setName(
"exit.preloop.at");
1468 if (NeedsPostLoop) {
1469 const SCEV *ExitMainLoopAtSCEV =
nullptr;
1472 ExitMainLoopAtSCEV = *SR.HighLimit;
1475 ExitMainLoopAtSCEV = SE.
getAddExpr(*SR.LowLimit, MinusOneS);
1477 LLVM_DEBUG(
dbgs() <<
"irce: could not prove no-overflow when computing "
1478 <<
"mainloop exit limit. LowLimit = "
1479 << *(*SR.LowLimit) <<
"\n");
1484 LLVM_DEBUG(
dbgs() <<
"irce: could not prove that it is safe to expand the"
1485 <<
" main loop exit limit " << *ExitMainLoopAtSCEV
1491 ExitMainLoopAt = Expander.expandCodeFor(ExitMainLoopAtSCEV, IVTy, InsertPt);
1492 ExitMainLoopAt->
setName(
"exit.mainloop.at");
1502 RewrittenRangeInfo PreLoopRRI;
1506 PreLoop.Structure.Header);
1509 createPreheader(MainLoopStructure, Preheader,
"mainloop");
1510 PreLoopRRI = changeIterationSpaceEnd(PreLoop.Structure, Preheader,
1511 ExitPreLoopAt, MainLoopPreheader);
1512 rewriteIncomingValuesForPHIs(MainLoopStructure, MainLoopPreheader,
1517 RewrittenRangeInfo PostLoopRRI;
1519 if (NeedsPostLoop) {
1521 createPreheader(PostLoop.Structure, Preheader,
"postloop");
1522 PostLoopRRI = changeIterationSpaceEnd(MainLoopStructure, MainLoopPreheader,
1523 ExitMainLoopAt, PostLoopPreheader);
1524 rewriteIncomingValuesForPHIs(PostLoop.Structure, PostLoopPreheader,
1529 MainLoopPreheader != Preheader ? MainLoopPreheader :
nullptr;
1530 BasicBlock *NewBlocks[] = {PostLoopPreheader, PreLoopRRI.PseudoExit,
1531 PreLoopRRI.ExitSelector, PostLoopRRI.PseudoExit,
1532 PostLoopRRI.ExitSelector, NewMainLoopPreheader};
1547 Loop *PreL =
nullptr, *PostL =
nullptr;
1548 if (!PreLoop.Blocks.empty()) {
1549 PreL = createClonedLoopStructure(&OriginalLoop,
1550 OriginalLoop.getParentLoop(), PreLoop.Map,
1554 if (!PostLoop.Blocks.empty()) {
1556 createClonedLoopStructure(&OriginalLoop, OriginalLoop.getParentLoop(),
1557 PostLoop.Map,
false);
1561 auto CanonicalizeLoop = [&] (
Loop *L,
bool IsOriginalLoop) {
1563 simplifyLoop(L, &DT, &LI, &SE,
nullptr,
nullptr,
true);
1566 if (!IsOriginalLoop)
1570 CanonicalizeLoop(PreL,
false);
1572 CanonicalizeLoop(PostL,
false);
1573 CanonicalizeLoop(&OriginalLoop,
true);
1582 InductiveRangeCheck::computeSafeIterationSpace(
1584 bool IsLatchSigned)
const {
1587 auto *IVType = cast<IntegerType>(IndVar->
getType());
1588 auto *RCType = cast<IntegerType>(getBegin()->
getType());
1589 if (IVType->getBitWidth() > RCType->getBitWidth())
1619 assert(!
B->isZero() &&
"Recurrence with zero step?");
1621 const SCEV *
C = getBegin();
1626 assert(!
D->getValue()->isZero() &&
"Recurrence with zero step?");
1627 unsigned BitWidth = RCType->getBitWidth();
1642 auto ClampedSubtract = [&](
const SCEV *
X,
const SCEV *
Y) {
1647 if (IsLatchSigned) {
1678 auto SCEVCheckNonNegative = [&](
const SCEV *
X) {
1699 const SCEV *REnd = getEnd();
1700 const SCEV *EndIsNonNegative = SCEVCheckNonNegative(REnd);
1702 const SCEV *Begin = SE.
getMulExpr(ClampedSubtract(Zero, M), EndIsNonNegative);
1703 const SCEV *End = SE.
getMulExpr(ClampedSubtract(REnd, M), EndIsNonNegative);
1704 return InductiveRangeCheck::Range(Begin, End);
1710 const InductiveRangeCheck::Range &
R2) {
1711 if (
R2.isEmpty(SE,
true))
1718 assert(!R1Value.isEmpty(SE,
true) &&
1719 "We should never have empty R1!");
1723 if (R1Value.getType() !=
R2.getType())
1730 auto Ret = InductiveRangeCheck::Range(NewBegin, NewEnd);
1731 if (
Ret.isEmpty(SE,
true))
1739 const InductiveRangeCheck::Range &
R2) {
1740 if (
R2.isEmpty(SE,
false))
1747 assert(!R1Value.isEmpty(SE,
false) &&
1748 "We should never have empty R1!");
1752 if (R1Value.getType() !=
R2.getType())
1759 auto Ret = InductiveRangeCheck::Range(NewBegin, NewEnd);
1760 if (
Ret.isEmpty(SE,
false))
1776 InductiveRangeCheckElimination IRCE(SE, &BPI, DT, LI, { getBFI });
1778 bool Changed =
false;
1780 bool CFGChanged =
false;
1781 for (
const auto &L : LI) {
1782 CFGChanged |=
simplifyLoop(L, &DT, &LI, &SE,
nullptr,
nullptr,
1786 Changed |= CFGChanged;
1797 auto LPMAddNewLoop = [&Worklist](
Loop *
NL,
bool IsSubloop) {
1802 while (!Worklist.
empty()) {
1804 if (IRCE.run(L, LPMAddNewLoop)) {
1820 if (skipFunction(
F))
1823 ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1825 getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
1826 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1827 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1828 InductiveRangeCheckElimination IRCE(SE, &BPI, DT, LI);
1830 bool Changed =
false;
1832 for (
const auto &L : LI) {
1833 Changed |=
simplifyLoop(L, &DT, &LI, &SE,
nullptr,
nullptr,
1840 auto LPMAddNewLoop = [&](
Loop *
NL,
bool IsSubloop) {
1845 while (!Worklist.
empty()) {
1847 Changed |= IRCE.run(L, LPMAddNewLoop);
1853 InductiveRangeCheckElimination::isProfitableToTransform(
const Loop &L,
1854 LoopStructure &
LS) {
1857 if (GetBFI.hasValue()) {
1859 uint64_t hFreq =
BFI.getBlockFreq(
LS.Header).getFrequency();
1863 <<
"the estimated number of iterations basing on "
1864 "frequency info is " << (hFreq / phFreq) <<
"\n";);
1876 <<
"the exit probability is too big " << ExitProbability
1883 bool InductiveRangeCheckElimination::run(
1886 LLVM_DEBUG(
dbgs() <<
"irce: giving up constraining loop, too large\n");
1900 if (
BranchInst *TBI = dyn_cast<BranchInst>(BBI->getTerminator()))
1901 InductiveRangeCheck::extractRangeChecksFromBranch(TBI, L, SE, BPI,
1904 if (RangeChecks.empty())
1907 auto PrintRecognizedRangeChecks = [&](
raw_ostream &OS) {
1908 OS <<
"irce: looking at loop "; L->
print(OS);
1909 OS <<
"irce: loop has " << RangeChecks.size()
1910 <<
" inductive range checks: \n";
1911 for (InductiveRangeCheck &IRC : RangeChecks)
1918 PrintRecognizedRangeChecks(
errs());
1920 const char *FailureReason =
nullptr;
1922 LoopStructure::parseLoopStructure(SE, *L, FailureReason);
1923 if (!MaybeLoopStructure.
hasValue()) {
1925 << FailureReason <<
"\n";);
1928 LoopStructure
LS = MaybeLoopStructure.
getValue();
1929 if (!isProfitableToTransform(*L,
LS))
1942 auto IntersectRange =
1946 for (InductiveRangeCheck &IRC : RangeChecks) {
1947 auto Result = IRC.computeSafeIterationSpace(SE, IndVar,
1948 LS.IsSignedPredicate);
1950 auto MaybeSafeIterRange =
1951 IntersectRange(SE, SafeIterRange,
Result.getValue());
1952 if (MaybeSafeIterRange.hasValue()) {
1954 !MaybeSafeIterRange.getValue().isEmpty(SE,
LS.IsSignedPredicate) &&
1955 "We should never return empty ranges!");
1956 RangeChecksToEliminate.push_back(IRC);
1957 SafeIterRange = MaybeSafeIterRange.
getValue();
1965 LoopConstrainer LC(*L, LI, LPMAddNewLoop,
LS, SE, DT,
1967 bool Changed = LC.run();
1970 auto PrintConstrainedLoopInfo = [L]() {
1971 dbgs() <<
"irce: in function ";
1973 dbgs() <<
"constrained ";
1980 PrintConstrainedLoopInfo();
1984 for (InductiveRangeCheck &IRC : RangeChecksToEliminate) {
1985 ConstantInt *FoldedRangeCheck = IRC.getPassingDirection()
1988 IRC.getCheckUse()->set(FoldedRangeCheck);
1996 return new IRCELegacyPass();
A set of analyses that are preserved following a run of a transformation pass.
Analysis pass that exposes the ScalarEvolution for a function.
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
bool isLoopEntryGuardedByCond(const Loop *L, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test whether entry to the loop is protected by a conditional between LHS and RHS.
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
static IntegerType * getInt1Ty(LLVMContext &C)
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
const SCEV * getNoopOrZeroExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
static cl::opt< bool > AllowUnsignedLatchCondition("irce-allow-unsigned-latch", cl::Hidden, cl::init(true))
static Optional< InductiveRangeCheck::Range > IntersectSignedRange(ScalarEvolution &SE, const Optional< InductiveRangeCheck::Range > &R1, const InductiveRangeCheck::Range &R2)
A parsed version of the target data layout string in and methods for querying it.
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const Function * getParent() const
Return the enclosing method, or null if none.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
const SCEV * getStart() const
Represents a single loop in the control flow graph.
void invalidate(IRUnitT &IR, const PreservedAnalyses &PA)
Invalidate cached analyses for an IR unit.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
This class uses information about analyze scalars to rewrite expressions in canonical form.
const APInt & getValue() const
Return the constant as an APInt value reference.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
The main scalar evolution driver.
INITIALIZE_PASS_BEGIN(IRCELegacyPass, "irce", "Inductive range check elimination", false, false) INITIALIZE_PASS_END(IRCELegacyPass
void abandon()
Mark an analysis as abandoned.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
@ ICMP_SGT
signed greater than
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
@ LoopInvariant
The SCEV is loop-invariant.
The instances of the Type class are immutable: once they are created, they are never changed.
const_iterator end(StringRef path)
Get end iterator over path.
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
The legacy pass manager's analysis pass to compute loop information.
void initializeIRCELegacyPassPass(PassRegistry &)
bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
static void DisableAllLoopOptsOnLoop(Loop &L)
@ ICMP_SLE
signed less or equal
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
static StringRef getPredicateName(Predicate P)
succ_range successors(Instruction *I)
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
LLVM Basic Block Representation.
static const char * ClonedLoopTag
constexpr bool hasValue() const
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
Inductive range check elimination
const Use & getOperandUse(unsigned i) const
This is the shared class of boolean and integer constants.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
FunctionType * getType(LLVMContext &Context, ID id, ArrayRef< Type * > Tys=None)
Return the function type for an intrinsic.
Analysis pass which computes BranchProbabilityInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static cl::opt< bool > PrintRangeChecks("irce-print-range-checks", cl::Hidden, cl::init(false))
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
bool match(Val *V, const Pattern &P)
Legacy analysis pass which computes BranchProbabilityInfo.
(vector float) vec_cmpeq(*A, *B) C
bool isKnownNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE)
Returns true if we can prove that S is defined and always negative in loop L.
@ ICMP_ULE
unsigned less or equal
const SCEV * getUMaxExpr(const SCEV *LHS, const SCEV *RHS)
void print(raw_ostream &OS) const
Print out the internal representation of this scalar to the specified stream.
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
bool cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, bool Signed)
Returns true if S is defined and never is equal to signed/unsigned min.
Analysis providing branch probability information.
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
Represent the analysis usage information of a pass.
LLVM_NODISCARD T pop_back_val()
Loop * cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI, LPPassManager *LPM)
Recursively clone the specified loop and all of its children, mapping the blocks with the specified m...
iterator_range< block_iterator > blocks() const
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Class to represent integer types.
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
Legacy analysis pass which computes a DominatorTree.
This class implements an extremely fast bulk output stream that can only output to a stream.
void setName(const Twine &Name)
Change the name of the value.
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Analysis pass which computes BlockFrequencyInfo.
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
bool isKnownNonNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE)
Returns true if we can prove that S is defined and always non-negative in loop L.
Value * getCondition() const
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
An efficient, type-erasing, non-owning reference to a callable.
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to,...
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
This class represents an analyzed expression in the program.
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
static cl::opt< bool > SkipProfitabilityChecks("irce-skip-profitability-checks", cl::Hidden, cl::init(false))
This instruction compares its operands according to the predicate given to the constructor.
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
const SCEV * getSMaxExpr(const SCEV *LHS, const SCEV *RHS)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
A version of PriorityWorklist that selects small size optimized data structures for the vector and ma...
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
This is an important class for using LLVM in a threaded context.
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
initializer< Ty > init(const Ty &Val)
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
This class represents a constant integer value.
bool empty() const
Determine if the PriorityWorklist is empty or not.
static MDString * get(LLVMContext &Context, StringRef Str)
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
const SCEV * getUMinExpr(const SCEV *LHS, const SCEV *RHS)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static const SCEV * NoopOrExtend(const SCEV *S, Type *Ty, ScalarEvolution &SE, bool Signed)
If the type of S matches with Ty, return S.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
void replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
bool cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, bool Signed)
Returns true if S is defined and never is equal to signed/unsigned max.
@ ICMP_UGE
unsigned greater or equal
const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
bool isUnconditional() const
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BranchProbability getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const
Get an edge's probability, relative to other out-edges of the Src.
Class for arbitrary precision integers.
@ ICMP_SLT
signed less than
std::error_code remove(const Twine &path, bool IgnoreNonExisting=true)
Remove path.
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const
Pass * createInductiveRangeCheckEliminationPass()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
const SCEV * getConstant(ConstantInt *V)
@ ICMP_ULT
unsigned less than
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Type * getType() const
All values are typed, get the type of this value.
void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
static const Function * getParent(const Value *V)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
StringRef getName() const
Return a constant reference to the value's name.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
LLVMContext & getContext() const
Get the context in which this basic block lives.
static bool runOnFunction(Function &F, bool PostInlining)
static bool isSafeIncreasingBound(const SCEV *Start, const SCEV *BoundSCEV, const SCEV *Step, ICmpInst::Predicate Pred, unsigned LatchBrExitIdx, Loop *L, ScalarEvolution &SE)
Given a loop with an increasing induction variable, is it possible to safely calculate the bounds of ...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
const Loop * getLoop() const
static ConstantInt * getTrue(LLVMContext &Context)
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
This node represents a polynomial recurrence on the trip count of the specified loop.
constexpr unsigned BitWidth
BlockT * getHeader() const
static cl::opt< bool > PrintChangedLoops("irce-print-changed-loops", cl::Hidden, cl::init(false))
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
@ ICMP_SGE
signed greater or equal
bool isAvailableAtLoopEntry(const SCEV *S, const Loop *L)
Determine if the SCEV can be evaluated at loop's entry.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Analysis pass which computes a DominatorTree.
Pass interface - Implemented by all 'passes'.
bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint, ScalarEvolution &SE)
Return true if the given expression is safe to expand in the sense that all materialized values are d...
LoopDisposition getLoopDisposition(const SCEV *S, const Loop *L)
Return the "disposition" of the given SCEV with respect to the given loop.
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
@ ICMP_UGT
unsigned greater than
static cl::opt< unsigned > MinRuntimeIterations("irce-min-runtime-iterations", cl::Hidden, cl::init(10))
const BasicBlock * getParent() const
static cl::opt< bool > AllowNarrowLatchCondition("irce-allow-narrow-latch", cl::Hidden, cl::init(true), cl::desc("If set to true, IRCE may eliminate wide range checks in loops " "with narrow latch condition."))
Predicate getPredicate() const
Return the predicate for this instruction.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
Align max(MaybeAlign Lhs, Align Rhs)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Type * getType() const
Return the LLVM type of this SCEV expression.
A container for analyses that lazily runs them and caches their results.
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
FunctionPass class - This class is used to implement most global optimizations.
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
void print(raw_ostream &OS, bool Verbose=false, bool PrintNested=true, unsigned Depth=0) const
Print loop with all the BBs inside it.
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
AnalysisUsage & addRequired()
Value * getOperand(unsigned i) const
Conditional or Unconditional Branch instruction.
bool isKnownNegative(const SCEV *S)
Test if the given expression is known to be negative.
const SCEV * getSMinExpr(const SCEV *LHS, const SCEV *RHS)
static cl::opt< unsigned > LoopSizeCutoff("irce-loop-size-cutoff", cl::Hidden, cl::init(64))
LLVM Value Representation.
const SCEV * getExitCount(const Loop *L, const BasicBlock *ExitingBlock, ExitCountKind Kind=Exact)
Return the number of times the backedge executes before the given exit would be taken; if not exactly...
static bool isSafeDecreasingBound(const SCEV *Start, const SCEV *BoundSCEV, const SCEV *Step, ICmpInst::Predicate Pred, unsigned LatchBrExitIdx, Loop *L, ScalarEvolution &SE)
Given a loop with an deccreasing induction variable, is it possible to safely calculate the bounds of...
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ? R : false.
constexpr const T & getValue() const LLVM_LVALUE_FUNCTION
BasicBlock * getSuccessor(unsigned i) const
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Analysis pass that exposes the LoopInfo for a function.
A Use represents the edge between a Value definition and its users.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
static Optional< InductiveRangeCheck::Range > IntersectUnsignedRange(ScalarEvolution &SE, const Optional< InductiveRangeCheck::Range > &R1, const InductiveRangeCheck::Range &R2)