61#define DEBUG_TYPE "loop-reroll"
63STATISTIC(NumRerolledLoops,
"Number of rerolled loops");
68 cl::desc(
"The maximum number of failures to tolerate"
69 " during fuzzy matching. (default: 400)"));
148 enum IterationLimits {
150 IL_MaxRerollIterations = 32,
161 : AA(AA), LI(LI), SE(SE), TLI(TLI), DT(DT),
162 PreserveLCSSA(PreserveLCSSA) {}
163 bool runOnLoop(
Loop *L);
182 TinyInstructionVector LoopControlIVs;
187 struct SimpleLoopReduction {
189 assert(isa<PHINode>(
P) &&
"First reduction instruction must be a PHI");
198 assert(Valid &&
"Using invalid reduction");
203 assert(Valid &&
"Using invalid reduction");
208 assert(Valid &&
"Using invalid reduction");
215 size_t size()
const {
216 assert(Valid &&
"Using invalid reduction");
224 assert(Valid &&
"Using invalid reduction");
229 assert(Valid &&
"Using invalid reduction");
245 struct ReductionTracker {
249 void addSLR(SimpleLoopReduction &SLR) { PossibleReds.
push_back(SLR); }
259 void restrictToScale(
uint64_t Scale,
260 SmallInstructionSet &PossibleRedSet,
261 SmallInstructionSet &PossibleRedPHISet,
262 SmallInstructionSet &PossibleRedLastSet) {
263 PossibleRedIdx.clear();
264 PossibleRedIter.clear();
267 for (
unsigned i = 0, e = PossibleReds.size(); i != e; ++i)
268 if (PossibleReds[i].
size() % Scale == 0) {
269 PossibleRedLastSet.insert(PossibleReds[i].getReducedValue());
270 PossibleRedPHISet.insert(PossibleReds[i].getPHI());
272 PossibleRedSet.insert(PossibleReds[i].getPHI());
273 PossibleRedIdx[PossibleReds[i].getPHI()] = i;
275 PossibleRedSet.insert(J);
276 PossibleRedIdx[J] = i;
287 if (J1I != PossibleRedIdx.end()) {
289 if (J2I != PossibleRedIdx.end() && J1I->second == J2I->second)
300 if (PossibleRedIdx.count(J1)) {
301 assert(PossibleRedIdx.count(J2) &&
302 "Recording reduction vs. non-reduction instruction?");
304 PossibleRedIter[J1] = 0;
305 PossibleRedIter[J2] = i;
307 int Idx = PossibleRedIdx[J1];
309 "Recording pair from different reductions?");
317 bool validateSelected();
318 void replaceSelected();
322 SmallReductionVector PossibleReds;
357 SmallInstructionVector Roots;
360 SmallInstructionSet SubsumedInsts;
365 struct DAGRootTracker {
371 TinyInstructionVector LoopCtrlIVs)
372 : Parent(Parent),
L(
L), SE(SE), AA(AA), TLI(TLI), DT(DT), LI(LI),
373 PreserveLCSSA(PreserveLCSSA), IV(IV), IVToIncMap(IncrMap),
374 LoopControlIVs(LoopCtrlIVs) {}
391 SmallInstructionSet SubsumedInsts);
392 bool findRootsBase(
Instruction *IVU, SmallInstructionSet SubsumedInsts);
394 std::map<int64_t,Instruction*> &Roots);
395 bool validateRootSet(DAGRootSet &DRS);
397 bool collectUsedInstructions(SmallInstructionSet &PossibleRedSet);
398 void collectInLoopUserSet(
const SmallInstructionVector &Roots,
399 const SmallInstructionSet &Exclude,
400 const SmallInstructionSet &Final,
403 const SmallInstructionSet &Exclude,
404 const SmallInstructionSet &Final,
408 const SmallInstructionSet &Exclude,
415 void replaceIV(DAGRootSet &DRS,
const SCEV *Start,
const SCEV *IncrExpr);
443 SmallInstructionVector LoopIncs;
453 TinyInstructionVector LoopControlIVs;
458 auto *TI =
I->getParent()->getTerminator();
459 if (!isa<BranchInst>(TI) || !isa<CmpInst>(
I))
461 return I->hasOneUse() && TI->getOperand(0) ==
I;
465 void collectPossibleIVs(
Loop *L, SmallInstructionVector &PossibleIVs);
466 void collectPossibleReductions(
Loop *L,
478 for (
User *U :
I->users()) {
479 if (!L->contains(cast<Instruction>(U)))
492 unsigned IVUses =
IV->getNumUses();
493 if (IVUses != 2 && IVUses != 1)
496 for (
auto *
User :
IV->users()) {
498 bool IsCompInst = isCompareUsedByBranch(cast<Instruction>(
User));
501 if (IncOrCmpUses != 2 && IncOrCmpUses != 1)
508 if (IsCompInst || IncOrCmpUses != 2)
513 if (IVUses == 2 && IncOrCmpUses != 1)
517 if (
auto *BO = dyn_cast<BinaryOperator>(
User)) {
518 if (BO->getOpcode() == Instruction::Add) {
522 if (
PHINode *PN = dyn_cast<PHINode>(UU)) {
528 auto *UUser = cast<Instruction>(UU);
531 if (BO->hasNoSignedWrap() && UUser->hasOneUse() &&
532 isa<SExtInst>(UUser))
533 UUser = cast<Instruction>(*(UUser->user_begin()));
534 if (!isCompareUsedByBranch(UUser))
541 }
else if (!IsCompInst)
549void LoopReroll::collectPossibleIVs(
Loop *L,
550 SmallInstructionVector &PossibleIVs) {
552 if (!
IV.getType()->isIntegerTy() && !
IV.getType()->isPointerTy())
556 dyn_cast<SCEVAddRecExpr>(SE->
getSCEV(&
IV))) {
557 if (PHISCEV->getLoop() != L)
559 if (!PHISCEV->isAffine())
561 const auto *IncSCEV = dyn_cast<SCEVConstant>(PHISCEV->getStepRecurrence(*SE));
563 IVToIncMap[&
IV] = IncSCEV->getValue()->getSExtValue();
567 if (isLoopControlIV(L, &
IV)) {
568 LoopControlIVs.push_back(&
IV);
570 <<
" = " << *PHISCEV <<
"\n");
572 PossibleIVs.push_back(&
IV);
581void LoopReroll::SimpleLoopReduction::add(
Loop *L) {
582 assert(!Valid &&
"Cannot add to an already-valid chain");
592 C = cast<Instruction>(*
C->user_begin());
593 if (
C->hasOneUse()) {
594 if (!
C->isBinaryOp())
603 }
while (
C->hasOneUse());
611 for (
User *U :
C->users()) {
613 if (
L->contains(cast<Instruction>(U)))
623void LoopReroll::collectPossibleReductions(
Loop *L,
627 IE = Header->getFirstInsertionPt();
I != IE; ++
I) {
628 if (!isa<PHINode>(
I))
630 if (!
I->getType()->isSingleValueType())
633 SimpleLoopReduction SLR(&*
I, L);
638 << SLR.size() <<
" chained instructions)\n");
655void LoopReroll::DAGRootTracker::collectInLoopUserSet(
656 Instruction *Root,
const SmallInstructionSet &Exclude,
657 const SmallInstructionSet &Final,
659 SmallInstructionVector
Queue(1, Root);
660 while (!
Queue.empty()) {
662 if (!
Users.insert(
I).second)
666 for (
Use &U :
I->uses()) {
670 if (PN->getIncomingBlock(U) ==
L->getHeader())
674 if (
L->contains(
User) && !Exclude.count(
User)) {
680 for (
Use &U :
I->operands()) {
682 if (
Op->hasOneUse() &&
L->contains(
Op) && !Exclude.count(
Op) &&
691void LoopReroll::DAGRootTracker::collectInLoopUserSet(
692 const SmallInstructionVector &Roots,
693 const SmallInstructionSet &Exclude,
694 const SmallInstructionSet &Final,
697 collectInLoopUserSet(Root, Exclude, Final,
Users);
701 if (
LoadInst *LI = dyn_cast<LoadInst>(
I))
702 return LI->isUnordered();
704 return SI->isUnordered();
706 return !
MI->isVolatile();
715 switch (
I->getOpcode()) {
716 default:
return false;
717 case Instruction::Add:
718 case Instruction::Sub:
719 case Instruction::Mul:
720 case Instruction::Shl:
721 case Instruction::AShr:
722 case Instruction::LShr:
723 case Instruction::GetElementPtr:
724 case Instruction::Trunc:
725 case Instruction::ZExt:
726 case Instruction::SExt:
736 if ((BO && BO->
getOpcode() != Instruction::Add) ||
737 (!BO && !isa<GetElementPtrInst>(U)))
740 for (
auto *UU : U->users()) {
741 PHINode *PN = dyn_cast<PHINode>(UU);
748bool LoopReroll::DAGRootTracker::
749collectPossibleRoots(
Instruction *
Base, std::map<int64_t,Instruction*> &Roots) {
750 SmallInstructionVector BaseUsers;
752 for (
auto *
I :
Base->users()) {
756 LoopIncs.push_back(cast<Instruction>(
I));
761 if (
auto *BO = dyn_cast<BinaryOperator>(
I)) {
762 if (BO->getOpcode() == Instruction::Add ||
763 BO->getOpcode() == Instruction::Or)
764 CI = dyn_cast<ConstantInt>(BO->getOperand(1));
765 }
else if (
auto *
GEP = dyn_cast<GetElementPtrInst>(
I)) {
766 Value *LastOperand =
GEP->getOperand(
GEP->getNumOperands()-1);
767 CI = dyn_cast<ConstantInt>(LastOperand);
772 BaseUsers.push_back(II);
782 if (Roots.find(V) != Roots.end())
786 Roots[
V] = cast<Instruction>(
I);
790 if (Roots.empty() || (Roots.size() == 1 && BaseUsers.empty()))
796 if (BaseUsers.size()) {
797 if (Roots.find(0) != Roots.end()) {
798 LLVM_DEBUG(
dbgs() <<
"LRR: Multiple roots found for base - aborting!\n");
805 unsigned NumBaseUses = BaseUsers.size();
806 if (NumBaseUses == 0)
807 NumBaseUses = Roots.begin()->second->getNumUses();
810 for (
auto &KV : Roots) {
813 if (!KV.second->hasNUses(NumBaseUses)) {
814 LLVM_DEBUG(
dbgs() <<
"LRR: Aborting - Root and Base #users not the same: "
815 <<
"#Base=" << NumBaseUses
816 <<
", #Root=" << KV.second->getNumUses() <<
"\n");
824void LoopReroll::DAGRootTracker::
825findRootsRecursive(
Instruction *
I, SmallInstructionSet SubsumedInsts) {
828 if (
I->hasNUsesOrMore(IL_MaxRerollIterations + 1))
831 if (
I !=
IV && findRootsBase(
I, SubsumedInsts))
834 SubsumedInsts.insert(
I);
836 for (
User *V :
I->users()) {
845 findRootsRecursive(
I, SubsumedInsts);
849bool LoopReroll::DAGRootTracker::validateRootSet(DAGRootSet &DRS) {
850 if (DRS.Roots.empty())
868 const auto *
ADR = dyn_cast<SCEVAddRecExpr>(SE->
getSCEV(DRS.BaseInst));
873 unsigned N = DRS.Roots.size() + 1;
878 if (
ADR->getStepRecurrence(*SE) != SE->
getMulExpr(StepSCEV, ScaleSCEV))
882 for (
unsigned i = 1; i <
N - 1; ++i) {
885 if (NewStepSCEV != StepSCEV)
892bool LoopReroll::DAGRootTracker::
893findRootsBase(
Instruction *IVU, SmallInstructionSet SubsumedInsts) {
895 const auto *IVU_ADR = dyn_cast<SCEVAddRecExpr>(SE->
getSCEV(IVU));
896 if (!IVU_ADR || IVU_ADR->getLoop() != L)
899 std::map<int64_t, Instruction*>
V;
900 if (!collectPossibleRoots(IVU, V))
905 if (
V.find(0) ==
V.end())
906 SubsumedInsts.insert(IVU);
910 DRS.BaseInst =
nullptr;
916 DRS.BaseInst = KV.second;
917 DRS.SubsumedInsts = SubsumedInsts;
918 }
else if (DRS.Roots.empty()) {
920 }
else if (
V.find(KV.first - 1) !=
V.end()) {
921 DRS.Roots.push_back(KV.second);
924 if (!validateRootSet(DRS))
929 DRS.BaseInst = KV.second;
934 if (!validateRootSet(DRS))
939 RootSets.append(PotentialRootSets.
begin(), PotentialRootSets.
end());
944bool LoopReroll::DAGRootTracker::findRoots() {
945 Inc = IVToIncMap[
IV];
947 assert(RootSets.empty() &&
"Unclean state!");
948 if (std::abs(Inc) == 1) {
949 for (
auto *IVU :
IV->users()) {
951 LoopIncs.push_back(cast<Instruction>(IVU));
953 findRootsRecursive(
IV, SmallInstructionSet());
954 LoopIncs.push_back(
IV);
956 if (!findRootsBase(
IV, SmallInstructionSet()))
961 if (RootSets.empty()) {
962 LLVM_DEBUG(
dbgs() <<
"LRR: Aborting because no root sets found!\n");
965 for (
auto &V : RootSets) {
966 if (
V.Roots.empty() ||
V.Roots.size() != RootSets[0].Roots.size()) {
969 <<
"LRR: Aborting because not all root sets have the same size\n");
974 Scale = RootSets[0].Roots.size() + 1;
976 if (Scale > IL_MaxRerollIterations) {
977 LLVM_DEBUG(
dbgs() <<
"LRR: Aborting - too many iterations found. "
978 <<
"#Found=" << Scale
979 <<
", #Max=" << IL_MaxRerollIterations <<
"\n");
983 LLVM_DEBUG(
dbgs() <<
"LRR: Successfully found roots: Scale=" << Scale
989bool LoopReroll::DAGRootTracker::collectUsedInstructions(SmallInstructionSet &PossibleRedSet) {
992 for (
auto &
I : *
L->getHeader()) {
993 Uses[&
I].resize(IL_End);
996 SmallInstructionSet Exclude;
997 for (
auto &DRS : RootSets) {
998 Exclude.insert(DRS.Roots.begin(), DRS.Roots.end());
999 Exclude.insert(DRS.SubsumedInsts.begin(), DRS.SubsumedInsts.end());
1000 Exclude.insert(DRS.BaseInst);
1002 Exclude.insert(LoopIncs.begin(), LoopIncs.end());
1004 for (
auto &DRS : RootSets) {
1006 collectInLoopUserSet(DRS.BaseInst, Exclude, PossibleRedSet, VBase);
1007 for (
auto *
I : VBase) {
1012 for (
auto *Root : DRS.Roots) {
1014 collectInLoopUserSet(Root, Exclude, PossibleRedSet, V);
1017 if (
V.size() != VBase.size()) {
1018 LLVM_DEBUG(
dbgs() <<
"LRR: Aborting - use sets are different sizes\n");
1029 for (
auto *
I : DRS.SubsumedInsts) {
1030 Uses[
I].set(IL_All);
1037 for (
auto &DRS : RootSets) {
1038 Exclude.insert(DRS.Roots.begin(), DRS.Roots.end());
1039 Exclude.insert(DRS.SubsumedInsts.begin(), DRS.SubsumedInsts.end());
1040 Exclude.insert(DRS.BaseInst);
1044 collectInLoopUserSet(LoopIncs, Exclude, PossibleRedSet, V);
1046 if (
I->mayHaveSideEffects()) {
1048 <<
"An instruction which does not belong to any root "
1049 <<
"sets must not have side effects: " << *
I);
1052 Uses[
I].set(IL_All);
1062LoopReroll::DAGRootTracker::nextInstr(
int Val, UsesTy &In,
1063 const SmallInstructionSet &Exclude,
1064 UsesTy::iterator *StartI) {
1065 UsesTy::iterator
I = StartI ? *StartI :
In.begin();
1066 while (
I !=
In.end() && (
I->second.test(Val) == 0 ||
1067 Exclude.contains(
I->first)))
1072bool LoopReroll::DAGRootTracker::isBaseInst(
Instruction *
I) {
1073 for (
auto &DRS : RootSets) {
1074 if (DRS.BaseInst ==
I)
1080bool LoopReroll::DAGRootTracker::isRootInst(
Instruction *
I) {
1081 for (
auto &DRS : RootSets) {
1090bool LoopReroll::DAGRootTracker::instrDependsOn(
Instruction *
I,
1091 UsesTy::iterator Start,
1092 UsesTy::iterator
End) {
1093 for (
auto *U :
I->users()) {
1094 for (
auto It = Start; It !=
End; ++It)
1102 if (isa<DbgInfoIntrinsic>(
I))
1110 case Intrinsic::annotation:
1111 case Intrinsic::ptr_annotation:
1112 case Intrinsic::var_annotation:
1120bool LoopReroll::DAGRootTracker::validate(ReductionTracker &
Reductions) {
1135 SmallInstructionSet PossibleRedSet;
1136 SmallInstructionSet PossibleRedLastSet;
1137 SmallInstructionSet PossibleRedPHISet;
1138 Reductions.restrictToScale(Scale, PossibleRedSet,
1139 PossibleRedPHISet, PossibleRedLastSet);
1142 if (!collectUsedInstructions(PossibleRedSet))
1146 for (
auto *
I : PossibleRedPHISet) {
1147 Uses[
I].set(IL_All);
1153 for (
Instruction *LoopControlIV : LoopControlIVs) {
1154 for (
auto *U : LoopControlIV->users()) {
1157 Uses[IVUser].set(IL_All);
1158 for (
auto *UU : IVUser->
users()) {
1161 Uses[UUser].set(IL_All);
1163 if (isa<SExtInst>(UUser)) {
1164 UUser = dyn_cast<Instruction>(*(UUser->
user_begin()));
1165 Uses[UUser].set(IL_All);
1168 if (UU->hasOneUse()) {
1170 if (BI == cast<BranchInst>(Header->getTerminator()))
1171 Uses[BI].set(IL_All);
1179 for (
auto &KV :
Uses) {
1182 dbgs() <<
"LRR: Aborting - instruction is not used in 1 iteration: "
1183 << *KV.first <<
" (#uses=" << KV.second.count() <<
")\n");
1190 dbgs() <<
"LRR: " << KV.second.find_first() <<
"\t" << *KV.first <<
"\n";
1194 for (
unsigned Iter = 1; Iter < Scale; ++Iter) {
1199 bool FutureSideEffects =
false;
1205 SmallInstructionSet Visited;
1206 auto BaseIt = nextInstr(0,
Uses, Visited);
1207 auto RootIt = nextInstr(Iter,
Uses, Visited);
1208 auto LastRootIt =
Uses.begin();
1210 while (BaseIt !=
Uses.end() && RootIt !=
Uses.end()) {
1215 bool Continue =
false;
1216 if (isBaseInst(BaseInst)) {
1217 Visited.insert(BaseInst);
1218 BaseIt = nextInstr(0,
Uses, Visited);
1221 if (isRootInst(RootInst)) {
1222 LastRootIt = RootIt;
1223 Visited.insert(RootInst);
1224 RootIt = nextInstr(Iter,
Uses, Visited);
1227 if (Continue)
continue;
1240 auto TryIt = RootIt;
1242 while (TryIt !=
Uses.end() &&
1246 TryIt = nextInstr(Iter,
Uses, Visited, &TryIt);
1249 if (TryIt ==
Uses.end() || TryIt == RootIt ||
1250 instrDependsOn(TryIt->first, RootIt, TryIt)) {
1252 << *BaseInst <<
" vs. " << *RootInst <<
"\n");
1257 RootInst = TryIt->first;
1268 for (; LastRootIt < RootIt; ++LastRootIt) {
1270 if (LastRootIt->second.find_first() < (
int)Iter)
1272 if (
I->mayWriteToMemory())
1281 FutureSideEffects =
true;
1287 if (RootIt->second.count() > 1) {
1288 LLVM_DEBUG(
dbgs() <<
"LRR: iteration root match failed at " << *BaseInst
1289 <<
" vs. " << *RootInst <<
" (prev. case overlap)\n");
1297 for (
auto &K : AST) {
1300 << *BaseInst <<
" vs. " << *RootInst
1301 <<
" (depends on future store)\n");
1315 LLVM_DEBUG(
dbgs() <<
"LRR: iteration root match failed at " << *BaseInst
1316 <<
" vs. " << *RootInst
1317 <<
" (side effects prevent reordering)\n");
1331 bool InReduction =
Reductions.isPairInSame(BaseInst, RootInst);
1334 bool Swapped =
false, SomeOpMatched =
false;
1342 if (
Instruction *Op2I = dyn_cast<Instruction>(Op2))
1347 if (BMI != BaseMap.
end()) {
1350 for (
auto &DRS : RootSets) {
1358 if (BaseInst->
getOperand(Swapped ?
unsigned(!j) : j) != Op2) {
1364 if (!Swapped && BaseInst->
isCommutative() && !SomeOpMatched &&
1369 <<
"LRR: iteration root match failed at " << *BaseInst
1370 <<
" vs. " << *RootInst <<
" (operand " << j <<
")\n");
1375 SomeOpMatched =
true;
1379 if ((!PossibleRedLastSet.count(BaseInst) &&
1381 (!PossibleRedLastSet.count(RootInst) &&
1383 LLVM_DEBUG(
dbgs() <<
"LRR: iteration root match failed at " << *BaseInst
1384 <<
" vs. " << *RootInst <<
" (uses outside loop)\n");
1388 Reductions.recordPair(BaseInst, RootInst, Iter);
1389 BaseMap.
insert(std::make_pair(RootInst, BaseInst));
1391 LastRootIt = RootIt;
1392 Visited.insert(BaseInst);
1393 Visited.insert(RootInst);
1394 BaseIt = nextInstr(0,
Uses, Visited);
1395 RootIt = nextInstr(Iter,
Uses, Visited);
1398 "Mismatched set sizes!");
1407void LoopReroll::DAGRootTracker::replace(
const SCEV *BackedgeTakenCount) {
1414 for (
auto &DRS : RootSets) {
1416 cast<SCEVAddRecExpr>(SE->
getSCEV(DRS.BaseInst));
1423 unsigned I =
Uses[&Inst].find_first();
1424 if (
I > 0 &&
I < IL_All) {
1426 Inst.eraseFromParent();
1431 for (
size_t i = 0, e = RootSets.size(); i != e; ++i)
1433 replaceIV(RootSets[i], StartExprs[i], IncrExprs[i]);
1436 BranchInst *BI = cast<BranchInst>(Header->getTerminator());
1437 const DataLayout &
DL = Header->getModule()->getDataLayout();
1443 Expander.expandCodeFor(NewIVSCEV, BackedgeTakenCount->
getType(),
1444 Header->getFirstNonPHIOrDbg());
1446 auto TripCount = SE->
getAddExpr(BackedgeTakenCount, One);
1449 auto ScaledBECount = SE->
getMinusSCEV(ScaledTripCount, One);
1451 Expander.expandCodeFor(ScaledBECount, BackedgeTakenCount->
getType(),
1452 Header->getFirstNonPHIOrDbg());
1465void LoopReroll::DAGRootTracker::replaceIV(DAGRootSet &DRS,
1467 const SCEV *IncrExpr) {
1471 const SCEV *NewIVSCEV =
1475 const DataLayout &
DL = Header->getModule()->getDataLayout();
1477 Value *NewIV = Expander.expandCodeFor(NewIVSCEV, Inst->
getType(),
1478 Header->getFirstNonPHIOrDbg());
1480 for (
auto &KV :
Uses)
1481 if (KV.second.find_first() == 0)
1482 KV.first->replaceUsesOfWith(Inst, NewIV);
1489bool LoopReroll::ReductionTracker::validateSelected() {
1491 for (
int i : Reds) {
1492 int PrevIter = 0, BaseCount = 0, Count = 0;
1497 int Iter = PossibleRedIter[J];
1498 if (Iter != PrevIter && Iter != PrevIter + 1 &&
1500 LLVM_DEBUG(
dbgs() <<
"LRR: Out-of-order non-associative reduction: "
1505 if (Iter != PrevIter) {
1506 if (Count != BaseCount) {
1508 <<
"LRR: Iteration " << PrevIter <<
" reduction use count "
1509 << Count <<
" is not equal to the base use count "
1510 << BaseCount <<
"\n");
1532void LoopReroll::ReductionTracker::replaceSelected() {
1535 for (
int i : Reds) {
1537 for (
int e = PossibleReds[i].
size();
j !=
e; ++
j)
1538 if (PossibleRedIter[PossibleReds[i][j]] != 0) {
1544 SmallInstructionVector
Users;
1545 for (
User *U : PossibleReds[i].getReducedValue()->
users()) {
1546 Users.push_back(cast<Instruction>(U));
1551 PossibleReds[i][j]);
1599 const SCEV *BackedgeTakenCount,
1601 DAGRootTracker DAGRoots(
this, L,
IV, SE, AA, TLI, DT, LI, PreserveLCSSA,
1602 IVToIncMap, LoopControlIVs);
1604 if (!DAGRoots.findRoots())
1606 LLVM_DEBUG(
dbgs() <<
"LRR: Found all root induction increments for: " << *
IV
1617 DAGRoots.replace(BackedgeTakenCount);
1623bool LoopReroll::runOnLoop(
Loop *L) {
1625 LLVM_DEBUG(
dbgs() <<
"LRR: F[" << Header->getParent()->getName() <<
"] Loop %"
1626 << Header->getName() <<
" (" <<
L->getNumBlocks()
1630 if (
L->getNumBlocks() > 1)
1637 LLVM_DEBUG(
dbgs() <<
"\n Before Reroll:\n" << *(
L->getHeader()) <<
"\n");
1638 LLVM_DEBUG(
dbgs() <<
"LRR: backedge-taken count = " << *BackedgeTakenCount
1643 SmallInstructionVector PossibleIVs;
1645 LoopControlIVs.clear();
1646 collectPossibleIVs(L, PossibleIVs);
1648 if (PossibleIVs.empty()) {
1655 bool Changed =
false;
1660 if (reroll(PossibleIV, L, Header, BackedgeTakenCount,
Reductions)) {
1664 LLVM_DEBUG(
dbgs() <<
"\n After Reroll:\n" << *(
L->getHeader()) <<
"\n");
1676 return LoopReroll(&AR.
AA, &AR.
LI, &AR.
SE, &AR.
TLI, &AR.
DT,
true).runOnLoop(&L)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the BitVector class.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
Rewrite Partial Register Uses
iv Induction Variable Users
static bool isIgnorableInst(const Instruction *I)
static cl::opt< unsigned > NumToleratedFailedMatches("reroll-num-tolerated-failed-matches", cl::init(400), cl::Hidden, cl::desc("The maximum number of failures to tolerate" " during fuzzy matching. (default: 400)"))
static bool isLoopIncrement(User *U, Instruction *IV)
static bool isSimpleArithmeticOp(User *IVU)
Return true if IVU is a "simple" arithmetic operation.
static bool isUnorderedLoadStore(Instruction *I)
static bool hasUsesOutsideLoop(Instruction *I, Loop *L)
This file implements a map that provides insertion order iteration.
Module.h This file contains the declarations for the Module class.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
This defines the Use class.
static bool isAssociative(const COFFSection &Section)
static const uint32_t IV[8]
int64_t getSExtValue() const
Get sign extended value.
A container for analyses that lazily runs them and caches their results.
LLVM Basic Block Representation.
InstListType::iterator iterator
Instruction iterators...
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
BinaryOps getOpcode() const
Conditional or Unconditional Branch instruction.
void setCondition(Value *V)
void swapSuccessors()
Swap the successors of this branch instruction.
BasicBlock * getSuccessor(unsigned i) const
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Implements a dense probed hash-table based set.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
This instruction compares its operands according to the predicate given to the constructor.
bool isSameOperationAs(const Instruction *I, unsigned flags=0) const LLVM_READONLY
This function determines if the specified instruction executes the same operation as the current one.
bool isAssociative() const LLVM_READONLY
Return true if the instruction is associative:
bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
An instruction for reading from memory.
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Represents a single loop in the control flow graph.
This class implements a map that also provides access to all stored values in a deterministic order.
typename VectorType::iterator iterator
This is the common base class for memset/memcpy/memmove.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStart() const
This class uses information about analyze scalars to rewrite expressions in canonical form.
This class represents an analyzed expression in the program.
Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
const SCEV * getConstant(ConstantInt *V)
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
const SCEV * getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
bool hasLoopInvariantBackedgeTakenCount(const Loop *L)
Return true if the specified loop has an analyzable loop-invariant backedge-taken count.
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
bool isPointerTy() const
True if this is an instance of PointerType.
A Use represents the edge between a Value definition and its users.
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
iterator_range< user_iterator > users()
unsigned getNumUses() const
This method computes the number of uses of this Value.
@ C
The default llvm calling convention, compatible with C.
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
initializer< Ty > init(const Ty &Val)
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
const_iterator end(StringRef path)
Get end iterator over path.
This is an optimization pass for GlobalISel generic memory operations.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
auto reverse(ContainerTy &&C)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isModOrRefSet(const ModRefInfo MRI)
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
void replace(Container &Cont, typename Container::iterator ContIt, typename Container::iterator ContEnd, RandomAccessIterator ValIt, RandomAccessIterator ValEnd)
Given a sequence container Cont, replace the range [ContIt, ContEnd) with the range [ValIt,...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...