56#define DEBUG_TYPE "loop-interchange"
58STATISTIC(LoopsInterchanged,
"Number of loops interchanged");
62 cl::desc(
"Interchange if you gain more than this number"));
69using CharMatrix = std::vector<std::vector<char>>;
79#ifdef DUMP_DEP_MATRICIES
80static void printDepMatrix(CharMatrix &DepMatrix) {
81 for (
auto &Row : DepMatrix) {
100 if (!isa<Instruction>(
I))
102 if (
auto *Ld = dyn_cast<LoadInst>(&
I)) {
106 }
else if (
auto *St = dyn_cast<StoreInst>(&
I)) {
109 MemInstr.push_back(&
I);
115 <<
" Loads and Stores to analyze\n");
117 ValueVector::iterator
I, IE, J, JE;
119 for (
I = MemInstr.begin(), IE = MemInstr.end();
I != IE; ++
I) {
120 for (J =
I, JE = MemInstr.end(); J != JE; ++J) {
121 std::vector<char> Dep;
125 if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
128 if (
auto D = DI->
depends(Src, Dst,
true)) {
129 assert(
D->isOrdered() &&
"Expected an output, flow or anti dep.");
132 if (
D->normalize(SE))
135 D->isFlow() ?
"flow" :
D->isAnti() ?
"anti" :
"output";
136 dbgs() <<
"Found " << DepType
137 <<
" dependency between Src and Dst\n"
138 <<
" Src:" << *Src <<
"\n Dst:" << *Dst <<
'\n');
139 unsigned Levels =
D->getLevels();
141 for (
unsigned II = 1; II <= Levels; ++II) {
142 if (
D->isScalar(II)) {
146 unsigned Dir =
D->getDirection(II);
160 while (Dep.size() != Level) {
164 DepMatrix.push_back(Dep);
167 <<
" dependencies inside loop\n");
181 for (
unsigned I = 0,
E = DepMatrix.size();
I <
E; ++
I)
182 std::swap(DepMatrix[
I][ToIndx], DepMatrix[
I][FromIndx]);
190 for (
unsigned Level = 0; Level < DV.size(); ++Level) {
202 unsigned InnerLoopId,
203 unsigned OuterLoopId) {
204 unsigned NumRows = DepMatrix.size();
205 std::vector<char> Cur;
207 for (
unsigned Row = 0; Row < NumRows; ++Row) {
210 Cur = DepMatrix[Row];
213 std::swap(Cur[InnerLoopId], Cur[OuterLoopId]);
222 << L.getHeader()->getParent()->getName() <<
" Loop: %"
223 << L.getHeader()->getName() <<
'\n');
224 assert(LoopList.empty() &&
"LoopList should initially be empty!");
225 Loop *CurrentLoop = &L;
226 const std::vector<Loop *> *Vec = &CurrentLoop->
getSubLoops();
227 while (!Vec->empty()) {
231 if (Vec->size() != 1) {
236 LoopList.push_back(CurrentLoop);
237 CurrentLoop = Vec->front();
240 LoopList.push_back(CurrentLoop);
246class LoopInterchangeLegality {
250 : OuterLoop(
Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
253 bool canInterchangeLoops(
unsigned InnerLoopId,
unsigned OuterLoopId,
254 CharMatrix &DepMatrix);
262 bool isLoopStructureUnderstood();
264 bool currentLimitations();
267 return OuterInnerReductions;
271 return InnerLoopInductions;
275 bool tightlyNested(
Loop *Outer,
Loop *Inner);
276 bool containsUnsafeInstructions(
BasicBlock *BB);
282 bool findInductionAndReductions(
Loop *L,
304class LoopInterchangeProfitability {
308 : OuterLoop(
Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
311 bool isProfitable(
const Loop *InnerLoop,
const Loop *OuterLoop,
312 unsigned InnerLoopId,
unsigned OuterLoopId,
313 CharMatrix &DepMatrix,
315 std::unique_ptr<CacheCost> &
CC);
318 int getInstrOrderCost();
319 std::optional<bool> isProfitablePerLoopCacheAnalysis(
321 std::unique_ptr<CacheCost> &
CC);
322 std::optional<bool> isProfitablePerInstrOrderCost();
323 std::optional<bool> isProfitableForVectorization(
unsigned InnerLoopId,
324 unsigned OuterLoopId,
325 CharMatrix &DepMatrix);
337class LoopInterchangeTransform {
341 const LoopInterchangeLegality &LIL)
342 : OuterLoop(
Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {}
346 void restructureLoops(
Loop *NewInner,
Loop *NewOuter,
349 void removeChildLoop(
Loop *OuterLoop,
Loop *InnerLoop);
352 bool adjustLoopLinks();
353 bool adjustLoopBranches();
364 const LoopInterchangeLegality &LIL;
367struct LoopInterchange {
372 std::unique_ptr<CacheCost>
CC =
nullptr;
380 : SE(SE), LI(LI), DI(DI), DT(DT),
CC(
std::
move(
CC)), ORE(ORE) {}
383 if (
L->getParentLoop())
387 return processLoopList(LoopList);
392 for (
unsigned I = 1;
I < LoopList.size(); ++
I)
393 if (LoopList[
I]->getParentLoop() != LoopList[
I - 1])
395 return processLoopList(LoopList);
399 for (
Loop *L : LoopList) {
401 if (isa<SCEVCouldNotCompute>(ExitCountOuter)) {
405 if (
L->getNumBackEdges() != 1) {
409 if (!
L->getExitingBlock()) {
420 return LoopList.
size() - 1;
424 bool Changed =
false;
425 unsigned LoopNestDepth = LoopList.
size();
426 if (LoopNestDepth < 2) {
427 LLVM_DEBUG(
dbgs() <<
"Loop doesn't contain minimum nesting level.\n");
435 if (!isComputableLoopNest(LoopList)) {
436 LLVM_DEBUG(
dbgs() <<
"Not valid loop candidate for interchange\n");
440 LLVM_DEBUG(
dbgs() <<
"Processing LoopList of size = " << LoopNestDepth
443 CharMatrix DependencyMatrix;
444 Loop *OuterMostLoop = *(LoopList.
begin());
446 OuterMostLoop, DI, SE)) {
450#ifdef DUMP_DEP_MATRICIES
452 printDepMatrix(DependencyMatrix);
462 unsigned SelecLoopId = selectLoopForInterchange(LoopList);
473 const auto &LoopCosts =
CC->getLoopCosts();
474 for (
unsigned i = 0; i < LoopCosts.size(); i++) {
475 CostMap[LoopCosts[i].first] = i;
482 for (
unsigned j = SelecLoopId;
j > 0;
j--) {
483 bool ChangedPerIter =
false;
484 for (
unsigned i = SelecLoopId; i > SelecLoopId -
j; i--) {
485 bool Interchanged = processLoop(LoopList[i], LoopList[i - 1], i, i - 1,
486 DependencyMatrix, CostMap);
493#ifdef DUMP_DEP_MATRICIES
495 printDepMatrix(DependencyMatrix);
497 ChangedPerIter |= Interchanged;
498 Changed |= Interchanged;
508 bool processLoop(
Loop *InnerLoop,
Loop *OuterLoop,
unsigned InnerLoopId,
509 unsigned OuterLoopId,
510 std::vector<std::vector<char>> &DependencyMatrix,
513 <<
" and OuterLoopId = " << OuterLoopId <<
"\n");
514 LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
515 if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
516 LLVM_DEBUG(
dbgs() <<
"Not interchanging loops. Cannot prove legality.\n");
520 LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
521 if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
522 DependencyMatrix, CostMap, CC)) {
531 <<
"Loop interchanged with enclosing loop.";
534 LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL);
546bool LoopInterchangeLegality::containsUnsafeInstructions(
BasicBlock *BB) {
548 return I.mayHaveSideEffects() ||
I.mayReadFromMemory();
552bool LoopInterchangeLegality::tightlyNested(
Loop *OuterLoop,
Loop *InnerLoop) {
564 if (!OuterLoopHeaderBI)
568 if (Succ != InnerLoopPreHeader && Succ != InnerLoop->
getHeader() &&
569 Succ != OuterLoopLatch)
572 LLVM_DEBUG(
dbgs() <<
"Checking instructions in Loop header and Loop latch\n");
575 if (containsUnsafeInstructions(OuterLoopHeader) ||
576 containsUnsafeInstructions(OuterLoopLatch))
582 if (InnerLoopPreHeader != OuterLoopHeader &&
583 containsUnsafeInstructions(InnerLoopPreHeader))
591 if (&SuccInner != OuterLoopLatch) {
593 <<
" does not lead to the outer loop latch.\n";);
599 if (containsUnsafeInstructions(InnerLoopExit))
607bool LoopInterchangeLegality::isLoopStructureUnderstood() {
609 for (
PHINode *InnerInduction : InnerLoopInductions) {
610 unsigned Num = InnerInduction->getNumOperands();
611 for (
unsigned i = 0; i < Num; ++i) {
612 Value *Val = InnerInduction->getOperand(i);
613 if (isa<Constant>(Val))
622 if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
623 InnerLoopPreheader &&
643 Value *Op0 = InnerLoopCmp->getOperand(0);
644 Value *Op1 = InnerLoopCmp->getOperand(1);
655 std::function<
bool(
Value *)> IsPathToInnerIndVar;
656 IsPathToInnerIndVar = [
this, &IsPathToInnerIndVar](
const Value *
V) ->
bool {
659 if (isa<Constant>(V))
664 if (isa<CastInst>(
I))
665 return IsPathToInnerIndVar(
I->getOperand(0));
666 if (isa<BinaryOperator>(
I))
667 return IsPathToInnerIndVar(
I->getOperand(0)) &&
668 IsPathToInnerIndVar(
I->getOperand(1));
674 if (IsPathToInnerIndVar(Op0) && IsPathToInnerIndVar(Op1))
679 if (IsPathToInnerIndVar(Op0) && !isa<Constant>(Op0)) {
682 }
else if (IsPathToInnerIndVar(Op1) && !isa<Constant>(Op1)) {
705 if (
PHI->getNumIncomingValues() != 1)
713 if (isa<Constant>(V))
718 if (
PHI->getNumIncomingValues() == 1)
734bool LoopInterchangeLegality::findInductionAndReductions(
736 if (!
L->getLoopLatch() || !
L->getLoopPredecessor())
746 if (!OuterInnerReductions.count(&
PHI)) {
748 "across the outer loop.\n");
752 assert(
PHI.getNumIncomingValues() == 2 &&
753 "Phis in loop header should have exactly 2 incoming values");
762 <<
"Failed to recognize PHI as an induction or reduction.\n");
765 OuterInnerReductions.insert(&
PHI);
766 OuterInnerReductions.insert(InnerRedPhi);
775bool LoopInterchangeLegality::currentLimitations() {
785 dbgs() <<
"Loops where the latch is not the exiting block are not"
786 <<
" supported currently.\n");
791 <<
"Loops where the latch is not the exiting block cannot be"
792 " interchange currently.";
798 if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
800 dbgs() <<
"Only outer loops with induction or reduction PHI nodes "
801 <<
"are supported currently.\n");
806 <<
"Only outer loops with induction or reduction PHI nodes can be"
807 " interchanged currently.";
816 Loop *CurLevelLoop = OuterLoop;
819 CurLevelLoop = CurLevelLoop->
getSubLoops().front();
820 if (!findInductionAndReductions(CurLevelLoop, Inductions,
nullptr)) {
822 dbgs() <<
"Only inner loops with induction or reduction PHI nodes "
823 <<
"are supported currently.\n");
828 <<
"Only inner loops with induction or reduction PHI nodes can be"
829 " interchange currently.";
836 if (!isLoopStructureUnderstood()) {
842 <<
"Inner loop structure not understood currently.";
850bool LoopInterchangeLegality::findInductions(
857 return !Inductions.
empty();
869 if (
PHI.getNumIncomingValues() > 1)
872 PHINode *PN = dyn_cast<PHINode>(U);
874 (!Reductions.count(PN) && OuterL->contains(PN->getParent()));
892 for (
unsigned i = 0; i <
PHI.getNumIncomingValues(); i++) {
893 Instruction *IncomingI = dyn_cast<Instruction>(
PHI.getIncomingValue(i));
939 for (
auto *U :
PHI.users()) {
948bool LoopInterchangeLegality::canInterchangeLoops(
unsigned InnerLoopId,
949 unsigned OuterLoopId,
950 CharMatrix &DepMatrix) {
952 LLVM_DEBUG(
dbgs() <<
"Failed interchange InnerLoopId = " << InnerLoopId
953 <<
" and OuterLoopId = " << OuterLoopId
954 <<
" due to dependence\n");
959 <<
"Cannot interchange loops due to dependences.";
964 for (
auto *BB : OuterLoop->
blocks())
966 if (
CallInst *CI = dyn_cast<CallInst>(&
I)) {
968 if (CI->onlyWritesMemory())
971 dbgs() <<
"Loops with call instructions cannot be interchanged "
977 <<
"Cannot interchange loops due to call instruction.";
983 if (!findInductions(InnerLoop, InnerLoopInductions)) {
984 LLVM_DEBUG(
dbgs() <<
"Cound not find inner loop induction variables.\n");
989 LLVM_DEBUG(
dbgs() <<
"Found unsupported PHI nodes in inner loop latch.\n");
994 <<
"Cannot interchange loops because unsupported PHI nodes found "
995 "in inner loop latch.";
1002 if (currentLimitations()) {
1003 LLVM_DEBUG(
dbgs() <<
"Not legal because of current transform limitation\n");
1008 if (!tightlyNested(OuterLoop, InnerLoop)) {
1014 <<
"Cannot interchange loops because they are not tightly "
1021 OuterInnerReductions)) {
1022 LLVM_DEBUG(
dbgs() <<
"Found unsupported PHI nodes in inner loop exit.\n");
1027 <<
"Found unsupported PHI node in loop exit.";
1033 LLVM_DEBUG(
dbgs() <<
"Found unsupported PHI nodes in outer loop exit.\n");
1038 <<
"Found unsupported PHI node in loop exit.";
1046int LoopInterchangeProfitability::getInstrOrderCost() {
1047 unsigned GoodOrder, BadOrder;
1048 BadOrder = GoodOrder = 0;
1052 unsigned NumOp =
GEP->getNumOperands();
1053 bool FoundInnerInduction =
false;
1054 bool FoundOuterInduction =
false;
1055 for (
unsigned i = 0; i < NumOp; ++i) {
1070 if (AR->
getLoop() == InnerLoop) {
1073 FoundInnerInduction =
true;
1074 if (FoundOuterInduction) {
1084 if (AR->
getLoop() == OuterLoop) {
1087 FoundOuterInduction =
true;
1088 if (FoundInnerInduction) {
1097 return GoodOrder - BadOrder;
1101LoopInterchangeProfitability::isProfitablePerLoopCacheAnalysis(
1103 std::unique_ptr<CacheCost> &
CC) {
1108 unsigned InnerIndex = 0, OuterIndex = 0;
1109 InnerIndex = CostMap.
find(InnerLoop)->second;
1110 OuterIndex = CostMap.
find(OuterLoop)->second;
1112 <<
", OuterIndex = " << OuterIndex <<
"\n");
1113 if (InnerIndex < OuterIndex)
1114 return std::optional<bool>(
true);
1115 assert(InnerIndex != OuterIndex &&
"CostMap should assign unique "
1116 "numbers to each loop");
1117 if (
CC->getLoopCost(*OuterLoop) ==
CC->getLoopCost(*InnerLoop))
1118 return std::nullopt;
1119 return std::optional<bool>(
false);
1121 return std::nullopt;
1125LoopInterchangeProfitability::isProfitablePerInstrOrderCost() {
1129 int Cost = getInstrOrderCost();
1132 return std::optional<bool>(
true);
1134 return std::nullopt;
1137std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
1138 unsigned InnerLoopId,
unsigned OuterLoopId, CharMatrix &DepMatrix) {
1139 for (
auto &Row : DepMatrix) {
1143 if (Row[InnerLoopId] ==
'I' || Row[InnerLoopId] ==
'=')
1144 return std::optional<bool>(
false);
1149 if (Row[OuterLoopId] !=
'I' && Row[OuterLoopId] !=
'=')
1150 return std::optional<bool>(
false);
1155 return std::optional<bool>(!DepMatrix.empty());
1158bool LoopInterchangeProfitability::isProfitable(
1159 const Loop *InnerLoop,
const Loop *OuterLoop,
unsigned InnerLoopId,
1160 unsigned OuterLoopId, CharMatrix &DepMatrix,
1162 std::unique_ptr<CacheCost> &
CC) {
1171 std::optional<bool> shouldInterchange =
1172 isProfitablePerLoopCacheAnalysis(CostMap,
CC);
1173 if (!shouldInterchange.has_value()) {
1174 shouldInterchange = isProfitablePerInstrOrderCost();
1175 if (!shouldInterchange.has_value())
1177 isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
1179 if (!shouldInterchange.has_value()) {
1184 <<
"Insufficient information to calculate the cost of loop for "
1188 }
else if (!shouldInterchange.value()) {
1193 <<
"Interchanging loops is not considered to improve cache "
1194 "locality nor vectorization.";
1201void LoopInterchangeTransform::removeChildLoop(
Loop *OuterLoop,
1203 for (
Loop *L : *OuterLoop)
1204 if (L == InnerLoop) {
1205 OuterLoop->removeChildLoop(L);
1234void LoopInterchangeTransform::restructureLoops(
1244 if (OuterLoopParent) {
1246 removeChildLoop(OuterLoopParent, NewInner);
1247 removeChildLoop(NewInner, NewOuter);
1250 removeChildLoop(NewInner, NewOuter);
1275 if (BB == OuterHeader || BB == OuterLatch)
1290bool LoopInterchangeTransform::transform() {
1291 bool Transformed =
false;
1296 auto &InductionPHIs = LIL.getInnerLoopInductions();
1297 if (InductionPHIs.empty()) {
1298 LLVM_DEBUG(
dbgs() <<
"Failed to find the point to split loop latch \n");
1303 for (
PHINode *CurInductionPHI : InductionPHIs) {
1304 if (CurInductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
1306 dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(1)));
1309 dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(0)));
1322 auto MoveInstructions = [&i, &WorkList,
this, &InductionPHIs, NewLatch]() {
1323 for (; i < WorkList.
size(); i++) {
1329 "Moving instructions with side-effects may change behavior of "
1340 for (
Value *Op : WorkList[i]->operands()) {
1358 for (
Instruction *InnerIndexVar : InnerIndexVarList)
1359 WorkList.
insert(cast<Instruction>(InnerIndexVar));
1376 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1377 if (InnerLoopPreHeader != OuterLoopHeader) {
1381 std::prev(InnerLoopPreHeader->
end()))))
1385 Transformed |= adjustLoopLinks();
1410 I->removeFromParent();
1425 std::vector<DominatorTree::UpdateType> &DTUpdates,
1426 bool MustUpdateOnce =
true) {
1427 assert((!MustUpdateOnce ||
1431 }) == 1) &&
"BI must jump to OldBB exactly once.");
1432 bool Changed =
false;
1440 DTUpdates.push_back(
1441 {DominatorTree::UpdateKind::Insert, BI->
getParent(), NewBB});
1442 DTUpdates.push_back(
1443 {DominatorTree::UpdateKind::Delete, BI->
getParent(), OldBB});
1445 assert(Changed &&
"Expected a successor to be updated");
1462 assert(
P.getNumIncomingValues() == 1 &&
1463 "Only loops with a single exit are supported!");
1466 auto IncI = cast<Instruction>(
P.getIncomingValueForBlock(InnerLatch));
1469 auto IncIInnerMost = cast<Instruction>(
followLCSSA(IncI));
1472 if (IncIInnerMost->getParent() != InnerLatch &&
1473 IncIInnerMost->
getParent() != InnerHeader)
1477 [OuterHeader, OuterExit, IncI, InnerHeader](
User *U) {
1478 return (cast<PHINode>(U)->getParent() == OuterHeader &&
1479 IncI->getParent() == InnerHeader) ||
1480 cast<PHINode>(U)->getParent() == OuterExit;
1482 "Can only replace phis iff the uses are in the loop nest exit or "
1483 "the incoming value is defined in the inner header (it will "
1484 "dominate all loop blocks after interchanging)");
1485 P.replaceAllUsesWith(IncI);
1486 P.eraseFromParent();
1516 if (
P.getNumIncomingValues() != 1)
1520 auto I = dyn_cast<Instruction>(
P.getIncomingValue(0));
1524 PHINode *NewPhi = dyn_cast<PHINode>(
P.clone());
1530 if (Pred == OuterLatch)
1535 P.setIncomingValue(0, NewPhi);
1545bool LoopInterchangeTransform::adjustLoopBranches() {
1547 std::vector<DominatorTree::UpdateType> DTUpdates;
1549 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1552 assert(OuterLoopPreHeader != OuterLoop->getHeader() &&
1553 InnerLoopPreHeader != InnerLoop->
getHeader() && OuterLoopPreHeader &&
1554 InnerLoopPreHeader &&
"Guaranteed by loop-simplify form");
1559 if (isa<PHINode>(OuterLoopPreHeader->
begin()) ||
1561 OuterLoopPreHeader =
1563 if (InnerLoopPreHeader == OuterLoop->getHeader())
1564 InnerLoopPreHeader =
1569 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1571 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1587 if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
1588 !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
1593 dyn_cast<BranchInst>(InnerLoopLatchPredecessor->
getTerminator());
1595 dyn_cast<BranchInst>(OuterLoopPredecessor->
getTerminator());
1597 if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
1600 if (!InnerLoopHeaderSuccessor)
1608 InnerLoopPreHeader, DTUpdates,
false);
1618 InnerLoopHeaderSuccessor, DTUpdates,
1626 OuterLoopPreHeader, DTUpdates);
1629 if (InnerLoopLatchBI->
getSuccessor(0) == InnerLoopHeader)
1630 InnerLoopLatchSuccessor = InnerLoopLatchBI->
getSuccessor(1);
1632 InnerLoopLatchSuccessor = InnerLoopLatchBI->
getSuccessor(0);
1635 InnerLoopLatchSuccessor, DTUpdates);
1637 if (OuterLoopLatchBI->
getSuccessor(0) == OuterLoopHeader)
1638 OuterLoopLatchSuccessor = OuterLoopLatchBI->
getSuccessor(1);
1640 OuterLoopLatchSuccessor = OuterLoopLatchBI->
getSuccessor(0);
1643 OuterLoopLatchSuccessor, DTUpdates);
1644 updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
1648 restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
1649 OuterLoopPreHeader);
1651 moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
1652 OuterLoopHeader, OuterLoopLatch, InnerLoop->
getExitBlock(),
1658 auto &OuterInnerReductions = LIL.getOuterInnerReductions();
1662 if (OuterInnerReductions.contains(&
PHI))
1666 if (OuterInnerReductions.contains(&
PHI))
1675 assert(OuterInnerReductions.count(
PHI) &&
"Expected a reduction PHI node");
1680 assert(OuterInnerReductions.count(
PHI) &&
"Expected a reduction PHI node");
1703bool LoopInterchangeTransform::adjustLoopLinks() {
1705 bool Changed = adjustLoopBranches();
1710 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1724 std::unique_ptr<CacheCost>
CC =
1727 if (!LoopInterchange(&AR.
SE, &AR.
LI, &DI, &AR.
DT,
CC, &ORE).run(LN))
1729 U.markLoopNestChanged(
true);
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Given that RA is a live propagate it s liveness to any other values it uses(according to Uses). void DeadArgumentEliminationPass
This file defines the interface for the loop cache analysis.
Loop::LoopBounds::Direction Direction
static cl::opt< int > LoopInterchangeCostThreshold("loop-interchange-threshold", cl::init(0), cl::Hidden, cl::desc("Interchange if you gain more than this number"))
static PHINode * findInnerReductionPhi(Loop *L, Value *V)
static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore)
Move all instructions except the terminator from FromBB right before InsertBefore.
static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx, unsigned ToIndx)
static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader, BasicBlock *InnerLatch, BasicBlock *OuterHeader, BasicBlock *OuterLatch, BasicBlock *OuterExit, Loop *InnerLoop, LoopInfo *LI)
static const unsigned MaxLoopNestDepth
static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2)
Swap instructions between BB1 and BB2 but keep terminators intact.
static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, unsigned InnerLoopId, unsigned OuterLoopId)
static bool areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL, SmallPtrSetImpl< PHINode * > &Reductions)
static const unsigned MaxMemInstrCount
static Value * followLCSSA(Value *SV)
static void populateWorklist(Loop &L, LoopVector &LoopList)
static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB, BasicBlock *NewBB, std::vector< DominatorTree::UpdateType > &DTUpdates, bool MustUpdateOnce=true)
static bool isLexicographicallyPositive(std::vector< char > &DV)
static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, Loop *L, DependenceInfo *DI, ScalarEvolution *SE)
This file defines the interface for the loop nest analysis.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
A container for analyses that lazily runs them and caches their results.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
void replacePhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
const Function * getParent() const
Return the enclosing method, or null if none.
LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
static std::unique_ptr< CacheCost > getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, std::optional< unsigned > TRT=std::nullopt)
Create a CacheCost for the loop nest rooted by Root.
This class represents a function call, abstracting a target machine's calling convention.
This class is the base class for the comparison instructions.
iterator find(const_arg_type_t< KeyT > Val)
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
DependenceInfo - This class is the main dependence-analysis driver.
std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool PossiblyLoopIndependent)
depends - Tests for a dependence between the Src and Dst instructions.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
A struct for saving information about induction variables.
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
const BasicBlock * getParent() const
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
void removeBlockFromLoop(BlockT *BB)
This removes the specified basic block from the current loop, updating the Blocks as appropriate.
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
BlockT * getHeader() const
iterator_range< block_iterator > blocks() const
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
void addBlockEntry(BlockT *BB)
This adds a basic block directly to the basic block list.
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
BlockT * getUniqueExitBlock() const
If getUniqueExitBlocks would return exactly one block, return that block.
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
void changeTopLevelLoop(LoopT *OldLoop, LoopT *NewLoop)
Replace the specified loop in the top-level loops list with the indicated loop.
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
static const BasicBlock & skipEmptyBlockUntil(const BasicBlock *From, const BasicBlock *End, bool CheckUniquePred=false)
Recursivelly traverse all empty 'single successor' basic blocks of From (if there are any).
ArrayRef< Loop * > getLoops() const
Get the loops in the nest.
Function * getParent() const
Return the function to which the loop-nest belongs.
Loop & getOutermostLoop() const
Return the outermost loop in the loop nest.
Represents a single loop in the control flow graph.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
void setIncomingBlock(unsigned i, BasicBlock *BB)
void setIncomingValue(unsigned i, Value *V)
static unsigned getIncomingValueNumForOperand(unsigned i)
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Instruction * getExactFPMathInst() const
Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
This node represents a polynomial recurrence on the trip count of the specified loop.
const Loop * getLoop() const
This class represents an analyzed expression in the program.
The main scalar evolution driver.
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto successors(const MachineBasicBlock *BB)
bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto map_range(ContainerTy &&C, FuncTy F)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, IRBuilderBase &Builder, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Direction
An enum for the direction of the loop.