55#define DEBUG_TYPE "loop-interchange"
57STATISTIC(LoopsInterchanged,
"Number of loops interchanged");
61 cl::desc(
"Interchange if you gain more than this number"));
65 cl::desc(
"Maximum number of load/store instructions squared in relation to "
66 "the total number of instructions. Higher value may lead to more "
67 "interchanges at the cost of compile-time"));
81using CharMatrix = std::vector<std::vector<char>>;
96 cl::desc(
"Minimum depth of loop nest considered for the transform"));
101 cl::desc(
"Maximum depth of loop nest considered for the transform"));
107 cl::desc(
"List of profitability heuristics to be used. They are applied in "
110 RuleTy::ForVectorization}),
112 "Prioritize loop cache cost"),
113 clEnumValN(RuleTy::PerInstrOrderCost,
"instorder",
114 "Prioritize the IVs order of each instruction"),
115 clEnumValN(RuleTy::ForVectorization,
"vectorize",
116 "Prioritize vectorization"),
118 "Ignore profitability, force interchange (does not "
119 "work with other options)")));
124 cl::desc(
"Support for the inner-loop reduction pattern."));
129 for (RuleTy Rule : Rules) {
130 if (!Set.insert(Rule).second)
132 if (Rule == RuleTy::Ignore)
139 for (
auto &Row : DepMatrix) {
152 assert(Src->getParent() == Dst->getParent() && Src != Dst &&
153 "Expected Src and Dst to be different instructions in the same BB");
155 bool FoundSrc =
false;
176 unsigned NumInsts = 0;
202 unsigned NumMemInstr = MemInstr.
size();
204 <<
" Loads and Stores to analyze\n");
208 L->getStartLoc(), L->getHeader())
209 <<
"Number of loads/stores exceeded, the supported maximum can be "
210 "increased with option -loop-interchange-max-mem-instr-ratio.";
220 for (
I = MemInstr.
begin(), IE = MemInstr.
end();
I != IE; ++
I) {
221 for (J =
I, JE = MemInstr.
end(); J != JE; ++J) {
222 std::vector<char> Dep;
229 if (
auto D = DI->
depends(Src, Dst)) {
230 assert(
D->isOrdered() &&
"Expected an output, flow or anti dep.");
233 if (
D->normalize(SE))
236 D->isFlow() ?
"flow" :
D->isAnti() ?
"anti" :
"output";
237 dbgs() <<
"Found " << DepType
238 <<
" dependency between Src and Dst\n"
239 <<
" Src:" << *Src <<
"\n Dst:" << *Dst <<
'\n');
240 unsigned Levels =
D->getLevels();
242 for (
unsigned II = 1;
II <= Levels; ++
II) {
249 unsigned Dir =
D->getDirection(
II);
263 if (
D->isConfused()) {
264 assert(Dep.empty() &&
"Expected empty dependency vector");
265 Dep.assign(Level,
'*');
268 while (Dep.size() != Level) {
277 L->getStartLoc(), L->getHeader())
278 <<
"All loops have dependencies in all directions.";
284 bool IsKnownForward =
true;
285 if (Src->getParent() != Dst->getParent()) {
289 IsKnownForward =
false;
295 "Unexpected instructions");
300 bool IsReversed =
D->getSrc() != Src;
302 IsKnownForward =
false;
318 DepMatrix.push_back(Dep);
325 DepMatrix[Ite->second].back() =
'*';
337 for (
auto &Row : DepMatrix)
346static std::optional<bool>
359 unsigned InnerLoopId,
360 unsigned OuterLoopId) {
361 unsigned NumRows = DepMatrix.size();
362 std::vector<char> Cur;
364 for (
unsigned Row = 0; Row < NumRows; ++Row) {
367 Cur = DepMatrix[Row];
380 std::swap(Cur[InnerLoopId], Cur[OuterLoopId]);
389 << L.getHeader()->getParent()->getName() <<
" Loop: %"
390 << L.getHeader()->getName() <<
'\n');
391 assert(LoopList.
empty() &&
"LoopList should initially be empty!");
392 Loop *CurrentLoop = &L;
393 const std::vector<Loop *> *Vec = &CurrentLoop->
getSubLoops();
394 while (!Vec->empty()) {
398 if (Vec->size() != 1) {
404 CurrentLoop = Vec->front();
412 unsigned LoopNestDepth = LoopList.
size();
414 LLVM_DEBUG(
dbgs() <<
"Unsupported depth of loop nest " << LoopNestDepth
422 <<
"Unsupported depth of loop nest, the supported range is ["
433 for (
Loop *L : LoopList) {
439 if (L->getNumBackEdges() != 1) {
443 if (!L->getExitingBlock()) {
454class LoopInterchangeLegality {
456 LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
457 OptimizationRemarkEmitter *ORE, DominatorTree *DT)
458 : OuterLoop(
Outer), InnerLoop(Inner), SE(SE), DT(DT), ORE(ORE) {}
461 bool canInterchangeLoops(
unsigned InnerLoopId,
unsigned OuterLoopId,
462 CharMatrix &DepMatrix);
466 bool isLoopStructureUnderstood();
468 bool currentLimitations();
470 const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions()
const {
471 return OuterInnerReductions;
475 return InnerLoopInductions;
479 return HasNoWrapReductions;
488 struct InnerReduction {
496 StoreInst *LcssaStore;
503 return InnerReductions;
507 bool tightlyNested(Loop *Outer, Loop *Inner);
508 bool containsUnsafeInstructions(BasicBlock *BB, Instruction *Skip);
520 bool checkInductionsAndReductions(Loop *OuterLoop);
532 bool isInnerReduction(Loop *L, PHINode *Phi,
533 SmallVectorImpl<Instruction *> &HasNoWrapInsts);
542 OptimizationRemarkEmitter *ORE;
546 SmallPtrSet<PHINode *, 4> OuterInnerReductions;
554 SmallVector<Instruction *, 4> HasNoWrapReductions;
558 SmallVector<Instruction *, 4> HasNoInfInsts;
567class CacheCostManager {
569 LoopStandardAnalysisResults *AR;
574 std::optional<std::unique_ptr<CacheCost>> CC;
578 DenseMap<const Loop *, unsigned> CostMap;
580 void computeIfUnitinialized();
583 CacheCostManager(Loop *OutermostLoop, LoopStandardAnalysisResults *AR,
585 : OutermostLoop(OutermostLoop), AR(AR), DI(DI) {}
586 CacheCost *getCacheCost();
587 const DenseMap<const Loop *, unsigned> &getCostMap();
592class LoopInterchangeProfitability {
594 LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
595 OptimizationRemarkEmitter *ORE)
596 : OuterLoop(
Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
599 bool isProfitable(
const Loop *InnerLoop,
const Loop *OuterLoop,
600 unsigned InnerLoopId,
unsigned OuterLoopId,
601 CharMatrix &DepMatrix, CacheCostManager &CCM);
604 int getInstrOrderCost();
605 std::optional<bool> isProfitablePerLoopCacheAnalysis(
606 const DenseMap<const Loop *, unsigned> &CostMap, CacheCost *CC);
607 std::optional<bool> isProfitablePerInstrOrderCost();
608 std::optional<bool> isProfitableForVectorization(
unsigned InnerLoopId,
609 unsigned OuterLoopId,
610 CharMatrix &DepMatrix);
618 OptimizationRemarkEmitter *ORE;
622class LoopInterchangeTransform {
624 LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
625 LoopInfo *LI, DominatorTree *DT,
626 const LoopInterchangeLegality &LIL)
627 : OuterLoop(
Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {}
632 void reduction2Memory();
633 void restructureLoops(Loop *NewInner, Loop *NewOuter,
634 BasicBlock *OrigInnerPreHeader,
635 BasicBlock *OrigOuterPreHeader);
636 void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
639 bool adjustLoopLinks();
640 bool adjustLoopBranches();
651 const LoopInterchangeLegality &LIL;
654struct LoopInterchange {
655 ScalarEvolution *SE =
nullptr;
656 LoopInfo *LI =
nullptr;
657 DependenceInfo *DI =
nullptr;
658 DominatorTree *DT =
nullptr;
659 LoopStandardAnalysisResults *AR =
nullptr;
662 OptimizationRemarkEmitter *ORE;
664 LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
665 DominatorTree *DT, LoopStandardAnalysisResults *AR,
666 OptimizationRemarkEmitter *ORE)
667 : SE(SE), LI(LI), DI(DI), DT(DT), AR(AR), ORE(ORE) {}
670 if (
L->getParentLoop())
672 SmallVector<Loop *, 8> LoopList;
674 return processLoopList(LoopList);
677 bool run(LoopNest &LN) {
678 SmallVector<Loop *, 8> LoopList(LN.
getLoops());
679 for (
unsigned I = 1;
I < LoopList.size(); ++
I)
680 if (LoopList[
I]->getParentLoop() != LoopList[
I - 1])
682 return processLoopList(LoopList);
688 return LoopList.
size() - 1;
691 bool processLoopList(SmallVectorImpl<Loop *> &LoopList) {
696 "Unsupported depth of loop nest.");
698 unsigned LoopNestDepth = LoopList.
size();
701 dbgs() <<
"Processing LoopList of size = " << LoopNestDepth
702 <<
" containing the following loops:\n";
703 for (
auto *L : LoopList) {
709 CharMatrix DependencyMatrix;
710 Loop *OuterMostLoop = *(LoopList.begin());
712 OuterMostLoop, DI, SE, ORE)) {
724 <<
"' needs an unique exit block");
728 unsigned SelecLoopId = selectLoopForInterchange(LoopList);
729 CacheCostManager CCM(LoopList[0], AR, DI);
734 for (
unsigned j = SelecLoopId;
j > 0;
j--) {
735 bool ChangedPerIter =
false;
736 for (
unsigned i = SelecLoopId; i > SelecLoopId -
j; i--) {
738 processLoop(LoopList, i, i - 1, DependencyMatrix, CCM);
739 ChangedPerIter |= Interchanged;
750 bool processLoop(SmallVectorImpl<Loop *> &LoopList,
unsigned InnerLoopId,
751 unsigned OuterLoopId,
752 std::vector<std::vector<char>> &DependencyMatrix,
753 CacheCostManager &CCM) {
754 Loop *OuterLoop = LoopList[OuterLoopId];
755 Loop *InnerLoop = LoopList[InnerLoopId];
757 <<
" and OuterLoopId = " << OuterLoopId <<
"\n");
758 LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE, DT);
759 if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
760 LLVM_DEBUG(
dbgs() <<
"Cannot prove legality, not interchanging loops '"
761 << OuterLoop->
getName() <<
"' and '"
762 << InnerLoop->
getName() <<
"'\n");
767 <<
"' are legal to interchange\n");
768 LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
769 if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
770 DependencyMatrix, CCM)) {
772 <<
"' and '" << InnerLoop->
getName()
773 <<
"' not profitable.\n");
778 return OptimizationRemark(
DEBUG_TYPE,
"Interchanged",
781 <<
"Loop interchanged with enclosing loop.";
784 LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL);
785 LIT.transform(LIL.getHasNoWrapReductions(), LIL.getHasNoInfInsts());
787 << OuterLoop->
getName() <<
"' and inner loop '"
788 << InnerLoop->
getName() <<
"'\n");
794 std::swap(LoopList[OuterLoopId], LoopList[InnerLoopId]);
807bool LoopInterchangeLegality::containsUnsafeInstructions(
BasicBlock *BB,
809 return any_of(*BB, [Skip](
const Instruction &
I) {
812 return I.mayHaveSideEffects() ||
I.mayReadFromMemory();
816bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
822 <<
"' and '" << InnerLoop->
getName()
823 <<
"' are tightly nested\n");
843 for (BasicBlock *Succ :
successors(OuterLoopHeader))
844 if (Succ != InnerLoopPreHeader && Succ != InnerLoop->
getHeader())
847 LLVM_DEBUG(
dbgs() <<
"Checking instructions in Loop header and Loop latch\n");
853 assert(InnerReductions.size() <= 1 &&
854 "So far we only support at most one reduction.");
855 if (InnerReductions.size() == 1)
856 Skip = InnerReductions[0].LcssaStore;
860 if (containsUnsafeInstructions(OuterLoopHeader, Skip) ||
861 containsUnsafeInstructions(OuterLoopLatch, Skip))
867 if (InnerLoopPreHeader != OuterLoopHeader &&
868 containsUnsafeInstructions(InnerLoopPreHeader, Skip))
876 if (&SuccInner != OuterLoopLatch) {
878 <<
" does not lead to the outer loop latch.\n";);
884 if (containsUnsafeInstructions(InnerLoopExit, Skip))
892bool LoopInterchangeLegality::isLoopStructureUnderstood() {
894 for (PHINode *InnerInduction : InnerLoopInductions) {
895 unsigned Num = InnerInduction->getNumOperands();
896 for (
unsigned i = 0; i < Num; ++i) {
897 Value *Val = InnerInduction->getOperand(i);
907 if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
908 InnerLoopPreheader &&
922 CondBrInst *InnerLoopLatchBI =
924 if (!InnerLoopLatchBI)
943 std::function<bool(
Value *)> IsPathToInnerIndVar;
944 IsPathToInnerIndVar = [
this, &IsPathToInnerIndVar](
const Value *
V) ->
bool {
953 return IsPathToInnerIndVar(
I->getOperand(0));
955 return IsPathToInnerIndVar(
I->getOperand(0)) &&
956 IsPathToInnerIndVar(
I->getOperand(1));
962 if (IsPathToInnerIndVar(Op0) && IsPathToInnerIndVar(Op1))
970 }
else if (IsPathToInnerIndVar(Op1) && !
isa<Constant>(Op1)) {
992 if (
PHI->getNumIncomingValues() != 1)
1075 assert(
I->getOpcode() == OpCode &&
1076 "Expected the instruction to be the reduction operation");
1081 if (
I->hasNoSignedWrap() ||
I->hasNoUnsignedWrap())
1105 if (
PHI->getNumIncomingValues() == 1)
1118bool LoopInterchangeLegality::isInnerReduction(
1119 Loop *L, PHINode *Phi, SmallVectorImpl<Instruction *> &HasNoWrapInsts) {
1123 if (!
L->isInnermost()) {
1124 LLVM_DEBUG(
dbgs() <<
"Only supported when the loop is the innermost.\n");
1126 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedInnerReduction",
1127 L->getStartLoc(),
L->getHeader())
1128 <<
"Only supported when the loop is the innermost.";
1133 if (
Phi->getNumIncomingValues() != 2)
1136 Value *Init =
Phi->getIncomingValueForBlock(
L->getLoopPreheader());
1137 Value *
Next =
Phi->getIncomingValueForBlock(
L->getLoopLatch());
1143 <<
"Only supported for the reduction with a constant initial value.\n");
1145 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedInnerReduction",
1146 L->getStartLoc(),
L->getHeader())
1147 <<
"Only supported for the reduction with a constant initial "
1156 if (!
L->contains(BB))
1161 if (!
Phi->hasOneUser())
1173 PHINode *Lcssa = NULL;
1174 for (
auto *U :
Next->users()) {
1179 if (Lcssa == NULL &&
P->getParent() == ExitBlock &&
1180 P->getIncomingValueForBlock(
L->getLoopLatch()) ==
Next)
1191 LLVM_DEBUG(
dbgs() <<
"Only supported when the reduction is used once in "
1192 "the outer loop.\n");
1194 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedInnerReduction",
1195 L->getStartLoc(),
L->getHeader())
1196 <<
"Only supported when the reduction is used once in the outer "
1202 StoreInst *LcssaStore =
1204 if (!LcssaStore || LcssaStore->
getParent() != ExitBlock)
1217 LLVM_DEBUG(
dbgs() <<
"Only supported when memory reference dominate "
1218 "the inner loop.\n");
1220 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedInnerReduction",
1221 L->getStartLoc(),
L->getHeader())
1222 <<
"Only supported when memory reference dominate the inner "
1233 SR.LcssaPhi = Lcssa;
1234 SR.LcssaStore = LcssaStore;
1238 InnerReductions.push_back(SR);
1242bool LoopInterchangeLegality::checkInductionsAndReductions(Loop *OuterLoop) {
1243 auto ChildLoop = [](Loop *
L) {
1244 assert(
L->getSubLoops().size() <= 1 &&
1245 "Expect at most one child loop for now.");
1246 return L->getSubLoops().empty() ? nullptr :
L->getSubLoops().front();
1249 Loop *InnerLoop = ChildLoop(OuterLoop);
1250 for (Loop *CurLoop = OuterLoop; CurLoop; CurLoop = ChildLoop(CurLoop)) {
1251 for (PHINode &
PHI : CurLoop->getHeader()->phis()) {
1252 InductionDescriptor
ID;
1254 if (CurLoop == InnerLoop) {
1255 const SCEV *Step =
ID.getStep();
1258 InnerLoopInductions.push_back(&
PHI);
1263 if (CurLoop == OuterLoop) {
1265 if (
PHI.getNumIncomingValues() != 2) {
1266 LLVM_DEBUG(
dbgs() <<
"Only PHI nodes in the outer loop header with 2 "
1267 "incoming values are supported.\n");
1275 InnerLoop, V, HasNoWrapReductions, HasNoInfInsts);
1295 [InnerRedPhi](User *U) { return U == InnerRedPhi; })) {
1298 <<
"Failed to recognize PHI as an induction or reduction.\n");
1300 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedPHIOuter",
1303 <<
"Only outer loops with induction or reduction PHI nodes "
1304 "can be interchanged currently.";
1309 OuterInnerReductions.insert(&
PHI);
1310 OuterInnerReductions.insert(InnerRedPhi);
1312 if (OuterInnerReductions.count(&
PHI)) {
1313 LLVM_DEBUG(
dbgs() <<
"Found a reduction across the outer loop.\n");
1315 isInnerReduction(CurLoop, &
PHI, HasNoWrapReductions)) {
1320 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedPHIInner",
1321 CurLoop->getStartLoc(),
1322 CurLoop->getHeader())
1323 <<
"Only inner loops with induction or reduction PHI nodes "
1324 "can be interchanged currently.";
1332 if (InnerReductions.size() > 1) {
1335 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedInnerReduction",
1336 CurLoop->getStartLoc(),
1337 CurLoop->getHeader())
1338 <<
"Only supports at most one reduction.";
1344 return !InnerLoopInductions.empty();
1349bool LoopInterchangeLegality::currentLimitations() {
1359 dbgs() <<
"Loops where the latch is not the exiting block are not"
1360 <<
" supported currently.\n");
1362 return OptimizationRemarkMissed(
DEBUG_TYPE,
"ExitingNotLatch",
1365 <<
"Loops where the latch is not the exiting block cannot be"
1366 " interchange currently.";
1372 if (!isLoopStructureUnderstood()) {
1375 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedStructureInner",
1378 <<
"Inner loop structure not understood currently.";
1385 for (Loop *L : {OuterLoop, InnerLoop}) {
1388 if (
L->contains(Pred))
1392 dbgs() <<
"Indirect branch found in the loop predecessor.\n");
1394 return OptimizationRemarkMissed(
DEBUG_TYPE,
"IndirectBranchPreheader",
1395 L->getStartLoc(),
L->getHeader())
1396 <<
"Indirect branch found in the loop predecessor.";
1405 SmallPtrSet<BasicBlock *, 2> InnerLoopHeaderSuccs;
1407 if (!InnerLoopHeaderSuccs.
insert(Succ).second)
1430 if (
PHI.getNumIncomingValues() > 1)
1434 if (&
PHI == LcssaReduction)
1437 PHINode *PN = dyn_cast<PHINode>(U);
1440 if (Reductions.count(PN))
1442 BasicBlock *PB = PN->getParent();
1443 if (!OuterL->contains(PB))
1445 return PB != OuterL->getLoopLatch();
1462 for (
Value *Incoming :
PHI.incoming_values()) {
1516 for (
unsigned I = 0;
I < Worklist.
size(); ++
I) {
1528bool LoopInterchangeLegality::canInterchangeLoops(
unsigned InnerLoopId,
1529 unsigned OuterLoopId,
1530 CharMatrix &DepMatrix) {
1532 LLVM_DEBUG(
dbgs() <<
"Failed interchange InnerLoopId = " << InnerLoopId
1533 <<
" and OuterLoopId = " << OuterLoopId
1534 <<
" due to dependence\n");
1536 return OptimizationRemarkMissed(
DEBUG_TYPE,
"Dependence",
1539 <<
"Cannot interchange loops due to dependences.";
1544 for (
auto *BB : OuterLoop->
blocks())
1545 for (Instruction &
I : *BB) {
1552 if (!
I.mayHaveSideEffects() && !
I.mayReadFromMemory())
1557 <<
"Loops contain instructions that cannot be safely interchanged\n");
1559 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsafeInst",
1560 I.getDebugLoc(),
I.getParent())
1561 <<
"Cannot interchange loops due to instruction that is "
1562 "potentially unsafe to interchange.";
1568 if (!checkInductionsAndReductions(OuterLoop)) {
1569 LLVM_DEBUG(
dbgs() <<
"Failed to find inner loop inductions or found "
1570 "unsupported reductions.\n");
1575 LLVM_DEBUG(
dbgs() <<
"Found unsupported PHI nodes in inner loop latch.\n");
1577 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedInnerLatchPHI",
1580 <<
"Cannot interchange loops because unsupported PHI nodes found "
1581 "in inner loop latch.";
1588 if (currentLimitations()) {
1589 LLVM_DEBUG(
dbgs() <<
"Not legal because of current transform limitation\n");
1594 if (!tightlyNested(OuterLoop, InnerLoop)) {
1597 return OptimizationRemarkMissed(
DEBUG_TYPE,
"NotTightlyNested",
1600 <<
"Cannot interchange loops because they are not tightly "
1608 PHINode *LcssaReduction =
nullptr;
1609 assert(InnerReductions.size() <= 1 &&
1610 "So far we only support at most one reduction.");
1611 if (InnerReductions.size() == 1)
1612 LcssaReduction = InnerReductions[0].LcssaPhi;
1616 LLVM_DEBUG(
dbgs() <<
"Found unsupported PHI nodes in inner loop exit.\n");
1618 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedExitPHI",
1621 <<
"Found unsupported PHI node in loop exit.";
1627 LLVM_DEBUG(
dbgs() <<
"Found unsupported PHI nodes in outer loop exit.\n");
1629 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedExitPHI",
1632 <<
"Found unsupported PHI node in loop exit.";
1638 [](PHINode &
PHI) { return PHI.getNumIncomingValues() != 1; })) {
1639 LLVM_DEBUG(
dbgs() <<
"Only outer loop latch PHI nodes with one incoming "
1640 "value are supported.\n");
1642 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedLatchPHI",
1645 <<
"Only outer loop latch PHI nodes with one incoming value are "
1659 if (
any_of(
PHI.users(), [](
const User *U) { return !isa<PHINode>(U); })) {
1660 LLVM_DEBUG(
dbgs() <<
"Outer loop latch PHI has a non-PHI user.\n");
1662 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedLatchPHI",
1665 <<
"Cannot interchange loops because an outer loop latch PHI "
1666 "node has a non-PHI user.";
1674void CacheCostManager::computeIfUnitinialized() {
1689 for (
const auto &[Idx,
Cost] :
enumerate((*CC)->getLoopCosts()))
1690 CostMap[
Cost.first] = Idx;
1693CacheCost *CacheCostManager::getCacheCost() {
1694 computeIfUnitinialized();
1698const DenseMap<const Loop *, unsigned> &CacheCostManager::getCostMap() {
1699 computeIfUnitinialized();
1709static std::optional<const SCEV *>
1715 return std::nullopt;
1720 return std::nullopt;
1723 std::optional<const SCEV *> Coeff =
1725 if (!Coeff.has_value())
1726 return std::nullopt;
1729 assert(!*Coeff &&
"Found more than one addrec for the same loop");
1735int LoopInterchangeProfitability::getInstrOrderCost() {
1736 SmallPtrSet<const SCEV *, 4> GoodBasePtrs, BadBasePtrs;
1737 for (BasicBlock *BB : InnerLoop->
blocks()) {
1738 for (Instruction &Ins : *BB) {
1743 std::optional<const SCEV *> OuterCoeff =
1745 std::optional<const SCEV *> InnerCoeff =
1748 if (!OuterCoeff.has_value() || !*OuterCoeff || !InnerCoeff.has_value() ||
1758 const SCEV *OuterStep = SE->
getAbsExpr(*OuterCoeff,
false);
1759 const SCEV *InnerStep = SE->
getAbsExpr(*InnerCoeff,
false);
1779 GoodBasePtrs.
insert(BasePtr);
1781 BadBasePtrs.
insert(BasePtr);
1785 int GoodOrder = GoodBasePtrs.
size();
1786 int BadOrder = BadBasePtrs.
size();
1787 return GoodOrder - BadOrder;
1791LoopInterchangeProfitability::isProfitablePerLoopCacheAnalysis(
1792 const DenseMap<const Loop *, unsigned> &CostMap, CacheCost *CC) {
1796 auto InnerLoopIt = CostMap.
find(InnerLoop);
1797 if (InnerLoopIt == CostMap.
end())
1798 return std::nullopt;
1799 auto OuterLoopIt = CostMap.
find(OuterLoop);
1800 if (OuterLoopIt == CostMap.
end())
1801 return std::nullopt;
1804 return std::nullopt;
1805 unsigned InnerIndex = InnerLoopIt->second;
1806 unsigned OuterIndex = OuterLoopIt->second;
1808 <<
", OuterIndex = " << OuterIndex <<
"\n");
1809 assert(InnerIndex != OuterIndex &&
"CostMap should assign unique "
1810 "numbers to each loop");
1811 return std::optional<bool>(InnerIndex < OuterIndex);
1815LoopInterchangeProfitability::isProfitablePerInstrOrderCost() {
1819 int Cost = getInstrOrderCost();
1822 return std::optional<bool>(
true);
1824 return std::nullopt;
1829 for (
const auto &Dep : DepMatrix) {
1830 char Dir = Dep[LoopId];
1831 char DepType = Dep.back();
1832 assert((DepType ==
'<' || DepType ==
'*') &&
1833 "Unexpected element in dependency vector");
1836 if (Dir ==
'=' || Dir ==
'I')
1842 if (Dir ==
'<' && DepType ==
'<')
1851std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
1852 unsigned InnerLoopId,
unsigned OuterLoopId, CharMatrix &DepMatrix) {
1868 return std::nullopt;
1871bool LoopInterchangeProfitability::isProfitable(
1872 const Loop *InnerLoop,
const Loop *OuterLoop,
unsigned InnerLoopId,
1873 unsigned OuterLoopId, CharMatrix &DepMatrix, CacheCostManager &CCM) {
1882 if (InnerBTC && InnerBTC->
isZero()) {
1883 LLVM_DEBUG(
dbgs() <<
"Inner loop back-edge isn't taken, rejecting "
1884 "single iteration loop\n");
1887 if (OuterBTC && OuterBTC->
isZero()) {
1888 LLVM_DEBUG(
dbgs() <<
"Outer loop back-edge isn't taken, rejecting "
1889 "single iteration loop\n");
1897 "Duplicate rules and option 'ignore' are not allowed");
1907 std::optional<bool> shouldInterchange;
1910 case RuleTy::PerLoopCacheAnalysis: {
1911 CacheCost *CC = CCM.getCacheCost();
1912 const DenseMap<const Loop *, unsigned> &CostMap = CCM.getCostMap();
1913 shouldInterchange = isProfitablePerLoopCacheAnalysis(CostMap, CC);
1916 case RuleTy::PerInstrOrderCost:
1917 shouldInterchange = isProfitablePerInstrOrderCost();
1919 case RuleTy::ForVectorization:
1921 isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
1923 case RuleTy::Ignore:
1930 if (shouldInterchange.has_value())
1934 if (!shouldInterchange.has_value()) {
1936 return OptimizationRemarkMissed(
DEBUG_TYPE,
"InterchangeNotProfitable",
1939 <<
"Insufficient information to calculate the cost of loop for "
1943 }
else if (!shouldInterchange.value()) {
1945 return OptimizationRemarkMissed(
DEBUG_TYPE,
"InterchangeNotProfitable",
1948 <<
"Interchanging loops is not considered to improve cache "
1949 "locality nor vectorization.";
1956void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
1958 for (Loop *L : *OuterLoop)
1959 if (L == InnerLoop) {
1960 OuterLoop->removeChildLoop(L);
1989void LoopInterchangeTransform::restructureLoops(
1990 Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader,
1991 BasicBlock *OrigOuterPreHeader) {
1992 Loop *OuterLoopParent = OuterLoop->getParentLoop();
1999 if (OuterLoopParent) {
2001 removeChildLoop(OuterLoopParent, NewInner);
2002 removeChildLoop(NewInner, NewOuter);
2005 removeChildLoop(NewInner, NewOuter);
2013 SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->
blocks());
2017 for (BasicBlock *BB : NewInner->
blocks())
2025 for (BasicBlock *BB : OrigInnerBBs) {
2030 if (BB == OuterHeader || BB == OuterLatch)
2068void LoopInterchangeTransform::reduction2Memory() {
2070 LIL.getInnerReductions();
2073 "So far we only support at most one reduction.");
2075 LoopInterchangeLegality::InnerReduction SR = InnerReductions[0];
2081 PHINode *FirstIter =
2082 Builder.CreatePHI(Type::getInt1Ty(
Context), 2,
"first.iter");
2087 assert(FirstIter->
isComplete() &&
"The FirstIter PHI node is not complete.");
2092 Instruction *LoadMem = Builder.CreateLoad(SR.ElemTy, SR.MemRef);
2095 Value *NewVar = Builder.CreateSelect(FirstIter, SR.Init, LoadMem,
"new.var");
2106bool LoopInterchangeTransform::transform(
2109 bool Transformed =
false;
2112 LIL.getInnerReductions();
2113 if (InnerReductions.
size() == 1)
2117 auto &InductionPHIs = LIL.getInnerLoopInductions();
2118 if (InductionPHIs.empty()) {
2119 LLVM_DEBUG(
dbgs() <<
"Failed to find the point to split loop latch \n");
2123 SmallVector<Instruction *, 8> InnerIndexVarList;
2124 for (PHINode *CurInductionPHI : InductionPHIs) {
2126 CurInductionPHI->getIncomingValueForBlock(InnerLoop->
getLoopLatch()));
2128 "Incoming value from loop latch isn't an instruction");
2131 InnerIndexVarList.
push_back(IncomingValue);
2142 SmallSetVector<Instruction *, 4> WorkList;
2144 auto MoveInstructions = [&i, &WorkList,
this, &InductionPHIs, NewLatch]() {
2145 for (; i < WorkList.
size(); i++) {
2149 "MoveInstructions does not support PHI nodes");
2155 "Moving instructions with side-effects may change behavior of "
2166 for (
Value *
Op : WorkList[i]->operands()) {
2183 for (Instruction *InnerIndexVar : InnerIndexVarList)
2201 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
2203 if (InnerLoopPreHeader != OuterLoopHeader) {
2207 "Expected equivalent incoming values in inner loop preheader");
2208 P.replaceAllUsesWith(
P.getIncomingValue(0));
2209 P.eraseFromParent();
2211 for (Instruction &
I :
2213 std::prev(InnerLoopPreHeader->
end()))))
2217 Transformed |= adjustLoopLinks();
2225 for (Instruction *
Reduction : DropNoWrapInsts) {
2229 for (Instruction *
I : DropNoInfInsts)
2230 I->setHasNoInfs(
false);
2251 I->removeFromParent();
2266 std::vector<DominatorTree::UpdateType> &DTUpdates,
2267 bool MustUpdateOnce =
true) {
2269 "BI must jump to OldBB exactly once.");
2271 for (
Use &
Op : Term->operands())
2278 DTUpdates.push_back(
2279 {DominatorTree::UpdateKind::Insert, Term->getParent(), NewBB});
2280 DTUpdates.push_back(
2281 {DominatorTree::UpdateKind::Delete, Term->getParent(), OldBB});
2300 assert(
P.getNumIncomingValues() == 1 &&
2301 "Only loops with a single exit are supported!");
2303 Value *IncomingValue =
P.getIncomingValueForBlock(InnerLatch);
2310 "Expected non-instruction incoming value to be loop invariant");
2311 P.replaceAllUsesWith(IncomingValue);
2312 P.eraseFromParent();
2323 if (!IncIInnerMost || (IncIInnerMost->getParent() != InnerLatch &&
2324 IncIInnerMost->
getParent() != InnerHeader))
2328 [OuterHeader, OuterExit, IncI, InnerHeader](
User *U) {
2329 return (cast<PHINode>(U)->getParent() == OuterHeader &&
2330 IncI->getParent() == InnerHeader) ||
2331 cast<PHINode>(U)->getParent() == OuterExit;
2333 "Can only replace phis iff the uses are in the loop nest exit or "
2334 "the incoming value is defined in the inner header (it will "
2335 "dominate all loop blocks after interchanging)");
2336 P.replaceAllUsesWith(IncI);
2337 P.eraseFromParent();
2365 if (
P.getNumIncomingValues() != 1)
2379 if (Pred == OuterLatch)
2384 P.setIncomingValue(0, NewPhi);
2424 if (OuterLoopLatch == InnerLoopExit)
2431 assert(Phi->getNumIncomingValues() == 1 &&
"Single input phi expected");
2432 LLVM_DEBUG(
dbgs() <<
"Removing 1-input phi in non-exit block: " << *Phi
2434 Phi->replaceAllUsesWith(Phi->getIncomingValue(0));
2435 Phi->eraseFromParent();
2439bool LoopInterchangeTransform::adjustLoopBranches() {
2441 std::vector<DominatorTree::UpdateType> DTUpdates;
2443 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
2446 assert(OuterLoopPreHeader != OuterLoop->getHeader() &&
2447 InnerLoopPreHeader != InnerLoop->
getHeader() && OuterLoopPreHeader &&
2448 InnerLoopPreHeader &&
"Guaranteed by loop-simplify form");
2458 OuterLoopPreHeader =
2460 if (InnerLoopPreHeader == OuterLoop->getHeader())
2461 InnerLoopPreHeader =
2466 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
2468 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
2475 CondBrInst *OuterLoopLatchBI =
2477 CondBrInst *InnerLoopLatchBI =
2482 if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
2483 !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
2491 if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
2494 if (!InnerLoopHeaderSuccessor)
2502 InnerLoopPreHeader, DTUpdates,
false);
2512 InnerLoopHeaderSuccessor, DTUpdates,
2520 OuterLoopPreHeader, DTUpdates);
2523 if (InnerLoopLatchBI->
getSuccessor(0) == InnerLoopHeader)
2524 InnerLoopLatchSuccessor = InnerLoopLatchBI->
getSuccessor(1);
2526 InnerLoopLatchSuccessor = InnerLoopLatchBI->
getSuccessor(0);
2529 InnerLoopLatchSuccessor, DTUpdates);
2531 if (OuterLoopLatchBI->
getSuccessor(0) == OuterLoopHeader)
2532 OuterLoopLatchSuccessor = OuterLoopLatchBI->
getSuccessor(1);
2534 OuterLoopLatchSuccessor = OuterLoopLatchBI->
getSuccessor(0);
2537 OuterLoopLatchSuccessor, DTUpdates);
2538 updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
2542 restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
2543 OuterLoopPreHeader);
2545 moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
2546 OuterLoopHeader, OuterLoopLatch, InnerLoop->
getExitBlock(),
2552 auto &OuterInnerReductions = LIL.getOuterInnerReductions();
2555 for (PHINode &
PHI : InnerLoopHeader->
phis())
2556 if (OuterInnerReductions.contains(&
PHI))
2559 for (PHINode &
PHI : OuterLoopHeader->
phis())
2560 if (OuterInnerReductions.contains(&
PHI))
2566 for (PHINode *
PHI : OuterLoopPHIs) {
2569 assert(OuterInnerReductions.count(
PHI) &&
"Expected a reduction PHI node");
2571 for (PHINode *
PHI : InnerLoopPHIs) {
2574 assert(OuterInnerReductions.count(
PHI) &&
"Expected a reduction PHI node");
2587 SmallVector<Instruction *, 4> MayNeedLCSSAPhis;
2588 for (Instruction &
I :
2596bool LoopInterchangeTransform::adjustLoopLinks() {
2598 bool Changed = adjustLoopBranches();
2603 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
2624 LLVM_DEBUG(
dbgs() <<
"Not valid loop candidate for interchange\n");
2632 <<
"Computed dependence info, invoking the transform.";
2636 if (!LoopInterchange(&AR.
SE, &AR.
LI, &DI, &AR.
DT, &AR, &ORE).run(LN))
2638 U.markLoopNestChanged(
true);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file defines the StringMap class.
ReachingDefInfo InstSet InstSet & Ignore
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB)
Move the contents of SourceBB to before the last instruction of TargetBB.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
This file defines the interface for the loop cache analysis.
SmallVector< Loop *, 4 > LoopVector
Loop::LoopBounds::Direction Direction
static cl::list< RuleTy > Profitabilities("loop-interchange-profitabilities", cl::MiscFlags::CommaSeparated, cl::Hidden, cl::desc("List of profitability heuristics to be used. They are applied in " "the given order"), cl::list_init< RuleTy >({RuleTy::PerInstrOrderCost, RuleTy::ForVectorization}), cl::values(clEnumValN(RuleTy::PerLoopCacheAnalysis, "cache", "Prioritize loop cache cost"), clEnumValN(RuleTy::PerInstrOrderCost, "instorder", "Prioritize the IVs order of each instruction"), clEnumValN(RuleTy::ForVectorization, "vectorize", "Prioritize vectorization"), clEnumValN(RuleTy::Ignore, "ignore", "Ignore profitability, force interchange (does not " "work with other options)")))
static cl::opt< int > LoopInterchangeCostThreshold("loop-interchange-threshold", cl::init(0), cl::Hidden, cl::desc("Interchange if you gain more than this number"))
static cl::opt< unsigned int > MinLoopNestDepth("loop-interchange-min-loop-nest-depth", cl::init(2), cl::Hidden, cl::desc("Minimum depth of loop nest considered for the transform"))
static void updateSuccessor(Instruction *Term, BasicBlock *OldBB, BasicBlock *NewBB, std::vector< DominatorTree::UpdateType > &DTUpdates, bool MustUpdateOnce=true)
static cl::opt< bool > EnableReduction2Memory("loop-interchange-reduction-to-mem", cl::init(false), cl::Hidden, cl::desc("Support for the inner-loop reduction pattern."))
static bool isComputableLoopNest(ScalarEvolution *SE, ArrayRef< Loop * > LoopList)
static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
static void simplifyLCSSAPhis(Loop *OuterLoop, Loop *InnerLoop)
This deals with a corner case when a LCSSA phi node appears in a non-exit block: the outer loop latch...
static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx, unsigned ToIndx)
static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader, BasicBlock *InnerLatch, BasicBlock *OuterHeader, BasicBlock *OuterLatch, BasicBlock *OuterExit, Loop *InnerLoop, LoopInfo *LI)
static void printDepMatrix(CharMatrix &DepMatrix)
static cl::opt< unsigned int > MaxMemInstrRatio("loop-interchange-max-mem-instr-ratio", cl::init(4), cl::Hidden, cl::desc("Maximum number of load/store instructions squared in relation to " "the total number of instructions. Higher value may lead to more " "interchanges at the cost of compile-time"))
static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2)
Swap instructions between BB1 and BB2 but keep terminators intact.
static PHINode * findInnerReductionPhi(Loop *L, Value *V, SmallVectorImpl< Instruction * > &HasNoWrapInsts, SmallVectorImpl< Instruction * > &HasNoInfInsts)
static bool areInnerLoopExitPHIsSupported(Loop *OuterL, Loop *InnerL, SmallPtrSetImpl< PHINode * > &Reductions, PHINode *LcssaReduction)
We currently only support LCSSA PHI nodes in the inner loop exit if their users are either of the fol...
static cl::opt< unsigned int > MaxLoopNestDepth("loop-interchange-max-loop-nest-depth", cl::init(10), cl::Hidden, cl::desc("Maximum depth of loop nest considered for the transform"))
static bool hasSupportedLoopDepth(ArrayRef< Loop * > LoopList, OptimizationRemarkEmitter &ORE)
static bool inThisOrder(const Instruction *Src, const Instruction *Dst)
Return true if Src appears before Dst in the same basic block.
static bool canVectorize(const CharMatrix &DepMatrix, unsigned LoopId)
Return true if we can vectorize the loop specified by LoopId.
static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, unsigned InnerLoopId, unsigned OuterLoopId)
static Value * followLCSSA(Value *SV)
static void populateWorklist(Loop &L, LoopVector &LoopList)
static bool areInnerLoopLatchPHIsSupported(Loop *InnerLoop)
The transform clones the inner latch's exit condition into the new latch (see MoveInstructions in Loo...
static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, Loop *L, DependenceInfo *DI, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE)
static std::optional< bool > isLexicographicallyPositive(ArrayRef< char > DV, unsigned Begin, unsigned End)
static bool checkReductionKind(Loop *L, PHINode *PHI, SmallVectorImpl< Instruction * > &HasNoWrapInsts, SmallVectorImpl< Instruction * > &HasNoInfInsts)
static std::optional< const SCEV * > getAddRecCoefficient(ScalarEvolution &SE, const SCEV *S, const Loop *L)
If \S contains an affine addrec for L, return the step recurrence of it.
static bool noDuplicateRulesAndIgnore(ArrayRef< RuleTy > Rules)
This file defines the interface for the loop nest analysis.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
loop Loop Strength Reduction
uint64_t IntrinsicInst * II
static bool processLoop(Loop &L, const AArch64Subtarget &ST, DataLayout DL)
SmallVector< Value *, 8 > ValueVector
This file defines the SmallSet class.
This file defines the SmallVector class.
static bool isProfitable(const StableFunctionMap::StableFunctionEntries &SFS)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
Get the first element.
size_t size() const
Get the array size.
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
const Function * getParent() const
Return the enclosing method, or null if none.
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI void replacePhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
static LLVM_ABI std::unique_ptr< CacheCost > getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, std::optional< unsigned > TRT=std::nullopt)
Create a CacheCost for the loop nest rooted by Root.
CacheCostTy getLoopCost(const Loop &L) const
Return the estimated cost of loop L if the given loop is part of the loop nest associated with this o...
Value * getCondition() const
BasicBlock * getSuccessor(unsigned i) const
iterator find(const_arg_type_t< KeyT > Val)
DependenceInfo - This class is the main dependence-analysis driver.
LLVM_ABI std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool UnderRuntimeAssumptions=false)
depends - Tests for a dependence between the Src and Dst instructions.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
static LLVM_ABI bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, ArrayRef< const SCEVPredicate * > NoWrapPreds={}, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
LLVM_ABI void moveAfter(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
void removeBlockFromLoop(BlockT *BB)
This removes the specified basic block from the current loop, updating the Blocks as appropriate.
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
BlockT * getHeader() const
iterator_range< block_iterator > blocks() const
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
void addBlockEntry(BlockT *BB)
This adds a basic block directly to the basic block list.
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
BlockT * getUniqueExitBlock() const
If getUniqueExitBlocks would return exactly one block, return that block.
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
void changeTopLevelLoop(LoopT *OldLoop, LoopT *NewLoop)
Replace the specified loop in the top-level loops list with the indicated loop.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
void changeLoopFor(const BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
This class represents a loop nest and can be used to query its properties.
static const BasicBlock & skipEmptyBlockUntil(const BasicBlock *From, const BasicBlock *End, bool CheckUniquePred=false)
Recursivelly traverse all empty 'single successor' basic blocks of From (if there are any).
ArrayRef< Loop * > getLoops() const
Get the loops in the nest.
Function * getParent() const
Return the function to which the loop-nest belongs.
Loop & getOutermostLoop() const
Return the outermost loop in the loop nest.
Represents a single loop in the control flow graph.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
StringRef getName() const
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
bool isComplete() const
If the PHI node is complete which means all of its parent's predecessors have incoming value in this ...
op_range incoming_values()
void setIncomingBlock(unsigned i, BasicBlock *BB)
void setIncomingValue(unsigned i, Value *V)
static unsigned getIncomingValueNumForOperand(unsigned i)
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Instruction * getExactFPMathInst() const
Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
unsigned getOpcode() const
static LLVM_ABI bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
LLVM_ABI SmallVector< Instruction *, 4 > getReductionOpChain(PHINode *Phi, Loop *L) const
Attempts to find a chain of operations from Phi to LoopExitInst that can be treated as a set of reduc...
RecurKind getRecurrenceKind() const
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const Loop * getLoop() const
SCEVUse getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
This class represents an analyzed expression in the program.
LLVM_ABI bool isZero() const
Return true if the expression is a constant zero.
The main scalar evolution driver.
LLVM_ABI const SCEV * getAbsExpr(const SCEV *Op, bool IsNSW)
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, SCEVUse LHS, SCEVUse RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
A SetVector that performs no allocations if smaller than a certain size.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
std::pair< iterator, bool > try_emplace(StringRef Key, ArgsTy &&...Args)
Emplace a new element for the specified key into the map if the key isn't already in the map.
Represent a constant reference to a string, i.e.
constexpr size_t size() const
Get the string size.
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI bool hasOneUser() const
Return true if there is exactly one user of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI User * getUniqueUndroppableUser()
Return true if there is exactly one unique user of this value that cannot be dropped (that user can h...
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ BasicBlock
Various leaf nodes.
list_initializer< Ty > list_init(ArrayRef< Ty > Vals)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
DXILDebugInfoMap run(Module &M)
NodeAddr< PhiNode * > Phi
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
LLVM_ABI BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
auto successors(const MachineBasicBlock *BB)
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
LLVM_ABI bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
auto map_range(ContainerTy &&C, FuncTy F)
Return a range that applies F to the elements of C.
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FMinimumNum
FP min with llvm.minimumnum semantics.
@ Or
Bitwise or logical OR of integers.
@ FMinimum
FP min with llvm.minimum semantics.
@ Mul
Product of integers.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ Xor
Bitwise or logical XOR of integers.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMaximum
FP max with llvm.maximum semantics.
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ FMaximumNum
FP max with llvm.maximumnum semantics.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
FunctionAddr VTableAddr Next
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
auto predecessors(const MachineBasicBlock *BB)
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
LLVM_ABI PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...