55#define DEBUG_TYPE "loop-interchange"
57STATISTIC(LoopsInterchanged,
"Number of loops interchanged");
61 cl::desc(
"Interchange if you gain more than this number"));
67 "Maximum number of load-store instructions that should be handled "
68 "in the dependency matrix. Higher value may lead to more interchanges "
69 "at the cost of compile-time"));
83using CharMatrix = std::vector<std::vector<char>>;
98 cl::desc(
"Minimum depth of loop nest considered for the transform"));
103 cl::desc(
"Maximum depth of loop nest considered for the transform"));
109 cl::desc(
"List of profitability heuristics to be used. They are applied in "
112 RuleTy::PerInstrOrderCost,
113 RuleTy::ForVectorization}),
115 "Prioritize loop cache cost"),
116 clEnumValN(RuleTy::PerInstrOrderCost,
"instorder",
117 "Prioritize the IVs order of each instruction"),
118 clEnumValN(RuleTy::ForVectorization,
"vectorize",
119 "Prioritize vectorization"),
121 "Ignore profitability, force interchange (does not "
122 "work with other options)")));
127 cl::desc(
"Support for the inner-loop reduction pattern."));
132 for (RuleTy Rule : Rules) {
133 if (!Set.insert(Rule).second)
135 if (Rule == RuleTy::Ignore)
142 for (
auto &Row : DepMatrix) {
155 assert(Src->getParent() == Dst->getParent() && Src != Dst &&
156 "Expected Src and Dst to be different instructions in the same BB");
158 bool FoundSrc =
false;
199 <<
" Loads and Stores to analyze\n");
205 L->getStartLoc(), L->getHeader())
206 <<
"Number of loads/stores exceeded, the supported maximum "
207 "can be increased with option "
208 "-loop-interchange-maxmeminstr-count.";
218 for (
I = MemInstr.
begin(), IE = MemInstr.
end();
I != IE; ++
I) {
219 for (J =
I, JE = MemInstr.
end(); J != JE; ++J) {
220 std::vector<char> Dep;
227 if (
auto D = DI->
depends(Src, Dst)) {
228 assert(
D->isOrdered() &&
"Expected an output, flow or anti dep.");
231 if (
D->normalize(SE))
234 D->isFlow() ?
"flow" :
D->isAnti() ?
"anti" :
"output";
235 dbgs() <<
"Found " << DepType
236 <<
" dependency between Src and Dst\n"
237 <<
" Src:" << *Src <<
"\n Dst:" << *Dst <<
'\n');
238 unsigned Levels =
D->getLevels();
240 for (
unsigned II = 1;
II <= Levels; ++
II) {
247 unsigned Dir =
D->getDirection(
II);
261 if (
D->isConfused()) {
262 assert(Dep.empty() &&
"Expected empty dependency vector");
263 Dep.assign(Level,
'*');
266 while (Dep.size() != Level) {
275 L->getStartLoc(), L->getHeader())
276 <<
"All loops have dependencies in all directions.";
282 bool IsKnownForward =
true;
283 if (Src->getParent() != Dst->getParent()) {
287 IsKnownForward =
false;
293 "Unexpected instructions");
298 bool IsReversed =
D->getSrc() != Src;
300 IsKnownForward =
false;
316 DepMatrix.push_back(Dep);
323 DepMatrix[Ite->second].back() =
'*';
335 for (
auto &Row : DepMatrix)
344static std::optional<bool>
357 unsigned InnerLoopId,
358 unsigned OuterLoopId) {
359 unsigned NumRows = DepMatrix.size();
360 std::vector<char> Cur;
362 for (
unsigned Row = 0; Row < NumRows; ++Row) {
365 Cur = DepMatrix[Row];
378 std::swap(Cur[InnerLoopId], Cur[OuterLoopId]);
387 << L.getHeader()->getParent()->getName() <<
" Loop: %"
388 << L.getHeader()->getName() <<
'\n');
389 assert(LoopList.
empty() &&
"LoopList should initially be empty!");
390 Loop *CurrentLoop = &L;
391 const std::vector<Loop *> *Vec = &CurrentLoop->
getSubLoops();
392 while (!Vec->empty()) {
396 if (Vec->size() != 1) {
402 CurrentLoop = Vec->front();
410 unsigned LoopNestDepth = LoopList.
size();
412 LLVM_DEBUG(
dbgs() <<
"Unsupported depth of loop nest " << LoopNestDepth
420 <<
"Unsupported depth of loop nest, the supported range is ["
431 for (
Loop *L : LoopList) {
437 if (L->getNumBackEdges() != 1) {
441 if (!L->getExitingBlock()) {
452class LoopInterchangeLegality {
454 LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
455 OptimizationRemarkEmitter *ORE, DominatorTree *DT)
456 : OuterLoop(
Outer), InnerLoop(Inner), SE(SE), DT(DT), ORE(ORE) {}
459 bool canInterchangeLoops(
unsigned InnerLoopId,
unsigned OuterLoopId,
460 CharMatrix &DepMatrix);
464 bool findInductions(Loop *L, SmallVectorImpl<PHINode *> &Inductions);
468 bool isLoopStructureUnderstood();
470 bool currentLimitations();
472 const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions()
const {
473 return OuterInnerReductions;
477 return InnerLoopInductions;
481 return HasNoWrapReductions;
488 struct InnerReduction {
496 StoreInst *LcssaStore;
503 return InnerReductions;
507 bool tightlyNested(Loop *Outer, Loop *Inner);
508 bool containsUnsafeInstructions(BasicBlock *BB, Instruction *Skip);
514 bool findInductionAndReductions(Loop *L,
528 bool isInnerReduction(Loop *L, PHINode *Phi,
529 SmallVectorImpl<Instruction *> &HasNoWrapInsts);
538 OptimizationRemarkEmitter *ORE;
542 SmallPtrSet<PHINode *, 4> OuterInnerReductions;
550 SmallVector<Instruction *, 4> HasNoWrapReductions;
559class CacheCostManager {
561 LoopStandardAnalysisResults *AR;
566 std::optional<std::unique_ptr<CacheCost>> CC;
570 DenseMap<const Loop *, unsigned> CostMap;
572 void computeIfUnitinialized();
575 CacheCostManager(Loop *OutermostLoop, LoopStandardAnalysisResults *AR,
577 : OutermostLoop(OutermostLoop), AR(AR), DI(DI) {}
578 CacheCost *getCacheCost();
579 const DenseMap<const Loop *, unsigned> &getCostMap();
584class LoopInterchangeProfitability {
586 LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
587 OptimizationRemarkEmitter *ORE)
588 : OuterLoop(
Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
591 bool isProfitable(
const Loop *InnerLoop,
const Loop *OuterLoop,
592 unsigned InnerLoopId,
unsigned OuterLoopId,
593 CharMatrix &DepMatrix, CacheCostManager &CCM);
596 int getInstrOrderCost();
597 std::optional<bool> isProfitablePerLoopCacheAnalysis(
598 const DenseMap<const Loop *, unsigned> &CostMap, CacheCost *CC);
599 std::optional<bool> isProfitablePerInstrOrderCost();
600 std::optional<bool> isProfitableForVectorization(
unsigned InnerLoopId,
601 unsigned OuterLoopId,
602 CharMatrix &DepMatrix);
610 OptimizationRemarkEmitter *ORE;
614class LoopInterchangeTransform {
616 LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
617 LoopInfo *LI, DominatorTree *DT,
618 const LoopInterchangeLegality &LIL)
619 : OuterLoop(
Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {}
623 void reduction2Memory();
624 void restructureLoops(Loop *NewInner, Loop *NewOuter,
625 BasicBlock *OrigInnerPreHeader,
626 BasicBlock *OrigOuterPreHeader);
627 void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
630 bool adjustLoopLinks();
631 bool adjustLoopBranches();
642 const LoopInterchangeLegality &LIL;
645struct LoopInterchange {
646 ScalarEvolution *SE =
nullptr;
647 LoopInfo *LI =
nullptr;
648 DependenceInfo *DI =
nullptr;
649 DominatorTree *DT =
nullptr;
650 LoopStandardAnalysisResults *AR =
nullptr;
653 OptimizationRemarkEmitter *ORE;
655 LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
656 DominatorTree *DT, LoopStandardAnalysisResults *AR,
657 OptimizationRemarkEmitter *ORE)
658 : SE(SE), LI(LI), DI(DI), DT(DT), AR(AR), ORE(ORE) {}
661 if (
L->getParentLoop())
663 SmallVector<Loop *, 8> LoopList;
665 return processLoopList(LoopList);
668 bool run(LoopNest &LN) {
669 SmallVector<Loop *, 8> LoopList(LN.
getLoops());
670 for (
unsigned I = 1;
I < LoopList.size(); ++
I)
671 if (LoopList[
I]->getParentLoop() != LoopList[
I - 1])
673 return processLoopList(LoopList);
679 return LoopList.
size() - 1;
682 bool processLoopList(SmallVectorImpl<Loop *> &LoopList) {
687 "Unsupported depth of loop nest.");
689 unsigned LoopNestDepth = LoopList.
size();
692 dbgs() <<
"Processing LoopList of size = " << LoopNestDepth
693 <<
" containing the following loops:\n";
694 for (
auto *L : LoopList) {
700 CharMatrix DependencyMatrix;
701 Loop *OuterMostLoop = *(LoopList.begin());
703 OuterMostLoop, DI, SE, ORE)) {
715 <<
"' needs an unique exit block");
719 unsigned SelecLoopId = selectLoopForInterchange(LoopList);
720 CacheCostManager CCM(LoopList[0], AR, DI);
725 for (
unsigned j = SelecLoopId;
j > 0;
j--) {
726 bool ChangedPerIter =
false;
727 for (
unsigned i = SelecLoopId; i > SelecLoopId -
j; i--) {
729 processLoop(LoopList, i, i - 1, DependencyMatrix, CCM);
730 ChangedPerIter |= Interchanged;
741 bool processLoop(SmallVectorImpl<Loop *> &LoopList,
unsigned InnerLoopId,
742 unsigned OuterLoopId,
743 std::vector<std::vector<char>> &DependencyMatrix,
744 CacheCostManager &CCM) {
745 Loop *OuterLoop = LoopList[OuterLoopId];
746 Loop *InnerLoop = LoopList[InnerLoopId];
748 <<
" and OuterLoopId = " << OuterLoopId <<
"\n");
749 LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE, DT);
750 if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
751 LLVM_DEBUG(
dbgs() <<
"Cannot prove legality, not interchanging loops '"
752 << OuterLoop->
getName() <<
"' and '"
753 << InnerLoop->
getName() <<
"'\n");
758 <<
"' are legal to interchange\n");
759 LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
760 if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
761 DependencyMatrix, CCM)) {
763 <<
"' and '" << InnerLoop->
getName()
764 <<
"' not profitable.\n");
769 return OptimizationRemark(
DEBUG_TYPE,
"Interchanged",
772 <<
"Loop interchanged with enclosing loop.";
775 LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL);
776 LIT.transform(LIL.getHasNoWrapReductions());
778 << OuterLoop->
getName() <<
"' and inner loop '"
779 << InnerLoop->
getName() <<
"'\n");
785 std::swap(LoopList[OuterLoopId], LoopList[InnerLoopId]);
798bool LoopInterchangeLegality::containsUnsafeInstructions(
BasicBlock *BB,
800 return any_of(*BB, [Skip](
const Instruction &
I) {
803 return I.mayHaveSideEffects() ||
I.mayReadFromMemory();
807bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
813 <<
"' and '" << InnerLoop->
getName()
814 <<
"' are tightly nested\n");
819 BranchInst *OuterLoopHeaderBI =
821 if (!OuterLoopHeaderBI)
824 for (BasicBlock *Succ :
successors(OuterLoopHeaderBI))
825 if (Succ != InnerLoopPreHeader && Succ != InnerLoop->
getHeader() &&
826 Succ != OuterLoopLatch)
829 LLVM_DEBUG(
dbgs() <<
"Checking instructions in Loop header and Loop latch\n");
835 assert(InnerReductions.size() <= 1 &&
836 "So far we only support at most one reduction.");
837 if (InnerReductions.size() == 1)
838 Skip = InnerReductions[0].LcssaStore;
842 if (containsUnsafeInstructions(OuterLoopHeader, Skip) ||
843 containsUnsafeInstructions(OuterLoopLatch, Skip))
849 if (InnerLoopPreHeader != OuterLoopHeader &&
850 containsUnsafeInstructions(InnerLoopPreHeader, Skip))
858 if (&SuccInner != OuterLoopLatch) {
860 <<
" does not lead to the outer loop latch.\n";);
866 if (containsUnsafeInstructions(InnerLoopExit, Skip))
874bool LoopInterchangeLegality::isLoopStructureUnderstood() {
876 for (PHINode *InnerInduction : InnerLoopInductions) {
877 unsigned Num = InnerInduction->getNumOperands();
878 for (
unsigned i = 0; i < Num; ++i) {
879 Value *Val = InnerInduction->getOperand(i);
889 if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
890 InnerLoopPreheader &&
904 BranchInst *InnerLoopLatchBI =
908 if (CmpInst *InnerLoopCmp =
910 Value *Op0 = InnerLoopCmp->getOperand(0);
911 Value *Op1 = InnerLoopCmp->getOperand(1);
922 std::function<bool(
Value *)> IsPathToInnerIndVar;
923 IsPathToInnerIndVar = [
this, &IsPathToInnerIndVar](
const Value *
V) ->
bool {
932 return IsPathToInnerIndVar(
I->getOperand(0));
934 return IsPathToInnerIndVar(
I->getOperand(0)) &&
935 IsPathToInnerIndVar(
I->getOperand(1));
941 if (IsPathToInnerIndVar(Op0) && IsPathToInnerIndVar(Op1))
949 }
else if (IsPathToInnerIndVar(Op1) && !
isa<Constant>(Op1)) {
972 if (
PHI->getNumIncomingValues() != 1)
1029 assert(
I->getOpcode() == OpCode &&
1030 "Expected the instruction to be the reduction operation");
1035 if (
I->hasNoSignedWrap() ||
I->hasNoUnsignedWrap())
1058 if (
PHI->getNumIncomingValues() == 1)
1071bool LoopInterchangeLegality::isInnerReduction(
1072 Loop *L, PHINode *Phi, SmallVectorImpl<Instruction *> &HasNoWrapInsts) {
1076 if (!
L->isInnermost()) {
1077 LLVM_DEBUG(
dbgs() <<
"Only supported when the loop is the innermost.\n");
1079 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedInnerReduction",
1080 L->getStartLoc(),
L->getHeader())
1081 <<
"Only supported when the loop is the innermost.";
1086 if (
Phi->getNumIncomingValues() != 2)
1089 Value *Init =
Phi->getIncomingValueForBlock(
L->getLoopPreheader());
1090 Value *
Next =
Phi->getIncomingValueForBlock(
L->getLoopLatch());
1096 <<
"Only supported for the reduction with a constant initial value.\n");
1098 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedInnerReduction",
1099 L->getStartLoc(),
L->getHeader())
1100 <<
"Only supported for the reduction with a constant initial "
1109 if (!
L->contains(BB))
1114 if (!
Phi->hasOneUser())
1126 PHINode *Lcssa = NULL;
1127 for (
auto *U :
Next->users()) {
1132 if (Lcssa == NULL &&
P->getParent() == ExitBlock &&
1133 P->getIncomingValueForBlock(
L->getLoopLatch()) ==
Next)
1144 LLVM_DEBUG(
dbgs() <<
"Only supported when the reduction is used once in "
1145 "the outer loop.\n");
1147 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedInnerReduction",
1148 L->getStartLoc(),
L->getHeader())
1149 <<
"Only supported when the reduction is used once in the outer "
1155 StoreInst *LcssaStore =
1157 if (!LcssaStore || LcssaStore->
getParent() != ExitBlock)
1170 LLVM_DEBUG(
dbgs() <<
"Only supported when memory reference dominate "
1171 "the inner loop.\n");
1173 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedInnerReduction",
1174 L->getStartLoc(),
L->getHeader())
1175 <<
"Only supported when memory reference dominate the inner "
1186 SR.LcssaPhi = Lcssa;
1187 SR.LcssaStore = LcssaStore;
1191 InnerReductions.push_back(SR);
1195bool LoopInterchangeLegality::findInductionAndReductions(
1197 if (!
L->getLoopLatch() || !
L->getLoopPredecessor())
1199 for (PHINode &
PHI :
L->getHeader()->phis()) {
1200 InductionDescriptor
ID;
1207 if (OuterInnerReductions.count(&
PHI)) {
1208 LLVM_DEBUG(
dbgs() <<
"Found a reduction across the outer loop.\n");
1210 isInnerReduction(L, &
PHI, HasNoWrapReductions)) {
1216 assert(
PHI.getNumIncomingValues() == 2 &&
1217 "Phis in loop header should have exactly 2 incoming values");
1221 PHINode *InnerRedPhi =
1227 <<
"Failed to recognize PHI as an induction or reduction.\n");
1230 OuterInnerReductions.insert(&
PHI);
1231 OuterInnerReductions.insert(InnerRedPhi);
1237 if (InnerReductions.size() > 1) {
1240 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedInnerReduction",
1241 L->getStartLoc(),
L->getHeader())
1242 <<
"Only supports at most one reduction.";
1252bool LoopInterchangeLegality::currentLimitations() {
1262 dbgs() <<
"Loops where the latch is not the exiting block are not"
1263 <<
" supported currently.\n");
1265 return OptimizationRemarkMissed(
DEBUG_TYPE,
"ExitingNotLatch",
1268 <<
"Loops where the latch is not the exiting block cannot be"
1269 " interchange currently.";
1275 if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
1277 dbgs() <<
"Only outer loops with induction or reduction PHI nodes "
1278 <<
"are supported currently.\n");
1280 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedPHIOuter",
1283 <<
"Only outer loops with induction or reduction PHI nodes can be"
1284 " interchanged currently.";
1293 Loop *CurLevelLoop = OuterLoop;
1296 CurLevelLoop = CurLevelLoop->
getSubLoops().front();
1297 if (!findInductionAndReductions(CurLevelLoop, Inductions,
nullptr)) {
1299 dbgs() <<
"Only inner loops with induction or reduction PHI nodes "
1300 <<
"are supported currently.\n");
1302 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedPHIInner",
1305 <<
"Only inner loops with induction or reduction PHI nodes can be"
1306 " interchange currently.";
1313 if (!isLoopStructureUnderstood()) {
1316 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedStructureInner",
1319 <<
"Inner loop structure not understood currently.";
1327bool LoopInterchangeLegality::findInductions(
1328 Loop *L, SmallVectorImpl<PHINode *> &Inductions) {
1329 for (PHINode &
PHI :
L->getHeader()->phis()) {
1330 InductionDescriptor
ID;
1334 return !Inductions.
empty();
1348 if (
PHI.getNumIncomingValues() > 1)
1350 if (&
PHI == LcssaReduction)
1353 PHINode *PN = dyn_cast<PHINode>(U);
1355 (!Reductions.count(PN) && OuterL->contains(PN->getParent()));
1420 for (
auto *U :
PHI.users()) {
1429bool LoopInterchangeLegality::canInterchangeLoops(
unsigned InnerLoopId,
1430 unsigned OuterLoopId,
1431 CharMatrix &DepMatrix) {
1433 LLVM_DEBUG(
dbgs() <<
"Failed interchange InnerLoopId = " << InnerLoopId
1434 <<
" and OuterLoopId = " << OuterLoopId
1435 <<
" due to dependence\n");
1437 return OptimizationRemarkMissed(
DEBUG_TYPE,
"Dependence",
1440 <<
"Cannot interchange loops due to dependences.";
1445 for (
auto *BB : OuterLoop->
blocks())
1449 if (CI->onlyWritesMemory())
1452 dbgs() <<
"Loops with call instructions cannot be interchanged "
1455 return OptimizationRemarkMissed(
DEBUG_TYPE,
"CallInst",
1458 <<
"Cannot interchange loops due to call instruction.";
1464 if (!findInductions(InnerLoop, InnerLoopInductions)) {
1465 LLVM_DEBUG(
dbgs() <<
"Could not find inner loop induction variables.\n");
1470 LLVM_DEBUG(
dbgs() <<
"Found unsupported PHI nodes in inner loop latch.\n");
1472 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedInnerLatchPHI",
1475 <<
"Cannot interchange loops because unsupported PHI nodes found "
1476 "in inner loop latch.";
1483 if (currentLimitations()) {
1484 LLVM_DEBUG(
dbgs() <<
"Not legal because of current transform limitation\n");
1489 if (!tightlyNested(OuterLoop, InnerLoop)) {
1492 return OptimizationRemarkMissed(
DEBUG_TYPE,
"NotTightlyNested",
1495 <<
"Cannot interchange loops because they are not tightly "
1503 PHINode *LcssaReduction =
nullptr;
1504 assert(InnerReductions.size() <= 1 &&
1505 "So far we only support at most one reduction.");
1506 if (InnerReductions.size() == 1)
1507 LcssaReduction = InnerReductions[0].LcssaPhi;
1511 LLVM_DEBUG(
dbgs() <<
"Found unsupported PHI nodes in inner loop exit.\n");
1513 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedExitPHI",
1516 <<
"Found unsupported PHI node in loop exit.";
1522 LLVM_DEBUG(
dbgs() <<
"Found unsupported PHI nodes in outer loop exit.\n");
1524 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedExitPHI",
1527 <<
"Found unsupported PHI node in loop exit.";
1535void CacheCostManager::computeIfUnitinialized() {
1550 for (
const auto &[Idx,
Cost] :
enumerate((*CC)->getLoopCosts()))
1551 CostMap[
Cost.first] = Idx;
1554CacheCost *CacheCostManager::getCacheCost() {
1555 computeIfUnitinialized();
1559const DenseMap<const Loop *, unsigned> &CacheCostManager::getCostMap() {
1560 computeIfUnitinialized();
1564int LoopInterchangeProfitability::getInstrOrderCost() {
1565 unsigned GoodOrder, BadOrder;
1566 BadOrder = GoodOrder = 0;
1567 for (BasicBlock *BB : InnerLoop->
blocks()) {
1568 for (Instruction &Ins : *BB) {
1570 bool FoundInnerInduction =
false;
1571 bool FoundOuterInduction =
false;
1577 const SCEV *OperandVal = SE->
getSCEV(
Op);
1587 if (AR->
getLoop() == InnerLoop) {
1590 FoundInnerInduction =
true;
1591 if (FoundOuterInduction) {
1601 if (AR->
getLoop() == OuterLoop) {
1604 FoundOuterInduction =
true;
1605 if (FoundInnerInduction) {
1614 return GoodOrder - BadOrder;
1618LoopInterchangeProfitability::isProfitablePerLoopCacheAnalysis(
1619 const DenseMap<const Loop *, unsigned> &CostMap, CacheCost *CC) {
1623 auto InnerLoopIt = CostMap.
find(InnerLoop);
1624 if (InnerLoopIt == CostMap.
end())
1625 return std::nullopt;
1626 auto OuterLoopIt = CostMap.
find(OuterLoop);
1627 if (OuterLoopIt == CostMap.
end())
1628 return std::nullopt;
1631 return std::nullopt;
1632 unsigned InnerIndex = InnerLoopIt->second;
1633 unsigned OuterIndex = OuterLoopIt->second;
1635 <<
", OuterIndex = " << OuterIndex <<
"\n");
1636 assert(InnerIndex != OuterIndex &&
"CostMap should assign unique "
1637 "numbers to each loop");
1638 return std::optional<bool>(InnerIndex < OuterIndex);
1642LoopInterchangeProfitability::isProfitablePerInstrOrderCost() {
1646 int Cost = getInstrOrderCost();
1649 return std::optional<bool>(
true);
1651 return std::nullopt;
1656 for (
const auto &Dep : DepMatrix) {
1657 char Dir = Dep[LoopId];
1658 char DepType = Dep.back();
1659 assert((DepType ==
'<' || DepType ==
'*') &&
1660 "Unexpected element in dependency vector");
1663 if (Dir ==
'=' || Dir ==
'I')
1669 if (Dir ==
'<' && DepType ==
'<')
1678std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
1679 unsigned InnerLoopId,
unsigned OuterLoopId, CharMatrix &DepMatrix) {
1695 return std::nullopt;
1698bool LoopInterchangeProfitability::isProfitable(
1699 const Loop *InnerLoop,
const Loop *OuterLoop,
unsigned InnerLoopId,
1700 unsigned OuterLoopId, CharMatrix &DepMatrix, CacheCostManager &CCM) {
1709 if (InnerBTC && InnerBTC->
isZero()) {
1710 LLVM_DEBUG(
dbgs() <<
"Inner loop back-edge isn't taken, rejecting "
1711 "single iteration loop\n");
1714 if (OuterBTC && OuterBTC->
isZero()) {
1715 LLVM_DEBUG(
dbgs() <<
"Outer loop back-edge isn't taken, rejecting "
1716 "single iteration loop\n");
1724 "Duplicate rules and option 'ignore' are not allowed");
1734 std::optional<bool> shouldInterchange;
1737 case RuleTy::PerLoopCacheAnalysis: {
1738 CacheCost *CC = CCM.getCacheCost();
1739 const DenseMap<const Loop *, unsigned> &CostMap = CCM.getCostMap();
1740 shouldInterchange = isProfitablePerLoopCacheAnalysis(CostMap, CC);
1743 case RuleTy::PerInstrOrderCost:
1744 shouldInterchange = isProfitablePerInstrOrderCost();
1746 case RuleTy::ForVectorization:
1748 isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
1750 case RuleTy::Ignore:
1757 if (shouldInterchange.has_value())
1761 if (!shouldInterchange.has_value()) {
1763 return OptimizationRemarkMissed(
DEBUG_TYPE,
"InterchangeNotProfitable",
1766 <<
"Insufficient information to calculate the cost of loop for "
1770 }
else if (!shouldInterchange.value()) {
1772 return OptimizationRemarkMissed(
DEBUG_TYPE,
"InterchangeNotProfitable",
1775 <<
"Interchanging loops is not considered to improve cache "
1776 "locality nor vectorization.";
1783void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
1785 for (Loop *L : *OuterLoop)
1786 if (L == InnerLoop) {
1787 OuterLoop->removeChildLoop(L);
1816void LoopInterchangeTransform::restructureLoops(
1817 Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader,
1818 BasicBlock *OrigOuterPreHeader) {
1819 Loop *OuterLoopParent = OuterLoop->getParentLoop();
1826 if (OuterLoopParent) {
1828 removeChildLoop(OuterLoopParent, NewInner);
1829 removeChildLoop(NewInner, NewOuter);
1832 removeChildLoop(NewInner, NewOuter);
1840 SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->
blocks());
1844 for (BasicBlock *BB : NewInner->
blocks())
1852 for (BasicBlock *BB : OrigInnerBBs) {
1857 if (BB == OuterHeader || BB == OuterLatch)
1895void LoopInterchangeTransform::reduction2Memory() {
1897 LIL.getInnerReductions();
1900 "So far we only support at most one reduction.");
1902 LoopInterchangeLegality::InnerReduction SR = InnerReductions[0];
1908 PHINode *FirstIter =
1909 Builder.CreatePHI(Type::getInt1Ty(
Context), 2,
"first.iter");
1914 assert(FirstIter->
isComplete() &&
"The FirstIter PHI node is not complete.");
1919 Instruction *LoadMem = Builder.CreateLoad(SR.ElemTy, SR.MemRef);
1922 Value *NewVar = Builder.CreateSelect(FirstIter, SR.Init, LoadMem,
"new.var");
1933bool LoopInterchangeTransform::transform(
1935 bool Transformed =
false;
1938 LIL.getInnerReductions();
1939 if (InnerReductions.
size() == 1)
1945 auto &InductionPHIs = LIL.getInnerLoopInductions();
1946 if (InductionPHIs.empty()) {
1947 LLVM_DEBUG(
dbgs() <<
"Failed to find the point to split loop latch \n");
1951 SmallVector<Instruction *, 8> InnerIndexVarList;
1952 for (PHINode *CurInductionPHI : InductionPHIs) {
1953 if (CurInductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
1969 SmallSetVector<Instruction *, 4> WorkList;
1971 auto MoveInstructions = [&i, &WorkList,
this, &InductionPHIs, NewLatch]() {
1972 for (; i < WorkList.
size(); i++) {
1978 "Moving instructions with side-effects may change behavior of "
1989 for (
Value *
Op : WorkList[i]->operands()) {
2007 for (Instruction *InnerIndexVar : InnerIndexVarList)
2026 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
2027 if (InnerLoopPreHeader != OuterLoopHeader) {
2028 for (Instruction &
I :
2030 std::prev(InnerLoopPreHeader->
end()))))
2034 Transformed |= adjustLoopLinks();
2042 for (Instruction *
Reduction : DropNoWrapInsts) {
2066 I->removeFromParent();
2081 std::vector<DominatorTree::UpdateType> &DTUpdates,
2082 bool MustUpdateOnce =
true) {
2084 "BI must jump to OldBB exactly once.");
2093 DTUpdates.push_back(
2094 {DominatorTree::UpdateKind::Insert, BI->
getParent(), NewBB});
2095 DTUpdates.push_back(
2096 {DominatorTree::UpdateKind::Delete, BI->
getParent(), OldBB});
2115 assert(
P.getNumIncomingValues() == 1 &&
2116 "Only loops with a single exit are supported!");
2125 if (IncIInnerMost->getParent() != InnerLatch &&
2126 IncIInnerMost->
getParent() != InnerHeader)
2130 [OuterHeader, OuterExit, IncI, InnerHeader](
User *U) {
2131 return (cast<PHINode>(U)->getParent() == OuterHeader &&
2132 IncI->getParent() == InnerHeader) ||
2133 cast<PHINode>(U)->getParent() == OuterExit;
2135 "Can only replace phis iff the uses are in the loop nest exit or "
2136 "the incoming value is defined in the inner header (it will "
2137 "dominate all loop blocks after interchanging)");
2138 P.replaceAllUsesWith(IncI);
2139 P.eraseFromParent();
2167 if (
P.getNumIncomingValues() != 1)
2181 if (Pred == OuterLatch)
2186 P.setIncomingValue(0, NewPhi);
2226 if (OuterLoopLatch == InnerLoopExit)
2233 assert(Phi->getNumIncomingValues() == 1 &&
"Single input phi expected");
2234 LLVM_DEBUG(
dbgs() <<
"Removing 1-input phi in non-exit block: " << *Phi
2236 Phi->replaceAllUsesWith(Phi->getIncomingValue(0));
2237 Phi->eraseFromParent();
2241bool LoopInterchangeTransform::adjustLoopBranches() {
2243 std::vector<DominatorTree::UpdateType> DTUpdates;
2245 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
2248 assert(OuterLoopPreHeader != OuterLoop->getHeader() &&
2249 InnerLoopPreHeader != InnerLoop->
getHeader() && OuterLoopPreHeader &&
2250 InnerLoopPreHeader &&
"Guaranteed by loop-simplify form");
2260 OuterLoopPreHeader =
2262 if (InnerLoopPreHeader == OuterLoop->getHeader())
2263 InnerLoopPreHeader =
2268 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
2270 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
2277 BranchInst *OuterLoopLatchBI =
2279 BranchInst *InnerLoopLatchBI =
2281 BranchInst *OuterLoopHeaderBI =
2283 BranchInst *InnerLoopHeaderBI =
2286 if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
2287 !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
2291 BranchInst *InnerLoopLatchPredecessorBI =
2293 BranchInst *OuterLoopPredecessorBI =
2296 if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
2299 if (!InnerLoopHeaderSuccessor)
2307 InnerLoopPreHeader, DTUpdates,
false);
2317 InnerLoopHeaderSuccessor, DTUpdates,
2325 OuterLoopPreHeader, DTUpdates);
2328 if (InnerLoopLatchBI->
getSuccessor(0) == InnerLoopHeader)
2329 InnerLoopLatchSuccessor = InnerLoopLatchBI->
getSuccessor(1);
2331 InnerLoopLatchSuccessor = InnerLoopLatchBI->
getSuccessor(0);
2334 InnerLoopLatchSuccessor, DTUpdates);
2336 if (OuterLoopLatchBI->
getSuccessor(0) == OuterLoopHeader)
2337 OuterLoopLatchSuccessor = OuterLoopLatchBI->
getSuccessor(1);
2339 OuterLoopLatchSuccessor = OuterLoopLatchBI->
getSuccessor(0);
2342 OuterLoopLatchSuccessor, DTUpdates);
2343 updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
2347 restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
2348 OuterLoopPreHeader);
2350 moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
2351 OuterLoopHeader, OuterLoopLatch, InnerLoop->
getExitBlock(),
2357 auto &OuterInnerReductions = LIL.getOuterInnerReductions();
2360 for (PHINode &
PHI : InnerLoopHeader->
phis())
2361 if (OuterInnerReductions.contains(&
PHI))
2364 for (PHINode &
PHI : OuterLoopHeader->
phis())
2365 if (OuterInnerReductions.contains(&
PHI))
2371 for (PHINode *
PHI : OuterLoopPHIs) {
2374 assert(OuterInnerReductions.count(
PHI) &&
"Expected a reduction PHI node");
2376 for (PHINode *
PHI : InnerLoopPHIs) {
2379 assert(OuterInnerReductions.count(
PHI) &&
"Expected a reduction PHI node");
2392 SmallVector<Instruction *, 4> MayNeedLCSSAPhis;
2393 for (Instruction &
I :
2401bool LoopInterchangeTransform::adjustLoopLinks() {
2403 bool Changed = adjustLoopBranches();
2408 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
2433 LLVM_DEBUG(
dbgs() <<
"Not valid loop candidate for interchange\n");
2441 <<
"Computed dependence info, invoking the transform.";
2445 if (!LoopInterchange(&AR.
SE, &AR.
LI, &DI, &AR.
DT, &AR, &ORE).run(LN))
2447 U.markLoopNestChanged(
true);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file defines the StringMap class.
ReachingDefInfo InstSet InstSet & Ignore
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB)
Move the contents of SourceBB to before the last instruction of TargetBB.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
This file defines the interface for the loop cache analysis.
SmallVector< Loop *, 4 > LoopVector
Loop::LoopBounds::Direction Direction
static cl::opt< unsigned int > MaxMemInstrCount("loop-interchange-max-meminstr-count", cl::init(64), cl::Hidden, cl::desc("Maximum number of load-store instructions that should be handled " "in the dependency matrix. Higher value may lead to more interchanges " "at the cost of compile-time"))
static cl::opt< int > LoopInterchangeCostThreshold("loop-interchange-threshold", cl::init(0), cl::Hidden, cl::desc("Interchange if you gain more than this number"))
static PHINode * findInnerReductionPhi(Loop *L, Value *V, SmallVectorImpl< Instruction * > &HasNoWrapInsts)
static cl::opt< unsigned int > MinLoopNestDepth("loop-interchange-min-loop-nest-depth", cl::init(2), cl::Hidden, cl::desc("Minimum depth of loop nest considered for the transform"))
static cl::opt< bool > EnableReduction2Memory("loop-interchange-reduction-to-mem", cl::init(false), cl::Hidden, cl::desc("Support for the inner-loop reduction pattern."))
static bool isComputableLoopNest(ScalarEvolution *SE, ArrayRef< Loop * > LoopList)
static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
static void simplifyLCSSAPhis(Loop *OuterLoop, Loop *InnerLoop)
This deals with a corner case when a LCSSA phi node appears in a non-exit block: the outer loop latch...
static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx, unsigned ToIndx)
static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader, BasicBlock *InnerLatch, BasicBlock *OuterHeader, BasicBlock *OuterLatch, BasicBlock *OuterExit, Loop *InnerLoop, LoopInfo *LI)
static void printDepMatrix(CharMatrix &DepMatrix)
static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2)
Swap instructions between BB1 and BB2 but keep terminators intact.
static bool areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL, SmallPtrSetImpl< PHINode * > &Reductions, PHINode *LcssaReduction)
static cl::opt< unsigned int > MaxLoopNestDepth("loop-interchange-max-loop-nest-depth", cl::init(10), cl::Hidden, cl::desc("Maximum depth of loop nest considered for the transform"))
static bool hasSupportedLoopDepth(ArrayRef< Loop * > LoopList, OptimizationRemarkEmitter &ORE)
static bool inThisOrder(const Instruction *Src, const Instruction *Dst)
Return true if Src appears before Dst in the same basic block.
static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
static bool canVectorize(const CharMatrix &DepMatrix, unsigned LoopId)
Return true if we can vectorize the loop specified by LoopId.
static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, unsigned InnerLoopId, unsigned OuterLoopId)
static Value * followLCSSA(Value *SV)
static void populateWorklist(Loop &L, LoopVector &LoopList)
static bool checkReductionKind(Loop *L, PHINode *PHI, SmallVectorImpl< Instruction * > &HasNoWrapInsts)
static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, Loop *L, DependenceInfo *DI, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE)
static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB, BasicBlock *NewBB, std::vector< DominatorTree::UpdateType > &DTUpdates, bool MustUpdateOnce=true)
static std::optional< bool > isLexicographicallyPositive(ArrayRef< char > DV, unsigned Begin, unsigned End)
static cl::list< RuleTy > Profitabilities("loop-interchange-profitabilities", cl::ZeroOrMore, cl::MiscFlags::CommaSeparated, cl::Hidden, cl::desc("List of profitability heuristics to be used. They are applied in " "the given order"), cl::list_init< RuleTy >({RuleTy::PerLoopCacheAnalysis, RuleTy::PerInstrOrderCost, RuleTy::ForVectorization}), cl::values(clEnumValN(RuleTy::PerLoopCacheAnalysis, "cache", "Prioritize loop cache cost"), clEnumValN(RuleTy::PerInstrOrderCost, "instorder", "Prioritize the IVs order of each instruction"), clEnumValN(RuleTy::ForVectorization, "vectorize", "Prioritize vectorization"), clEnumValN(RuleTy::Ignore, "ignore", "Ignore profitability, force interchange (does not " "work with other options)")))
static bool noDuplicateRulesAndIgnore(ArrayRef< RuleTy > Rules)
This file defines the interface for the loop nest analysis.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
loop Loop Strength Reduction
uint64_t IntrinsicInst * II
SmallVector< Value *, 8 > ValueVector
This file defines the SmallSet class.
This file defines the SmallVector class.
static bool isProfitable(const StableFunctionMap::StableFunctionEntries &SFS)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
const Function * getParent() const
Return the enclosing method, or null if none.
LLVM_ABI iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI void replacePhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
static std::unique_ptr< CacheCost > getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, std::optional< unsigned > TRT=std::nullopt)
Create a CacheCost for the loop nest rooted by Root.
CacheCostTy getLoopCost(const Loop &L) const
Return the estimated cost of loop L if the given loop is part of the loop nest associated with this o...
iterator find(const_arg_type_t< KeyT > Val)
DependenceInfo - This class is the main dependence-analysis driver.
LLVM_ABI std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool UnderRuntimeAssumptions=false)
depends - Tests for a dependence between the Src and Dst instructions.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
static LLVM_ABI bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
LLVM_ABI void moveAfter(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
void removeBlockFromLoop(BlockT *BB)
This removes the specified basic block from the current loop, updating the Blocks as appropriate.
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
BlockT * getHeader() const
iterator_range< block_iterator > blocks() const
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
void addBlockEntry(BlockT *BB)
This adds a basic block directly to the basic block list.
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
BlockT * getUniqueExitBlock() const
If getUniqueExitBlocks would return exactly one block, return that block.
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
void changeTopLevelLoop(LoopT *OldLoop, LoopT *NewLoop)
Replace the specified loop in the top-level loops list with the indicated loop.
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
static const BasicBlock & skipEmptyBlockUntil(const BasicBlock *From, const BasicBlock *End, bool CheckUniquePred=false)
Recursivelly traverse all empty 'single successor' basic blocks of From (if there are any).
ArrayRef< Loop * > getLoops() const
Get the loops in the nest.
Function * getParent() const
Return the function to which the loop-nest belongs.
Loop & getOutermostLoop() const
Return the outermost loop in the loop nest.
Represents a single loop in the control flow graph.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
StringRef getName() const
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
bool isComplete() const
If the PHI node is complete which means all of its parent's predecessors have incoming value in this ...
op_range incoming_values()
void setIncomingBlock(unsigned i, BasicBlock *BB)
void setIncomingValue(unsigned i, Value *V)
static unsigned getIncomingValueNumForOperand(unsigned i)
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Instruction * getExactFPMathInst() const
Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
unsigned getOpcode() const
static LLVM_ABI bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
LLVM_ABI SmallVector< Instruction *, 4 > getReductionOpChain(PHINode *Phi, Loop *L) const
Attempts to find a chain of operations from Phi to LoopExitInst that can be treated as a set of reduc...
RecurKind getRecurrenceKind() const
const Loop * getLoop() const
This class represents an analyzed expression in the program.
LLVM_ABI bool isZero() const
Return true if the expression is a constant zero.
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
std::pair< iterator, bool > try_emplace(StringRef Key, ArgsTy &&...Args)
Emplace a new element for the specified key into the map if the key isn't already in the map.
StringRef - Represent a constant reference to a string, i.e.
constexpr size_t size() const
size - Get the string size.
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI bool hasOneUser() const
Return true if there is exactly one user of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI User * getUniqueUndroppableUser()
Return true if there is exactly one unique user of this value that cannot be dropped (that user can h...
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ BasicBlock
Various leaf nodes.
list_initializer< Ty > list_init(ArrayRef< Ty > Vals)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
NodeAddr< PhiNode * > Phi
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
LLVM_ABI BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
auto successors(const MachineBasicBlock *BB)
LLVM_ABI bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
auto map_range(ContainerTy &&C, FuncTy F)
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FMinimumNum
FP min with llvm.minimumnum semantics.
@ Or
Bitwise or logical OR of integers.
@ FMinimum
FP min with llvm.minimum semantics.
@ Mul
Product of integers.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ Xor
Bitwise or logical XOR of integers.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMaximum
FP max with llvm.maximum semantics.
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ FMaximumNum
FP max with llvm.maximumnum semantics.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
FunctionAddr VTableAddr Next
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
auto predecessors(const MachineBasicBlock *BB)
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...