77 #define DEBUG_TYPE "block-placement"
79 STATISTIC(NumCondBranches,
"Number of conditional branches");
80 STATISTIC(NumUncondBranches,
"Number of unconditional branches");
82 "Potential frequency of taking conditional branches");
84 "Potential frequency of taking unconditional branches");
88 cl::desc(
"Force the alignment of all blocks in the function in log2 format "
89 "(e.g 4 means align on 16B boundaries)."),
93 "align-all-nofallthru-blocks",
94 cl::desc(
"Force the alignment of all blocks that have no fall-through "
95 "predecessors (i.e. don't add nops that are executed). In log2 "
96 "format (e.g 4 means align on 16B boundaries)."),
100 "max-bytes-for-alignment",
101 cl::desc(
"Forces the maximum bytes allowed to be emitted when padding for "
107 "block-placement-exit-block-bias",
108 cl::desc(
"Block frequency percentage a loop exit block needs "
109 "over the original exit to be considered the new exit."),
116 "loop-to-cold-block-ratio",
117 cl::desc(
"Outline loop blocks from loop chain if (frequency of loop) / "
118 "(frequency of block) is greater than this ratio"),
122 "force-loop-cold-block",
123 cl::desc(
"Force outlining cold blocks from loops."),
128 cl::desc(
"Model the cost of loop rotation more "
129 "precisely by using profile data."),
134 cl::desc(
"Force the use of precise cost "
135 "loop rotation strategy."),
140 cl::desc(
"Cost that models the probabilistic risk of an instruction "
141 "misfetch due to a jump comparing to falling through, whose cost "
146 cl::desc(
"Cost of jump instructions."),
150 cl::desc(
"Perform tail duplication during placement. "
151 "Creates more fallthrough opportunites in "
152 "outline branches."),
157 cl::desc(
"Perform branch folding during placement. "
158 "Reduces code size."),
163 "tail-dup-placement-threshold",
164 cl::desc(
"Instruction cutoff for tail duplication during layout. "
165 "Tail merging during layout is forced to have a threshold "
166 "that won't conflict."),
cl::init(2),
171 "tail-dup-placement-aggressive-threshold",
172 cl::desc(
"Instruction cutoff for aggressive tail duplication during "
173 "layout. Used at -O3. Tail merging during layout is forced to "
174 "have a threshold that won't conflict."),
cl::init(4),
179 "tail-dup-placement-penalty",
180 cl::desc(
"Cost penalty for blocks that can avoid breaking CFG by copying. "
181 "Copying can increase fallthrough, but it also increases icache "
182 "pressure. This parameter controls the penalty to account for that. "
183 "Percent as integer."),
189 "tail-dup-profile-percent-threshold",
190 cl::desc(
"If profile count information is used in tail duplication cost "
191 "model, the gained fall through number from tail duplication "
192 "should be at least this percent of hot count."),
197 "triangle-chain-count",
198 cl::desc(
"Number of triangle-shaped-CFG's that need to be in a row for the "
199 "triangle tail duplication heuristic to kick in. 0 to disable."),
251 BlockToChainMapType &BlockToChain;
260 : Blocks(1,
BB), BlockToChain(BlockToChain) {
261 assert(
BB &&
"Cannot create a chain with a null basic block");
262 BlockToChain[
BB] =
this;
270 iterator
begin() {
return Blocks.begin(); }
271 const_iterator
begin()
const {
return Blocks.begin(); }
274 iterator
end() {
return Blocks.end(); }
275 const_iterator
end()
const {
return Blocks.end(); }
294 assert(
BB &&
"Can't merge a null block.");
295 assert(!Blocks.empty() &&
"Can't merge into an empty chain.");
300 "Passed chain is null, but BB has entry in BlockToChain.");
301 Blocks.push_back(
BB);
302 BlockToChain[
BB] =
this;
306 assert(
BB == *Chain->begin() &&
"Passed BB is not head of Chain.");
307 assert(Chain->begin() != Chain->end());
312 Blocks.push_back(ChainBB);
313 assert(BlockToChain[ChainBB] == Chain &&
"Incoming blocks not in chain.");
314 BlockToChain[ChainBB] =
this;
335 unsigned UnscheduledPredecessors = 0;
343 struct BlockAndTailDupResult {
349 struct WeightedEdge {
369 std::unique_ptr<MBFIWrapper> MBFI;
402 bool UseProfileCount;
432 if (UseProfileCount) {
433 auto Count = MBFI->getBlockProfileCount(
BB);
439 return MBFI->getBlockFreq(
BB);
444 void initDupThreshold();
448 void markChainSuccessors(
450 const BlockFilterSet *BlockFilter =
nullptr);
454 void markBlockSuccessors(
457 const BlockFilterSet *BlockFilter =
nullptr);
460 collectViableSuccessors(
462 const BlockFilterSet *BlockFilter,
465 BlockFilterSet *BlockFilter);
468 BlockFilterSet *BlockFilter);
469 bool repeatedlyTailDuplicateBlock(
472 BlockChain &Chain, BlockFilterSet *BlockFilter,
474 bool maybeTailDuplicateBlock(
476 BlockChain &Chain, BlockFilterSet *BlockFilter,
478 bool &DuplicatedToLPred);
479 bool hasBetterLayoutPredecessor(
483 const BlockFilterSet *BlockFilter);
484 BlockAndTailDupResult selectBestSuccessor(
486 const BlockFilterSet *BlockFilter);
490 const BlockChain &PlacedChain,
492 const BlockFilterSet *BlockFilter);
501 const BlockFilterSet *BlockFilter);
504 BlockFilterSet *BlockFilter =
nullptr);
508 const BlockFilterSet &LoopBlockSet);
510 const BlockFilterSet &LoopBlockSet);
514 const BlockFilterSet &LoopBlockSet);
516 const MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
518 const MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
520 const MachineLoop &L,
const BlockFilterSet &LoopBlockSet,
522 BlockFilterSet collectLoopBlockSet(
const MachineLoop &L);
527 void rotateLoopWithProfile(
529 const BlockFilterSet &LoopBlockSet);
530 void buildCFGChains();
531 void optimizeBranches();
538 bool isProfitableToTailDup(
541 const BlockChain &Chain,
const BlockFilterSet *BlockFilter);
546 const BlockChain &Chain,
const BlockFilterSet *BlockFilter);
549 BlockAndTailDupResult getBestTrellisSuccessor(
553 const BlockFilterSet *BlockFilter);
556 static std::pair<WeightedEdge, WeightedEdge> getBestNonConflictingEdges(
562 bool canTailDuplicateUnplacedPreds(
564 const BlockChain &Chain,
const BlockFilterSet *BlockFilter);
568 void precomputeTriangleChains();
574 void assignBlockOrder(
const std::vector<const MachineBasicBlock *> &NewOrder);
577 void createCFGChainExtTsp();
588 bool allowTailDupPlacement()
const {
612 "Branch Probability Basic Block Placement",
false,
false)
629 OS <<
" ('" <<
BB->getName() <<
"')";
641 void MachineBlockPlacement::markChainSuccessors(
643 const BlockFilterSet *BlockFilter) {
647 markBlockSuccessors(Chain,
MBB, LoopHeaderBB, BlockFilter);
657 void MachineBlockPlacement::markBlockSuccessors(
665 if (BlockFilter && !BlockFilter->count(Succ))
667 BlockChain &SuccChain = *BlockToChain[Succ];
669 if (&Chain == &SuccChain || Succ == LoopHeaderBB)
674 if (SuccChain.UnscheduledPredecessors == 0 ||
675 --SuccChain.UnscheduledPredecessors > 0)
678 auto *NewBB = *SuccChain.begin();
679 if (NewBB->isEHPad())
680 EHPadWorkList.push_back(NewBB);
682 BlockWorkList.push_back(NewBB);
692 const BlockFilterSet *BlockFilter,
712 bool SkipSucc =
false;
713 if (Succ->isEHPad() || (BlockFilter && !BlockFilter->count(Succ))) {
716 BlockChain *SuccChain = BlockToChain[Succ];
717 if (SuccChain == &Chain) {
719 }
else if (Succ != *SuccChain->begin()) {
721 <<
" -> Mid chain!\n");
728 Successors.push_back(Succ);
731 return AdjustedSumProb;
742 if (SuccProbN >= SuccProbD)
754 if (
BB.succ_size() != Successors.
size())
760 if (!Successors.
count(Succ))
774 if (
BB->succ_size() == 1)
789 return (Gain / ThresholdProb).getFrequency() >= EntryFreq;
797 bool MachineBlockPlacement::isProfitableToTailDup(
800 const BlockChain &Chain,
const BlockFilterSet *BlockFilter) {
827 auto AdjustedSuccSumProb =
828 collectViableSuccessors(Succ, Chain, BlockFilter, SuccSuccs);
830 auto BBFreq = MBFI->getBlockFreq(
BB);
831 auto SuccFreq = MBFI->getBlockFreq(Succ);
834 uint64_t EntryFreq = MBFI->getEntryFreq();
837 if (SuccSuccs.size() == 0)
844 if (Prob > BestSuccSucc)
856 if (SuccPred == Succ || SuccPred ==
BB
857 || BlockToChain[SuccPred] == &Chain
858 || (BlockFilter && !BlockFilter->count(SuccPred)))
860 auto Freq = MBFI->getBlockFreq(SuccPred)
862 if (Freq > SuccBestPred)
930 if (UProb > AdjustedSuccSumProb / 2 &&
931 !hasBetterLayoutPredecessor(Succ, PDom, *BlockToChain[PDom], UProb, UProb,
939 (Qout +
std::min(Qin,
F) * AdjustedSuccSumProb +
951 bool MachineBlockPlacement::isTrellis(
954 const BlockChain &Chain,
const BlockFilterSet *BlockFilter) {
957 if (
BB->succ_size() != 2 || ViableSuccs.size() != 2)
969 if (Successors.count(SuccPred)) {
972 if (!Successors.count(CheckSucc))
976 const BlockChain *PredChain = BlockToChain[SuccPred];
977 if (SuccPred ==
BB || (BlockFilter && !BlockFilter->count(SuccPred)) ||
978 PredChain == &Chain || PredChain == BlockToChain[Succ])
982 if (!SeenPreds.
insert(SuccPred).second)
1000 std::pair<MachineBlockPlacement::WeightedEdge,
1001 MachineBlockPlacement::WeightedEdge>
1002 MachineBlockPlacement::getBestNonConflictingEdges(
1013 auto Cmp = [](WeightedEdge
A, WeightedEdge
B) {
return A.Weight >
B.Weight; };
1017 auto BestA = Edges[0].begin();
1018 auto BestB = Edges[1].begin();
1021 if (BestA->Src == BestB->Src) {
1023 auto SecondBestA = std::next(BestA);
1024 auto SecondBestB = std::next(BestB);
1027 if (BestAScore < BestBScore)
1028 BestA = SecondBestA;
1030 BestB = SecondBestB;
1033 if (BestB->Src ==
BB)
1035 return std::make_pair(*BestA, *BestB);
1045 MachineBlockPlacement::BlockAndTailDupResult
1046 MachineBlockPlacement::getBestTrellisSuccessor(
1050 const BlockFilterSet *BlockFilter) {
1052 BlockAndTailDupResult
Result = {
nullptr,
false};
1059 if (Successors.size() != 2 || ViableSuccs.size() != 2)
1065 for (
auto Succ : ViableSuccs) {
1069 if ((BlockFilter && !BlockFilter->count(SuccPred)) ||
1070 BlockToChain[SuccPred] == &Chain ||
1071 BlockToChain[SuccPred] == BlockToChain[Succ])
1075 Edges[SuccIndex].push_back({EdgeFreq, SuccPred, Succ});
1081 WeightedEdge BestA, BestB;
1082 std::tie(BestA, BestB) = getBestNonConflictingEdges(
BB, Edges);
1084 if (BestA.Src !=
BB) {
1088 LLVM_DEBUG(
dbgs() <<
"Trellis, but not one of the chosen edges.\n");
1095 if (BestA.Dest == BestB.Src) {
1101 if (allowTailDupPlacement() && shouldTailDuplicate(Succ2) &&
1102 canTailDuplicateUnplacedPreds(
BB, Succ2, Chain, BlockFilter) &&
1104 Chain, BlockFilter)) {
1108 <<
", probability: " << Succ2Prob
1109 <<
" (Tail Duplicate)\n");
1111 Result.ShouldTailDup =
true;
1117 ComputedEdges[BestB.Src] = { BestB.Dest,
false };
1119 auto TrellisSucc = BestA.Dest;
1123 <<
", probability: " << SuccProb <<
" (Trellis)\n");
1131 bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
1133 const BlockChain &Chain,
const BlockFilterSet *BlockFilter) {
1134 if (!shouldTailDuplicate(Succ))
1138 bool Duplicate =
true;
1140 unsigned int NumDup = 0;
1149 if (Pred ==
BB || (BlockFilter && !BlockFilter->count(Pred))
1150 || BlockToChain[Pred] == &Chain)
1200 if (
F->getFunction().hasProfileData())
1231 if ((NumDup > Succ->
succ_size()) || !Duplicate)
1254 void MachineBlockPlacement::precomputeTriangleChains() {
1255 struct TriangleChain {
1256 std::vector<MachineBasicBlock *> Edges;
1259 : Edges({src, dst}) {}
1262 assert(getKey()->isSuccessor(dst) &&
1263 "Attempting to append a block that is not a successor.");
1264 Edges.push_back(dst);
1267 unsigned count()
const {
return Edges.size() - 1; }
1270 return Edges.
back();
1283 if (
BB.succ_size() != 2)
1294 if (PDom ==
nullptr)
1301 if (!shouldTailDuplicate(PDom))
1303 bool CanTailDuplicate =
true;
1310 CanTailDuplicate =
false;
1316 if (!CanTailDuplicate)
1323 auto Found = TriangleChainMap.
find(&
BB);
1326 if (Found != TriangleChainMap.
end()) {
1327 TriangleChain Chain =
std::move(Found->second);
1328 TriangleChainMap.
erase(Found);
1330 TriangleChainMap.
insert(std::make_pair(Chain.getKey(),
std::move(Chain)));
1332 auto InsertResult = TriangleChainMap.
try_emplace(PDom, &
BB, PDom);
1333 assert(InsertResult.second &&
"Block seen twice.");
1341 for (
auto &ChainPair : TriangleChainMap) {
1342 TriangleChain &Chain = ChainPair.second;
1349 Chain.Edges.pop_back();
1353 <<
" as pre-computed based on triangles.\n");
1355 auto InsertResult = ComputedEdges.
insert({src, {dst,
true}});
1356 assert(InsertResult.second &&
"Block seen twice.");
1368 if (!
BB->getParent()->getFunction().hasProfileData())
1370 if (
BB->succ_size() == 2) {
1398 bool MachineBlockPlacement::hasBetterLayoutPredecessor(
1402 const BlockFilterSet *BlockFilter) {
1405 if (SuccChain.UnscheduledPredecessors == 0)
1526 bool BadCFGConflict =
false;
1529 BlockChain *PredChain = BlockToChain[Pred];
1530 if (Pred == Succ || PredChain == &SuccChain ||
1531 (BlockFilter && !BlockFilter->count(Pred)) ||
1532 PredChain == &Chain || Pred != *std::prev(PredChain->end()) ||
1553 if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.
getCompl()) {
1554 BadCFGConflict =
true;
1559 if (BadCFGConflict) {
1561 << SuccProb <<
" (prob) (non-cold CFG conflict)\n");
1578 MachineBlockPlacement::BlockAndTailDupResult
1579 MachineBlockPlacement::selectBestSuccessor(
1581 const BlockFilterSet *BlockFilter) {
1584 BlockAndTailDupResult BestSucc = {
nullptr,
false };
1588 auto AdjustedSumProb =
1589 collectViableSuccessors(
BB, Chain, BlockFilter, Successors);
1596 auto FoundEdge = ComputedEdges.
find(
BB);
1597 if (FoundEdge != ComputedEdges.
end()) {
1599 ComputedEdges.
erase(FoundEdge);
1600 BlockChain *SuccChain = BlockToChain[Succ];
1601 if (
BB->isSuccessor(Succ) && (!BlockFilter || BlockFilter->count(Succ)) &&
1602 SuccChain != &Chain && Succ == *SuccChain->begin())
1603 return FoundEdge->second;
1608 if (isTrellis(
BB, Successors, Chain, BlockFilter))
1609 return getBestTrellisSuccessor(
BB, Successors, AdjustedSumProb, Chain,
1622 BlockChain &SuccChain = *BlockToChain[Succ];
1625 if (hasBetterLayoutPredecessor(
BB, Succ, SuccChain, SuccProb, RealSuccProb,
1626 Chain, BlockFilter)) {
1628 if (allowTailDupPlacement() && shouldTailDuplicate(Succ))
1635 <<
", probability: " << SuccProb
1636 << (SuccChain.UnscheduledPredecessors != 0 ?
" (CFG break)" :
"")
1639 if (BestSucc.BB && BestProb >= SuccProb) {
1646 BestProb = SuccProb;
1654 [](std::tuple<BranchProbability, MachineBasicBlock *> L,
1655 std::tuple<BranchProbability, MachineBasicBlock *> R) {
1656 return std::get<0>(L) > std::get<0>(R);
1658 for (
auto &Tup : DupCandidates) {
1661 std::tie(DupProb, Succ) = Tup;
1662 if (DupProb < BestProb)
1664 if (canTailDuplicateUnplacedPreds(
BB, Succ, Chain, BlockFilter)
1665 && (isProfitableToTailDup(
BB, Succ, BestProb, Chain, BlockFilter))) {
1667 <<
", probability: " << DupProb
1668 <<
" (Tail Duplicate)\n");
1670 BestSucc.ShouldTailDup =
true;
1698 return BlockToChain.
lookup(
BB) == &Chain;
1701 if (WorkList.empty())
1704 bool IsEHPad = WorkList[0]->isEHPad();
1710 "EHPad mismatch between block and work list.");
1712 BlockChain &SuccChain = *BlockToChain[
MBB];
1713 if (&SuccChain == &Chain)
1716 assert(SuccChain.UnscheduledPredecessors == 0 &&
1717 "Found CFG-violating block");
1721 MBFI->printBlockFreq(
dbgs(), CandidateFreq) <<
" (freq)\n");
1741 if (BestBlock && (IsEHPad ^ (BestFreq >= CandidateFreq)))
1745 BestFreq = CandidateFreq;
1759 const BlockChain &PlacedChain,
1761 const BlockFilterSet *BlockFilter) {
1764 if (BlockFilter && !BlockFilter->count(&*
I))
1766 if (BlockToChain[&*
I] != &PlacedChain) {
1767 PrevUnplacedBlockIt =
I;
1771 return *BlockToChain[&*
I]->
begin();
1777 void MachineBlockPlacement::fillWorkLists(
1780 const BlockFilterSet *BlockFilter =
nullptr) {
1781 BlockChain &Chain = *BlockToChain[
MBB];
1782 if (!UpdatedPreds.
insert(&Chain).second)
1786 Chain.UnscheduledPredecessors == 0 &&
1787 "Attempting to place block with unscheduled predecessors in worklist.");
1789 assert(BlockToChain[ChainBB] == &Chain &&
1790 "Block in chain doesn't match BlockToChain map.");
1792 if (BlockFilter && !BlockFilter->count(Pred))
1794 if (BlockToChain[Pred] == &Chain)
1796 ++Chain.UnscheduledPredecessors;
1800 if (Chain.UnscheduledPredecessors != 0)
1805 EHPadWorkList.push_back(
BB);
1807 BlockWorkList.push_back(
BB);
1810 void MachineBlockPlacement::buildChain(
1812 BlockFilterSet *BlockFilter) {
1813 assert(HeadBB &&
"BB must not be null.\n");
1814 assert(BlockToChain[HeadBB] == &Chain &&
"BlockToChainMap mis-match.\n");
1818 markChainSuccessors(Chain, LoopHeaderBB, BlockFilter);
1821 assert(
BB &&
"null block found at end of chain in loop.");
1822 assert(BlockToChain[
BB] == &Chain &&
"BlockToChainMap mis-match in loop.");
1823 assert(*std::prev(Chain.end()) ==
BB &&
"BB Not found at end of chain.");
1828 auto Result = selectBestSuccessor(
BB, Chain, BlockFilter);
1830 bool ShouldTailDup =
Result.ShouldTailDup;
1831 if (allowTailDupPlacement())
1832 ShouldTailDup |= (BestSucc && canTailDuplicateUnplacedPreds(
BB, BestSucc,
1840 BestSucc = selectBestCandidateBlock(Chain, BlockWorkList);
1842 BestSucc = selectBestCandidateBlock(Chain, EHPadWorkList);
1845 BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockIt, BlockFilter);
1849 LLVM_DEBUG(
dbgs() <<
"Unnatural loop CFG detected, forcibly merging the "
1850 "layout successor until the CFG reduces\n");
1855 if (allowTailDupPlacement() && BestSucc && ShouldTailDup) {
1856 repeatedlyTailDuplicateBlock(BestSucc,
BB, LoopHeaderBB, Chain,
1857 BlockFilter, PrevUnplacedBlockIt);
1860 if (!
BB->isSuccessor(BestSucc))
1865 BlockChain &SuccChain = *BlockToChain[BestSucc];
1868 SuccChain.UnscheduledPredecessors = 0;
1871 markChainSuccessors(SuccChain, LoopHeaderBB, BlockFilter);
1872 Chain.merge(BestSucc, &SuccChain);
1873 BB = *std::prev(Chain.end());
1895 MachineBlockPlacement::canMoveBottomBlockToTop(
1905 if (OtherBB == BottomBlock)
1907 if (OtherBB == OldTop)
1915 MachineBlockPlacement::TopFallThroughFreq(
1917 const BlockFilterSet &LoopBlockSet) {
1920 BlockChain *PredChain = BlockToChain[Pred];
1921 if (!LoopBlockSet.count(Pred) &&
1922 (!PredChain || Pred == *std::prev(PredChain->end()))) {
1929 BlockChain *SuccChain = BlockToChain[Succ];
1932 if (!LoopBlockSet.count(Succ) && (SuccProb > TopProb) &&
1933 (!SuccChain || Succ == *SuccChain->begin())) {
1941 if (EdgeFreq > MaxFreq)
1971 MachineBlockPlacement::FallThroughGains(
1975 const BlockFilterSet &LoopBlockSet) {
1976 BlockFrequency FallThrough2Top = TopFallThroughFreq(OldTop, LoopBlockSet);
1979 FallThrough2Exit = MBFI->getBlockFreq(NewTop) *
1988 if (!LoopBlockSet.count(Pred))
1990 BlockChain *PredChain = BlockToChain[Pred];
1991 if (!PredChain || Pred == *std::prev(PredChain->end())) {
1994 if (EdgeFreq > FallThroughFromPred) {
1995 FallThroughFromPred = EdgeFreq;
2006 if ((Succ == NewTop) || (Succ == BestPred) || !LoopBlockSet.count(Succ))
2008 if (ComputedEdges.
find(Succ) != ComputedEdges.
end())
2010 BlockChain *SuccChain = BlockToChain[Succ];
2011 if ((SuccChain && (Succ != *SuccChain->begin())) ||
2012 (SuccChain == BlockToChain[BestPred]))
2016 if (EdgeFreq > NewFreq)
2021 if (NewFreq > OrigEdgeFreq) {
2026 FallThroughFromPred = 0;
2033 FallThroughFromPred;
2062 MachineBlockPlacement::findBestLoopTopHelper(
2065 const BlockFilterSet &LoopBlockSet) {
2069 BlockChain &HeaderChain = *BlockToChain[OldTop];
2070 if (!LoopBlockSet.count(*HeaderChain.begin()))
2072 if (OldTop != *HeaderChain.begin())
2081 if (!LoopBlockSet.count(Pred))
2086 << Pred->
succ_size() <<
" successors, ";
2087 MBFI->printBlockFreq(
dbgs(), Pred) <<
" freq\n");
2094 if (OtherBB == OldTop)
2098 if (!canMoveBottomBlockToTop(Pred, OldTop))
2103 if ((Gains > 0) && (Gains > BestGains ||
2118 (*BestPred->
pred_begin())->succ_size() == 1 &&
2131 MachineBlockPlacement::findBestLoopTop(
const MachineLoop &L,
2132 const BlockFilterSet &LoopBlockSet) {
2140 bool OptForSize =
F->getFunction().hasOptSize() ||
2147 while (NewTop != OldTop) {
2149 NewTop = findBestLoopTopHelper(OldTop, L, LoopBlockSet);
2150 if (NewTop != OldTop)
2151 ComputedEdges[NewTop] = { OldTop,
false };
2162 MachineBlockPlacement::findBestLoopExit(
const MachineLoop &L,
2163 const BlockFilterSet &LoopBlockSet,
2173 BlockChain &HeaderChain = *BlockToChain[L.
getHeader()];
2174 if (!LoopBlockSet.count(*HeaderChain.begin()))
2178 unsigned BestExitLoopDepth = 0;
2188 BlockChain &Chain = *BlockToChain[
MBB];
2191 if (
MBB != *std::prev(Chain.end()))
2200 bool HasLoopingSucc =
false;
2206 BlockChain &SuccChain = *BlockToChain[Succ];
2208 if (&Chain == &SuccChain) {
2215 if (LoopBlockSet.count(Succ)) {
2218 HasLoopingSucc =
true;
2222 unsigned SuccLoopDepth = 0;
2224 SuccLoopDepth = ExitLoop->getLoopDepth();
2225 if (ExitLoop->contains(&L))
2233 MBFI->printBlockFreq(
dbgs(), ExitEdgeFreq) <<
")\n");
2239 if (!ExitingBB || SuccLoopDepth > BestExitLoopDepth ||
2240 ExitEdgeFreq > BestExitEdgeFreq ||
2242 !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) {
2243 BestExitEdgeFreq = ExitEdgeFreq;
2248 if (!HasLoopingSucc) {
2250 ExitingBB = OldExitingBB;
2251 BestExitEdgeFreq = OldBestExitEdgeFreq;
2258 dbgs() <<
" No other candidate exit blocks, using loop header\n");
2262 LLVM_DEBUG(
dbgs() <<
" Loop has 1 block, using loop header as exit\n");
2269 if (!BlocksExitingToOuterLoop.
empty() &&
2270 !BlocksExitingToOuterLoop.
count(ExitingBB))
2275 ExitFreq = BestExitEdgeFreq;
2284 MachineBlockPlacement::hasViableTopFallthrough(
2286 const BlockFilterSet &LoopBlockSet) {
2288 BlockChain *PredChain = BlockToChain[Pred];
2289 if (!LoopBlockSet.count(Pred) &&
2290 (!PredChain || Pred == *std::prev(PredChain->end()))) {
2297 BlockChain *SuccChain = BlockToChain[Succ];
2300 if ((!SuccChain || Succ == *SuccChain->begin()) && SuccProb > TopProb) {
2318 void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
2321 const BlockFilterSet &LoopBlockSet) {
2329 if (Bottom == ExitingBB)
2336 bool ViableTopFallthrough = hasViableTopFallthrough(Top, LoopBlockSet);
2341 if (ViableTopFallthrough) {
2343 BlockChain *SuccChain = BlockToChain[Succ];
2344 if (!LoopBlockSet.count(Succ) &&
2345 (!SuccChain || Succ == *SuccChain->begin()))
2351 BlockFrequency FallThrough2Top = TopFallThroughFreq(Top, LoopBlockSet);
2352 if (FallThrough2Top >= ExitFreq)
2356 BlockChain::iterator ExitIt =
llvm::find(LoopChain, ExitingBB);
2357 if (ExitIt == LoopChain.end())
2379 if (ViableTopFallthrough) {
2380 assert(std::next(ExitIt) != LoopChain.end() &&
2381 "Exit should not be last BB");
2390 std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end());
2406 void MachineBlockPlacement::rotateLoopWithProfile(
2408 const BlockFilterSet &LoopBlockSet) {
2409 auto RotationPos = LoopChain.end();
2434 BlockChain *PredChain = BlockToChain[Pred];
2435 if (!LoopBlockSet.count(Pred) &&
2436 (!PredChain || Pred == *std::prev(PredChain->end()))) {
2437 auto EdgeFreq = MBFI->getBlockFreq(Pred) *
2439 auto FallThruCost = ScaleBlockFrequency(EdgeFreq,
MisfetchCost);
2443 FallThruCost += ScaleBlockFrequency(EdgeFreq,
JumpInstCost);
2444 HeaderFallThroughCost =
std::max(HeaderFallThroughCost, FallThruCost);
2453 for (
auto BB : LoopChain) {
2455 for (
auto *Succ :
BB->successors()) {
2456 BlockChain *SuccChain = BlockToChain[Succ];
2457 if (!LoopBlockSet.count(Succ) &&
2458 (!SuccChain || Succ == *SuccChain->begin())) {
2460 LargestExitEdgeProb =
std::max(LargestExitEdgeProb, SuccProb);
2464 auto ExitFreq = MBFI->getBlockFreq(
BB) * LargestExitEdgeProb;
2472 for (
auto Iter = LoopChain.begin(), TailIter = std::prev(LoopChain.end()),
2473 EndIter = LoopChain.end();
2474 Iter != EndIter; Iter++, TailIter++) {
2477 if (TailIter == LoopChain.end())
2478 TailIter = LoopChain.begin();
2480 auto TailBB = *TailIter;
2488 if (Iter != LoopChain.begin())
2489 Cost += HeaderFallThroughCost;
2493 for (
auto &ExitWithFreq : ExitsWithFreq)
2494 if (TailBB != ExitWithFreq.first)
2495 Cost += ExitWithFreq.second;
2511 if (TailBB->isSuccessor(*Iter)) {
2512 auto TailBBFreq = MBFI->getBlockFreq(TailBB);
2513 if (TailBB->succ_size() == 1)
2514 Cost += ScaleBlockFrequency(TailBBFreq.getFrequency(),
2516 else if (TailBB->succ_size() == 2) {
2518 auto TailToHeadFreq = TailBBFreq * TailToHeadProb;
2520 ? TailBBFreq * TailToHeadProb.
getCompl()
2522 Cost += ScaleBlockFrequency(TailToHeadFreq,
MisfetchCost) +
2531 if (Cost < SmallestRotationCost) {
2532 SmallestRotationCost = Cost;
2537 if (RotationPos != LoopChain.end()) {
2539 <<
" to the top\n");
2540 std::rotate(LoopChain.begin(), RotationPos, LoopChain.end());
2549 MachineBlockPlacement::collectLoopBlockSet(
const MachineLoop &L) {
2550 BlockFilterSet LoopBlockSet;
2565 LoopFreq += MBFI->getBlockFreq(LoopPred) *
2569 if (LoopBlockSet.count(LoopBB))
2574 BlockChain *Chain = BlockToChain[LoopBB];
2576 LoopBlockSet.
insert(ChainBB);
2581 return LoopBlockSet;
2590 void MachineBlockPlacement::buildLoopChains(
const MachineLoop &L) {
2594 buildLoopChains(*InnerLoop);
2596 assert(BlockWorkList.empty() &&
2597 "BlockWorkList not empty when starting to build loop chains.");
2598 assert(EHPadWorkList.empty() &&
2599 "EHPadWorkList not empty when starting to build loop chains.");
2600 BlockFilterSet LoopBlockSet = collectLoopBlockSet(L);
2605 bool RotateLoopWithProfile =
2621 PreferredLoopExit =
nullptr;
2623 if (!RotateLoopWithProfile && LoopTop == L.getHeader())
2624 PreferredLoopExit = findBestLoopExit(L, LoopBlockSet, ExitFreq);
2626 BlockChain &LoopChain = *BlockToChain[LoopTop];
2632 assert(LoopChain.UnscheduledPredecessors == 0 &&
2633 "LoopChain should not have unscheduled predecessors.");
2634 UpdatedPreds.
insert(&LoopChain);
2637 fillWorkLists(LoopBB, UpdatedPreds, &LoopBlockSet);
2639 buildChain(LoopTop, LoopChain, &LoopBlockSet);
2641 if (RotateLoopWithProfile)
2642 rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
2644 rotateLoop(LoopChain, PreferredLoopExit, ExitFreq, LoopBlockSet);
2648 bool BadLoop =
false;
2649 if (LoopChain.UnscheduledPredecessors) {
2651 dbgs() <<
"Loop chain contains a block without its preds placed!\n"
2652 <<
" Loop header: " << getBlockName(*L.block_begin()) <<
"\n"
2653 <<
" Chain header: " << getBlockName(*LoopChain.begin()) <<
"\n";
2657 if (!LoopBlockSet.remove(ChainBB)) {
2661 dbgs() <<
"Loop chain contains a block not contained by the loop!\n"
2662 <<
" Loop header: " <<
getBlockName(*L.block_begin()) <<
"\n"
2663 <<
" Chain header: " <<
getBlockName(*LoopChain.begin()) <<
"\n"
2668 if (!LoopBlockSet.empty()) {
2671 dbgs() <<
"Loop contains blocks never placed into a chain!\n"
2672 <<
" Loop header: " <<
getBlockName(*L.block_begin()) <<
"\n"
2673 <<
" Chain header: " <<
getBlockName(*LoopChain.begin()) <<
"\n"
2676 assert(!BadLoop &&
"Detected problems with the placement of this loop.");
2679 BlockWorkList.
clear();
2680 EHPadWorkList.
clear();
2683 void MachineBlockPlacement::buildCFGChains() {
2691 new (ChainAllocator.
Allocate()) BlockChain(BlockToChain,
BB);
2704 assert(NextFI != FE &&
"Can't fallthrough past the last block.");
2705 LLVM_DEBUG(
dbgs() <<
"Pre-merging due to unanalyzable fallthrough: "
2708 Chain->merge(NextBB,
nullptr);
2710 BlocksWithUnanalyzableExits.
insert(&*
BB);
2718 PreferredLoopExit =
nullptr;
2720 buildLoopChains(*L);
2722 assert(BlockWorkList.empty() &&
2723 "BlockWorkList should be empty before building final chain.");
2724 assert(EHPadWorkList.empty() &&
2725 "EHPadWorkList should be empty before building final chain.");
2729 fillWorkLists(&
MBB, UpdatedPreds);
2731 BlockChain &FunctionChain = *BlockToChain[&
F->front()];
2732 buildChain(&
F->front(), FunctionChain);
2739 bool BadFunc =
false;
2740 FunctionBlockSetType FunctionBlockSet;
2742 FunctionBlockSet.insert(&
MBB);
2745 if (!FunctionBlockSet.erase(ChainBB)) {
2747 dbgs() <<
"Function chain contains a block not in the function!\n"
2748 <<
" Bad block: " << getBlockName(ChainBB) <<
"\n";
2751 if (!FunctionBlockSet.empty()) {
2753 for (MachineBasicBlock *RemainingBB : FunctionBlockSet)
2754 dbgs() <<
"Function contains blocks never placed into a chain!\n"
2755 <<
" Bad block: " << getBlockName(RemainingBB) <<
"\n";
2757 assert(!BadFunc &&
"Detected problems with the block placement.");
2763 F->getNumBlockIDs());
2766 for (
auto &
MBB : *
F) {
2767 if (LastMBB !=
nullptr)
2771 OriginalLayoutSuccessors[
F->back().getNumber()] =
nullptr;
2778 LLVM_DEBUG(
dbgs() << (ChainBB == *FunctionChain.begin() ?
"Placing chain "
2782 F->splice(InsertPos, ChainBB);
2787 if (ChainBB == *FunctionChain.begin())
2798 if (!BlocksWithUnanalyzableExits.
count(PrevBB)) {
2804 "Unexpected block with un-analyzable fallthrough!");
2806 TBB = FBB =
nullptr;
2844 BlockWorkList.
clear();
2845 EHPadWorkList.
clear();
2848 void MachineBlockPlacement::optimizeBranches() {
2849 BlockChain &FunctionChain = *BlockToChain[&
F->front()];
2864 if (TBB && !
Cond.empty() && FBB &&
2881 void MachineBlockPlacement::alignBlocks() {
2887 if (
F->getFunction().hasMinSize() ||
2890 BlockChain &FunctionChain = *BlockToChain[&
F->front()];
2891 if (FunctionChain.begin() == FunctionChain.end())
2898 if (ChainBB == *FunctionChain.begin())
2916 if (Freq < WeightedEntryFreq)
2923 if (Freq < (LoopHeaderFreq * ColdProb))
2936 auto DetermineMaxAlignmentPadding = [&]() {
2943 ChainBB->setMaxBytesForAlignment(MaxBytes);
2949 ChainBB->setAlignment(
Align);
2950 DetermineMaxAlignmentPadding();
2960 BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
2961 if (LayoutEdgeFreq <= (Freq * ColdProb)) {
2962 ChainBB->setAlignment(
Align);
2963 DetermineMaxAlignmentPadding();
2983 bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
2986 BlockChain &Chain, BlockFilterSet *BlockFilter,
2988 bool Removed, DuplicatedToLPred;
2989 bool DuplicatedToOriginalLPred;
2990 Removed = maybeTailDuplicateBlock(
BB, LPred, Chain, BlockFilter,
2991 PrevUnplacedBlockIt,
2995 DuplicatedToOriginalLPred = DuplicatedToLPred;
3000 while (DuplicatedToLPred && Removed) {
3007 BlockChain::iterator ChainEnd = Chain.end();
3008 DupBB = *(--ChainEnd);
3010 if (ChainEnd == Chain.begin())
3012 DupPred = *std::prev(ChainEnd);
3013 Removed = maybeTailDuplicateBlock(DupBB, DupPred, Chain, BlockFilter,
3014 PrevUnplacedBlockIt,
3022 LPred = *std::prev(Chain.end());
3023 if (DuplicatedToOriginalLPred)
3024 markBlockSuccessors(Chain, LPred, LoopHeaderBB, BlockFilter);
3041 bool MachineBlockPlacement::maybeTailDuplicateBlock(
3043 BlockChain &Chain, BlockFilterSet *BlockFilter,
3045 bool &DuplicatedToLPred) {
3046 DuplicatedToLPred =
false;
3047 if (!shouldTailDuplicate(
BB))
3050 LLVM_DEBUG(
dbgs() <<
"Redoing tail duplication for Succ#" <<
BB->getNumber()
3055 bool Removed =
false;
3056 auto RemovalCallback =
3062 bool InWorkList =
true;
3064 if (BlockToChain.
count(RemBB)) {
3065 BlockChain *Chain = BlockToChain[RemBB];
3066 InWorkList = Chain->UnscheduledPredecessors == 0;
3067 Chain->remove(RemBB);
3068 BlockToChain.
erase(RemBB);
3072 if (&(*PrevUnplacedBlockIt) == RemBB) {
3073 PrevUnplacedBlockIt++;
3079 if (RemBB->isEHPad())
3080 RemoveList = EHPadWorkList;
3086 BlockFilter->remove(RemBB);
3090 MLI->removeBlock(RemBB);
3091 if (RemBB == PreferredLoopExit)
3092 PreferredLoopExit =
nullptr;
3097 auto RemovalCallbackRef =
3104 if (
F->getFunction().hasProfileData()) {
3106 findDuplicateCandidates(CandidatePreds,
BB, BlockFilter);
3107 if (CandidatePreds.size() == 0)
3109 if (CandidatePreds.size() <
BB->pred_size())
3110 CandidatePtr = &CandidatePreds;
3113 &RemovalCallbackRef, CandidatePtr);
3116 DuplicatedToLPred =
false;
3119 BlockChain* PredChain = BlockToChain[Pred];
3121 DuplicatedToLPred =
true;
3122 if (Pred == LPred || (BlockFilter && !BlockFilter->count(Pred))
3123 || PredChain == &Chain)
3126 if (BlockFilter && !BlockFilter->count(NewSucc))
3128 BlockChain *NewChain = BlockToChain[NewSucc];
3129 if (NewChain != &Chain && NewChain != PredChain)
3130 NewChain->UnscheduledPredecessors++;
3140 if (!
MI.isPHI() && !
MI.isMetaInstruction())
3156 BlockFilterSet *BlockFilter) {
3159 if (BlockFilter && !BlockFilter->count(Pred))
3161 BlockChain *PredChain = BlockToChain[Pred];
3162 if (PredChain && (Pred != *std::prev(PredChain->end())))
3169 if (BlockFilter && !BlockFilter->count(Succ))
3171 BlockChain *SuccChain = BlockToChain[Succ];
3172 if (SuccChain && (Succ != *SuccChain->begin()))
3175 if (SuccProb > BestProb)
3176 BestProb = SuccProb;
3180 if (BBProb <= BestProb)
3187 return Gain > scaleThreshold(
BB);
3192 void MachineBlockPlacement::findDuplicateCandidates(
3195 BlockFilterSet *BlockFilter) {
3207 return MBFI->getBlockFreq(A) > MBFI->getBlockFreq(
B);
3212 auto SuccIt = Succs.begin();
3213 if (SuccIt != Succs.end()) {
3264 if (!Fallthrough && isBestSuccessor(
BB, Pred, BlockFilter)) {
3266 if (SuccIt != Succs.end())
3272 BlockFrequency OrigCost = PredFreq + PredFreq * DefaultBranchProb;
3274 if (SuccIt == Succs.end()) {
3276 if (Succs.size() > 0)
3277 DupCost += PredFreq;
3280 DupCost += PredFreq;
3284 assert(OrigCost >= DupCost);
3285 OrigCost -= DupCost;
3286 if (OrigCost > BBDupThreshold) {
3287 Candidates.push_back(Pred);
3288 if (SuccIt != Succs.end())
3296 if ((Candidates.size() < Preds.size()) && (Candidates.size() > 0)) {
3297 Candidates[0] = Candidates.back();
3298 Candidates.pop_back();
3303 void MachineBlockPlacement::initDupThreshold() {
3305 if (!
F->getFunction().hasProfileData())
3311 UseProfileCount =
true;
3325 DupThreshold = MaxFreq * ThresholdProb;
3326 UseProfileCount =
false;
3329 bool MachineBlockPlacement::runOnMachineFunction(
MachineFunction &MF) {
3334 if (std::next(MF.
begin()) == MF.
end())
3338 MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
3339 MBFI = std::make_unique<MBFIWrapper>(
3340 getAnalysis<MachineBlockFrequencyInfo>());
3341 MLI = &getAnalysis<MachineLoopInfo>();
3345 PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
3351 PreferredLoopExit =
nullptr;
3354 "BlockToChain map should be empty before starting placement.");
3356 "Computed Edge map should be empty before starting placement.");
3381 TailDupSize =
TII->getTailDuplicateSize(PassConfig->
getOptLevel());
3383 if (allowTailDupPlacement()) {
3384 MPDT = &getAnalysis<MachinePostDominatorTree>();
3389 bool PreRegAlloc =
false;
3390 TailDup.
initMF(MF, PreRegAlloc, MBPI, MBFI.get(), PSI,
3392 precomputeTriangleChains();
3404 if (MF.
size() > 3 && EnableTailMerge) {
3412 BlockToChain.
clear();
3413 ComputedEdges.
clear();
3429 createCFGChainExtTsp();
3435 BlockToChain.
clear();
3436 ComputedEdges.
clear();
3439 bool HasMaxBytesOverride =
3445 if (HasMaxBytesOverride)
3454 for (
auto MBI = std::next(MF.begin()), MBE = MF.end(); MBI != MBE; ++MBI) {
3455 auto LayoutPred = std::prev(MBI);
3457 if (HasMaxBytesOverride)
3468 MBFI->view(
"MBP." + MF.getName(),
false);
3476 void MachineBlockPlacement::applyExtTsp() {
3480 std::vector<const MachineBasicBlock *> CurrentBlockOrder;
3481 CurrentBlockOrder.reserve(
F->size());
3482 size_t NumBlocks = 0;
3484 BlockIndex[&
MBB] = NumBlocks++;
3485 CurrentBlockOrder.push_back(&
MBB);
3488 auto BlockSizes = std::vector<uint64_t>(
F->size());
3489 auto BlockCounts = std::vector<uint64_t>(
F->size());
3503 int NumInsts = std::distance(NonDbgInsts.begin(), NonDbgInsts.end());
3504 BlockSizes[BlockIndex[&
MBB]] = 4 * NumInsts;
3509 auto Edge = std::make_pair(BlockIndex[&
MBB], BlockIndex[Succ]);
3514 LLVM_DEBUG(
dbgs() <<
"Applying ext-tsp layout for |V| = " <<
F->size()
3515 <<
" with profile = " <<
F->getFunction().hasProfileData()
3516 <<
" (" <<
F->getName().str() <<
")"
3519 dbgs() <<
format(
" original layout score: %0.2f\n",
3524 std::vector<const MachineBasicBlock *> NewBlockOrder;
3525 NewBlockOrder.reserve(
F->size());
3527 NewBlockOrder.push_back(CurrentBlockOrder[Node]);
3534 assignBlockOrder(NewBlockOrder);
3537 void MachineBlockPlacement::assignBlockOrder(
3538 const std::vector<const MachineBasicBlock *> &NewBlockOrder) {
3539 assert(
F->size() == NewBlockOrder.size() &&
"Incorrect size of block order");
3540 F->RenumberBlocks();
3542 bool HasChanges =
false;
3543 for (
size_t I = 0;
I < NewBlockOrder.size();
I++) {
3544 if (NewBlockOrder[
I] !=
F->getBlockNumbered(
I)) {
3554 for (
auto &
MBB : *
F) {
3561 NewIndex[
MBB] = NewIndex.
size();
3564 return NewIndex[&L] < NewIndex[&
R];
3571 for (
auto &
MBB : *
F) {
3578 if (FTMBB && (NextMBB == EndIt || &*NextMBB != FTMBB)) {
3592 F->verify(
this,
"After optimized block reordering");
3596 void MachineBlockPlacement::createCFGChainExtTsp() {
3597 BlockToChain.
clear();
3598 ComputedEdges.
clear();
3602 BlockChain *FunctionChain =
3603 new (ChainAllocator.
Allocate()) BlockChain(BlockToChain, HeadBB);
3608 FunctionChain->merge(&
MBB,
nullptr);
3651 "Basic Block Placement Stats",
false,
false)
3657 bool MachineBlockPlacementStats::runOnMachineFunction(
MachineFunction &
F) {
3659 if (std::next(
F.begin()) ==
F.end())
3662 MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
3663 MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
3668 (
MBB.
succ_size() > 1) ? NumCondBranches : NumUncondBranches;
3670 (
MBB.
succ_size() > 1) ? CondBranchTakenFreq : UncondBranchTakenFreq;