78#define DEBUG_TYPE "block-placement"
80STATISTIC(NumCondBranches,
"Number of conditional branches");
81STATISTIC(NumUncondBranches,
"Number of unconditional branches");
83 "Potential frequency of taking conditional branches");
85 "Potential frequency of taking unconditional branches");
89 cl::desc(
"Force the alignment of all blocks in the function in log2 format "
90 "(e.g 4 means align on 16B boundaries)."),
94 "align-all-nofallthru-blocks",
95 cl::desc(
"Force the alignment of all blocks that have no fall-through "
96 "predecessors (i.e. don't add nops that are executed). In log2 "
97 "format (e.g 4 means align on 16B boundaries)."),
101 "max-bytes-for-alignment",
102 cl::desc(
"Forces the maximum bytes allowed to be emitted when padding for "
108 "block-placement-exit-block-bias",
109 cl::desc(
"Block frequency percentage a loop exit block needs "
110 "over the original exit to be considered the new exit."),
117 "loop-to-cold-block-ratio",
118 cl::desc(
"Outline loop blocks from loop chain if (frequency of loop) / "
119 "(frequency of block) is greater than this ratio"),
123 "force-loop-cold-block",
124 cl::desc(
"Force outlining cold blocks from loops."),
129 cl::desc(
"Model the cost of loop rotation more "
130 "precisely by using profile data."),
135 cl::desc(
"Force the use of precise cost "
136 "loop rotation strategy."),
141 cl::desc(
"Cost that models the probabilistic risk of an instruction "
142 "misfetch due to a jump comparing to falling through, whose cost "
147 cl::desc(
"Cost of jump instructions."),
151 cl::desc(
"Perform tail duplication during placement. "
152 "Creates more fallthrough opportunites in "
153 "outline branches."),
158 cl::desc(
"Perform branch folding during placement. "
159 "Reduces code size."),
164 "tail-dup-placement-threshold",
165 cl::desc(
"Instruction cutoff for tail duplication during layout. "
166 "Tail merging during layout is forced to have a threshold "
167 "that won't conflict."),
cl::init(2),
172 "tail-dup-placement-aggressive-threshold",
173 cl::desc(
"Instruction cutoff for aggressive tail duplication during "
174 "layout. Used at -O3. Tail merging during layout is forced to "
175 "have a threshold that won't conflict."),
cl::init(4),
180 "tail-dup-placement-penalty",
181 cl::desc(
"Cost penalty for blocks that can avoid breaking CFG by copying. "
182 "Copying can increase fallthrough, but it also increases icache "
183 "pressure. This parameter controls the penalty to account for that. "
184 "Percent as integer."),
190 "tail-dup-profile-percent-threshold",
191 cl::desc(
"If profile count information is used in tail duplication cost "
192 "model, the gained fall through number from tail duplication "
193 "should be at least this percent of hot count."),
198 "triangle-chain-count",
199 cl::desc(
"Number of triangle-shaped-CFG's that need to be in a row for the "
200 "triangle tail duplication heuristic to kick in. 0 to disable."),
210 "renumber-blocks-before-view",
212 "If true, basic blocks are re-numbered before MBP layout is printed "
213 "into a dot graph. Only used when a function is being printed."),
263 BlockToChainMapType &BlockToChain;
272 :
Blocks(1, BB), BlockToChain(BlockToChain) {
273 assert(BB &&
"Cannot create a chain with a null basic block");
274 BlockToChain[BB] =
this;
282 iterator begin() {
return Blocks.begin(); }
286 iterator end() {
return Blocks.end(); }
290 for(iterator i = begin(); i != end(); ++i) {
306 assert(BB &&
"Can't merge a null block.");
307 assert(!
Blocks.empty() &&
"Can't merge into an empty chain.");
311 assert(!BlockToChain[BB] &&
312 "Passed chain is null, but BB has entry in BlockToChain.");
314 BlockToChain[BB] =
this;
318 assert(BB == *Chain->
begin() &&
"Passed BB is not head of Chain.");
319 assert(Chain->begin() != Chain->end());
324 Blocks.push_back(ChainBB);
325 assert(BlockToChain[ChainBB] == Chain &&
"Incoming blocks not in chain.");
326 BlockToChain[ChainBB] =
this;
347 unsigned UnscheduledPredecessors = 0;
355 struct BlockAndTailDupResult {
361 struct WeightedEdge {
381 std::unique_ptr<MBFIWrapper> MBFI;
414 bool UseProfileCount =
false;
444 if (UseProfileCount) {
445 auto Count = MBFI->getBlockProfileCount(BB);
451 return MBFI->getBlockFreq(BB);
456 void initDupThreshold();
460 void markChainSuccessors(
462 const BlockFilterSet *BlockFilter =
nullptr);
466 void markBlockSuccessors(
469 const BlockFilterSet *BlockFilter =
nullptr);
472 collectViableSuccessors(
474 const BlockFilterSet *BlockFilter,
477 BlockFilterSet *BlockFilter);
480 BlockFilterSet *BlockFilter);
481 bool repeatedlyTailDuplicateBlock(
484 BlockChain &Chain, BlockFilterSet *BlockFilter,
486 bool maybeTailDuplicateBlock(
488 BlockChain &Chain, BlockFilterSet *BlockFilter,
490 bool &DuplicatedToLPred);
491 bool hasBetterLayoutPredecessor(
495 const BlockFilterSet *BlockFilter);
496 BlockAndTailDupResult selectBestSuccessor(
498 const BlockFilterSet *BlockFilter);
502 const BlockChain &PlacedChain,
504 const BlockFilterSet *BlockFilter);
513 const BlockFilterSet *BlockFilter);
516 BlockFilterSet *BlockFilter =
nullptr);
520 const BlockFilterSet &LoopBlockSet);
522 const BlockFilterSet &LoopBlockSet);
526 const BlockFilterSet &LoopBlockSet);
528 const MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
530 const MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
532 const MachineLoop &L,
const BlockFilterSet &LoopBlockSet,
534 BlockFilterSet collectLoopBlockSet(
const MachineLoop &L);
539 void rotateLoopWithProfile(
541 const BlockFilterSet &LoopBlockSet);
542 void buildCFGChains();
543 void optimizeBranches();
550 bool isProfitableToTailDup(
553 const BlockChain &Chain,
const BlockFilterSet *BlockFilter);
558 const BlockChain &Chain,
const BlockFilterSet *BlockFilter);
561 BlockAndTailDupResult getBestTrellisSuccessor(
565 const BlockFilterSet *BlockFilter);
568 static std::pair<WeightedEdge, WeightedEdge> getBestNonConflictingEdges(
574 bool canTailDuplicateUnplacedPreds(
576 const BlockChain &Chain,
const BlockFilterSet *BlockFilter);
580 void precomputeTriangleChains();
586 void assignBlockOrder(
const std::vector<const MachineBasicBlock *> &NewOrder);
589 void createCFGChainExtTsp();
600 bool allowTailDupPlacement()
const {
619char MachineBlockPlacement::ID = 0;
624 "Branch Probability Basic Block Placement",
false,
false)
653void MachineBlockPlacement::markChainSuccessors(
655 const BlockFilterSet *BlockFilter) {
659 markBlockSuccessors(Chain,
MBB, LoopHeaderBB, BlockFilter);
669void MachineBlockPlacement::markBlockSuccessors(
677 if (BlockFilter && !BlockFilter->count(Succ))
679 BlockChain &SuccChain = *BlockToChain[Succ];
681 if (&Chain == &SuccChain || Succ == LoopHeaderBB)
686 if (SuccChain.UnscheduledPredecessors == 0 ||
687 --SuccChain.UnscheduledPredecessors > 0)
690 auto *NewBB = *SuccChain.
begin();
691 if (NewBB->isEHPad())
704 const BlockFilterSet *BlockFilter,
724 bool SkipSucc =
false;
725 if (Succ->isEHPad() || (BlockFilter && !BlockFilter->count(Succ))) {
728 BlockChain *SuccChain = BlockToChain[Succ];
729 if (SuccChain == &Chain) {
731 }
else if (Succ != *SuccChain->begin()) {
733 <<
" -> Mid chain!\n");
743 return AdjustedSumProb;
754 if (SuccProbN >= SuccProbD)
769 if (Successors.
count(&BB))
772 if (!Successors.
count(Succ))
801 return (Gain / ThresholdProb).getFrequency() >= EntryFreq;
809bool MachineBlockPlacement::isProfitableToTailDup(
812 const BlockChain &Chain,
const BlockFilterSet *BlockFilter) {
839 auto AdjustedSuccSumProb =
840 collectViableSuccessors(Succ, Chain, BlockFilter, SuccSuccs);
842 auto BBFreq = MBFI->getBlockFreq(BB);
843 auto SuccFreq = MBFI->getBlockFreq(Succ);
846 uint64_t EntryFreq = MBFI->getEntryFreq();
849 if (SuccSuccs.
size() == 0)
856 if (Prob > BestSuccSucc)
868 if (SuccPred == Succ || SuccPred == BB
869 || BlockToChain[SuccPred] == &Chain
870 || (BlockFilter && !BlockFilter->count(SuccPred)))
872 auto Freq = MBFI->getBlockFreq(SuccPred)
874 if (Freq > SuccBestPred)
942 if (UProb > AdjustedSuccSumProb / 2 &&
943 !hasBetterLayoutPredecessor(Succ, PDom, *BlockToChain[PDom], UProb, UProb,
947 (
P + V), (Qout + std::max(Qin,
F) * VProb + std::min(Qin,
F) * UProb),
951 (Qout + std::min(Qin,
F) * AdjustedSuccSumProb +
952 std::max(Qin,
F) * UProb),
963bool MachineBlockPlacement::isTrellis(
966 const BlockChain &Chain,
const BlockFilterSet *BlockFilter) {
981 if (Successors.count(SuccPred)) {
984 if (!Successors.count(CheckSucc))
988 const BlockChain *PredChain = BlockToChain[SuccPred];
989 if (SuccPred == BB || (BlockFilter && !BlockFilter->count(SuccPred)) ||
990 PredChain == &Chain || PredChain == BlockToChain[Succ])
994 if (!SeenPreds.
insert(SuccPred).second)
1012std::pair<MachineBlockPlacement::WeightedEdge,
1013 MachineBlockPlacement::WeightedEdge>
1014MachineBlockPlacement::getBestNonConflictingEdges(
1025 auto Cmp = [](WeightedEdge
A, WeightedEdge
B) {
return A.Weight >
B.Weight; };
1029 auto BestA = Edges[0].begin();
1030 auto BestB = Edges[1].begin();
1033 if (BestA->Src == BestB->Src) {
1035 auto SecondBestA = std::next(BestA);
1036 auto SecondBestB = std::next(BestB);
1039 if (BestAScore < BestBScore)
1040 BestA = SecondBestA;
1042 BestB = SecondBestB;
1045 if (BestB->Src == BB)
1047 return std::make_pair(*BestA, *BestB);
1057MachineBlockPlacement::BlockAndTailDupResult
1058MachineBlockPlacement::getBestTrellisSuccessor(
1062 const BlockFilterSet *BlockFilter) {
1064 BlockAndTailDupResult
Result = {
nullptr,
false};
1071 if (Successors.
size() != 2 || ViableSuccs.
size() != 2)
1077 for (
auto *Succ : ViableSuccs) {
1081 if ((BlockFilter && !BlockFilter->count(SuccPred)) ||
1082 BlockToChain[SuccPred] == &Chain ||
1083 BlockToChain[SuccPred] == BlockToChain[Succ])
1087 Edges[SuccIndex].
push_back({EdgeFreq, SuccPred, Succ});
1093 WeightedEdge BestA, BestB;
1094 std::tie(BestA, BestB) = getBestNonConflictingEdges(BB, Edges);
1096 if (BestA.Src != BB) {
1100 LLVM_DEBUG(
dbgs() <<
"Trellis, but not one of the chosen edges.\n");
1107 if (BestA.Dest == BestB.Src) {
1113 if (allowTailDupPlacement() && shouldTailDuplicate(Succ2) &&
1114 canTailDuplicateUnplacedPreds(BB, Succ2, Chain, BlockFilter) &&
1116 Chain, BlockFilter)) {
1120 <<
", probability: " << Succ2Prob
1121 <<
" (Tail Duplicate)\n");
1123 Result.ShouldTailDup =
true;
1129 ComputedEdges[BestB.Src] = { BestB.Dest,
false };
1131 auto TrellisSucc = BestA.Dest;
1135 <<
", probability: " << SuccProb <<
" (Trellis)\n");
1143bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
1145 const BlockChain &Chain,
const BlockFilterSet *BlockFilter) {
1146 if (!shouldTailDuplicate(Succ))
1150 bool Duplicate =
true;
1152 unsigned int NumDup = 0;
1161 if (Pred == BB || (BlockFilter && !BlockFilter->count(Pred))
1162 || (BlockToChain[Pred] == &Chain && !Succ->
succ_empty()))
1212 if (
F->getFunction().hasProfileData())
1243 if ((NumDup > Succ->
succ_size()) || !Duplicate)
1266void MachineBlockPlacement::precomputeTriangleChains() {
1267 struct TriangleChain {
1268 std::vector<MachineBasicBlock *> Edges;
1271 : Edges({src, dst}) {}
1274 assert(getKey()->isSuccessor(dst) &&
1275 "Attempting to append a block that is not a successor.");
1276 Edges.push_back(dst);
1279 unsigned count()
const {
return Edges.size() - 1; }
1282 return Edges.
back();
1306 if (PDom ==
nullptr)
1313 if (!shouldTailDuplicate(PDom))
1315 bool CanTailDuplicate =
true;
1322 CanTailDuplicate =
false;
1328 if (!CanTailDuplicate)
1335 auto Found = TriangleChainMap.
find(&BB);
1338 if (Found != TriangleChainMap.
end()) {
1339 TriangleChain Chain = std::move(Found->second);
1340 TriangleChainMap.
erase(Found);
1342 TriangleChainMap.
insert(std::make_pair(Chain.getKey(), std::move(Chain)));
1344 auto InsertResult = TriangleChainMap.
try_emplace(PDom, &BB, PDom);
1345 assert(InsertResult.second &&
"Block seen twice.");
1353 for (
auto &ChainPair : TriangleChainMap) {
1354 TriangleChain &Chain = ChainPair.second;
1361 Chain.Edges.pop_back();
1365 <<
" as pre-computed based on triangles.\n");
1367 auto InsertResult = ComputedEdges.
insert({src, {dst,
true}});
1368 assert(InsertResult.second &&
"Block seen twice.");
1410bool MachineBlockPlacement::hasBetterLayoutPredecessor(
1414 const BlockFilterSet *BlockFilter) {
1417 if (SuccChain.UnscheduledPredecessors == 0)
1537 BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb;
1538 bool BadCFGConflict =
false;
1541 BlockChain *PredChain = BlockToChain[Pred];
1542 if (Pred == Succ || PredChain == &SuccChain ||
1543 (BlockFilter && !BlockFilter->count(Pred)) ||
1544 PredChain == &Chain || Pred != *std::prev(PredChain->end()) ||
1565 if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.
getCompl()) {
1566 BadCFGConflict =
true;
1571 if (BadCFGConflict) {
1573 << SuccProb <<
" (prob) (non-cold CFG conflict)\n");
1590MachineBlockPlacement::BlockAndTailDupResult
1591MachineBlockPlacement::selectBestSuccessor(
1593 const BlockFilterSet *BlockFilter) {
1596 BlockAndTailDupResult BestSucc = {
nullptr,
false };
1600 auto AdjustedSumProb =
1601 collectViableSuccessors(BB, Chain, BlockFilter, Successors);
1608 auto FoundEdge = ComputedEdges.
find(BB);
1609 if (FoundEdge != ComputedEdges.
end()) {
1611 ComputedEdges.
erase(FoundEdge);
1612 BlockChain *SuccChain = BlockToChain[Succ];
1613 if (BB->
isSuccessor(Succ) && (!BlockFilter || BlockFilter->count(Succ)) &&
1614 SuccChain != &Chain && Succ == *SuccChain->begin())
1615 return FoundEdge->second;
1620 if (isTrellis(BB, Successors, Chain, BlockFilter))
1621 return getBestTrellisSuccessor(BB, Successors, AdjustedSumProb, Chain,
1634 BlockChain &SuccChain = *BlockToChain[Succ];
1637 if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,
1638 Chain, BlockFilter)) {
1640 if (allowTailDupPlacement() && shouldTailDuplicate(Succ))
1647 <<
", probability: " << SuccProb
1648 << (SuccChain.UnscheduledPredecessors != 0 ?
" (CFG break)" :
"")
1651 if (BestSucc.BB && BestProb >= SuccProb) {
1658 BestProb = SuccProb;
1666 [](std::tuple<BranchProbability, MachineBasicBlock *> L,
1667 std::tuple<BranchProbability, MachineBasicBlock *> R) {
1668 return std::get<0>(L) > std::get<0>(R);
1670 for (
auto &Tup : DupCandidates) {
1673 std::tie(DupProb, Succ) = Tup;
1674 if (DupProb < BestProb)
1676 if (canTailDuplicateUnplacedPreds(BB, Succ, Chain, BlockFilter)
1677 && (isProfitableToTailDup(BB, Succ, BestProb, Chain, BlockFilter))) {
1679 <<
", probability: " << DupProb
1680 <<
" (Tail Duplicate)\n");
1682 BestSucc.ShouldTailDup =
true;
1710 return BlockToChain.
lookup(BB) == &Chain;
1713 if (WorkList.
empty())
1716 bool IsEHPad = WorkList[0]->isEHPad();
1722 "EHPad mismatch between block and work list.");
1724 BlockChain &SuccChain = *BlockToChain[
MBB];
1725 if (&SuccChain == &Chain)
1728 assert(SuccChain.UnscheduledPredecessors == 0 &&
1729 "Found CFG-violating block");
1733 MBFI->printBlockFreq(
dbgs(), CandidateFreq) <<
" (freq)\n");
1753 if (BestBlock && (IsEHPad ^ (BestFreq >= CandidateFreq)))
1757 BestFreq = CandidateFreq;
1771 const BlockChain &PlacedChain,
1773 const BlockFilterSet *BlockFilter) {
1776 if (BlockFilter && !BlockFilter->count(&*
I))
1778 if (BlockToChain[&*
I] != &PlacedChain) {
1779 PrevUnplacedBlockIt =
I;
1783 return *BlockToChain[&*
I]->
begin();
1789void MachineBlockPlacement::fillWorkLists(
1792 const BlockFilterSet *BlockFilter =
nullptr) {
1793 BlockChain &Chain = *BlockToChain[
MBB];
1794 if (!UpdatedPreds.
insert(&Chain).second)
1798 Chain.UnscheduledPredecessors == 0 &&
1799 "Attempting to place block with unscheduled predecessors in worklist.");
1801 assert(BlockToChain[ChainBB] == &Chain &&
1802 "Block in chain doesn't match BlockToChain map.");
1804 if (BlockFilter && !BlockFilter->count(Pred))
1806 if (BlockToChain[Pred] == &Chain)
1808 ++Chain.UnscheduledPredecessors;
1812 if (Chain.UnscheduledPredecessors != 0)
1822void MachineBlockPlacement::buildChain(
1824 BlockFilterSet *BlockFilter) {
1825 assert(HeadBB &&
"BB must not be null.\n");
1826 assert(BlockToChain[HeadBB] == &Chain &&
"BlockToChainMap mis-match.\n");
1830 markChainSuccessors(Chain, LoopHeaderBB, BlockFilter);
1833 assert(BB &&
"null block found at end of chain in loop.");
1834 assert(BlockToChain[BB] == &Chain &&
"BlockToChainMap mis-match in loop.");
1835 assert(*std::prev(Chain.end()) == BB &&
"BB Not found at end of chain.");
1840 auto Result = selectBestSuccessor(BB, Chain, BlockFilter);
1842 bool ShouldTailDup =
Result.ShouldTailDup;
1843 if (allowTailDupPlacement())
1844 ShouldTailDup |= (BestSucc && canTailDuplicateUnplacedPreds(BB, BestSucc,
1852 BestSucc = selectBestCandidateBlock(Chain, BlockWorkList);
1854 BestSucc = selectBestCandidateBlock(Chain, EHPadWorkList);
1857 BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockIt, BlockFilter);
1861 LLVM_DEBUG(
dbgs() <<
"Unnatural loop CFG detected, forcibly merging the "
1862 "layout successor until the CFG reduces\n");
1867 if (allowTailDupPlacement() && BestSucc && ShouldTailDup) {
1868 repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain,
1869 BlockFilter, PrevUnplacedBlockIt);
1877 BlockChain &SuccChain = *BlockToChain[BestSucc];
1880 SuccChain.UnscheduledPredecessors = 0;
1883 markChainSuccessors(SuccChain, LoopHeaderBB, BlockFilter);
1884 Chain.merge(BestSucc, &SuccChain);
1885 BB = *std::prev(Chain.end());
1907MachineBlockPlacement::canMoveBottomBlockToTop(
1917 if (OtherBB == BottomBlock)
1919 if (OtherBB == OldTop)
1927MachineBlockPlacement::TopFallThroughFreq(
1929 const BlockFilterSet &LoopBlockSet) {
1932 BlockChain *PredChain = BlockToChain[Pred];
1933 if (!LoopBlockSet.count(Pred) &&
1934 (!PredChain || Pred == *std::prev(PredChain->end()))) {
1941 BlockChain *SuccChain = BlockToChain[Succ];
1944 if (!LoopBlockSet.count(Succ) && (SuccProb > TopProb) &&
1945 (!SuccChain || Succ == *SuccChain->begin())) {
1953 if (EdgeFreq > MaxFreq)
1983MachineBlockPlacement::FallThroughGains(
1987 const BlockFilterSet &LoopBlockSet) {
1988 BlockFrequency FallThrough2Top = TopFallThroughFreq(OldTop, LoopBlockSet);
1991 FallThrough2Exit = MBFI->getBlockFreq(NewTop) *
2000 if (!LoopBlockSet.count(Pred))
2002 BlockChain *PredChain = BlockToChain[Pred];
2003 if (!PredChain || Pred == *std::prev(PredChain->end())) {
2006 if (EdgeFreq > FallThroughFromPred) {
2007 FallThroughFromPred = EdgeFreq;
2018 if ((Succ == NewTop) || (Succ == BestPred) || !LoopBlockSet.count(Succ))
2022 BlockChain *SuccChain = BlockToChain[Succ];
2023 if ((SuccChain && (Succ != *SuccChain->begin())) ||
2024 (SuccChain == BlockToChain[BestPred]))
2028 if (EdgeFreq > NewFreq)
2033 if (NewFreq > OrigEdgeFreq) {
2038 FallThroughFromPred = 0;
2045 FallThroughFromPred;
2074MachineBlockPlacement::findBestLoopTopHelper(
2077 const BlockFilterSet &LoopBlockSet) {
2081 BlockChain &HeaderChain = *BlockToChain[OldTop];
2082 if (!LoopBlockSet.count(*HeaderChain.begin()))
2084 if (OldTop != *HeaderChain.begin())
2093 if (!LoopBlockSet.count(Pred))
2095 if (Pred ==
L.getHeader())
2098 << Pred->
succ_size() <<
" successors, ";
2099 MBFI->printBlockFreq(
dbgs(), Pred) <<
" freq\n");
2106 if (OtherBB == OldTop)
2110 if (!canMoveBottomBlockToTop(Pred, OldTop))
2115 if ((Gains > 0) && (Gains > BestGains ||
2130 (*BestPred->
pred_begin())->succ_size() == 1 &&
2143MachineBlockPlacement::findBestLoopTop(
const MachineLoop &L,
2144 const BlockFilterSet &LoopBlockSet) {
2152 bool OptForSize =
F->getFunction().hasOptSize() ||
2155 return L.getHeader();
2159 while (NewTop != OldTop) {
2161 NewTop = findBestLoopTopHelper(OldTop, L, LoopBlockSet);
2162 if (NewTop != OldTop)
2163 ComputedEdges[NewTop] = { OldTop,
false };
2174MachineBlockPlacement::findBestLoopExit(
const MachineLoop &L,
2175 const BlockFilterSet &LoopBlockSet,
2185 BlockChain &HeaderChain = *BlockToChain[
L.getHeader()];
2186 if (!LoopBlockSet.count(*HeaderChain.begin()))
2190 unsigned BestExitLoopDepth = 0;
2200 BlockChain &Chain = *BlockToChain[
MBB];
2203 if (
MBB != *std::prev(Chain.end()))
2212 bool HasLoopingSucc =
false;
2218 BlockChain &SuccChain = *BlockToChain[Succ];
2220 if (&Chain == &SuccChain) {
2227 if (LoopBlockSet.count(Succ)) {
2230 HasLoopingSucc =
true;
2234 unsigned SuccLoopDepth = 0;
2236 SuccLoopDepth = ExitLoop->getLoopDepth();
2237 if (ExitLoop->contains(&L))
2245 MBFI->printBlockFreq(
dbgs(), ExitEdgeFreq) <<
")\n");
2251 if (!ExitingBB || SuccLoopDepth > BestExitLoopDepth ||
2252 ExitEdgeFreq > BestExitEdgeFreq ||
2254 !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) {
2255 BestExitEdgeFreq = ExitEdgeFreq;
2260 if (!HasLoopingSucc) {
2262 ExitingBB = OldExitingBB;
2263 BestExitEdgeFreq = OldBestExitEdgeFreq;
2270 dbgs() <<
" No other candidate exit blocks, using loop header\n");
2273 if (
L.getNumBlocks() == 1) {
2274 LLVM_DEBUG(
dbgs() <<
" Loop has 1 block, using loop header as exit\n");
2281 if (!BlocksExitingToOuterLoop.
empty() &&
2282 !BlocksExitingToOuterLoop.
count(ExitingBB))
2287 ExitFreq = BestExitEdgeFreq;
2296MachineBlockPlacement::hasViableTopFallthrough(
2298 const BlockFilterSet &LoopBlockSet) {
2300 BlockChain *PredChain = BlockToChain[Pred];
2301 if (!LoopBlockSet.count(Pred) &&
2302 (!PredChain || Pred == *std::prev(PredChain->end()))) {
2309 BlockChain *SuccChain = BlockToChain[Succ];
2312 if ((!SuccChain || Succ == *SuccChain->begin()) && SuccProb > TopProb) {
2330void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
2333 const BlockFilterSet &LoopBlockSet) {
2341 if (Bottom == ExitingBB)
2348 bool ViableTopFallthrough = hasViableTopFallthrough(Top, LoopBlockSet);
2353 if (ViableTopFallthrough) {
2355 BlockChain *SuccChain = BlockToChain[Succ];
2356 if (!LoopBlockSet.count(Succ) &&
2357 (!SuccChain || Succ == *SuccChain->begin()))
2363 BlockFrequency FallThrough2Top = TopFallThroughFreq(Top, LoopBlockSet);
2364 if (FallThrough2Top >= ExitFreq)
2368 BlockChain::iterator ExitIt =
llvm::find(LoopChain, ExitingBB);
2369 if (ExitIt == LoopChain.end())
2391 if (ViableTopFallthrough) {
2392 assert(std::next(ExitIt) != LoopChain.end() &&
2393 "Exit should not be last BB");
2402 std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end());
2418void MachineBlockPlacement::rotateLoopWithProfile(
2420 const BlockFilterSet &LoopBlockSet) {
2421 auto RotationPos = LoopChain.end();
2446 BlockChain *PredChain = BlockToChain[Pred];
2447 if (!LoopBlockSet.count(Pred) &&
2448 (!PredChain || Pred == *std::prev(PredChain->end()))) {
2449 auto EdgeFreq = MBFI->getBlockFreq(Pred) *
2451 auto FallThruCost = ScaleBlockFrequency(EdgeFreq,
MisfetchCost);
2455 FallThruCost += ScaleBlockFrequency(EdgeFreq,
JumpInstCost);
2456 HeaderFallThroughCost = std::max(HeaderFallThroughCost, FallThruCost);
2465 for (
auto *BB : LoopChain) {
2468 BlockChain *SuccChain = BlockToChain[Succ];
2469 if (!LoopBlockSet.count(Succ) &&
2470 (!SuccChain || Succ == *SuccChain->begin())) {
2472 LargestExitEdgeProb = std::max(LargestExitEdgeProb, SuccProb);
2476 auto ExitFreq = MBFI->getBlockFreq(BB) * LargestExitEdgeProb;
2484 for (
auto Iter = LoopChain.begin(), TailIter = std::prev(LoopChain.end()),
2485 EndIter = LoopChain.end();
2486 Iter != EndIter; Iter++, TailIter++) {
2489 if (TailIter == LoopChain.end())
2490 TailIter = LoopChain.begin();
2492 auto TailBB = *TailIter;
2500 if (Iter != LoopChain.begin())
2501 Cost += HeaderFallThroughCost;
2505 for (
auto &ExitWithFreq : ExitsWithFreq)
2506 if (TailBB != ExitWithFreq.first)
2507 Cost += ExitWithFreq.second;
2523 if (TailBB->isSuccessor(*Iter)) {
2524 auto TailBBFreq = MBFI->getBlockFreq(TailBB);
2525 if (TailBB->succ_size() == 1)
2526 Cost += ScaleBlockFrequency(TailBBFreq.getFrequency(),
2528 else if (TailBB->succ_size() == 2) {
2530 auto TailToHeadFreq = TailBBFreq * TailToHeadProb;
2532 ? TailBBFreq * TailToHeadProb.
getCompl()
2541 <<
" to the top: " <<
Cost.getFrequency() <<
"\n");
2543 if (
Cost < SmallestRotationCost) {
2544 SmallestRotationCost =
Cost;
2549 if (RotationPos != LoopChain.end()) {
2551 <<
" to the top\n");
2552 std::rotate(LoopChain.begin(), RotationPos, LoopChain.end());
2561MachineBlockPlacement::collectLoopBlockSet(
const MachineLoop &L) {
2562 BlockFilterSet LoopBlockSet;
2575 for (
auto *LoopPred :
L.getHeader()->predecessors())
2576 if (!
L.contains(LoopPred))
2577 LoopFreq += MBFI->getBlockFreq(LoopPred) *
2581 if (LoopBlockSet.count(LoopBB))
2586 BlockChain *Chain = BlockToChain[LoopBB];
2588 LoopBlockSet.
insert(ChainBB);
2591 LoopBlockSet.insert(
L.block_begin(),
L.block_end());
2593 return LoopBlockSet;
2602void MachineBlockPlacement::buildLoopChains(
const MachineLoop &L) {
2606 buildLoopChains(*InnerLoop);
2609 "BlockWorkList not empty when starting to build loop chains.");
2611 "EHPadWorkList not empty when starting to build loop chains.");
2612 BlockFilterSet LoopBlockSet = collectLoopBlockSet(L);
2617 bool RotateLoopWithProfile =
2633 PreferredLoopExit =
nullptr;
2635 if (!RotateLoopWithProfile && LoopTop ==
L.getHeader())
2636 PreferredLoopExit = findBestLoopExit(L, LoopBlockSet, ExitFreq);
2638 BlockChain &LoopChain = *BlockToChain[LoopTop];
2644 assert(LoopChain.UnscheduledPredecessors == 0 &&
2645 "LoopChain should not have unscheduled predecessors.");
2646 UpdatedPreds.
insert(&LoopChain);
2649 fillWorkLists(LoopBB, UpdatedPreds, &LoopBlockSet);
2651 buildChain(LoopTop, LoopChain, &LoopBlockSet);
2653 if (RotateLoopWithProfile)
2654 rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
2656 rotateLoop(LoopChain, PreferredLoopExit, ExitFreq, LoopBlockSet);
2660 bool BadLoop =
false;
2661 if (LoopChain.UnscheduledPredecessors) {
2663 dbgs() <<
"Loop chain contains a block without its preds placed!\n"
2664 <<
" Loop header: " << getBlockName(*L.block_begin()) <<
"\n"
2665 <<
" Chain header: " << getBlockName(*LoopChain.begin()) <<
"\n";
2669 if (!LoopBlockSet.remove(ChainBB)) {
2673 dbgs() <<
"Loop chain contains a block not contained by the loop!\n"
2674 <<
" Loop header: " <<
getBlockName(*
L.block_begin()) <<
"\n"
2675 <<
" Chain header: " <<
getBlockName(*LoopChain.begin()) <<
"\n"
2680 if (!LoopBlockSet.empty()) {
2683 dbgs() <<
"Loop contains blocks never placed into a chain!\n"
2684 <<
" Loop header: " <<
getBlockName(*
L.block_begin()) <<
"\n"
2685 <<
" Chain header: " <<
getBlockName(*LoopChain.begin()) <<
"\n"
2688 assert(!BadLoop &&
"Detected problems with the placement of this loop.");
2691 BlockWorkList.
clear();
2692 EHPadWorkList.
clear();
2695void MachineBlockPlacement::buildCFGChains() {
2703 new (ChainAllocator.
Allocate()) BlockChain(BlockToChain, BB);
2716 assert(NextFI != FE &&
"Can't fallthrough past the last block.");
2717 LLVM_DEBUG(
dbgs() <<
"Pre-merging due to unanalyzable fallthrough: "
2720 Chain->merge(NextBB,
nullptr);
2722 BlocksWithUnanalyzableExits.
insert(&*BB);
2730 PreferredLoopExit =
nullptr;
2732 buildLoopChains(*L);
2735 "BlockWorkList should be empty before building final chain.");
2737 "EHPadWorkList should be empty before building final chain.");
2741 fillWorkLists(&
MBB, UpdatedPreds);
2743 BlockChain &FunctionChain = *BlockToChain[&
F->front()];
2744 buildChain(&
F->front(), FunctionChain);
2751 bool BadFunc =
false;
2752 FunctionBlockSetType FunctionBlockSet;
2754 FunctionBlockSet.insert(&
MBB);
2757 if (!FunctionBlockSet.erase(ChainBB)) {
2759 dbgs() <<
"Function chain contains a block not in the function!\n"
2760 <<
" Bad block: " << getBlockName(ChainBB) <<
"\n";
2763 if (!FunctionBlockSet.empty()) {
2765 for (MachineBasicBlock *RemainingBB : FunctionBlockSet)
2766 dbgs() <<
"Function contains blocks never placed into a chain!\n"
2767 <<
" Bad block: " << getBlockName(RemainingBB) <<
"\n";
2769 assert(!BadFunc &&
"Detected problems with the block placement.");
2775 F->getNumBlockIDs());
2778 for (
auto &
MBB : *
F) {
2779 if (LastMBB !=
nullptr)
2783 OriginalLayoutSuccessors[
F->back().getNumber()] =
nullptr;
2790 LLVM_DEBUG(
dbgs() << (ChainBB == *FunctionChain.begin() ?
"Placing chain "
2794 F->splice(InsertPos, ChainBB);
2799 if (ChainBB == *FunctionChain.begin())
2810 if (!BlocksWithUnanalyzableExits.
count(PrevBB)) {
2816 "Unexpected block with un-analyzable fallthrough!");
2818 TBB = FBB =
nullptr;
2856 BlockWorkList.
clear();
2857 EHPadWorkList.
clear();
2860void MachineBlockPlacement::optimizeBranches() {
2861 BlockChain &FunctionChain = *BlockToChain[&
F->front()];
2876 if (
TBB && !
Cond.empty() && FBB &&
2893void MachineBlockPlacement::alignBlocks() {
2899 if (
F->getFunction().hasMinSize() ||
2902 BlockChain &FunctionChain = *BlockToChain[&
F->front()];
2903 if (FunctionChain.begin() == FunctionChain.end())
2910 if (ChainBB == *FunctionChain.begin())
2928 if (Freq < WeightedEntryFreq)
2935 if (Freq < (LoopHeaderFreq * ColdProb))
2948 auto DetermineMaxAlignmentPadding = [&]() {
2955 ChainBB->setMaxBytesForAlignment(MaxBytes);
2961 ChainBB->setAlignment(
Align);
2962 DetermineMaxAlignmentPadding();
2972 BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
2973 if (LayoutEdgeFreq <= (Freq * ColdProb)) {
2974 ChainBB->setAlignment(
Align);
2975 DetermineMaxAlignmentPadding();
2995bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
2998 BlockChain &Chain, BlockFilterSet *BlockFilter,
3000 bool Removed, DuplicatedToLPred;
3001 bool DuplicatedToOriginalLPred;
3002 Removed = maybeTailDuplicateBlock(BB, LPred, Chain, BlockFilter,
3003 PrevUnplacedBlockIt,
3007 DuplicatedToOriginalLPred = DuplicatedToLPred;
3012 while (DuplicatedToLPred && Removed) {
3019 BlockChain::iterator ChainEnd = Chain.
end();
3020 DupBB = *(--ChainEnd);
3022 if (ChainEnd == Chain.begin())
3024 DupPred = *std::prev(ChainEnd);
3025 Removed = maybeTailDuplicateBlock(DupBB, DupPred, Chain, BlockFilter,
3026 PrevUnplacedBlockIt,
3034 LPred = *std::prev(Chain.end());
3035 if (DuplicatedToOriginalLPred)
3036 markBlockSuccessors(Chain, LPred, LoopHeaderBB, BlockFilter);
3053bool MachineBlockPlacement::maybeTailDuplicateBlock(
3055 BlockChain &Chain, BlockFilterSet *BlockFilter,
3057 bool &DuplicatedToLPred) {
3058 DuplicatedToLPred =
false;
3059 if (!shouldTailDuplicate(BB))
3067 bool Removed =
false;
3068 auto RemovalCallback =
3074 bool InWorkList =
true;
3076 if (BlockToChain.
count(RemBB)) {
3077 BlockChain *Chain = BlockToChain[RemBB];
3078 InWorkList = Chain->UnscheduledPredecessors == 0;
3079 Chain->remove(RemBB);
3080 BlockToChain.
erase(RemBB);
3084 if (&(*PrevUnplacedBlockIt) == RemBB) {
3085 PrevUnplacedBlockIt++;
3091 if (RemBB->isEHPad())
3092 RemoveList = EHPadWorkList;
3098 BlockFilter->remove(RemBB);
3102 MLI->removeBlock(RemBB);
3103 if (RemBB == PreferredLoopExit)
3104 PreferredLoopExit =
nullptr;
3109 auto RemovalCallbackRef =
3116 if (
F->getFunction().hasProfileData()) {
3118 findDuplicateCandidates(CandidatePreds, BB, BlockFilter);
3119 if (CandidatePreds.
size() == 0)
3122 CandidatePtr = &CandidatePreds;
3125 &RemovalCallbackRef, CandidatePtr);
3128 DuplicatedToLPred =
false;
3131 BlockChain* PredChain = BlockToChain[Pred];
3133 DuplicatedToLPred =
true;
3134 if (Pred == LPred || (BlockFilter && !BlockFilter->count(Pred))
3135 || PredChain == &Chain)
3138 if (BlockFilter && !BlockFilter->count(NewSucc))
3140 BlockChain *NewChain = BlockToChain[NewSucc];
3141 if (NewChain != &Chain && NewChain != PredChain)
3142 NewChain->UnscheduledPredecessors++;
3152 if (!
MI.isPHI() && !
MI.isMetaInstruction())
3168 BlockFilterSet *BlockFilter) {
3171 if (BlockFilter && !BlockFilter->count(Pred))
3173 BlockChain *PredChain = BlockToChain[Pred];
3174 if (PredChain && (Pred != *std::prev(PredChain->end())))
3181 if (BlockFilter && !BlockFilter->count(Succ))
3183 BlockChain *SuccChain = BlockToChain[Succ];
3184 if (SuccChain && (Succ != *SuccChain->begin()))
3187 if (SuccProb > BestProb)
3188 BestProb = SuccProb;
3192 if (BBProb <= BestProb)
3199 return Gain > scaleThreshold(BB);
3204void MachineBlockPlacement::findDuplicateCandidates(
3207 BlockFilterSet *BlockFilter) {
3219 return MBFI->getBlockFreq(
A) > MBFI->getBlockFreq(
B);
3224 auto SuccIt = Succs.begin();
3225 if (SuccIt != Succs.end()) {
3276 if (!Fallthrough && isBestSuccessor(BB, Pred, BlockFilter)) {
3278 if (SuccIt != Succs.end())
3284 BlockFrequency OrigCost = PredFreq + PredFreq * DefaultBranchProb;
3286 if (SuccIt == Succs.end()) {
3288 if (Succs.size() > 0)
3289 DupCost += PredFreq;
3292 DupCost += PredFreq;
3296 assert(OrigCost >= DupCost);
3297 OrigCost -= DupCost;
3298 if (OrigCost > BBDupThreshold) {
3300 if (SuccIt != Succs.end())
3308 if ((Candidates.
size() < Preds.size()) && (Candidates.
size() > 0)) {
3309 Candidates[0] = Candidates.
back();
3315void MachineBlockPlacement::initDupThreshold() {
3317 if (!
F->getFunction().hasProfileData())
3323 UseProfileCount =
true;
3337 DupThreshold = MaxFreq * ThresholdProb;
3338 UseProfileCount =
false;
3341bool MachineBlockPlacement::runOnMachineFunction(
MachineFunction &MF) {
3346 if (std::next(MF.
begin()) == MF.
end())
3350 MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
3351 MBFI = std::make_unique<MBFIWrapper>(
3352 getAnalysis<MachineBlockFrequencyInfo>());
3353 MLI = &getAnalysis<MachineLoopInfo>();
3357 PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
3363 PreferredLoopExit =
nullptr;
3366 "BlockToChain map should be empty before starting placement.");
3368 "Computed Edge map should be empty before starting placement.");
3379 if (PassConfig->
getOptLevel() >= CodeGenOptLevel::Aggressive) {
3391 (PassConfig->
getOptLevel() < CodeGenOptLevel::Aggressive ||
3393 TailDupSize =
TII->getTailDuplicateSize(PassConfig->
getOptLevel());
3395 if (allowTailDupPlacement()) {
3396 MPDT = &getAnalysis<MachinePostDominatorTree>();
3401 bool PreRegAlloc =
false;
3402 TailDup.
initMF(MF, PreRegAlloc, MBPI, MBFI.get(), PSI,
3404 precomputeTriangleChains();
3416 if (MF.
size() > 3 && EnableTailMerge) {
3424 BlockToChain.
clear();
3425 ComputedEdges.
clear();
3441 createCFGChainExtTsp();
3447 BlockToChain.
clear();
3448 ComputedEdges.
clear();
3451 bool HasMaxBytesOverride =
3457 if (HasMaxBytesOverride)
3466 for (
auto MBI = std::next(MF.begin()), MBE = MF.end(); MBI != MBE; ++MBI) {
3467 auto LayoutPred = std::prev(MBI);
3469 if (HasMaxBytesOverride)
3481 MF.RenumberBlocks();
3482 MBFI->view(
"MBP." + MF.getName(),
false);
3490void MachineBlockPlacement::applyExtTsp() {
3494 std::vector<const MachineBasicBlock *> CurrentBlockOrder;
3495 CurrentBlockOrder.reserve(
F->size());
3496 size_t NumBlocks = 0;
3498 BlockIndex[&
MBB] = NumBlocks++;
3499 CurrentBlockOrder.push_back(&
MBB);
3502 auto BlockSizes = std::vector<uint64_t>(
F->size());
3503 auto BlockCounts = std::vector<uint64_t>(
F->size());
3504 std::vector<codelayout::EdgeCount> JumpCounts;
3517 int NumInsts = std::distance(NonDbgInsts.begin(), NonDbgInsts.end());
3518 BlockSizes[BlockIndex[&
MBB]] = 4 * NumInsts;
3523 JumpCounts.push_back(
3528 LLVM_DEBUG(
dbgs() <<
"Applying ext-tsp layout for |V| = " <<
F->size()
3529 <<
" with profile = " <<
F->getFunction().hasProfileData()
3530 <<
" (" <<
F->getName().str() <<
")"
3533 dbgs() <<
format(
" original layout score: %0.2f\n",
3538 std::vector<const MachineBasicBlock *> NewBlockOrder;
3539 NewBlockOrder.reserve(
F->size());
3541 NewBlockOrder.push_back(CurrentBlockOrder[
Node]);
3548 assignBlockOrder(NewBlockOrder);
3551void MachineBlockPlacement::assignBlockOrder(
3552 const std::vector<const MachineBasicBlock *> &NewBlockOrder) {
3553 assert(
F->size() == NewBlockOrder.size() &&
"Incorrect size of block order");
3554 F->RenumberBlocks();
3556 bool HasChanges =
false;
3557 for (
size_t I = 0;
I < NewBlockOrder.size();
I++) {
3558 if (NewBlockOrder[
I] !=
F->getBlockNumbered(
I)) {
3568 for (
auto &
MBB : *
F) {
3575 NewIndex[
MBB] = NewIndex.
size();
3578 return NewIndex[&
L] < NewIndex[&
R];
3585 for (
auto &
MBB : *
F) {
3592 if (FTMBB && (NextMBB == EndIt || &*NextMBB != FTMBB)) {
3606 F->verify(
this,
"After optimized block reordering");
3610void MachineBlockPlacement::createCFGChainExtTsp() {
3611 BlockToChain.
clear();
3612 ComputedEdges.
clear();
3616 BlockChain *FunctionChain =
3617 new (ChainAllocator.
Allocate()) BlockChain(BlockToChain, HeadBB);
3622 FunctionChain->merge(&
MBB,
nullptr);
3660char MachineBlockPlacementStats::ID = 0;
3665 "Basic Block Placement Stats",
false,
false)
3673 if (std::next(
F.begin()) ==
F.end())
3679 MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
3680 MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
3685 (
MBB.
succ_size() > 1) ? NumCondBranches : NumUncondBranches;
3687 (
MBB.
succ_size() > 1) ? CondBranchTakenFreq : UncondBranchTakenFreq;
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
This file defines the BumpPtrAllocator interface.
static cl::opt< unsigned > TailMergeSize("tail-merge-size", cl::desc("Min number of instructions to consider tail merging"), cl::init(3), cl::Hidden)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Declares methods and data structures for code layout algorithms.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
static unsigned InstrCount
This file defines the DenseMap class.
DenseMap< Block *, BlockRelaxAux > Blocks
const HexagonInstrInfo * TII
static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B)
block placement Basic Block Placement Stats
static BranchProbability getAdjustedProbability(BranchProbability OrigProb, BranchProbability AdjustedSumProb)
The helper function returns the branch probability that is adjusted or normalized over the new total ...
static cl::opt< bool > PreciseRotationCost("precise-rotation-cost", cl::desc("Model the cost of loop rotation more " "precisely by using profile data."), cl::init(false), cl::Hidden)
Branch Probability Basic Block Placement
static cl::opt< unsigned > AlignAllBlock("align-all-blocks", cl::desc("Force the alignment of all blocks in the function in log2 format " "(e.g 4 means align on 16B boundaries)."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > TailDupPlacementPenalty("tail-dup-placement-penalty", cl::desc("Cost penalty for blocks that can avoid breaking CFG by copying. " "Copying can increase fallthrough, but it also increases icache " "pressure. This parameter controls the penalty to account for that. " "Percent as integer."), cl::init(2), cl::Hidden)
static BranchProbability getLayoutSuccessorProbThreshold(const MachineBasicBlock *BB)
static cl::opt< bool > ForceLoopColdBlock("force-loop-cold-block", cl::desc("Force outlining cold blocks from loops."), cl::init(false), cl::Hidden)
static cl::opt< unsigned > ExitBlockBias("block-placement-exit-block-bias", cl::desc("Block frequency percentage a loop exit block needs " "over the original exit to be considered the new exit."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > AlignAllNonFallThruBlocks("align-all-nofallthru-blocks", cl::desc("Force the alignment of all blocks that have no fall-through " "predecessors (i.e. don't add nops that are executed). In log2 " "format (e.g 4 means align on 16B boundaries)."), cl::init(0), cl::Hidden)
static bool greaterWithBias(BlockFrequency A, BlockFrequency B, uint64_t EntryFreq)
Compare 2 BlockFrequency's with a small penalty for A.
static cl::opt< unsigned > TailDupPlacementThreshold("tail-dup-placement-threshold", cl::desc("Instruction cutoff for tail duplication during layout. " "Tail merging during layout is forced to have a threshold " "that won't conflict."), cl::init(2), cl::Hidden)
static cl::opt< unsigned > JumpInstCost("jump-inst-cost", cl::desc("Cost of jump instructions."), cl::init(1), cl::Hidden)
static cl::opt< unsigned > MisfetchCost("misfetch-cost", cl::desc("Cost that models the probabilistic risk of an instruction " "misfetch due to a jump comparing to falling through, whose cost " "is zero."), cl::init(1), cl::Hidden)
static cl::opt< unsigned > MaxBytesForAlignmentOverride("max-bytes-for-alignment", cl::desc("Forces the maximum bytes allowed to be emitted when padding for " "alignment"), cl::init(0), cl::Hidden)
static cl::opt< bool > BranchFoldPlacement("branch-fold-placement", cl::desc("Perform branch folding during placement. " "Reduces code size."), cl::init(true), cl::Hidden)
static cl::opt< unsigned > TailDupProfilePercentThreshold("tail-dup-profile-percent-threshold", cl::desc("If profile count information is used in tail duplication cost " "model, the gained fall through number from tail duplication " "should be at least this percent of hot count."), cl::init(50), cl::Hidden)
static cl::opt< unsigned > TriangleChainCount("triangle-chain-count", cl::desc("Number of triangle-shaped-CFG's that need to be in a row for the " "triangle tail duplication heuristic to kick in. 0 to disable."), cl::init(2), cl::Hidden)
Branch Probability Basic Block static false std::string getBlockName(const MachineBasicBlock *BB)
Helper to print the name of a MBB.
static bool hasSameSuccessors(MachineBasicBlock &BB, SmallPtrSetImpl< const MachineBasicBlock * > &Successors)
Check if BB has exactly the successors in Successors.
static cl::opt< bool > TailDupPlacement("tail-dup-placement", cl::desc("Perform tail duplication during placement. " "Creates more fallthrough opportunites in " "outline branches."), cl::init(true), cl::Hidden)
static uint64_t countMBBInstruction(MachineBasicBlock *MBB)
static cl::opt< unsigned > LoopToColdBlockRatio("loop-to-cold-block-ratio", cl::desc("Outline loop blocks from loop chain if (frequency of loop) / " "(frequency of block) is greater than this ratio"), cl::init(5), cl::Hidden)
static cl::opt< bool > RenumberBlocksBeforeView("renumber-blocks-before-view", cl::desc("If true, basic blocks are re-numbered before MBP layout is printed " "into a dot graph. Only used when a function is being printed."), cl::init(false), cl::Hidden)
static cl::opt< unsigned > TailDupPlacementAggressiveThreshold("tail-dup-placement-aggressive-threshold", cl::desc("Instruction cutoff for aggressive tail duplication during " "layout. Used at -O3. Tail merging during layout is forced to " "have a threshold that won't conflict."), cl::init(4), cl::Hidden)
static cl::opt< bool > ForcePreciseRotationCost("force-precise-rotation-cost", cl::desc("Force the use of precise cost " "loop rotation strategy."), cl::init(false), cl::Hidden)
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
unify loop Fixup each natural loop to have a single exit block
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
static uint64_t getMaxFrequency()
Returns the maximum possible frequency, the saturation value.
static BranchProbability getOne()
uint32_t getNumerator() const
BranchProbability getCompl() const
static BranchProbability getZero()
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
bool erase(const KeyT &Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
Remove the branching code at the end of the specific MBB.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
Reverses the branch condition of the specified condition list, returning false on success and true if...
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
Insert branch code into the end of the specified MachineBasicBlock.
unsigned pred_size() const
bool isEHPad() const
Returns true if the block is a landing pad.
instr_iterator instr_begin()
MachineBasicBlock * getFallThrough(bool JumpToFallThrough=true)
Return the fallthrough block if the block can implicitly transfer control to the block after it by fa...
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
void updateTerminator(MachineBasicBlock *PreviousLayoutSuccessor)
Update the terminator instructions in block to account for changes to block layout which may have bee...
bool canFallThrough()
Return true if the block can implicitly transfer control to the block after it by falling off the end...
succ_iterator succ_begin()
unsigned succ_size() const
void setAlignment(Align A)
Set alignment of the basic block.
bool isEntryBlock() const
Returns true if this is the entry block of the function.
pred_iterator pred_begin()
succ_reverse_iterator succ_rbegin()
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
DebugLoc findBranchDebugLoc()
Find and return the merged DebugLoc of the branch instructions of the block.
iterator_range< succ_iterator > successors()
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
iterator_range< pred_iterator > predecessors()
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getblockFreq - Return block frequency.
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Representation of each machine instruction.
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
MachinePostDominatorTree - an analysis pass wrapper for DominatorTree used to compute the post-domina...
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
uint64_t getOrCompHotCountThreshold() const
Returns HotCountThreshold if set.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
typename SuperClass::const_iterator const_iterator
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
T * Allocate(size_t num=1)
Allocate space for an array of objects without constructing them.
void DestroyAll()
Call the destructor of each allocated object and deallocate all but the current slab and reset the cu...
Utility class to perform tail duplication.
void initMF(MachineFunction &MF, bool PreRegAlloc, const MachineBranchProbabilityInfo *MBPI, MBFIWrapper *MBFI, ProfileSummaryInfo *PSI, bool LayoutMode, unsigned TailDupSize=0)
Prepare to run on a specific machine function.
bool tailDuplicateAndUpdate(bool IsSimple, MachineBasicBlock *MBB, MachineBasicBlock *ForcedLayoutPred, SmallVectorImpl< MachineBasicBlock * > *DuplicatedPreds=nullptr, function_ref< void(MachineBasicBlock *)> *RemovalCallback=nullptr, SmallVectorImpl< MachineBasicBlock * > *CandidatePtr=nullptr)
Tail duplicate a single basic block into its predecessors, and then clean up.
static bool isSimpleBB(MachineBasicBlock *TailBB)
True if this BB has only one unconditional jump.
bool canTailDuplicate(MachineBasicBlock *TailBB, MachineBasicBlock *PredBB)
Returns true if TailBB can successfully be duplicated into PredBB.
bool shouldTailDuplicate(bool IsSimple, MachineBasicBlock &TailBB)
Determine if it is profitable to duplicate this block.
TargetInstrInfo - Interface to description of machine instruction set.
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
virtual unsigned getMaxPermittedBytesForAlignment(MachineBasicBlock *MBB) const
Return the maximum amount of bytes allowed to be emitted when padding for alignment.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
virtual bool alignLoopsWithOptSize() const
Should loops be aligned even when the function is marked OptSize (but not MinSize).
bool requiresStructuredCFG() const
Target-Independent Code Generator Pass Configuration Options.
bool getEnableTailMerge() const
CodeGenOptLevel getOptLevel() const
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetLowering * getTargetLowering() const
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
A raw_ostream that writes to an std::string.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
double calcExtTspScore(ArrayRef< uint64_t > Order, ArrayRef< uint64_t > NodeSizes, ArrayRef< uint64_t > NodeCounts, ArrayRef< EdgeCount > EdgeCounts)
Estimate the "quality" of a given node order in CFG.
std::vector< uint64_t > computeExtTspLayout(ArrayRef< uint64_t > NodeSizes, ArrayRef< uint64_t > NodeCounts, ArrayRef< EdgeCount > EdgeCounts)
Find a layout of nodes (basic blocks) of a given CFG optimizing jump locality and thus processor I-ca...
void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")
Append to path.
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
void stable_sort(R &&Range)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
cl::opt< bool > ApplyExtTspWithoutProfile
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
void initializeMachineBlockPlacementStatsPass(PassRegistry &)
cl::opt< unsigned > ProfileLikelyProb
cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))
auto reverse(ContainerTy &&C)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
cl::opt< GVDAGType > ViewBlockLayoutWithBFI("view-block-layout-with-bfi", cl::Hidden, cl::desc("Pop up a window to show a dag displaying MBP layout and associated " "block frequencies of the CFG."), cl::values(clEnumValN(GVDT_None, "none", "do not display graphs."), clEnumValN(GVDT_Fraction, "fraction", "display a graph using the " "fractional block frequency representation."), clEnumValN(GVDT_Integer, "integer", "display a graph using the raw " "integer fractional block frequency representation."), clEnumValN(GVDT_Count, "count", "display a graph using the real " "profile count if available.")))
bool isFunctionInPrintList(StringRef FunctionName)
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
void erase_value(Container &C, ValueType V)
Wrapper function to remove a value from a container:
cl::opt< unsigned > StaticLikelyProb
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
char & MachineBlockPlacementID
MachineBlockPlacement - This pass places basic blocks based on branch probabilities.
cl::opt< bool > EnableExtTspBlockPlacement
char & MachineBlockPlacementStatsID
MachineBlockPlacementStats - This pass collects statistics about the basic block placement using bran...
void initializeMachineBlockPlacementPass(PassRegistry &)
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.