79#define DEBUG_TYPE "block-placement"
81STATISTIC(NumCondBranches,
"Number of conditional branches");
82STATISTIC(NumUncondBranches,
"Number of unconditional branches");
84 "Potential frequency of taking conditional branches");
86 "Potential frequency of taking unconditional branches");
90 cl::desc(
"Force the alignment of all blocks in the function in log2 format "
91 "(e.g 4 means align on 16B boundaries)."),
95 "align-all-nofallthru-blocks",
96 cl::desc(
"Force the alignment of all blocks that have no fall-through "
97 "predecessors (i.e. don't add nops that are executed). In log2 "
98 "format (e.g 4 means align on 16B boundaries)."),
102 "max-bytes-for-alignment",
103 cl::desc(
"Forces the maximum bytes allowed to be emitted when padding for "
108 "block-placement-predecessor-limit",
109 cl::desc(
"For blocks with more predecessors, certain layout optimizations"
110 "will be disabled to prevent quadratic compile time."),
115 "block-placement-exit-block-bias",
116 cl::desc(
"Block frequency percentage a loop exit block needs "
117 "over the original exit to be considered the new exit."),
124 "loop-to-cold-block-ratio",
125 cl::desc(
"Outline loop blocks from loop chain if (frequency of loop) / "
126 "(frequency of block) is greater than this ratio"),
131 cl::desc(
"Force outlining cold blocks from loops."),
136 cl::desc(
"Model the cost of loop rotation more "
137 "precisely by using profile data."),
142 cl::desc(
"Force the use of precise cost "
143 "loop rotation strategy."),
148 cl::desc(
"Cost that models the probabilistic risk of an instruction "
149 "misfetch due to a jump comparing to falling through, whose cost "
154 cl::desc(
"Cost of jump instructions."),
158 cl::desc(
"Perform tail duplication during placement. "
159 "Creates more fallthrough opportunities in "
160 "outline branches."),
165 cl::desc(
"Perform branch folding during placement. "
166 "Reduces code size."),
171 "tail-dup-placement-threshold",
172 cl::desc(
"Instruction cutoff for tail duplication during layout. "
173 "Tail merging during layout is forced to have a threshold "
174 "that won't conflict."),
179 "tail-dup-placement-aggressive-threshold",
180 cl::desc(
"Instruction cutoff for aggressive tail duplication during "
181 "layout. Used at -O3. Tail merging during layout is forced to "
182 "have a threshold that won't conflict."),
187 "tail-dup-placement-penalty",
189 "Cost penalty for blocks that can avoid breaking CFG by copying. "
190 "Copying can increase fallthrough, but it also increases icache "
191 "pressure. This parameter controls the penalty to account for that. "
192 "Percent as integer."),
197 "tail-dup-profile-percent-threshold",
198 cl::desc(
"If profile count information is used in tail duplication cost "
199 "model, the gained fall through number from tail duplication "
200 "should be at least this percent of hot count."),
205 "triangle-chain-count",
206 cl::desc(
"Number of triangle-shaped-CFG's that need to be in a row for the "
207 "triangle tail duplication heuristic to kick in. 0 to disable."),
216 "renumber-blocks-before-view",
218 "If true, basic blocks are re-numbered before MBP layout is printed "
219 "into a dot graph. Only used when a function is being printed."),
223 "ext-tsp-block-placement-max-blocks",
224 cl::desc(
"Maximum number of basic blocks in a function to run ext-TSP "
231 cl::desc(
"Use ext-tsp for size-aware block placement."));
280 BlockToChainMapType &BlockToChain;
289 : Blocks(1, BB), BlockToChain(BlockToChain) {
290 assert(BB &&
"Cannot create a chain with a null basic block");
291 BlockToChain[BB] =
this;
307 for (
iterator i = begin(); i != end(); ++i) {
323 assert(BB &&
"Can't merge a null block.");
324 assert(!Blocks.
empty() &&
"Can't merge into an empty chain.");
328 assert(!BlockToChain[BB] &&
329 "Passed chain is null, but BB has entry in BlockToChain.");
331 BlockToChain[BB] =
this;
335 assert(BB == *Chain->begin() &&
"Passed BB is not head of Chain.");
336 assert(Chain->begin() != Chain->end());
342 assert(BlockToChain[ChainBB] == Chain &&
"Incoming blocks not in chain.");
343 BlockToChain[ChainBB] =
this;
364 unsigned UnscheduledPredecessors = 0;
367class MachineBlockPlacement {
369 using BlockFilterSet = SmallSetVector<const MachineBasicBlock *, 16>;
372 struct BlockAndTailDupResult {
373 MachineBasicBlock *BB =
nullptr;
378 struct WeightedEdge {
379 BlockFrequency Weight;
380 MachineBasicBlock *Src =
nullptr;
381 MachineBasicBlock *Dest =
nullptr;
389 DenseMap<const MachineBasicBlock *, BlockAndTailDupResult> ComputedEdges;
392 MachineFunction *F =
nullptr;
395 const MachineBranchProbabilityInfo *MBPI =
nullptr;
398 std::unique_ptr<MBFIWrapper> MBFI;
401 MachineLoopInfo *MLI =
nullptr;
406 MachineBasicBlock *PreferredLoopExit =
nullptr;
409 const TargetInstrInfo *TII =
nullptr;
412 const TargetLoweringBase *TLI =
nullptr;
415 MachinePostDominatorTree *MPDT =
nullptr;
417 ProfileSummaryInfo *PSI =
nullptr;
430 TailDuplicator TailDup;
433 BlockFrequency DupThreshold;
435 unsigned TailDupSize;
439 bool UseProfileCount =
false;
448 SpecificBumpPtrAllocator<BlockChain> ChainAllocator;
456 DenseMap<const MachineBasicBlock *, BlockChain *> BlockToChain;
463 SmallPtrSet<MachineBasicBlock *, 4> BlocksWithUnanalyzableExits;
468 BlockFrequency getBlockCountOrFrequency(
const MachineBasicBlock *BB) {
469 if (UseProfileCount) {
470 auto Count = MBFI->getBlockProfileCount(BB);
472 return BlockFrequency(*
Count);
474 return BlockFrequency(0);
476 return MBFI->getBlockFreq(BB);
480 BlockFrequency scaleThreshold(MachineBasicBlock *BB);
481 void initTailDupThreshold();
485 void markChainSuccessors(
const BlockChain &Chain,
486 const MachineBasicBlock *LoopHeaderBB,
487 const BlockFilterSet *BlockFilter =
nullptr);
491 void markBlockSuccessors(
const BlockChain &Chain,
const MachineBasicBlock *BB,
492 const MachineBasicBlock *LoopHeaderBB,
493 const BlockFilterSet *BlockFilter =
nullptr);
496 collectViableSuccessors(
const MachineBasicBlock *BB,
const BlockChain &Chain,
497 const BlockFilterSet *BlockFilter,
498 SmallVector<MachineBasicBlock *, 4> &Successors);
499 bool isBestSuccessor(MachineBasicBlock *BB, MachineBasicBlock *Pred,
500 BlockFilterSet *BlockFilter);
501 void findDuplicateCandidates(SmallVectorImpl<MachineBasicBlock *> &Candidates,
502 MachineBasicBlock *BB,
503 BlockFilterSet *BlockFilter);
504 bool repeatedlyTailDuplicateBlock(
505 MachineBasicBlock *BB, MachineBasicBlock *&LPred,
506 const MachineBasicBlock *LoopHeaderBB, BlockChain &Chain,
507 BlockFilterSet *BlockFilter,
511 maybeTailDuplicateBlock(MachineBasicBlock *BB, MachineBasicBlock *LPred,
512 BlockChain &Chain, BlockFilterSet *BlockFilter,
515 bool &DuplicatedToLPred);
516 bool hasBetterLayoutPredecessor(
const MachineBasicBlock *BB,
517 const MachineBasicBlock *Succ,
518 const BlockChain &SuccChain,
519 BranchProbability SuccProb,
520 BranchProbability RealSuccProb,
521 const BlockChain &Chain,
522 const BlockFilterSet *BlockFilter);
523 BlockAndTailDupResult selectBestSuccessor(
const MachineBasicBlock *BB,
524 const BlockChain &Chain,
525 const BlockFilterSet *BlockFilter);
527 selectBestCandidateBlock(
const BlockChain &Chain,
528 SmallVectorImpl<MachineBasicBlock *> &WorkList);
530 getFirstUnplacedBlock(
const BlockChain &PlacedChain,
533 getFirstUnplacedBlock(
const BlockChain &PlacedChain,
535 const BlockFilterSet *BlockFilter);
542 void fillWorkLists(
const MachineBasicBlock *
MBB,
543 SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
544 const BlockFilterSet *BlockFilter);
546 void buildChain(
const MachineBasicBlock *BB, BlockChain &Chain,
547 BlockFilterSet *BlockFilter =
nullptr);
548 bool canMoveBottomBlockToTop(
const MachineBasicBlock *BottomBlock,
549 const MachineBasicBlock *OldTop);
550 bool hasViableTopFallthrough(
const MachineBasicBlock *Top,
551 const BlockFilterSet &LoopBlockSet);
552 BlockFrequency TopFallThroughFreq(
const MachineBasicBlock *Top,
553 const BlockFilterSet &LoopBlockSet);
554 BlockFrequency FallThroughGains(
const MachineBasicBlock *NewTop,
555 const MachineBasicBlock *OldTop,
556 const MachineBasicBlock *ExitBB,
557 const BlockFilterSet &LoopBlockSet);
558 MachineBasicBlock *findBestLoopTopHelper(MachineBasicBlock *OldTop,
559 const MachineLoop &L,
560 const BlockFilterSet &LoopBlockSet);
561 MachineBasicBlock *findBestLoopTop(
const MachineLoop &L,
562 const BlockFilterSet &LoopBlockSet);
563 MachineBasicBlock *findBestLoopExit(
const MachineLoop &L,
564 const BlockFilterSet &LoopBlockSet,
565 BlockFrequency &ExitFreq);
566 BlockFilterSet collectLoopBlockSet(
const MachineLoop &L);
567 void buildLoopChains(
const MachineLoop &L);
568 void rotateLoop(BlockChain &LoopChain,
const MachineBasicBlock *ExitingBB,
569 BlockFrequency ExitFreq,
const BlockFilterSet &LoopBlockSet);
570 void rotateLoopWithProfile(BlockChain &LoopChain,
const MachineLoop &L,
571 const BlockFilterSet &LoopBlockSet);
572 void buildCFGChains();
573 void optimizeBranches();
577 bool shouldTailDuplicate(MachineBasicBlock *BB);
580 bool isProfitableToTailDup(
const MachineBasicBlock *BB,
581 const MachineBasicBlock *Succ,
582 BranchProbability QProb,
const BlockChain &Chain,
583 const BlockFilterSet *BlockFilter);
586 bool isTrellis(
const MachineBasicBlock *BB,
587 const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
588 const BlockChain &Chain,
const BlockFilterSet *BlockFilter);
591 BlockAndTailDupResult getBestTrellisSuccessor(
592 const MachineBasicBlock *BB,
593 const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
594 BranchProbability AdjustedSumProb,
const BlockChain &Chain,
595 const BlockFilterSet *BlockFilter);
598 static std::pair<WeightedEdge, WeightedEdge> getBestNonConflictingEdges(
599 const MachineBasicBlock *BB,
604 bool canTailDuplicateUnplacedPreds(
const MachineBasicBlock *BB,
605 MachineBasicBlock *Succ,
606 const BlockChain &Chain,
607 const BlockFilterSet *BlockFilter);
611 void precomputeTriangleChains();
614 void applyExtTsp(
bool OptForSize);
617 void assignBlockOrder(
const std::vector<const MachineBasicBlock *> &NewOrder);
620 void createCFGChainExtTsp();
623 MachineBlockPlacement(
const MachineBranchProbabilityInfo *MBPI,
624 MachineLoopInfo *MLI, ProfileSummaryInfo *PSI,
625 std::unique_ptr<MBFIWrapper> MBFI,
626 MachinePostDominatorTree *MPDT,
bool AllowTailMerge)
627 : MBPI(MBPI), MBFI(std::
move(MBFI)), MLI(MLI), MPDT(MPDT), PSI(PSI),
628 AllowTailMerge(AllowTailMerge) {};
630 bool run(MachineFunction &F);
632 static bool allowTailDupPlacement(MachineFunction &MF) {
641 MachineBlockPlacementLegacy() : MachineFunctionPass(ID) {}
643 bool runOnMachineFunction(MachineFunction &MF)
override {
648 &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
649 auto MBFI = std::make_unique<MBFIWrapper>(
650 getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI());
651 auto *MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
652 auto *MPDT = MachineBlockPlacement::allowTailDupPlacement(MF)
653 ? &getAnalysis<MachinePostDominatorTreeWrapperPass>()
656 auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
657 auto *PassConfig = &getAnalysis<TargetPassConfig>();
658 bool AllowTailMerge = PassConfig->getEnableTailMerge();
659 return MachineBlockPlacement(MBPI, MLI, PSI, std::move(MBFI), MPDT,
664 void getAnalysisUsage(AnalysisUsage &AU)
const override {
665 AU.
addRequired<MachineBranchProbabilityInfoWrapperPass>();
666 AU.
addRequired<MachineBlockFrequencyInfoWrapperPass>();
668 AU.
addRequired<MachinePostDominatorTreeWrapperPass>();
678char MachineBlockPlacementLegacy::ID = 0;
683 "Branch Probability Basic Block Placement",
false,
false)
700 OS <<
" ('" << BB->
getName() <<
"')";
711void MachineBlockPlacement::markChainSuccessors(
713 const BlockFilterSet *BlockFilter) {
716 for (MachineBasicBlock *
MBB : Chain) {
717 markBlockSuccessors(Chain,
MBB, LoopHeaderBB, BlockFilter);
727void MachineBlockPlacement::markBlockSuccessors(
728 const BlockChain &Chain,
const MachineBasicBlock *
MBB,
729 const MachineBasicBlock *LoopHeaderBB,
const BlockFilterSet *BlockFilter) {
735 if (BlockFilter && !BlockFilter->count(Succ))
737 BlockChain &SuccChain = *BlockToChain[Succ];
739 if (&Chain == &SuccChain || Succ == LoopHeaderBB)
744 if (SuccChain.UnscheduledPredecessors == 0 ||
745 --SuccChain.UnscheduledPredecessors > 0)
748 auto *NewBB = *SuccChain.begin();
749 if (NewBB->isEHPad())
760BranchProbability MachineBlockPlacement::collectViableSuccessors(
761 const MachineBasicBlock *BB,
const BlockChain &Chain,
762 const BlockFilterSet *BlockFilter,
763 SmallVector<MachineBasicBlock *, 4> &Successors) {
781 for (MachineBasicBlock *Succ : BB->
successors()) {
782 bool SkipSucc =
false;
783 if (Succ->isEHPad() || (BlockFilter && !BlockFilter->count(Succ))) {
786 BlockChain *SuccChain = BlockToChain[Succ];
787 if (SuccChain == &Chain) {
789 }
else if (Succ != *SuccChain->begin()) {
791 <<
" -> Mid chain!\n");
801 return AdjustedSumProb;
806static BranchProbability
812 if (SuccProbN >= SuccProbD)
827 if (Successors.
count(&BB))
830 if (!Successors.
count(Succ))
838bool MachineBlockPlacement::shouldTailDuplicate(MachineBasicBlock *BB) {
859 return (Gain / ThresholdProb) >= EntryFreq;
867bool MachineBlockPlacement::isProfitableToTailDup(
868 const MachineBasicBlock *BB,
const MachineBasicBlock *Succ,
869 BranchProbability QProb,
const BlockChain &Chain,
870 const BlockFilterSet *BlockFilter) {
894 MachineBasicBlock *PDom =
nullptr;
895 SmallVector<MachineBasicBlock *, 4> SuccSuccs;
897 auto AdjustedSuccSumProb =
898 collectViableSuccessors(Succ, Chain, BlockFilter, SuccSuccs);
900 auto BBFreq = MBFI->getBlockFreq(BB);
901 auto SuccFreq = MBFI->getBlockFreq(Succ);
902 BlockFrequency
P =
BBFreq * PProb;
903 BlockFrequency Qout =
BBFreq * QProb;
904 BlockFrequency EntryFreq = MBFI->getEntryFreq();
907 if (SuccSuccs.
size() == 0)
912 for (MachineBasicBlock *SuccSucc : SuccSuccs) {
914 if (Prob > BestSuccSucc)
924 auto SuccBestPred = BlockFrequency(0);
925 for (MachineBasicBlock *SuccPred : Succ->
predecessors()) {
926 if (SuccPred == Succ || SuccPred == BB ||
927 BlockToChain[SuccPred] == &Chain ||
928 (BlockFilter && !BlockFilter->count(SuccPred)))
932 if (Freq > SuccBestPred)
936 BlockFrequency Qin = SuccBestPred;
957 BranchProbability UProb = BestSuccSucc;
958 BranchProbability VProb = AdjustedSuccSumProb - UProb;
959 BlockFrequency
F = SuccFreq - Qin;
960 BlockFrequency
V = SuccFreq * VProb;
961 BlockFrequency QinU = std::min(Qin,
F) * UProb;
962 BlockFrequency BaseCost =
P +
V;
963 BlockFrequency DupCost = Qout + QinU + std::max(Qin,
F) * VProb;
967 BranchProbability VProb = AdjustedSuccSumProb - UProb;
968 BlockFrequency
U = SuccFreq * UProb;
969 BlockFrequency
V = SuccFreq * VProb;
970 BlockFrequency
F = SuccFreq - Qin;
1000 if (UProb > AdjustedSuccSumProb / 2 &&
1001 !hasBetterLayoutPredecessor(Succ, PDom, *BlockToChain[PDom], UProb, UProb,
1002 Chain, BlockFilter))
1005 (
P + V), (Qout + std::max(Qin,
F) * VProb + std::min(Qin,
F) * UProb),
1009 (Qout + std::min(Qin,
F) * AdjustedSuccSumProb +
1010 std::max(Qin,
F) * UProb),
1021bool MachineBlockPlacement::isTrellis(
1022 const MachineBasicBlock *BB,
1023 const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
1024 const BlockChain &Chain,
const BlockFilterSet *BlockFilter) {
1033 SmallPtrSet<const MachineBasicBlock *, 8> SeenPreds;
1035 for (MachineBasicBlock *Succ : ViableSuccs) {
1044 if (Successors.count(SuccPred)) {
1046 for (MachineBasicBlock *CheckSucc : SuccPred->successors())
1047 if (!Successors.count(CheckSucc))
1051 const BlockChain *PredChain = BlockToChain[SuccPred];
1052 if (SuccPred == BB || (BlockFilter && !BlockFilter->count(SuccPred)) ||
1053 PredChain == &Chain || PredChain == BlockToChain[Succ])
1057 if (!SeenPreds.
insert(SuccPred).second)
1075std::pair<MachineBlockPlacement::WeightedEdge,
1076 MachineBlockPlacement::WeightedEdge>
1077MachineBlockPlacement::getBestNonConflictingEdges(
1078 const MachineBasicBlock *BB,
1088 auto Cmp = [](WeightedEdge
A, WeightedEdge
B) {
return A.Weight >
B.Weight; };
1092 auto BestA = Edges[0].begin();
1093 auto BestB = Edges[1].begin();
1096 if (BestA->Src == BestB->Src) {
1098 auto SecondBestA = std::next(BestA);
1099 auto SecondBestB = std::next(BestB);
1100 BlockFrequency BestAScore = BestA->Weight + SecondBestB->Weight;
1101 BlockFrequency BestBScore = BestB->Weight + SecondBestA->Weight;
1102 if (BestAScore < BestBScore)
1103 BestA = SecondBestA;
1105 BestB = SecondBestB;
1108 if (BestB->Src == BB)
1110 return std::make_pair(*BestA, *BestB);
1120MachineBlockPlacement::BlockAndTailDupResult
1121MachineBlockPlacement::getBestTrellisSuccessor(
1122 const MachineBasicBlock *BB,
1123 const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
1124 BranchProbability AdjustedSumProb,
const BlockChain &Chain,
1125 const BlockFilterSet *BlockFilter) {
1127 BlockAndTailDupResult
Result = {
nullptr,
false};
1134 if (Successors.
size() != 2 || ViableSuccs.
size() != 2)
1140 for (
auto *Succ : ViableSuccs) {
1141 for (MachineBasicBlock *SuccPred : Succ->predecessors()) {
1143 if (SuccPred != BB) {
1144 if (BlockFilter && !BlockFilter->count(SuccPred))
1146 const BlockChain *SuccPredChain = BlockToChain[SuccPred];
1147 if (SuccPredChain == &Chain || SuccPredChain == BlockToChain[Succ])
1150 BlockFrequency EdgeFreq = MBFI->getBlockFreq(SuccPred) *
1152 Edges[SuccIndex].
push_back({EdgeFreq, SuccPred, Succ});
1158 WeightedEdge BestA, BestB;
1159 std::tie(BestA, BestB) = getBestNonConflictingEdges(BB, Edges);
1161 if (BestA.Src != BB) {
1165 LLVM_DEBUG(
dbgs() <<
"Trellis, but not one of the chosen edges.\n");
1172 if (BestA.Dest == BestB.Src) {
1175 MachineBasicBlock *Succ1 = BestA.Dest;
1176 MachineBasicBlock *Succ2 = BestB.Dest;
1178 if (allowTailDupPlacement(*
F) && shouldTailDuplicate(Succ2) &&
1179 canTailDuplicateUnplacedPreds(BB, Succ2, Chain, BlockFilter) &&
1181 Chain, BlockFilter)) {
1185 <<
", probability: " << Succ2Prob
1186 <<
" (Tail Duplicate)\n");
1188 Result.ShouldTailDup =
true;
1194 ComputedEdges[BestB.Src] = {BestB.Dest,
false};
1196 auto TrellisSucc = BestA.Dest;
1200 <<
", probability: " << SuccProb <<
" (Trellis)\n");
1208bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
1209 const MachineBasicBlock *BB, MachineBasicBlock *Succ,
1210 const BlockChain &Chain,
const BlockFilterSet *BlockFilter) {
1211 if (!shouldTailDuplicate(Succ))
1215 bool Duplicate =
true;
1217 unsigned int NumDup = 0;
1226 if (Pred == BB || (BlockFilter && !BlockFilter->count(Pred)) ||
1227 (BlockToChain[Pred] == &Chain && !Succ->
succ_empty()))
1277 if (
F->getFunction().hasProfileData())
1308 if ((NumDup > Succ->
succ_size()) || !Duplicate)
1331void MachineBlockPlacement::precomputeTriangleChains() {
1332 struct TriangleChain {
1333 std::vector<MachineBasicBlock *> Edges;
1335 TriangleChain(MachineBasicBlock *src, MachineBasicBlock *dst)
1336 : Edges({src, dst}) {}
1338 void append(MachineBasicBlock *dst) {
1339 assert(getKey()->isSuccessor(dst) &&
1340 "Attempting to append a block that is not a successor.");
1341 Edges.push_back(dst);
1344 unsigned count()
const {
return Edges.size() - 1; }
1346 MachineBasicBlock *getKey()
const {
return Edges.back(); }
1355 DenseMap<const MachineBasicBlock *, TriangleChain> TriangleChainMap;
1356 for (MachineBasicBlock &BB : *
F) {
1360 MachineBasicBlock *PDom =
nullptr;
1361 for (MachineBasicBlock *Succ : BB.
successors()) {
1369 if (PDom ==
nullptr)
1376 if (!shouldTailDuplicate(PDom))
1378 bool CanTailDuplicate =
true;
1385 CanTailDuplicate =
false;
1391 if (!CanTailDuplicate)
1398 auto Found = TriangleChainMap.
find(&BB);
1401 if (Found != TriangleChainMap.
end()) {
1402 TriangleChain Chain = std::move(Found->second);
1403 TriangleChainMap.
erase(Found);
1405 TriangleChainMap.
insert(std::make_pair(Chain.getKey(), std::move(Chain)));
1407 auto InsertResult = TriangleChainMap.
try_emplace(PDom, &BB, PDom);
1408 assert(InsertResult.second &&
"Block seen twice.");
1416 for (
auto &ChainPair : TriangleChainMap) {
1417 TriangleChain &Chain = ChainPair.second;
1423 MachineBasicBlock *dst = Chain.Edges.back();
1424 Chain.Edges.pop_back();
1425 for (MachineBasicBlock *src :
reverse(Chain.Edges)) {
1428 <<
" as pre-computed based on triangles.\n");
1430 auto InsertResult = ComputedEdges.
insert({src, {dst,
true}});
1431 assert(InsertResult.second &&
"Block seen twice.");
1441static BranchProbability
1477bool MachineBlockPlacement::hasBetterLayoutPredecessor(
1478 const MachineBasicBlock *BB,
const MachineBasicBlock *Succ,
1479 const BlockChain &SuccChain, BranchProbability SuccProb,
1480 BranchProbability RealSuccProb,
const BlockChain &Chain,
1481 const BlockFilterSet *BlockFilter) {
1484 if (SuccChain.UnscheduledPredecessors == 0)
1609 BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb;
1610 bool BadCFGConflict =
false;
1613 BlockChain *PredChain = BlockToChain[Pred];
1614 if (Pred == Succ || PredChain == &SuccChain ||
1615 (BlockFilter && !BlockFilter->count(Pred)) || PredChain == &Chain ||
1616 Pred != *std::prev(PredChain->end()) ||
1635 BlockFrequency PredEdgeFreq =
1637 if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.
getCompl()) {
1638 BadCFGConflict =
true;
1643 if (BadCFGConflict) {
1645 << SuccProb <<
" (prob) (non-cold CFG conflict)\n");
1662MachineBlockPlacement::BlockAndTailDupResult
1663MachineBlockPlacement::selectBestSuccessor(
const MachineBasicBlock *BB,
1664 const BlockChain &Chain,
1665 const BlockFilterSet *BlockFilter) {
1668 BlockAndTailDupResult BestSucc = {
nullptr,
false};
1671 SmallVector<MachineBasicBlock *, 4> Successors;
1672 auto AdjustedSumProb =
1673 collectViableSuccessors(BB, Chain, BlockFilter, Successors);
1680 auto FoundEdge = ComputedEdges.
find(BB);
1681 if (FoundEdge != ComputedEdges.
end()) {
1682 MachineBasicBlock *Succ = FoundEdge->second.BB;
1683 ComputedEdges.
erase(FoundEdge);
1684 BlockChain *SuccChain = BlockToChain[Succ];
1685 if (BB->
isSuccessor(Succ) && (!BlockFilter || BlockFilter->count(Succ)) &&
1686 SuccChain != &Chain && Succ == *SuccChain->begin())
1687 return FoundEdge->second;
1692 if (isTrellis(BB, Successors, Chain, BlockFilter))
1693 return getBestTrellisSuccessor(BB, Successors, AdjustedSumProb, Chain,
1701 for (MachineBasicBlock *Succ : Successors) {
1703 BranchProbability SuccProb =
1706 BlockChain &SuccChain = *BlockToChain[Succ];
1709 if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,
1710 Chain, BlockFilter)) {
1712 if (allowTailDupPlacement(*
F) && shouldTailDuplicate(Succ))
1719 <<
", probability: " << SuccProb
1720 << (SuccChain.UnscheduledPredecessors != 0 ?
" (CFG break)" :
"")
1723 if (BestSucc.BB && BestProb >= SuccProb) {
1730 BestProb = SuccProb;
1738 [](std::tuple<BranchProbability, MachineBasicBlock *> L,
1739 std::tuple<BranchProbability, MachineBasicBlock *> R) {
1740 return std::get<0>(L) > std::get<0>(R);
1742 for (
auto &Tup : DupCandidates) {
1743 BranchProbability DupProb;
1744 MachineBasicBlock *Succ;
1745 std::tie(DupProb, Succ) = Tup;
1746 if (DupProb < BestProb)
1748 if (canTailDuplicateUnplacedPreds(BB, Succ, Chain, BlockFilter) &&
1749 (isProfitableToTailDup(BB, Succ, BestProb, Chain, BlockFilter))) {
1751 <<
", probability: " << DupProb
1752 <<
" (Tail Duplicate)\n");
1754 BestSucc.ShouldTailDup =
true;
1775MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
1776 const BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList) {
1782 return BlockToChain.
lookup(BB) == &Chain;
1785 if (WorkList.
empty())
1788 bool IsEHPad = WorkList[0]->isEHPad();
1790 MachineBasicBlock *BestBlock =
nullptr;
1791 BlockFrequency BestFreq;
1792 for (MachineBasicBlock *
MBB : WorkList) {
1794 "EHPad mismatch between block and work list.");
1796 BlockChain &SuccChain = *BlockToChain[
MBB];
1797 if (&SuccChain == &Chain)
1800 assert(SuccChain.UnscheduledPredecessors == 0 &&
1801 "Found CFG-violating block");
1803 BlockFrequency CandidateFreq = MBFI->getBlockFreq(
MBB);
1826 if (BestBlock && (IsEHPad ^ (BestFreq >= CandidateFreq)))
1830 BestFreq = CandidateFreq;
1843MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
1844 const BlockChain &PlacedChain,
1849 if (BlockChain *Chain = BlockToChain[&*
I]; Chain != &PlacedChain) {
1850 PrevUnplacedBlockIt =
I;
1854 return *Chain->begin();
1870MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
1871 const BlockChain &PlacedChain,
1872 BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
1873 const BlockFilterSet *BlockFilter) {
1875 for (; PrevUnplacedBlockInFilterIt != BlockFilter->end();
1876 ++PrevUnplacedBlockInFilterIt) {
1877 BlockChain *
C = BlockToChain[*PrevUnplacedBlockInFilterIt];
1878 if (
C != &PlacedChain) {
1885void MachineBlockPlacement::fillWorkLists(
1886 const MachineBasicBlock *
MBB, SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
1887 const BlockFilterSet *BlockFilter =
nullptr) {
1888 BlockChain &Chain = *BlockToChain[
MBB];
1889 if (!UpdatedPreds.
insert(&Chain).second)
1893 Chain.UnscheduledPredecessors == 0 &&
1894 "Attempting to place block with unscheduled predecessors in worklist.");
1895 for (MachineBasicBlock *ChainBB : Chain) {
1896 assert(BlockToChain[ChainBB] == &Chain &&
1897 "Block in chain doesn't match BlockToChain map.");
1898 for (MachineBasicBlock *Pred : ChainBB->predecessors()) {
1899 if (BlockFilter && !BlockFilter->count(Pred))
1901 if (BlockToChain[Pred] == &Chain)
1903 ++Chain.UnscheduledPredecessors;
1907 if (Chain.UnscheduledPredecessors != 0)
1910 MachineBasicBlock *BB = *Chain.
begin();
1917void MachineBlockPlacement::buildChain(
const MachineBasicBlock *HeadBB,
1919 BlockFilterSet *BlockFilter) {
1920 assert(HeadBB &&
"BB must not be null.\n");
1921 assert(BlockToChain[HeadBB] == &Chain &&
"BlockToChainMap mis-match.\n");
1923 BlockFilterSet::iterator PrevUnplacedBlockInFilterIt;
1925 PrevUnplacedBlockInFilterIt = BlockFilter->begin();
1927 const MachineBasicBlock *LoopHeaderBB = HeadBB;
1928 markChainSuccessors(Chain, LoopHeaderBB, BlockFilter);
1929 MachineBasicBlock *BB = *std::prev(Chain.end());
1931 assert(BB &&
"null block found at end of chain in loop.");
1932 assert(BlockToChain[BB] == &Chain &&
"BlockToChainMap mis-match in loop.");
1933 assert(*std::prev(Chain.end()) == BB &&
"BB Not found at end of chain.");
1937 auto Result = selectBestSuccessor(BB, Chain, BlockFilter);
1938 MachineBasicBlock *BestSucc =
Result.BB;
1939 bool ShouldTailDup =
Result.ShouldTailDup;
1940 if (allowTailDupPlacement(*
F))
1941 ShouldTailDup |= (BestSucc && canTailDuplicateUnplacedPreds(
1942 BB, BestSucc, Chain, BlockFilter));
1948 BestSucc = selectBestCandidateBlock(Chain, BlockWorkList);
1950 BestSucc = selectBestCandidateBlock(Chain, EHPadWorkList);
1954 BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockInFilterIt,
1957 BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockIt);
1961 LLVM_DEBUG(
dbgs() <<
"Unnatural loop CFG detected, forcibly merging the "
1962 "layout successor until the CFG reduces\n");
1967 if (allowTailDupPlacement(*
F) && BestSucc && ShouldTailDup) {
1968 repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain,
1969 BlockFilter, PrevUnplacedBlockIt,
1970 PrevUnplacedBlockInFilterIt);
1978 BlockChain &SuccChain = *BlockToChain[BestSucc];
1981 SuccChain.UnscheduledPredecessors = 0;
1984 markChainSuccessors(SuccChain, LoopHeaderBB, BlockFilter);
1985 Chain.merge(BestSucc, &SuccChain);
1986 BB = *std::prev(Chain.end());
2007bool MachineBlockPlacement::canMoveBottomBlockToTop(
2008 const MachineBasicBlock *BottomBlock,
const MachineBasicBlock *OldTop) {
2011 MachineBasicBlock *Pred = *BottomBlock->
pred_begin();
2015 MachineBasicBlock *OtherBB = *Pred->
succ_begin();
2016 if (OtherBB == BottomBlock)
2018 if (OtherBB == OldTop)
2026MachineBlockPlacement::TopFallThroughFreq(
const MachineBasicBlock *Top,
2027 const BlockFilterSet &LoopBlockSet) {
2028 BlockFrequency MaxFreq = BlockFrequency(0);
2030 BlockChain *PredChain = BlockToChain[Pred];
2031 if (!LoopBlockSet.count(Pred) &&
2032 (!PredChain || Pred == *std::prev(PredChain->end()))) {
2037 for (MachineBasicBlock *Succ : Pred->
successors()) {
2039 BlockChain *SuccChain = BlockToChain[Succ];
2042 if (!LoopBlockSet.count(Succ) && (SuccProb > TopProb) &&
2043 (!SuccChain || Succ == *SuccChain->begin())) {
2049 BlockFrequency EdgeFreq =
2051 if (EdgeFreq > MaxFreq)
2080BlockFrequency MachineBlockPlacement::FallThroughGains(
2081 const MachineBasicBlock *NewTop,
const MachineBasicBlock *OldTop,
2082 const MachineBasicBlock *ExitBB,
const BlockFilterSet &LoopBlockSet) {
2083 BlockFrequency FallThrough2Top = TopFallThroughFreq(OldTop, LoopBlockSet);
2084 BlockFrequency FallThrough2Exit = BlockFrequency(0);
2088 BlockFrequency BackEdgeFreq =
2092 MachineBasicBlock *BestPred =
nullptr;
2093 BlockFrequency FallThroughFromPred = BlockFrequency(0);
2094 for (MachineBasicBlock *Pred : NewTop->
predecessors()) {
2095 if (!LoopBlockSet.count(Pred))
2097 BlockChain *PredChain = BlockToChain[Pred];
2098 if (!PredChain || Pred == *std::prev(PredChain->end())) {
2099 BlockFrequency EdgeFreq =
2101 if (EdgeFreq > FallThroughFromPred) {
2102 FallThroughFromPred = EdgeFreq;
2110 BlockFrequency NewFreq = BlockFrequency(0);
2112 for (MachineBasicBlock *Succ : BestPred->
successors()) {
2113 if ((Succ == NewTop) || (Succ == BestPred) || !LoopBlockSet.count(Succ))
2117 BlockChain *SuccChain = BlockToChain[Succ];
2118 if ((SuccChain && (Succ != *SuccChain->begin())) ||
2119 (SuccChain == BlockToChain[BestPred]))
2121 BlockFrequency EdgeFreq = MBFI->getBlockFreq(BestPred) *
2123 if (EdgeFreq > NewFreq)
2126 BlockFrequency OrigEdgeFreq = MBFI->getBlockFreq(BestPred) *
2128 if (NewFreq > OrigEdgeFreq) {
2132 NewFreq = BlockFrequency(0);
2133 FallThroughFromPred = BlockFrequency(0);
2137 BlockFrequency
Result = BlockFrequency(0);
2138 BlockFrequency Gains = BackEdgeFreq + NewFreq;
2139 BlockFrequency Lost =
2140 FallThrough2Top + FallThrough2Exit + FallThroughFromPred;
2168MachineBasicBlock *MachineBlockPlacement::findBestLoopTopHelper(
2169 MachineBasicBlock *OldTop,
const MachineLoop &L,
2170 const BlockFilterSet &LoopBlockSet) {
2174 BlockChain &HeaderChain = *BlockToChain[OldTop];
2175 if (!LoopBlockSet.count(*HeaderChain.begin()))
2177 if (OldTop != *HeaderChain.begin())
2183 BlockFrequency BestGains = BlockFrequency(0);
2184 MachineBasicBlock *BestPred =
nullptr;
2185 for (MachineBasicBlock *Pred : OldTop->
predecessors()) {
2186 if (!LoopBlockSet.count(Pred))
2188 if (Pred ==
L.getHeader())
2196 MachineBasicBlock *OtherBB =
nullptr;
2199 if (OtherBB == OldTop)
2203 if (!canMoveBottomBlockToTop(Pred, OldTop))
2206 BlockFrequency Gains =
2207 FallThroughGains(Pred, OldTop, OtherBB, LoopBlockSet);
2208 if ((Gains > BlockFrequency(0)) &&
2209 (Gains > BestGains ||
2224 (*BestPred->
pred_begin())->succ_size() == 1 &&
2237MachineBlockPlacement::findBestLoopTop(
const MachineLoop &L,
2238 const BlockFilterSet &LoopBlockSet) {
2247 return L.getHeader();
2249 MachineBasicBlock *OldTop =
nullptr;
2250 MachineBasicBlock *NewTop =
L.getHeader();
2251 while (NewTop != OldTop) {
2253 NewTop = findBestLoopTopHelper(OldTop, L, LoopBlockSet);
2254 if (NewTop != OldTop)
2255 ComputedEdges[NewTop] = {OldTop,
false};
2266MachineBlockPlacement::findBestLoopExit(
const MachineLoop &L,
2267 const BlockFilterSet &LoopBlockSet,
2268 BlockFrequency &ExitFreq) {
2277 BlockChain &HeaderChain = *BlockToChain[
L.getHeader()];
2278 if (!LoopBlockSet.count(*HeaderChain.begin()))
2281 BlockFrequency BestExitEdgeFreq;
2282 unsigned BestExitLoopDepth = 0;
2283 MachineBasicBlock *ExitingBB =
nullptr;
2287 SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop;
2291 for (MachineBasicBlock *
MBB :
L.getBlocks()) {
2292 BlockChain &Chain = *BlockToChain[
MBB];
2295 if (
MBB != *std::prev(Chain.end()))
2302 MachineBasicBlock *OldExitingBB = ExitingBB;
2303 BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq;
2304 bool HasLoopingSucc =
false;
2310 BlockChain &SuccChain = *BlockToChain[Succ];
2312 if (&Chain == &SuccChain) {
2319 if (LoopBlockSet.count(Succ)) {
2322 HasLoopingSucc =
true;
2326 unsigned SuccLoopDepth = 0;
2327 if (MachineLoop *ExitLoop = MLI->
getLoopFor(Succ)) {
2328 SuccLoopDepth = ExitLoop->getLoopDepth();
2329 if (ExitLoop->contains(&L))
2333 BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(
MBB) * SuccProb;
2336 <<
getBlockName(Succ) <<
" [L:" << SuccLoopDepth <<
"] ("
2343 if (!ExitingBB || SuccLoopDepth > BestExitLoopDepth ||
2344 ExitEdgeFreq > BestExitEdgeFreq ||
2346 !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) {
2347 BestExitEdgeFreq = ExitEdgeFreq;
2352 if (!HasLoopingSucc) {
2354 ExitingBB = OldExitingBB;
2355 BestExitEdgeFreq = OldBestExitEdgeFreq;
2362 dbgs() <<
" No other candidate exit blocks, using loop header\n");
2365 if (
L.getNumBlocks() == 1) {
2366 LLVM_DEBUG(
dbgs() <<
" Loop has 1 block, using loop header as exit\n");
2373 if (!BlocksExitingToOuterLoop.
empty() &&
2374 !BlocksExitingToOuterLoop.
count(ExitingBB))
2379 ExitFreq = BestExitEdgeFreq;
2387bool MachineBlockPlacement::hasViableTopFallthrough(
2388 const MachineBasicBlock *Top,
const BlockFilterSet &LoopBlockSet) {
2390 BlockChain *PredChain = BlockToChain[Pred];
2391 if (!LoopBlockSet.count(Pred) &&
2392 (!PredChain || Pred == *std::prev(PredChain->end()))) {
2397 for (MachineBasicBlock *Succ : Pred->
successors()) {
2399 BlockChain *SuccChain = BlockToChain[Succ];
2402 if ((!SuccChain || Succ == *SuccChain->begin()) && SuccProb > TopProb) {
2420void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
2421 const MachineBasicBlock *ExitingBB,
2422 BlockFrequency ExitFreq,
2423 const BlockFilterSet &LoopBlockSet) {
2427 MachineBasicBlock *Top = *LoopChain.begin();
2428 MachineBasicBlock *Bottom = *std::prev(LoopChain.end());
2431 if (Bottom == ExitingBB)
2438 bool ViableTopFallthrough = hasViableTopFallthrough(Top, LoopBlockSet);
2443 if (ViableTopFallthrough) {
2444 for (MachineBasicBlock *Succ : Bottom->
successors()) {
2445 BlockChain *SuccChain = BlockToChain[Succ];
2446 if (!LoopBlockSet.count(Succ) &&
2447 (!SuccChain || Succ == *SuccChain->begin()))
2453 BlockFrequency FallThrough2Top = TopFallThroughFreq(Top, LoopBlockSet);
2454 if (FallThrough2Top >= ExitFreq)
2458 BlockChain::iterator ExitIt =
llvm::find(LoopChain, ExitingBB);
2459 if (ExitIt == LoopChain.end())
2481 if (ViableTopFallthrough) {
2482 assert(std::next(ExitIt) != LoopChain.end() &&
2483 "Exit should not be last BB");
2484 MachineBasicBlock *NextBlockInChain = *std::next(ExitIt);
2492 std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end());
2508void MachineBlockPlacement::rotateLoopWithProfile(
2509 BlockChain &LoopChain,
const MachineLoop &L,
2510 const BlockFilterSet &LoopBlockSet) {
2511 auto RotationPos = LoopChain.end();
2512 MachineBasicBlock *ChainHeaderBB = *LoopChain.begin();
2522 auto ScaleBlockFrequency = [](BlockFrequency Freq,
2523 unsigned Scale) -> BlockFrequency {
2525 return BlockFrequency(0);
2528 return Freq / BranchProbability(1, Scale);
2534 BlockFrequency HeaderFallThroughCost(0);
2536 BlockChain *PredChain = BlockToChain[Pred];
2537 if (!LoopBlockSet.count(Pred) &&
2538 (!PredChain || Pred == *std::prev(PredChain->end()))) {
2539 auto EdgeFreq = MBFI->getBlockFreq(Pred) *
2541 auto FallThruCost = ScaleBlockFrequency(EdgeFreq,
MisfetchCost);
2545 FallThruCost += ScaleBlockFrequency(EdgeFreq,
JumpInstCost);
2546 HeaderFallThroughCost = std::max(HeaderFallThroughCost, FallThruCost);
2555 for (
auto *BB : LoopChain) {
2558 BlockChain *SuccChain = BlockToChain[Succ];
2559 if (!LoopBlockSet.count(Succ) &&
2560 (!SuccChain || Succ == *SuccChain->begin())) {
2562 LargestExitEdgeProb = std::max(LargestExitEdgeProb, SuccProb);
2566 auto ExitFreq = MBFI->getBlockFreq(BB) * LargestExitEdgeProb;
2574 for (
auto Iter = LoopChain.begin(), TailIter = std::prev(LoopChain.end()),
2575 EndIter = LoopChain.end();
2576 Iter != EndIter; Iter++, TailIter++) {
2579 if (TailIter == LoopChain.end())
2580 TailIter = LoopChain.begin();
2582 auto TailBB = *TailIter;
2585 BlockFrequency
Cost = BlockFrequency(0);
2590 if (Iter != LoopChain.begin())
2591 Cost += HeaderFallThroughCost;
2595 for (
auto &ExitWithFreq : ExitsWithFreq)
2596 if (TailBB != ExitWithFreq.first)
2597 Cost += ExitWithFreq.second;
2613 if (TailBB->isSuccessor(*Iter)) {
2614 auto TailBBFreq = MBFI->getBlockFreq(TailBB);
2615 if (TailBB->succ_size() == 1)
2617 else if (TailBB->succ_size() == 2) {
2619 auto TailToHeadFreq = TailBBFreq * TailToHeadProb;
2620 auto ColderEdgeFreq = TailToHeadProb > BranchProbability(1, 2)
2621 ? TailBBFreq * TailToHeadProb.
getCompl()
2632 if (
Cost < SmallestRotationCost) {
2633 SmallestRotationCost =
Cost;
2638 if (RotationPos != LoopChain.end()) {
2640 <<
" to the top\n");
2641 std::rotate(LoopChain.begin(), RotationPos, LoopChain.end());
2649MachineBlockPlacement::BlockFilterSet
2650MachineBlockPlacement::collectLoopBlockSet(
const MachineLoop &L) {
2654 bool operator()(
const MachineBasicBlock *
X,
2655 const MachineBasicBlock *
Y)
const {
2656 return X->getNumber() <
Y->getNumber();
2659 std::set<const MachineBasicBlock *, MBBCompare> LoopBlockSet;
2671 BlockFrequency LoopFreq(0);
2672 for (
auto *LoopPred :
L.getHeader()->predecessors())
2673 if (!
L.contains(LoopPred))
2674 LoopFreq += MBFI->getBlockFreq(LoopPred) *
2677 for (MachineBasicBlock *LoopBB :
L.getBlocks()) {
2678 if (LoopBlockSet.count(LoopBB))
2683 BlockChain *Chain = BlockToChain[LoopBB];
2684 for (MachineBasicBlock *ChainBB : *Chain)
2685 LoopBlockSet.insert(ChainBB);
2688 LoopBlockSet.insert(
L.block_begin(),
L.block_end());
2693 BlockFilterSet Ret(LoopBlockSet.begin(), LoopBlockSet.end());
2703void MachineBlockPlacement::buildLoopChains(
const MachineLoop &L) {
2706 for (
const MachineLoop *InnerLoop : L)
2707 buildLoopChains(*InnerLoop);
2710 "BlockWorkList not empty when starting to build loop chains.");
2712 "EHPadWorkList not empty when starting to build loop chains.");
2713 BlockFilterSet LoopBlockSet = collectLoopBlockSet(L);
2718 bool RotateLoopWithProfile =
2726 MachineBasicBlock *LoopTop = findBestLoopTop(L, LoopBlockSet);
2734 PreferredLoopExit =
nullptr;
2735 BlockFrequency ExitFreq;
2736 if (!RotateLoopWithProfile && LoopTop ==
L.getHeader())
2737 PreferredLoopExit = findBestLoopExit(L, LoopBlockSet, ExitFreq);
2739 BlockChain &LoopChain = *BlockToChain[LoopTop];
2744 SmallPtrSet<BlockChain *, 4> UpdatedPreds;
2745 assert(LoopChain.UnscheduledPredecessors == 0 &&
2746 "LoopChain should not have unscheduled predecessors.");
2747 UpdatedPreds.
insert(&LoopChain);
2749 for (
const MachineBasicBlock *LoopBB : LoopBlockSet)
2750 fillWorkLists(LoopBB, UpdatedPreds, &LoopBlockSet);
2752 buildChain(LoopTop, LoopChain, &LoopBlockSet);
2754 if (RotateLoopWithProfile)
2755 rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
2757 rotateLoop(LoopChain, PreferredLoopExit, ExitFreq, LoopBlockSet);
2761 bool BadLoop =
false;
2762 if (LoopChain.UnscheduledPredecessors) {
2764 dbgs() <<
"Loop chain contains a block without its preds placed!\n"
2765 <<
" Loop header: " <<
getBlockName(*
L.block_begin()) <<
"\n"
2766 <<
" Chain header: " <<
getBlockName(*LoopChain.begin()) <<
"\n";
2768 for (MachineBasicBlock *ChainBB : LoopChain) {
2770 if (!LoopBlockSet.remove(ChainBB)) {
2774 dbgs() <<
"Loop chain contains a block not contained by the loop!\n"
2775 <<
" Loop header: " <<
getBlockName(*
L.block_begin()) <<
"\n"
2776 <<
" Chain header: " <<
getBlockName(*LoopChain.begin()) <<
"\n"
2781 if (!LoopBlockSet.empty()) {
2783 for (
const MachineBasicBlock *LoopBB : LoopBlockSet)
2784 dbgs() <<
"Loop contains blocks never placed into a chain!\n"
2785 <<
" Loop header: " <<
getBlockName(*
L.block_begin()) <<
"\n"
2786 <<
" Chain header: " <<
getBlockName(*LoopChain.begin()) <<
"\n"
2789 assert(!BadLoop &&
"Detected problems with the placement of this loop.");
2792 BlockWorkList.
clear();
2793 EHPadWorkList.
clear();
2796void MachineBlockPlacement::buildCFGChains() {
2802 MachineBasicBlock *BB = &*FI;
2804 new (ChainAllocator.
Allocate()) BlockChain(BlockToChain, BB);
2809 MachineBasicBlock *
TBB =
nullptr, *FBB =
nullptr;
2814 MachineBasicBlock *NextBB = &*NextFI;
2817 assert(NextFI != FE &&
"Can't fallthrough past the last block.");
2818 LLVM_DEBUG(
dbgs() <<
"Pre-merging due to unanalyzable fallthrough: "
2821 Chain->merge(NextBB,
nullptr);
2823 BlocksWithUnanalyzableExits.
insert(&*BB);
2831 PreferredLoopExit =
nullptr;
2832 for (MachineLoop *L : *MLI)
2833 buildLoopChains(*L);
2836 "BlockWorkList should be empty before building final chain.");
2838 "EHPadWorkList should be empty before building final chain.");
2840 SmallPtrSet<BlockChain *, 4> UpdatedPreds;
2841 for (MachineBasicBlock &
MBB : *
F)
2842 fillWorkLists(&
MBB, UpdatedPreds);
2844 BlockChain &FunctionChain = *BlockToChain[&
F->front()];
2845 buildChain(&
F->front(), FunctionChain);
2848 using FunctionBlockSetType = SmallPtrSet<MachineBasicBlock *, 16>;
2852 bool BadFunc =
false;
2853 FunctionBlockSetType FunctionBlockSet;
2854 for (MachineBasicBlock &
MBB : *
F)
2855 FunctionBlockSet.insert(&
MBB);
2857 for (MachineBasicBlock *ChainBB : FunctionChain)
2858 if (!FunctionBlockSet.erase(ChainBB)) {
2860 dbgs() <<
"Function chain contains a block not in the function!\n"
2864 if (!FunctionBlockSet.empty()) {
2866 for (MachineBasicBlock *RemainingBB : FunctionBlockSet)
2867 dbgs() <<
"Function contains blocks never placed into a chain!\n"
2868 <<
" Bad block: " <<
getBlockName(RemainingBB) <<
"\n";
2870 assert(!BadFunc &&
"Detected problems with the block placement.");
2875 SmallVector<MachineBasicBlock *, 4> OriginalLayoutSuccessors(
2876 F->getNumBlockIDs());
2878 MachineBasicBlock *LastMBB =
nullptr;
2879 for (
auto &
MBB : *
F) {
2880 if (LastMBB !=
nullptr)
2884 OriginalLayoutSuccessors[
F->back().getNumber()] =
nullptr;
2890 for (MachineBasicBlock *ChainBB : FunctionChain) {
2891 LLVM_DEBUG(
dbgs() << (ChainBB == *FunctionChain.begin() ?
"Placing chain "
2895 F->splice(InsertPos, ChainBB);
2900 if (ChainBB == *FunctionChain.begin())
2908 MachineBasicBlock *
TBB =
nullptr, *FBB =
nullptr;
2911 if (!BlocksWithUnanalyzableExits.
count(PrevBB)) {
2917 "Unexpected block with un-analyzable fallthrough!");
2919 TBB = FBB =
nullptr;
2951 MachineBasicBlock *
TBB =
nullptr, *FBB =
nullptr;
2953 MachineBasicBlock *PrevBB = &
F->
back();
2957 BlockWorkList.
clear();
2958 EHPadWorkList.
clear();
2961void MachineBlockPlacement::optimizeBranches() {
2962 BlockChain &FunctionChain = *BlockToChain[&
F->front()];
2971 for (MachineBasicBlock *ChainBB : FunctionChain) {
2973 MachineBasicBlock *
TBB =
nullptr, *FBB =
nullptr;
2994 auto Dl = ChainBB->findBranchDebugLoc();
3000void MachineBlockPlacement::alignBlocks() {
3007 if (
F->getFunction().hasMinSize() ||
3012 BlockChain &FunctionChain = *BlockToChain[&
F->front()];
3014 if (FunctionChain.begin() == FunctionChain.end())
3017 const BranchProbability ColdProb(1, 5);
3018 BlockFrequency EntryFreq = MBFI->getBlockFreq(&
F->front());
3019 BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb;
3020 for (MachineBasicBlock *ChainBB : FunctionChain) {
3021 if (ChainBB == *FunctionChain.begin())
3028 MachineLoop *
L = MLI->getLoopFor(ChainBB);
3033 unsigned MDAlign = 1;
3034 MDNode *LoopID =
L->getLoopID();
3043 if (S->
getString() ==
"llvm.loop.align") {
3045 "per-loop align metadata should have two operands.");
3048 assert(MDAlign >= 1 &&
"per-loop align value must be positive.");
3054 const Align LoopAlign = std::max(TLIAlign,
Align(MDAlign));
3060 BlockFrequency Freq = MBFI->getBlockFreq(ChainBB);
3061 if (Freq < WeightedEntryFreq)
3066 MachineBasicBlock *LoopHeader =
L->getHeader();
3067 BlockFrequency LoopHeaderFreq = MBFI->getBlockFreq(LoopHeader);
3068 if (Freq < (LoopHeaderFreq * ColdProb))
3078 MachineBasicBlock *LayoutPred =
3081 auto DetermineMaxAlignmentPadding = [&]() {
3088 ChainBB->setMaxBytesForAlignment(MaxBytes);
3094 ChainBB->setAlignment(LoopAlign);
3095 DetermineMaxAlignmentPadding();
3103 BranchProbability LayoutProb =
3105 BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
3106 if (LayoutEdgeFreq <= (Freq * ColdProb)) {
3107 ChainBB->setAlignment(LoopAlign);
3108 DetermineMaxAlignmentPadding();
3112 const bool HasMaxBytesOverride =
3117 for (MachineBasicBlock &
MBB : *
F) {
3118 if (HasMaxBytesOverride)
3127 for (
auto MBI = std::next(
F->begin()), MBE =
F->end(); MBI != MBE; ++MBI) {
3128 auto LayoutPred = std::prev(MBI);
3130 if (HasMaxBytesOverride)
3155bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
3156 MachineBasicBlock *BB, MachineBasicBlock *&LPred,
3157 const MachineBasicBlock *LoopHeaderBB, BlockChain &Chain,
3159 BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt) {
3160 bool Removed, DuplicatedToLPred;
3161 bool DuplicatedToOriginalLPred;
3162 Removed = maybeTailDuplicateBlock(
3163 BB, LPred, Chain, BlockFilter, PrevUnplacedBlockIt,
3164 PrevUnplacedBlockInFilterIt, DuplicatedToLPred);
3167 DuplicatedToOriginalLPred = DuplicatedToLPred;
3172 while (DuplicatedToLPred && Removed) {
3173 MachineBasicBlock *DupBB, *DupPred;
3179 BlockChain::iterator ChainEnd = Chain.end();
3180 DupBB = *(--ChainEnd);
3182 if (ChainEnd == Chain.begin())
3184 DupPred = *std::prev(ChainEnd);
3185 Removed = maybeTailDuplicateBlock(
3186 DupBB, DupPred, Chain, BlockFilter, PrevUnplacedBlockIt,
3187 PrevUnplacedBlockInFilterIt, DuplicatedToLPred);
3194 LPred = *std::prev(Chain.end());
3195 if (DuplicatedToOriginalLPred)
3196 markBlockSuccessors(Chain, LPred, LoopHeaderBB, BlockFilter);
3213bool MachineBlockPlacement::maybeTailDuplicateBlock(
3214 MachineBasicBlock *BB, MachineBasicBlock *LPred, BlockChain &Chain,
3216 BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
3217 bool &DuplicatedToLPred) {
3218 DuplicatedToLPred =
false;
3219 if (!shouldTailDuplicate(BB))
3227 bool Removed =
false;
3228 auto RemovalCallback = [&](MachineBasicBlock *RemBB) {
3233 if (
auto It = BlockToChain.
find(RemBB); It != BlockToChain.
end()) {
3234 It->second->remove(RemBB);
3235 BlockToChain.
erase(It);
3239 if (&(*PrevUnplacedBlockIt) == RemBB) {
3240 PrevUnplacedBlockIt++;
3244 if (RemBB->isEHPad()) {
3255 if (It != BlockFilter->end()) {
3256 if (It < PrevUnplacedBlockInFilterIt) {
3257 const MachineBasicBlock *PrevBB = *PrevUnplacedBlockInFilterIt;
3260 auto Distance = PrevUnplacedBlockInFilterIt - It - 1;
3261 PrevUnplacedBlockInFilterIt = BlockFilter->
erase(It) + Distance;
3262 assert(*PrevUnplacedBlockInFilterIt == PrevBB);
3264 }
else if (It == PrevUnplacedBlockInFilterIt)
3267 PrevUnplacedBlockInFilterIt = BlockFilter->erase(It);
3269 BlockFilter->erase(It);
3274 MLI->removeBlock(RemBB);
3275 if (RemBB == PreferredLoopExit)
3276 PreferredLoopExit =
nullptr;
3281 auto RemovalCallbackRef =
3282 function_ref<void(MachineBasicBlock *)>(RemovalCallback);
3287 SmallVectorImpl<MachineBasicBlock *> *CandidatePtr =
nullptr;
3288 if (
F->getFunction().hasProfileData()) {
3290 findDuplicateCandidates(CandidatePreds, BB, BlockFilter);
3291 if (CandidatePreds.
size() == 0)
3294 CandidatePtr = &CandidatePreds;
3297 &RemovalCallbackRef, CandidatePtr);
3300 DuplicatedToLPred =
false;
3301 for (MachineBasicBlock *Pred : DuplicatedPreds) {
3303 BlockChain *PredChain = BlockToChain[Pred];
3305 DuplicatedToLPred =
true;
3306 if (Pred == LPred || (BlockFilter && !BlockFilter->count(Pred)) ||
3307 PredChain == &Chain)
3309 for (MachineBasicBlock *NewSucc : Pred->
successors()) {
3310 if (BlockFilter && !BlockFilter->count(NewSucc))
3312 BlockChain *NewChain = BlockToChain[NewSucc];
3313 if (NewChain != &Chain && NewChain != PredChain)
3314 NewChain->UnscheduledPredecessors++;
3324 if (!
MI.isPHI() && !
MI.isMetaInstruction())
3333BlockFrequency MachineBlockPlacement::scaleThreshold(MachineBasicBlock *BB) {
3338bool MachineBlockPlacement::isBestSuccessor(MachineBasicBlock *BB,
3339 MachineBasicBlock *Pred,
3340 BlockFilterSet *BlockFilter) {
3343 if (BlockFilter && !BlockFilter->count(Pred))
3345 BlockChain *PredChain = BlockToChain[Pred];
3346 if (PredChain && (Pred != *std::prev(PredChain->end())))
3351 for (MachineBasicBlock *Succ : Pred->
successors())
3353 if (BlockFilter && !BlockFilter->count(Succ))
3355 BlockChain *SuccChain = BlockToChain[Succ];
3356 if (SuccChain && (Succ != *SuccChain->begin()))
3359 if (SuccProb > BestProb)
3360 BestProb = SuccProb;
3364 if (BBProb <= BestProb)
3369 BlockFrequency PredFreq = getBlockCountOrFrequency(Pred);
3370 BlockFrequency Gain = PredFreq * (BBProb - BestProb);
3371 return Gain > scaleThreshold(BB);
3376void MachineBlockPlacement::findDuplicateCandidates(
3377 SmallVectorImpl<MachineBasicBlock *> &Candidates, MachineBasicBlock *BB,
3378 BlockFilterSet *BlockFilter) {
3379 MachineBasicBlock *Fallthrough =
nullptr;
3381 BlockFrequency BBDupThreshold(scaleThreshold(BB));
3386 auto CmpSucc = [&](MachineBasicBlock *
A, MachineBasicBlock *
B) {
3389 auto CmpPred = [&](MachineBasicBlock *
A, MachineBasicBlock *
B) {
3390 return MBFI->getBlockFreq(
A) > MBFI->getBlockFreq(
B);
3395 auto SuccIt = Succs.begin();
3396 if (SuccIt != Succs.end()) {
3441 for (MachineBasicBlock *Pred : Preds) {
3442 BlockFrequency PredFreq = getBlockCountOrFrequency(Pred);
3447 if (!Fallthrough && isBestSuccessor(BB, Pred, BlockFilter)) {
3449 if (SuccIt != Succs.end())
3455 BlockFrequency OrigCost = PredFreq + PredFreq * DefaultBranchProb;
3456 BlockFrequency DupCost;
3457 if (SuccIt == Succs.end()) {
3459 if (Succs.size() > 0)
3460 DupCost += PredFreq;
3463 DupCost += PredFreq;
3467 assert(OrigCost >= DupCost);
3468 OrigCost -= DupCost;
3469 if (OrigCost > BBDupThreshold) {
3471 if (SuccIt != Succs.end())
3479 if ((Candidates.
size() < Preds.size()) && (Candidates.
size() > 0)) {
3480 Candidates[0] = Candidates.
back();
3486void MachineBlockPlacement::initTailDupThreshold() {
3487 DupThreshold = BlockFrequency(0);
3488 if (
F->getFunction().hasProfileData()) {
3492 UseProfileCount =
true;
3497 BlockFrequency MaxFreq = BlockFrequency(0);
3498 for (MachineBasicBlock &
MBB : *
F) {
3499 BlockFrequency Freq = MBFI->getBlockFreq(&
MBB);
3505 DupThreshold = BlockFrequency(MaxFreq * ThresholdProb);
3506 UseProfileCount =
false;
3518 if (OptLevel >= CodeGenOptLevel::Aggressive) {
3530 (OptLevel < CodeGenOptLevel::Aggressive ||
3532 TailDupSize =
TII->getTailDuplicateSize(OptLevel);
3539 auto MBFI = std::make_unique<MBFIWrapper>(
3542 auto *MPDT = MachineBlockPlacement::allowTailDupPlacement(MF)
3546 .getCachedResult<ProfileSummaryAnalysis>(
3551 MachineBlockPlacement MBP(MBPI, MLI, PSI, std::move(MBFI), MPDT,
3563 OS << MapClassName2PassName(
name());
3564 if (!AllowTailMerge)
3565 OS <<
"<no-tail-merge>";
3571 if (std::next(MF.
begin()) == MF.
end())
3575 OptLevel =
F->getTarget().getOptLevel();
3582 PreferredLoopExit =
nullptr;
3585 "BlockToChain map should be empty before starting placement.");
3587 "Computed Edge map should be empty before starting placement.");
3590 initTailDupThreshold();
3592 const bool OptForSize =
3597 bool UseExtTspForPerf =
false;
3598 bool UseExtTspForSize =
false;
3607 if (allowTailDupPlacement(*
F)) {
3610 const bool PreRegAlloc =
false;
3611 TailDup.
initMF(MF, PreRegAlloc, MBPI, MBFI.get(), PSI,
3613 if (!UseExtTspForSize)
3614 precomputeTriangleChains();
3618 if (!UseExtTspForSize)
3628 if (EnableTailMerge) {
3630 BranchFolder BF(
true,
false,
3638 if (!UseExtTspForSize) {
3640 BlockToChain.
clear();
3641 ComputedEdges.
clear();
3651 if (UseExtTspForPerf || UseExtTspForSize) {
3653 !(UseExtTspForPerf && UseExtTspForSize) &&
3654 "UseExtTspForPerf and UseExtTspForSize can not be set simultaneously");
3655 applyExtTsp(UseExtTspForSize);
3656 createCFGChainExtTsp();
3662 BlockToChain.
clear();
3663 ComputedEdges.
clear();
3672 MBFI->view(
"MBP." + MF.
getName(),
false);
3680void MachineBlockPlacement::applyExtTsp(
bool OptForSize) {
3682 DenseMap<const MachineBasicBlock *, uint64_t> BlockIndex;
3684 std::vector<const MachineBasicBlock *> CurrentBlockOrder;
3685 CurrentBlockOrder.reserve(
F->size());
3686 size_t NumBlocks = 0;
3687 for (
const MachineBasicBlock &
MBB : *
F) {
3688 BlockIndex[&
MBB] = NumBlocks++;
3689 CurrentBlockOrder.push_back(&
MBB);
3692 SmallVector<uint64_t, 0> BlockCounts(
F->size());
3693 SmallVector<uint64_t, 0> BlockSizes(
F->size());
3697 for (MachineBasicBlock &
MBB : *
F) {
3699 BlockFrequency BlockFreq = MBFI->getBlockFreq(&
MBB);
3700 BlockCounts[BlockIndex[&
MBB]] = OptForSize ? 1 : BlockFreq.
getFrequency();
3709 size_t NumInsts = std::distance(NonDbgInsts.begin(), NonDbgInsts.end());
3710 BlockSizes[BlockIndex[&
MBB]] = 4 * NumInsts;
3715 MachineBasicBlock *
TBB =
nullptr, *FBB =
nullptr;
3726 if (FBB && FBB != FTB)
3733 const uint64_t Freq = Succs.
size() == 1 ? 110 : 100;
3734 for (
const MachineBasicBlock *Succ : Succs)
3735 JumpCounts.
push_back({BlockIndex[&
MBB], BlockIndex[Succ], Freq});
3739 BlockFrequency JumpFreq = BlockFreq * EP;
3746 LLVM_DEBUG(
dbgs() <<
"Applying ext-tsp layout for |V| = " <<
F->size()
3747 <<
" with profile = " <<
F->getFunction().hasProfileData()
3748 <<
" (" <<
F->getName() <<
")" <<
"\n");
3755 std::vector<const MachineBasicBlock *> NewBlockOrder;
3756 NewBlockOrder.reserve(
F->size());
3757 for (uint64_t Node : NewOrder) {
3758 NewBlockOrder.push_back(CurrentBlockOrder[Node]);
3760 const double OptScore =
calcExtTspScore(NewOrder, BlockSizes, JumpCounts);
3764 if (OptForSize && OrgScore > OptScore)
3765 assignBlockOrder(CurrentBlockOrder);
3767 assignBlockOrder(NewBlockOrder);
3770void MachineBlockPlacement::assignBlockOrder(
3771 const std::vector<const MachineBasicBlock *> &NewBlockOrder) {
3772 assert(
F->size() == NewBlockOrder.size() &&
"Incorrect size of block order");
3773 F->RenumberBlocks();
3775 bool HasChanges =
false;
3776 for (
size_t I = 0;
I < NewBlockOrder.size();
I++) {
3777 if (NewBlockOrder[
I] !=
F->getBlockNumbered(
I)) {
3786 SmallVector<MachineBasicBlock *, 4> PrevFallThroughs(
F->getNumBlockIDs());
3787 for (
auto &
MBB : *
F) {
3792 DenseMap<const MachineBasicBlock *, size_t> NewIndex;
3793 for (
const MachineBasicBlock *
MBB : NewBlockOrder) {
3794 NewIndex[
MBB] = NewIndex.
size();
3796 F->sort([&](MachineBasicBlock &L, MachineBasicBlock &R) {
3797 return NewIndex[&
L] < NewIndex[&
R];
3802 const TargetInstrInfo *
TII =
F->getSubtarget().getInstrInfo();
3804 for (
auto &
MBB : *
F) {
3811 if (FTMBB && (NextMBB == EndIt || &*NextMBB != FTMBB)) {
3817 MachineBasicBlock *
TBB =
nullptr, *FBB =
nullptr;
3824void MachineBlockPlacement::createCFGChainExtTsp() {
3825 BlockToChain.
clear();
3826 ComputedEdges.
clear();
3829 MachineBasicBlock *HeadBB = &
F->
front();
3830 BlockChain *FunctionChain =
3831 new (ChainAllocator.
Allocate()) BlockChain(BlockToChain, HeadBB);
3833 for (MachineBasicBlock &
MBB : *
F) {
3836 FunctionChain->merge(&
MBB,
nullptr);
3848class MachineBlockPlacementStats {
3850 const MachineBranchProbabilityInfo *MBPI;
3853 const MachineBlockFrequencyInfo *MBFI;
3856 MachineBlockPlacementStats(
const MachineBranchProbabilityInfo *MBPI,
3857 const MachineBlockFrequencyInfo *MBFI)
3858 : MBPI(MBPI), MBFI(MBFI) {}
3859 bool run(MachineFunction &MF);
3862class MachineBlockPlacementStatsLegacy :
public MachineFunctionPass {
3866 MachineBlockPlacementStatsLegacy() : MachineFunctionPass(
ID) {}
3868 bool runOnMachineFunction(MachineFunction &
F)
override {
3870 &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
3871 auto *MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
3872 return MachineBlockPlacementStats(MBPI, MBFI).run(
F);
3875 void getAnalysisUsage(AnalysisUsage &AU)
const override {
3876 AU.
addRequired<MachineBranchProbabilityInfoWrapperPass>();
3877 AU.
addRequired<MachineBlockFrequencyInfoWrapperPass>();
3885char MachineBlockPlacementStatsLegacy::ID = 0;
3890 "Basic Block Placement Stats",
false,
false)
3902 MachineBlockPlacementStats(&MBPI, &MBFI).
run(MF);
3908 if (std::next(
F.begin()) ==
F.end())
3917 (
MBB.succ_size() > 1) ? NumCondBranches : NumUncondBranches;
3919 (
MBB.succ_size() > 1) ? CondBranchTakenFreq : UncondBranchTakenFreq;
3922 if (
MBB.isLayoutSuccessor(Succ))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file defines the BumpPtrAllocator interface.
static cl::opt< unsigned > TailMergeSize("tail-merge-size", cl::desc("Min number of instructions to consider tail merging"), cl::init(3), cl::Hidden)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Declares methods and data structures for code layout algorithms.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
static unsigned InstrCount
This file defines the DenseMap class.
const HexagonInstrInfo * TII
static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B)
static BranchProbability getAdjustedProbability(BranchProbability OrigProb, BranchProbability AdjustedSumProb)
The helper function returns the branch probability that is adjusted or normalized over the new total ...
static cl::opt< bool > PreciseRotationCost("precise-rotation-cost", cl::desc("Model the cost of loop rotation more " "precisely by using profile data."), cl::init(false), cl::Hidden)
static cl::opt< unsigned > ExtTspBlockPlacementMaxBlocks("ext-tsp-block-placement-max-blocks", cl::desc("Maximum number of basic blocks in a function to run ext-TSP " "block placement."), cl::init(UINT_MAX), cl::Hidden)
static cl::opt< unsigned > AlignAllBlock("align-all-blocks", cl::desc("Force the alignment of all blocks in the function in log2 format " "(e.g 4 means align on 16B boundaries)."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > PredecessorLimit("block-placement-predecessor-limit", cl::desc("For blocks with more predecessors, certain layout optimizations" "will be disabled to prevent quadratic compile time."), cl::init(1000), cl::Hidden)
static BranchProbability getLayoutSuccessorProbThreshold(const MachineBasicBlock *BB)
static cl::opt< bool > ForceLoopColdBlock("force-loop-cold-block", cl::desc("Force outlining cold blocks from loops."), cl::init(false), cl::Hidden)
static cl::opt< unsigned > ExitBlockBias("block-placement-exit-block-bias", cl::desc("Block frequency percentage a loop exit block needs " "over the original exit to be considered the new exit."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > AlignAllNonFallThruBlocks("align-all-nofallthru-blocks", cl::desc("Force the alignment of all blocks that have no fall-through " "predecessors (i.e. don't add nops that are executed). In log2 " "format (e.g 4 means align on 16B boundaries)."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > TailDupPlacementThreshold("tail-dup-placement-threshold", cl::desc("Instruction cutoff for tail duplication during layout. " "Tail merging during layout is forced to have a threshold " "that won't conflict."), cl::init(2), cl::Hidden)
static cl::opt< unsigned > JumpInstCost("jump-inst-cost", cl::desc("Cost of jump instructions."), cl::init(1), cl::Hidden)
static cl::opt< unsigned > TailDupPlacementPenalty("tail-dup-placement-penalty", cl::desc("Cost penalty for blocks that can avoid breaking CFG by copying. " "Copying can increase fallthrough, but it also increases icache " "pressure. This parameter controls the penalty to account for that. " "Percent as integer."), cl::init(2), cl::Hidden)
static bool greaterWithBias(BlockFrequency A, BlockFrequency B, BlockFrequency EntryFreq)
Compare 2 BlockFrequency's with a small penalty for A.
static cl::opt< unsigned > MisfetchCost("misfetch-cost", cl::desc("Cost that models the probabilistic risk of an instruction " "misfetch due to a jump comparing to falling through, whose cost " "is zero."), cl::init(1), cl::Hidden)
static cl::opt< unsigned > MaxBytesForAlignmentOverride("max-bytes-for-alignment", cl::desc("Forces the maximum bytes allowed to be emitted when padding for " "alignment"), cl::init(0), cl::Hidden)
static cl::opt< bool > BranchFoldPlacement("branch-fold-placement", cl::desc("Perform branch folding during placement. " "Reduces code size."), cl::init(true), cl::Hidden)
static cl::opt< unsigned > TailDupProfilePercentThreshold("tail-dup-profile-percent-threshold", cl::desc("If profile count information is used in tail duplication cost " "model, the gained fall through number from tail duplication " "should be at least this percent of hot count."), cl::init(50), cl::Hidden)
static cl::opt< unsigned > TriangleChainCount("triangle-chain-count", cl::desc("Number of triangle-shaped-CFG's that need to be in a row for the " "triangle tail duplication heuristic to kick in. 0 to disable."), cl::init(2), cl::Hidden)
Branch Probability Basic Block static false std::string getBlockName(const MachineBasicBlock *BB)
Helper to print the name of a MBB.
static cl::opt< bool > ApplyExtTspForSize("apply-ext-tsp-for-size", cl::init(false), cl::Hidden, cl::desc("Use ext-tsp for size-aware block placement."))
static bool hasSameSuccessors(MachineBasicBlock &BB, SmallPtrSetImpl< const MachineBasicBlock * > &Successors)
Check if BB has exactly the successors in Successors.
static cl::opt< bool > TailDupPlacement("tail-dup-placement", cl::desc("Perform tail duplication during placement. " "Creates more fallthrough opportunities in " "outline branches."), cl::init(true), cl::Hidden)
static uint64_t countMBBInstruction(MachineBasicBlock *MBB)
static cl::opt< unsigned > LoopToColdBlockRatio("loop-to-cold-block-ratio", cl::desc("Outline loop blocks from loop chain if (frequency of loop) / " "(frequency of block) is greater than this ratio"), cl::init(5), cl::Hidden)
static cl::opt< bool > RenumberBlocksBeforeView("renumber-blocks-before-view", cl::desc("If true, basic blocks are re-numbered before MBP layout is printed " "into a dot graph. Only used when a function is being printed."), cl::init(false), cl::Hidden)
static cl::opt< unsigned > TailDupPlacementAggressiveThreshold("tail-dup-placement-aggressive-threshold", cl::desc("Instruction cutoff for aggressive tail duplication during " "layout. Used at -O3. Tail merging during layout is forced to " "have a threshold that won't conflict."), cl::init(4), cl::Hidden)
static cl::opt< bool > ForcePreciseRotationCost("force-precise-rotation-cost", cl::desc("Force the use of precise cost " "loop rotation strategy."), cl::init(false), cl::Hidden)
static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI)
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
static BlockFrequency max()
Returns the maximum possible frequency, the saturation value.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
static BranchProbability getOne()
uint32_t getNumerator() const
BranchProbability getCompl() const
static BranchProbability getZero()
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool erase(const KeyT &Val)
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
Module * getParent()
Get the module that this global value is contained inside of...
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
Remove the branching code at the end of the specific MBB.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
Reverses the branch condition of the specified condition list, returning false on success and true if...
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
Insert branch code into the end of the specified MachineBasicBlock.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
LLVM_ABI StringRef getString() const
unsigned pred_size() const
bool isEHPad() const
Returns true if the block is a landing pad.
instr_iterator instr_begin()
LLVM_ABI MachineBasicBlock * getFallThrough(bool JumpToFallThrough=true)
Return the fallthrough block if the block can implicitly transfer control to the block after it by fa...
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
LLVM_ABI void updateTerminator(MachineBasicBlock *PreviousLayoutSuccessor)
Update the terminator instructions in block to account for changes to block layout which may have bee...
LLVM_ABI bool canFallThrough()
Return true if the block can implicitly transfer control to the block after it by falling off the end...
succ_iterator succ_begin()
unsigned succ_size() const
void setAlignment(Align A)
Set alignment of the basic block.
LLVM_ABI bool isEntryBlock() const
Returns true if this is the entry block of the function.
pred_iterator pred_begin()
succ_reverse_iterator succ_rbegin()
LLVM_ABI bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
LLVM_ABI DebugLoc findBranchDebugLoc()
Find and return the merged DebugLoc of the branch instructions of the block.
iterator_range< succ_iterator > successors()
LLVM_ABI bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
iterator_range< pred_iterator > predecessors()
LLVM_ABI StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
LLVM_ABI Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName) const
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
void RenumberBlocks(MachineBasicBlock *MBBFrom=nullptr)
RenumberBlocks - This discards all of the MachineBasicBlock numbers and recomputes them.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Representation of each machine instruction.
Analysis pass that exposes the MachineLoopInfo for a machine function.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
LLVM_ABI uint64_t getOrCompHotCountThreshold() const
Returns HotCountThreshold if set.
typename vector_type::const_iterator iterator
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
reference emplace_back(ArgTypes &&... Args)
iterator erase(const_iterator CI)
typename SuperClass::const_iterator const_iterator
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
T * Allocate(size_t num=1)
Allocate space for an array of objects without constructing them.
void DestroyAll()
Call the destructor of each allocated object and deallocate all but the current slab and reset the cu...
StringRef - Represent a constant reference to a string, i.e.
void initMF(MachineFunction &MF, bool PreRegAlloc, const MachineBranchProbabilityInfo *MBPI, MBFIWrapper *MBFI, ProfileSummaryInfo *PSI, bool LayoutMode, unsigned TailDupSize=0)
Prepare to run on a specific machine function.
bool tailDuplicateAndUpdate(bool IsSimple, MachineBasicBlock *MBB, MachineBasicBlock *ForcedLayoutPred, SmallVectorImpl< MachineBasicBlock * > *DuplicatedPreds=nullptr, function_ref< void(MachineBasicBlock *)> *RemovalCallback=nullptr, SmallVectorImpl< MachineBasicBlock * > *CandidatePtr=nullptr)
Tail duplicate a single basic block into its predecessors, and then clean up.
static bool isSimpleBB(MachineBasicBlock *TailBB)
True if this BB has only one unconditional jump.
bool canTailDuplicate(MachineBasicBlock *TailBB, MachineBasicBlock *PredBB)
Returns true if TailBB can successfully be duplicated into PredBB.
bool shouldTailDuplicate(bool IsSimple, MachineBasicBlock &TailBB)
Determine if it is profitable to duplicate this block.
virtual unsigned getMaxPermittedBytesForAlignment(MachineBasicBlock *MBB) const
Return the maximum amount of bytes allowed to be emitted when padding for alignment.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
virtual bool alignLoopsWithOptSize() const
Should loops be aligned even when the function is marked OptSize (but not MinSize).
bool requiresStructuredCFG() const
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an std::string.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
initializer< Ty > init(const Ty &Val)
LLVM_ABI double calcExtTspScore(ArrayRef< uint64_t > Order, ArrayRef< uint64_t > NodeSizes, ArrayRef< EdgeCount > EdgeCounts)
Estimate the "quality" of a given node order in CFG.
LLVM_ABI std::vector< uint64_t > computeExtTspLayout(ArrayRef< uint64_t > NodeSizes, ArrayRef< uint64_t > NodeCounts, ArrayRef< EdgeCount > EdgeCounts)
Find a layout of nodes (basic blocks) of a given CFG optimizing jump locality and thus processor I-ca...
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
LLVM_ABI void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")
Append to path.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
void stable_sort(R &&Range)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
OuterAnalysisManagerProxy< ModuleAnalysisManager, MachineFunction > ModuleAnalysisManagerMachineFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
cl::opt< bool > ApplyExtTspWithoutProfile
constexpr from_range_t from_range
LLVM_ABI bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
cl::opt< unsigned > ProfileLikelyProb
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))
auto reverse(ContainerTy &&C)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
cl::opt< GVDAGType > ViewBlockLayoutWithBFI("view-block-layout-with-bfi", cl::Hidden, cl::desc("Pop up a window to show a dag displaying MBP layout and associated " "block frequencies of the CFG."), cl::values(clEnumValN(GVDT_None, "none", "do not display graphs."), clEnumValN(GVDT_Fraction, "fraction", "display a graph using the " "fractional block frequency representation."), clEnumValN(GVDT_Integer, "integer", "display a graph using the raw " "integer fractional block frequency representation."), clEnumValN(GVDT_Count, "count", "display a graph using the real " "profile count if available.")))
bool isFunctionInPrintList(StringRef FunctionName)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
FunctionAddr VTableAddr Count
CodeGenOptLevel
Code generation optimization level.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
cl::opt< unsigned > StaticLikelyProb
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
LLVM_ABI Printable printBlockFreq(const BlockFrequencyInfo &BFI, BlockFrequency Freq)
Print the block frequency Freq relative to the current functions entry frequency.
LLVM_ABI char & MachineBlockPlacementID
MachineBlockPlacement - This pass places basic blocks based on branch probabilities.
cl::opt< bool > EnableExtTspBlockPlacement
LLVM_ABI char & MachineBlockPlacementStatsID
MachineBlockPlacementStats - This pass collects statistics about the basic block placement using bran...
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.