25#define DEBUG_TYPE "sample-profile-inference"
31 cl::desc(
"Try to evenly distribute flow when there are multiple equally "
36 cl::desc(
"Evenly re-distribute flow among unknown subgraphs."));
40 cl::desc(
"Join isolated components having positive flow."));
44 cl::desc(
"The cost of increasing a block's count by one."));
48 cl::desc(
"The cost of decreasing a block's count by one."));
52 cl::desc(
"The cost of increasing the entry block's count by one."));
56 cl::desc(
"The cost of decreasing the entry block's count by one."));
60 cl::desc(
"The cost of increasing a count of zero-weight block by one."));
64 cl::desc(
"The cost of increasing an unknown block's count by one."));
69static constexpr int64_t
INF = ((int64_t)1) << 50;
87 MinCostMaxFlow(
const ProfiParams &Params) : Params(Params) {}
94 Nodes = std::vector<Node>(NodeCount);
95 Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
98 std::vector<std::vector<Edge *>>(NodeCount, std::vector<Edge *>());
103 LLVM_DEBUG(
dbgs() <<
"Starting profi for " << Nodes.size() <<
" nodes\n");
107 size_t AugmentationIters = applyFlowAugmentation();
110 int64_t TotalCost = 0;
111 int64_t TotalFlow = 0;
112 for (
uint64_t Src = 0; Src < Nodes.size(); Src++) {
113 for (
auto &Edge : Edges[Src]) {
115 TotalCost += Edge.Cost * Edge.Flow;
117 TotalFlow += Edge.Flow;
121 LLVM_DEBUG(
dbgs() <<
"Completed profi after " << AugmentationIters
122 <<
" iterations with " << TotalFlow <<
" total flow"
123 <<
" of " << TotalCost <<
" cost\n");
125 (void)AugmentationIters;
133 assert(Capacity > 0 &&
"adding an edge of zero capacity");
134 assert(Src != Dst &&
"loop edge are not supported");
139 SrcEdge.Capacity = Capacity;
141 SrcEdge.RevEdgeIndex = Edges[Dst].size();
145 DstEdge.Cost = -
Cost;
146 DstEdge.Capacity = 0;
148 DstEdge.RevEdgeIndex = Edges[Src].size();
150 Edges[Src].push_back(SrcEdge);
151 Edges[Dst].push_back(DstEdge);
161 const std::vector<std::pair<uint64_t, int64_t>> getFlow(
uint64_t Src)
const {
162 std::vector<std::pair<uint64_t, int64_t>>
Flow;
163 for (
const auto &Edge : Edges[Src]) {
165 Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));
173 for (
const auto &Edge : Edges[Src]) {
174 if (Edge.Dst == Dst) {
184 size_t applyFlowAugmentation() {
185 size_t AugmentationIters = 0;
186 while (findAugmentingPath()) {
187 uint64_t PathCapacity = computeAugmentingPathCapacity();
188 while (PathCapacity > 0) {
189 bool Progress =
false;
192 identifyShortestEdges(PathCapacity);
195 auto AugmentingOrder = findAugmentingDAG();
198 Progress = augmentFlowAlongDAG(AugmentingOrder);
199 PathCapacity = computeAugmentingPathCapacity();
203 augmentFlowAlongPath(PathCapacity);
210 return AugmentationIters;
215 uint64_t computeAugmentingPathCapacity() {
218 while (Now != Source) {
219 uint64_t Pred = Nodes[Now].ParentNode;
220 auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
222 assert(Edge.Capacity >= Edge.Flow &&
"incorrect edge flow");
224 PathCapacity = std::min(PathCapacity, EdgeCapacity);
232 bool findAugmentingPath() {
234 for (
auto &
Node : Nodes) {
241 std::queue<uint64_t> Queue;
243 Nodes[Source].Distance = 0;
244 Nodes[Source].Taken =
true;
245 while (!Queue.empty()) {
248 Nodes[Src].Taken =
false;
265 if (Nodes[Src].Distance > Nodes[
Target].Distance)
269 for (
uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) {
270 auto &Edge = Edges[Src][EdgeIdx];
271 if (Edge.Flow < Edge.Capacity) {
273 int64_t NewDistance = Nodes[Src].Distance + Edge.Cost;
274 if (Nodes[Dst].Distance > NewDistance) {
276 Nodes[Dst].Distance = NewDistance;
277 Nodes[Dst].ParentNode = Src;
278 Nodes[Dst].ParentEdgeIndex = EdgeIdx;
280 if (!Nodes[Dst].Taken) {
282 Nodes[Dst].Taken =
true;
293 void augmentFlowAlongPath(
uint64_t PathCapacity) {
294 assert(PathCapacity > 0 &&
"found an incorrect augmenting path");
296 while (Now != Source) {
297 uint64_t Pred = Nodes[Now].ParentNode;
298 auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
299 auto &RevEdge = Edges[Now][Edge.RevEdgeIndex];
301 Edge.Flow += PathCapacity;
302 RevEdge.Flow -= PathCapacity;
315 std::vector<uint64_t> findAugmentingDAG() {
321 typedef std::pair<uint64_t, uint64_t> StackItemType;
322 std::stack<StackItemType> Stack;
323 std::vector<uint64_t> AugmentingOrder;
326 for (
auto &
Node : Nodes) {
335 Nodes[
Target].Taken =
true;
338 Stack.emplace(Source, 0);
339 Nodes[Source].Discovery = ++Time;
340 while (!Stack.empty()) {
341 auto NodeIdx = Stack.top().first;
342 auto EdgeIdx = Stack.top().second;
345 if (EdgeIdx < Edges[NodeIdx].
size()) {
346 auto &Edge = Edges[NodeIdx][EdgeIdx];
347 auto &Dst = Nodes[Edge.Dst];
348 Stack.top().second++;
350 if (Edge.OnShortestPath) {
352 if (Dst.Discovery == 0 && Dst.NumCalls < MaxDfsCalls) {
353 Dst.Discovery = ++Time;
354 Stack.emplace(Edge.Dst, 0);
356 }
else if (Dst.Taken && Dst.Finish != 0) {
358 Nodes[NodeIdx].Taken =
true;
365 if (!Nodes[NodeIdx].Taken) {
366 Nodes[NodeIdx].Discovery = 0;
370 Nodes[NodeIdx].Finish = ++Time;
372 if (NodeIdx != Source) {
373 assert(!Stack.empty() &&
"empty stack while running dfs");
374 Nodes[Stack.top().first].Taken =
true;
376 AugmentingOrder.push_back(NodeIdx);
381 std::reverse(AugmentingOrder.begin(), AugmentingOrder.end());
384 for (
size_t Src : AugmentingOrder) {
385 AugmentingEdges[Src].clear();
386 for (
auto &Edge : Edges[Src]) {
388 if (Edge.OnShortestPath && Nodes[Src].Taken && Nodes[Dst].Taken &&
389 Nodes[Dst].Finish < Nodes[Src].Finish) {
390 AugmentingEdges[Src].push_back(&Edge);
393 assert((Src ==
Target || !AugmentingEdges[Src].empty()) &&
394 "incorrectly constructed augmenting edges");
397 return AugmentingOrder;
404 bool augmentFlowAlongDAG(
const std::vector<uint64_t> &AugmentingOrder) {
406 for (
uint64_t Src : AugmentingOrder) {
407 Nodes[Src].FracFlow = 0;
408 Nodes[Src].IntFlow = 0;
409 for (
auto &Edge : AugmentingEdges[Src]) {
410 Edge->AugmentedFlow = 0;
416 Nodes[Source].FracFlow = 1.0;
417 for (
uint64_t Src : AugmentingOrder) {
419 "incorrectly computed fractional flow");
421 uint64_t Degree = AugmentingEdges[Src].size();
422 for (
auto &Edge : AugmentingEdges[Src]) {
423 double EdgeFlow = Nodes[Src].FracFlow / Degree;
424 Nodes[Edge->Dst].FracFlow += EdgeFlow;
425 if (Edge->Capacity ==
INF)
427 uint64_t MaxIntFlow = double(Edge->Capacity - Edge->Flow) / EdgeFlow;
428 MaxFlowAmount = std::min(MaxFlowAmount, MaxIntFlow);
432 if (MaxFlowAmount == 0)
436 Nodes[Source].IntFlow = MaxFlowAmount;
437 for (
uint64_t Src : AugmentingOrder) {
442 uint64_t Degree = AugmentingEdges[Src].size();
444 uint64_t SuccFlow = (Nodes[Src].IntFlow + Degree - 1) / Degree;
445 for (
auto &Edge : AugmentingEdges[Src]) {
447 uint64_t EdgeFlow = std::min(Nodes[Src].IntFlow, SuccFlow);
448 EdgeFlow = std::min(EdgeFlow,
uint64_t(Edge->Capacity - Edge->Flow));
449 Nodes[Dst].IntFlow += EdgeFlow;
450 Nodes[Src].IntFlow -= EdgeFlow;
451 Edge->AugmentedFlow += EdgeFlow;
455 Nodes[
Target].IntFlow = 0;
460 for (
size_t Idx = AugmentingOrder.size() - 1;
Idx > 0;
Idx--) {
464 for (
auto &Edge : AugmentingEdges[Src]) {
466 if (Nodes[Dst].IntFlow == 0)
468 uint64_t EdgeFlow = std::min(Nodes[Dst].IntFlow, Edge->AugmentedFlow);
469 Nodes[Dst].IntFlow -= EdgeFlow;
470 Nodes[Src].IntFlow += EdgeFlow;
471 Edge->AugmentedFlow -= EdgeFlow;
476 bool HasSaturatedEdges =
false;
477 for (
uint64_t Src : AugmentingOrder) {
479 assert(Src == Source || Nodes[Src].IntFlow == 0);
480 for (
auto &Edge : AugmentingEdges[Src]) {
481 assert(
uint64_t(Edge->Capacity - Edge->Flow) >= Edge->AugmentedFlow);
483 auto &RevEdge = Edges[Edge->Dst][Edge->RevEdgeIndex];
484 Edge->Flow += Edge->AugmentedFlow;
485 RevEdge.Flow -= Edge->AugmentedFlow;
486 if (Edge->Capacity == Edge->Flow && Edge->AugmentedFlow > 0)
487 HasSaturatedEdges =
true;
492 return HasSaturatedEdges;
496 void identifyShortestEdges(
uint64_t PathCapacity) {
497 assert(PathCapacity > 0 &&
"found an incorrect augmenting DAG");
505 for (
size_t Src = 0; Src < Nodes.size(); Src++) {
507 if (Nodes[Src].Distance > Nodes[
Target].Distance)
510 for (
auto &Edge : Edges[Src]) {
512 Edge.OnShortestPath =
513 Src !=
Target && Dst != Source &&
514 Nodes[Dst].Distance <= Nodes[
Target].Distance &&
515 Nodes[Dst].Distance == Nodes[Src].Distance + Edge.Cost &&
516 Edge.Capacity > Edge.Flow &&
517 uint64_t(Edge.Capacity - Edge.Flow) >= MinCapacity;
523 static constexpr uint64_t MaxDfsCalls = 10;
570 std::vector<Node> Nodes;
572 std::vector<std::vector<Edge>> Edges;
578 std::vector<std::vector<Edge *>> AugmentingEdges;
602 : Params(Params), Func(Func) {}
608 joinIsolatedComponents();
613 rebalanceUnknownSubgraphs();
618 void joinIsolatedComponents() {
620 auto Visited =
BitVector(NumBlocks(),
false);
621 findReachable(Func.Entry, Visited);
625 auto &Block = Func.Blocks[
I];
626 if (Block.Flow > 0 && !Visited[
I]) {
628 auto Path = findShortestPath(
I);
630 assert(Path.size() > 0 && Path[0]->Source == Func.Entry &&
631 "incorrectly computed path adjusting control flow");
632 Func.Blocks[Func.Entry].Flow += 1;
633 for (
auto &Jump : Path) {
635 Func.Blocks[Jump->Target].Flow += 1;
637 findReachable(Jump->Target, Visited);
648 std::queue<uint64_t> Queue;
651 while (!Queue.empty()) {
654 for (
auto *Jump : Func.Blocks[Src].SuccJumps) {
656 if (Jump->Flow > 0 && !Visited[Dst]) {
666 std::vector<FlowJump *> findShortestPath(
uint64_t BlockIdx) {
668 auto ForwardPath = findShortestPath(Func.Entry, BlockIdx);
670 auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock);
673 std::vector<FlowJump *> Result;
674 Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end());
675 Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end());
685 return std::vector<FlowJump *>();
686 if (Func.Blocks[Source].isExit() &&
Target == AnyExitBlock)
687 return std::vector<FlowJump *>();
690 auto Distance = std::vector<int64_t>(NumBlocks(),
INF);
691 auto Parent = std::vector<FlowJump *>(NumBlocks(),
nullptr);
692 Distance[Source] = 0;
693 std::set<std::pair<uint64_t, uint64_t>> Queue;
694 Queue.insert(std::make_pair(Distance[Source], Source));
697 while (!Queue.empty()) {
698 uint64_t Src = Queue.begin()->second;
699 Queue.erase(Queue.begin());
702 (Func.Blocks[Src].isExit() &&
Target == AnyExitBlock))
705 for (
auto *Jump : Func.Blocks[Src].SuccJumps) {
707 int64_t JumpDist = jumpDistance(Jump);
708 if (Distance[Dst] > Distance[Src] + JumpDist) {
709 Queue.erase(std::make_pair(Distance[Dst], Dst));
711 Distance[Dst] = Distance[Src] + JumpDist;
714 Queue.insert(std::make_pair(Distance[Dst], Dst));
719 if (
Target == AnyExitBlock) {
721 if (Func.Blocks[
I].isExit() && Parent[
I] !=
nullptr) {
722 if (
Target == AnyExitBlock || Distance[
Target] > Distance[
I]) {
728 assert(Parent[
Target] !=
nullptr &&
"a path does not exist");
731 std::vector<FlowJump *> Result;
733 while (Now != Source) {
734 assert(Now == Parent[Now]->
Target &&
"incorrect parent jump");
735 Result.push_back(Parent[Now]);
736 Now = Parent[Now]->Source;
739 std::reverse(Result.begin(), Result.end());
752 int64_t jumpDistance(
FlowJump *Jump)
const {
756 std::max(FlowAdjuster::MinBaseDistance,
757 std::min(Func.Blocks[Func.Entry].Flow,
760 return BaseDistance + BaseDistance / Jump->
Flow;
761 return 2 * BaseDistance * (NumBlocks() + 1);
764 uint64_t NumBlocks()
const {
return Func.Blocks.size(); }
770 void rebalanceUnknownSubgraphs() {
772 for (
const FlowBlock &SrcBlock : Func.Blocks) {
774 if (!canRebalanceAtRoot(&SrcBlock))
779 std::vector<FlowBlock *> UnknownBlocks;
780 std::vector<FlowBlock *> KnownDstBlocks;
781 findUnknownSubgraph(&SrcBlock, KnownDstBlocks, UnknownBlocks);
786 if (!canRebalanceSubgraph(&SrcBlock, KnownDstBlocks, UnknownBlocks,
791 if (!isAcyclicSubgraph(&SrcBlock, DstBlock, UnknownBlocks))
795 rebalanceUnknownSubgraph(&SrcBlock, DstBlock, UnknownBlocks);
800 bool canRebalanceAtRoot(
const FlowBlock *SrcBlock) {
807 bool HasUnknownSuccs =
false;
809 if (Func.Blocks[Jump->
Target].HasUnknownWeight) {
810 HasUnknownSuccs =
true;
814 if (!HasUnknownSuccs)
822 void findUnknownSubgraph(
const FlowBlock *SrcBlock,
823 std::vector<FlowBlock *> &KnownDstBlocks,
824 std::vector<FlowBlock *> &UnknownBlocks) {
827 auto Visited =
BitVector(NumBlocks(),
false);
828 std::queue<uint64_t> Queue;
830 Queue.push(SrcBlock->
Index);
831 Visited[SrcBlock->
Index] =
true;
832 while (!Queue.empty()) {
833 auto &Block = Func.Blocks[Queue.front()];
836 for (
auto *Jump : Block.SuccJumps) {
838 if (ignoreJump(SrcBlock,
nullptr, Jump))
847 if (!Func.Blocks[Dst].HasUnknownWeight) {
848 KnownDstBlocks.
push_back(&Func.Blocks[Dst]);
851 UnknownBlocks.push_back(&Func.Blocks[Dst]);
859 bool canRebalanceSubgraph(
const FlowBlock *SrcBlock,
860 const std::vector<FlowBlock *> &KnownDstBlocks,
861 const std::vector<FlowBlock *> &UnknownBlocks,
864 if (UnknownBlocks.empty())
868 if (KnownDstBlocks.size() > 1)
870 DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front();
873 for (
auto *Block : UnknownBlocks) {
874 if (Block->SuccJumps.empty()) {
876 if (DstBlock !=
nullptr)
880 size_t NumIgnoredJumps = 0;
881 for (
auto *Jump : Block->SuccJumps) {
882 if (ignoreJump(SrcBlock, DstBlock, Jump))
887 if (NumIgnoredJumps == Block->SuccJumps.size())
902 auto JumpSource = &Func.Blocks[Jump->
Source];
903 auto JumpTarget = &Func.Blocks[Jump->
Target];
906 if (DstBlock !=
nullptr && JumpTarget == DstBlock)
910 if (!JumpTarget->HasUnknownWeight && JumpSource == SrcBlock)
914 if (!JumpTarget->HasUnknownWeight && JumpTarget->Flow == 0)
923 std::vector<FlowBlock *> &UnknownBlocks) {
925 auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);
926 auto fillInDegree = [&](
const FlowBlock *Block) {
927 for (
auto *Jump : Block->SuccJumps) {
928 if (ignoreJump(SrcBlock, DstBlock, Jump))
930 LocalInDegree[Jump->
Target]++;
933 fillInDegree(SrcBlock);
934 for (
auto *Block : UnknownBlocks) {
938 if (LocalInDegree[SrcBlock->
Index] > 0)
941 std::vector<FlowBlock *> AcyclicOrder;
942 std::queue<uint64_t> Queue;
943 Queue.push(SrcBlock->
Index);
944 while (!Queue.empty()) {
945 FlowBlock *Block = &Func.Blocks[Queue.front()];
948 if (DstBlock !=
nullptr && Block == DstBlock)
952 if (Block->HasUnknownWeight && Block != SrcBlock)
953 AcyclicOrder.push_back(Block);
956 for (
auto *Jump : Block->SuccJumps) {
957 if (ignoreJump(SrcBlock, DstBlock, Jump))
960 LocalInDegree[Dst]--;
961 if (LocalInDegree[Dst] == 0) {
969 if (UnknownBlocks.size() != AcyclicOrder.size())
971 UnknownBlocks = AcyclicOrder;
977 void rebalanceUnknownSubgraph(
const FlowBlock *SrcBlock,
979 const std::vector<FlowBlock *> &UnknownBlocks) {
980 assert(SrcBlock->
Flow > 0 &&
"zero-flow block in unknown subgraph");
986 if (ignoreJump(SrcBlock, DstBlock, Jump))
988 BlockFlow += Jump->
Flow;
990 rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow);
993 for (
auto *Block : UnknownBlocks) {
994 assert(Block->HasUnknownWeight &&
"incorrect unknown subgraph");
997 for (
auto *Jump : Block->PredJumps) {
998 BlockFlow += Jump->
Flow;
1000 Block->Flow = BlockFlow;
1001 rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow);
1010 size_t BlockDegree = 0;
1011 for (
auto *Jump : Block->SuccJumps) {
1012 if (ignoreJump(SrcBlock, DstBlock, Jump))
1017 if (DstBlock ==
nullptr && BlockDegree == 0)
1019 assert(BlockDegree > 0 &&
"all outgoing jumps are ignored");
1023 uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree;
1024 for (
auto *Jump : Block->SuccJumps) {
1025 if (ignoreJump(SrcBlock, DstBlock, Jump))
1031 assert(BlockFlow == 0 &&
"not all flow is propagated");
1037 static constexpr uint64_t MinBaseDistance = 10000;
1045std::pair<int64_t, int64_t> assignBlockCosts(
const ProfiParams &Params,
1047std::pair<int64_t, int64_t> assignJumpCosts(
const ProfiParams &Params,
1055void initializeNetwork(
const ProfiParams &Params, MinCostMaxFlow &Network,
1057 uint64_t NumBlocks = Func.Blocks.size();
1058 assert(NumBlocks > 1 &&
"Too few blocks in a function");
1059 uint64_t NumJumps = Func.Jumps.size();
1060 assert(NumJumps > 0 &&
"Too few jumps in a function");
1071 Network.initialize(2 * NumBlocks + 4, S1,
T1);
1075 auto &Block = Func.Blocks[
B];
1083 if (Block.isEntry()) {
1084 Network.addEdge(S,
Bin, 0);
1085 }
else if (Block.isExit()) {
1086 Network.addEdge(Bout,
T, 0);
1090 auto [AuxCostInc, AuxCostDec] = assignBlockCosts(Params, Block);
1093 Network.addEdge(
Bin, Bout, AuxCostInc);
1094 if (Block.Weight > 0) {
1095 Network.addEdge(Bout,
Bin, Block.Weight, AuxCostDec);
1096 Network.addEdge(S1, Bout, Block.Weight, 0);
1097 Network.addEdge(
Bin,
T1, Block.Weight, 0);
1102 for (
uint64_t J = 0; J < NumJumps; J++) {
1103 auto &Jump = Func.Jumps[J];
1110 auto [AuxCostInc, AuxCostDec] = assignJumpCosts(Params, Jump);
1113 Network.addEdge(Jin, Jout, AuxCostInc);
1115 Network.addEdge(Jout, Jin, Jump.
Weight, AuxCostDec);
1116 Network.addEdge(S1, Jout, Jump.
Weight, 0);
1117 Network.addEdge(Jin,
T1, Jump.
Weight, 0);
1122 Network.addEdge(
T, S, 0);
1126std::pair<int64_t, int64_t> assignBlockCosts(
const ProfiParams &Params,
1129 if (Block.IsUnlikely)
1136 if (Block.HasUnknownWeight) {
1142 if (Block.Weight == 0)
1145 if (Block.isEntry()) {
1150 return std::make_pair(CostInc, CostDec);
1154std::pair<int64_t, int64_t> assignJumpCosts(
const ProfiParams &Params,
1177 assert(Jump.
Weight > 0 &&
"found zero-weight jump with a positive weight");
1179 return std::make_pair(CostInc, CostDec);
1183void extractWeights(
const ProfiParams &Params, MinCostMaxFlow &Network,
1185 uint64_t NumBlocks = Func.Blocks.size();
1186 uint64_t NumJumps = Func.Jumps.size();
1189 for (
uint64_t J = 0; J < NumJumps; J++) {
1190 auto &Jump = Func.Jumps[J];
1195 int64_t AuxFlow = Network.getFlow(SrcOut, DstIn);
1199 Flow = int64_t(Jump.
Weight) + (AuxFlow > 0 ? AuxFlow : 0);
1206 auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
1207 auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
1208 for (
auto &Jump : Func.Jumps) {
1213 auto &Block = Func.Blocks[
B];
1214 Block.Flow = std::max(OutFlow[
B], InFlow[
B]);
1222 assert(Func.Entry == 0 && Func.Blocks[0].isEntry());
1223 for (
size_t I = 1;
I < Func.Blocks.size();
I++) {
1224 assert(!Func.Blocks[
I].isEntry() &&
"multiple entry blocks");
1227 for (
auto &Block : Func.Blocks) {
1228 assert((!Block.isEntry() || !Block.isExit()) &&
1229 "a block cannot be an entry and an exit");
1232 for (
auto &Block : Func.Blocks) {
1233 assert((!Block.HasUnknownWeight || Block.Weight == 0 || Block.isEntry()) &&
1234 "non-zero weight of a block w/o weight except for an entry");
1237 for (
auto &Jump : Func.Jumps) {
1239 "non-zero weight of a jump w/o weight");
1245 const uint64_t NumBlocks = Func.Blocks.size();
1246 auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
1247 auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
1248 for (
const auto &Jump : Func.Jumps) {
1256 auto &Block = Func.Blocks[
I];
1257 if (Block.isEntry()) {
1258 TotalInFlow += Block.Flow;
1259 assert(Block.Flow == OutFlow[
I] &&
"incorrectly computed control flow");
1260 }
else if (Block.isExit()) {
1261 TotalOutFlow += Block.Flow;
1262 assert(Block.Flow == InFlow[
I] &&
"incorrectly computed control flow");
1264 assert(Block.Flow == OutFlow[
I] &&
"incorrectly computed control flow");
1265 assert(Block.Flow == InFlow[
I] &&
"incorrectly computed control flow");
1268 assert(TotalInFlow == TotalOutFlow &&
"incorrectly computed control flow");
1273 auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks);
1274 for (
const auto &Jump : Func.Jumps) {
1275 if (Jump.
Flow > 0) {
1281 std::queue<uint64_t> Queue;
1282 auto Visited =
BitVector(NumBlocks,
false);
1283 Queue.push(Func.Entry);
1284 Visited[Func.Entry] =
true;
1285 while (!Queue.empty()) {
1288 for (
uint64_t Dst : PositiveFlowEdges[Src]) {
1289 if (!Visited[Dst]) {
1291 Visited[Dst] =
true;
1299 auto &Block = Func.Blocks[
I];
1300 assert((Visited[
I] || Block.Flow == 0) &&
"an isolated flow component");
1315 auto InferenceNetwork = MinCostMaxFlow(Params);
1316 initializeNetwork(Params, InferenceNetwork, Func);
1317 InferenceNetwork.run();
1320 extractWeights(Params, InferenceNetwork, Func);
1323 auto Adjuster = FlowAdjuster(Params, Func);
This file implements the BitVector class.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Returns the sub-type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one Idx.
static void addEdge(SmallVectorImpl< LazyCallGraph::Edge > &Edges, DenseMap< LazyCallGraph::Node *, int > &EdgeIndexMap, LazyCallGraph::Node &N, LazyCallGraph::Edge::Kind EK)
return ToRemove size() > 0
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file provides the interface for the profile inference algorithm, profi.
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
Target - Wrapper for Target-specific information.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void applyFlowInference(const ProfiParams &Params, FlowFunction &Func)
Apply the profile inference algorithm for a given function.
A wrapper of a binary basic block.
std::vector< FlowJump * > SuccJumps
A wrapper of binary function with basic blocks and jumps.
A wrapper of a jump between two basic blocks.
Various thresholds and options controlling the behavior of the profile inference algorithm.
unsigned CostJumpUnknownFTInc
The cost of increasing an unknown fall-through jump's count by one.
unsigned CostBlockInc
The cost of increasing a block's count by one.
unsigned CostJumpFTInc
The cost of increasing a fall-through jump's count by one.
bool RebalanceUnknown
Evenly re-distribute flow among unknown subgraphs.
const int64_t CostUnlikely
The cost of taking an unlikely block/jump.
unsigned CostJumpDec
The cost of decreasing a jump's count by one.
bool JoinIslands
Join isolated components having positive flow.
unsigned CostBlockZeroInc
The cost of increasing a count of zero-weight block by one.
unsigned CostBlockEntryDec
The cost of decreasing the entry block's count by one.
unsigned CostJumpInc
The cost of increasing a jump's count by one.
unsigned CostJumpUnknownInc
The cost of increasing an unknown jump's count by one.
unsigned CostBlockUnknownInc
The cost of increasing an unknown block's count by one.
unsigned CostJumpFTDec
The cost of decreasing a fall-through jump's count by one.
unsigned CostBlockDec
The cost of decreasing a block's count by one.
unsigned CostBlockEntryInc
The cost of increasing the entry block's count by one.
bool EvenFlowDistribution
Evenly distribute flow when there are multiple equally likely options.