LLVM  15.0.0git
SampleProfileInference.cpp
Go to the documentation of this file.
1 //===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a profile inference algorithm. Given an incomplete and
10 // possibly imprecise block counts, the algorithm reconstructs realistic block
11 // and edge counts that satisfy flow conservation rules, while minimally modify
12 // input block counts.
13 //
14 //===----------------------------------------------------------------------===//
15 
17 #include "llvm/ADT/BitVector.h"
19 #include "llvm/Support/Debug.h"
20 #include <queue>
21 #include <set>
22 #include <stack>
23 
24 using namespace llvm;
25 #define DEBUG_TYPE "sample-profile-inference"
26 
27 namespace {
28 
29 static cl::opt<bool> SampleProfileEvenCountDistribution(
30  "sample-profile-even-count-distribution", cl::init(true), cl::Hidden,
31  cl::desc("Try to evenly distribute counts when there are multiple equally "
32  "likely options."));
33 
34 static cl::opt<unsigned> SampleProfileMaxDfsCalls(
35  "sample-profile-max-dfs-calls", cl::init(10), cl::Hidden,
36  cl::desc("Maximum number of dfs iterations for even count distribution."));
37 
38 static cl::opt<unsigned> SampleProfileProfiCostInc(
39  "sample-profile-profi-cost-inc", cl::init(10), cl::Hidden,
40  cl::desc("A cost of increasing a block's count by one."));
41 
42 static cl::opt<unsigned> SampleProfileProfiCostDec(
43  "sample-profile-profi-cost-dec", cl::init(20), cl::Hidden,
44  cl::desc("A cost of decreasing a block's count by one."));
45 
46 static cl::opt<unsigned> SampleProfileProfiCostIncZero(
47  "sample-profile-profi-cost-inc-zero", cl::init(11), cl::Hidden,
48  cl::desc("A cost of increasing a count of zero-weight block by one."));
49 
50 static cl::opt<unsigned> SampleProfileProfiCostIncEntry(
51  "sample-profile-profi-cost-inc-entry", cl::init(40), cl::Hidden,
52  cl::desc("A cost of increasing the entry block's count by one."));
53 
54 static cl::opt<unsigned> SampleProfileProfiCostDecEntry(
55  "sample-profile-profi-cost-dec-entry", cl::init(10), cl::Hidden,
56  cl::desc("A cost of decreasing the entry block's count by one."));
57 
58 /// A value indicating an infinite flow/capacity/weight of a block/edge.
59 /// Not using numeric_limits<int64_t>::max(), as the values can be summed up
60 /// during the execution.
61 static constexpr int64_t INF = ((int64_t)1) << 50;
62 
63 /// The minimum-cost maximum flow algorithm.
64 ///
65 /// The algorithm finds the maximum flow of minimum cost on a given (directed)
66 /// network using a modified version of the classical Moore-Bellman-Ford
67 /// approach. The algorithm applies a number of augmentation iterations in which
68 /// flow is sent along paths of positive capacity from the source to the sink.
69 /// The worst-case time complexity of the implementation is O(v(f)*m*n), where
70 /// where m is the number of edges, n is the number of vertices, and v(f) is the
71 /// value of the maximum flow. However, the observed running time on typical
72 /// instances is sub-quadratic, that is, o(n^2).
73 ///
74 /// The input is a set of edges with specified costs and capacities, and a pair
75 /// of nodes (source and sink). The output is the flow along each edge of the
76 /// minimum total cost respecting the given edge capacities.
77 class MinCostMaxFlow {
78 public:
79  // Initialize algorithm's data structures for a network of a given size.
80  void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) {
81  Source = SourceNode;
82  Target = SinkNode;
83 
84  Nodes = std::vector<Node>(NodeCount);
85  Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
86  if (SampleProfileEvenCountDistribution)
87  AugmentingEdges =
88  std::vector<std::vector<Edge *>>(NodeCount, std::vector<Edge *>());
89  }
90 
91  // Run the algorithm.
92  int64_t run() {
93  // Iteratively find an augmentation path/dag in the network and send the
94  // flow along its edges
95  size_t AugmentationIters = applyFlowAugmentation();
96 
97  // Compute the total flow and its cost
98  int64_t TotalCost = 0;
99  int64_t TotalFlow = 0;
100  for (uint64_t Src = 0; Src < Nodes.size(); Src++) {
101  for (auto &Edge : Edges[Src]) {
102  if (Edge.Flow > 0) {
103  TotalCost += Edge.Cost * Edge.Flow;
104  if (Src == Source)
105  TotalFlow += Edge.Flow;
106  }
107  }
108  }
109  LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters
110  << " iterations with " << TotalFlow << " total flow"
111  << " of " << TotalCost << " cost\n");
112  (void)TotalFlow;
113  (void)AugmentationIters;
114  return TotalCost;
115  }
116 
117  /// Adding an edge to the network with a specified capacity and a cost.
118  /// Multiple edges between a pair of nodes are allowed but self-edges
119  /// are not supported.
120  void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) {
121  assert(Capacity > 0 && "adding an edge of zero capacity");
122  assert(Src != Dst && "loop edge are not supported");
123 
124  Edge SrcEdge;
125  SrcEdge.Dst = Dst;
126  SrcEdge.Cost = Cost;
127  SrcEdge.Capacity = Capacity;
128  SrcEdge.Flow = 0;
129  SrcEdge.RevEdgeIndex = Edges[Dst].size();
130 
131  Edge DstEdge;
132  DstEdge.Dst = Src;
133  DstEdge.Cost = -Cost;
134  DstEdge.Capacity = 0;
135  DstEdge.Flow = 0;
136  DstEdge.RevEdgeIndex = Edges[Src].size();
137 
138  Edges[Src].push_back(SrcEdge);
139  Edges[Dst].push_back(DstEdge);
140  }
141 
142  /// Adding an edge to the network of infinite capacity and a given cost.
143  void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) {
144  addEdge(Src, Dst, INF, Cost);
145  }
146 
147  /// Get the total flow from a given source node.
148  /// Returns a list of pairs (target node, amount of flow to the target).
149  const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
150  std::vector<std::pair<uint64_t, int64_t>> Flow;
151  for (auto &Edge : Edges[Src]) {
152  if (Edge.Flow > 0)
153  Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));
154  }
155  return Flow;
156  }
157 
158  /// Get the total flow between a pair of nodes.
159  int64_t getFlow(uint64_t Src, uint64_t Dst) const {
160  int64_t Flow = 0;
161  for (auto &Edge : Edges[Src]) {
162  if (Edge.Dst == Dst) {
163  Flow += Edge.Flow;
164  }
165  }
166  return Flow;
167  }
168 
169  /// A cost of taking an unlikely jump.
170  static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 30;
171  /// Minimum BaseDistance for the jump distance values in island joining.
172  static constexpr uint64_t MinBaseDistance = 10000;
173 
174 private:
175  /// Iteratively find an augmentation path/dag in the network and send the
176  /// flow along its edges. The method returns the number of applied iterations.
177  size_t applyFlowAugmentation() {
178  size_t AugmentationIters = 0;
179  while (findAugmentingPath()) {
180  uint64_t PathCapacity = computeAugmentingPathCapacity();
181  while (PathCapacity > 0) {
182  bool Progress = false;
183  if (SampleProfileEvenCountDistribution) {
184  // Identify node/edge candidates for augmentation
185  identifyShortestEdges(PathCapacity);
186 
187  // Find an augmenting DAG
188  auto AugmentingOrder = findAugmentingDAG();
189 
190  // Apply the DAG augmentation
191  Progress = augmentFlowAlongDAG(AugmentingOrder);
192  PathCapacity = computeAugmentingPathCapacity();
193  }
194 
195  if (!Progress) {
196  augmentFlowAlongPath(PathCapacity);
197  PathCapacity = 0;
198  }
199 
200  AugmentationIters++;
201  }
202  }
203  return AugmentationIters;
204  }
205 
206  /// Compute the capacity of the cannonical augmenting path. If the path is
207  /// saturated (that is, no flow can be sent along the path), then return 0.
208  uint64_t computeAugmentingPathCapacity() {
209  uint64_t PathCapacity = INF;
210  uint64_t Now = Target;
211  while (Now != Source) {
212  uint64_t Pred = Nodes[Now].ParentNode;
213  auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
214 
215  assert(Edge.Capacity >= Edge.Flow && "incorrect edge flow");
216  uint64_t EdgeCapacity = uint64_t(Edge.Capacity - Edge.Flow);
217  PathCapacity = std::min(PathCapacity, EdgeCapacity);
218 
219  Now = Pred;
220  }
221  return PathCapacity;
222  }
223 
224  /// Check for existence of an augmenting path with a positive capacity.
225  bool findAugmentingPath() {
226  // Initialize data structures
227  for (auto &Node : Nodes) {
228  Node.Distance = INF;
229  Node.ParentNode = uint64_t(-1);
230  Node.ParentEdgeIndex = uint64_t(-1);
231  Node.Taken = false;
232  }
233 
234  std::queue<uint64_t> Queue;
235  Queue.push(Source);
236  Nodes[Source].Distance = 0;
237  Nodes[Source].Taken = true;
238  while (!Queue.empty()) {
239  uint64_t Src = Queue.front();
240  Queue.pop();
241  Nodes[Src].Taken = false;
242  // Although the residual network contains edges with negative costs
243  // (in particular, backward edges), it can be shown that there are no
244  // negative-weight cycles and the following two invariants are maintained:
245  // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V,
246  // where Dist is the length of the shortest path between two nodes. This
247  // allows to prune the search-space of the path-finding algorithm using
248  // the following early-stop criteria:
249  // -- If we find a path with zero-distance from Source to Target, stop the
250  // search, as the path is the shortest since Dist[Source, Target] >= 0;
251  // -- If we have Dist[Source, V] > Dist[Source, Target], then do not
252  // process node V, as it is guaranteed _not_ to be on a shortest path
253  // from Source to Target; it follows from inequalities
254  // Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target]
255  // >= Dist[Source, V]
256  if (!SampleProfileEvenCountDistribution && Nodes[Target].Distance == 0)
257  break;
258  if (Nodes[Src].Distance > Nodes[Target].Distance)
259  continue;
260 
261  // Process adjacent edges
262  for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) {
263  auto &Edge = Edges[Src][EdgeIdx];
264  if (Edge.Flow < Edge.Capacity) {
265  uint64_t Dst = Edge.Dst;
266  int64_t NewDistance = Nodes[Src].Distance + Edge.Cost;
267  if (Nodes[Dst].Distance > NewDistance) {
268  // Update the distance and the parent node/edge
269  Nodes[Dst].Distance = NewDistance;
270  Nodes[Dst].ParentNode = Src;
271  Nodes[Dst].ParentEdgeIndex = EdgeIdx;
272  // Add the node to the queue, if it is not there yet
273  if (!Nodes[Dst].Taken) {
274  Queue.push(Dst);
275  Nodes[Dst].Taken = true;
276  }
277  }
278  }
279  }
280  }
281 
282  return Nodes[Target].Distance != INF;
283  }
284 
285  /// Update the current flow along the augmenting path.
286  void augmentFlowAlongPath(uint64_t PathCapacity) {
287  assert(PathCapacity > 0 && "found an incorrect augmenting path");
288  uint64_t Now = Target;
289  while (Now != Source) {
290  uint64_t Pred = Nodes[Now].ParentNode;
291  auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
292  auto &RevEdge = Edges[Now][Edge.RevEdgeIndex];
293 
294  Edge.Flow += PathCapacity;
295  RevEdge.Flow -= PathCapacity;
296 
297  Now = Pred;
298  }
299  }
300 
301  /// Find an Augmenting DAG order using a modified version of DFS in which we
302  /// can visit a node multiple times. In the DFS search, when scanning each
303  /// edge out of a node, continue search at Edge.Dst endpoint if it has not
304  /// been discovered yet and its NumCalls < MaxDfsCalls. The algorithm
305  /// runs in O(MaxDfsCalls * |Edges| + |Nodes|) time.
306  /// It returns an Augmenting Order (Taken nodes in decreasing Finish time)
307  /// that starts with Source and ends with Target.
308  std::vector<uint64_t> findAugmentingDAG() {
309  // We use a stack based implemenation of DFS to avoid recursion.
310  // Defining DFS data structures:
311  // A pair (NodeIdx, EdgeIdx) at the top of the Stack denotes that
312  // - we are currently visiting Nodes[NodeIdx] and
313  // - the next edge to scan is Edges[NodeIdx][EdgeIdx]
314  typedef std::pair<uint64_t, uint64_t> StackItemType;
315  std::stack<StackItemType> Stack;
316  std::vector<uint64_t> AugmentingOrder;
317 
318  // Phase 0: Initialize Node attributes and Time for DFS run
319  for (auto &Node : Nodes) {
320  Node.Discovery = 0;
321  Node.Finish = 0;
322  Node.NumCalls = 0;
323  Node.Taken = false;
324  }
325  uint64_t Time = 0;
326  // Mark Target as Taken
327  // Taken attribute will be propagated backwards from Target towards Source
328  Nodes[Target].Taken = true;
329 
330  // Phase 1: Start DFS traversal from Source
331  Stack.emplace(Source, 0);
332  Nodes[Source].Discovery = ++Time;
333  while (!Stack.empty()) {
334  auto NodeIdx = Stack.top().first;
335  auto EdgeIdx = Stack.top().second;
336 
337  // If we haven't scanned all edges out of NodeIdx, continue scanning
338  if (EdgeIdx < Edges[NodeIdx].size()) {
339  auto &Edge = Edges[NodeIdx][EdgeIdx];
340  auto &Dst = Nodes[Edge.Dst];
341  Stack.top().second++;
342 
343  if (Edge.OnShortestPath) {
344  // If we haven't seen Edge.Dst so far, continue DFS search there
345  if (Dst.Discovery == 0 && Dst.NumCalls < SampleProfileMaxDfsCalls) {
346  Dst.Discovery = ++Time;
347  Stack.emplace(Edge.Dst, 0);
348  Dst.NumCalls++;
349  } else if (Dst.Taken && Dst.Finish != 0) {
350  // Else, if Edge.Dst already have a path to Target, so that NodeIdx
351  Nodes[NodeIdx].Taken = true;
352  }
353  }
354  } else {
355  // If we are done scanning all edge out of NodeIdx
356  Stack.pop();
357  // If we haven't found a path from NodeIdx to Target, forget about it
358  if (!Nodes[NodeIdx].Taken) {
359  Nodes[NodeIdx].Discovery = 0;
360  } else {
361  // If we have found a path from NodeIdx to Target, then finish NodeIdx
362  // and propagate Taken flag to DFS parent unless at the Source
363  Nodes[NodeIdx].Finish = ++Time;
364  // NodeIdx == Source if and only if the stack is empty
365  if (NodeIdx != Source) {
366  assert(!Stack.empty() && "empty stack while running dfs");
367  Nodes[Stack.top().first].Taken = true;
368  }
369  AugmentingOrder.push_back(NodeIdx);
370  }
371  }
372  }
373  // Nodes are collected decreasing Finish time, so the order is reversed
374  std::reverse(AugmentingOrder.begin(), AugmentingOrder.end());
375 
376  // Phase 2: Extract all forward (DAG) edges and fill in AugmentingEdges
377  for (size_t Src : AugmentingOrder) {
378  AugmentingEdges[Src].clear();
379  for (auto &Edge : Edges[Src]) {
380  uint64_t Dst = Edge.Dst;
381  if (Edge.OnShortestPath && Nodes[Src].Taken && Nodes[Dst].Taken &&
382  Nodes[Dst].Finish < Nodes[Src].Finish) {
383  AugmentingEdges[Src].push_back(&Edge);
384  }
385  }
386  assert((Src == Target || !AugmentingEdges[Src].empty()) &&
387  "incorrectly constructed augmenting edges");
388  }
389 
390  return AugmentingOrder;
391  }
392 
393  /// Update the current flow along the given (acyclic) subgraph specified by
394  /// the vertex order, AugmentingOrder. The objective is to send as much flow
395  /// as possible while evenly distributing flow among successors of each node.
396  /// After the update at least one edge is saturated.
397  bool augmentFlowAlongDAG(const std::vector<uint64_t> &AugmentingOrder) {
398  // Phase 0: Initialization
399  for (uint64_t Src : AugmentingOrder) {
400  Nodes[Src].FracFlow = 0;
401  Nodes[Src].IntFlow = 0;
402  for (auto &Edge : AugmentingEdges[Src]) {
403  Edge->AugmentedFlow = 0;
404  }
405  }
406 
407  // Phase 1: Send a unit of fractional flow along the DAG
408  uint64_t MaxFlowAmount = INF;
409  Nodes[Source].FracFlow = 1.0;
410  for (uint64_t Src : AugmentingOrder) {
411  assert((Src == Target || Nodes[Src].FracFlow > 0.0) &&
412  "incorrectly computed fractional flow");
413  // Distribute flow evenly among successors of Src
414  uint64_t Degree = AugmentingEdges[Src].size();
415  for (auto &Edge : AugmentingEdges[Src]) {
416  double EdgeFlow = Nodes[Src].FracFlow / Degree;
417  Nodes[Edge->Dst].FracFlow += EdgeFlow;
418  if (Edge->Capacity == INF)
419  continue;
420  uint64_t MaxIntFlow = double(Edge->Capacity - Edge->Flow) / EdgeFlow;
421  MaxFlowAmount = std::min(MaxFlowAmount, MaxIntFlow);
422  }
423  }
424  // Stop early if we cannot send any (integral) flow from Source to Target
425  if (MaxFlowAmount == 0)
426  return false;
427 
428  // Phase 2: Send an integral flow of MaxFlowAmount
429  Nodes[Source].IntFlow = MaxFlowAmount;
430  for (uint64_t Src : AugmentingOrder) {
431  if (Src == Target)
432  break;
433  // Distribute flow evenly among successors of Src, rounding up to make
434  // sure all flow is sent
435  uint64_t Degree = AugmentingEdges[Src].size();
436  // We are guaranteeed that Node[Src].IntFlow <= SuccFlow * Degree
437  uint64_t SuccFlow = (Nodes[Src].IntFlow + Degree - 1) / Degree;
438  for (auto &Edge : AugmentingEdges[Src]) {
439  uint64_t Dst = Edge->Dst;
440  uint64_t EdgeFlow = std::min(Nodes[Src].IntFlow, SuccFlow);
441  EdgeFlow = std::min(EdgeFlow, uint64_t(Edge->Capacity - Edge->Flow));
442  Nodes[Dst].IntFlow += EdgeFlow;
443  Nodes[Src].IntFlow -= EdgeFlow;
444  Edge->AugmentedFlow += EdgeFlow;
445  }
446  }
447  assert(Nodes[Target].IntFlow <= MaxFlowAmount);
448  Nodes[Target].IntFlow = 0;
449 
450  // Phase 3: Send excess flow back traversing the nodes backwards.
451  // Because of rounding, not all flow can be sent along the edges of Src.
452  // Hence, sending the remaining flow back to maintain flow conservation
453  for (size_t Idx = AugmentingOrder.size() - 1; Idx > 0; Idx--) {
454  uint64_t Src = AugmentingOrder[Idx - 1];
455  // Try to send excess flow back along each edge.
456  // Make sure we only send back flow we just augmented (AugmentedFlow).
457  for (auto &Edge : AugmentingEdges[Src]) {
458  uint64_t Dst = Edge->Dst;
459  if (Nodes[Dst].IntFlow == 0)
460  continue;
461  uint64_t EdgeFlow = std::min(Nodes[Dst].IntFlow, Edge->AugmentedFlow);
462  Nodes[Dst].IntFlow -= EdgeFlow;
463  Nodes[Src].IntFlow += EdgeFlow;
464  Edge->AugmentedFlow -= EdgeFlow;
465  }
466  }
467 
468  // Phase 4: Update flow values along all edges
469  bool HasSaturatedEdges = false;
470  for (uint64_t Src : AugmentingOrder) {
471  // Verify that we have sent all the excess flow from the node
472  assert(Src == Source || Nodes[Src].IntFlow == 0);
473  for (auto &Edge : AugmentingEdges[Src]) {
474  assert(uint64_t(Edge->Capacity - Edge->Flow) >= Edge->AugmentedFlow);
475  // Update flow values along the edge and its reverse copy
476  auto &RevEdge = Edges[Edge->Dst][Edge->RevEdgeIndex];
477  Edge->Flow += Edge->AugmentedFlow;
478  RevEdge.Flow -= Edge->AugmentedFlow;
479  if (Edge->Capacity == Edge->Flow && Edge->AugmentedFlow > 0)
480  HasSaturatedEdges = true;
481  }
482  }
483 
484  // The augmentation is successful iff at least one edge becomes saturated
485  return HasSaturatedEdges;
486  }
487 
488  /// Identify candidate (shortest) edges for augmentation.
489  void identifyShortestEdges(uint64_t PathCapacity) {
490  assert(PathCapacity > 0 && "found an incorrect augmenting DAG");
491  // To make sure the augmentation DAG contains only edges with large residual
492  // capacity, we prune all edges whose capacity is below a fraction of
493  // the capacity of the augmented path.
494  // (All edges of the path itself are always in the DAG)
495  uint64_t MinCapacity = std::max(PathCapacity / 2, uint64_t(1));
496 
497  // Decide which edges are on a shortest path from Source to Target
498  for (size_t Src = 0; Src < Nodes.size(); Src++) {
499  // An edge cannot be augmenting if the endpoint has large distance
500  if (Nodes[Src].Distance > Nodes[Target].Distance)
501  continue;
502 
503  for (auto &Edge : Edges[Src]) {
504  uint64_t Dst = Edge.Dst;
505  Edge.OnShortestPath =
506  Src != Target && Dst != Source &&
507  Nodes[Dst].Distance <= Nodes[Target].Distance &&
508  Nodes[Dst].Distance == Nodes[Src].Distance + Edge.Cost &&
509  Edge.Capacity > Edge.Flow &&
510  uint64_t(Edge.Capacity - Edge.Flow) >= MinCapacity;
511  }
512  }
513  }
514 
515  /// A node in a flow network.
516  struct Node {
517  /// The cost of the cheapest path from the source to the current node.
518  int64_t Distance;
519  /// The node preceding the current one in the path.
520  uint64_t ParentNode;
521  /// The index of the edge between ParentNode and the current node.
522  uint64_t ParentEdgeIndex;
523  /// An indicator of whether the current node is in a queue.
524  bool Taken;
525 
526  /// Data fields utilized in DAG-augmentation:
527  /// Fractional flow.
528  double FracFlow;
529  /// Integral flow.
530  uint64_t IntFlow;
531  /// Discovery time.
532  uint64_t Discovery;
533  /// Finish time.
534  uint64_t Finish;
535  /// NumCalls.
536  uint64_t NumCalls;
537  };
538 
539  /// An edge in a flow network.
540  struct Edge {
541  /// The cost of the edge.
542  int64_t Cost;
543  /// The capacity of the edge.
544  int64_t Capacity;
545  /// The current flow on the edge.
546  int64_t Flow;
547  /// The destination node of the edge.
548  uint64_t Dst;
549  /// The index of the reverse edge between Dst and the current node.
550  uint64_t RevEdgeIndex;
551 
552  /// Data fields utilized in DAG-augmentation:
553  /// Whether the edge is currently on a shortest path from Source to Target.
554  bool OnShortestPath;
555  /// Extra flow along the edge.
556  uint64_t AugmentedFlow;
557  };
558 
559  /// The set of network nodes.
560  std::vector<Node> Nodes;
561  /// The set of network edges.
562  std::vector<std::vector<Edge>> Edges;
563  /// Source node of the flow.
565  /// Target (sink) node of the flow.
567  /// Augmenting edges.
568  std::vector<std::vector<Edge *>> AugmentingEdges;
569 };
570 
571 constexpr int64_t MinCostMaxFlow::AuxCostUnlikely;
572 constexpr uint64_t MinCostMaxFlow::MinBaseDistance;
573 
574 /// A post-processing adjustment of control flow. It applies two steps by
575 /// rerouting some flow and making it more realistic:
576 ///
577 /// - First, it removes all isolated components ("islands") with a positive flow
578 /// that are unreachable from the entry block. For every such component, we
579 /// find the shortest from the entry to an exit passing through the component,
580 /// and increase the flow by one unit along the path.
581 ///
582 /// - Second, it identifies all "unknown subgraphs" consisting of basic blocks
583 /// with no sampled counts. Then it rebalnces the flow that goes through such
584 /// a subgraph so that each branch is taken with probability 50%.
585 /// An unknown subgraph is such that for every two nodes u and v:
586 /// - u dominates v and u is not unknown;
587 /// - v post-dominates u; and
588 /// - all inner-nodes of all (u,v)-paths are unknown.
589 ///
590 class FlowAdjuster {
591 public:
592  FlowAdjuster(FlowFunction &Func) : Func(Func) {
593  assert(Func.Blocks[Func.Entry].isEntry() &&
594  "incorrect index of the entry block");
595  }
596 
597  // Run the post-processing
598  void run() {
599  /// Adjust the flow to get rid of isolated components.
600  joinIsolatedComponents();
601 
602  /// Rebalance the flow inside unknown subgraphs.
603  rebalanceUnknownSubgraphs();
604  }
605 
606 private:
607  void joinIsolatedComponents() {
608  // Find blocks that are reachable from the source
609  auto Visited = BitVector(NumBlocks(), false);
610  findReachable(Func.Entry, Visited);
611 
612  // Iterate over all non-reachable blocks and adjust their weights
613  for (uint64_t I = 0; I < NumBlocks(); I++) {
614  auto &Block = Func.Blocks[I];
615  if (Block.Flow > 0 && !Visited[I]) {
616  // Find a path from the entry to an exit passing through the block I
617  auto Path = findShortestPath(I);
618  // Increase the flow along the path
619  assert(Path.size() > 0 && Path[0]->Source == Func.Entry &&
620  "incorrectly computed path adjusting control flow");
621  Func.Blocks[Func.Entry].Flow += 1;
622  for (auto &Jump : Path) {
623  Jump->Flow += 1;
624  Func.Blocks[Jump->Target].Flow += 1;
625  // Update reachability
626  findReachable(Jump->Target, Visited);
627  }
628  }
629  }
630  }
631 
632  /// Run BFS from a given block along the jumps with a positive flow and mark
633  /// all reachable blocks.
634  void findReachable(uint64_t Src, BitVector &Visited) {
635  if (Visited[Src])
636  return;
637  std::queue<uint64_t> Queue;
638  Queue.push(Src);
639  Visited[Src] = true;
640  while (!Queue.empty()) {
641  Src = Queue.front();
642  Queue.pop();
643  for (auto Jump : Func.Blocks[Src].SuccJumps) {
644  uint64_t Dst = Jump->Target;
645  if (Jump->Flow > 0 && !Visited[Dst]) {
646  Queue.push(Dst);
647  Visited[Dst] = true;
648  }
649  }
650  }
651  }
652 
653  /// Find the shortest path from the entry block to an exit block passing
654  /// through a given block.
655  std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) {
656  // A path from the entry block to BlockIdx
657  auto ForwardPath = findShortestPath(Func.Entry, BlockIdx);
658  // A path from BlockIdx to an exit block
659  auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock);
660 
661  // Concatenate the two paths
662  std::vector<FlowJump *> Result;
663  Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end());
664  Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end());
665  return Result;
666  }
667 
668  /// Apply the Dijkstra algorithm to find the shortest path from a given
669  /// Source to a given Target block.
670  /// If Target == -1, then the path ends at an exit block.
671  std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) {
672  // Quit early, if possible
673  if (Source == Target)
674  return std::vector<FlowJump *>();
675  if (Func.Blocks[Source].isExit() && Target == AnyExitBlock)
676  return std::vector<FlowJump *>();
677 
678  // Initialize data structures
679  auto Distance = std::vector<int64_t>(NumBlocks(), INF);
680  auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr);
681  Distance[Source] = 0;
682  std::set<std::pair<uint64_t, uint64_t>> Queue;
683  Queue.insert(std::make_pair(Distance[Source], Source));
684 
685  // Run the Dijkstra algorithm
686  while (!Queue.empty()) {
687  uint64_t Src = Queue.begin()->second;
688  Queue.erase(Queue.begin());
689  // If we found a solution, quit early
690  if (Src == Target ||
691  (Func.Blocks[Src].isExit() && Target == AnyExitBlock))
692  break;
693 
694  for (auto Jump : Func.Blocks[Src].SuccJumps) {
695  uint64_t Dst = Jump->Target;
696  int64_t JumpDist = jumpDistance(Jump);
697  if (Distance[Dst] > Distance[Src] + JumpDist) {
698  Queue.erase(std::make_pair(Distance[Dst], Dst));
699 
700  Distance[Dst] = Distance[Src] + JumpDist;
701  Parent[Dst] = Jump;
702 
703  Queue.insert(std::make_pair(Distance[Dst], Dst));
704  }
705  }
706  }
707  // If Target is not provided, find the closest exit block
708  if (Target == AnyExitBlock) {
709  for (uint64_t I = 0; I < NumBlocks(); I++) {
710  if (Func.Blocks[I].isExit() && Parent[I] != nullptr) {
711  if (Target == AnyExitBlock || Distance[Target] > Distance[I]) {
712  Target = I;
713  }
714  }
715  }
716  }
717  assert(Parent[Target] != nullptr && "a path does not exist");
718 
719  // Extract the constructed path
720  std::vector<FlowJump *> Result;
721  uint64_t Now = Target;
722  while (Now != Source) {
723  assert(Now == Parent[Now]->Target && "incorrect parent jump");
724  Result.push_back(Parent[Now]);
725  Now = Parent[Now]->Source;
726  }
727  // Reverse the path, since it is extracted from Target to Source
728  std::reverse(Result.begin(), Result.end());
729  return Result;
730  }
731 
732  /// A distance of a path for a given jump.
733  /// In order to incite the path to use blocks/jumps with large positive flow,
734  /// and avoid changing branch probability of outgoing edges drastically,
735  /// set the jump distance so as:
736  /// - to minimize the number of unlikely jumps used and subject to that,
737  /// - to minimize the number of Flow == 0 jumps used and subject to that,
738  /// - minimizes total multiplicative Flow increase for the remaining edges.
739  /// To capture this objective with integer distances, we round off fractional
740  /// parts to a multiple of 1 / BaseDistance.
741  int64_t jumpDistance(FlowJump *Jump) const {
742  uint64_t BaseDistance =
743  std::max(static_cast<uint64_t>(MinCostMaxFlow::MinBaseDistance),
744  std::min(Func.Blocks[Func.Entry].Flow,
745  MinCostMaxFlow::AuxCostUnlikely / NumBlocks()));
746  if (Jump->IsUnlikely)
747  return MinCostMaxFlow::AuxCostUnlikely;
748  if (Jump->Flow > 0)
749  return BaseDistance + BaseDistance / Jump->Flow;
750  return BaseDistance * NumBlocks();
751  };
752 
753  uint64_t NumBlocks() const { return Func.Blocks.size(); }
754 
755  /// Rebalance unknown subgraphs so that the flow is split evenly across the
756  /// outgoing branches of every block of the subgraph. The method iterates over
757  /// blocks with known weight and identifies unknown subgraphs rooted at the
758  /// blocks. Then it verifies if flow rebalancing is feasible and applies it.
759  void rebalanceUnknownSubgraphs() {
760  // Try to find unknown subgraphs from each block
761  for (uint64_t I = 0; I < Func.Blocks.size(); I++) {
762  auto SrcBlock = &Func.Blocks[I];
763  // Verify if rebalancing rooted at SrcBlock is feasible
764  if (!canRebalanceAtRoot(SrcBlock))
765  continue;
766 
767  // Find an unknown subgraphs starting at SrcBlock. Along the way,
768  // fill in known destinations and intermediate unknown blocks.
769  std::vector<FlowBlock *> UnknownBlocks;
770  std::vector<FlowBlock *> KnownDstBlocks;
771  findUnknownSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks);
772 
773  // Verify if rebalancing of the subgraph is feasible. If the search is
774  // successful, find the unique destination block (which can be null)
775  FlowBlock *DstBlock = nullptr;
776  if (!canRebalanceSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks,
777  DstBlock))
778  continue;
779 
780  // We cannot rebalance subgraphs containing cycles among unknown blocks
781  if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownBlocks))
782  continue;
783 
784  // Rebalance the flow
785  rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownBlocks);
786  }
787  }
788 
789  /// Verify if rebalancing rooted at a given block is possible.
790  bool canRebalanceAtRoot(const FlowBlock *SrcBlock) {
791  // Do not attempt to find unknown subgraphs from an unknown or a
792  // zero-flow block
793  if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0)
794  return false;
795 
796  // Do not attempt to process subgraphs from a block w/o unknown sucessors
797  bool HasUnknownSuccs = false;
798  for (auto Jump : SrcBlock->SuccJumps) {
799  if (Func.Blocks[Jump->Target].UnknownWeight) {
800  HasUnknownSuccs = true;
801  break;
802  }
803  }
804  if (!HasUnknownSuccs)
805  return false;
806 
807  return true;
808  }
809 
810  /// Find an unknown subgraph starting at block SrcBlock. The method sets
811  /// identified destinations, KnownDstBlocks, and intermediate UnknownBlocks.
812  void findUnknownSubgraph(const FlowBlock *SrcBlock,
813  std::vector<FlowBlock *> &KnownDstBlocks,
814  std::vector<FlowBlock *> &UnknownBlocks) {
815  // Run BFS from SrcBlock and make sure all paths are going through unknown
816  // blocks and end at a known DstBlock
817  auto Visited = BitVector(NumBlocks(), false);
818  std::queue<uint64_t> Queue;
819 
820  Queue.push(SrcBlock->Index);
821  Visited[SrcBlock->Index] = true;
822  while (!Queue.empty()) {
823  auto &Block = Func.Blocks[Queue.front()];
824  Queue.pop();
825  // Process blocks reachable from Block
826  for (auto Jump : Block.SuccJumps) {
827  // If Jump can be ignored, skip it
828  if (ignoreJump(SrcBlock, nullptr, Jump))
829  continue;
830 
831  uint64_t Dst = Jump->Target;
832  // If Dst has been visited, skip Jump
833  if (Visited[Dst])
834  continue;
835  // Process block Dst
836  Visited[Dst] = true;
837  if (!Func.Blocks[Dst].UnknownWeight) {
838  KnownDstBlocks.push_back(&Func.Blocks[Dst]);
839  } else {
840  Queue.push(Dst);
841  UnknownBlocks.push_back(&Func.Blocks[Dst]);
842  }
843  }
844  }
845  }
846 
847  /// Verify if rebalancing of the subgraph is feasible. If the checks are
848  /// successful, set the unique destination block, DstBlock (can be null).
849  bool canRebalanceSubgraph(const FlowBlock *SrcBlock,
850  const std::vector<FlowBlock *> &KnownDstBlocks,
851  const std::vector<FlowBlock *> &UnknownBlocks,
852  FlowBlock *&DstBlock) {
853  // If the list of unknown blocks is empty, we don't need rebalancing
854  if (UnknownBlocks.empty())
855  return false;
856 
857  // If there are multiple known sinks, we can't rebalance
858  if (KnownDstBlocks.size() > 1)
859  return false;
860  DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front();
861 
862  // Verify sinks of the subgraph
863  for (auto Block : UnknownBlocks) {
864  if (Block->SuccJumps.empty()) {
865  // If there are multiple (known and unknown) sinks, we can't rebalance
866  if (DstBlock != nullptr)
867  return false;
868  continue;
869  }
870  size_t NumIgnoredJumps = 0;
871  for (auto Jump : Block->SuccJumps) {
872  if (ignoreJump(SrcBlock, DstBlock, Jump))
873  NumIgnoredJumps++;
874  }
875  // If there is a non-sink block in UnknownBlocks with all jumps ignored,
876  // then we can't rebalance
877  if (NumIgnoredJumps == Block->SuccJumps.size())
878  return false;
879  }
880 
881  return true;
882  }
883 
884  /// Decide whether the Jump is ignored while processing an unknown subgraphs
885  /// rooted at basic block SrcBlock with the destination block, DstBlock.
886  bool ignoreJump(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
887  const FlowJump *Jump) {
888  // Ignore unlikely jumps with zero flow
889  if (Jump->IsUnlikely && Jump->Flow == 0)
890  return true;
891 
892  auto JumpSource = &Func.Blocks[Jump->Source];
893  auto JumpTarget = &Func.Blocks[Jump->Target];
894 
895  // Do not ignore jumps coming into DstBlock
896  if (DstBlock != nullptr && JumpTarget == DstBlock)
897  return false;
898 
899  // Ignore jumps out of SrcBlock to known blocks
900  if (!JumpTarget->UnknownWeight && JumpSource == SrcBlock)
901  return true;
902 
903  // Ignore jumps to known blocks with zero flow
904  if (!JumpTarget->UnknownWeight && JumpTarget->Flow == 0)
905  return true;
906 
907  return false;
908  }
909 
910  /// Verify if the given unknown subgraph is acyclic, and if yes, reorder
911  /// UnknownBlocks in the topological order (so that all jumps are "forward").
912  bool isAcyclicSubgraph(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
913  std::vector<FlowBlock *> &UnknownBlocks) {
914  // Extract local in-degrees in the considered subgraph
915  auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);
916  auto fillInDegree = [&](const FlowBlock *Block) {
917  for (auto Jump : Block->SuccJumps) {
918  if (ignoreJump(SrcBlock, DstBlock, Jump))
919  continue;
920  LocalInDegree[Jump->Target]++;
921  }
922  };
923  fillInDegree(SrcBlock);
924  for (auto Block : UnknownBlocks) {
925  fillInDegree(Block);
926  }
927  // A loop containing SrcBlock
928  if (LocalInDegree[SrcBlock->Index] > 0)
929  return false;
930 
931  std::vector<FlowBlock *> AcyclicOrder;
932  std::queue<uint64_t> Queue;
933  Queue.push(SrcBlock->Index);
934  while (!Queue.empty()) {
935  FlowBlock *Block = &Func.Blocks[Queue.front()];
936  Queue.pop();
937  // Stop propagation once we reach DstBlock, if any
938  if (DstBlock != nullptr && Block == DstBlock)
939  break;
940 
941  // Keep an acyclic order of unknown blocks
942  if (Block->UnknownWeight && Block != SrcBlock)
943  AcyclicOrder.push_back(Block);
944 
945  // Add to the queue all successors with zero local in-degree
946  for (auto Jump : Block->SuccJumps) {
947  if (ignoreJump(SrcBlock, DstBlock, Jump))
948  continue;
949  uint64_t Dst = Jump->Target;
950  LocalInDegree[Dst]--;
951  if (LocalInDegree[Dst] == 0) {
952  Queue.push(Dst);
953  }
954  }
955  }
956 
957  // If there is a cycle in the subgraph, AcyclicOrder contains only a subset
958  // of all blocks
959  if (UnknownBlocks.size() != AcyclicOrder.size())
960  return false;
961  UnknownBlocks = AcyclicOrder;
962  return true;
963  }
964 
965  /// Rebalance a given subgraph rooted at SrcBlock, ending at DstBlock and
966  /// having UnknownBlocks intermediate blocks.
967  void rebalanceUnknownSubgraph(const FlowBlock *SrcBlock,
968  const FlowBlock *DstBlock,
969  const std::vector<FlowBlock *> &UnknownBlocks) {
970  assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph");
971 
972  // Ditribute flow from the source block
973  uint64_t BlockFlow = 0;
974  // SrcBlock's flow is the sum of outgoing flows along non-ignored jumps
975  for (auto Jump : SrcBlock->SuccJumps) {
976  if (ignoreJump(SrcBlock, DstBlock, Jump))
977  continue;
978  BlockFlow += Jump->Flow;
979  }
980  rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow);
981 
982  // Ditribute flow from the remaining blocks
983  for (auto Block : UnknownBlocks) {
984  assert(Block->UnknownWeight && "incorrect unknown subgraph");
985  uint64_t BlockFlow = 0;
986  // Block's flow is the sum of incoming flows
987  for (auto Jump : Block->PredJumps) {
988  BlockFlow += Jump->Flow;
989  }
990  Block->Flow = BlockFlow;
991  rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow);
992  }
993  }
994 
995  /// Redistribute flow for a block in a subgraph rooted at SrcBlock,
996  /// and ending at DstBlock.
997  void rebalanceBlock(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
998  const FlowBlock *Block, uint64_t BlockFlow) {
999  // Process all successor jumps and update corresponding flow values
1000  size_t BlockDegree = 0;
1001  for (auto Jump : Block->SuccJumps) {
1002  if (ignoreJump(SrcBlock, DstBlock, Jump))
1003  continue;
1004  BlockDegree++;
1005  }
1006  // If all successor jumps of the block are ignored, skip it
1007  if (DstBlock == nullptr && BlockDegree == 0)
1008  return;
1009  assert(BlockDegree > 0 && "all outgoing jumps are ignored");
1010 
1011  // Each of the Block's successors gets the following amount of flow.
1012  // Rounding the value up so that all flow is propagated
1013  uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree;
1014  for (auto Jump : Block->SuccJumps) {
1015  if (ignoreJump(SrcBlock, DstBlock, Jump))
1016  continue;
1017  uint64_t Flow = std::min(SuccFlow, BlockFlow);
1018  Jump->Flow = Flow;
1019  BlockFlow -= Flow;
1020  }
1021  assert(BlockFlow == 0 && "not all flow is propagated");
1022  }
1023 
1024  /// A constant indicating an arbitrary exit block of a function.
1025  static constexpr uint64_t AnyExitBlock = uint64_t(-1);
1026 
1027  /// The function.
1028  FlowFunction &Func;
1029 };
1030 
1031 /// Initializing flow network for a given function.
1032 ///
1033 /// Every block is split into three nodes that are responsible for (i) an
1034 /// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or
1035 /// reduction of the block weight.
1036 void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) {
1037  uint64_t NumBlocks = Func.Blocks.size();
1038  assert(NumBlocks > 1 && "Too few blocks in a function");
1039  LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n");
1040 
1041  // Pre-process data: make sure the entry weight is at least 1
1042  if (Func.Blocks[Func.Entry].Weight == 0) {
1043  Func.Blocks[Func.Entry].Weight = 1;
1044  }
1045  // Introducing dummy source/sink pairs to allow flow circulation.
1046  // The nodes corresponding to blocks of Func have indicies in the range
1047  // [0..3 * NumBlocks); the dummy nodes are indexed by the next four values.
1048  uint64_t S = 3 * NumBlocks;
1049  uint64_t T = S + 1;
1050  uint64_t S1 = S + 2;
1051  uint64_t T1 = S + 3;
1052 
1053  Network.initialize(3 * NumBlocks + 4, S1, T1);
1054 
1055  // Create three nodes for every block of the function
1056  for (uint64_t B = 0; B < NumBlocks; B++) {
1057  auto &Block = Func.Blocks[B];
1058  assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) &&
1059  "non-zero weight of a block w/o weight except for an entry");
1060 
1061  // Split every block into two nodes
1062  uint64_t Bin = 3 * B;
1063  uint64_t Bout = 3 * B + 1;
1064  uint64_t Baux = 3 * B + 2;
1065  if (Block.Weight > 0) {
1066  Network.addEdge(S1, Bout, Block.Weight, 0);
1067  Network.addEdge(Bin, T1, Block.Weight, 0);
1068  }
1069 
1070  // Edges from S and to T
1071  assert((!Block.isEntry() || !Block.isExit()) &&
1072  "a block cannot be an entry and an exit");
1073  if (Block.isEntry()) {
1074  Network.addEdge(S, Bin, 0);
1075  } else if (Block.isExit()) {
1076  Network.addEdge(Bout, T, 0);
1077  }
1078 
1079  // An auxiliary node to allow increase/reduction of block counts:
1080  // We assume that decreasing block counts is more expensive than increasing,
1081  // and thus, setting separate costs here. In the future we may want to tune
1082  // the relative costs so as to maximize the quality of generated profiles.
1083  int64_t AuxCostInc = SampleProfileProfiCostInc;
1084  int64_t AuxCostDec = SampleProfileProfiCostDec;
1085  if (Block.UnknownWeight) {
1086  // Do not penalize changing weights of blocks w/o known profile count
1087  AuxCostInc = 0;
1088  AuxCostDec = 0;
1089  } else {
1090  // Increasing the count for "cold" blocks with zero initial count is more
1091  // expensive than for "hot" ones
1092  if (Block.Weight == 0) {
1093  AuxCostInc = SampleProfileProfiCostIncZero;
1094  }
1095  // Modifying the count of the entry block is expensive
1096  if (Block.isEntry()) {
1097  AuxCostInc = SampleProfileProfiCostIncEntry;
1098  AuxCostDec = SampleProfileProfiCostDecEntry;
1099  }
1100  }
1101  // For blocks with self-edges, do not penalize a reduction of the count,
1102  // as all of the increase can be attributed to the self-edge
1103  if (Block.HasSelfEdge) {
1104  AuxCostDec = 0;
1105  }
1106 
1107  Network.addEdge(Bin, Baux, AuxCostInc);
1108  Network.addEdge(Baux, Bout, AuxCostInc);
1109  if (Block.Weight > 0) {
1110  Network.addEdge(Bout, Baux, AuxCostDec);
1111  Network.addEdge(Baux, Bin, AuxCostDec);
1112  }
1113  }
1114 
1115  // Creating edges for every jump
1116  for (auto &Jump : Func.Jumps) {
1117  uint64_t Src = Jump.Source;
1118  uint64_t Dst = Jump.Target;
1119  if (Src != Dst) {
1120  uint64_t SrcOut = 3 * Src + 1;
1121  uint64_t DstIn = 3 * Dst;
1122  uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0;
1123  Network.addEdge(SrcOut, DstIn, Cost);
1124  }
1125  }
1126 
1127  // Make sure we have a valid flow circulation
1128  Network.addEdge(T, S, 0);
1129 }
1130 
1131 /// Extract resulting block and edge counts from the flow network.
1132 void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) {
1133  uint64_t NumBlocks = Func.Blocks.size();
1134 
1135  // Extract resulting block counts
1136  for (uint64_t Src = 0; Src < NumBlocks; Src++) {
1137  auto &Block = Func.Blocks[Src];
1138  uint64_t SrcOut = 3 * Src + 1;
1139  int64_t Flow = 0;
1140  for (auto &Adj : Network.getFlow(SrcOut)) {
1141  uint64_t DstIn = Adj.first;
1142  int64_t DstFlow = Adj.second;
1143  bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2);
1144  if (!IsAuxNode || Block.HasSelfEdge) {
1145  Flow += DstFlow;
1146  }
1147  }
1148  Block.Flow = Flow;
1149  assert(Flow >= 0 && "negative block flow");
1150  }
1151 
1152  // Extract resulting jump counts
1153  for (auto &Jump : Func.Jumps) {
1154  uint64_t Src = Jump.Source;
1155  uint64_t Dst = Jump.Target;
1156  int64_t Flow = 0;
1157  if (Src != Dst) {
1158  uint64_t SrcOut = 3 * Src + 1;
1159  uint64_t DstIn = 3 * Dst;
1160  Flow = Network.getFlow(SrcOut, DstIn);
1161  } else {
1162  uint64_t SrcOut = 3 * Src + 1;
1163  uint64_t SrcAux = 3 * Src + 2;
1164  int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux);
1165  if (AuxFlow > 0)
1166  Flow = AuxFlow;
1167  }
1168  Jump.Flow = Flow;
1169  assert(Flow >= 0 && "negative jump flow");
1170  }
1171 }
1172 
1173 #ifndef NDEBUG
1174 /// Verify that the computed flow values satisfy flow conservation rules
1175 void verifyWeights(const FlowFunction &Func) {
1176  const uint64_t NumBlocks = Func.Blocks.size();
1177  auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
1178  auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
1179  for (auto &Jump : Func.Jumps) {
1180  InFlow[Jump.Target] += Jump.Flow;
1181  OutFlow[Jump.Source] += Jump.Flow;
1182  }
1183 
1184  uint64_t TotalInFlow = 0;
1185  uint64_t TotalOutFlow = 0;
1186  for (uint64_t I = 0; I < NumBlocks; I++) {
1187  auto &Block = Func.Blocks[I];
1188  if (Block.isEntry()) {
1189  TotalInFlow += Block.Flow;
1190  assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
1191  } else if (Block.isExit()) {
1192  TotalOutFlow += Block.Flow;
1193  assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
1194  } else {
1195  assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
1196  assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
1197  }
1198  }
1199  assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow");
1200 
1201  // Verify that there are no isolated flow components
1202  // One could modify FlowFunction to hold edges indexed by the sources, which
1203  // will avoid a creation of the object
1204  auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks);
1205  for (auto &Jump : Func.Jumps) {
1206  if (Jump.Flow > 0) {
1207  PositiveFlowEdges[Jump.Source].push_back(Jump.Target);
1208  }
1209  }
1210 
1211  // Run BFS from the source along edges with positive flow
1212  std::queue<uint64_t> Queue;
1213  auto Visited = BitVector(NumBlocks, false);
1214  Queue.push(Func.Entry);
1215  Visited[Func.Entry] = true;
1216  while (!Queue.empty()) {
1217  uint64_t Src = Queue.front();
1218  Queue.pop();
1219  for (uint64_t Dst : PositiveFlowEdges[Src]) {
1220  if (!Visited[Dst]) {
1221  Queue.push(Dst);
1222  Visited[Dst] = true;
1223  }
1224  }
1225  }
1226 
1227  // Verify that every block that has a positive flow is reached from the source
1228  // along edges with a positive flow
1229  for (uint64_t I = 0; I < NumBlocks; I++) {
1230  auto &Block = Func.Blocks[I];
1231  assert((Visited[I] || Block.Flow == 0) && "an isolated flow component");
1232  }
1233 }
1234 #endif
1235 
1236 } // end of anonymous namespace
1237 
1238 /// Apply the profile inference algorithm for a given flow function
1240  // Create and apply an inference network model
1241  auto InferenceNetwork = MinCostMaxFlow();
1242  initializeNetwork(InferenceNetwork, Func);
1243  InferenceNetwork.run();
1244 
1245  // Extract flow values for every block and every edge
1246  extractWeights(InferenceNetwork, Func);
1247 
1248  // Post-processing adjustments to the flow
1249  auto Adjuster = FlowAdjuster(Func);
1250  Adjuster.run();
1251 
1252 #ifndef NDEBUG
1253  // Verify the result
1254  verifyWeights(Func);
1255 #endif
1256 }
llvm::applyFlowInference
void applyFlowInference(FlowFunction &Func)
Apply the profile inference algorithm for a given flow function.
Definition: SampleProfileInference.cpp:1239
llvm::BitVector::push_back
void push_back(bool Val)
Definition: BitVector.h:459
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
T
addEdge
static void addEdge(SmallVectorImpl< LazyCallGraph::Edge > &Edges, DenseMap< LazyCallGraph::Node *, int > &EdgeIndexMap, LazyCallGraph::Node &N, LazyCallGraph::Edge::Kind EK)
Definition: LazyCallGraph.cpp:65
llvm::Target
Target - Wrapper for Target specific information.
Definition: TargetRegistry.h:145
double
into xmm2 addss xmm2 xmm1 xmm3 addss xmm3 movaps xmm0 unpcklps xmm0 ret seems silly when it could just be one addps Expand libm rounding functions main should enable SSE DAZ mode and other fast SSE modes Think about doing i64 math in SSE regs on x86 This testcase should have no SSE instructions in and only one load from a constant double
Definition: README-SSE.txt:85
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:380
T1
#define T1
Definition: Mips16ISelLowering.cpp:340
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
SampleProfileInference.h
Node::Node
Node(Kind K_, Prec Precedence_=Prec::Primary, Cache RHSComponentCache_=Cache::No, Cache ArrayCache_=Cache::No, Cache FunctionCache_=Cache::No)
Definition: ItaniumDemangle.h:211
initialize
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
Definition: TargetLibraryInfo.cpp:116
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::SubDirectoryType::Bin
@ Bin
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
CommandLine.h
llvm::FlowJump::Source
uint64_t Source
Definition: SampleProfileInference.h:66
llvm::misexpect::extractWeights
Optional< SmallVector< uint32_t, 4 > > extractWeights(Instruction *I, LLVMContext &Ctx)
Definition: MisExpect.cpp:122
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::FlowBlock
A wrapper of a binary basic block.
Definition: SampleProfileInference.h:48
BitVector.h
llvm::BitVector
Definition: BitVector.h:75
llvm::dxil::PointerTypeAnalysis::run
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
Definition: PointerTypeAnalysis.cpp:101
llvm::cl::opt< bool >
llvm::FlowBlock::Flow
uint64_t Flow
Definition: SampleProfileInference.h:52
llvm::FlowJump::Flow
uint64_t Flow
Definition: SampleProfileInference.h:68
llvm::FlowJump::IsUnlikely
bool IsUnlikely
Definition: SampleProfileInference.h:69
uint64_t
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1598
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:99
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::FlowJump::Target
uint64_t Target
Definition: SampleProfileInference.h:67
llvm::FlowBlock::Index
uint64_t Index
Definition: SampleProfileInference.h:49
llvm::FlowBlock::UnknownWeight
bool UnknownWeight
Definition: SampleProfileInference.h:51
llvm::empty
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:268
llvm::FlowFunction
A wrapper of binary function with basic blocks and jumps.
Definition: SampleProfileInference.h:73
Flow
Annotate SI Control Flow
Definition: SIAnnotateControlFlow.cpp:118
llvm::FlowBlock::SuccJumps
std::vector< FlowJump * > SuccJumps
Definition: SampleProfileInference.h:54
llvm::cl::desc
Definition: CommandLine.h:405
Debug.h
INF
#define INF
llvm::FlowJump
A wrapper of a jump between two basic blocks.
Definition: SampleProfileInference.h:65