LLVM 17.0.0git
SampleProfileInference.cpp
Go to the documentation of this file.
1//===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a profile inference algorithm. Given an incomplete and
10// possibly imprecise block counts, the algorithm reconstructs realistic block
11// and edge counts that satisfy flow conservation rules, while minimally modify
12// input block counts.
13//
14//===----------------------------------------------------------------------===//
15
17#include "llvm/ADT/BitVector.h"
19#include "llvm/Support/Debug.h"
20#include <queue>
21#include <set>
22#include <stack>
23
24using namespace llvm;
25#define DEBUG_TYPE "sample-profile-inference"
26
27namespace {
28
29static cl::opt<bool> SampleProfileEvenFlowDistribution(
30 "sample-profile-even-flow-distribution", cl::init(true), cl::Hidden,
31 cl::desc("Try to evenly distribute flow when there are multiple equally "
32 "likely options."));
33
34static cl::opt<bool> SampleProfileRebalanceUnknown(
35 "sample-profile-rebalance-unknown", cl::init(true), cl::Hidden,
36 cl::desc("Evenly re-distribute flow among unknown subgraphs."));
37
38static cl::opt<bool> SampleProfileJoinIslands(
39 "sample-profile-join-islands", cl::init(true), cl::Hidden,
40 cl::desc("Join isolated components having positive flow."));
41
42static cl::opt<unsigned> SampleProfileProfiCostBlockInc(
43 "sample-profile-profi-cost-block-inc", cl::init(10), cl::Hidden,
44 cl::desc("The cost of increasing a block's count by one."));
45
46static cl::opt<unsigned> SampleProfileProfiCostBlockDec(
47 "sample-profile-profi-cost-block-dec", cl::init(20), cl::Hidden,
48 cl::desc("The cost of decreasing a block's count by one."));
49
50static cl::opt<unsigned> SampleProfileProfiCostBlockEntryInc(
51 "sample-profile-profi-cost-block-entry-inc", cl::init(40), cl::Hidden,
52 cl::desc("The cost of increasing the entry block's count by one."));
53
54static cl::opt<unsigned> SampleProfileProfiCostBlockEntryDec(
55 "sample-profile-profi-cost-block-entry-dec", cl::init(10), cl::Hidden,
56 cl::desc("The cost of decreasing the entry block's count by one."));
57
58static cl::opt<unsigned> SampleProfileProfiCostBlockZeroInc(
59 "sample-profile-profi-cost-block-zero-inc", cl::init(11), cl::Hidden,
60 cl::desc("The cost of increasing a count of zero-weight block by one."));
61
62static cl::opt<unsigned> SampleProfileProfiCostBlockUnknownInc(
63 "sample-profile-profi-cost-block-unknown-inc", cl::init(0), cl::Hidden,
64 cl::desc("The cost of increasing an unknown block's count by one."));
65
66/// A value indicating an infinite flow/capacity/weight of a block/edge.
67/// Not using numeric_limits<int64_t>::max(), as the values can be summed up
68/// during the execution.
69static constexpr int64_t INF = ((int64_t)1) << 50;
70
71/// The minimum-cost maximum flow algorithm.
72///
73/// The algorithm finds the maximum flow of minimum cost on a given (directed)
74/// network using a modified version of the classical Moore-Bellman-Ford
75/// approach. The algorithm applies a number of augmentation iterations in which
76/// flow is sent along paths of positive capacity from the source to the sink.
77/// The worst-case time complexity of the implementation is O(v(f)*m*n), where
78/// where m is the number of edges, n is the number of vertices, and v(f) is the
79/// value of the maximum flow. However, the observed running time on typical
80/// instances is sub-quadratic, that is, o(n^2).
81///
82/// The input is a set of edges with specified costs and capacities, and a pair
83/// of nodes (source and sink). The output is the flow along each edge of the
84/// minimum total cost respecting the given edge capacities.
85class MinCostMaxFlow {
86public:
87 MinCostMaxFlow(const ProfiParams &Params) : Params(Params) {}
88
89 // Initialize algorithm's data structures for a network of a given size.
90 void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) {
91 Source = SourceNode;
92 Target = SinkNode;
93
94 Nodes = std::vector<Node>(NodeCount);
95 Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
96 if (Params.EvenFlowDistribution)
97 AugmentingEdges =
98 std::vector<std::vector<Edge *>>(NodeCount, std::vector<Edge *>());
99 }
100
101 // Run the algorithm.
102 int64_t run() {
103 LLVM_DEBUG(dbgs() << "Starting profi for " << Nodes.size() << " nodes\n");
104
105 // Iteratively find an augmentation path/dag in the network and send the
106 // flow along its edges
107 size_t AugmentationIters = applyFlowAugmentation();
108
109 // Compute the total flow and its cost
110 int64_t TotalCost = 0;
111 int64_t TotalFlow = 0;
112 for (uint64_t Src = 0; Src < Nodes.size(); Src++) {
113 for (auto &Edge : Edges[Src]) {
114 if (Edge.Flow > 0) {
115 TotalCost += Edge.Cost * Edge.Flow;
116 if (Src == Source)
117 TotalFlow += Edge.Flow;
118 }
119 }
120 }
121 LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters
122 << " iterations with " << TotalFlow << " total flow"
123 << " of " << TotalCost << " cost\n");
124 (void)TotalFlow;
125 (void)AugmentationIters;
126 return TotalCost;
127 }
128
129 /// Adding an edge to the network with a specified capacity and a cost.
130 /// Multiple edges between a pair of nodes are allowed but self-edges
131 /// are not supported.
132 void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) {
133 assert(Capacity > 0 && "adding an edge of zero capacity");
134 assert(Src != Dst && "loop edge are not supported");
135
136 Edge SrcEdge;
137 SrcEdge.Dst = Dst;
138 SrcEdge.Cost = Cost;
139 SrcEdge.Capacity = Capacity;
140 SrcEdge.Flow = 0;
141 SrcEdge.RevEdgeIndex = Edges[Dst].size();
142
143 Edge DstEdge;
144 DstEdge.Dst = Src;
145 DstEdge.Cost = -Cost;
146 DstEdge.Capacity = 0;
147 DstEdge.Flow = 0;
148 DstEdge.RevEdgeIndex = Edges[Src].size();
149
150 Edges[Src].push_back(SrcEdge);
151 Edges[Dst].push_back(DstEdge);
152 }
153
154 /// Adding an edge to the network of infinite capacity and a given cost.
155 void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) {
156 addEdge(Src, Dst, INF, Cost);
157 }
158
159 /// Get the total flow from a given source node.
160 /// Returns a list of pairs (target node, amount of flow to the target).
161 const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
162 std::vector<std::pair<uint64_t, int64_t>> Flow;
163 for (const auto &Edge : Edges[Src]) {
164 if (Edge.Flow > 0)
165 Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));
166 }
167 return Flow;
168 }
169
170 /// Get the total flow between a pair of nodes.
171 int64_t getFlow(uint64_t Src, uint64_t Dst) const {
172 int64_t Flow = 0;
173 for (const auto &Edge : Edges[Src]) {
174 if (Edge.Dst == Dst) {
175 Flow += Edge.Flow;
176 }
177 }
178 return Flow;
179 }
180
181private:
182 /// Iteratively find an augmentation path/dag in the network and send the
183 /// flow along its edges. The method returns the number of applied iterations.
184 size_t applyFlowAugmentation() {
185 size_t AugmentationIters = 0;
186 while (findAugmentingPath()) {
187 uint64_t PathCapacity = computeAugmentingPathCapacity();
188 while (PathCapacity > 0) {
189 bool Progress = false;
190 if (Params.EvenFlowDistribution) {
191 // Identify node/edge candidates for augmentation
192 identifyShortestEdges(PathCapacity);
193
194 // Find an augmenting DAG
195 auto AugmentingOrder = findAugmentingDAG();
196
197 // Apply the DAG augmentation
198 Progress = augmentFlowAlongDAG(AugmentingOrder);
199 PathCapacity = computeAugmentingPathCapacity();
200 }
201
202 if (!Progress) {
203 augmentFlowAlongPath(PathCapacity);
204 PathCapacity = 0;
205 }
206
207 AugmentationIters++;
208 }
209 }
210 return AugmentationIters;
211 }
212
213 /// Compute the capacity of the cannonical augmenting path. If the path is
214 /// saturated (that is, no flow can be sent along the path), then return 0.
215 uint64_t computeAugmentingPathCapacity() {
216 uint64_t PathCapacity = INF;
217 uint64_t Now = Target;
218 while (Now != Source) {
219 uint64_t Pred = Nodes[Now].ParentNode;
220 auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
221
222 assert(Edge.Capacity >= Edge.Flow && "incorrect edge flow");
223 uint64_t EdgeCapacity = uint64_t(Edge.Capacity - Edge.Flow);
224 PathCapacity = std::min(PathCapacity, EdgeCapacity);
225
226 Now = Pred;
227 }
228 return PathCapacity;
229 }
230
231 /// Check for existence of an augmenting path with a positive capacity.
232 bool findAugmentingPath() {
233 // Initialize data structures
234 for (auto &Node : Nodes) {
235 Node.Distance = INF;
236 Node.ParentNode = uint64_t(-1);
237 Node.ParentEdgeIndex = uint64_t(-1);
238 Node.Taken = false;
239 }
240
241 std::queue<uint64_t> Queue;
242 Queue.push(Source);
243 Nodes[Source].Distance = 0;
244 Nodes[Source].Taken = true;
245 while (!Queue.empty()) {
246 uint64_t Src = Queue.front();
247 Queue.pop();
248 Nodes[Src].Taken = false;
249 // Although the residual network contains edges with negative costs
250 // (in particular, backward edges), it can be shown that there are no
251 // negative-weight cycles and the following two invariants are maintained:
252 // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V,
253 // where Dist is the length of the shortest path between two nodes. This
254 // allows to prune the search-space of the path-finding algorithm using
255 // the following early-stop criteria:
256 // -- If we find a path with zero-distance from Source to Target, stop the
257 // search, as the path is the shortest since Dist[Source, Target] >= 0;
258 // -- If we have Dist[Source, V] > Dist[Source, Target], then do not
259 // process node V, as it is guaranteed _not_ to be on a shortest path
260 // from Source to Target; it follows from inequalities
261 // Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target]
262 // >= Dist[Source, V]
263 if (!Params.EvenFlowDistribution && Nodes[Target].Distance == 0)
264 break;
265 if (Nodes[Src].Distance > Nodes[Target].Distance)
266 continue;
267
268 // Process adjacent edges
269 for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) {
270 auto &Edge = Edges[Src][EdgeIdx];
271 if (Edge.Flow < Edge.Capacity) {
272 uint64_t Dst = Edge.Dst;
273 int64_t NewDistance = Nodes[Src].Distance + Edge.Cost;
274 if (Nodes[Dst].Distance > NewDistance) {
275 // Update the distance and the parent node/edge
276 Nodes[Dst].Distance = NewDistance;
277 Nodes[Dst].ParentNode = Src;
278 Nodes[Dst].ParentEdgeIndex = EdgeIdx;
279 // Add the node to the queue, if it is not there yet
280 if (!Nodes[Dst].Taken) {
281 Queue.push(Dst);
282 Nodes[Dst].Taken = true;
283 }
284 }
285 }
286 }
287 }
288
289 return Nodes[Target].Distance != INF;
290 }
291
292 /// Update the current flow along the augmenting path.
293 void augmentFlowAlongPath(uint64_t PathCapacity) {
294 assert(PathCapacity > 0 && "found an incorrect augmenting path");
295 uint64_t Now = Target;
296 while (Now != Source) {
297 uint64_t Pred = Nodes[Now].ParentNode;
298 auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
299 auto &RevEdge = Edges[Now][Edge.RevEdgeIndex];
300
301 Edge.Flow += PathCapacity;
302 RevEdge.Flow -= PathCapacity;
303
304 Now = Pred;
305 }
306 }
307
308 /// Find an Augmenting DAG order using a modified version of DFS in which we
309 /// can visit a node multiple times. In the DFS search, when scanning each
310 /// edge out of a node, continue search at Edge.Dst endpoint if it has not
311 /// been discovered yet and its NumCalls < MaxDfsCalls. The algorithm
312 /// runs in O(MaxDfsCalls * |Edges| + |Nodes|) time.
313 /// It returns an Augmenting Order (Taken nodes in decreasing Finish time)
314 /// that starts with Source and ends with Target.
315 std::vector<uint64_t> findAugmentingDAG() {
316 // We use a stack based implemenation of DFS to avoid recursion.
317 // Defining DFS data structures:
318 // A pair (NodeIdx, EdgeIdx) at the top of the Stack denotes that
319 // - we are currently visiting Nodes[NodeIdx] and
320 // - the next edge to scan is Edges[NodeIdx][EdgeIdx]
321 typedef std::pair<uint64_t, uint64_t> StackItemType;
322 std::stack<StackItemType> Stack;
323 std::vector<uint64_t> AugmentingOrder;
324
325 // Phase 0: Initialize Node attributes and Time for DFS run
326 for (auto &Node : Nodes) {
327 Node.Discovery = 0;
328 Node.Finish = 0;
329 Node.NumCalls = 0;
330 Node.Taken = false;
331 }
332 uint64_t Time = 0;
333 // Mark Target as Taken
334 // Taken attribute will be propagated backwards from Target towards Source
335 Nodes[Target].Taken = true;
336
337 // Phase 1: Start DFS traversal from Source
338 Stack.emplace(Source, 0);
339 Nodes[Source].Discovery = ++Time;
340 while (!Stack.empty()) {
341 auto NodeIdx = Stack.top().first;
342 auto EdgeIdx = Stack.top().second;
343
344 // If we haven't scanned all edges out of NodeIdx, continue scanning
345 if (EdgeIdx < Edges[NodeIdx].size()) {
346 auto &Edge = Edges[NodeIdx][EdgeIdx];
347 auto &Dst = Nodes[Edge.Dst];
348 Stack.top().second++;
349
350 if (Edge.OnShortestPath) {
351 // If we haven't seen Edge.Dst so far, continue DFS search there
352 if (Dst.Discovery == 0 && Dst.NumCalls < MaxDfsCalls) {
353 Dst.Discovery = ++Time;
354 Stack.emplace(Edge.Dst, 0);
355 Dst.NumCalls++;
356 } else if (Dst.Taken && Dst.Finish != 0) {
357 // Else, if Edge.Dst already have a path to Target, so that NodeIdx
358 Nodes[NodeIdx].Taken = true;
359 }
360 }
361 } else {
362 // If we are done scanning all edge out of NodeIdx
363 Stack.pop();
364 // If we haven't found a path from NodeIdx to Target, forget about it
365 if (!Nodes[NodeIdx].Taken) {
366 Nodes[NodeIdx].Discovery = 0;
367 } else {
368 // If we have found a path from NodeIdx to Target, then finish NodeIdx
369 // and propagate Taken flag to DFS parent unless at the Source
370 Nodes[NodeIdx].Finish = ++Time;
371 // NodeIdx == Source if and only if the stack is empty
372 if (NodeIdx != Source) {
373 assert(!Stack.empty() && "empty stack while running dfs");
374 Nodes[Stack.top().first].Taken = true;
375 }
376 AugmentingOrder.push_back(NodeIdx);
377 }
378 }
379 }
380 // Nodes are collected decreasing Finish time, so the order is reversed
381 std::reverse(AugmentingOrder.begin(), AugmentingOrder.end());
382
383 // Phase 2: Extract all forward (DAG) edges and fill in AugmentingEdges
384 for (size_t Src : AugmentingOrder) {
385 AugmentingEdges[Src].clear();
386 for (auto &Edge : Edges[Src]) {
387 uint64_t Dst = Edge.Dst;
388 if (Edge.OnShortestPath && Nodes[Src].Taken && Nodes[Dst].Taken &&
389 Nodes[Dst].Finish < Nodes[Src].Finish) {
390 AugmentingEdges[Src].push_back(&Edge);
391 }
392 }
393 assert((Src == Target || !AugmentingEdges[Src].empty()) &&
394 "incorrectly constructed augmenting edges");
395 }
396
397 return AugmentingOrder;
398 }
399
400 /// Update the current flow along the given (acyclic) subgraph specified by
401 /// the vertex order, AugmentingOrder. The objective is to send as much flow
402 /// as possible while evenly distributing flow among successors of each node.
403 /// After the update at least one edge is saturated.
404 bool augmentFlowAlongDAG(const std::vector<uint64_t> &AugmentingOrder) {
405 // Phase 0: Initialization
406 for (uint64_t Src : AugmentingOrder) {
407 Nodes[Src].FracFlow = 0;
408 Nodes[Src].IntFlow = 0;
409 for (auto &Edge : AugmentingEdges[Src]) {
410 Edge->AugmentedFlow = 0;
411 }
412 }
413
414 // Phase 1: Send a unit of fractional flow along the DAG
415 uint64_t MaxFlowAmount = INF;
416 Nodes[Source].FracFlow = 1.0;
417 for (uint64_t Src : AugmentingOrder) {
418 assert((Src == Target || Nodes[Src].FracFlow > 0.0) &&
419 "incorrectly computed fractional flow");
420 // Distribute flow evenly among successors of Src
421 uint64_t Degree = AugmentingEdges[Src].size();
422 for (auto &Edge : AugmentingEdges[Src]) {
423 double EdgeFlow = Nodes[Src].FracFlow / Degree;
424 Nodes[Edge->Dst].FracFlow += EdgeFlow;
425 if (Edge->Capacity == INF)
426 continue;
427 uint64_t MaxIntFlow = double(Edge->Capacity - Edge->Flow) / EdgeFlow;
428 MaxFlowAmount = std::min(MaxFlowAmount, MaxIntFlow);
429 }
430 }
431 // Stop early if we cannot send any (integral) flow from Source to Target
432 if (MaxFlowAmount == 0)
433 return false;
434
435 // Phase 2: Send an integral flow of MaxFlowAmount
436 Nodes[Source].IntFlow = MaxFlowAmount;
437 for (uint64_t Src : AugmentingOrder) {
438 if (Src == Target)
439 break;
440 // Distribute flow evenly among successors of Src, rounding up to make
441 // sure all flow is sent
442 uint64_t Degree = AugmentingEdges[Src].size();
443 // We are guaranteeed that Node[Src].IntFlow <= SuccFlow * Degree
444 uint64_t SuccFlow = (Nodes[Src].IntFlow + Degree - 1) / Degree;
445 for (auto &Edge : AugmentingEdges[Src]) {
446 uint64_t Dst = Edge->Dst;
447 uint64_t EdgeFlow = std::min(Nodes[Src].IntFlow, SuccFlow);
448 EdgeFlow = std::min(EdgeFlow, uint64_t(Edge->Capacity - Edge->Flow));
449 Nodes[Dst].IntFlow += EdgeFlow;
450 Nodes[Src].IntFlow -= EdgeFlow;
451 Edge->AugmentedFlow += EdgeFlow;
452 }
453 }
454 assert(Nodes[Target].IntFlow <= MaxFlowAmount);
455 Nodes[Target].IntFlow = 0;
456
457 // Phase 3: Send excess flow back traversing the nodes backwards.
458 // Because of rounding, not all flow can be sent along the edges of Src.
459 // Hence, sending the remaining flow back to maintain flow conservation
460 for (size_t Idx = AugmentingOrder.size() - 1; Idx > 0; Idx--) {
461 uint64_t Src = AugmentingOrder[Idx - 1];
462 // Try to send excess flow back along each edge.
463 // Make sure we only send back flow we just augmented (AugmentedFlow).
464 for (auto &Edge : AugmentingEdges[Src]) {
465 uint64_t Dst = Edge->Dst;
466 if (Nodes[Dst].IntFlow == 0)
467 continue;
468 uint64_t EdgeFlow = std::min(Nodes[Dst].IntFlow, Edge->AugmentedFlow);
469 Nodes[Dst].IntFlow -= EdgeFlow;
470 Nodes[Src].IntFlow += EdgeFlow;
471 Edge->AugmentedFlow -= EdgeFlow;
472 }
473 }
474
475 // Phase 4: Update flow values along all edges
476 bool HasSaturatedEdges = false;
477 for (uint64_t Src : AugmentingOrder) {
478 // Verify that we have sent all the excess flow from the node
479 assert(Src == Source || Nodes[Src].IntFlow == 0);
480 for (auto &Edge : AugmentingEdges[Src]) {
481 assert(uint64_t(Edge->Capacity - Edge->Flow) >= Edge->AugmentedFlow);
482 // Update flow values along the edge and its reverse copy
483 auto &RevEdge = Edges[Edge->Dst][Edge->RevEdgeIndex];
484 Edge->Flow += Edge->AugmentedFlow;
485 RevEdge.Flow -= Edge->AugmentedFlow;
486 if (Edge->Capacity == Edge->Flow && Edge->AugmentedFlow > 0)
487 HasSaturatedEdges = true;
488 }
489 }
490
491 // The augmentation is successful iff at least one edge becomes saturated
492 return HasSaturatedEdges;
493 }
494
495 /// Identify candidate (shortest) edges for augmentation.
496 void identifyShortestEdges(uint64_t PathCapacity) {
497 assert(PathCapacity > 0 && "found an incorrect augmenting DAG");
498 // To make sure the augmentation DAG contains only edges with large residual
499 // capacity, we prune all edges whose capacity is below a fraction of
500 // the capacity of the augmented path.
501 // (All edges of the path itself are always in the DAG)
502 uint64_t MinCapacity = std::max(PathCapacity / 2, uint64_t(1));
503
504 // Decide which edges are on a shortest path from Source to Target
505 for (size_t Src = 0; Src < Nodes.size(); Src++) {
506 // An edge cannot be augmenting if the endpoint has large distance
507 if (Nodes[Src].Distance > Nodes[Target].Distance)
508 continue;
509
510 for (auto &Edge : Edges[Src]) {
511 uint64_t Dst = Edge.Dst;
512 Edge.OnShortestPath =
513 Src != Target && Dst != Source &&
514 Nodes[Dst].Distance <= Nodes[Target].Distance &&
515 Nodes[Dst].Distance == Nodes[Src].Distance + Edge.Cost &&
516 Edge.Capacity > Edge.Flow &&
517 uint64_t(Edge.Capacity - Edge.Flow) >= MinCapacity;
518 }
519 }
520 }
521
522 /// Maximum number of DFS iterations for DAG finding.
523 static constexpr uint64_t MaxDfsCalls = 10;
524
525 /// A node in a flow network.
526 struct Node {
527 /// The cost of the cheapest path from the source to the current node.
528 int64_t Distance;
529 /// The node preceding the current one in the path.
530 uint64_t ParentNode;
531 /// The index of the edge between ParentNode and the current node.
532 uint64_t ParentEdgeIndex;
533 /// An indicator of whether the current node is in a queue.
534 bool Taken;
535
536 /// Data fields utilized in DAG-augmentation:
537 /// Fractional flow.
538 double FracFlow;
539 /// Integral flow.
540 uint64_t IntFlow;
541 /// Discovery time.
542 uint64_t Discovery;
543 /// Finish time.
544 uint64_t Finish;
545 /// NumCalls.
546 uint64_t NumCalls;
547 };
548
549 /// An edge in a flow network.
550 struct Edge {
551 /// The cost of the edge.
552 int64_t Cost;
553 /// The capacity of the edge.
554 int64_t Capacity;
555 /// The current flow on the edge.
556 int64_t Flow;
557 /// The destination node of the edge.
558 uint64_t Dst;
559 /// The index of the reverse edge between Dst and the current node.
560 uint64_t RevEdgeIndex;
561
562 /// Data fields utilized in DAG-augmentation:
563 /// Whether the edge is currently on a shortest path from Source to Target.
564 bool OnShortestPath;
565 /// Extra flow along the edge.
566 uint64_t AugmentedFlow;
567 };
568
569 /// The set of network nodes.
570 std::vector<Node> Nodes;
571 /// The set of network edges.
572 std::vector<std::vector<Edge>> Edges;
573 /// Source node of the flow.
574 uint64_t Source;
575 /// Target (sink) node of the flow.
577 /// Augmenting edges.
578 std::vector<std::vector<Edge *>> AugmentingEdges;
579 /// Params for flow computation.
580 const ProfiParams &Params;
581};
582
583/// A post-processing adjustment of the control flow. It applies two steps by
584/// rerouting some flow and making it more realistic:
585///
586/// - First, it removes all isolated components ("islands") with a positive flow
587/// that are unreachable from the entry block. For every such component, we
588/// find the shortest from the entry to an exit passing through the component,
589/// and increase the flow by one unit along the path.
590///
591/// - Second, it identifies all "unknown subgraphs" consisting of basic blocks
592/// with no sampled counts. Then it rebalnces the flow that goes through such
593/// a subgraph so that each branch is taken with probability 50%.
594/// An unknown subgraph is such that for every two nodes u and v:
595/// - u dominates v and u is not unknown;
596/// - v post-dominates u; and
597/// - all inner-nodes of all (u,v)-paths are unknown.
598///
599class FlowAdjuster {
600public:
601 FlowAdjuster(const ProfiParams &Params, FlowFunction &Func)
602 : Params(Params), Func(Func) {}
603
604 /// Apply the post-processing.
605 void run() {
606 if (Params.JoinIslands) {
607 // Adjust the flow to get rid of isolated components
608 joinIsolatedComponents();
609 }
610
611 if (Params.RebalanceUnknown) {
612 // Rebalance the flow inside unknown subgraphs
613 rebalanceUnknownSubgraphs();
614 }
615 }
616
617private:
618 void joinIsolatedComponents() {
619 // Find blocks that are reachable from the source
620 auto Visited = BitVector(NumBlocks(), false);
621 findReachable(Func.Entry, Visited);
622
623 // Iterate over all non-reachable blocks and adjust their weights
624 for (uint64_t I = 0; I < NumBlocks(); I++) {
625 auto &Block = Func.Blocks[I];
626 if (Block.Flow > 0 && !Visited[I]) {
627 // Find a path from the entry to an exit passing through the block I
628 auto Path = findShortestPath(I);
629 // Increase the flow along the path
630 assert(Path.size() > 0 && Path[0]->Source == Func.Entry &&
631 "incorrectly computed path adjusting control flow");
632 Func.Blocks[Func.Entry].Flow += 1;
633 for (auto &Jump : Path) {
634 Jump->Flow += 1;
635 Func.Blocks[Jump->Target].Flow += 1;
636 // Update reachability
637 findReachable(Jump->Target, Visited);
638 }
639 }
640 }
641 }
642
643 /// Run BFS from a given block along the jumps with a positive flow and mark
644 /// all reachable blocks.
645 void findReachable(uint64_t Src, BitVector &Visited) {
646 if (Visited[Src])
647 return;
648 std::queue<uint64_t> Queue;
649 Queue.push(Src);
650 Visited[Src] = true;
651 while (!Queue.empty()) {
652 Src = Queue.front();
653 Queue.pop();
654 for (auto *Jump : Func.Blocks[Src].SuccJumps) {
655 uint64_t Dst = Jump->Target;
656 if (Jump->Flow > 0 && !Visited[Dst]) {
657 Queue.push(Dst);
658 Visited[Dst] = true;
659 }
660 }
661 }
662 }
663
664 /// Find the shortest path from the entry block to an exit block passing
665 /// through a given block.
666 std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) {
667 // A path from the entry block to BlockIdx
668 auto ForwardPath = findShortestPath(Func.Entry, BlockIdx);
669 // A path from BlockIdx to an exit block
670 auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock);
671
672 // Concatenate the two paths
673 std::vector<FlowJump *> Result;
674 Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end());
675 Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end());
676 return Result;
677 }
678
679 /// Apply the Dijkstra algorithm to find the shortest path from a given
680 /// Source to a given Target block.
681 /// If Target == -1, then the path ends at an exit block.
682 std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) {
683 // Quit early, if possible
684 if (Source == Target)
685 return std::vector<FlowJump *>();
686 if (Func.Blocks[Source].isExit() && Target == AnyExitBlock)
687 return std::vector<FlowJump *>();
688
689 // Initialize data structures
690 auto Distance = std::vector<int64_t>(NumBlocks(), INF);
691 auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr);
692 Distance[Source] = 0;
693 std::set<std::pair<uint64_t, uint64_t>> Queue;
694 Queue.insert(std::make_pair(Distance[Source], Source));
695
696 // Run the Dijkstra algorithm
697 while (!Queue.empty()) {
698 uint64_t Src = Queue.begin()->second;
699 Queue.erase(Queue.begin());
700 // If we found a solution, quit early
701 if (Src == Target ||
702 (Func.Blocks[Src].isExit() && Target == AnyExitBlock))
703 break;
704
705 for (auto *Jump : Func.Blocks[Src].SuccJumps) {
706 uint64_t Dst = Jump->Target;
707 int64_t JumpDist = jumpDistance(Jump);
708 if (Distance[Dst] > Distance[Src] + JumpDist) {
709 Queue.erase(std::make_pair(Distance[Dst], Dst));
710
711 Distance[Dst] = Distance[Src] + JumpDist;
712 Parent[Dst] = Jump;
713
714 Queue.insert(std::make_pair(Distance[Dst], Dst));
715 }
716 }
717 }
718 // If Target is not provided, find the closest exit block
719 if (Target == AnyExitBlock) {
720 for (uint64_t I = 0; I < NumBlocks(); I++) {
721 if (Func.Blocks[I].isExit() && Parent[I] != nullptr) {
722 if (Target == AnyExitBlock || Distance[Target] > Distance[I]) {
723 Target = I;
724 }
725 }
726 }
727 }
728 assert(Parent[Target] != nullptr && "a path does not exist");
729
730 // Extract the constructed path
731 std::vector<FlowJump *> Result;
732 uint64_t Now = Target;
733 while (Now != Source) {
734 assert(Now == Parent[Now]->Target && "incorrect parent jump");
735 Result.push_back(Parent[Now]);
736 Now = Parent[Now]->Source;
737 }
738 // Reverse the path, since it is extracted from Target to Source
739 std::reverse(Result.begin(), Result.end());
740 return Result;
741 }
742
743 /// A distance of a path for a given jump.
744 /// In order to incite the path to use blocks/jumps with large positive flow,
745 /// and avoid changing branch probability of outgoing edges drastically,
746 /// set the jump distance so as:
747 /// - to minimize the number of unlikely jumps used and subject to that,
748 /// - to minimize the number of Flow == 0 jumps used and subject to that,
749 /// - minimizes total multiplicative Flow increase for the remaining edges.
750 /// To capture this objective with integer distances, we round off fractional
751 /// parts to a multiple of 1 / BaseDistance.
752 int64_t jumpDistance(FlowJump *Jump) const {
753 if (Jump->IsUnlikely)
754 return Params.CostUnlikely;
755 uint64_t BaseDistance =
756 std::max(FlowAdjuster::MinBaseDistance,
757 std::min(Func.Blocks[Func.Entry].Flow,
758 Params.CostUnlikely / (2 * (NumBlocks() + 1))));
759 if (Jump->Flow > 0)
760 return BaseDistance + BaseDistance / Jump->Flow;
761 return 2 * BaseDistance * (NumBlocks() + 1);
762 };
763
764 uint64_t NumBlocks() const { return Func.Blocks.size(); }
765
766 /// Rebalance unknown subgraphs so that the flow is split evenly across the
767 /// outgoing branches of every block of the subgraph. The method iterates over
768 /// blocks with known weight and identifies unknown subgraphs rooted at the
769 /// blocks. Then it verifies if flow rebalancing is feasible and applies it.
770 void rebalanceUnknownSubgraphs() {
771 // Try to find unknown subgraphs from each block
772 for (const FlowBlock &SrcBlock : Func.Blocks) {
773 // Verify if rebalancing rooted at SrcBlock is feasible
774 if (!canRebalanceAtRoot(&SrcBlock))
775 continue;
776
777 // Find an unknown subgraphs starting at SrcBlock. Along the way,
778 // fill in known destinations and intermediate unknown blocks.
779 std::vector<FlowBlock *> UnknownBlocks;
780 std::vector<FlowBlock *> KnownDstBlocks;
781 findUnknownSubgraph(&SrcBlock, KnownDstBlocks, UnknownBlocks);
782
783 // Verify if rebalancing of the subgraph is feasible. If the search is
784 // successful, find the unique destination block (which can be null)
785 FlowBlock *DstBlock = nullptr;
786 if (!canRebalanceSubgraph(&SrcBlock, KnownDstBlocks, UnknownBlocks,
787 DstBlock))
788 continue;
789
790 // We cannot rebalance subgraphs containing cycles among unknown blocks
791 if (!isAcyclicSubgraph(&SrcBlock, DstBlock, UnknownBlocks))
792 continue;
793
794 // Rebalance the flow
795 rebalanceUnknownSubgraph(&SrcBlock, DstBlock, UnknownBlocks);
796 }
797 }
798
799 /// Verify if rebalancing rooted at a given block is possible.
800 bool canRebalanceAtRoot(const FlowBlock *SrcBlock) {
801 // Do not attempt to find unknown subgraphs from an unknown or a
802 // zero-flow block
803 if (SrcBlock->HasUnknownWeight || SrcBlock->Flow == 0)
804 return false;
805
806 // Do not attempt to process subgraphs from a block w/o unknown sucessors
807 bool HasUnknownSuccs = false;
808 for (auto *Jump : SrcBlock->SuccJumps) {
809 if (Func.Blocks[Jump->Target].HasUnknownWeight) {
810 HasUnknownSuccs = true;
811 break;
812 }
813 }
814 if (!HasUnknownSuccs)
815 return false;
816
817 return true;
818 }
819
820 /// Find an unknown subgraph starting at block SrcBlock. The method sets
821 /// identified destinations, KnownDstBlocks, and intermediate UnknownBlocks.
822 void findUnknownSubgraph(const FlowBlock *SrcBlock,
823 std::vector<FlowBlock *> &KnownDstBlocks,
824 std::vector<FlowBlock *> &UnknownBlocks) {
825 // Run BFS from SrcBlock and make sure all paths are going through unknown
826 // blocks and end at a known DstBlock
827 auto Visited = BitVector(NumBlocks(), false);
828 std::queue<uint64_t> Queue;
829
830 Queue.push(SrcBlock->Index);
831 Visited[SrcBlock->Index] = true;
832 while (!Queue.empty()) {
833 auto &Block = Func.Blocks[Queue.front()];
834 Queue.pop();
835 // Process blocks reachable from Block
836 for (auto *Jump : Block.SuccJumps) {
837 // If Jump can be ignored, skip it
838 if (ignoreJump(SrcBlock, nullptr, Jump))
839 continue;
840
841 uint64_t Dst = Jump->Target;
842 // If Dst has been visited, skip Jump
843 if (Visited[Dst])
844 continue;
845 // Process block Dst
846 Visited[Dst] = true;
847 if (!Func.Blocks[Dst].HasUnknownWeight) {
848 KnownDstBlocks.push_back(&Func.Blocks[Dst]);
849 } else {
850 Queue.push(Dst);
851 UnknownBlocks.push_back(&Func.Blocks[Dst]);
852 }
853 }
854 }
855 }
856
857 /// Verify if rebalancing of the subgraph is feasible. If the checks are
858 /// successful, set the unique destination block, DstBlock (can be null).
859 bool canRebalanceSubgraph(const FlowBlock *SrcBlock,
860 const std::vector<FlowBlock *> &KnownDstBlocks,
861 const std::vector<FlowBlock *> &UnknownBlocks,
862 FlowBlock *&DstBlock) {
863 // If the list of unknown blocks is empty, we don't need rebalancing
864 if (UnknownBlocks.empty())
865 return false;
866
867 // If there are multiple known sinks, we can't rebalance
868 if (KnownDstBlocks.size() > 1)
869 return false;
870 DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front();
871
872 // Verify sinks of the subgraph
873 for (auto *Block : UnknownBlocks) {
874 if (Block->SuccJumps.empty()) {
875 // If there are multiple (known and unknown) sinks, we can't rebalance
876 if (DstBlock != nullptr)
877 return false;
878 continue;
879 }
880 size_t NumIgnoredJumps = 0;
881 for (auto *Jump : Block->SuccJumps) {
882 if (ignoreJump(SrcBlock, DstBlock, Jump))
883 NumIgnoredJumps++;
884 }
885 // If there is a non-sink block in UnknownBlocks with all jumps ignored,
886 // then we can't rebalance
887 if (NumIgnoredJumps == Block->SuccJumps.size())
888 return false;
889 }
890
891 return true;
892 }
893
894 /// Decide whether the Jump is ignored while processing an unknown subgraphs
895 /// rooted at basic block SrcBlock with the destination block, DstBlock.
896 bool ignoreJump(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
897 const FlowJump *Jump) {
898 // Ignore unlikely jumps with zero flow
899 if (Jump->IsUnlikely && Jump->Flow == 0)
900 return true;
901
902 auto JumpSource = &Func.Blocks[Jump->Source];
903 auto JumpTarget = &Func.Blocks[Jump->Target];
904
905 // Do not ignore jumps coming into DstBlock
906 if (DstBlock != nullptr && JumpTarget == DstBlock)
907 return false;
908
909 // Ignore jumps out of SrcBlock to known blocks
910 if (!JumpTarget->HasUnknownWeight && JumpSource == SrcBlock)
911 return true;
912
913 // Ignore jumps to known blocks with zero flow
914 if (!JumpTarget->HasUnknownWeight && JumpTarget->Flow == 0)
915 return true;
916
917 return false;
918 }
919
920 /// Verify if the given unknown subgraph is acyclic, and if yes, reorder
921 /// UnknownBlocks in the topological order (so that all jumps are "forward").
922 bool isAcyclicSubgraph(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
923 std::vector<FlowBlock *> &UnknownBlocks) {
924 // Extract local in-degrees in the considered subgraph
925 auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);
926 auto fillInDegree = [&](const FlowBlock *Block) {
927 for (auto *Jump : Block->SuccJumps) {
928 if (ignoreJump(SrcBlock, DstBlock, Jump))
929 continue;
930 LocalInDegree[Jump->Target]++;
931 }
932 };
933 fillInDegree(SrcBlock);
934 for (auto *Block : UnknownBlocks) {
935 fillInDegree(Block);
936 }
937 // A loop containing SrcBlock
938 if (LocalInDegree[SrcBlock->Index] > 0)
939 return false;
940
941 std::vector<FlowBlock *> AcyclicOrder;
942 std::queue<uint64_t> Queue;
943 Queue.push(SrcBlock->Index);
944 while (!Queue.empty()) {
945 FlowBlock *Block = &Func.Blocks[Queue.front()];
946 Queue.pop();
947 // Stop propagation once we reach DstBlock, if any
948 if (DstBlock != nullptr && Block == DstBlock)
949 break;
950
951 // Keep an acyclic order of unknown blocks
952 if (Block->HasUnknownWeight && Block != SrcBlock)
953 AcyclicOrder.push_back(Block);
954
955 // Add to the queue all successors with zero local in-degree
956 for (auto *Jump : Block->SuccJumps) {
957 if (ignoreJump(SrcBlock, DstBlock, Jump))
958 continue;
959 uint64_t Dst = Jump->Target;
960 LocalInDegree[Dst]--;
961 if (LocalInDegree[Dst] == 0) {
962 Queue.push(Dst);
963 }
964 }
965 }
966
967 // If there is a cycle in the subgraph, AcyclicOrder contains only a subset
968 // of all blocks
969 if (UnknownBlocks.size() != AcyclicOrder.size())
970 return false;
971 UnknownBlocks = AcyclicOrder;
972 return true;
973 }
974
975 /// Rebalance a given subgraph rooted at SrcBlock, ending at DstBlock and
976 /// having UnknownBlocks intermediate blocks.
977 void rebalanceUnknownSubgraph(const FlowBlock *SrcBlock,
978 const FlowBlock *DstBlock,
979 const std::vector<FlowBlock *> &UnknownBlocks) {
980 assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph");
981
982 // Ditribute flow from the source block
983 uint64_t BlockFlow = 0;
984 // SrcBlock's flow is the sum of outgoing flows along non-ignored jumps
985 for (auto *Jump : SrcBlock->SuccJumps) {
986 if (ignoreJump(SrcBlock, DstBlock, Jump))
987 continue;
988 BlockFlow += Jump->Flow;
989 }
990 rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow);
991
992 // Ditribute flow from the remaining blocks
993 for (auto *Block : UnknownBlocks) {
994 assert(Block->HasUnknownWeight && "incorrect unknown subgraph");
995 uint64_t BlockFlow = 0;
996 // Block's flow is the sum of incoming flows
997 for (auto *Jump : Block->PredJumps) {
998 BlockFlow += Jump->Flow;
999 }
1000 Block->Flow = BlockFlow;
1001 rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow);
1002 }
1003 }
1004
1005 /// Redistribute flow for a block in a subgraph rooted at SrcBlock,
1006 /// and ending at DstBlock.
1007 void rebalanceBlock(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
1008 const FlowBlock *Block, uint64_t BlockFlow) {
1009 // Process all successor jumps and update corresponding flow values
1010 size_t BlockDegree = 0;
1011 for (auto *Jump : Block->SuccJumps) {
1012 if (ignoreJump(SrcBlock, DstBlock, Jump))
1013 continue;
1014 BlockDegree++;
1015 }
1016 // If all successor jumps of the block are ignored, skip it
1017 if (DstBlock == nullptr && BlockDegree == 0)
1018 return;
1019 assert(BlockDegree > 0 && "all outgoing jumps are ignored");
1020
1021 // Each of the Block's successors gets the following amount of flow.
1022 // Rounding the value up so that all flow is propagated
1023 uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree;
1024 for (auto *Jump : Block->SuccJumps) {
1025 if (ignoreJump(SrcBlock, DstBlock, Jump))
1026 continue;
1027 uint64_t Flow = std::min(SuccFlow, BlockFlow);
1028 Jump->Flow = Flow;
1029 BlockFlow -= Flow;
1030 }
1031 assert(BlockFlow == 0 && "not all flow is propagated");
1032 }
1033
1034 /// A constant indicating an arbitrary exit block of a function.
1035 static constexpr uint64_t AnyExitBlock = uint64_t(-1);
1036 /// Minimum BaseDistance for the jump distance values in island joining.
1037 static constexpr uint64_t MinBaseDistance = 10000;
1038
1039 /// Params for flow computation.
1040 const ProfiParams &Params;
1041 /// The function.
1042 FlowFunction &Func;
1043};
1044
1045std::pair<int64_t, int64_t> assignBlockCosts(const ProfiParams &Params,
1046 const FlowBlock &Block);
1047std::pair<int64_t, int64_t> assignJumpCosts(const ProfiParams &Params,
1048 const FlowJump &Jump);
1049
1050/// Initializing flow network for a given function.
1051///
1052/// Every block is split into two nodes that are responsible for (i) an
1053/// incoming flow, (ii) an outgoing flow; they penalize an increase or a
1054/// reduction of the block weight.
1055void initializeNetwork(const ProfiParams &Params, MinCostMaxFlow &Network,
1056 FlowFunction &Func) {
1057 uint64_t NumBlocks = Func.Blocks.size();
1058 assert(NumBlocks > 1 && "Too few blocks in a function");
1059 uint64_t NumJumps = Func.Jumps.size();
1060 assert(NumJumps > 0 && "Too few jumps in a function");
1061
1062 // Introducing dummy source/sink pairs to allow flow circulation.
1063 // The nodes corresponding to blocks of the function have indicies in
1064 // the range [0 .. 2 * NumBlocks); the dummy sources/sinks are indexed by the
1065 // next four values.
1066 uint64_t S = 2 * NumBlocks;
1067 uint64_t T = S + 1;
1068 uint64_t S1 = S + 2;
1069 uint64_t T1 = S + 3;
1070
1071 Network.initialize(2 * NumBlocks + 4, S1, T1);
1072
1073 // Initialize nodes of the flow network
1074 for (uint64_t B = 0; B < NumBlocks; B++) {
1075 auto &Block = Func.Blocks[B];
1076
1077 // Split every block into two auxiliary nodes to allow
1078 // increase/reduction of the block count.
1079 uint64_t Bin = 2 * B;
1080 uint64_t Bout = 2 * B + 1;
1081
1082 // Edges from S and to T
1083 if (Block.isEntry()) {
1084 Network.addEdge(S, Bin, 0);
1085 } else if (Block.isExit()) {
1086 Network.addEdge(Bout, T, 0);
1087 }
1088
1089 // Assign costs for increasing/decreasing the block counts
1090 auto [AuxCostInc, AuxCostDec] = assignBlockCosts(Params, Block);
1091
1092 // Add the corresponding edges to the network
1093 Network.addEdge(Bin, Bout, AuxCostInc);
1094 if (Block.Weight > 0) {
1095 Network.addEdge(Bout, Bin, Block.Weight, AuxCostDec);
1096 Network.addEdge(S1, Bout, Block.Weight, 0);
1097 Network.addEdge(Bin, T1, Block.Weight, 0);
1098 }
1099 }
1100
1101 // Initialize edges of the flow network
1102 for (uint64_t J = 0; J < NumJumps; J++) {
1103 auto &Jump = Func.Jumps[J];
1104
1105 // Get the endpoints corresponding to the jump
1106 uint64_t Jin = 2 * Jump.Source + 1;
1107 uint64_t Jout = 2 * Jump.Target;
1108
1109 // Assign costs for increasing/decreasing the jump counts
1110 auto [AuxCostInc, AuxCostDec] = assignJumpCosts(Params, Jump);
1111
1112 // Add the corresponding edges to the network
1113 Network.addEdge(Jin, Jout, AuxCostInc);
1114 if (Jump.Weight > 0) {
1115 Network.addEdge(Jout, Jin, Jump.Weight, AuxCostDec);
1116 Network.addEdge(S1, Jout, Jump.Weight, 0);
1117 Network.addEdge(Jin, T1, Jump.Weight, 0);
1118 }
1119 }
1120
1121 // Make sure we have a valid flow circulation
1122 Network.addEdge(T, S, 0);
1123}
1124
1125/// Assign costs for increasing/decreasing the block counts.
1126std::pair<int64_t, int64_t> assignBlockCosts(const ProfiParams &Params,
1127 const FlowBlock &Block) {
1128 // Modifying the weight of an unlikely block is expensive
1129 if (Block.IsUnlikely)
1130 return std::make_pair(Params.CostUnlikely, Params.CostUnlikely);
1131
1132 // Assign default values for the costs
1133 int64_t CostInc = Params.CostBlockInc;
1134 int64_t CostDec = Params.CostBlockDec;
1135 // Update the costs depending on the block metadata
1136 if (Block.HasUnknownWeight) {
1137 CostInc = Params.CostBlockUnknownInc;
1138 CostDec = 0;
1139 } else {
1140 // Increasing the count for "cold" blocks with zero initial count is more
1141 // expensive than for "hot" ones
1142 if (Block.Weight == 0)
1143 CostInc = Params.CostBlockZeroInc;
1144 // Modifying the count of the entry block is expensive
1145 if (Block.isEntry()) {
1146 CostInc = Params.CostBlockEntryInc;
1147 CostDec = Params.CostBlockEntryDec;
1148 }
1149 }
1150 return std::make_pair(CostInc, CostDec);
1151}
1152
1153/// Assign costs for increasing/decreasing the jump counts.
1154std::pair<int64_t, int64_t> assignJumpCosts(const ProfiParams &Params,
1155 const FlowJump &Jump) {
1156 // Modifying the weight of an unlikely jump is expensive
1157 if (Jump.IsUnlikely)
1158 return std::make_pair(Params.CostUnlikely, Params.CostUnlikely);
1159
1160 // Assign default values for the costs
1161 int64_t CostInc = Params.CostJumpInc;
1162 int64_t CostDec = Params.CostJumpDec;
1163 // Update the costs depending on the block metadata
1164 if (Jump.Source + 1 == Jump.Target) {
1165 // Adjusting the fall-through branch
1166 CostInc = Params.CostJumpFTInc;
1167 CostDec = Params.CostJumpFTDec;
1168 }
1169 if (Jump.HasUnknownWeight) {
1170 // The cost is different for fall-through and non-fall-through branches
1171 if (Jump.Source + 1 == Jump.Target)
1172 CostInc = Params.CostJumpUnknownFTInc;
1173 else
1174 CostInc = Params.CostJumpUnknownInc;
1175 CostDec = 0;
1176 } else {
1177 assert(Jump.Weight > 0 && "found zero-weight jump with a positive weight");
1178 }
1179 return std::make_pair(CostInc, CostDec);
1180}
1181
1182/// Extract resulting block and edge counts from the flow network.
1183void extractWeights(const ProfiParams &Params, MinCostMaxFlow &Network,
1184 FlowFunction &Func) {
1185 uint64_t NumBlocks = Func.Blocks.size();
1186 uint64_t NumJumps = Func.Jumps.size();
1187
1188 // Extract resulting jump counts
1189 for (uint64_t J = 0; J < NumJumps; J++) {
1190 auto &Jump = Func.Jumps[J];
1191 uint64_t SrcOut = 2 * Jump.Source + 1;
1192 uint64_t DstIn = 2 * Jump.Target;
1193
1194 int64_t Flow = 0;
1195 int64_t AuxFlow = Network.getFlow(SrcOut, DstIn);
1196 if (Jump.Source != Jump.Target)
1197 Flow = int64_t(Jump.Weight) + AuxFlow;
1198 else
1199 Flow = int64_t(Jump.Weight) + (AuxFlow > 0 ? AuxFlow : 0);
1200
1201 Jump.Flow = Flow;
1202 assert(Flow >= 0 && "negative jump flow");
1203 }
1204
1205 // Extract resulting block counts
1206 auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
1207 auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
1208 for (auto &Jump : Func.Jumps) {
1209 InFlow[Jump.Target] += Jump.Flow;
1210 OutFlow[Jump.Source] += Jump.Flow;
1211 }
1212 for (uint64_t B = 0; B < NumBlocks; B++) {
1213 auto &Block = Func.Blocks[B];
1214 Block.Flow = std::max(OutFlow[B], InFlow[B]);
1215 }
1216}
1217
1218#ifndef NDEBUG
1219/// Verify that the provided block/jump weights are as expected.
1220void verifyInput(const FlowFunction &Func) {
1221 // Verify the entry block
1222 assert(Func.Entry == 0 && Func.Blocks[0].isEntry());
1223 for (size_t I = 1; I < Func.Blocks.size(); I++) {
1224 assert(!Func.Blocks[I].isEntry() && "multiple entry blocks");
1225 }
1226 // Verify CFG jumps
1227 for (auto &Block : Func.Blocks) {
1228 assert((!Block.isEntry() || !Block.isExit()) &&
1229 "a block cannot be an entry and an exit");
1230 }
1231 // Verify input block weights
1232 for (auto &Block : Func.Blocks) {
1233 assert((!Block.HasUnknownWeight || Block.Weight == 0 || Block.isEntry()) &&
1234 "non-zero weight of a block w/o weight except for an entry");
1235 }
1236 // Verify input jump weights
1237 for (auto &Jump : Func.Jumps) {
1238 assert((!Jump.HasUnknownWeight || Jump.Weight == 0) &&
1239 "non-zero weight of a jump w/o weight");
1240 }
1241}
1242
1243/// Verify that the computed flow values satisfy flow conservation rules.
1244void verifyOutput(const FlowFunction &Func) {
1245 const uint64_t NumBlocks = Func.Blocks.size();
1246 auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
1247 auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
1248 for (const auto &Jump : Func.Jumps) {
1249 InFlow[Jump.Target] += Jump.Flow;
1250 OutFlow[Jump.Source] += Jump.Flow;
1251 }
1252
1253 uint64_t TotalInFlow = 0;
1254 uint64_t TotalOutFlow = 0;
1255 for (uint64_t I = 0; I < NumBlocks; I++) {
1256 auto &Block = Func.Blocks[I];
1257 if (Block.isEntry()) {
1258 TotalInFlow += Block.Flow;
1259 assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
1260 } else if (Block.isExit()) {
1261 TotalOutFlow += Block.Flow;
1262 assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
1263 } else {
1264 assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
1265 assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
1266 }
1267 }
1268 assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow");
1269
1270 // Verify that there are no isolated flow components
1271 // One could modify FlowFunction to hold edges indexed by the sources, which
1272 // will avoid a creation of the object
1273 auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks);
1274 for (const auto &Jump : Func.Jumps) {
1275 if (Jump.Flow > 0) {
1276 PositiveFlowEdges[Jump.Source].push_back(Jump.Target);
1277 }
1278 }
1279
1280 // Run BFS from the source along edges with positive flow
1281 std::queue<uint64_t> Queue;
1282 auto Visited = BitVector(NumBlocks, false);
1283 Queue.push(Func.Entry);
1284 Visited[Func.Entry] = true;
1285 while (!Queue.empty()) {
1286 uint64_t Src = Queue.front();
1287 Queue.pop();
1288 for (uint64_t Dst : PositiveFlowEdges[Src]) {
1289 if (!Visited[Dst]) {
1290 Queue.push(Dst);
1291 Visited[Dst] = true;
1292 }
1293 }
1294 }
1295
1296 // Verify that every block that has a positive flow is reached from the source
1297 // along edges with a positive flow
1298 for (uint64_t I = 0; I < NumBlocks; I++) {
1299 auto &Block = Func.Blocks[I];
1300 assert((Visited[I] || Block.Flow == 0) && "an isolated flow component");
1301 }
1302}
1303#endif
1304
1305} // end of anonymous namespace
1306
1307/// Apply the profile inference algorithm for a given function
1309#ifndef NDEBUG
1310 // Verify the input data
1311 verifyInput(Func);
1312#endif
1313
1314 // Create and apply an inference network model
1315 auto InferenceNetwork = MinCostMaxFlow(Params);
1316 initializeNetwork(Params, InferenceNetwork, Func);
1317 InferenceNetwork.run();
1318
1319 // Extract flow values for every block and every edge
1320 extractWeights(Params, InferenceNetwork, Func);
1321
1322 // Post-processing adjustments to the flow
1323 auto Adjuster = FlowAdjuster(Params, Func);
1324 Adjuster.run();
1325
1326#ifndef NDEBUG
1327 // Verify the result
1328 verifyOutput(Func);
1329#endif
1330}
1331
1332/// Apply the profile inference algorithm for a given flow function
1334 ProfiParams Params;
1335 // Set the params from the command-line flags.
1336 Params.EvenFlowDistribution = SampleProfileEvenFlowDistribution;
1337 Params.RebalanceUnknown = SampleProfileRebalanceUnknown;
1338 Params.JoinIslands = SampleProfileJoinIslands;
1339 Params.CostBlockInc = SampleProfileProfiCostBlockInc;
1340 Params.CostBlockDec = SampleProfileProfiCostBlockDec;
1341 Params.CostBlockEntryInc = SampleProfileProfiCostBlockEntryInc;
1342 Params.CostBlockEntryDec = SampleProfileProfiCostBlockEntryDec;
1343 Params.CostBlockZeroInc = SampleProfileProfiCostBlockZeroInc;
1344 Params.CostBlockUnknownInc = SampleProfileProfiCostBlockUnknownInc;
1345
1346 applyFlowInference(Params, Func);
1347}
This file implements the BitVector class.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
static void addEdge(SmallVectorImpl< LazyCallGraph::Edge > &Edges, DenseMap< LazyCallGraph::Node *, int > &EdgeIndexMap, LazyCallGraph::Node &N, LazyCallGraph::Edge::Kind EK)
#define I(x, y, z)
Definition: MD5.cpp:58
#define T1
return ToRemove size() > 0
Annotate SI Control Flow
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file provides the interface for the profile inference algorithm, profi.
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
void push_back(bool Val)
Definition: BitVector.h:459
Target - Wrapper for Target specific information.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void applyFlowInference(const ProfiParams &Params, FlowFunction &Func)
Apply the profile inference algorithm for a given function.
#define INF
A wrapper of a binary basic block.
std::vector< FlowJump * > SuccJumps
A wrapper of binary function with basic blocks and jumps.
A wrapper of a jump between two basic blocks.
Various thresholds and options controlling the behavior of the profile inference algorithm.
unsigned CostJumpUnknownFTInc
The cost of increasing an unknown fall-through jump's count by one.
unsigned CostBlockInc
The cost of increasing a block's count by one.
unsigned CostJumpFTInc
The cost of increasing a fall-through jump's count by one.
bool RebalanceUnknown
Evenly re-distribute flow among unknown subgraphs.
const int64_t CostUnlikely
The cost of taking an unlikely block/jump.
unsigned CostJumpDec
The cost of decreasing a jump's count by one.
bool JoinIslands
Join isolated components having positive flow.
unsigned CostBlockZeroInc
The cost of increasing a count of zero-weight block by one.
unsigned CostBlockEntryDec
The cost of decreasing the entry block's count by one.
unsigned CostJumpInc
The cost of increasing a jump's count by one.
unsigned CostJumpUnknownInc
The cost of increasing an unknown jump's count by one.
unsigned CostBlockUnknownInc
The cost of increasing an unknown block's count by one.
unsigned CostJumpFTDec
The cost of decreasing a fall-through jump's count by one.
unsigned CostBlockDec
The cost of decreasing a block's count by one.
unsigned CostBlockEntryInc
The cost of increasing the entry block's count by one.
bool EvenFlowDistribution
Evenly distribute flow when there are multiple equally likely options.