//===- BalancedPartitioning.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements BalancedPartitioning, a recursive balanced graph
// partitioning algorithm.
//
//===----------------------------------------------------------------------===//

#include "llvm/Support/BalancedPartitioning.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/ThreadPool.h"

using namespace llvm;
#define DEBUG_TYPE "balanced-partitioning"

void BPFunctionNode::dump(raw_ostream &OS) const {
  OS << formatv("{{ID={0} Utilities={{{1:$[,]}} Bucket={2}}", Id,
                llvm::make_range(UtilityNodes.begin(), UtilityNodes.end()),
                Bucket);
}

template <typename Func>
void BalancedPartitioning::BPThreadPool::async(Func &&F) {
#if LLVM_ENABLE_THREADS
  // This new thread could spawn more threads, so mark it as active
  ++NumActiveThreads;
  TheThreadPool.async([=]() {
    // Run the task
    F();

    // This thread will no longer spawn new threads, so mark it as inactive
    if (--NumActiveThreads == 0) {
      // There are no more active threads, so mark as finished and notify
      {
        std::unique_lock<std::mutex> lock(mtx);
        assert(!IsFinishedSpawning);
        IsFinishedSpawning = true;
      }
      cv.notify_one();
    }
  });
#else
  llvm_unreachable("threads are disabled");
#endif
}

void BalancedPartitioning::BPThreadPool::wait() {
#if LLVM_ENABLE_THREADS
  // TODO: We could remove the mutex and condition variable and use
  // std::atomic::wait() instead, but that isn't available until C++20
  {
    std::unique_lock<std::mutex> lock(mtx);
    cv.wait(lock, [&]() { return IsFinishedSpawning; });
    assert(IsFinishedSpawning && NumActiveThreads == 0);
  }
  // Now we can call ThreadPool::wait() since all tasks have been submitted
  TheThreadPool.wait();
#else
  llvm_unreachable("threads are disabled");
#endif
}

BalancedPartitioning::BalancedPartitioning(
    const BalancedPartitioningConfig &Config)
    : Config(Config) {
  // Pre-computing log2 values
  Log2Cache[0] = 0.0;
  for (unsigned I = 1; I < LOG_CACHE_SIZE; I++)
    Log2Cache[I] = std::log2(I);
}

void BalancedPartitioning::run(std::vector<BPFunctionNode> &Nodes) const {
  LLVM_DEBUG(
      dbgs() << format(
          "Partitioning %d nodes using depth %d and %d iterations per split\n",
          Nodes.size(), Config.SplitDepth, Config.IterationsPerSplit));
  std::optional<BPThreadPool> TP;
#if LLVM_ENABLE_THREADS
  ThreadPool TheThreadPool;
  if (Config.TaskSplitDepth > 1)
    TP.emplace(TheThreadPool);
#endif

  // Record the input order
  for (unsigned I = 0; I < Nodes.size(); I++)
    Nodes[I].InputOrderIndex = I;

  auto NodesRange = llvm::make_range(Nodes.begin(), Nodes.end());
  auto BisectTask = [=, &TP]() {
    bisect(NodesRange, /*RecDepth=*/0, /*RootBucket=*/1, /*Offset=*/0, TP);
  };
  if (TP) {
    TP->async(std::move(BisectTask));
    TP->wait();
  } else {
    BisectTask();
  }

  llvm::stable_sort(NodesRange, [](const auto &L, const auto &R) {
    return L.Bucket < R.Bucket;
  });

  LLVM_DEBUG(dbgs() << "Balanced partitioning completed\n");
}
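
// A minimal usage sketch (illustrative only; the exact BPFunctionNode
// constructor and the config defaults live in
// llvm/Support/BalancedPartitioning.h):
//
//   BalancedPartitioningConfig Config;
//   BalancedPartitioning BP(Config);
//   std::vector<BPFunctionNode> Nodes;
//   // Nodes 0 and 2 share utility nodes {0, 1}, so they should end up in
//   // the same half of the final order.
//   Nodes.push_back(BPFunctionNode(/*Id=*/0, /*UtilityNodes=*/{0, 1}));
//   Nodes.push_back(BPFunctionNode(/*Id=*/1, /*UtilityNodes=*/{1, 2}));
//   Nodes.push_back(BPFunctionNode(/*Id=*/2, /*UtilityNodes=*/{0, 1}));
//   BP.run(Nodes); // Nodes is now in final order; N.Bucket is the position.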

void BalancedPartitioning::bisect(const FunctionNodeRange Nodes,
                                  unsigned RecDepth, unsigned RootBucket,
                                  unsigned Offset,
                                  std::optional<BPThreadPool> &TP) const {
  unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end());
  if (NumNodes <= 1 || RecDepth >= Config.SplitDepth) {
    // We've reached the lowest level of the recursion tree. Fall back to the
    // original order and assign to buckets.
    llvm::stable_sort(Nodes, [](const auto &L, const auto &R) {
      return L.InputOrderIndex < R.InputOrderIndex;
    });
    for (auto &N : Nodes)
      N.Bucket = Offset++;
    return;
  }

  LLVM_DEBUG(dbgs() << format("Bisect with %d nodes and root bucket %d\n",
                              NumNodes, RootBucket));

  std::mt19937 RNG(RootBucket);

  unsigned LeftBucket = 2 * RootBucket;
  unsigned RightBucket = 2 * RootBucket + 1;
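  // Buckets form an implicit binary tree: bucket R has children 2*R and
  // 2*R+1, so sibling subproblems never reuse a bucket ID, and seeding the
  // RNG with RootBucket keeps each bisection deterministic.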

  // Split into two and assign to the left and right buckets
  split(Nodes, LeftBucket);

  runIterations(Nodes, RecDepth, LeftBucket, RightBucket, RNG);

  // Split nodes wrt the resulting buckets
  auto NodesMid =
      llvm::partition(Nodes, [&](auto &N) { return N.Bucket == LeftBucket; });
  unsigned MidOffset = Offset + std::distance(Nodes.begin(), NodesMid);

  auto LeftNodes = llvm::make_range(Nodes.begin(), NodesMid);
  auto RightNodes = llvm::make_range(NodesMid, Nodes.end());

  auto LeftRecTask = [=, &TP]() {
    bisect(LeftNodes, RecDepth + 1, LeftBucket, Offset, TP);
  };
  auto RightRecTask = [=, &TP]() {
    bisect(RightNodes, RecDepth + 1, RightBucket, MidOffset, TP);
  };

  if (TP && RecDepth < Config.TaskSplitDepth && NumNodes >= 4) {
    TP->async(std::move(LeftRecTask));
    TP->async(std::move(RightRecTask));
  } else {
    LeftRecTask();
    RightRecTask();
  }
}

void BalancedPartitioning::runIterations(const FunctionNodeRange Nodes,
                                         unsigned RecDepth, unsigned LeftBucket,
                                         unsigned RightBucket,
                                         std::mt19937 &RNG) const {
  unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end());
  DenseMap<BPFunctionNode::UtilityNodeT, unsigned> UtilityNodeDegree;
  for (auto &N : Nodes)
    for (auto &UN : N.UtilityNodes)
      ++UtilityNodeDegree[UN];
  // Remove utility nodes if they have just one edge or are connected to all
  // functions
  for (auto &N : Nodes)
    llvm::erase_if(N.UtilityNodes, [&](auto &UN) {
      return UtilityNodeDegree[UN] <= 1 || UtilityNodeDegree[UN] >= NumNodes;
    });
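  // (Such utility nodes carry no signal: a single-edge utility node cannot
  // pull two functions together, and one adjacent to every function adds the
  // same cost to both sides of any split.)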

  // Renumber utility nodes so they can be used to index into Signatures
  DenseMap<BPFunctionNode::UtilityNodeT, unsigned> UtilityNodeIndex;
  for (auto &N : Nodes)
    for (auto &UN : N.UtilityNodes)
      if (!UtilityNodeIndex.count(UN))
        UtilityNodeIndex[UN] = UtilityNodeIndex.size();
  for (auto &N : Nodes)
    for (auto &UN : N.UtilityNodes)
      UN = UtilityNodeIndex[UN];

  // Initialize signatures
  SignaturesT Signatures(/*Size=*/UtilityNodeIndex.size());
  for (auto &N : Nodes) {
    for (auto &UN : N.UtilityNodes) {
      assert(UN < Signatures.size());
      if (N.Bucket == LeftBucket) {
        Signatures[UN].LeftCount++;
      } else {
        Signatures[UN].RightCount++;
      }
    }
  }
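
  // Signatures[UN] now records how many functions adjacent to utility node
  // UN sit in the left and right buckets; all gain computations below read
  // only these two counts.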

  for (unsigned I = 0; I < Config.IterationsPerSplit; I++) {
    unsigned NumMovedNodes =
        runIteration(Nodes, LeftBucket, RightBucket, Signatures, RNG);
    if (NumMovedNodes == 0)
      break;
  }
}

unsigned BalancedPartitioning::runIteration(const FunctionNodeRange Nodes,
                                            unsigned LeftBucket,
                                            unsigned RightBucket,
                                            SignaturesT &Signatures,
                                            std::mt19937 &RNG) const {
  // Init signature cost caches
  for (auto &Signature : Signatures) {
    if (Signature.CachedGainIsValid)
      continue;
    unsigned L = Signature.LeftCount;
    unsigned R = Signature.RightCount;
    assert((L > 0 || R > 0) && "incorrect signature");
    float Cost = logCost(L, R);
    Signature.CachedGainLR = 0.f;
    Signature.CachedGainRL = 0.f;
    if (L > 0)
      Signature.CachedGainLR = Cost - logCost(L - 1, R + 1);
    if (R > 0)
      Signature.CachedGainRL = Cost - logCost(L + 1, R - 1);
    Signature.CachedGainIsValid = true;
  }
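
  // Example: a signature with L=3, R=1 has left-to-right gain
  // logCost(3,1) - logCost(2,2) = -7.0 - (-6.34) ~= -0.66, so moving one
  // more occurrence away from the majority side is rightly unprofitable.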

  // Compute move gains
  typedef std::pair<float, BPFunctionNode *> GainPair;
  std::vector<GainPair> Gains;
  for (auto &N : Nodes) {
    bool FromLeftToRight = (N.Bucket == LeftBucket);
    float Gain = moveGain(N, FromLeftToRight, Signatures);
    Gains.push_back(std::make_pair(Gain, &N));
  }

  // Collect left and right gains
  auto LeftEnd = llvm::partition(
      Gains, [&](const auto &GP) { return GP.second->Bucket == LeftBucket; });
  auto LeftRange = llvm::make_range(Gains.begin(), LeftEnd);
  auto RightRange = llvm::make_range(LeftEnd, Gains.end());

  // Sort gains in descending order
  auto LargerGain = [](const auto &L, const auto &R) {
    return L.first > R.first;
  };
  llvm::stable_sort(LeftRange, LargerGain);
  llvm::stable_sort(RightRange, LargerGain);

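  // Exchange the best remaining left node with the best remaining right node,
  // in order of decreasing combined gain, so the two buckets keep their
  // sizes balanced.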
  unsigned NumMovedDataVertices = 0;
  for (auto [LeftPair, RightPair] : llvm::zip(LeftRange, RightRange)) {
    auto &[LeftGain, LeftNode] = LeftPair;
    auto &[RightGain, RightNode] = RightPair;
    // Stop when the gain is no longer beneficial
    if (LeftGain + RightGain <= 0.f)
      break;
    // Try to exchange the nodes between buckets
    if (moveFunctionNode(*LeftNode, LeftBucket, RightBucket, Signatures, RNG))
      ++NumMovedDataVertices;
    if (moveFunctionNode(*RightNode, LeftBucket, RightBucket, Signatures, RNG))
      ++NumMovedDataVertices;
  }
  return NumMovedDataVertices;
}

bool BalancedPartitioning::moveFunctionNode(BPFunctionNode &N,
                                            unsigned LeftBucket,
                                            unsigned RightBucket,
                                            SignaturesT &Signatures,
                                            std::mt19937 &RNG) const {
  // Sometimes we skip the move. This helps to escape local optima
  if (std::uniform_real_distribution<float>(0.f, 1.f)(RNG) <=
      Config.SkipProbability)
    return false;

  bool FromLeftToRight = (N.Bucket == LeftBucket);
  // Update the current bucket
  N.Bucket = (FromLeftToRight ? RightBucket : LeftBucket);

  // Update signatures and invalidate gain cache
  if (FromLeftToRight) {
    for (auto &UN : N.UtilityNodes) {
      auto &Signature = Signatures[UN];
      Signature.LeftCount--;
      Signature.RightCount++;
      Signature.CachedGainIsValid = false;
    }
  } else {
    for (auto &UN : N.UtilityNodes) {
      auto &Signature = Signatures[UN];
      Signature.LeftCount++;
      Signature.RightCount--;
      Signature.CachedGainIsValid = false;
    }
  }
  return true;
}
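
// Note that a move only touches the signatures of the moved node's own
// utility nodes, so its cost is proportional to N.UtilityNodes.size(); no
// global recomputation is needed.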

void BalancedPartitioning::split(const FunctionNodeRange Nodes,
                                 unsigned StartBucket) const {
  unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end());
  auto NodesMid = Nodes.begin() + (NumNodes + 1) / 2;

  std::nth_element(Nodes.begin(), NodesMid, Nodes.end(), [](auto &L, auto &R) {
    return L.InputOrderIndex < R.InputOrderIndex;
  });
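  // nth_element runs in linear time and only guarantees that the first half
  // precedes the second half by InputOrderIndex; a full sort is unnecessary
  // because runIterations() immediately refines this initial split.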

  for (auto &N : llvm::make_range(Nodes.begin(), NodesMid))
    N.Bucket = StartBucket;
  for (auto &N : llvm::make_range(NodesMid, Nodes.end()))
    N.Bucket = StartBucket + 1;
}

float BalancedPartitioning::moveGain(const BPFunctionNode &N,
                                     bool FromLeftToRight,
                                     const SignaturesT &Signatures) {
  float Gain = 0.f;
  for (auto &UN : N.UtilityNodes)
    Gain += (FromLeftToRight ? Signatures[UN].CachedGainLR
                             : Signatures[UN].CachedGainRL);
  return Gain;
}

float BalancedPartitioning::logCost(unsigned X, unsigned Y) const {
  return -(X * log2Cached(X + 1) + Y * log2Cached(Y + 1));
}
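
// Example: four occurrences of a utility node cost logCost(4,0) =
// -4*log2(5) ~= -9.29 when kept in one bucket, but logCost(2,2) =
// -4*log2(3) ~= -6.34 when split evenly; lower cost is better, so the
// optimization pulls each utility node's functions toward a single bucket.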

float BalancedPartitioning::log2Cached(unsigned i) const {
  return (i < LOG_CACHE_SIZE) ? Log2Cache[i] : std::log2(i);
}
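
// Values below LOG_CACHE_SIZE come from the table precomputed in the
// constructor; logCost() runs for every signature on every iteration, so
// avoiding repeated std::log2 calls on that path is worthwhile.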