//===- BalancedPartitioning.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements BalancedPartitioning, a recursive balanced graph
// partitioning algorithm.
//
//===----------------------------------------------------------------------===//

#include "llvm/Support/BalancedPartitioning.h"
#include "llvm/Config/llvm-config.h" // for LLVM_ENABLE_THREADS
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/ThreadPool.h"

using namespace llvm;
#define DEBUG_TYPE "balanced-partitioning"

void BPFunctionNode::dump(raw_ostream &OS) const {
  OS << formatv("{{ID={0} Utilities={{{1:$[,]}} Bucket={2}}", Id,
                UtilityNodes, Bucket);
}

template <typename Func>
void BalancedPartitioning::BPThreadPool::async(Func &&F) {
#if LLVM_ENABLE_THREADS
  // This new thread could spawn more threads, so mark it as active
  ++NumActiveThreads;
  TheThreadPool.async([this, F]() {
    // Run the task
    F();

    // This thread will no longer spawn new threads, so mark it as inactive
    if (--NumActiveThreads == 0) {
      // There are no more active threads, so mark as finished and notify
      {
        std::unique_lock<std::mutex> lock(mtx);
        assert(!IsFinishedSpawning);
        IsFinishedSpawning = true;
      }
      cv.notify_one();
    }
  });
#else
  llvm_unreachable("threads are disabled");
#endif
}

void BalancedPartitioning::BPThreadPool::wait() {
#if LLVM_ENABLE_THREADS
  // TODO: We could remove the mutex and condition variable and use
  // std::atomic::wait() instead, but that isn't available until C++20
  {
    std::unique_lock<std::mutex> lock(mtx);
    cv.wait(lock, [&]() { return IsFinishedSpawning; });
    assert(IsFinishedSpawning && NumActiveThreads == 0);
  }
  // Now we can call ThreadPool::wait() since all tasks have been submitted
  TheThreadPool.wait();
#else
  llvm_unreachable("threads are disabled");
#endif
}

BalancedPartitioning::BalancedPartitioning(
    const BalancedPartitioningConfig &Config)
    : Config(Config) {
  // Pre-computing log2 values
  Log2Cache[0] = 0.0;
  for (unsigned I = 1; I < LOG_CACHE_SIZE; I++)
    Log2Cache[I] = std::log2(I);
}

void BalancedPartitioning::run(std::vector<BPFunctionNode> &Nodes) const {
  LLVM_DEBUG(
      dbgs() << format(
          "Partitioning %d nodes using depth %d and %d iterations per split\n",
          Nodes.size(), Config.SplitDepth, Config.IterationsPerSplit));
  std::optional<BPThreadPool> TP;
#if LLVM_ENABLE_THREADS
  DefaultThreadPool TheThreadPool;
  if (Config.TaskSplitDepth > 1)
    TP.emplace(TheThreadPool);
#endif

  // Record the input order
  for (unsigned I = 0; I < Nodes.size(); I++)
    Nodes[I].InputOrderIndex = I;

  auto NodesRange = llvm::make_range(Nodes.begin(), Nodes.end());
  auto BisectTask = [this, NodesRange, &TP]() {
    bisect(NodesRange, /*RecDepth=*/0, /*RootBucket=*/1, /*Offset=*/0, TP);
  };
  if (TP) {
    TP->async(std::move(BisectTask));
    TP->wait();
  } else {
    BisectTask();
  }

  llvm::stable_sort(NodesRange, [](const auto &L, const auto &R) {
    return L.Bucket < R.Bucket;
  });

  LLVM_DEBUG(dbgs() << "Balanced partitioning completed\n");
}
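
// Threading note: the BPThreadPool wrapper above is engaged only when
// Config.TaskSplitDepth > 1 and LLVM is built with LLVM_ENABLE_THREADS;
// otherwise BisectTask runs synchronously on the calling thread and the whole
// recursion is sequential.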

void BalancedPartitioning::bisect(const FunctionNodeRange Nodes,
                                  unsigned RecDepth, unsigned RootBucket,
                                  unsigned Offset,
                                  std::optional<BPThreadPool> &TP) const {
  unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end());
  if (NumNodes <= 1 || RecDepth >= Config.SplitDepth) {
    // We've reached the lowest level of the recursion tree. Fall back to the
    // original order and assign to buckets.
    llvm::sort(Nodes, [](const auto &L, const auto &R) {
      return L.InputOrderIndex < R.InputOrderIndex;
    });
    for (auto &N : Nodes)
      N.Bucket = Offset++;
    return;
  }

  LLVM_DEBUG(dbgs() << format("Bisect with %d nodes and root bucket %d\n",
                              NumNodes, RootBucket));

  std::mt19937 RNG(RootBucket);

  unsigned LeftBucket = 2 * RootBucket;
  unsigned RightBucket = 2 * RootBucket + 1;

  // Split into two and assign to the left and right buckets
  split(Nodes, LeftBucket);

  runIterations(Nodes, LeftBucket, RightBucket, RNG);

  // Split nodes wrt the resulting buckets
  auto NodesMid =
      llvm::partition(Nodes, [&](auto &N) { return N.Bucket == LeftBucket; });
  unsigned MidOffset = Offset + std::distance(Nodes.begin(), NodesMid);

  auto LeftNodes = llvm::make_range(Nodes.begin(), NodesMid);
  auto RightNodes = llvm::make_range(NodesMid, Nodes.end());

  auto LeftRecTask = [this, LeftNodes, RecDepth, LeftBucket, Offset, &TP]() {
    bisect(LeftNodes, RecDepth + 1, LeftBucket, Offset, TP);
  };
  auto RightRecTask = [this, RightNodes, RecDepth, RightBucket, MidOffset,
                       &TP]() {
    bisect(RightNodes, RecDepth + 1, RightBucket, MidOffset, TP);
  };

  if (TP && RecDepth < Config.TaskSplitDepth && NumNodes >= 4) {
    TP->async(std::move(LeftRecTask));
    TP->async(std::move(RightRecTask));
  } else {
    LeftRecTask();
    RightRecTask();
  }
}
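
// Buckets form an implicit binary tree rooted at RootBucket = 1: each level
// recurses into buckets 2 * RootBucket and 2 * RootBucket + 1. RootBucket also
// seeds the RNG, so every subtree is processed deterministically. At the
// leaves of the recursion each node's Bucket is overwritten with its final
// position (starting at Offset), which is what run() sorts by at the end.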

void BalancedPartitioning::runIterations(const FunctionNodeRange Nodes,
                                         unsigned LeftBucket,
                                         unsigned RightBucket,
                                         std::mt19937 &RNG) const {
  unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end());
  DenseMap<BPFunctionNode::UtilityNodeT, unsigned> UtilityNodeIndex;
  for (auto &N : Nodes)
    for (auto &UN : N.UtilityNodes)
      ++UtilityNodeIndex[UN];
  // Remove utility nodes if they have just one edge or are connected to all
  // functions
  for (auto &N : Nodes)
    llvm::erase_if(N.UtilityNodes, [&](auto &UN) {
      unsigned UNI = UtilityNodeIndex[UN];
      return UNI == 1 || UNI == NumNodes;
    });

  // Renumber utility nodes so they can be used to index into Signatures
  UtilityNodeIndex.clear();
  for (auto &N : Nodes)
    for (auto &UN : N.UtilityNodes)
      UN = UtilityNodeIndex.insert({UN, UtilityNodeIndex.size()}).first->second;

  // Initialize signatures
  SignaturesT Signatures(/*Size=*/UtilityNodeIndex.size());
  for (auto &N : Nodes) {
    for (auto &UN : N.UtilityNodes) {
      assert(UN < Signatures.size());
      if (N.Bucket == LeftBucket) {
        Signatures[UN].LeftCount++;
      } else {
        Signatures[UN].RightCount++;
      }
    }
  }

  for (unsigned I = 0; I < Config.IterationsPerSplit; I++) {
    unsigned NumMovedNodes =
        runIteration(Nodes, LeftBucket, RightBucket, Signatures, RNG);
    if (NumMovedNodes == 0)
      break;
  }
}

unsigned BalancedPartitioning::runIteration(const FunctionNodeRange Nodes,
                                            unsigned LeftBucket,
                                            unsigned RightBucket,
                                            SignaturesT &Signatures,
                                            std::mt19937 &RNG) const {
  // Init signature cost caches
  for (auto &Signature : Signatures) {
    if (Signature.CachedGainIsValid)
      continue;
    unsigned L = Signature.LeftCount;
    unsigned R = Signature.RightCount;
    assert((L > 0 || R > 0) && "incorrect signature");
    float Cost = logCost(L, R);
    Signature.CachedGainLR = 0.f;
    Signature.CachedGainRL = 0.f;
    if (L > 0)
      Signature.CachedGainLR = Cost - logCost(L - 1, R + 1);
    if (R > 0)
      Signature.CachedGainRL = Cost - logCost(L + 1, R - 1);
    Signature.CachedGainIsValid = true;
  }

  // Compute move gains
  typedef std::pair<float, BPFunctionNode *> GainPair;
  std::vector<GainPair> Gains;
  for (auto &N : Nodes) {
    bool FromLeftToRight = (N.Bucket == LeftBucket);
    float Gain = moveGain(N, FromLeftToRight, Signatures);
    Gains.push_back(std::make_pair(Gain, &N));
  }

  // Collect left and right gains
  auto LeftEnd = llvm::partition(
      Gains, [&](const auto &GP) { return GP.second->Bucket == LeftBucket; });
  auto LeftRange = llvm::make_range(Gains.begin(), LeftEnd);
  auto RightRange = llvm::make_range(LeftEnd, Gains.end());

  // Sort gains in descending order
  auto LargerGain = [](const auto &L, const auto &R) {
    return L.first > R.first;
  };
  llvm::stable_sort(LeftRange, LargerGain);
  llvm::stable_sort(RightRange, LargerGain);

  unsigned NumMovedDataVertices = 0;
  for (auto [LeftPair, RightPair] : llvm::zip(LeftRange, RightRange)) {
    auto &[LeftGain, LeftNode] = LeftPair;
    auto &[RightGain, RightNode] = RightPair;
    // Stop when the gain is no longer beneficial
    if (LeftGain + RightGain <= 0.f)
      break;
    // Try to exchange the nodes between buckets
    if (moveFunctionNode(*LeftNode, LeftBucket, RightBucket, Signatures, RNG))
      ++NumMovedDataVertices;
    if (moveFunctionNode(*RightNode, LeftBucket, RightBucket, Signatures, RNG))
      ++NumMovedDataVertices;
  }
  return NumMovedDataVertices;
}
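
// Nodes on each side are paired up in order of decreasing move gain, and a
// pair is considered only while the combined gain of swapping it is positive;
// each individual move may still be skipped randomly (see SkipProbability in
// moveFunctionNode) to help escape local optima.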

bool BalancedPartitioning::moveFunctionNode(BPFunctionNode &N,
                                            unsigned LeftBucket,
                                            unsigned RightBucket,
                                            SignaturesT &Signatures,
                                            std::mt19937 &RNG) const {
  // Sometimes we skip the move. This helps to escape local optima
  if (std::uniform_real_distribution<float>(0.f, 1.f)(RNG) <=
      Config.SkipProbability)
    return false;

  bool FromLeftToRight = (N.Bucket == LeftBucket);
  // Update the current bucket
  N.Bucket = (FromLeftToRight ? RightBucket : LeftBucket);

  // Update signatures and invalidate gain cache
  if (FromLeftToRight) {
    for (auto &UN : N.UtilityNodes) {
      auto &Signature = Signatures[UN];
      Signature.LeftCount--;
      Signature.RightCount++;
      Signature.CachedGainIsValid = false;
    }
  } else {
    for (auto &UN : N.UtilityNodes) {
      auto &Signature = Signatures[UN];
      Signature.LeftCount++;
      Signature.RightCount--;
      Signature.CachedGainIsValid = false;
    }
  }
  return true;
}

void BalancedPartitioning::split(const FunctionNodeRange Nodes,
                                 unsigned StartBucket) const {
  unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end());
  auto NodesMid = Nodes.begin() + (NumNodes + 1) / 2;

  llvm::sort(Nodes, [](auto &L, auto &R) {
    return L.InputOrderIndex < R.InputOrderIndex;
  });

  for (auto &N : llvm::make_range(Nodes.begin(), NodesMid))
    N.Bucket = StartBucket;
  for (auto &N : llvm::make_range(NodesMid, Nodes.end()))
    N.Bucket = StartBucket + 1;
}

float BalancedPartitioning::moveGain(const BPFunctionNode &N,
                                     bool FromLeftToRight,
                                     const SignaturesT &Signatures) {
  float Gain = 0.f;
  for (auto &UN : N.UtilityNodes)
    Gain += (FromLeftToRight ? Signatures[UN].CachedGainLR
                             : Signatures[UN].CachedGainRL);
  return Gain;
}

float BalancedPartitioning::logCost(unsigned X, unsigned Y) const {
  return -(X * log2Cached(X + 1) + Y * log2Cached(Y + 1));
}
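
// For a utility node with L incident nodes in the left bucket and R in the
// right, logCost(L, R) = -(L * log2(L + 1) + R * log2(R + 1)) is the uniform
// log-gap cost term; CachedGainLR and CachedGainRL in runIteration() are the
// reduction in this term when a single incident node crosses the split in the
// corresponding direction.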

float BalancedPartitioning::log2Cached(unsigned i) const {
  return (i < LOG_CACHE_SIZE) ? Log2Cache[i] : std::log2(i);
}
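
As a usage illustration (not part of this file), here is a minimal sketch of driving the interface implemented above. It assumes the BPFunctionNode(Id, UtilityNodes) constructor and public UtilityNodeT alias from BalancedPartitioning.h, plus the BalancedPartitioningConfig fields referenced in run() (SplitDepth, IterationsPerSplit); the IDs and utility-node numbering are made up for the example.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/BalancedPartitioning.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>

using namespace llvm;

int main() {
  // Nodes sharing utility nodes (e.g. functions touched by the same startup
  // traces) are pulled close together in the final order.
  using UN = BPFunctionNode::UtilityNodeT;
  std::vector<BPFunctionNode> Nodes;
  Nodes.emplace_back(/*Id=*/0, ArrayRef<UN>({1, 2}));
  Nodes.emplace_back(/*Id=*/1, ArrayRef<UN>({2, 3}));
  Nodes.emplace_back(/*Id=*/2, ArrayRef<UN>({3, 4}));
  Nodes.emplace_back(/*Id=*/3, ArrayRef<UN>({4, 1}));

  BalancedPartitioningConfig Config;
  Config.SplitDepth = 3;          // depth of the recursive bisection
  Config.IterationsPerSplit = 20; // local-search passes per split

  BalancedPartitioning BP(Config);
  BP.run(Nodes); // Nodes is left sorted in the computed order

  for (const BPFunctionNode &N : Nodes) {
    N.dump(outs());
    outs() << "\n";
  }
  return 0;
}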