15#include "llvm/Config/llvm-config.h"
22#define DEBUG_TYPE "balanced-partitioning"
25 OS <<
formatv(
"{{ID={0} Utilities={{{1:$[,]}} Bucket={2}}",
Id,
29template <
typename Func>
30void BalancedPartitioning::BPThreadPool::async(Func &&
F) {
31#if LLVM_ENABLE_THREADS
34 TheThreadPool.
async([
this,
F]() {
39 if (--NumActiveThreads == 0) {
42 std::unique_lock<std::mutex> lock(mtx);
43 assert(!IsFinishedSpawning);
44 IsFinishedSpawning =
true;
54void BalancedPartitioning::BPThreadPool::wait() {
55#if LLVM_ENABLE_THREADS
59 std::unique_lock<std::mutex> lock(mtx);
60 cv.wait(lock, [&]() {
return IsFinishedSpawning; });
61 assert(IsFinishedSpawning && NumActiveThreads == 0);
75 for (
unsigned I = 1;
I < LOG_CACHE_SIZE;
I++)
76 Log2Cache[
I] = std::log2(
I);
82 "Partitioning %d nodes using depth %d and %d iterations per split\n",
83 Nodes.size(), Config.SplitDepth, Config.IterationsPerSplit));
84 std::optional<BPThreadPool> TP;
85#if LLVM_ENABLE_THREADS
87 if (Config.TaskSplitDepth > 1)
88 TP.emplace(TheThreadPool);
92 for (
unsigned I = 0;
I < Nodes.size();
I++)
93 Nodes[
I].InputOrderIndex =
I;
96 auto BisectTask = [
this, NodesRange, &TP]() {
97 bisect(NodesRange, 0, 1, 0, TP);
100 TP->async(std::move(BisectTask));
107 return L.Bucket < R.Bucket;
113void BalancedPartitioning::bisect(
const FunctionNodeRange Nodes,
114 unsigned RecDepth,
unsigned RootBucket,
116 std::optional<BPThreadPool> &TP)
const {
117 unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end());
118 if (NumNodes <= 1 || RecDepth >= Config.
SplitDepth) {
121 llvm::sort(Nodes, [](
const auto &L,
const auto &R) {
122 return L.InputOrderIndex < R.InputOrderIndex;
124 for (
auto &
N : Nodes)
130 NumNodes, RootBucket));
132 std::mt19937 RNG(RootBucket);
134 unsigned LeftBucket = 2 * RootBucket;
135 unsigned RightBucket = 2 * RootBucket + 1;
138 split(Nodes, LeftBucket);
140 runIterations(Nodes, LeftBucket, RightBucket, RNG);
145 unsigned MidOffset =
Offset + std::distance(Nodes.begin(), NodesMid);
150 auto LeftRecTask = [
this, LeftNodes, RecDepth, LeftBucket,
Offset, &TP]() {
151 bisect(LeftNodes, RecDepth + 1, LeftBucket,
Offset, TP);
153 auto RightRecTask = [
this, RightNodes, RecDepth, RightBucket, MidOffset,
155 bisect(RightNodes, RecDepth + 1, RightBucket, MidOffset, TP);
158 if (TP && RecDepth < Config.TaskSplitDepth && NumNodes >= 4) {
159 TP->async(std::move(LeftRecTask));
160 TP->async(std::move(RightRecTask));
167void BalancedPartitioning::runIterations(
const FunctionNodeRange Nodes,
169 unsigned RightBucket,
170 std::mt19937 &RNG)
const {
171 unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end());
172 DenseMap<BPFunctionNode::UtilityNodeT, unsigned> UtilityNodeIndex;
173 for (
auto &
N : Nodes)
174 for (
auto &UN :
N.UtilityNodes)
175 ++UtilityNodeIndex[UN];
178 for (
auto &
N : Nodes)
180 unsigned UNI = UtilityNodeIndex[UN];
181 return UNI == 1 || UNI == NumNodes;
185 UtilityNodeIndex.
clear();
186 for (
auto &
N : Nodes)
187 for (
auto &UN :
N.UtilityNodes)
188 UN = UtilityNodeIndex.
insert({UN, UtilityNodeIndex.
size()}).first->second;
192 for (
auto &
N : Nodes) {
193 for (
auto &UN :
N.UtilityNodes) {
195 if (
N.Bucket == LeftBucket) {
203 for (
unsigned I = 0;
I < Config.IterationsPerSplit;
I++) {
204 unsigned NumMovedNodes =
205 runIteration(Nodes, LeftBucket, RightBucket,
Signatures, RNG);
206 if (NumMovedNodes == 0)
211unsigned BalancedPartitioning::runIteration(
const FunctionNodeRange Nodes,
213 unsigned RightBucket,
215 std::mt19937 &RNG)
const {
218 if (Signature.CachedGainIsValid)
220 unsigned L = Signature.LeftCount;
221 unsigned R = Signature.RightCount;
222 assert((L > 0 || R > 0) &&
"incorrect signature");
223 float Cost = logCost(L, R);
224 Signature.CachedGainLR = 0.f;
225 Signature.CachedGainRL = 0.f;
227 Signature.CachedGainLR =
Cost - logCost(L - 1, R + 1);
229 Signature.CachedGainRL =
Cost - logCost(L + 1, R - 1);
230 Signature.CachedGainIsValid =
true;
234 typedef std::pair<float, BPFunctionNode *> GainPair;
235 std::vector<GainPair> Gains;
236 for (
auto &
N : Nodes) {
237 bool FromLeftToRight = (
N.Bucket == LeftBucket);
239 Gains.push_back(std::make_pair(Gain, &
N));
244 Gains, [&](
const auto &GP) {
return GP.second->Bucket == LeftBucket; });
249 auto LargerGain = [](
const auto &
L,
const auto &
R) {
250 return L.first >
R.first;
255 unsigned NumMovedDataVertices = 0;
256 for (
auto [LeftPair, RightPair] :
llvm::zip(LeftRange, RightRange)) {
257 auto &[LeftGain, LeftNode] = LeftPair;
258 auto &[RightGain, RightNode] = RightPair;
260 if (LeftGain + RightGain <= 0.f)
263 if (moveFunctionNode(*LeftNode, LeftBucket, RightBucket,
Signatures, RNG))
264 ++NumMovedDataVertices;
265 if (moveFunctionNode(*RightNode, LeftBucket, RightBucket,
Signatures, RNG))
266 ++NumMovedDataVertices;
268 return NumMovedDataVertices;
273 unsigned RightBucket,
275 std::mt19937 &RNG)
const {
277 if (std::uniform_real_distribution<float>(0.f, 1.f)(RNG) <=
278 Config.SkipProbability)
281 bool FromLeftToRight = (
N.Bucket == LeftBucket);
283 N.Bucket = (FromLeftToRight ? RightBucket : LeftBucket);
286 if (FromLeftToRight) {
287 for (
auto &UN :
N.UtilityNodes) {
289 Signature.LeftCount--;
290 Signature.RightCount++;
291 Signature.CachedGainIsValid =
false;
294 for (
auto &UN :
N.UtilityNodes) {
296 Signature.LeftCount++;
297 Signature.RightCount--;
298 Signature.CachedGainIsValid =
false;
304void BalancedPartitioning::split(
const FunctionNodeRange Nodes,
305 unsigned StartBucket)
const {
306 unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end());
307 auto NodesMid = Nodes.begin() + (NumNodes + 1) / 2;
310 return L.InputOrderIndex <
R.InputOrderIndex;
314 N.Bucket = StartBucket;
316 N.Bucket = StartBucket + 1;
320 bool FromLeftToRight,
323 for (
auto &UN :
N.UtilityNodes)
324 Gain += (FromLeftToRight ?
Signatures[UN].CachedGainLR
329float BalancedPartitioning::logCost(
unsigned X,
unsigned Y)
const {
330 return -(
X * log2Cached(
X + 1) +
Y * log2Cached(
Y + 1));
333float BalancedPartitioning::log2Cached(
unsigned i)
const {
334 return (i < LOG_CACHE_SIZE) ? Log2Cache[i] : std::log2(i);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static const FuncProtoTy Signatures[]
A function with a set of utility nodes where it is beneficial to order two functions close together if they share many utility nodes.
IDT Id
The ID of this node.
SmallVector< UtilityNodeT, 4 > UtilityNodes
The list of utility nodes associated with this node.
std::optional< unsigned > Bucket
The bucket assigned by balanced partitioning.
LLVM_ABI void dump(raw_ostream &OS) const
static LLVM_ABI float moveGain(const BPFunctionNode &N, bool FromLeftToRight, const SignaturesT &Signatures)
Compute the move gain for uniform log-gap cost.
LLVM_ABI void run(std::vector< BPFunctionNode > &Nodes) const
Run recursive graph partitioning that optimizes a given objective.
LLVM_ABI BalancedPartitioning(const BalancedPartitioningConfig &Config)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
auto async(Function &&F, Args &&...ArgList)
Asynchronous submission of a task to the pool.
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
This is an optimization pass for GlobalISel generic memory operations.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
void stable_sort(R &&Range)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto formatv(bool Validate, const char *Fmt, Ts &&...Vals)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
SingleThreadExecutor DefaultThreadPool
auto partition(R &&Range, UnaryPredicate P)
Provide wrappers to std::partition which take ranges instead of having to pass begin/end explicitly.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if, which is equivalent to calling C.erase(remove_if(C.begin(), C.end(), pred), C.end()).
Algorithm parameters; default values are tuned on real-world binaries.
unsigned SplitDepth
The depth of the recursive bisection.