LLVM  10.0.0svn
SplitModule.cpp
Go to the documentation of this file.
1 //===- SplitModule.cpp - Split a module into partitions -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the function llvm::SplitModule, which splits a module
10 // into multiple linkable partitions. It can be used to implement parallel code
11 // generation for link-time optimization.
12 //
13 //===----------------------------------------------------------------------===//
14 
16 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/SmallPtrSet.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/IR/Comdat.h"
22 #include "llvm/IR/Constant.h"
23 #include "llvm/IR/Constants.h"
24 #include "llvm/IR/Function.h"
25 #include "llvm/IR/GlobalAlias.h"
26 #include "llvm/IR/GlobalObject.h"
28 #include "llvm/IR/GlobalValue.h"
29 #include "llvm/IR/GlobalVariable.h"
30 #include "llvm/IR/Instruction.h"
31 #include "llvm/IR/Module.h"
32 #include "llvm/IR/User.h"
33 #include "llvm/IR/Value.h"
34 #include "llvm/Support/Casting.h"
35 #include "llvm/Support/Debug.h"
37 #include "llvm/Support/MD5.h"
41 #include <algorithm>
42 #include <cassert>
43 #include <iterator>
44 #include <memory>
45 #include <queue>
46 #include <utility>
47 #include <vector>
48 
49 using namespace llvm;
50 
51 #define DEBUG_TYPE "split-module"
52 
53 namespace {
54 
55 using ClusterMapType = EquivalenceClasses<const GlobalValue *>;
56 using ComdatMembersType = DenseMap<const Comdat *, const GlobalValue *>;
57 using ClusterIDMapType = DenseMap<const GlobalValue *, unsigned>;
58 
59 } // end anonymous namespace
60 
61 static void addNonConstUser(ClusterMapType &GVtoClusterMap,
62  const GlobalValue *GV, const User *U) {
63  assert((!isa<Constant>(U) || isa<GlobalValue>(U)) && "Bad user");
64 
65  if (const Instruction *I = dyn_cast<Instruction>(U)) {
66  const GlobalValue *F = I->getParent()->getParent();
67  GVtoClusterMap.unionSets(GV, F);
68  } else if (isa<GlobalIndirectSymbol>(U) || isa<Function>(U) ||
69  isa<GlobalVariable>(U)) {
70  GVtoClusterMap.unionSets(GV, cast<GlobalValue>(U));
71  } else {
72  llvm_unreachable("Underimplemented use case");
73  }
74 }
75 
76 // Adds all GlobalValue users of V to the same cluster as GV.
77 static void addAllGlobalValueUsers(ClusterMapType &GVtoClusterMap,
78  const GlobalValue *GV, const Value *V) {
79  for (auto *U : V->users()) {
81  Worklist.push_back(U);
82  while (!Worklist.empty()) {
83  const User *UU = Worklist.pop_back_val();
84  // For each constant that is not a GV (a pure const) recurse.
85  if (isa<Constant>(UU) && !isa<GlobalValue>(UU)) {
86  Worklist.append(UU->user_begin(), UU->user_end());
87  continue;
88  }
89  addNonConstUser(GVtoClusterMap, GV, UU);
90  }
91  }
92 }
93 
94 // Find partitions for module in the way that no locals need to be
95 // globalized.
96 // Try to balance pack those partitions into N files since this roughly equals
97 // thread balancing for the backend codegen step.
98 static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap,
99  unsigned N) {
100  // At this point module should have the proper mix of globals and locals.
101  // As we attempt to partition this module, we must not change any
102  // locals to globals.
103  LLVM_DEBUG(dbgs() << "Partition module with (" << M->size()
104  << ")functions\n");
105  ClusterMapType GVtoClusterMap;
106  ComdatMembersType ComdatMembers;
107 
108  auto recordGVSet = [&GVtoClusterMap, &ComdatMembers](GlobalValue &GV) {
109  if (GV.isDeclaration())
110  return;
111 
112  if (!GV.hasName())
113  GV.setName("__llvmsplit_unnamed");
114 
115  // Comdat groups must not be partitioned. For comdat groups that contain
116  // locals, record all their members here so we can keep them together.
117  // Comdat groups that only contain external globals are already handled by
118  // the MD5-based partitioning.
119  if (const Comdat *C = GV.getComdat()) {
120  auto &Member = ComdatMembers[C];
121  if (Member)
122  GVtoClusterMap.unionSets(Member, &GV);
123  else
124  Member = &GV;
125  }
126 
127  // For aliases we should not separate them from their aliasees regardless
128  // of linkage.
129  if (auto *GIS = dyn_cast<GlobalIndirectSymbol>(&GV)) {
130  if (const GlobalObject *Base = GIS->getBaseObject())
131  GVtoClusterMap.unionSets(&GV, Base);
132  }
133 
134  if (const Function *F = dyn_cast<Function>(&GV)) {
135  for (const BasicBlock &BB : *F) {
137  if (!BA || !BA->isConstantUsed())
138  continue;
139  addAllGlobalValueUsers(GVtoClusterMap, F, BA);
140  }
141  }
142 
143  if (GV.hasLocalLinkage())
144  addAllGlobalValueUsers(GVtoClusterMap, &GV, &GV);
145  };
146 
147  llvm::for_each(M->functions(), recordGVSet);
148  llvm::for_each(M->globals(), recordGVSet);
149  llvm::for_each(M->aliases(), recordGVSet);
150 
151  // Assigned all GVs to merged clusters while balancing number of objects in
152  // each.
153  auto CompareClusters = [](const std::pair<unsigned, unsigned> &a,
154  const std::pair<unsigned, unsigned> &b) {
155  if (a.second || b.second)
156  return a.second > b.second;
157  else
158  return a.first > b.first;
159  };
160 
161  std::priority_queue<std::pair<unsigned, unsigned>,
162  std::vector<std::pair<unsigned, unsigned>>,
163  decltype(CompareClusters)>
164  BalancinQueue(CompareClusters);
165  // Pre-populate priority queue with N slot blanks.
166  for (unsigned i = 0; i < N; ++i)
167  BalancinQueue.push(std::make_pair(i, 0));
168 
169  using SortType = std::pair<unsigned, ClusterMapType::iterator>;
170 
173 
174  // To guarantee determinism, we have to sort SCC according to size.
175  // When size is the same, use leader's name.
176  for (ClusterMapType::iterator I = GVtoClusterMap.begin(),
177  E = GVtoClusterMap.end(); I != E; ++I)
178  if (I->isLeader())
179  Sets.push_back(
180  std::make_pair(std::distance(GVtoClusterMap.member_begin(I),
181  GVtoClusterMap.member_end()), I));
182 
183  llvm::sort(Sets, [](const SortType &a, const SortType &b) {
184  if (a.first == b.first)
185  return a.second->getData()->getName() > b.second->getData()->getName();
186  else
187  return a.first > b.first;
188  });
189 
190  for (auto &I : Sets) {
191  unsigned CurrentClusterID = BalancinQueue.top().first;
192  unsigned CurrentClusterSize = BalancinQueue.top().second;
193  BalancinQueue.pop();
194 
195  LLVM_DEBUG(dbgs() << "Root[" << CurrentClusterID << "] cluster_size("
196  << I.first << ") ----> " << I.second->getData()->getName()
197  << "\n");
198 
199  for (ClusterMapType::member_iterator MI =
200  GVtoClusterMap.findLeader(I.second);
201  MI != GVtoClusterMap.member_end(); ++MI) {
202  if (!Visited.insert(*MI).second)
203  continue;
204  LLVM_DEBUG(dbgs() << "----> " << (*MI)->getName()
205  << ((*MI)->hasLocalLinkage() ? " l " : " e ") << "\n");
206  Visited.insert(*MI);
207  ClusterIDMap[*MI] = CurrentClusterID;
208  CurrentClusterSize++;
209  }
210  // Add this set size to the number of entries in this cluster.
211  BalancinQueue.push(std::make_pair(CurrentClusterID, CurrentClusterSize));
212  }
213 }
214 
215 static void externalize(GlobalValue *GV) {
216  if (GV->hasLocalLinkage()) {
219  }
220 
221  // Unnamed entities must be named consistently between modules. setName will
222  // give a distinct name to each such entity.
223  if (!GV->hasName())
224  GV->setName("__llvmsplit_unnamed");
225 }
226 
227 // Returns whether GV should be in partition (0-based) I of N.
228 static bool isInPartition(const GlobalValue *GV, unsigned I, unsigned N) {
229  if (auto *GIS = dyn_cast<GlobalIndirectSymbol>(GV))
230  if (const GlobalObject *Base = GIS->getBaseObject())
231  GV = Base;
232 
233  StringRef Name;
234  if (const Comdat *C = GV->getComdat())
235  Name = C->getName();
236  else
237  Name = GV->getName();
238 
239  // Partition by MD5 hash. We only need a few bits for evenness as the number
240  // of partitions will generally be in the 1-2 figure range; the low 16 bits
241  // are enough.
242  MD5 H;
243  MD5::MD5Result R;
244  H.update(Name);
245  H.final(R);
246  return (R[0] | (R[1] << 8)) % N == I;
247 }
248 
250  std::unique_ptr<Module> M, unsigned N,
251  function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback,
252  bool PreserveLocals) {
253  if (!PreserveLocals) {
254  for (Function &F : *M)
255  externalize(&F);
256  for (GlobalVariable &GV : M->globals())
257  externalize(&GV);
258  for (GlobalAlias &GA : M->aliases())
259  externalize(&GA);
260  for (GlobalIFunc &GIF : M->ifuncs())
261  externalize(&GIF);
262  }
263 
264  // This performs splitting without a need for externalization, which might not
265  // always be possible.
266  ClusterIDMapType ClusterIDMap;
267  findPartitions(M.get(), ClusterIDMap, N);
268 
269  // FIXME: We should be able to reuse M as the last partition instead of
270  // cloning it.
271  for (unsigned I = 0; I < N; ++I) {
272  ValueToValueMapTy VMap;
273  std::unique_ptr<Module> MPart(
274  CloneModule(*M, VMap, [&](const GlobalValue *GV) {
275  if (ClusterIDMap.count(GV))
276  return (ClusterIDMap[GV] == I);
277  else
278  return isInPartition(GV, I, N);
279  }));
280  if (I != 0)
281  MPart->setModuleInlineAsm("");
282  ModuleCallback(std::move(MPart));
283  }
284 }
void setVisibility(VisibilityTypes V)
Definition: GlobalValue.h:242
uint64_t CallInst * C
bool hasLocalLinkage() const
Definition: GlobalValue.h:445
This class represents lattice values for constants.
Definition: AllocatorList.h:23
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
bool isConstantUsed() const
Return true if the constant has users other than constant expressions and other dangling things...
Definition: Constants.cpp:501
void SplitModule(std::unique_ptr< Module > M, unsigned N, function_ref< void(std::unique_ptr< Module > MPart)> ModuleCallback, bool PreserveLocals=false)
Splits the module M into N linkable partitions.
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:104
This file contains the declaration of the Comdat class, which represents a single COMDAT in LLVM...
Externally visible function.
Definition: GlobalValue.h:48
static void addAllGlobalValueUsers(ClusterMapType &GVtoClusterMap, const GlobalValue *GV, const Value *V)
Definition: SplitModule.cpp:77
F(f)
static void externalize(GlobalValue *GV)
The address of a basic block.
Definition: Constants.h:839
void update(ArrayRef< uint8_t > Data)
Updates the hash for the byte stream provided.
Definition: MD5.cpp:189
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:285
static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap, unsigned N)
Definition: SplitModule.cpp:98
std::unique_ptr< Module > CloneModule(const Module &M)
Return an exact copy of the specified module.
Definition: CloneModule.cpp:34
iterator_range< iterator > functions()
Definition: Module.h:609
bool hasName() const
Definition: Value.h:251
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static void addNonConstUser(ClusterMapType &GVtoClusterMap, const GlobalValue *GV, const User *U)
Definition: SplitModule.cpp:61
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define H(x, y, z)
Definition: MD5.cpp:57
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1095
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
Module.h This file contains the declarations for the Module class.
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:374
void setLinkage(LinkageTypes LT)
Definition: GlobalValue.h:454
static BlockAddress * lookup(const BasicBlock *BB)
Lookup an existing BlockAddress constant for the given BasicBlock.
Definition: Constants.cpp:1483
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
const Comdat * getComdat() const
Definition: Globals.cpp:175
iterator_range< user_iterator > users()
Definition: Value.h:419
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:387
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
size_t size() const
Definition: Module.h:606
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
Definition: MD5.h:41
static bool isInPartition(const GlobalValue *GV, unsigned I, unsigned N)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
Definition: Value.h:395
LLVM Value Representation.
Definition: Value.h:73
void final(MD5Result &Result)
Finishes off the hash and puts the result in result.
Definition: MD5.cpp:234
iterator_range< global_iterator > globals()
Definition: Module.h:587
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
UnaryPredicate for_each(R &&Range, UnaryPredicate P)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1158
#define LLVM_DEBUG(X)
Definition: Debug.h:122
iterator_range< alias_iterator > aliases()
Definition: Module.h:627
user_iterator user_end()
Definition: Value.h:403