LLVM  16.0.0git
GlobalMerge.cpp
Go to the documentation of this file.
1 //===- GlobalMerge.cpp - Internal globals merging -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass merges globals with internal linkage into one. This way all the
10 // globals which were merged into a biggest one can be addressed using offsets
11 // from the same base pointer (no need for separate base pointer for each of the
12 // global). Such a transformation can significantly reduce the register pressure
13 // when many globals are involved.
14 //
15 // For example, consider the code which touches several global variables at
16 // once:
17 //
18 // static int foo[N], bar[N], baz[N];
19 //
20 // for (i = 0; i < N; ++i) {
21 // foo[i] = bar[i] * baz[i];
22 // }
23 //
24 // On ARM the addresses of 3 arrays should be kept in the registers, thus
25 // this code has quite large register pressure (loop body):
26 //
27 // ldr r1, [r5], #4
28 // ldr r2, [r6], #4
29 // mul r1, r2, r1
30 // str r1, [r0], #4
31 //
32 // Pass converts the code to something like:
33 //
34 // static struct {
35 // int foo[N];
36 // int bar[N];
37 // int baz[N];
38 // } merged;
39 //
40 // for (i = 0; i < N; ++i) {
41 // merged.foo[i] = merged.bar[i] * merged.baz[i];
42 // }
43 //
44 // and in ARM code this becomes:
45 //
46 // ldr r0, [r5, #40]
47 // ldr r1, [r5, #80]
48 // mul r0, r1, r0
49 // str r0, [r5], #4
50 //
51 // note that we saved 2 registers here almostly "for free".
52 //
53 // However, merging globals can have tradeoffs:
54 // - it confuses debuggers, tools, and users
55 // - it makes linker optimizations less useful (order files, LOHs, ...)
56 // - it forces usage of indexed addressing (which isn't necessarily "free")
57 // - it can increase register pressure when the uses are disparate enough.
58 //
59 // We use heuristics to discover the best global grouping we can (cf cl::opts).
60 //
61 // ===---------------------------------------------------------------------===//
62 
63 #include "llvm/ADT/BitVector.h"
64 #include "llvm/ADT/DenseMap.h"
65 #include "llvm/ADT/SmallPtrSet.h"
66 #include "llvm/ADT/SmallVector.h"
67 #include "llvm/ADT/Statistic.h"
68 #include "llvm/ADT/StringRef.h"
69 #include "llvm/ADT/Triple.h"
70 #include "llvm/ADT/Twine.h"
71 #include "llvm/CodeGen/Passes.h"
72 #include "llvm/IR/BasicBlock.h"
73 #include "llvm/IR/Constants.h"
74 #include "llvm/IR/DataLayout.h"
75 #include "llvm/IR/DerivedTypes.h"
76 #include "llvm/IR/Function.h"
77 #include "llvm/IR/GlobalAlias.h"
78 #include "llvm/IR/GlobalValue.h"
79 #include "llvm/IR/GlobalVariable.h"
80 #include "llvm/IR/Instruction.h"
81 #include "llvm/IR/Module.h"
82 #include "llvm/IR/Type.h"
83 #include "llvm/IR/Use.h"
84 #include "llvm/IR/User.h"
85 #include "llvm/InitializePasses.h"
86 #include "llvm/MC/SectionKind.h"
87 #include "llvm/Pass.h"
88 #include "llvm/Support/Casting.h"
90 #include "llvm/Support/Debug.h"
94 #include <algorithm>
95 #include <cassert>
96 #include <cstddef>
97 #include <cstdint>
98 #include <string>
99 #include <vector>
100 
101 using namespace llvm;
102 
103 #define DEBUG_TYPE "global-merge"
104 
105 // FIXME: This is only useful as a last-resort way to disable the pass.
106 static cl::opt<bool>
107 EnableGlobalMerge("enable-global-merge", cl::Hidden,
108  cl::desc("Enable the global merge pass"),
109  cl::init(true));
110 
111 static cl::opt<unsigned>
112 GlobalMergeMaxOffset("global-merge-max-offset", cl::Hidden,
113  cl::desc("Set maximum offset for global merge pass"),
114  cl::init(0));
115 
117  "global-merge-group-by-use", cl::Hidden,
118  cl::desc("Improve global merge pass to look at uses"), cl::init(true));
119 
121  "global-merge-ignore-single-use", cl::Hidden,
122  cl::desc("Improve global merge pass to ignore globals only used alone"),
123  cl::init(true));
124 
125 static cl::opt<bool>
126 EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden,
127  cl::desc("Enable global merge pass on constants"),
128  cl::init(false));
129 
130 // FIXME: this could be a transitional option, and we probably need to remove
131 // it if only we are sure this optimization could always benefit all targets.
133 EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden,
134  cl::desc("Enable global merge pass on external linkage"));
135 
136 STATISTIC(NumMerged, "Number of globals merged");
137 
138 namespace {
139 
140  class GlobalMerge : public FunctionPass {
141  const TargetMachine *TM = nullptr;
142 
143  // FIXME: Infer the maximum possible offset depending on the actual users
144  // (these max offsets are different for the users inside Thumb or ARM
145  // functions), see the code that passes in the offset in the ARM backend
146  // for more information.
147  unsigned MaxOffset;
148 
149  /// Whether we should try to optimize for size only.
150  /// Currently, this applies a dead simple heuristic: only consider globals
151  /// used in minsize functions for merging.
152  /// FIXME: This could learn about optsize, and be used in the cost model.
153  bool OnlyOptimizeForSize = false;
154 
155  /// Whether we should merge global variables that have external linkage.
156  bool MergeExternalGlobals = false;
157 
158  bool IsMachO;
159 
160  bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
161  Module &M, bool isConst, unsigned AddrSpace) const;
162 
163  /// Merge everything in \p Globals for which the corresponding bit
164  /// in \p GlobalSet is set.
165  bool doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
166  const BitVector &GlobalSet, Module &M, bool isConst,
167  unsigned AddrSpace) const;
168 
169  /// Check if the given variable has been identified as must keep
170  /// \pre setMustKeepGlobalVariables must have been called on the Module that
171  /// contains GV
172  bool isMustKeepGlobalVariable(const GlobalVariable *GV) const {
173  return MustKeepGlobalVariables.count(GV);
174  }
175 
176  /// Collect every variables marked as "used" or used in a landing pad
177  /// instruction for this Module.
178  void setMustKeepGlobalVariables(Module &M);
179 
180  /// Collect every variables marked as "used"
182 
183  /// Keep track of the GlobalVariable that must not be merged away
184  SmallPtrSet<const GlobalVariable *, 16> MustKeepGlobalVariables;
185 
186  public:
187  static char ID; // Pass identification, replacement for typeid.
188 
189  explicit GlobalMerge()
190  : FunctionPass(ID), MaxOffset(GlobalMergeMaxOffset) {
192  }
193 
194  explicit GlobalMerge(const TargetMachine *TM, unsigned MaximalOffset,
195  bool OnlyOptimizeForSize, bool MergeExternalGlobals)
196  : FunctionPass(ID), TM(TM), MaxOffset(MaximalOffset),
197  OnlyOptimizeForSize(OnlyOptimizeForSize),
198  MergeExternalGlobals(MergeExternalGlobals) {
200  }
201 
202  bool doInitialization(Module &M) override;
203  bool runOnFunction(Function &F) override;
204  bool doFinalization(Module &M) override;
205 
206  StringRef getPassName() const override { return "Merge internal globals"; }
207 
208  void getAnalysisUsage(AnalysisUsage &AU) const override {
209  AU.setPreservesCFG();
211  }
212  };
213 
214 } // end anonymous namespace
215 
216 char GlobalMerge::ID = 0;
217 
218 INITIALIZE_PASS(GlobalMerge, DEBUG_TYPE, "Merge global variables", false, false)
219 
220 bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
221  Module &M, bool isConst, unsigned AddrSpace) const {
222  auto &DL = M.getDataLayout();
223  // FIXME: Find better heuristics
225  Globals, [&DL](const GlobalVariable *GV1, const GlobalVariable *GV2) {
226  // We don't support scalable global variables.
227  return DL.getTypeAllocSize(GV1->getValueType()).getFixedSize() <
228  DL.getTypeAllocSize(GV2->getValueType()).getFixedSize();
229  });
230 
231  // If we want to just blindly group all globals together, do so.
232  if (!GlobalMergeGroupByUse) {
233  BitVector AllGlobals(Globals.size());
234  AllGlobals.set();
235  return doMerge(Globals, AllGlobals, M, isConst, AddrSpace);
236  }
237 
238  // If we want to be smarter, look at all uses of each global, to try to
239  // discover all sets of globals used together, and how many times each of
240  // these sets occurred.
241  //
242  // Keep this reasonably efficient, by having an append-only list of all sets
243  // discovered so far (UsedGlobalSet), and mapping each "together-ness" unit of
244  // code (currently, a Function) to the set of globals seen so far that are
245  // used together in that unit (GlobalUsesByFunction).
246  //
247  // When we look at the Nth global, we know that any new set is either:
248  // - the singleton set {N}, containing this global only, or
249  // - the union of {N} and a previously-discovered set, containing some
250  // combination of the previous N-1 globals.
251  // Using that knowledge, when looking at the Nth global, we can keep:
252  // - a reference to the singleton set {N} (CurGVOnlySetIdx)
253  // - a list mapping each previous set to its union with {N} (EncounteredUGS),
254  // if it actually occurs.
255 
256  // We keep track of the sets of globals used together "close enough".
257  struct UsedGlobalSet {
258  BitVector Globals;
259  unsigned UsageCount = 1;
260 
261  UsedGlobalSet(size_t Size) : Globals(Size) {}
262  };
263 
264  // Each set is unique in UsedGlobalSets.
265  std::vector<UsedGlobalSet> UsedGlobalSets;
266 
267  // Avoid repeating the create-global-set pattern.
268  auto CreateGlobalSet = [&]() -> UsedGlobalSet & {
269  UsedGlobalSets.emplace_back(Globals.size());
270  return UsedGlobalSets.back();
271  };
272 
273  // The first set is the empty set.
274  CreateGlobalSet().UsageCount = 0;
275 
276  // We define "close enough" to be "in the same function".
277  // FIXME: Grouping uses by function is way too aggressive, so we should have
278  // a better metric for distance between uses.
279  // The obvious alternative would be to group by BasicBlock, but that's in
280  // turn too conservative..
281  // Anything in between wouldn't be trivial to compute, so just stick with
282  // per-function grouping.
283 
284  // The value type is an index into UsedGlobalSets.
285  // The default (0) conveniently points to the empty set.
286  DenseMap<Function *, size_t /*UsedGlobalSetIdx*/> GlobalUsesByFunction;
287 
288  // Now, look at each merge-eligible global in turn.
289 
290  // Keep track of the sets we already encountered to which we added the
291  // current global.
292  // Each element matches the same-index element in UsedGlobalSets.
293  // This lets us efficiently tell whether a set has already been expanded to
294  // include the current global.
295  std::vector<size_t> EncounteredUGS;
296 
297  for (size_t GI = 0, GE = Globals.size(); GI != GE; ++GI) {
298  GlobalVariable *GV = Globals[GI];
299 
300  // Reset the encountered sets for this global...
301  std::fill(EncounteredUGS.begin(), EncounteredUGS.end(), 0);
302  // ...and grow it in case we created new sets for the previous global.
303  EncounteredUGS.resize(UsedGlobalSets.size());
304 
305  // We might need to create a set that only consists of the current global.
306  // Keep track of its index into UsedGlobalSets.
307  size_t CurGVOnlySetIdx = 0;
308 
309  // For each global, look at all its Uses.
310  for (auto &U : GV->uses()) {
311  // This Use might be a ConstantExpr. We're interested in Instruction
312  // users, so look through ConstantExpr...
313  Use *UI, *UE;
314  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U.getUser())) {
315  if (CE->use_empty())
316  continue;
317  UI = &*CE->use_begin();
318  UE = nullptr;
319  } else if (isa<Instruction>(U.getUser())) {
320  UI = &U;
321  UE = UI->getNext();
322  } else {
323  continue;
324  }
325 
326  // ...to iterate on all the instruction users of the global.
327  // Note that we iterate on Uses and not on Users to be able to getNext().
328  for (; UI != UE; UI = UI->getNext()) {
329  Instruction *I = dyn_cast<Instruction>(UI->getUser());
330  if (!I)
331  continue;
332 
333  Function *ParentFn = I->getParent()->getParent();
334 
335  // If we're only optimizing for size, ignore non-minsize functions.
336  if (OnlyOptimizeForSize && !ParentFn->hasMinSize())
337  continue;
338 
339  size_t UGSIdx = GlobalUsesByFunction[ParentFn];
340 
341  // If this is the first global the basic block uses, map it to the set
342  // consisting of this global only.
343  if (!UGSIdx) {
344  // If that set doesn't exist yet, create it.
345  if (!CurGVOnlySetIdx) {
346  CurGVOnlySetIdx = UsedGlobalSets.size();
347  CreateGlobalSet().Globals.set(GI);
348  } else {
349  ++UsedGlobalSets[CurGVOnlySetIdx].UsageCount;
350  }
351 
352  GlobalUsesByFunction[ParentFn] = CurGVOnlySetIdx;
353  continue;
354  }
355 
356  // If we already encountered this BB, just increment the counter.
357  if (UsedGlobalSets[UGSIdx].Globals.test(GI)) {
358  ++UsedGlobalSets[UGSIdx].UsageCount;
359  continue;
360  }
361 
362  // If not, the previous set wasn't actually used in this function.
363  --UsedGlobalSets[UGSIdx].UsageCount;
364 
365  // If we already expanded the previous set to include this global, just
366  // reuse that expanded set.
367  if (size_t ExpandedIdx = EncounteredUGS[UGSIdx]) {
368  ++UsedGlobalSets[ExpandedIdx].UsageCount;
369  GlobalUsesByFunction[ParentFn] = ExpandedIdx;
370  continue;
371  }
372 
373  // If not, create a new set consisting of the union of the previous set
374  // and this global. Mark it as encountered, so we can reuse it later.
375  GlobalUsesByFunction[ParentFn] = EncounteredUGS[UGSIdx] =
376  UsedGlobalSets.size();
377 
378  UsedGlobalSet &NewUGS = CreateGlobalSet();
379  NewUGS.Globals.set(GI);
380  NewUGS.Globals |= UsedGlobalSets[UGSIdx].Globals;
381  }
382  }
383  }
384 
385  // Now we found a bunch of sets of globals used together. We accumulated
386  // the number of times we encountered the sets (i.e., the number of blocks
387  // that use that exact set of globals).
388  //
389  // Multiply that by the size of the set to give us a crude profitability
390  // metric.
391  llvm::stable_sort(UsedGlobalSets,
392  [](const UsedGlobalSet &UGS1, const UsedGlobalSet &UGS2) {
393  return UGS1.Globals.count() * UGS1.UsageCount <
394  UGS2.Globals.count() * UGS2.UsageCount;
395  });
396 
397  // We can choose to merge all globals together, but ignore globals never used
398  // with another global. This catches the obviously non-profitable cases of
399  // having a single global, but is aggressive enough for any other case.
401  BitVector AllGlobals(Globals.size());
402  for (const UsedGlobalSet &UGS : llvm::reverse(UsedGlobalSets)) {
403  if (UGS.UsageCount == 0)
404  continue;
405  if (UGS.Globals.count() > 1)
406  AllGlobals |= UGS.Globals;
407  }
408  return doMerge(Globals, AllGlobals, M, isConst, AddrSpace);
409  }
410 
411  // Starting from the sets with the best (=biggest) profitability, find a
412  // good combination.
413  // The ideal (and expensive) solution can only be found by trying all
414  // combinations, looking for the one with the best profitability.
415  // Don't be smart about it, and just pick the first compatible combination,
416  // starting with the sets with the best profitability.
417  BitVector PickedGlobals(Globals.size());
418  bool Changed = false;
419 
420  for (const UsedGlobalSet &UGS : llvm::reverse(UsedGlobalSets)) {
421  if (UGS.UsageCount == 0)
422  continue;
423  if (PickedGlobals.anyCommon(UGS.Globals))
424  continue;
425  PickedGlobals |= UGS.Globals;
426  // If the set only contains one global, there's no point in merging.
427  // Ignore the global for inclusion in other sets though, so keep it in
428  // PickedGlobals.
429  if (UGS.Globals.count() < 2)
430  continue;
431  Changed |= doMerge(Globals, UGS.Globals, M, isConst, AddrSpace);
432  }
433 
434  return Changed;
435 }
436 
437 bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
438  const BitVector &GlobalSet, Module &M, bool isConst,
439  unsigned AddrSpace) const {
440  assert(Globals.size() > 1);
441 
442  Type *Int32Ty = Type::getInt32Ty(M.getContext());
443  Type *Int8Ty = Type::getInt8Ty(M.getContext());
444  auto &DL = M.getDataLayout();
445 
446  LLVM_DEBUG(dbgs() << " Trying to merge set, starts with #"
447  << GlobalSet.find_first() << "\n");
448 
449  bool Changed = false;
450  ssize_t i = GlobalSet.find_first();
451  while (i != -1) {
452  ssize_t j = 0;
453  uint64_t MergedSize = 0;
454  std::vector<Type*> Tys;
455  std::vector<Constant*> Inits;
456  std::vector<unsigned> StructIdxs;
457 
458  bool HasExternal = false;
459  StringRef FirstExternalName;
460  Align MaxAlign;
461  unsigned CurIdx = 0;
462  for (j = i; j != -1; j = GlobalSet.find_next(j)) {
463  Type *Ty = Globals[j]->getValueType();
464 
465  // Make sure we use the same alignment AsmPrinter would use.
466  Align Alignment = DL.getPreferredAlign(Globals[j]);
467  unsigned Padding = alignTo(MergedSize, Alignment) - MergedSize;
468  MergedSize += Padding;
469  MergedSize += DL.getTypeAllocSize(Ty);
470  if (MergedSize > MaxOffset) {
471  break;
472  }
473  if (Padding) {
474  Tys.push_back(ArrayType::get(Int8Ty, Padding));
475  Inits.push_back(ConstantAggregateZero::get(Tys.back()));
476  ++CurIdx;
477  }
478  Tys.push_back(Ty);
479  Inits.push_back(Globals[j]->getInitializer());
480  StructIdxs.push_back(CurIdx++);
481 
482  MaxAlign = std::max(MaxAlign, Alignment);
483 
484  if (Globals[j]->hasExternalLinkage() && !HasExternal) {
485  HasExternal = true;
486  FirstExternalName = Globals[j]->getName();
487  }
488  }
489 
490  // Exit early if there is only one global to merge.
491  if (Tys.size() < 2) {
492  i = j;
493  continue;
494  }
495 
496  // If merged variables doesn't have external linkage, we needn't to expose
497  // the symbol after merging.
498  GlobalValue::LinkageTypes Linkage = HasExternal
501  // Use a packed struct so we can control alignment.
502  StructType *MergedTy = StructType::get(M.getContext(), Tys, true);
503  Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
504 
505  // On Darwin external linkage needs to be preserved, otherwise
506  // dsymutil cannot preserve the debug info for the merged
507  // variables. If they have external linkage, use the symbol name
508  // of the first variable merged as the suffix of global symbol
509  // name. This avoids a link-time naming conflict for the
510  // _MergedGlobals symbols.
511  Twine MergedName =
512  (IsMachO && HasExternal)
513  ? "_MergedGlobals_" + FirstExternalName
514  : "_MergedGlobals";
515  auto MergedLinkage = IsMachO ? Linkage : GlobalValue::PrivateLinkage;
516  auto *MergedGV = new GlobalVariable(
517  M, MergedTy, isConst, MergedLinkage, MergedInit, MergedName, nullptr,
518  GlobalVariable::NotThreadLocal, AddrSpace);
519 
520  MergedGV->setAlignment(MaxAlign);
521  MergedGV->setSection(Globals[i]->getSection());
522 
523  const StructLayout *MergedLayout = DL.getStructLayout(MergedTy);
524  for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) {
525  GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage();
526  std::string Name(Globals[k]->getName());
527  GlobalValue::VisibilityTypes Visibility = Globals[k]->getVisibility();
529  Globals[k]->getDLLStorageClass();
530 
531  // Copy metadata while adjusting any debug info metadata by the original
532  // global's offset within the merged global.
533  MergedGV->copyMetadata(Globals[k],
534  MergedLayout->getElementOffset(StructIdxs[idx]));
535 
536  Constant *Idx[2] = {
538  ConstantInt::get(Int32Ty, StructIdxs[idx]),
539  };
540  Constant *GEP =
541  ConstantExpr::getInBoundsGetElementPtr(MergedTy, MergedGV, Idx);
542  Globals[k]->replaceAllUsesWith(GEP);
543  Globals[k]->eraseFromParent();
544 
545  // When the linkage is not internal we must emit an alias for the original
546  // variable name as it may be accessed from another object. On non-Mach-O
547  // we can also emit an alias for internal linkage as it's safe to do so.
548  // It's not safe on Mach-O as the alias (and thus the portion of the
549  // MergedGlobals variable) may be dead stripped at link time.
550  if (Linkage != GlobalValue::InternalLinkage || !IsMachO) {
551  GlobalAlias *GA = GlobalAlias::create(Tys[StructIdxs[idx]], AddrSpace,
552  Linkage, Name, GEP, &M);
553  GA->setVisibility(Visibility);
554  GA->setDLLStorageClass(DLLStorage);
555  }
556 
557  NumMerged++;
558  }
559  Changed = true;
560  i = j;
561  }
562 
563  return Changed;
564 }
565 
567  // Extract global variables from llvm.used array
568  const GlobalVariable *GV = M.getGlobalVariable(Name);
569  if (!GV || !GV->hasInitializer()) return;
570 
571  // Should be an array of 'i8*'.
572  const ConstantArray *InitList = cast<ConstantArray>(GV->getInitializer());
573 
574  for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
575  if (const GlobalVariable *G =
576  dyn_cast<GlobalVariable>(InitList->getOperand(i)->stripPointerCasts()))
577  MustKeepGlobalVariables.insert(G);
578 }
579 
580 void GlobalMerge::setMustKeepGlobalVariables(Module &M) {
581  collectUsedGlobalVariables(M, "llvm.used");
582  collectUsedGlobalVariables(M, "llvm.compiler.used");
583 
584  for (Function &F : M) {
585  for (BasicBlock &BB : F) {
586  Instruction *Pad = BB.getFirstNonPHI();
587  if (!Pad->isEHPad())
588  continue;
589 
590  // Keep globals used by landingpads and catchpads.
591  for (const Use &U : Pad->operands()) {
592  if (const GlobalVariable *GV =
593  dyn_cast<GlobalVariable>(U->stripPointerCasts()))
594  MustKeepGlobalVariables.insert(GV);
595  else if (const ConstantArray *CA = dyn_cast<ConstantArray>(U->stripPointerCasts())) {
596  for (const Use &Elt : CA->operands()) {
597  if (const GlobalVariable *GV =
598  dyn_cast<GlobalVariable>(Elt->stripPointerCasts()))
599  MustKeepGlobalVariables.insert(GV);
600  }
601  }
602  }
603  }
604  }
605 }
606 
607 bool GlobalMerge::doInitialization(Module &M) {
608  if (!EnableGlobalMerge)
609  return false;
610 
611  IsMachO = Triple(M.getTargetTriple()).isOSBinFormatMachO();
612 
613  auto &DL = M.getDataLayout();
615  Globals, ConstGlobals, BSSGlobals;
616  bool Changed = false;
617  setMustKeepGlobalVariables(M);
618 
619  LLVM_DEBUG({
620  dbgs() << "Number of GV that must be kept: " <<
621  MustKeepGlobalVariables.size() << "\n";
622  for (auto KeptGV = MustKeepGlobalVariables.begin();
623  KeptGV != MustKeepGlobalVariables.end(); KeptGV++)
624  dbgs() << "Kept: " << **KeptGV << "\n";
625  });
626  // Grab all non-const globals.
627  for (auto &GV : M.globals()) {
628  // Merge is safe for "normal" internal or external globals only
629  if (GV.isDeclaration() || GV.isThreadLocal() || GV.hasImplicitSection())
630  continue;
631 
632  // It's not safe to merge globals that may be preempted
633  if (TM && !TM->shouldAssumeDSOLocal(M, &GV))
634  continue;
635 
636  if (!(MergeExternalGlobals && GV.hasExternalLinkage()) &&
637  !GV.hasInternalLinkage())
638  continue;
639 
640  PointerType *PT = dyn_cast<PointerType>(GV.getType());
641  assert(PT && "Global variable is not a pointer!");
642 
643  unsigned AddressSpace = PT->getAddressSpace();
645 
646  // Ignore all 'special' globals.
647  if (GV.getName().startswith("llvm.") ||
648  GV.getName().startswith(".llvm."))
649  continue;
650 
651  // Ignore all "required" globals:
652  if (isMustKeepGlobalVariable(&GV))
653  continue;
654 
655  Type *Ty = GV.getValueType();
656  if (DL.getTypeAllocSize(Ty) < MaxOffset) {
657  if (TM &&
659  BSSGlobals[{AddressSpace, Section}].push_back(&GV);
660  else if (GV.isConstant())
661  ConstGlobals[{AddressSpace, Section}].push_back(&GV);
662  else
663  Globals[{AddressSpace, Section}].push_back(&GV);
664  }
665  }
666 
667  for (auto &P : Globals)
668  if (P.second.size() > 1)
669  Changed |= doMerge(P.second, M, false, P.first.first);
670 
671  for (auto &P : BSSGlobals)
672  if (P.second.size() > 1)
673  Changed |= doMerge(P.second, M, false, P.first.first);
674 
676  for (auto &P : ConstGlobals)
677  if (P.second.size() > 1)
678  Changed |= doMerge(P.second, M, true, P.first.first);
679 
680  return Changed;
681 }
682 
684  return false;
685 }
686 
687 bool GlobalMerge::doFinalization(Module &M) {
688  MustKeepGlobalVariables.clear();
689  return false;
690 }
691 
693  bool OnlyOptimizeForSize,
694  bool MergeExternalByDefault) {
695  bool MergeExternal = (EnableGlobalMergeOnExternal == cl::BOU_UNSET) ?
696  MergeExternalByDefault : (EnableGlobalMergeOnExternal == cl::BOU_TRUE);
697  return new GlobalMerge(TM, Offset, OnlyOptimizeForSize, MergeExternal);
698 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:77
i
i
Definition: README.txt:29
llvm::alignTo
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:156
Int32Ty
IntegerType * Int32Ty
Definition: NVVMIntrRange.cpp:67
getName
static StringRef getName(Value *V)
Definition: ProvenanceAnalysisEvaluator.cpp:42
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::initializeGlobalMergePass
void initializeGlobalMergePass(PassRegistry &)
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::StructType::get
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:406
llvm::User::operands
op_range operands()
Definition: User.h:242
llvm::GlobalValue::hasExternalLinkage
bool hasExternalLinkage() const
Definition: GlobalValue.h:492
llvm::Function
Definition: Function.h:60
StringRef.h
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::ConstantStruct::get
static Constant * get(StructType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1306
Pass.h
llvm::GlobalObject::getSection
StringRef getSection() const
Get the custom section of this global if it has one.
Definition: GlobalObject.h:111
llvm::GlobalValue::NotThreadLocal
@ NotThreadLocal
Definition: GlobalValue.h:184
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1183
Statistic.h
llvm::GlobalVariable
Definition: GlobalVariable.h:39
llvm::PointerType::getAddressSpace
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:682
EnableGlobalMergeOnConst
static cl::opt< bool > EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden, cl::desc("Enable global merge pass on constants"), cl::init(false))
llvm::GlobalAlias
Definition: GlobalAlias.h:28
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
llvm::object::getSection
Expected< const typename ELFT::Shdr * > getSection(typename ELFT::ShdrRange Sections, uint32_t Index)
Definition: ELF.h:404
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
DenseMap.h
Module.h
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:381
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::ConstantExpr::getInBoundsGetElementPtr
static Constant * getInBoundsGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList)
Create an "inbounds" getelementptr.
Definition: Constants.h:1253
llvm::GlobalValue::LinkageTypes
LinkageTypes
An enumeration for the kinds of linkage for global values.
Definition: GlobalValue.h:47
GlobalMergeGroupByUse
static cl::opt< bool > GlobalMergeGroupByUse("global-merge-group-by-use", cl::Hidden, cl::desc("Improve global merge pass to look at uses"), cl::init(true))
llvm::Type::getInt8Ty
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:237
Use.h
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:239
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::GlobalVariable::hasInitializer
bool hasInitializer() const
Definitions have initializers, declarations don't.
Definition: GlobalVariable.h:91
llvm::collectUsedGlobalVariables
GlobalVariable * collectUsedGlobalVariables(const Module &M, SmallVectorImpl< GlobalValue * > &Vec, bool CompilerUsed)
Given "llvm.used" or "llvm.compiler.used" as a global name, collect the initializer elements of that ...
Definition: Module.cpp:798
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::ARMBuildAttrs::Section
@ Section
Legacy Tags.
Definition: ARMBuildAttributes.h:82
Instruction.h
CommandLine.h
llvm::MachineBasicBlock::push_back
void push_back(MachineInstr *MI)
Definition: MachineBasicBlock.h:875
GlobalValue.h
TargetMachine.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24
llvm::GlobalValue::isDeclaration
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:264
llvm::GlobalValue::VisibilityTypes
VisibilityTypes
An enumeration for the kinds of visibility of global values.
Definition: GlobalValue.h:62
llvm::StringRef::startswith
bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:248
Constants.h
Twine.h
INITIALIZE_PASS
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:37
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::Value::uses
iterator_range< use_iterator > uses()
Definition: Value.h:376
llvm::ConstantArray
ConstantArray - Constant Array Declarations.
Definition: Constants.h:410
llvm::Triple::isOSBinFormatMachO
bool isOSBinFormatMachO() const
Tests whether the environment is MachO.
Definition: Triple.h:669
llvm::Instruction
Definition: Instruction.h:42
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
BitVector.h
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:879
SmallPtrSet.h
llvm::GlobalValue::hasInternalLinkage
bool hasInternalLinkage() const
Definition: GlobalValue.h:507
llvm::BitVector
Definition: BitVector.h:75
llvm::GlobalValue::InternalLinkage
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::TargetLoweringObjectFile::getKindForGlobal
static SectionKind getKindForGlobal(const GlobalObject *GO, const TargetMachine &TM)
Classify the specified global variable into a set of target independent categories embodied in Sectio...
Definition: TargetLoweringObjectFile.cpp:200
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Type.h
Passes.h
G
const DataFlowGraph & G
Definition: RDFGraph.cpp:200
BasicBlock.h
llvm::cl::opt< bool >
llvm::GlobalVariable::getInitializer
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
Definition: GlobalVariable.h:135
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::cl::BOU_UNSET
@ BOU_UNSET
Definition: CommandLine.h:618
llvm::StructLayout
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:622
uint64_t
GlobalMergeIgnoreSingleUse
static cl::opt< bool > GlobalMergeIgnoreSingleUse("global-merge-ignore-single-use", cl::Hidden, cl::desc("Improve global merge pass to ignore globals only used alone"), cl::init(true))
llvm::SmallPtrSetImpl::end
iterator end() const
Definition: SmallPtrSet.h:408
llvm::GlobalVariable::hasImplicitSection
bool hasImplicitSection() const
Check if section name is present.
Definition: GlobalVariable.h:242
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:53
llvm::DenseMap
Definition: DenseMap.h:714
EnableGlobalMerge
static cl::opt< bool > EnableGlobalMerge("enable-global-merge", cl::Hidden, cl::desc("Enable the global merge pass"), cl::init(true))
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:439
EnableGlobalMergeOnExternal
static cl::opt< cl::boolOrDefault > EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden, cl::desc("Enable global merge pass on external linkage"))
llvm::SmallPtrSetImpl::begin
iterator begin() const
Definition: SmallPtrSet.h:403
llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:632
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
llvm::GlobalValue::isThreadLocal
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:249
llvm::WinEH::EncodingType::CE
@ CE
Windows NT (Windows on ARM)
llvm::ArrayType::get
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:638
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
Triple.h
llvm::SmallPtrSetImplBase::clear
void clear()
Definition: SmallPtrSet.h:95
llvm::AArch64CC::GE
@ GE
Definition: AArch64BaseInfo.h:265
DataLayout.h
llvm::StructType
Class to represent struct types.
Definition: DerivedTypes.h:213
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:263
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
TargetLoweringObjectFile.h
llvm::GlobalValue::setDLLStorageClass
void setDLLStorageClass(DLLStorageClassTypes C)
Definition: GlobalValue.h:270
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
GlobalMergeMaxOffset
static cl::opt< unsigned > GlobalMergeMaxOffset("global-merge-max-offset", cl::Hidden, cl::desc("Set maximum offset for global merge pass"), cl::init(0))
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
llvm::Value::stripPointerCasts
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:685
llvm::SmallPtrSetImplBase::size
size_type size() const
Definition: SmallPtrSet.h:93
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
j
return j(j<< 16)
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
llvm::stable_sort
void stable_sort(R &&Range)
Definition: STLExtras.h:1762
llvm::ConstantExpr
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:972
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:50
llvm::GlobalAlias::create
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition: Globals.cpp:509
llvm::StructLayout::getElementOffset
uint64_t getElementOffset(unsigned Idx) const
Definition: DataLayout.h:652
GlobalVariable.h
Casting.h
Function.h
llvm::Instruction::isEHPad
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:642
llvm::BitVector::find_next
int find_next(unsigned Prev) const
find_next - Returns the index of the next set bit following the "Prev" bit.
Definition: BitVector.h:301
SectionKind.h
GlobalAlias.h
llvm::createGlobalMergePass
Pass * createGlobalMergePass(const TargetMachine *TM, unsigned MaximalOffset, bool OnlyOptimizeForSize=false, bool MergeExternalByDefault=false)
GlobalMerge - This pass merges internal (by default) globals into structs to enable reuse of a base p...
Definition: GlobalMerge.cpp:692
llvm::pdb::PDB_ColorItem::Padding
@ Padding
llvm::GlobalValue::ExternalLinkage
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:48
llvm::GlobalValue::DLLStorageClassTypes
DLLStorageClassTypes
Storage classes of global values for PE targets.
Definition: GlobalValue.h:69
llvm::cl::BOU_TRUE
@ BOU_TRUE
Definition: CommandLine.h:618
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
llvm::GlobalValue::PrivateLinkage
@ PrivateLinkage
Like Internal, but omit from symbol table.
Definition: GlobalValue.h:56
llvm::User::getNumOperands
unsigned getNumOperands() const
Definition: User.h:191
SmallVector.h
User.h
llvm::GlobalVariable::isConstant
bool isConstant() const
If the value is a global constant, its value is immutable throughout the runtime execution of the pro...
Definition: GlobalVariable.h:152
llvm::BitVector::find_first
int find_first() const
find_first - Returns the index of the first set bit, -1 if none of the bits are set.
Definition: BitVector.h:293
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
llvm::Function::hasMinSize
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:661
llvm::GlobalValue::getType
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:276
llvm::Pass::getAnalysisUsage
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:97
DerivedTypes.h
llvm::GlobalValue::getValueType
Type * getValueType() const
Definition: GlobalValue.h:278
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:171
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:412
raw_ostream.h
llvm::ConstantAggregateZero::get
static ConstantAggregateZero * get(Type *Ty)
Definition: Constants.cpp:1589
DEBUG_TYPE
#define DEBUG_TYPE
Definition: GlobalMerge.cpp:103
llvm::BitVector::back
bool back() const
Return the last element in the vector.
Definition: BitVector.h:449
llvm::GlobalValue::setVisibility
void setVisibility(VisibilityTypes V)
Definition: GlobalValue.h:240
InitializePasses.h
Debug.h
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38