LLVM  16.0.0git
GlobalMerge.cpp
Go to the documentation of this file.
1 //===- GlobalMerge.cpp - Internal globals merging -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass merges globals with internal linkage into one. This way all the
10 // globals which were merged into a biggest one can be addressed using offsets
11 // from the same base pointer (no need for separate base pointer for each of the
12 // global). Such a transformation can significantly reduce the register pressure
13 // when many globals are involved.
14 //
15 // For example, consider the code which touches several global variables at
16 // once:
17 //
18 // static int foo[N], bar[N], baz[N];
19 //
20 // for (i = 0; i < N; ++i) {
21 // foo[i] = bar[i] * baz[i];
22 // }
23 //
24 // On ARM the addresses of 3 arrays should be kept in the registers, thus
25 // this code has quite large register pressure (loop body):
26 //
27 // ldr r1, [r5], #4
28 // ldr r2, [r6], #4
29 // mul r1, r2, r1
30 // str r1, [r0], #4
31 //
32 // Pass converts the code to something like:
33 //
34 // static struct {
35 // int foo[N];
36 // int bar[N];
37 // int baz[N];
38 // } merged;
39 //
40 // for (i = 0; i < N; ++i) {
41 // merged.foo[i] = merged.bar[i] * merged.baz[i];
42 // }
43 //
44 // and in ARM code this becomes:
45 //
46 // ldr r0, [r5, #40]
47 // ldr r1, [r5, #80]
48 // mul r0, r1, r0
49 // str r0, [r5], #4
50 //
51 // note that we saved 2 registers here almostly "for free".
52 //
53 // However, merging globals can have tradeoffs:
54 // - it confuses debuggers, tools, and users
55 // - it makes linker optimizations less useful (order files, LOHs, ...)
56 // - it forces usage of indexed addressing (which isn't necessarily "free")
57 // - it can increase register pressure when the uses are disparate enough.
58 //
59 // We use heuristics to discover the best global grouping we can (cf cl::opts).
60 //
61 // ===---------------------------------------------------------------------===//
62 
63 #include "llvm/ADT/BitVector.h"
64 #include "llvm/ADT/DenseMap.h"
65 #include "llvm/ADT/SetVector.h"
66 #include "llvm/ADT/SmallPtrSet.h"
67 #include "llvm/ADT/SmallVector.h"
68 #include "llvm/ADT/Statistic.h"
69 #include "llvm/ADT/StringRef.h"
70 #include "llvm/ADT/Triple.h"
71 #include "llvm/ADT/Twine.h"
72 #include "llvm/CodeGen/Passes.h"
73 #include "llvm/IR/BasicBlock.h"
74 #include "llvm/IR/Constants.h"
75 #include "llvm/IR/DataLayout.h"
76 #include "llvm/IR/DerivedTypes.h"
77 #include "llvm/IR/Function.h"
78 #include "llvm/IR/GlobalAlias.h"
79 #include "llvm/IR/GlobalValue.h"
80 #include "llvm/IR/GlobalVariable.h"
81 #include "llvm/IR/Instruction.h"
82 #include "llvm/IR/Module.h"
83 #include "llvm/IR/Type.h"
84 #include "llvm/IR/Use.h"
85 #include "llvm/IR/User.h"
86 #include "llvm/InitializePasses.h"
87 #include "llvm/MC/SectionKind.h"
88 #include "llvm/Pass.h"
89 #include "llvm/Support/Casting.h"
91 #include "llvm/Support/Debug.h"
95 #include <algorithm>
96 #include <cassert>
97 #include <cstddef>
98 #include <cstdint>
99 #include <string>
100 #include <vector>
101 
102 using namespace llvm;
103 
104 #define DEBUG_TYPE "global-merge"
105 
106 // FIXME: This is only useful as a last-resort way to disable the pass.
107 static cl::opt<bool>
108 EnableGlobalMerge("enable-global-merge", cl::Hidden,
109  cl::desc("Enable the global merge pass"),
110  cl::init(true));
111 
112 static cl::opt<unsigned>
113 GlobalMergeMaxOffset("global-merge-max-offset", cl::Hidden,
114  cl::desc("Set maximum offset for global merge pass"),
115  cl::init(0));
116 
118  "global-merge-group-by-use", cl::Hidden,
119  cl::desc("Improve global merge pass to look at uses"), cl::init(true));
120 
122  "global-merge-ignore-single-use", cl::Hidden,
123  cl::desc("Improve global merge pass to ignore globals only used alone"),
124  cl::init(true));
125 
126 static cl::opt<bool>
127 EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden,
128  cl::desc("Enable global merge pass on constants"),
129  cl::init(false));
130 
131 // FIXME: this could be a transitional option, and we probably need to remove
132 // it if only we are sure this optimization could always benefit all targets.
134 EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden,
135  cl::desc("Enable global merge pass on external linkage"));
136 
137 STATISTIC(NumMerged, "Number of globals merged");
138 
139 namespace {
140 
141  class GlobalMerge : public FunctionPass {
142  const TargetMachine *TM = nullptr;
143 
144  // FIXME: Infer the maximum possible offset depending on the actual users
145  // (these max offsets are different for the users inside Thumb or ARM
146  // functions), see the code that passes in the offset in the ARM backend
147  // for more information.
148  unsigned MaxOffset;
149 
150  /// Whether we should try to optimize for size only.
151  /// Currently, this applies a dead simple heuristic: only consider globals
152  /// used in minsize functions for merging.
153  /// FIXME: This could learn about optsize, and be used in the cost model.
154  bool OnlyOptimizeForSize = false;
155 
156  /// Whether we should merge global variables that have external linkage.
157  bool MergeExternalGlobals = false;
158 
159  bool IsMachO;
160 
161  bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
162  Module &M, bool isConst, unsigned AddrSpace) const;
163 
164  /// Merge everything in \p Globals for which the corresponding bit
165  /// in \p GlobalSet is set.
166  bool doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
167  const BitVector &GlobalSet, Module &M, bool isConst,
168  unsigned AddrSpace) const;
169 
170  /// Check if the given variable has been identified as must keep
171  /// \pre setMustKeepGlobalVariables must have been called on the Module that
172  /// contains GV
173  bool isMustKeepGlobalVariable(const GlobalVariable *GV) const {
174  return MustKeepGlobalVariables.count(GV);
175  }
176 
177  /// Collect every variables marked as "used" or used in a landing pad
178  /// instruction for this Module.
179  void setMustKeepGlobalVariables(Module &M);
180 
181  /// Collect every variables marked as "used"
183 
184  /// Keep track of the GlobalVariable that must not be merged away
185  SmallSetVector<const GlobalVariable *, 16> MustKeepGlobalVariables;
186 
187  public:
188  static char ID; // Pass identification, replacement for typeid.
189 
190  explicit GlobalMerge()
191  : FunctionPass(ID), MaxOffset(GlobalMergeMaxOffset) {
193  }
194 
195  explicit GlobalMerge(const TargetMachine *TM, unsigned MaximalOffset,
196  bool OnlyOptimizeForSize, bool MergeExternalGlobals)
197  : FunctionPass(ID), TM(TM), MaxOffset(MaximalOffset),
198  OnlyOptimizeForSize(OnlyOptimizeForSize),
199  MergeExternalGlobals(MergeExternalGlobals) {
201  }
202 
203  bool doInitialization(Module &M) override;
204  bool runOnFunction(Function &F) override;
205  bool doFinalization(Module &M) override;
206 
207  StringRef getPassName() const override { return "Merge internal globals"; }
208 
209  void getAnalysisUsage(AnalysisUsage &AU) const override {
210  AU.setPreservesCFG();
212  }
213  };
214 
215 } // end anonymous namespace
216 
217 char GlobalMerge::ID = 0;
218 
219 INITIALIZE_PASS(GlobalMerge, DEBUG_TYPE, "Merge global variables", false, false)
220 
221 bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
222  Module &M, bool isConst, unsigned AddrSpace) const {
223  auto &DL = M.getDataLayout();
224  // FIXME: Find better heuristics
226  Globals, [&DL](const GlobalVariable *GV1, const GlobalVariable *GV2) {
227  // We don't support scalable global variables.
228  return DL.getTypeAllocSize(GV1->getValueType()).getFixedSize() <
229  DL.getTypeAllocSize(GV2->getValueType()).getFixedSize();
230  });
231 
232  // If we want to just blindly group all globals together, do so.
233  if (!GlobalMergeGroupByUse) {
234  BitVector AllGlobals(Globals.size());
235  AllGlobals.set();
236  return doMerge(Globals, AllGlobals, M, isConst, AddrSpace);
237  }
238 
239  // If we want to be smarter, look at all uses of each global, to try to
240  // discover all sets of globals used together, and how many times each of
241  // these sets occurred.
242  //
243  // Keep this reasonably efficient, by having an append-only list of all sets
244  // discovered so far (UsedGlobalSet), and mapping each "together-ness" unit of
245  // code (currently, a Function) to the set of globals seen so far that are
246  // used together in that unit (GlobalUsesByFunction).
247  //
248  // When we look at the Nth global, we know that any new set is either:
249  // - the singleton set {N}, containing this global only, or
250  // - the union of {N} and a previously-discovered set, containing some
251  // combination of the previous N-1 globals.
252  // Using that knowledge, when looking at the Nth global, we can keep:
253  // - a reference to the singleton set {N} (CurGVOnlySetIdx)
254  // - a list mapping each previous set to its union with {N} (EncounteredUGS),
255  // if it actually occurs.
256 
257  // We keep track of the sets of globals used together "close enough".
258  struct UsedGlobalSet {
259  BitVector Globals;
260  unsigned UsageCount = 1;
261 
262  UsedGlobalSet(size_t Size) : Globals(Size) {}
263  };
264 
265  // Each set is unique in UsedGlobalSets.
266  std::vector<UsedGlobalSet> UsedGlobalSets;
267 
268  // Avoid repeating the create-global-set pattern.
269  auto CreateGlobalSet = [&]() -> UsedGlobalSet & {
270  UsedGlobalSets.emplace_back(Globals.size());
271  return UsedGlobalSets.back();
272  };
273 
274  // The first set is the empty set.
275  CreateGlobalSet().UsageCount = 0;
276 
277  // We define "close enough" to be "in the same function".
278  // FIXME: Grouping uses by function is way too aggressive, so we should have
279  // a better metric for distance between uses.
280  // The obvious alternative would be to group by BasicBlock, but that's in
281  // turn too conservative..
282  // Anything in between wouldn't be trivial to compute, so just stick with
283  // per-function grouping.
284 
285  // The value type is an index into UsedGlobalSets.
286  // The default (0) conveniently points to the empty set.
287  DenseMap<Function *, size_t /*UsedGlobalSetIdx*/> GlobalUsesByFunction;
288 
289  // Now, look at each merge-eligible global in turn.
290 
291  // Keep track of the sets we already encountered to which we added the
292  // current global.
293  // Each element matches the same-index element in UsedGlobalSets.
294  // This lets us efficiently tell whether a set has already been expanded to
295  // include the current global.
296  std::vector<size_t> EncounteredUGS;
297 
298  for (size_t GI = 0, GE = Globals.size(); GI != GE; ++GI) {
299  GlobalVariable *GV = Globals[GI];
300 
301  // Reset the encountered sets for this global...
302  std::fill(EncounteredUGS.begin(), EncounteredUGS.end(), 0);
303  // ...and grow it in case we created new sets for the previous global.
304  EncounteredUGS.resize(UsedGlobalSets.size());
305 
306  // We might need to create a set that only consists of the current global.
307  // Keep track of its index into UsedGlobalSets.
308  size_t CurGVOnlySetIdx = 0;
309 
310  // For each global, look at all its Uses.
311  for (auto &U : GV->uses()) {
312  // This Use might be a ConstantExpr. We're interested in Instruction
313  // users, so look through ConstantExpr...
314  Use *UI, *UE;
315  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U.getUser())) {
316  if (CE->use_empty())
317  continue;
318  UI = &*CE->use_begin();
319  UE = nullptr;
320  } else if (isa<Instruction>(U.getUser())) {
321  UI = &U;
322  UE = UI->getNext();
323  } else {
324  continue;
325  }
326 
327  // ...to iterate on all the instruction users of the global.
328  // Note that we iterate on Uses and not on Users to be able to getNext().
329  for (; UI != UE; UI = UI->getNext()) {
330  Instruction *I = dyn_cast<Instruction>(UI->getUser());
331  if (!I)
332  continue;
333 
334  Function *ParentFn = I->getParent()->getParent();
335 
336  // If we're only optimizing for size, ignore non-minsize functions.
337  if (OnlyOptimizeForSize && !ParentFn->hasMinSize())
338  continue;
339 
340  size_t UGSIdx = GlobalUsesByFunction[ParentFn];
341 
342  // If this is the first global the basic block uses, map it to the set
343  // consisting of this global only.
344  if (!UGSIdx) {
345  // If that set doesn't exist yet, create it.
346  if (!CurGVOnlySetIdx) {
347  CurGVOnlySetIdx = UsedGlobalSets.size();
348  CreateGlobalSet().Globals.set(GI);
349  } else {
350  ++UsedGlobalSets[CurGVOnlySetIdx].UsageCount;
351  }
352 
353  GlobalUsesByFunction[ParentFn] = CurGVOnlySetIdx;
354  continue;
355  }
356 
357  // If we already encountered this BB, just increment the counter.
358  if (UsedGlobalSets[UGSIdx].Globals.test(GI)) {
359  ++UsedGlobalSets[UGSIdx].UsageCount;
360  continue;
361  }
362 
363  // If not, the previous set wasn't actually used in this function.
364  --UsedGlobalSets[UGSIdx].UsageCount;
365 
366  // If we already expanded the previous set to include this global, just
367  // reuse that expanded set.
368  if (size_t ExpandedIdx = EncounteredUGS[UGSIdx]) {
369  ++UsedGlobalSets[ExpandedIdx].UsageCount;
370  GlobalUsesByFunction[ParentFn] = ExpandedIdx;
371  continue;
372  }
373 
374  // If not, create a new set consisting of the union of the previous set
375  // and this global. Mark it as encountered, so we can reuse it later.
376  GlobalUsesByFunction[ParentFn] = EncounteredUGS[UGSIdx] =
377  UsedGlobalSets.size();
378 
379  UsedGlobalSet &NewUGS = CreateGlobalSet();
380  NewUGS.Globals.set(GI);
381  NewUGS.Globals |= UsedGlobalSets[UGSIdx].Globals;
382  }
383  }
384  }
385 
386  // Now we found a bunch of sets of globals used together. We accumulated
387  // the number of times we encountered the sets (i.e., the number of blocks
388  // that use that exact set of globals).
389  //
390  // Multiply that by the size of the set to give us a crude profitability
391  // metric.
392  llvm::stable_sort(UsedGlobalSets,
393  [](const UsedGlobalSet &UGS1, const UsedGlobalSet &UGS2) {
394  return UGS1.Globals.count() * UGS1.UsageCount <
395  UGS2.Globals.count() * UGS2.UsageCount;
396  });
397 
398  // We can choose to merge all globals together, but ignore globals never used
399  // with another global. This catches the obviously non-profitable cases of
400  // having a single global, but is aggressive enough for any other case.
402  BitVector AllGlobals(Globals.size());
403  for (const UsedGlobalSet &UGS : llvm::reverse(UsedGlobalSets)) {
404  if (UGS.UsageCount == 0)
405  continue;
406  if (UGS.Globals.count() > 1)
407  AllGlobals |= UGS.Globals;
408  }
409  return doMerge(Globals, AllGlobals, M, isConst, AddrSpace);
410  }
411 
412  // Starting from the sets with the best (=biggest) profitability, find a
413  // good combination.
414  // The ideal (and expensive) solution can only be found by trying all
415  // combinations, looking for the one with the best profitability.
416  // Don't be smart about it, and just pick the first compatible combination,
417  // starting with the sets with the best profitability.
418  BitVector PickedGlobals(Globals.size());
419  bool Changed = false;
420 
421  for (const UsedGlobalSet &UGS : llvm::reverse(UsedGlobalSets)) {
422  if (UGS.UsageCount == 0)
423  continue;
424  if (PickedGlobals.anyCommon(UGS.Globals))
425  continue;
426  PickedGlobals |= UGS.Globals;
427  // If the set only contains one global, there's no point in merging.
428  // Ignore the global for inclusion in other sets though, so keep it in
429  // PickedGlobals.
430  if (UGS.Globals.count() < 2)
431  continue;
432  Changed |= doMerge(Globals, UGS.Globals, M, isConst, AddrSpace);
433  }
434 
435  return Changed;
436 }
437 
438 bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
439  const BitVector &GlobalSet, Module &M, bool isConst,
440  unsigned AddrSpace) const {
441  assert(Globals.size() > 1);
442 
443  Type *Int32Ty = Type::getInt32Ty(M.getContext());
444  Type *Int8Ty = Type::getInt8Ty(M.getContext());
445  auto &DL = M.getDataLayout();
446 
447  LLVM_DEBUG(dbgs() << " Trying to merge set, starts with #"
448  << GlobalSet.find_first() << "\n");
449 
450  bool Changed = false;
451  ssize_t i = GlobalSet.find_first();
452  while (i != -1) {
453  ssize_t j = 0;
454  uint64_t MergedSize = 0;
455  std::vector<Type*> Tys;
456  std::vector<Constant*> Inits;
457  std::vector<unsigned> StructIdxs;
458 
459  bool HasExternal = false;
460  StringRef FirstExternalName;
461  Align MaxAlign;
462  unsigned CurIdx = 0;
463  for (j = i; j != -1; j = GlobalSet.find_next(j)) {
464  Type *Ty = Globals[j]->getValueType();
465 
466  // Make sure we use the same alignment AsmPrinter would use.
467  Align Alignment = DL.getPreferredAlign(Globals[j]);
468  unsigned Padding = alignTo(MergedSize, Alignment) - MergedSize;
469  MergedSize += Padding;
470  MergedSize += DL.getTypeAllocSize(Ty);
471  if (MergedSize > MaxOffset) {
472  break;
473  }
474  if (Padding) {
475  Tys.push_back(ArrayType::get(Int8Ty, Padding));
476  Inits.push_back(ConstantAggregateZero::get(Tys.back()));
477  ++CurIdx;
478  }
479  Tys.push_back(Ty);
480  Inits.push_back(Globals[j]->getInitializer());
481  StructIdxs.push_back(CurIdx++);
482 
483  MaxAlign = std::max(MaxAlign, Alignment);
484 
485  if (Globals[j]->hasExternalLinkage() && !HasExternal) {
486  HasExternal = true;
487  FirstExternalName = Globals[j]->getName();
488  }
489  }
490 
491  // Exit early if there is only one global to merge.
492  if (Tys.size() < 2) {
493  i = j;
494  continue;
495  }
496 
497  // If merged variables doesn't have external linkage, we needn't to expose
498  // the symbol after merging.
499  GlobalValue::LinkageTypes Linkage = HasExternal
502  // Use a packed struct so we can control alignment.
503  StructType *MergedTy = StructType::get(M.getContext(), Tys, true);
504  Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
505 
506  // On Darwin external linkage needs to be preserved, otherwise
507  // dsymutil cannot preserve the debug info for the merged
508  // variables. If they have external linkage, use the symbol name
509  // of the first variable merged as the suffix of global symbol
510  // name. This avoids a link-time naming conflict for the
511  // _MergedGlobals symbols.
512  Twine MergedName =
513  (IsMachO && HasExternal)
514  ? "_MergedGlobals_" + FirstExternalName
515  : "_MergedGlobals";
516  auto MergedLinkage = IsMachO ? Linkage : GlobalValue::PrivateLinkage;
517  auto *MergedGV = new GlobalVariable(
518  M, MergedTy, isConst, MergedLinkage, MergedInit, MergedName, nullptr,
519  GlobalVariable::NotThreadLocal, AddrSpace);
520 
521  MergedGV->setAlignment(MaxAlign);
522  MergedGV->setSection(Globals[i]->getSection());
523 
524  const StructLayout *MergedLayout = DL.getStructLayout(MergedTy);
525  for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) {
526  GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage();
527  std::string Name(Globals[k]->getName());
528  GlobalValue::VisibilityTypes Visibility = Globals[k]->getVisibility();
530  Globals[k]->getDLLStorageClass();
531 
532  // Copy metadata while adjusting any debug info metadata by the original
533  // global's offset within the merged global.
534  MergedGV->copyMetadata(Globals[k],
535  MergedLayout->getElementOffset(StructIdxs[idx]));
536 
537  Constant *Idx[2] = {
539  ConstantInt::get(Int32Ty, StructIdxs[idx]),
540  };
541  Constant *GEP =
542  ConstantExpr::getInBoundsGetElementPtr(MergedTy, MergedGV, Idx);
543  Globals[k]->replaceAllUsesWith(GEP);
544  Globals[k]->eraseFromParent();
545 
546  // When the linkage is not internal we must emit an alias for the original
547  // variable name as it may be accessed from another object. On non-Mach-O
548  // we can also emit an alias for internal linkage as it's safe to do so.
549  // It's not safe on Mach-O as the alias (and thus the portion of the
550  // MergedGlobals variable) may be dead stripped at link time.
551  if (Linkage != GlobalValue::InternalLinkage || !IsMachO) {
552  GlobalAlias *GA = GlobalAlias::create(Tys[StructIdxs[idx]], AddrSpace,
553  Linkage, Name, GEP, &M);
554  GA->setVisibility(Visibility);
555  GA->setDLLStorageClass(DLLStorage);
556  }
557 
558  NumMerged++;
559  }
560  Changed = true;
561  i = j;
562  }
563 
564  return Changed;
565 }
566 
568  // Extract global variables from llvm.used array
569  const GlobalVariable *GV = M.getGlobalVariable(Name);
570  if (!GV || !GV->hasInitializer()) return;
571 
572  // Should be an array of 'i8*'.
573  const ConstantArray *InitList = cast<ConstantArray>(GV->getInitializer());
574 
575  for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
576  if (const GlobalVariable *G =
577  dyn_cast<GlobalVariable>(InitList->getOperand(i)->stripPointerCasts()))
578  MustKeepGlobalVariables.insert(G);
579 }
580 
581 void GlobalMerge::setMustKeepGlobalVariables(Module &M) {
582  collectUsedGlobalVariables(M, "llvm.used");
583  collectUsedGlobalVariables(M, "llvm.compiler.used");
584 
585  for (Function &F : M) {
586  for (BasicBlock &BB : F) {
587  Instruction *Pad = BB.getFirstNonPHI();
588  if (!Pad->isEHPad())
589  continue;
590 
591  // Keep globals used by landingpads and catchpads.
592  for (const Use &U : Pad->operands()) {
593  if (const GlobalVariable *GV =
594  dyn_cast<GlobalVariable>(U->stripPointerCasts()))
595  MustKeepGlobalVariables.insert(GV);
596  else if (const ConstantArray *CA = dyn_cast<ConstantArray>(U->stripPointerCasts())) {
597  for (const Use &Elt : CA->operands()) {
598  if (const GlobalVariable *GV =
599  dyn_cast<GlobalVariable>(Elt->stripPointerCasts()))
600  MustKeepGlobalVariables.insert(GV);
601  }
602  }
603  }
604  }
605  }
606 }
607 
608 bool GlobalMerge::doInitialization(Module &M) {
609  if (!EnableGlobalMerge)
610  return false;
611 
612  IsMachO = Triple(M.getTargetTriple()).isOSBinFormatMachO();
613 
614  auto &DL = M.getDataLayout();
616  Globals, ConstGlobals, BSSGlobals;
617  bool Changed = false;
618  setMustKeepGlobalVariables(M);
619 
620  LLVM_DEBUG({
621  dbgs() << "Number of GV that must be kept: " <<
622  MustKeepGlobalVariables.size() << "\n";
623  for (const GlobalVariable *KeptGV : MustKeepGlobalVariables)
624  dbgs() << "Kept: " << *KeptGV << "\n";
625  });
626  // Grab all non-const globals.
627  for (auto &GV : M.globals()) {
628  // Merge is safe for "normal" internal or external globals only
629  if (GV.isDeclaration() || GV.isThreadLocal() || GV.hasImplicitSection())
630  continue;
631 
632  // It's not safe to merge globals that may be preempted
633  if (TM && !TM->shouldAssumeDSOLocal(M, &GV))
634  continue;
635 
636  if (!(MergeExternalGlobals && GV.hasExternalLinkage()) &&
637  !GV.hasInternalLinkage())
638  continue;
639 
640  PointerType *PT = dyn_cast<PointerType>(GV.getType());
641  assert(PT && "Global variable is not a pointer!");
642 
643  unsigned AddressSpace = PT->getAddressSpace();
645 
646  // Ignore all 'special' globals.
647  if (GV.getName().startswith("llvm.") ||
648  GV.getName().startswith(".llvm."))
649  continue;
650 
651  // Ignore all "required" globals:
652  if (isMustKeepGlobalVariable(&GV))
653  continue;
654 
655  Type *Ty = GV.getValueType();
656  if (DL.getTypeAllocSize(Ty) < MaxOffset) {
657  if (TM &&
659  BSSGlobals[{AddressSpace, Section}].push_back(&GV);
660  else if (GV.isConstant())
661  ConstGlobals[{AddressSpace, Section}].push_back(&GV);
662  else
663  Globals[{AddressSpace, Section}].push_back(&GV);
664  }
665  }
666 
667  for (auto &P : Globals)
668  if (P.second.size() > 1)
669  Changed |= doMerge(P.second, M, false, P.first.first);
670 
671  for (auto &P : BSSGlobals)
672  if (P.second.size() > 1)
673  Changed |= doMerge(P.second, M, false, P.first.first);
674 
676  for (auto &P : ConstGlobals)
677  if (P.second.size() > 1)
678  Changed |= doMerge(P.second, M, true, P.first.first);
679 
680  return Changed;
681 }
682 
684  return false;
685 }
686 
687 bool GlobalMerge::doFinalization(Module &M) {
688  MustKeepGlobalVariables.clear();
689  return false;
690 }
691 
693  bool OnlyOptimizeForSize,
694  bool MergeExternalByDefault) {
695  bool MergeExternal = (EnableGlobalMergeOnExternal == cl::BOU_UNSET) ?
696  MergeExternalByDefault : (EnableGlobalMergeOnExternal == cl::BOU_TRUE);
697  return new GlobalMerge(TM, Offset, OnlyOptimizeForSize, MergeExternal);
698 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:77
i
i
Definition: README.txt:29
llvm::alignTo
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:156
Int32Ty
IntegerType * Int32Ty
Definition: NVVMIntrRange.cpp:67
getName
static StringRef getName(Value *V)
Definition: ProvenanceAnalysisEvaluator.cpp:20
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::initializeGlobalMergePass
void initializeGlobalMergePass(PassRegistry &)
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::StructType::get
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:406
llvm::AArch64PACKey::ID
ID
Definition: AArch64BaseInfo.h:818
llvm::User::operands
op_range operands()
Definition: User.h:242
llvm::GlobalValue::hasExternalLinkage
bool hasExternalLinkage() const
Definition: GlobalValue.h:506
llvm::Function
Definition: Function.h:60
StringRef.h
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::ConstantStruct::get
static Constant * get(StructType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1306
Pass.h
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::size
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
llvm::GlobalObject::getSection
StringRef getSection() const
Get the custom section of this global if it has one.
Definition: GlobalObject.h:111
llvm::GlobalValue::NotThreadLocal
@ NotThreadLocal
Definition: GlobalValue.h:192
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
Statistic.h
llvm::GlobalVariable
Definition: GlobalVariable.h:39
llvm::PointerType::getAddressSpace
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:682
EnableGlobalMergeOnConst
static cl::opt< bool > EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden, cl::desc("Enable global merge pass on constants"), cl::init(false))
llvm::GlobalAlias
Definition: GlobalAlias.h:28
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
llvm::object::getSection
Expected< const typename ELFT::Shdr * > getSection(typename ELFT::ShdrRange Sections, uint32_t Index)
Definition: ELF.h:406
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
DenseMap.h
Module.h
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::ConstantExpr::getInBoundsGetElementPtr
static Constant * getInBoundsGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList)
Create an "inbounds" getelementptr.
Definition: Constants.h:1245
llvm::GlobalValue::LinkageTypes
LinkageTypes
An enumeration for the kinds of linkage for global values.
Definition: GlobalValue.h:47
GlobalMergeGroupByUse
static cl::opt< bool > GlobalMergeGroupByUse("global-merge-group-by-use", cl::Hidden, cl::desc("Improve global merge pass to look at uses"), cl::init(true))
llvm::Type::getInt8Ty
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:237
Use.h
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:239
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::GlobalVariable::hasInitializer
bool hasInitializer() const
Definitions have initializers, declarations don't.
Definition: GlobalVariable.h:91
llvm::collectUsedGlobalVariables
GlobalVariable * collectUsedGlobalVariables(const Module &M, SmallVectorImpl< GlobalValue * > &Vec, bool CompilerUsed)
Given "llvm.used" or "llvm.compiler.used" as a global name, collect the initializer elements of that ...
Definition: Module.cpp:800
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::ARMBuildAttrs::Section
@ Section
Legacy Tags.
Definition: ARMBuildAttributes.h:82
Instruction.h
CommandLine.h
llvm::MachineBasicBlock::push_back
void push_back(MachineInstr *MI)
Definition: MachineBasicBlock.h:902
GlobalValue.h
TargetMachine.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24
llvm::GlobalValue::isDeclaration
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:266
llvm::GlobalValue::VisibilityTypes
VisibilityTypes
An enumeration for the kinds of visibility of global values.
Definition: GlobalValue.h:62
llvm::StringRef::startswith
bool startswith(StringRef Prefix) const
Definition: StringRef.h:260
Constants.h
Twine.h
INITIALIZE_PASS
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:37
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::Value::uses
iterator_range< use_iterator > uses()
Definition: Value.h:376
llvm::ConstantArray
ConstantArray - Constant Array Declarations.
Definition: Constants.h:410
llvm::Triple::isOSBinFormatMachO
bool isOSBinFormatMachO() const
Tests whether the environment is MachO.
Definition: Triple.h:686
llvm::Instruction
Definition: Instruction.h:42
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
BitVector.h
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:879
SmallPtrSet.h
llvm::GlobalValue::hasInternalLinkage
bool hasInternalLinkage() const
Definition: GlobalValue.h:521
llvm::BitVector
Definition: BitVector.h:75
llvm::GlobalValue::InternalLinkage
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::TargetLoweringObjectFile::getKindForGlobal
static SectionKind getKindForGlobal(const GlobalObject *GO, const TargetMachine &TM)
Classify the specified global variable into a set of target independent categories embodied in Sectio...
Definition: TargetLoweringObjectFile.cpp:200
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Type.h
Passes.h
G
const DataFlowGraph & G
Definition: RDFGraph.cpp:200
BasicBlock.h
llvm::cl::opt< bool >
llvm::GlobalVariable::getInitializer
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
Definition: GlobalVariable.h:135
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::cl::BOU_UNSET
@ BOU_UNSET
Definition: CommandLine.h:630
llvm::StructLayout
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:622
uint64_t
GlobalMergeIgnoreSingleUse
static cl::opt< bool > GlobalMergeIgnoreSingleUse("global-merge-ignore-single-use", cl::Hidden, cl::desc("Improve global merge pass to ignore globals only used alone"), cl::init(true))
llvm::GlobalVariable::hasImplicitSection
bool hasImplicitSection() const
Check if section name is present.
Definition: GlobalVariable.h:242
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:53
llvm::DenseMap
Definition: DenseMap.h:714
EnableGlobalMerge
static cl::opt< bool > EnableGlobalMerge("enable-global-merge", cl::Hidden, cl::desc("Enable the global merge pass"), cl::init(true))
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:446
EnableGlobalMergeOnExternal
static cl::opt< cl::boolOrDefault > EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden, cl::desc("Enable global merge pass on external linkage"))
llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:632
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
llvm::GlobalValue::isThreadLocal
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:259
llvm::WinEH::EncodingType::CE
@ CE
Windows NT (Windows on ARM)
llvm::ArrayType::get
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:638
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
Triple.h
llvm::AArch64CC::GE
@ GE
Definition: AArch64BaseInfo.h:265
DataLayout.h
llvm::StructType
Class to represent struct types.
Definition: DerivedTypes.h:213
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:265
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
TargetLoweringObjectFile.h
llvm::GlobalValue::setDLLStorageClass
void setDLLStorageClass(DLLStorageClassTypes C)
Definition: GlobalValue.h:280
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
GlobalMergeMaxOffset
static cl::opt< unsigned > GlobalMergeMaxOffset("global-merge-max-offset", cl::Hidden, cl::desc("Set maximum offset for global merge pass"), cl::init(0))
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
llvm::Value::stripPointerCasts
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:685
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::clear
void clear()
Completely clear the SetVector.
Definition: SetVector.h:220
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:85
j
return j(j<< 16)
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
llvm::stable_sort
void stable_sort(R &&Range)
Definition: STLExtras.h:1947
llvm::ConstantExpr
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:972
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:50
llvm::GlobalAlias::create
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition: Globals.cpp:511
llvm::StructLayout::getElementOffset
uint64_t getElementOffset(unsigned Idx) const
Definition: DataLayout.h:652
GlobalVariable.h
Casting.h
Function.h
llvm::Instruction::isEHPad
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:669
llvm::BitVector::find_next
int find_next(unsigned Prev) const
find_next - Returns the index of the next set bit following the "Prev" bit.
Definition: BitVector.h:301
SectionKind.h
GlobalAlias.h
llvm::createGlobalMergePass
Pass * createGlobalMergePass(const TargetMachine *TM, unsigned MaximalOffset, bool OnlyOptimizeForSize=false, bool MergeExternalByDefault=false)
GlobalMerge - This pass merges internal (by default) globals into structs to enable reuse of a base p...
Definition: GlobalMerge.cpp:692
llvm::pdb::PDB_ColorItem::Padding
@ Padding
llvm::GlobalValue::ExternalLinkage
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:48
llvm::GlobalValue::DLLStorageClassTypes
DLLStorageClassTypes
Storage classes of global values for PE targets.
Definition: GlobalValue.h:69
llvm::cl::BOU_TRUE
@ BOU_TRUE
Definition: CommandLine.h:630
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
llvm::GlobalValue::PrivateLinkage
@ PrivateLinkage
Like Internal, but omit from symbol table.
Definition: GlobalValue.h:56
llvm::User::getNumOperands
unsigned getNumOperands() const
Definition: User.h:191
SmallVector.h
User.h
llvm::GlobalVariable::isConstant
bool isConstant() const
If the value is a global constant, its value is immutable throughout the runtime execution of the pro...
Definition: GlobalVariable.h:152
llvm::BitVector::find_first
int find_first() const
find_first - Returns the index of the first set bit, -1 if none of the bits are set.
Definition: BitVector.h:293
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
llvm::reverse
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:485
llvm::Function::hasMinSize
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:642
llvm::GlobalValue::getType
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:290
llvm::Pass::getAnalysisUsage
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:98
DerivedTypes.h
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:307
llvm::GlobalValue::getValueType
Type * getValueType() const
Definition: GlobalValue.h:292
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:171
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:412
raw_ostream.h
llvm::ConstantAggregateZero::get
static ConstantAggregateZero * get(Type *Ty)
Definition: Constants.cpp:1587
DEBUG_TYPE
#define DEBUG_TYPE
Definition: GlobalMerge.cpp:104
llvm::BitVector::back
bool back() const
Return the last element in the vector.
Definition: BitVector.h:449
llvm::GlobalValue::setVisibility
void setVisibility(VisibilityTypes V)
Definition: GlobalValue.h:250
InitializePasses.h
Debug.h
SetVector.h
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43