LLVM 20.0.0git
GlobalMerge.cpp
Go to the documentation of this file.
1//===- GlobalMerge.cpp - Internal globals merging -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass merges globals with internal linkage into one. This way all the
10// globals which were merged into a biggest one can be addressed using offsets
11// from the same base pointer (no need for separate base pointer for each of the
12// global). Such a transformation can significantly reduce the register pressure
13// when many globals are involved.
14//
15// For example, consider the code which touches several global variables at
16// once:
17//
18// static int foo[N], bar[N], baz[N];
19//
20// for (i = 0; i < N; ++i) {
21// foo[i] = bar[i] * baz[i];
22// }
23//
24// On ARM the addresses of 3 arrays should be kept in the registers, thus
25// this code has quite large register pressure (loop body):
26//
27// ldr r1, [r5], #4
28// ldr r2, [r6], #4
29// mul r1, r2, r1
30// str r1, [r0], #4
31//
32// Pass converts the code to something like:
33//
34// static struct {
35// int foo[N];
36// int bar[N];
37// int baz[N];
38// } merged;
39//
40// for (i = 0; i < N; ++i) {
41// merged.foo[i] = merged.bar[i] * merged.baz[i];
42// }
43//
44// and in ARM code this becomes:
45//
46// ldr r0, [r5, #40]
47// ldr r1, [r5, #80]
48// mul r0, r1, r0
49// str r0, [r5], #4
50//
51// note that we saved 2 registers here almostly "for free".
52//
53// However, merging globals can have tradeoffs:
54// - it confuses debuggers, tools, and users
55// - it makes linker optimizations less useful (order files, LOHs, ...)
56// - it forces usage of indexed addressing (which isn't necessarily "free")
57// - it can increase register pressure when the uses are disparate enough.
58//
59// We use heuristics to discover the best global grouping we can (cf cl::opts).
60//
61// ===---------------------------------------------------------------------===//
62
64#include "llvm/ADT/BitVector.h"
65#include "llvm/ADT/DenseMap.h"
66#include "llvm/ADT/MapVector.h"
67#include "llvm/ADT/SetVector.h"
69#include "llvm/ADT/Statistic.h"
70#include "llvm/ADT/StringRef.h"
71#include "llvm/ADT/Twine.h"
72#include "llvm/CodeGen/Passes.h"
73#include "llvm/IR/BasicBlock.h"
74#include "llvm/IR/Constants.h"
75#include "llvm/IR/DataLayout.h"
77#include "llvm/IR/Function.h"
78#include "llvm/IR/GlobalAlias.h"
79#include "llvm/IR/GlobalValue.h"
81#include "llvm/IR/Instruction.h"
82#include "llvm/IR/Module.h"
83#include "llvm/IR/Type.h"
84#include "llvm/IR/Use.h"
85#include "llvm/IR/User.h"
87#include "llvm/MC/SectionKind.h"
88#include "llvm/Pass.h"
91#include "llvm/Support/Debug.h"
96#include <algorithm>
97#include <cassert>
98#include <cstddef>
99#include <cstdint>
100#include <string>
101#include <vector>
102
103using namespace llvm;
104
105#define DEBUG_TYPE "global-merge"
106
107// FIXME: This is only useful as a last-resort way to disable the pass.
108static cl::opt<bool>
109EnableGlobalMerge("enable-global-merge", cl::Hidden,
110 cl::desc("Enable the global merge pass"),
111 cl::init(true));
112
114GlobalMergeMaxOffset("global-merge-max-offset", cl::Hidden,
115 cl::desc("Set maximum offset for global merge pass"),
116 cl::init(0));
117
119 "global-merge-group-by-use", cl::Hidden,
120 cl::desc("Improve global merge pass to look at uses"), cl::init(true));
121
123 "global-merge-ignore-single-use", cl::Hidden,
124 cl::desc("Improve global merge pass to ignore globals only used alone"),
125 cl::init(true));
126
127static cl::opt<bool>
128EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden,
129 cl::desc("Enable global merge pass on constants"),
130 cl::init(false));
131
132// FIXME: this could be a transitional option, and we probably need to remove
133// it if only we are sure this optimization could always benefit all targets.
135EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden,
136 cl::desc("Enable global merge pass on external linkage"));
137
139 GlobalMergeMinDataSize("global-merge-min-data-size",
140 cl::desc("The minimum size in bytes of each global "
141 "that should considered in merging."),
142 cl::init(0), cl::Hidden);
143
144STATISTIC(NumMerged, "Number of globals merged");
145
146namespace {
147
148class GlobalMergeImpl {
149 const TargetMachine *TM = nullptr;
151 bool IsMachO = false;
152
153private:
154 bool doMerge(SmallVectorImpl<GlobalVariable *> &Globals, Module &M,
155 bool isConst, unsigned AddrSpace) const;
156
157 /// Merge everything in \p Globals for which the corresponding bit
158 /// in \p GlobalSet is set.
159 bool doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
160 const BitVector &GlobalSet, Module &M, bool isConst,
161 unsigned AddrSpace) const;
162
163 /// Check if the given variable has been identified as must keep
164 /// \pre setMustKeepGlobalVariables must have been called on the Module that
165 /// contains GV
166 bool isMustKeepGlobalVariable(const GlobalVariable *GV) const {
167 return MustKeepGlobalVariables.count(GV);
168 }
169
170 /// Collect every variables marked as "used" or used in a landing pad
171 /// instruction for this Module.
172 void setMustKeepGlobalVariables(Module &M);
173
174 /// Collect every variables marked as "used"
176
177 /// Keep track of the GlobalVariable that must not be merged away
178 SmallSetVector<const GlobalVariable *, 16> MustKeepGlobalVariables;
179
180public:
181 GlobalMergeImpl(const TargetMachine *TM, GlobalMergeOptions Opt)
182 : TM(TM), Opt(Opt) {}
183 bool run(Module &M);
184};
185
186class GlobalMerge : public FunctionPass {
187 const TargetMachine *TM = nullptr;
189
190public:
191 static char ID; // Pass identification, replacement for typeid.
192
193 explicit GlobalMerge() : FunctionPass(ID) {
196 }
197
198 explicit GlobalMerge(const TargetMachine *TM, unsigned MaximalOffset,
199 bool OnlyOptimizeForSize, bool MergeExternalGlobals)
200 : FunctionPass(ID), TM(TM) {
201 Opt.MaxOffset = MaximalOffset;
202 Opt.SizeOnly = OnlyOptimizeForSize;
203 Opt.MergeExternal = MergeExternalGlobals;
205 }
206
207 bool doInitialization(Module &M) override {
208 auto GetSmallDataLimit = [](Module &M) -> std::optional<uint64_t> {
209 Metadata *SDL = M.getModuleFlag("SmallDataLimit");
210 if (!SDL)
211 return std::nullopt;
212 return mdconst::extract<ConstantInt>(SDL)->getZExtValue();
213 };
214 if (GlobalMergeMinDataSize.getNumOccurrences())
216 else if (auto SDL = GetSmallDataLimit(M); SDL && *SDL > 0)
217 Opt.MinSize = *SDL + 1;
218 else
219 Opt.MinSize = 0;
220
221 GlobalMergeImpl P(TM, Opt);
222 return P.run(M);
223 }
224 bool runOnFunction(Function &F) override { return false; }
225
226 StringRef getPassName() const override { return "Merge internal globals"; }
227
228 void getAnalysisUsage(AnalysisUsage &AU) const override {
229 AU.setPreservesCFG();
231 }
232};
233
234} // end anonymous namespace
235
237 GlobalMergeImpl P(TM, Options);
238 bool Changed = P.run(M);
239 if (!Changed)
240 return PreservedAnalyses::all();
241
244 return PA;
245}
246
247char GlobalMerge::ID = 0;
248
249INITIALIZE_PASS(GlobalMerge, DEBUG_TYPE, "Merge global variables", false, false)
250
251bool GlobalMergeImpl::doMerge(SmallVectorImpl<GlobalVariable *> &Globals,
252 Module &M, bool isConst,
253 unsigned AddrSpace) const {
254 auto &DL = M.getDataLayout();
255 // FIXME: Find better heuristics
257 Globals, [&DL](const GlobalVariable *GV1, const GlobalVariable *GV2) {
258 // We don't support scalable global variables.
259 return DL.getTypeAllocSize(GV1->getValueType()).getFixedValue() <
260 DL.getTypeAllocSize(GV2->getValueType()).getFixedValue();
261 });
262
263 // If we want to just blindly group all globals together, do so.
265 BitVector AllGlobals(Globals.size());
266 AllGlobals.set();
267 return doMerge(Globals, AllGlobals, M, isConst, AddrSpace);
268 }
269
270 // If we want to be smarter, look at all uses of each global, to try to
271 // discover all sets of globals used together, and how many times each of
272 // these sets occurred.
273 //
274 // Keep this reasonably efficient, by having an append-only list of all sets
275 // discovered so far (UsedGlobalSet), and mapping each "together-ness" unit of
276 // code (currently, a Function) to the set of globals seen so far that are
277 // used together in that unit (GlobalUsesByFunction).
278 //
279 // When we look at the Nth global, we know that any new set is either:
280 // - the singleton set {N}, containing this global only, or
281 // - the union of {N} and a previously-discovered set, containing some
282 // combination of the previous N-1 globals.
283 // Using that knowledge, when looking at the Nth global, we can keep:
284 // - a reference to the singleton set {N} (CurGVOnlySetIdx)
285 // - a list mapping each previous set to its union with {N} (EncounteredUGS),
286 // if it actually occurs.
287
288 // We keep track of the sets of globals used together "close enough".
289 struct UsedGlobalSet {
290 BitVector Globals;
291 unsigned UsageCount = 1;
292
293 UsedGlobalSet(size_t Size) : Globals(Size) {}
294 };
295
296 // Each set is unique in UsedGlobalSets.
297 std::vector<UsedGlobalSet> UsedGlobalSets;
298
299 // Avoid repeating the create-global-set pattern.
300 auto CreateGlobalSet = [&]() -> UsedGlobalSet & {
301 UsedGlobalSets.emplace_back(Globals.size());
302 return UsedGlobalSets.back();
303 };
304
305 // The first set is the empty set.
306 CreateGlobalSet().UsageCount = 0;
307
308 // We define "close enough" to be "in the same function".
309 // FIXME: Grouping uses by function is way too aggressive, so we should have
310 // a better metric for distance between uses.
311 // The obvious alternative would be to group by BasicBlock, but that's in
312 // turn too conservative..
313 // Anything in between wouldn't be trivial to compute, so just stick with
314 // per-function grouping.
315
316 // The value type is an index into UsedGlobalSets.
317 // The default (0) conveniently points to the empty set.
318 DenseMap<Function *, size_t /*UsedGlobalSetIdx*/> GlobalUsesByFunction;
319
320 // Now, look at each merge-eligible global in turn.
321
322 // Keep track of the sets we already encountered to which we added the
323 // current global.
324 // Each element matches the same-index element in UsedGlobalSets.
325 // This lets us efficiently tell whether a set has already been expanded to
326 // include the current global.
327 std::vector<size_t> EncounteredUGS;
328
329 for (size_t GI = 0, GE = Globals.size(); GI != GE; ++GI) {
330 GlobalVariable *GV = Globals[GI];
331
332 // Reset the encountered sets for this global and grow it in case we created
333 // new sets for the previous global.
334 EncounteredUGS.assign(UsedGlobalSets.size(), 0);
335
336 // We might need to create a set that only consists of the current global.
337 // Keep track of its index into UsedGlobalSets.
338 size_t CurGVOnlySetIdx = 0;
339
340 // For each global, look at all its Uses.
341 for (auto &U : GV->uses()) {
342 // This Use might be a ConstantExpr. We're interested in Instruction
343 // users, so look through ConstantExpr...
344 Use *UI, *UE;
345 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U.getUser())) {
346 if (CE->use_empty())
347 continue;
348 UI = &*CE->use_begin();
349 UE = nullptr;
350 } else if (isa<Instruction>(U.getUser())) {
351 UI = &U;
352 UE = UI->getNext();
353 } else {
354 continue;
355 }
356
357 // ...to iterate on all the instruction users of the global.
358 // Note that we iterate on Uses and not on Users to be able to getNext().
359 for (; UI != UE; UI = UI->getNext()) {
360 Instruction *I = dyn_cast<Instruction>(UI->getUser());
361 if (!I)
362 continue;
363
364 Function *ParentFn = I->getParent()->getParent();
365
366 // If we're only optimizing for size, ignore non-minsize functions.
367 if (Opt.SizeOnly && !ParentFn->hasMinSize())
368 continue;
369
370 size_t UGSIdx = GlobalUsesByFunction[ParentFn];
371
372 // If this is the first global the basic block uses, map it to the set
373 // consisting of this global only.
374 if (!UGSIdx) {
375 // If that set doesn't exist yet, create it.
376 if (!CurGVOnlySetIdx) {
377 CurGVOnlySetIdx = UsedGlobalSets.size();
378 CreateGlobalSet().Globals.set(GI);
379 } else {
380 ++UsedGlobalSets[CurGVOnlySetIdx].UsageCount;
381 }
382
383 GlobalUsesByFunction[ParentFn] = CurGVOnlySetIdx;
384 continue;
385 }
386
387 // If we already encountered this BB, just increment the counter.
388 if (UsedGlobalSets[UGSIdx].Globals.test(GI)) {
389 ++UsedGlobalSets[UGSIdx].UsageCount;
390 continue;
391 }
392
393 // If not, the previous set wasn't actually used in this function.
394 --UsedGlobalSets[UGSIdx].UsageCount;
395
396 // If we already expanded the previous set to include this global, just
397 // reuse that expanded set.
398 if (size_t ExpandedIdx = EncounteredUGS[UGSIdx]) {
399 ++UsedGlobalSets[ExpandedIdx].UsageCount;
400 GlobalUsesByFunction[ParentFn] = ExpandedIdx;
401 continue;
402 }
403
404 // If not, create a new set consisting of the union of the previous set
405 // and this global. Mark it as encountered, so we can reuse it later.
406 GlobalUsesByFunction[ParentFn] = EncounteredUGS[UGSIdx] =
407 UsedGlobalSets.size();
408
409 UsedGlobalSet &NewUGS = CreateGlobalSet();
410 NewUGS.Globals.set(GI);
411 NewUGS.Globals |= UsedGlobalSets[UGSIdx].Globals;
412 }
413 }
414 }
415
416 // Now we found a bunch of sets of globals used together. We accumulated
417 // the number of times we encountered the sets (i.e., the number of blocks
418 // that use that exact set of globals).
419 //
420 // Multiply that by the size of the set to give us a crude profitability
421 // metric.
422 llvm::stable_sort(UsedGlobalSets,
423 [](const UsedGlobalSet &UGS1, const UsedGlobalSet &UGS2) {
424 return UGS1.Globals.count() * UGS1.UsageCount <
425 UGS2.Globals.count() * UGS2.UsageCount;
426 });
427
428 // We can choose to merge all globals together, but ignore globals never used
429 // with another global. This catches the obviously non-profitable cases of
430 // having a single global, but is aggressive enough for any other case.
432 BitVector AllGlobals(Globals.size());
433 for (const UsedGlobalSet &UGS : llvm::reverse(UsedGlobalSets)) {
434 if (UGS.UsageCount == 0)
435 continue;
436 if (UGS.Globals.count() > 1)
437 AllGlobals |= UGS.Globals;
438 }
439 return doMerge(Globals, AllGlobals, M, isConst, AddrSpace);
440 }
441
442 // Starting from the sets with the best (=biggest) profitability, find a
443 // good combination.
444 // The ideal (and expensive) solution can only be found by trying all
445 // combinations, looking for the one with the best profitability.
446 // Don't be smart about it, and just pick the first compatible combination,
447 // starting with the sets with the best profitability.
448 BitVector PickedGlobals(Globals.size());
449 bool Changed = false;
450
451 for (const UsedGlobalSet &UGS : llvm::reverse(UsedGlobalSets)) {
452 if (UGS.UsageCount == 0)
453 continue;
454 if (PickedGlobals.anyCommon(UGS.Globals))
455 continue;
456 PickedGlobals |= UGS.Globals;
457 // If the set only contains one global, there's no point in merging.
458 // Ignore the global for inclusion in other sets though, so keep it in
459 // PickedGlobals.
460 if (UGS.Globals.count() < 2)
461 continue;
462 Changed |= doMerge(Globals, UGS.Globals, M, isConst, AddrSpace);
463 }
464
465 return Changed;
466}
467
468bool GlobalMergeImpl::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
469 const BitVector &GlobalSet, Module &M,
470 bool isConst, unsigned AddrSpace) const {
471 assert(Globals.size() > 1);
472
473 Type *Int32Ty = Type::getInt32Ty(M.getContext());
474 Type *Int8Ty = Type::getInt8Ty(M.getContext());
475 auto &DL = M.getDataLayout();
476
477 LLVM_DEBUG(dbgs() << " Trying to merge set, starts with #"
478 << GlobalSet.find_first() << "\n");
479
480 bool Changed = false;
481 ssize_t i = GlobalSet.find_first();
482 while (i != -1) {
483 ssize_t j = 0;
484 uint64_t MergedSize = 0;
485 std::vector<Type*> Tys;
486 std::vector<Constant*> Inits;
487 std::vector<unsigned> StructIdxs;
488
489 bool HasExternal = false;
490 StringRef FirstExternalName;
491 Align MaxAlign;
492 unsigned CurIdx = 0;
493 for (j = i; j != -1; j = GlobalSet.find_next(j)) {
494 Type *Ty = Globals[j]->getValueType();
495
496 // Make sure we use the same alignment AsmPrinter would use.
497 Align Alignment = DL.getPreferredAlign(Globals[j]);
498 unsigned Padding = alignTo(MergedSize, Alignment) - MergedSize;
499 MergedSize += Padding;
500 MergedSize += DL.getTypeAllocSize(Ty);
501 if (MergedSize > Opt.MaxOffset) {
502 break;
503 }
504 if (Padding) {
505 Tys.push_back(ArrayType::get(Int8Ty, Padding));
506 Inits.push_back(ConstantAggregateZero::get(Tys.back()));
507 ++CurIdx;
508 }
509 Tys.push_back(Ty);
510 Inits.push_back(Globals[j]->getInitializer());
511 StructIdxs.push_back(CurIdx++);
512
513 MaxAlign = std::max(MaxAlign, Alignment);
514
515 if (Globals[j]->hasExternalLinkage() && !HasExternal) {
516 HasExternal = true;
517 FirstExternalName = Globals[j]->getName();
518 }
519 }
520
521 // Exit early if there is only one global to merge.
522 if (Tys.size() < 2) {
523 i = j;
524 continue;
525 }
526
527 // If merged variables doesn't have external linkage, we needn't to expose
528 // the symbol after merging.
532 // Use a packed struct so we can control alignment.
533 StructType *MergedTy = StructType::get(M.getContext(), Tys, true);
534 Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
535
536 // On Darwin external linkage needs to be preserved, otherwise
537 // dsymutil cannot preserve the debug info for the merged
538 // variables. If they have external linkage, use the symbol name
539 // of the first variable merged as the suffix of global symbol
540 // name. This avoids a link-time naming conflict for the
541 // _MergedGlobals symbols.
542 Twine MergedName =
543 (IsMachO && HasExternal)
544 ? "_MergedGlobals_" + FirstExternalName
545 : "_MergedGlobals";
546 auto MergedLinkage = IsMachO ? Linkage : GlobalValue::PrivateLinkage;
547 auto *MergedGV = new GlobalVariable(
548 M, MergedTy, isConst, MergedLinkage, MergedInit, MergedName, nullptr,
550
551 MergedGV->setAlignment(MaxAlign);
552 MergedGV->setSection(Globals[i]->getSection());
553
554 const StructLayout *MergedLayout = DL.getStructLayout(MergedTy);
555 for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) {
556 GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage();
557 std::string Name(Globals[k]->getName());
558 GlobalValue::VisibilityTypes Visibility = Globals[k]->getVisibility();
560 Globals[k]->getDLLStorageClass();
561
562 // Copy metadata while adjusting any debug info metadata by the original
563 // global's offset within the merged global.
564 MergedGV->copyMetadata(Globals[k],
565 MergedLayout->getElementOffset(StructIdxs[idx]));
566
567 Constant *Idx[2] = {
568 ConstantInt::get(Int32Ty, 0),
569 ConstantInt::get(Int32Ty, StructIdxs[idx]),
570 };
571 Constant *GEP =
572 ConstantExpr::getInBoundsGetElementPtr(MergedTy, MergedGV, Idx);
573 Globals[k]->replaceAllUsesWith(GEP);
574 Globals[k]->eraseFromParent();
575
576 // When the linkage is not internal we must emit an alias for the original
577 // variable name as it may be accessed from another object. On non-Mach-O
578 // we can also emit an alias for internal linkage as it's safe to do so.
579 // It's not safe on Mach-O as the alias (and thus the portion of the
580 // MergedGlobals variable) may be dead stripped at link time.
581 if (Linkage != GlobalValue::InternalLinkage || !IsMachO) {
582 GlobalAlias *GA = GlobalAlias::create(Tys[StructIdxs[idx]], AddrSpace,
583 Linkage, Name, GEP, &M);
584 GA->setVisibility(Visibility);
585 GA->setDLLStorageClass(DLLStorage);
586 }
587
588 NumMerged++;
589 }
590 Changed = true;
591 i = j;
592 }
593
594 return Changed;
595}
596
597void GlobalMergeImpl::collectUsedGlobalVariables(Module &M, StringRef Name) {
598 // Extract global variables from llvm.used array
599 const GlobalVariable *GV = M.getGlobalVariable(Name);
600 if (!GV || !GV->hasInitializer()) return;
601
602 // Should be an array of 'i8*'.
603 const ConstantArray *InitList = cast<ConstantArray>(GV->getInitializer());
604
605 for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
606 if (const GlobalVariable *G =
607 dyn_cast<GlobalVariable>(InitList->getOperand(i)->stripPointerCasts()))
608 MustKeepGlobalVariables.insert(G);
609}
610
611void GlobalMergeImpl::setMustKeepGlobalVariables(Module &M) {
612 collectUsedGlobalVariables(M, "llvm.used");
613 collectUsedGlobalVariables(M, "llvm.compiler.used");
614
615 for (Function &F : M) {
616 for (BasicBlock &BB : F) {
617 Instruction *Pad = BB.getFirstNonPHI();
618 if (!Pad->isEHPad())
619 continue;
620
621 // Keep globals used by landingpads and catchpads.
622 for (const Use &U : Pad->operands()) {
623 if (const GlobalVariable *GV =
624 dyn_cast<GlobalVariable>(U->stripPointerCasts()))
625 MustKeepGlobalVariables.insert(GV);
626 else if (const ConstantArray *CA = dyn_cast<ConstantArray>(U->stripPointerCasts())) {
627 for (const Use &Elt : CA->operands()) {
628 if (const GlobalVariable *GV =
629 dyn_cast<GlobalVariable>(Elt->stripPointerCasts()))
630 MustKeepGlobalVariables.insert(GV);
631 }
632 }
633 }
634 }
635 }
636}
637
638bool GlobalMergeImpl::run(Module &M) {
640 return false;
641
642 IsMachO = Triple(M.getTargetTriple()).isOSBinFormatMachO();
643
644 auto &DL = M.getDataLayout();
646 Globals, ConstGlobals, BSSGlobals;
647 bool Changed = false;
648 setMustKeepGlobalVariables(M);
649
650 LLVM_DEBUG({
651 dbgs() << "Number of GV that must be kept: " <<
652 MustKeepGlobalVariables.size() << "\n";
653 for (const GlobalVariable *KeptGV : MustKeepGlobalVariables)
654 dbgs() << "Kept: " << *KeptGV << "\n";
655 });
656 // Grab all non-const globals.
657 for (auto &GV : M.globals()) {
658 // Merge is safe for "normal" internal or external globals only
659 if (GV.isDeclaration() || GV.isThreadLocal() || GV.hasImplicitSection())
660 continue;
661
662 // It's not safe to merge globals that may be preempted
663 if (TM && !TM->shouldAssumeDSOLocal(&GV))
664 continue;
665
666 if (!(Opt.MergeExternal && GV.hasExternalLinkage()) &&
667 !GV.hasInternalLinkage())
668 continue;
669
670 PointerType *PT = dyn_cast<PointerType>(GV.getType());
671 assert(PT && "Global variable is not a pointer!");
672
673 unsigned AddressSpace = PT->getAddressSpace();
675
676 // Ignore all 'special' globals.
677 if (GV.getName().starts_with("llvm.") || GV.getName().starts_with(".llvm."))
678 continue;
679
680 // Ignore all "required" globals:
681 if (isMustKeepGlobalVariable(&GV))
682 continue;
683
684 // Don't merge tagged globals, as each global should have its own unique
685 // memory tag at runtime. TODO(hctim): This can be relaxed: constant globals
686 // with compatible alignment and the same contents may be merged as long as
687 // the globals occupy the same number of tag granules (i.e. `size_a / 16 ==
688 // size_b / 16`).
689 if (GV.isTagged())
690 continue;
691
692 Type *Ty = GV.getValueType();
693 TypeSize AllocSize = DL.getTypeAllocSize(Ty);
694 if (AllocSize < Opt.MaxOffset && AllocSize >= Opt.MinSize) {
695 if (TM &&
697 BSSGlobals[{AddressSpace, Section}].push_back(&GV);
698 else if (GV.isConstant())
699 ConstGlobals[{AddressSpace, Section}].push_back(&GV);
700 else
701 Globals[{AddressSpace, Section}].push_back(&GV);
702 }
703 }
704
705 for (auto &P : Globals)
706 if (P.second.size() > 1)
707 Changed |= doMerge(P.second, M, false, P.first.first);
708
709 for (auto &P : BSSGlobals)
710 if (P.second.size() > 1)
711 Changed |= doMerge(P.second, M, false, P.first.first);
712
714 for (auto &P : ConstGlobals)
715 if (P.second.size() > 1)
716 Changed |= doMerge(P.second, M, true, P.first.first);
717
718 return Changed;
719}
720
722 bool OnlyOptimizeForSize,
723 bool MergeExternalByDefault) {
724 bool MergeExternal = (EnableGlobalMergeOnExternal == cl::BOU_UNSET) ?
725 MergeExternalByDefault : (EnableGlobalMergeOnExternal == cl::BOU_TRUE);
726 return new GlobalMerge(TM, Offset, OnlyOptimizeForSize, MergeExternal);
727}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file implements the BitVector class.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
std::string Name
uint64_t Size
#define DEBUG_TYPE
static cl::opt< bool > GlobalMergeIgnoreSingleUse("global-merge-ignore-single-use", cl::Hidden, cl::desc("Improve global merge pass to ignore globals only used alone"), cl::init(true))
static cl::opt< bool > GlobalMergeGroupByUse("global-merge-group-by-use", cl::Hidden, cl::desc("Improve global merge pass to look at uses"), cl::init(true))
static cl::opt< bool > EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden, cl::desc("Enable global merge pass on constants"), cl::init(false))
static cl::opt< unsigned > GlobalMergeMinDataSize("global-merge-min-data-size", cl::desc("The minimum size in bytes of each global " "that should considered in merging."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > GlobalMergeMaxOffset("global-merge-max-offset", cl::Hidden, cl::desc("Set maximum offset for global merge pass"), cl::init(0))
static cl::opt< bool > EnableGlobalMerge("enable-global-merge", cl::Hidden, cl::desc("Enable the global merge pass"), cl::init(true))
static cl::opt< cl::boolOrDefault > EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden, cl::desc("Enable global merge pass on external linkage"))
Hexagon Common GEP
This defines the Use class.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
This file implements a map that provides insertion order iteration.
Module.h This file contains the declarations for the Module class.
#define P(N)
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
static StringRef getName(Value *V)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
Represent the analysis usage information of a pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:256
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:635
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
int find_first() const
find_first - Returns the index of the first set bit, -1 if none of the bits are set.
Definition: BitVector.h:300
bool back() const
Return the last element in the vector.
Definition: BitVector.h:456
int find_next(unsigned Prev) const
find_next - Returns the index of the next set bit following the "Prev" bit.
Definition: BitVector.h:308
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:72
static ConstantAggregateZero * get(Type *Ty)
Definition: Constants.cpp:1650
ConstantArray - Constant Array Declarations.
Definition: Constants.h:424
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1097
static Constant * getInBoundsGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList)
Create an "inbounds" getelementptr.
Definition: Constants.h:1280
static Constant * get(StructType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1357
This is an important base class in LLVM.
Definition: Constant.h:42
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:702
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition: Globals.cpp:550
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
StringRef getSection() const
Get the custom section of this global if it has one.
Definition: GlobalObject.h:118
bool hasExternalLinkage() const
Definition: GlobalValue.h:511
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:263
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:290
bool isTagged() const
Definition: GlobalValue.h:365
void setDLLStorageClass(DLLStorageClassTypes C)
Definition: GlobalValue.h:284
DLLStorageClassTypes
Storage classes of global values for PE targets.
Definition: GlobalValue.h:73
bool hasInternalLinkage() const
Definition: GlobalValue.h:526
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:294
VisibilityTypes
An enumeration for the kinds of visibility of global values.
Definition: GlobalValue.h:66
void setVisibility(VisibilityTypes V)
Definition: GlobalValue.h:254
LinkageTypes
An enumeration for the kinds of linkage for global values.
Definition: GlobalValue.h:51
@ PrivateLinkage
Like Internal, but omit from symbol table.
Definition: GlobalValue.h:60
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:59
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:52
Type * getValueType() const
Definition: GlobalValue.h:296
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
bool hasImplicitSection() const
Check if section name is present.
bool isConstant() const
If the value is a global constant, its value is immutable throughout the runtime execution of the pro...
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:824
void push_back(MachineInstr *MI)
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
Root of the metadata hierarchy.
Definition: Metadata.h:62
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:94
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:98
virtual bool doInitialization(Module &)
doInitialization - Virtual method overridden by subclasses to do any necessary initialization before ...
Definition: Pass.h:119
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:146
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
size_t size() const
Definition: SmallVector.h:92
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:587
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1210
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:250
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:614
TypeSize getElementOffset(unsigned Idx) const
Definition: DataLayout.h:643
Class to represent struct types.
Definition: DerivedTypes.h:216
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:361
static SectionKind getKindForGlobal(const GlobalObject *GO, const TargetMachine &TM)
Classify the specified global variable into a set of target independent categories embodied in Sectio...
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
bool isOSBinFormatMachO() const
Tests whether the environment is MachO.
Definition: Triple.h:732
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static IntegerType * getInt8Ty(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
op_range operands()
Definition: User.h:242
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:694
iterator_range< use_iterator > uses()
Definition: Value.h:376
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
@ SDL
Definition: COFF.h:815
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
Expected< const typename ELFT::Shdr * > getSection(typename ELFT::ShdrRange Sections, uint32_t Index)
Definition: ELF.h:533
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
void stable_sort(R &&Range)
Definition: STLExtras.h:1995
AddressSpace
Definition: NVPTXBaseInfo.h:21
void initializeGlobalMergePass(PassRegistry &)
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
Pass * createGlobalMergePass(const TargetMachine *TM, unsigned MaximalOffset, bool OnlyOptimizeForSize=false, bool MergeExternalByDefault=false)
GlobalMerge - This pass merges internal (by default) globals into structs to enable reuse of a base p...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
GlobalVariable * collectUsedGlobalVariables(const Module &M, SmallVectorImpl< GlobalValue * > &Vec, bool CompilerUsed)
Given "llvm.used" or "llvm.compiler.used" as a global name, collect the initializer elements of that ...
Definition: Module.cpp:833
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
bool SizeOnly
Whether we should try to optimize for size only.
Definition: GlobalMerge.h:35
bool MergeExternal
Whether we should merge global variables that have external linkage.
Definition: GlobalMerge.h:30