LLVM 23.0.0git
ConstantMerge.cpp
Go to the documentation of this file.
1//===- ConstantMerge.cpp - Merge duplicate global constants ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interface to a pass that merges duplicate global
10// constants together into a single constant that is shared. This is useful
11// because some passes (ie TraceValues) insert a lot of string constants into
12// the program, regardless of whether or not an existing string is available.
13//
14// Algorithm: ConstantMerge is designed to build up a map of available constants
15// and eliminate duplicates when it is initialized.
16//
17//===----------------------------------------------------------------------===//
18
20#include "llvm/ADT/DenseMap.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/IR/Constants.h"
25#include "llvm/IR/DataLayout.h"
27#include "llvm/IR/GlobalValue.h"
29#include "llvm/IR/LLVMContext.h"
30#include "llvm/IR/Module.h"
32#include "llvm/Support/Debug.h"
33#include "llvm/Transforms/IPO.h"
34#include <algorithm>
35#include <cassert>
36#include <utility>
37
38using namespace llvm;
39
40#define DEBUG_TYPE "constmerge"
41
42STATISTIC(NumIdenticalMerged, "Number of identical global constants merged");
43
44/// Find values that are marked as llvm.used.
45static void FindUsedValues(GlobalVariable *LLVMUsed,
47 if (!LLVMUsed) return;
48 ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
49
50 for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) {
51 Value *Operand = Inits->getOperand(i)->stripPointerCasts();
52 GlobalValue *GV = cast<GlobalValue>(Operand);
53 UsedValues.insert(GV);
54 }
55}
56
57// True if A is better than B.
59 const GlobalVariable &B) {
60 if (!A.hasLocalLinkage() && B.hasLocalLinkage())
61 return true;
62
63 if (A.hasLocalLinkage() && !B.hasLocalLinkage())
64 return false;
65
66 if (A.hasGlobalUnnamedAddr() != B.hasGlobalUnnamedAddr())
67 return A.hasGlobalUnnamedAddr();
68
69 return !A.hasComdat();
70}
71
72static void copyDebugLocMetadata(const GlobalVariable *From,
73 GlobalVariable *To) {
75 From->getDebugInfo(MDs);
76 for (auto *MD : MDs)
77 To->addDebugInfo(MD);
78}
79
81 return GV->getAlign().value_or(
83}
84
85static bool
87 const SmallPtrSetImpl<const GlobalValue *> &UsedGlobals) {
88 // Only process constants with initializers in the default address space.
89 return !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
90 GV->getType()->getAddressSpace() != 0 || GV->hasSection() ||
91 // Don't touch thread-local variables.
92 GV->isThreadLocal() ||
93 // Don't touch values marked with attribute(used).
94 UsedGlobals.count(GV);
95}
96
97enum class CanMerge { No, Yes };
99 if (!Old->hasGlobalUnnamedAddr() && !New->hasGlobalUnnamedAddr())
100 return CanMerge::No;
102 return CanMerge::No;
103 assert(!New->hasMetadataOtherThanDebugLoc());
104
105 // Merging constants with different comdats means one group cannot in general
106 // be dropped independently without the other group now having an invalid
107 // reference to the dropped constant.
108 // If we merge into a constant that does not have comdat, we can merge even
109 // when the old constant has a comdat group because it has local linkage and
110 // is therefore not the comdat key.
111 if (Old->getComdat() != New->getComdat() && New->hasComdat())
112 return CanMerge::No;
113
114 if (!Old->hasGlobalUnnamedAddr())
115 New->setUnnamedAddr(GlobalValue::UnnamedAddr::None);
116 return CanMerge::Yes;
117}
118
119static void replace(Module &M, GlobalVariable *Old, GlobalVariable *New) {
120 Constant *NewConstant = New;
121
122 LLVM_DEBUG(dbgs() << "Replacing global: @" << Old->getName() << " -> @"
123 << New->getName() << "\n");
124
125 // Bump the alignment if necessary.
126 if (Old->getAlign() || New->getAlign())
127 New->setAlignment(std::max(getAlign(Old), getAlign(New)));
128
129 copyDebugLocMetadata(Old, New);
130 Old->replaceAllUsesWith(NewConstant);
131
132 // Delete the global value from the module.
133 assert(Old->hasLocalLinkage() &&
134 "Refusing to delete an externally visible global variable.");
135 Old->eraseFromParent();
136}
137
138static bool mergeConstants(Module &M) {
139 // Find all the globals that are marked "used". These cannot be merged.
141 FindUsedValues(M.getGlobalVariable("llvm.used"), UsedGlobals);
142 FindUsedValues(M.getGlobalVariable("llvm.compiler.used"), UsedGlobals);
143
144 // Map unique constants to globals.
146
148 SameContentReplacements;
149
150 size_t ChangesMade = 0;
151 size_t OldChangesMade = 0;
152
153 // Iterate constant merging while we are still making progress. Merging two
154 // constants together may allow us to merge other constants together if the
155 // second level constants have initializers which point to the globals that
156 // were just merged.
157 while (true) {
158 // Find the canonical constants others will be merged with.
159 for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) {
160 // If this GV is dead, remove it.
161 GV.removeDeadConstantUsers();
162 if (GV.use_empty() && GV.hasLocalLinkage()) {
163 GV.eraseFromParent();
164 ++ChangesMade;
165 continue;
166 }
167
168 if (isUnmergeableGlobal(&GV, UsedGlobals))
169 continue;
170
171 // This transformation is legal for weak ODR globals in the sense it
172 // doesn't change semantics, but we really don't want to perform it
173 // anyway; it's likely to pessimize code generation, and some tools
174 // (like the Darwin linker in cases involving CFString) don't expect it.
175 if (GV.isWeakForLinker())
176 continue;
177
178 // Don't touch globals with metadata other then !dbg.
179 if (GV.hasMetadataOtherThanDebugLoc())
180 continue;
181
182 Constant *Init = GV.getInitializer();
183
184 // Check to see if the initializer is already known.
185 GlobalVariable *&Slot = CMap[Init];
186
187 // If this is the first constant we find or if the old one is local,
188 // replace with the current one. If the current is externally visible
189 // it cannot be replace, but can be the canonical constant we merge with.
190 bool FirstConstantFound = !Slot;
191 if (FirstConstantFound || IsBetterCanonical(GV, *Slot)) {
192 Slot = &GV;
193 LLVM_DEBUG(dbgs() << "Cmap[" << *Init << "] = " << GV.getName()
194 << (FirstConstantFound ? "\n" : " (updated)\n"));
195 }
196 }
197
198 // Identify all globals that can be merged together, filling in the
199 // SameContentReplacements vector. We cannot do the replacement in this pass
200 // because doing so may cause initializers of other globals to be rewritten,
201 // invalidating the Constant* pointers in CMap.
202 for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) {
203 if (isUnmergeableGlobal(&GV, UsedGlobals))
204 continue;
205
206 // We can only replace constant with local linkage.
207 if (!GV.hasLocalLinkage())
208 continue;
209
210 Constant *Init = GV.getInitializer();
211
212 // Check to see if the initializer is already known.
213 auto Found = CMap.find(Init);
214 if (Found == CMap.end())
215 continue;
216
217 GlobalVariable *Slot = Found->second;
218 if (Slot == &GV)
219 continue;
220
221 if (makeMergeable(&GV, Slot) == CanMerge::No)
222 continue;
223
224 // Make all uses of the duplicate constant use the canonical version.
225 LLVM_DEBUG(dbgs() << "Will replace: @" << GV.getName() << " -> @"
226 << Slot->getName() << "\n");
227 SameContentReplacements.push_back(std::make_pair(&GV, Slot));
228 }
229
230 // Now that we have figured out which replacements must be made, do them all
231 // now. This avoid invalidating the pointers in CMap, which are unneeded
232 // now.
233 for (const auto &[Old, New] : SameContentReplacements) {
234 replace(M, Old, New);
235 ++ChangesMade;
236 ++NumIdenticalMerged;
237 }
238
239 if (ChangesMade == OldChangesMade)
240 break;
241 OldChangesMade = ChangesMade;
242
243 SameContentReplacements.clear();
244 CMap.clear();
245 }
246
247 return ChangesMade;
248}
249
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool IsBetterCanonical(const GlobalVariable &A, const GlobalVariable &B)
static void copyDebugLocMetadata(const GlobalVariable *From, GlobalVariable *To)
static bool mergeConstants(Module &M)
static bool isUnmergeableGlobal(GlobalVariable *GV, const SmallPtrSetImpl< const GlobalValue * > &UsedGlobals)
static void FindUsedValues(GlobalVariable *LLVMUsed, SmallPtrSetImpl< const GlobalValue * > &UsedValues)
Find values that are marked as llvm.used.
CanMerge
static CanMerge makeMergeable(GlobalVariable *Old, GlobalVariable *New)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
Module.h This file contains the declarations for the Module class.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
ConstantArray - Constant Array Declarations.
Definition Constants.h:576
PreservedAnalyses run(Module &M, ModuleAnalysisManager &)
This is an important base class in LLVM.
Definition Constant.h:43
LLVM_ABI Align getPreferredAlign(const GlobalVariable *GV) const
Returns the preferred alignment of the specified global.
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
iterator end()
Definition DenseMap.h:81
LLVM_ABI bool hasMetadataOtherThanDebugLoc() const
Definition Globals.cpp:400
const Comdat * getComdat() const
bool hasSection() const
Check if this global has a custom object file section.
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
bool hasLocalLinkage() const
PointerType * getType() const
Global values are always pointers.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition Globals.cpp:141
bool hasGlobalUnnamedAddr() const
MaybeAlign getAlign() const
Returns the alignment of the given variable.
LLVM_ABI void getDebugInfo(SmallVectorImpl< DIGlobalVariableExpression * > &GVs) const
Fill the vector with all debug info attachements.
bool isConstant() const
If the value is a global constant, its value is immutable throughout the runtime execution of the pro...
LLVM_ABI void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Definition Globals.cpp:538
LLVM_ABI void addDebugInfo(DIGlobalVariableExpression *GV)
Attach a DIGlobalVariableExpression.
bool hasDefinitiveInitializer() const
hasDefinitiveInitializer - Whether the global variable has an initializer, and any other instances of...
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
unsigned getAddressSpace() const
Return the address space of the Pointer type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:549
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:709
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
This is an optimization pass for GlobalISel generic memory operations.
MaybeAlign getAlign(const CallInst &I, unsigned Index)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
void replace(R &&Range, const T &OldValue, const T &NewValue)
Provide wrappers to std::replace which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1910
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39