LLVM 17.0.0git
Internalize.cpp
Go to the documentation of this file.
1//===-- Internalize.cpp - Mark functions internal -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass loops over all of the functions and variables in the input module.
10// If the function or variable does not need to be preserved according to the
11// client supplied callback, it is marked as internal.
12//
13// This transformation would not be legal in a regular compilation, but it gets
14// extra information from the linker about what is safe.
15//
16// For example: a function with external linkage may be internalized only if
17// we are told that it is used solely from within this module.
18//
19//===----------------------------------------------------------------------===//
20
22#include "llvm/ADT/Statistic.h"
23#include "llvm/ADT/StringSet.h"
25#include "llvm/IR/Module.h"
27#include "llvm/Pass.h"
29#include "llvm/Support/Debug.h"
35#include "llvm/Transforms/IPO.h"
36using namespace llvm;
37
38#define DEBUG_TYPE "internalize"
39
40STATISTIC(NumAliases, "Number of aliases internalized");
41STATISTIC(NumFunctions, "Number of functions internalized");
42STATISTIC(NumGlobals, "Number of global vars internalized");
43
44// APIFile - A file which contains a list of symbol glob patterns that should
45// not be marked internal.
47 APIFile("internalize-public-api-file", cl::value_desc("filename"),
48 cl::desc("A file containing list of symbol names to preserve"));
49
50// APIList - A list of symbol glob patterns that should not be marked internal.
52 APIList("internalize-public-api-list", cl::value_desc("list"),
53 cl::desc("A list of symbol names to preserve"), cl::CommaSeparated);
54
55namespace {
56// Helper to load an API list to preserve from file and expose it as a functor
57// for internalization.
58class PreserveAPIList {
59public:
60 PreserveAPIList() {
61 if (!APIFile.empty())
62 LoadFile(APIFile);
64 addGlob(Pattern);
65 }
66
67 bool operator()(const GlobalValue &GV) {
68 return llvm::any_of(
69 ExternalNames, [&](GlobPattern &GP) { return GP.match(GV.getName()); });
70 }
71
72private:
73 // Glob patterns for the symbols to preserve, as added through addGlob()
74 SmallVector<GlobPattern> ExternalNames;
75
76 void addGlob(StringRef Pattern) {
77 auto GlobOrErr = GlobPattern::create(Pattern);
78 if (!GlobOrErr) {
79 errs() << "WARNING: when loading pattern: '"
80 << toString(GlobOrErr.takeError()) << "' ignoring";
81 return;
82 }
83 ExternalNames.emplace_back(std::move(*GlobOrErr));
84 }
85
86 void LoadFile(StringRef Filename) {
87 // Load the APIFile...
89 MemoryBuffer::getFile(Filename);
90 if (!BufOrErr) {
91 errs() << "WARNING: Internalize couldn't load file '" << Filename
92 << "'! Continuing as if it's empty.\n";
93 return; // Just continue as if the file were empty
94 }
95 Buf = std::move(*BufOrErr);
96 for (line_iterator I(*Buf, true), E; I != E; ++I)
97 addGlob(*I);
98 }
99
100 std::shared_ptr<MemoryBuffer> Buf;
101};
102} // end anonymous namespace
103
104bool InternalizePass::shouldPreserveGV(const GlobalValue &GV) {
105 // The global must be defined in this module; declarations are preserved.
106 if (GV.isDeclaration())
107 return true;
108
109 // Available externally is really just a "declaration with a body".
111 return true;
112
113 // Assume that dllexported symbols are referenced elsewhere
115 return true;
116
117 // As the name suggests, externally initialized variables need preserving as
118 // they would be initialized elsewhere externally.
119 if (const auto *G = dyn_cast<GlobalVariable>(&GV))
120 if (G->isExternallyInitialized())
121 return true;
122
123 // Already local; there is nothing to do.
124 if (GV.hasLocalLinkage())
125 return false;
126
127 // Check some special cases
128 if (AlwaysPreserved.count(GV.getName()))
129 return true;
130
131 return MustPreserveGV(GV);
132}
133
134bool InternalizePass::maybeInternalize(
136 SmallString<0> ComdatName;
137 if (Comdat *C = GV.getComdat()) {
138 // For GlobalAlias, C is the aliasee object's comdat which may have been
139 // redirected. So ComdatMap may not contain C.
140 if (ComdatMap.lookup(C).External)
141 return false;
142
143 if (auto *GO = dyn_cast<GlobalObject>(&GV)) {
144 // If a comdat with one member is not externally visible, we can drop it.
145 // Otherwise, the comdat can be used to establish dependencies among the
146 // group of sections. Thus we have to keep the comdat but switch it to
147 // nodeduplicate.
148 // Note: nodeduplicate is not necessary for COFF. wasm doesn't support
149 // nodeduplicate.
150 ComdatInfo &Info = ComdatMap.find(C)->second;
151 if (Info.Size == 1)
152 GO->setComdat(nullptr);
153 else if (!IsWasm)
154 C->setSelectionKind(Comdat::NoDeduplicate);
155 }
156
157 if (GV.hasLocalLinkage())
158 return false;
159 } else {
160 if (GV.hasLocalLinkage())
161 return false;
162
163 if (shouldPreserveGV(GV))
164 return false;
165 }
166
169 return true;
170}
171
172// If GV is a comdat member, bump its comdat's member count; if GV must also be
173// preserved, mark the comdat external so that none of its members is internalized.
174void InternalizePass::checkComdat(
176 Comdat *C = GV.getComdat();
177 if (!C)
178 return;
179
180 ComdatInfo &Info = ComdatMap.try_emplace(C).first->second;
181 ++Info.Size;
182 if (shouldPreserveGV(GV))
183 Info.External = true;
184}
185
187 bool Changed = false;
188
190 collectUsedGlobalVariables(M, Used, false);
191
192 // Collect comdat size and visibility information for the module.
194 if (!M.getComdatSymbolTable().empty()) {
195 for (Function &F : M)
196 checkComdat(F, ComdatMap);
197 for (GlobalVariable &GV : M.globals())
198 checkComdat(GV, ComdatMap);
199 for (GlobalAlias &GA : M.aliases())
200 checkComdat(GA, ComdatMap);
201 }
202
203 // We must assume that globals in llvm.used have a reference that not even
204 // the linker can see, so we don't internalize them.
205 // For llvm.compiler.used the situation is a bit fuzzy. The assembler and
206 // linker can drop those symbols. If this pass is running as part of LTO,
207 // one might think that it could just drop llvm.compiler.used. The problem
208 // is that even in LTO llvm doesn't see every reference. For example,
209 // we don't see references from function local inline assembly. To be
210 // conservative, we internalize symbols in llvm.compiler.used, but we
211 // keep llvm.compiler.used so that the symbol is not deleted by llvm.
212 for (GlobalValue *V : Used) {
213 AlwaysPreserved.insert(V->getName());
214 }
215
216 // Never internalize the llvm.used and llvm.compiler.used symbols. llvm.used is
217 // used to implement __attribute__((used)).
218 // FIXME: Shouldn't this just filter on llvm.metadata section??
219 AlwaysPreserved.insert("llvm.used");
220 AlwaysPreserved.insert("llvm.compiler.used");
221
222 // Never internalize anchors used by the machine module info, else the info
223 // won't find them. (see MachineModuleInfo.)
224 AlwaysPreserved.insert("llvm.global_ctors");
225 AlwaysPreserved.insert("llvm.global_dtors");
226 AlwaysPreserved.insert("llvm.global.annotations");
227
228 // Never internalize symbols code-gen inserts.
229 // FIXME: We should probably add this (and the __stack_chk_guard) via some
230 // type of call-back in CodeGen.
231 AlwaysPreserved.insert("__stack_chk_fail");
232 if (Triple(M.getTargetTriple()).isOSAIX())
233 AlwaysPreserved.insert("__ssp_canary_word");
234 else
235 AlwaysPreserved.insert("__stack_chk_guard");
236
237 // Mark all functions not in the api as internal.
238 IsWasm = Triple(M.getTargetTriple()).isOSBinFormatWasm();
239 for (Function &I : M) {
240 if (!maybeInternalize(I, ComdatMap))
241 continue;
242 Changed = true;
243
244 ++NumFunctions;
245 LLVM_DEBUG(dbgs() << "Internalizing func " << I.getName() << "\n");
246 }
247
248 // Mark all global variables with initializers that are not in the api as
249 // internal as well.
250 for (auto &GV : M.globals()) {
251 if (!maybeInternalize(GV, ComdatMap))
252 continue;
253 Changed = true;
254
255 ++NumGlobals;
256 LLVM_DEBUG(dbgs() << "Internalized gvar " << GV.getName() << "\n");
257 }
258
259 // Mark all aliases that are not in the api as internal as well.
260 for (auto &GA : M.aliases()) {
261 if (!maybeInternalize(GA, ComdatMap))
262 continue;
263 Changed = true;
264
265 ++NumAliases;
266 LLVM_DEBUG(dbgs() << "Internalized alias " << GA.getName() << "\n");
267 }
268
269 return Changed;
270}
271
// Default constructor: installs a freshly constructed PreserveAPIList functor
// as the MustPreserveGV callback, which shouldPreserveGV() consults last.
// PreserveAPIList matches a global's name against its glob patterns; it loads
// them from the -internalize-public-api-file file when that option is
// non-empty (and presumably also from -internalize-public-api-list -- the
// loop header is not visible in this listing; confirm against the source).
272InternalizePass::InternalizePass() : MustPreserveGV(PreserveAPIList()) {}
273
275 if (!internalizeModule(M))
276 return PreservedAnalyses::all();
277
279}
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
This file provides interfaces used to build and manipulate a call graph, which is a very useful tool ...
#define LLVM_DEBUG(X)
Definition: Debug.h:101
static cl::list< std::string > APIList("internalize-public-api-list", cl::value_desc("list"), cl::desc("A list of symbol names to preserve"), cl::CommaSeparated)
static cl::opt< std::string > APIFile("internalize-public-api-file", cl::value_desc("filename"), cl::desc("A file containing list of symbol names to preserve"))
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
Module.h This file contains the declarations for the Module class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
StringSet - A set-like wrapper for the StringMap.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:620
@ NoDeduplicate
No deduplication is performed.
Definition: Comdat.h:39
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:202
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
Definition: DenseMap.h:235
Represents either an error or a value T.
Definition: ErrorOr.h:56
bool match(StringRef S) const
static Expected< GlobPattern > create(StringRef Pat)
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:275
bool hasLocalLinkage() const
Definition: GlobalValue.h:523
const Comdat * getComdat() const
Definition: Globals.cpp:185
void setLinkage(LinkageTypes LT)
Definition: GlobalValue.h:532
bool hasDLLExportStorageClass() const
Definition: GlobalValue.h:277
@ DefaultVisibility
The GV is visible.
Definition: GlobalValue.h:63
void setVisibility(VisibilityTypes V)
Definition: GlobalValue.h:250
bool hasAvailableExternallyLinkage() const
Definition: GlobalValue.h:507
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
bool internalizeModule(Module &TheModule)
Run the internalizer on TheModule; returns true if any changes were made.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:155
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
size_type count(StringRef Key) const
count - Return 1 if the element is in the map, 0 otherwise.
Definition: StringMap.h:256
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition: StringSet.h:34
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
bool isOSBinFormatWasm() const
Tests whether the OS uses the Wasm binary format.
Definition: Triple.h:693
bool isOSAIX() const
Tests whether the OS is AIX.
Definition: Triple.h:670
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
A forward iterator which reads text lines from a buffer.
Definition: LineIterator.h:33
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ CommaSeparated
Definition: CommandLine.h:164
std::optional< const char * > toString(const std::optional< DWARFFormValue > &V)
Take an optional DWARFFormValue and try to extract a string value from it.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1826
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
GlobalVariable * collectUsedGlobalVariables(const Module &M, SmallVectorImpl< GlobalValue * > &Vec, bool CompilerUsed)
Given "llvm.used" or "llvm.compiler.used" as a global name, collect the initializer elements of that ...
Definition: Module.cpp:807