LLVM  15.0.0git
Internalize.cpp
Go to the documentation of this file.
1 //===-- Internalize.cpp - Mark functions internal -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass loops over all of the functions and variables in the input module.
10 // If the function or variable does not need to be preserved according to the
11 // client supplied callback, it is marked as internal.
12 //
13 // This transformation would not be legal in a regular compilation, but it gets
14 // extra information from the linker about what is safe.
15 //
16 // For example, a function with external linkage can be internalized safely
17 // only if we are told that it is used solely from within this module.
18 //
19 //===----------------------------------------------------------------------===//
20 
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/ADT/StringSet.h"
24 #include "llvm/ADT/Triple.h"
26 #include "llvm/IR/Module.h"
27 #include "llvm/InitializePasses.h"
28 #include "llvm/Pass.h"
30 #include "llvm/Support/Debug.h"
34 #include "llvm/Transforms/IPO.h"
35 using namespace llvm;
36 
37 #define DEBUG_TYPE "internalize"
38 
39 STATISTIC(NumAliases, "Number of aliases internalized");
40 STATISTIC(NumFunctions, "Number of functions internalized");
41 STATISTIC(NumGlobals, "Number of global vars internalized");
42 
43 // APIFile - A file which contains a list of symbols that should not be marked
44 // internal.
46  APIFile("internalize-public-api-file", cl::value_desc("filename"),
47  cl::desc("A file containing list of symbol names to preserve"));
48 
49 // APIList - A list of symbols that should not be marked internal.
51  APIList("internalize-public-api-list", cl::value_desc("list"),
52  cl::desc("A list of symbol names to preserve"), cl::CommaSeparated);
53 
54 namespace {
55 // Helper to load an API list to preserve from file and expose it as a functor
56 // for internalization.
57 class PreserveAPIList {
58 public:
59  PreserveAPIList() {
60  if (!APIFile.empty())
61  LoadFile(APIFile);
62  ExternalNames.insert(APIList.begin(), APIList.end());
63  }
64 
65  bool operator()(const GlobalValue &GV) {
66  return ExternalNames.count(GV.getName());
67  }
68 
69 private:
70  // Contains the set of symbols loaded from file
71  StringSet<> ExternalNames;
72 
73  void LoadFile(StringRef Filename) {
74  // Load the APIFile...
76  MemoryBuffer::getFile(Filename);
77  if (!Buf) {
78  errs() << "WARNING: Internalize couldn't load file '" << Filename
79  << "'! Continuing as if it's empty.\n";
80  return; // Just continue as if the file were empty
81  }
82  for (line_iterator I(*Buf->get(), true), E; I != E; ++I)
83  ExternalNames.insert(*I);
84  }
85 };
86 } // end anonymous namespace
87 
88 bool InternalizePass::shouldPreserveGV(const GlobalValue &GV) {
89  // Function must be defined here
90  if (GV.isDeclaration())
91  return true;
92 
93  // Available externally is really just a "declaration with a body".
95  return true;
96 
97  // Assume that dllexported symbols are referenced elsewhere
98  if (GV.hasDLLExportStorageClass())
99  return true;
100 
101  // As the name suggests, externally initialized variables need preserving as
102  // they would be initialized elsewhere externally.
103  if (const auto *G = dyn_cast<GlobalVariable>(&GV))
104  if (G->isExternallyInitialized())
105  return true;
106 
107  // Already local, has nothing to do.
108  if (GV.hasLocalLinkage())
109  return false;
110 
111  // Check some special cases
112  if (AlwaysPreserved.count(GV.getName()))
113  return true;
114 
115  return MustPreserveGV(GV);
116 }
117 
118 bool InternalizePass::maybeInternalize(
120  SmallString<0> ComdatName;
121  if (Comdat *C = GV.getComdat()) {
122  // For GlobalAlias, C is the aliasee object's comdat which may have been
123  // redirected. So ComdatMap may not contain C.
124  if (ComdatMap.lookup(C).External)
125  return false;
126 
127  if (auto *GO = dyn_cast<GlobalObject>(&GV)) {
128  // If a comdat with one member is not externally visible, we can drop it.
129  // Otherwise, the comdat can be used to establish dependencies among the
130  // group of sections. Thus we have to keep the comdat but switch it to
131  // nodeduplicate.
132  // Note: nodeduplicate is not necessary for COFF. wasm doesn't support
133  // nodeduplicate.
134  ComdatInfo &Info = ComdatMap.find(C)->second;
135  if (Info.Size == 1)
136  GO->setComdat(nullptr);
137  else if (!IsWasm)
138  C->setSelectionKind(Comdat::NoDeduplicate);
139  }
140 
141  if (GV.hasLocalLinkage())
142  return false;
143  } else {
144  if (GV.hasLocalLinkage())
145  return false;
146 
147  if (shouldPreserveGV(GV))
148  return false;
149  }
150 
153  return true;
154 }
155 
156 // If GV is part of a comdat and is externally visible, update the comdat size
157 // and keep track of its comdat so that we don't internalize any of its members.
158 void InternalizePass::checkComdat(
160  Comdat *C = GV.getComdat();
161  if (!C)
162  return;
163 
164  ComdatInfo &Info = ComdatMap.try_emplace(C).first->second;
165  ++Info.Size;
166  if (shouldPreserveGV(GV))
167  Info.External = true;
168 }
169 
171  bool Changed = false;
172  CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : nullptr;
173 
175  collectUsedGlobalVariables(M, Used, false);
176 
177 // Collect comdat size and visibility information for the module.
179  if (!M.getComdatSymbolTable().empty()) {
180  for (Function &F : M)
181  checkComdat(F, ComdatMap);
182  for (GlobalVariable &GV : M.globals())
183  checkComdat(GV, ComdatMap);
184  for (GlobalAlias &GA : M.aliases())
185  checkComdat(GA, ComdatMap);
186  }
187 
188  // We must assume that globals in llvm.used have a reference that not even
189  // the linker can see, so we don't internalize them.
190  // For llvm.compiler.used the situation is a bit fuzzy. The assembler and
191  // linker can drop those symbols. If this pass is running as part of LTO,
192  // one might think that it could just drop llvm.compiler.used. The problem
193  // is that even in LTO llvm doesn't see every reference. For example,
194  // we don't see references from function local inline assembly. To be
195  // conservative, we internalize symbols in llvm.compiler.used, but we
196  // keep llvm.compiler.used so that the symbol is not deleted by llvm.
197  for (GlobalValue *V : Used) {
198  AlwaysPreserved.insert(V->getName());
199  }
200 
201  // Never internalize the llvm.used symbol. It is used to implement
202  // attribute((used)).
203  // FIXME: Shouldn't this just filter on llvm.metadata section??
204  AlwaysPreserved.insert("llvm.used");
205  AlwaysPreserved.insert("llvm.compiler.used");
206 
207  // Never internalize anchors used by the machine module info, else the info
208  // won't find them. (see MachineModuleInfo.)
209  AlwaysPreserved.insert("llvm.global_ctors");
210  AlwaysPreserved.insert("llvm.global_dtors");
211  AlwaysPreserved.insert("llvm.global.annotations");
212 
213  // Never internalize symbols code-gen inserts.
214  // FIXME: We should probably add this (and the __stack_chk_guard) via some
215  // type of call-back in CodeGen.
216  AlwaysPreserved.insert("__stack_chk_fail");
217  if (Triple(M.getTargetTriple()).isOSAIX())
218  AlwaysPreserved.insert("__ssp_canary_word");
219  else
220  AlwaysPreserved.insert("__stack_chk_guard");
221 
222  // Mark all functions not in the api as internal.
223  IsWasm = Triple(M.getTargetTriple()).isOSBinFormatWasm();
224  for (Function &I : M) {
225  if (!maybeInternalize(I, ComdatMap))
226  continue;
227  Changed = true;
228 
229  if (ExternalNode)
230  // Remove a callgraph edge from the external node to this function.
231  ExternalNode->removeOneAbstractEdgeTo((*CG)[&I]);
232 
233  ++NumFunctions;
234  LLVM_DEBUG(dbgs() << "Internalizing func " << I.getName() << "\n");
235  }
236 
237  // Mark all global variables with initializers that are not in the api as
238  // internal as well.
239  for (auto &GV : M.globals()) {
240  if (!maybeInternalize(GV, ComdatMap))
241  continue;
242  Changed = true;
243 
244  ++NumGlobals;
245  LLVM_DEBUG(dbgs() << "Internalized gvar " << GV.getName() << "\n");
246  }
247 
248  // Mark all aliases that are not in the api as internal as well.
249  for (auto &GA : M.aliases()) {
250  if (!maybeInternalize(GA, ComdatMap))
251  continue;
252  Changed = true;
253 
254  ++NumAliases;
255  LLVM_DEBUG(dbgs() << "Internalized alias " << GA.getName() << "\n");
256  }
257 
258  return Changed;
259 }
260 
261 InternalizePass::InternalizePass() : MustPreserveGV(PreserveAPIList()) {}
262 
265  return PreservedAnalyses::all();
266 
269  return PA;
270 }
271 
272 namespace {
273 class InternalizeLegacyPass : public ModulePass {
274 // Client supplied callback to control whether a symbol must be preserved.
275  std::function<bool(const GlobalValue &)> MustPreserveGV;
276 
277 public:
278  static char ID; // Pass identification, replacement for typeid
279 
280  InternalizeLegacyPass() : ModulePass(ID), MustPreserveGV(PreserveAPIList()) {}
281 
282  InternalizeLegacyPass(std::function<bool(const GlobalValue &)> MustPreserveGV)
283  : ModulePass(ID), MustPreserveGV(std::move(MustPreserveGV)) {
285  }
286 
287  bool runOnModule(Module &M) override {
288  if (skipModule(M))
289  return false;
290 
291  CallGraphWrapperPass *CGPass =
292  getAnalysisIfAvailable<CallGraphWrapperPass>();
293  CallGraph *CG = CGPass ? &CGPass->getCallGraph() : nullptr;
294  return internalizeModule(M, MustPreserveGV, CG);
295  }
296 
297  void getAnalysisUsage(AnalysisUsage &AU) const override {
298  AU.setPreservesCFG();
300  }
301 };
302 }
303 
305 INITIALIZE_PASS(InternalizeLegacyPass, "internalize",
306  "Internalize Global Symbols", false, false)
307 
309  return new InternalizeLegacyPass();
310 }
311 
313  std::function<bool(const GlobalValue &)> MustPreserveGV) {
314  return new InternalizeLegacyPass(std::move(MustPreserveGV));
315 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
MemoryBuffer.h
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::CallGraph::getExternalCallingNode
CallGraphNode * getExternalCallingNode() const
Returns the CallGraphNode which is used to represent undetermined calls into the callgraph.
Definition: CallGraph.h:127
llvm::CallGraphAnalysis
An analysis pass to compute the CallGraph for a Module.
Definition: CallGraph.h:304
llvm::line_iterator
A forward iterator which reads text lines from a buffer.
Definition: LineIterator.h:33
llvm::ModulePass
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:248
llvm::Function
Definition: Function.h:60
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::lookup
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:199
Pass.h
llvm::CallGraphWrapperPass::getCallGraph
const CallGraph & getCallGraph() const
The internal CallGraph around which the rest of this interface is wrapped.
Definition: CallGraph.h:347
llvm::Comdat::NoDeduplicate
@ NoDeduplicate
No deduplication is performed.
Definition: Comdat.h:39
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
Statistic.h
llvm::cl::CommaSeparated
@ CommaSeparated
Definition: CommandLine.h:165
llvm::GlobalVariable
Definition: GlobalVariable.h:39
llvm::InternalizePass::internalizeModule
bool internalizeModule(Module &TheModule, CallGraph *CG=nullptr)
Run the internalizer on TheModule; returns true if any changes were made.
Definition: Internalize.cpp:170
llvm::GlobalAlias
Definition: GlobalAlias.h:28
llvm::CallGraph
The basic data container for the call graph of a Module of IR.
Definition: CallGraph.h:72
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
Module.h
llvm::GlobalValue::DefaultVisibility
@ DefaultVisibility
The GV is visible.
Definition: GlobalValue.h:63
llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:893
llvm::StringSet::insert
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition: StringSet.h:34
llvm::Triple::isOSBinFormatWasm
bool isOSBinFormatWasm() const
Tests whether the OS uses the Wasm binary format.
Definition: Triple.h:666
Internalize.h
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::collectUsedGlobalVariables
GlobalVariable * collectUsedGlobalVariables(const Module &M, SmallVectorImpl< GlobalValue * > &Vec, bool CompilerUsed)
Given "llvm.used" or "llvm.compiler.used" as a global name, collect the initializer elements of that ...
Definition: Module.cpp:786
INITIALIZE_PASS
INITIALIZE_PASS(InternalizeLegacyPass, "internalize", "Internalize Global Symbols", false, false) ModulePass *llvm
Definition: Internalize.cpp:305
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
CommandLine.h
llvm::MemoryBuffer::getFile
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Definition: MemoryBuffer.cpp:239
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::GlobalValue::isDeclaration
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:241
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::InternalizePass::InternalizePass
InternalizePass()
Definition: Internalize.cpp:261
LineIterator.h
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::CallGraphNode
A node in the call graph for a module.
Definition: CallGraph.h:166
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::GlobalValue::InternalLinkage
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
llvm::Comdat
Definition: Comdat.h:33
llvm::Triple::isOSAIX
bool isOSAIX() const
Tests whether the OS is AIX.
Definition: Triple.h:643
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::SmallString< 0 >
G
const DataFlowGraph & G
Definition: RDFGraph.cpp:200
llvm::cl::opt
Definition: CommandLine.h:1392
llvm::GlobalValue::hasAvailableExternallyLinkage
bool hasAvailableExternallyLinkage() const
Definition: GlobalValue.h:434
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::PreservedAnalyses::preserve
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:173
IPO.h
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::DenseMap
Definition: DenseMap.h:716
llvm::CallGraphWrapperPass
The ModulePass which wraps up a CallGraph and the logic to build it.
Definition: CallGraph.h:336
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::GlobalValue::setLinkage
void setLinkage(LinkageTypes LT)
Definition: GlobalValue.h:459
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:152
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1663
llvm::GlobalValue::hasLocalLinkage
bool hasLocalLinkage() const
Definition: GlobalValue.h:450
APIFile
static cl::opt< std::string > APIFile("internalize-public-api-file", cl::value_desc("filename"), cl::desc("A file containing list of symbol names to preserve"))
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:82
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::internalizeModule
bool internalizeModule(Module &TheModule, std::function< bool(const GlobalValue &)> MustPreserveGV, CallGraph *CG=nullptr)
Helper function to internalize functions and variables in a Module.
Definition: Internalize.h:79
Triple.h
llvm::StringSet
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:23
llvm::InternalizePass::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: Internalize.cpp:263
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:263
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:305
StringSet.h
llvm::CallGraphNode::removeOneAbstractEdgeTo
void removeOneAbstractEdgeTo(CallGraphNode *Callee)
Removes one edge associated with a null callsite from this node to the specified callee function.
Definition: CallGraph.cpp:245
std
Definition: BitVector.h:851
llvm::GlobalValue::hasDLLExportStorageClass
bool hasDLLExportStorageClass() const
Definition: GlobalValue.h:261
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::GlobalValue::getComdat
const Comdat * getComdat() const
Definition: Globals.cpp:177
llvm::AnalysisManager::getCachedResult
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:799
llvm::createInternalizePass
ModulePass * createInternalizePass(std::function< bool(const GlobalValue &)> MustPreserveGV)
createInternalizePass - This pass loops over all of the functions in the input module,...
Definition: Internalize.cpp:312
llvm::cl::value_desc
Definition: CommandLine.h:414
llvm::StringMap::count
size_type count(StringRef Key) const
count - Return 1 if the element is in the map, 0 otherwise.
Definition: StringMap.h:246
llvm::initializeInternalizeLegacyPassPass
void initializeInternalizeLegacyPassPass(PassRegistry &)
CallGraph.h
llvm::ErrorOr::get
reference get()
Definition: ErrorOr.h:150
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::try_emplace
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
Definition: DenseMap.h:224
llvm::ErrorOr
Represents either an error or a value T.
Definition: ErrorOr.h:56
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::cl::desc
Definition: CommandLine.h:405
raw_ostream.h
llvm::GlobalValue::setVisibility
void setVisibility(VisibilityTypes V)
Definition: GlobalValue.h:234
InitializePasses.h
Debug.h
APIList
static cl::list< std::string > APIList("internalize-public-api-list", cl::value_desc("list"), cl::desc("A list of symbol names to preserve"), cl::CommaSeparated)
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37
llvm::cl::list
Definition: CommandLine.h:1601