LLVM  13.0.0git
Internalize.cpp
Go to the documentation of this file.
1 //===-- Internalize.cpp - Mark functions internal -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass loops over all of the functions and variables in the input module.
10 // If the function or variable does not need to be preserved according to the
11 // client supplied callback, it is marked as internal.
12 //
13 // This transformation would not be legal in a regular compilation, but it gets
14 // extra information from the linker about what is safe.
15 //
16 // For example: internalizing a function with external linkage is only safe if
17 // we are told that it is used only from within this module.
18 //
19 //===----------------------------------------------------------------------===//
20 
21 #include "llvm/Transforms/IPO/Internalize.h"
22 #include "llvm/ADT/SmallPtrSet.h"
23 #include "llvm/ADT/Statistic.h"
24 #include "llvm/ADT/StringSet.h"
25 #include "llvm/Analysis/CallGraph.h"
26 #include "llvm/IR/Module.h"
27 #include "llvm/InitializePasses.h"
28 #include "llvm/Pass.h"
29 #include "llvm/Support/CommandLine.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/LineIterator.h"
32 #include "llvm/Support/MemoryBuffer.h"
33 #include "llvm/Support/raw_ostream.h"
34 #include "llvm/Transforms/IPO.h"
35 #include "llvm/Transforms/Utils/GlobalStatus.h"
36 using namespace llvm;
37 
38 #define DEBUG_TYPE "internalize"
39 
40 STATISTIC(NumAliases, "Number of aliases internalized");
41 STATISTIC(NumFunctions, "Number of functions internalized");
42 STATISTIC(NumGlobals, "Number of global vars internalized");
43 
44 // APIFile - A file which contains a list of symbols that should not be marked
45 // internal.
46 static cl::opt<std::string>
47  APIFile("internalize-public-api-file", cl::value_desc("filename"),
48  cl::desc("A file containing list of symbol names to preserve"));
49 
50 // APIList - A list of symbols that should not be marked internal.
51 static cl::list<std::string>
52  APIList("internalize-public-api-list", cl::value_desc("list"),
53  cl::desc("A list of symbol names to preserve"), cl::CommaSeparated);
54 
55 namespace {
56 // Helper to load an API list to preserve from file and expose it as a functor
57 // for internalization.
58 class PreserveAPIList {
59 public:
// Builds the preserve set from the file named by APIFile (only when that
// option is non-empty) and from every entry of APIList.
60  PreserveAPIList() {
61  if (!APIFile.empty())
62  LoadFile(APIFile);
63  ExternalNames.insert(APIList.begin(), APIList.end());
64  }
65 
// Returns true when GV's name is in the preserve set.
66  bool operator()(const GlobalValue &GV) {
67  return ExternalNames.count(GV.getName());
68  }
69 
70 private:
71  // Names to preserve: the lines of the API file plus the APIList entries.
72  StringSet<> ExternalNames;
73 
74  void LoadFile(StringRef Filename) {
75  // Load the APIFile; every line the iterator yields is inserted as a name.
76  ErrorOr<std::unique_ptr<MemoryBuffer>> Buf =
77  MemoryBuffer::getFile(Filename);
78  if (!Buf) {
79  errs() << "WARNING: Internalize couldn't load file '" << Filename
80  << "'! Continuing as if it's empty.\n";
81  return; // Just continue as if the file were empty
82  }
83  for (line_iterator I(*Buf->get(), true), E; I != E; ++I)
84  ExternalNames.insert(*I);
85  }
86 };
87 } // end anonymous namespace
88 
// Returns true when GV must keep its current linkage, false when it may be
// internalized (or is already local).
89 bool InternalizePass::shouldPreserveGV(const GlobalValue &GV) {
90  // Function must be defined here
91  if (GV.isDeclaration())
92  return true;
93 
94  // Available externally is really just a "declaration with a body".
95  if (GV.hasAvailableExternallyLinkage())
96  return true;
97 
98  // Assume that dllexported symbols are referenced elsewhere
99  if (GV.hasDLLExportStorageClass())
100  return true;
101 
102  // Already local, has nothing to do.
103  if (GV.hasLocalLinkage())
104  return false;
105 
106  // Check some special cases
107  if (AlwaysPreserved.count(GV.getName()))
108  return true;
109 
110  return MustPreserveGV(GV);
111 }
112 
// Internalizes GV unless it is already local, must be preserved, or belongs to
// a comdat recorded in ExternalComdats. Returns true only when GV's linkage
// was actually changed.
113 bool InternalizePass::maybeInternalize(
114  GlobalValue &GV, const DenseSet<const Comdat *> &ExternalComdats) {
115  if (Comdat *C = GV.getComdat()) {
116  if (ExternalComdats.count(C))
117  return false;
118 
119  // If a comdat is not externally visible we can drop it.
120  if (auto GO = dyn_cast<GlobalObject>(&GV))
121  GO->setComdat(nullptr);
122 
123  if (GV.hasLocalLinkage())
124  return false;
125  } else {
126  if (GV.hasLocalLinkage())
127  return false;
128 
129  if (shouldPreserveGV(GV))
130  return false;
131  }
132 
133  GV.setVisibility(GlobalValue::DefaultVisibility);
134  GV.setLinkage(GlobalValue::InternalLinkage);
135  return true;
136 }
137 
138 // If GV belongs to a comdat and shouldPreserveGV() says GV must be kept, record
139 // that comdat in ExternalComdats so that we don't internalize any of its members.
140 void InternalizePass::checkComdatVisibility(
141  GlobalValue &GV, DenseSet<const Comdat *> &ExternalComdats) {
142  Comdat *C = GV.getComdat();
143  if (!C)
144  return; // Not in a comdat: nothing to record.
145 
146  if (shouldPreserveGV(GV))
147  ExternalComdats.insert(C);
148 }
149 
151  bool Changed = false;
152  CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : nullptr;
153 
155  collectUsedGlobalVariables(M, Used, false);
156 
157  // Collect comdat visiblity information for the module.
158  DenseSet<const Comdat *> ExternalComdats;
159  if (!M.getComdatSymbolTable().empty()) {
160  for (Function &F : M)
161  checkComdatVisibility(F, ExternalComdats);
162  for (GlobalVariable &GV : M.globals())
163  checkComdatVisibility(GV, ExternalComdats);
164  for (GlobalAlias &GA : M.aliases())
165  checkComdatVisibility(GA, ExternalComdats);
166  }
167 
168  // We must assume that globals in llvm.used have a reference that not even
169  // the linker can see, so we don't internalize them.
170  // For llvm.compiler.used the situation is a bit fuzzy. The assembler and
171  // linker can drop those symbols. If this pass is running as part of LTO,
172  // one might think that it could just drop llvm.compiler.used. The problem
173  // is that even in LTO llvm doesn't see every reference. For example,
174  // we don't see references from function local inline assembly. To be
175  // conservative, we internalize symbols in llvm.compiler.used, but we
176  // keep llvm.compiler.used so that the symbol is not deleted by llvm.
177  for (GlobalValue *V : Used) {
178  AlwaysPreserved.insert(V->getName());
179  }
180 
181  // Mark all functions not in the api as internal.
182  for (Function &I : M) {
183  if (!maybeInternalize(I, ExternalComdats))
184  continue;
185  Changed = true;
186 
187  if (ExternalNode)
188  // Remove a callgraph edge from the external node to this function.
189  ExternalNode->removeOneAbstractEdgeTo((*CG)[&I]);
190 
191  ++NumFunctions;
192  LLVM_DEBUG(dbgs() << "Internalizing func " << I.getName() << "\n");
193  }
194 
195  // Never internalize the llvm.used symbol. It is used to implement
196  // attribute((used)).
197  // FIXME: Shouldn't this just filter on llvm.metadata section??
198  AlwaysPreserved.insert("llvm.used");
199  AlwaysPreserved.insert("llvm.compiler.used");
200 
201  // Never internalize anchors used by the machine module info, else the info
202  // won't find them. (see MachineModuleInfo.)
203  AlwaysPreserved.insert("llvm.global_ctors");
204  AlwaysPreserved.insert("llvm.global_dtors");
205  AlwaysPreserved.insert("llvm.global.annotations");
206 
207  // Never internalize symbols code-gen inserts.
208  // FIXME: We should probably add this (and the __stack_chk_guard) via some
209  // type of call-back in CodeGen.
210  AlwaysPreserved.insert("__stack_chk_fail");
211  AlwaysPreserved.insert("__stack_chk_guard");
212 
213  // Mark all global variables with initializers that are not in the api as
214  // internal as well.
215  for (auto &GV : M.globals()) {
216  if (!maybeInternalize(GV, ExternalComdats))
217  continue;
218  Changed = true;
219 
220  ++NumGlobals;
221  LLVM_DEBUG(dbgs() << "Internalized gvar " << GV.getName() << "\n");
222  }
223 
224  // Mark all aliases that are not in the api as internal as well.
225  for (auto &GA : M.aliases()) {
226  if (!maybeInternalize(GA, ExternalComdats))
227  continue;
228  Changed = true;
229 
230  ++NumAliases;
231  LLVM_DEBUG(dbgs() << "Internalized alias " << GA.getName() << "\n");
232  }
233 
234  return Changed;
235 }
236 
// Default constructor: the preserve callback is a PreserveAPIList, which is
// built from the internalize-public-api-file / internalize-public-api-list
// options.
237 InternalizePass::InternalizePass() : MustPreserveGV(PreserveAPIList()) {}
238 
// New pass manager entry point. Runs internalizeModule() on M, passing the
// cached call graph if one exists. Reports all analyses preserved when nothing
// changed, otherwise only CallGraphAnalysis.
239 PreservedAnalyses InternalizePass::run(Module &M, ModuleAnalysisManager &AM) {
240  if (!internalizeModule(M, AM.getCachedResult<CallGraphAnalysis>(M)))
241  return PreservedAnalyses::all();
242 
243  PreservedAnalyses PA;
244  PA.preserve<CallGraphAnalysis>();
245  return PA;
246 }
247 
248 namespace {
// Legacy pass manager wrapper: forwards to internalizeModule() with the stored
// preserve callback and, when available, the module's call graph.
249 class InternalizeLegacyPass : public ModulePass {
250  // Client supplied callback to control whether a symbol must be preserved.
251  std::function<bool(const GlobalValue &)> MustPreserveGV;
252 
253 public:
254  static char ID; // Pass identification, replacement for typeid
255 
256  InternalizeLegacyPass() : ModulePass(ID), MustPreserveGV(PreserveAPIList()) {}
257 
258  InternalizeLegacyPass(std::function<bool(const GlobalValue &)> MustPreserveGV)
259  : ModulePass(ID), MustPreserveGV(std::move(MustPreserveGV)) {
260  initializeInternalizeLegacyPassPass(*PassRegistry::getPassRegistry());
261  }
262 
263  bool runOnModule(Module &M) override {
264  if (skipModule(M))
265  return false;
266 
267  CallGraphWrapperPass *CGPass =
268  getAnalysisIfAvailable<CallGraphWrapperPass>();
269  CallGraph *CG = CGPass ? &CGPass->getCallGraph() : nullptr;
270  return internalizeModule(M, MustPreserveGV, CG);
271  }
272 
273  void getAnalysisUsage(AnalysisUsage &AU) const override {
274  AU.setPreservesCFG();
275  AU.addPreserved<CallGraphWrapperPass>();
276  }
277 };
278 }
279 
// Out-of-class definition of the pass's static ID member, followed by the
// INITIALIZE_PASS registration under the name "internalize".
280 char InternalizeLegacyPass::ID = 0;
281 INITIALIZE_PASS(InternalizeLegacyPass, "internalize",
282  "Internalize Global Symbols", false, false)
283 
// Creates a legacy internalize pass that uses the default PreserveAPIList
// callback.
284 ModulePass *llvm::createInternalizePass() {
285  return new InternalizeLegacyPass();
286 }
287 
// Creates a legacy internalize pass that uses the client-supplied
// MustPreserveGV callback.
288 ModulePass *llvm::createInternalizePass(
289  std::function<bool(const GlobalValue &)> MustPreserveGV) {
290  return new InternalizeLegacyPass(std::move(MustPreserveGV));
291 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
MemoryBuffer.h
llvm
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::CallGraph::getExternalCallingNode
CallGraphNode * getExternalCallingNode() const
Returns the CallGraphNode which is used to represent undetermined calls into the callgraph.
Definition: CallGraph.h:128
llvm::CallGraphAnalysis
An analysis pass to compute the CallGraph for a Module.
Definition: CallGraph.h:305
llvm::line_iterator
A forward iterator which reads text lines from a buffer.
Definition: LineIterator.h:33
llvm::ModulePass
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:238
llvm::Function
Definition: Function.h:61
Pass.h
llvm::CallGraphWrapperPass::getCallGraph
const CallGraph & getCallGraph() const
The internal CallGraph around which the rest of this interface is wrapped.
Definition: CallGraph.h:348
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
llvm::cl::CommaSeparated
@ CommaSeparated
Definition: CommandLine.h:169
llvm::GlobalVariable
Definition: GlobalVariable.h:40
llvm::InternalizePass::internalizeModule
bool internalizeModule(Module &TheModule, CallGraph *CG=nullptr)
Run the internalizer on TheModule, returns true if any changes were made.
Definition: Internalize.cpp:150
llvm::GlobalAlias
Definition: GlobalAlias.h:27
llvm::CallGraph
The basic data container for the call graph of a Module of IR.
Definition: CallGraph.h:73
Module.h
llvm::GlobalValue::DefaultVisibility
@ DefaultVisibility
The GV is visible.
Definition: GlobalValue.h:63
llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:892
llvm::StringSet::insert
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition: StringSet.h:33
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::insert
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::count
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:97
Internalize.h
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::collectUsedGlobalVariables
GlobalVariable * collectUsedGlobalVariables(const Module &M, SmallVectorImpl< GlobalValue * > &Vec, bool CompilerUsed)
Given "llvm.used" or "llvm.compiler.used" as a global name, collect the initializer elements of that ...
Definition: Module.cpp:728
INITIALIZE_PASS
INITIALIZE_PASS(InternalizeLegacyPass, "internalize", "Internalize Global Symbols", false, false) ModulePass *llvm
Definition: Internalize.cpp:281
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
CommandLine.h
llvm::MemoryBuffer::getFile
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Definition: MemoryBuffer.cpp:246
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::GlobalValue::isDeclaration
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:228
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::InternalizePass::InternalizePass
InternalizePass()
Definition: Internalize.cpp:237
LineIterator.h
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
SmallPtrSet.h
llvm::CallGraphNode
A node in the call graph for a module.
Definition: CallGraph.h:167
llvm::GlobalValue::InternalLinkage
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
llvm::Comdat
Definition: Comdat.h:31
llvm::DenseSet
Implements a dense probed hash-table based set.
Definition: DenseSet.h:268
llvm::cl::opt
Definition: CommandLine.h:1422
llvm::GlobalValue::hasAvailableExternallyLinkage
bool hasAvailableExternallyLinkage() const
Definition: GlobalValue.h:432
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::PreservedAnalyses::preserve
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:176
IPO.h
GlobalStatus.h
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::CallGraphWrapperPass
The ModulePass which wraps up a CallGraph and the logic to build it.
Definition: CallGraph.h:337
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::GlobalValue::setLinkage
void setLinkage(LinkageTypes LT)
Definition: GlobalValue.h:454
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1540
llvm::GlobalValue::hasLocalLinkage
bool hasLocalLinkage() const
Definition: GlobalValue.h:445
APIFile
static cl::opt< std::string > APIFile("internalize-public-api-file", cl::value_desc("filename"), cl::desc("A file containing list of symbol names to preserve"))
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:83
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::internalizeModule
bool internalizeModule(Module &TheModule, std::function< bool(const GlobalValue &)> MustPreserveGV, CallGraph *CG=nullptr)
Helper function to internalize functions and variables in a Module.
Definition: Internalize.h:70
llvm::StringSet
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:22
llvm::InternalizePass::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: Internalize.cpp:239
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:253
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:299
StringSet.h
llvm::CallGraphNode::removeOneAbstractEdgeTo
void removeOneAbstractEdgeTo(CallGraphNode *Callee)
Removes one edge associated with a null callsite from this node to the specified callee function.
Definition: CallGraph.cpp:246
std
Definition: BitVector.h:838
llvm::GlobalValue::hasDLLExportStorageClass
bool hasDLLExportStorageClass() const
Definition: GlobalValue.h:262
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::GlobalValue::getComdat
const Comdat * getComdat() const
Definition: Globals.cpp:172
llvm::AnalysisManager::getCachedResult
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:804
llvm::createInternalizePass
ModulePass * createInternalizePass(std::function< bool(const GlobalValue &)> MustPreserveGV)
createInternalizePass - This pass loops over all of the functions in the input module,...
Definition: Internalize.cpp:288
llvm::cl::value_desc
Definition: CommandLine.h:424
llvm::StringMap::count
size_type count(StringRef Key) const
count - Return 1 if the element is in the map, 0 otherwise.
Definition: StringMap.h:246
llvm::initializeInternalizeLegacyPassPass
void initializeInternalizeLegacyPassPass(PassRegistry &)
CallGraph.h
llvm::ErrorOr::get
reference get()
Definition: ErrorOr.h:150
llvm::ErrorOr
Represents either an error or a value T.
Definition: ErrorOr.h:56
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::cl::desc
Definition: CommandLine.h:414
raw_ostream.h
llvm::GlobalValue::setVisibility
void setVisibility(VisibilityTypes V)
Definition: GlobalValue.h:235
InitializePasses.h
Debug.h
APIList
static cl::list< std::string > APIList("internalize-public-api-list", cl::value_desc("list"), cl::desc("A list of symbol names to preserve"), cl::CommaSeparated)
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
llvm::cl::list
Definition: CommandLine.h:1630