LLVM  14.0.0git
Internalize.cpp
Go to the documentation of this file.
1 //===-- Internalize.cpp - Mark functions internal -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass loops over all of the functions and variables in the input module.
10 // If the function or variable does not need to be preserved according to the
11 // client supplied callback, it is marked as internal.
12 //
13 // This transformation would not be legal in a regular compilation, but it gets
14 // extra information from the linker about what is safe.
15 //
16 // For example: Internalizing a function with external linkage. Only if we are
17 // told it is only used from within this module, it is safe to do it.
18 //
19 //===----------------------------------------------------------------------===//
20 
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
38 using namespace llvm;
39 
40 #define DEBUG_TYPE "internalize"
41 
42 STATISTIC(NumAliases, "Number of aliases internalized");
43 STATISTIC(NumFunctions, "Number of functions internalized");
44 STATISTIC(NumGlobals, "Number of global vars internalized");
45 
46 // APIFile - A file which contains a list of symbols that should not be marked
47 // external.
49  APIFile("internalize-public-api-file", cl::value_desc("filename"),
50  cl::desc("A file containing list of symbol names to preserve"));
51 
52 // APIList - A list of symbols that should not be marked internal.
54  APIList("internalize-public-api-list", cl::value_desc("list"),
55  cl::desc("A list of symbol names to preserve"), cl::CommaSeparated);
56 
57 namespace {
58 // Helper to load an API list to preserve from file and expose it as a functor
59 // for internalization.
60 class PreserveAPIList {
61 public:
62  PreserveAPIList() {
63  if (!APIFile.empty())
64  LoadFile(APIFile);
65  ExternalNames.insert(APIList.begin(), APIList.end());
66  }
67 
68  bool operator()(const GlobalValue &GV) {
69  return ExternalNames.count(GV.getName());
70  }
71 
72 private:
73  // Contains the set of symbols loaded from file
74  StringSet<> ExternalNames;
75 
76  void LoadFile(StringRef Filename) {
77  // Load the APIFile...
79  MemoryBuffer::getFile(Filename);
80  if (!Buf) {
81  errs() << "WARNING: Internalize couldn't load file '" << Filename
82  << "'! Continuing as if it's empty.\n";
83  return; // Just continue as if the file were empty
84  }
85  for (line_iterator I(*Buf->get(), true), E; I != E; ++I)
86  ExternalNames.insert(*I);
87  }
88 };
89 } // end anonymous namespace
90 
91 bool InternalizePass::shouldPreserveGV(const GlobalValue &GV) {
92  // Function must be defined here
93  if (GV.isDeclaration())
94  return true;
95 
96  // Available externally is really just a "declaration with a body".
98  return true;
99 
100  // Assume that dllexported symbols are referenced elsewhere
101  if (GV.hasDLLExportStorageClass())
102  return true;
103 
104  // As the name suggests, externally initialized variables need preserving as
105  // they would be initialized elsewhere externally.
106  if (const auto *G = dyn_cast<GlobalVariable>(&GV))
107  if (G->isExternallyInitialized())
108  return true;
109 
110  // Already local, has nothing to do.
111  if (GV.hasLocalLinkage())
112  return false;
113 
114  // Check some special cases
115  if (AlwaysPreserved.count(GV.getName()))
116  return true;
117 
118  return MustPreserveGV(GV);
119 }
120 
121 bool InternalizePass::maybeInternalize(
123  SmallString<0> ComdatName;
124  if (Comdat *C = GV.getComdat()) {
125  // For GlobalAlias, C is the aliasee object's comdat which may have been
126  // redirected. So ComdatMap may not contain C.
127  if (ComdatMap.lookup(C).External)
128  return false;
129 
130  if (auto *GO = dyn_cast<GlobalObject>(&GV)) {
131  // If a comdat with one member is not externally visible, we can drop it.
132  // Otherwise, the comdat can be used to establish dependencies among the
133  // group of sections. Thus we have to keep the comdat but switch it to
134  // nodeduplicate.
135  // Note: nodeduplicate is not necessary for COFF. wasm doesn't support
136  // nodeduplicate.
137  ComdatInfo &Info = ComdatMap.find(C)->second;
138  if (Info.Size == 1)
139  GO->setComdat(nullptr);
140  else if (!IsWasm)
141  C->setSelectionKind(Comdat::NoDeduplicate);
142  }
143 
144  if (GV.hasLocalLinkage())
145  return false;
146  } else {
147  if (GV.hasLocalLinkage())
148  return false;
149 
150  if (shouldPreserveGV(GV))
151  return false;
152  }
153 
156  return true;
157 }
158 
159 // If GV is part of a comdat and is externally visible, update the comdat size
160 // and keep track of its comdat so that we don't internalize any of its members.
161 void InternalizePass::checkComdat(
163  Comdat *C = GV.getComdat();
164  if (!C)
165  return;
166 
167  ComdatInfo &Info = ComdatMap.try_emplace(C).first->second;
168  ++Info.Size;
169  if (shouldPreserveGV(GV))
170  Info.External = true;
171 }
172 
174  bool Changed = false;
175  CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : nullptr;
176 
178  collectUsedGlobalVariables(M, Used, false);
179 
180  // Collect comdat size and visiblity information for the module.
182  if (!M.getComdatSymbolTable().empty()) {
183  for (Function &F : M)
184  checkComdat(F, ComdatMap);
185  for (GlobalVariable &GV : M.globals())
186  checkComdat(GV, ComdatMap);
187  for (GlobalAlias &GA : M.aliases())
188  checkComdat(GA, ComdatMap);
189  }
190 
191  // We must assume that globals in llvm.used have a reference that not even
192  // the linker can see, so we don't internalize them.
193  // For llvm.compiler.used the situation is a bit fuzzy. The assembler and
194  // linker can drop those symbols. If this pass is running as part of LTO,
195  // one might think that it could just drop llvm.compiler.used. The problem
196  // is that even in LTO llvm doesn't see every reference. For example,
197  // we don't see references from function local inline assembly. To be
198  // conservative, we internalize symbols in llvm.compiler.used, but we
199  // keep llvm.compiler.used so that the symbol is not deleted by llvm.
200  for (GlobalValue *V : Used) {
201  AlwaysPreserved.insert(V->getName());
202  }
203 
204  // Mark all functions not in the api as internal.
205  IsWasm = Triple(M.getTargetTriple()).isOSBinFormatWasm();
206  for (Function &I : M) {
207  if (!maybeInternalize(I, ComdatMap))
208  continue;
209  Changed = true;
210 
211  if (ExternalNode)
212  // Remove a callgraph edge from the external node to this function.
213  ExternalNode->removeOneAbstractEdgeTo((*CG)[&I]);
214 
215  ++NumFunctions;
216  LLVM_DEBUG(dbgs() << "Internalizing func " << I.getName() << "\n");
217  }
218 
219  // Never internalize the llvm.used symbol. It is used to implement
220  // attribute((used)).
221  // FIXME: Shouldn't this just filter on llvm.metadata section??
222  AlwaysPreserved.insert("llvm.used");
223  AlwaysPreserved.insert("llvm.compiler.used");
224 
225  // Never internalize anchors used by the machine module info, else the info
226  // won't find them. (see MachineModuleInfo.)
227  AlwaysPreserved.insert("llvm.global_ctors");
228  AlwaysPreserved.insert("llvm.global_dtors");
229  AlwaysPreserved.insert("llvm.global.annotations");
230 
231  // Never internalize symbols code-gen inserts.
232  // FIXME: We should probably add this (and the __stack_chk_guard) via some
233  // type of call-back in CodeGen.
234  AlwaysPreserved.insert("__stack_chk_fail");
235  if (Triple(M.getTargetTriple()).isOSAIX())
236  AlwaysPreserved.insert("__ssp_canary_word");
237  else
238  AlwaysPreserved.insert("__stack_chk_guard");
239 
240  // Mark all global variables with initializers that are not in the api as
241  // internal as well.
242  for (auto &GV : M.globals()) {
243  if (!maybeInternalize(GV, ComdatMap))
244  continue;
245  Changed = true;
246 
247  ++NumGlobals;
248  LLVM_DEBUG(dbgs() << "Internalized gvar " << GV.getName() << "\n");
249  }
250 
251  // Mark all aliases that are not in the api as internal as well.
252  for (auto &GA : M.aliases()) {
253  if (!maybeInternalize(GA, ComdatMap))
254  continue;
255  Changed = true;
256 
257  ++NumAliases;
258  LLVM_DEBUG(dbgs() << "Internalized alias " << GA.getName() << "\n");
259  }
260 
261  return Changed;
262 }
263 
264 InternalizePass::InternalizePass() : MustPreserveGV(PreserveAPIList()) {}
265 
268  return PreservedAnalyses::all();
269 
272  return PA;
273 }
274 
275 namespace {
276 class InternalizeLegacyPass : public ModulePass {
277  // Client supplied callback to control wheter a symbol must be preserved.
278  std::function<bool(const GlobalValue &)> MustPreserveGV;
279 
280 public:
281  static char ID; // Pass identification, replacement for typeid
282 
283  InternalizeLegacyPass() : ModulePass(ID), MustPreserveGV(PreserveAPIList()) {}
284 
285  InternalizeLegacyPass(std::function<bool(const GlobalValue &)> MustPreserveGV)
286  : ModulePass(ID), MustPreserveGV(std::move(MustPreserveGV)) {
288  }
289 
290  bool runOnModule(Module &M) override {
291  if (skipModule(M))
292  return false;
293 
294  CallGraphWrapperPass *CGPass =
295  getAnalysisIfAvailable<CallGraphWrapperPass>();
296  CallGraph *CG = CGPass ? &CGPass->getCallGraph() : nullptr;
297  return internalizeModule(M, MustPreserveGV, CG);
298  }
299 
300  void getAnalysisUsage(AnalysisUsage &AU) const override {
301  AU.setPreservesCFG();
303  }
304 };
305 }
306 
308 INITIALIZE_PASS(InternalizeLegacyPass, "internalize",
309  "Internalize Global Symbols", false, false)
310 
312  return new InternalizeLegacyPass();
313 }
314 
316  std::function<bool(const GlobalValue &)> MustPreserveGV) {
317  return new InternalizeLegacyPass(std::move(MustPreserveGV));
318 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
MemoryBuffer.h
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::CallGraph::getExternalCallingNode
CallGraphNode * getExternalCallingNode() const
Returns the CallGraphNode which is used to represent undetermined calls into the callgraph.
Definition: CallGraph.h:128
llvm::CallGraphAnalysis
An analysis pass to compute the CallGraph for a Module.
Definition: CallGraph.h:305
llvm::line_iterator
A forward iterator which reads text lines from a buffer.
Definition: LineIterator.h:33
llvm::ModulePass
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:238
llvm::Function
Definition: Function.h:61
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::lookup
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:197
Pass.h
llvm::CallGraphWrapperPass::getCallGraph
const CallGraph & getCallGraph() const
The internal CallGraph around which the rest of this interface is wrapped.
Definition: CallGraph.h:348
llvm::Comdat::NoDeduplicate
@ NoDeduplicate
No deduplication is performed.
Definition: Comdat.h:37
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
llvm::cl::CommaSeparated
@ CommaSeparated
Definition: CommandLine.h:169
llvm::GlobalVariable
Definition: GlobalVariable.h:40
llvm::InternalizePass::internalizeModule
bool internalizeModule(Module &TheModule, CallGraph *CG=nullptr)
Run the internalizer on TheModule; returns true if any changes were made.
Definition: Internalize.cpp:173
llvm::GlobalAlias
Definition: GlobalAlias.h:27
llvm::CallGraph
The basic data container for the call graph of a Module of IR.
Definition: CallGraph.h:73
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:45
Module.h
llvm::GlobalValue::DefaultVisibility
@ DefaultVisibility
The GV is visible.
Definition: GlobalValue.h:63
llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:892
llvm::StringSet::insert
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition: StringSet.h:33
llvm::Triple::isOSBinFormatWasm
bool isOSBinFormatWasm() const
Tests whether the OS uses the Wasm binary format.
Definition: Triple.h:650
Internalize.h
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::collectUsedGlobalVariables
GlobalVariable * collectUsedGlobalVariables(const Module &M, SmallVectorImpl< GlobalValue * > &Vec, bool CompilerUsed)
Given "llvm.used" or "llvm.compiler.used" as a global name, collect the initializer elements of that ...
Definition: Module.cpp:778
INITIALIZE_PASS
INITIALIZE_PASS(InternalizeLegacyPass, "internalize", "Internalize Global Symbols", false, false) ModulePass *llvm
Definition: Internalize.cpp:308
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
CommandLine.h
llvm::MemoryBuffer::getFile
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Definition: MemoryBuffer.cpp:246
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::GlobalValue::isDeclaration
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:228
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::InternalizePass::InternalizePass
InternalizePass()
Definition: Internalize.cpp:264
LineIterator.h
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
SmallPtrSet.h
llvm::CallGraphNode
A node in the call graph for a module.
Definition: CallGraph.h:167
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::GlobalValue::InternalLinkage
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
llvm::Comdat
Definition: Comdat.h:31
llvm::Triple::isOSAIX
bool isOSAIX() const
Tests whether the OS is AIX.
Definition: Triple.h:627
llvm::SmallString< 0 >
G
const DataFlowGraph & G
Definition: RDFGraph.cpp:202
llvm::cl::opt
Definition: CommandLine.h:1434
llvm::GlobalValue::hasAvailableExternallyLinkage
bool hasAvailableExternallyLinkage() const
Definition: GlobalValue.h:432
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::PreservedAnalyses::preserve
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:176
IPO.h
GlobalStatus.h
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::DenseMap
Definition: DenseMap.h:714
llvm::CallGraphWrapperPass
The ModulePass which wraps up a CallGraph and the logic to build it.
Definition: CallGraph.h:337
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::GlobalValue::setLinkage
void setLinkage(LinkageTypes LT)
Definition: GlobalValue.h:454
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1609
llvm::GlobalValue::hasLocalLinkage
bool hasLocalLinkage() const
Definition: GlobalValue.h:445
APIFile
static cl::opt< std::string > APIFile("internalize-public-api-file", cl::value_desc("filename"), cl::desc("A file containing list of symbol names to preserve"))
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:83
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::internalizeModule
bool internalizeModule(Module &TheModule, std::function< bool(const GlobalValue &)> MustPreserveGV, CallGraph *CG=nullptr)
Helper function to internalize functions and variables in a Module.
Definition: Internalize.h:80
Triple.h
llvm::StringSet
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:22
llvm::InternalizePass::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: Internalize.cpp:266
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:253
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
StringSet.h
llvm::CallGraphNode::removeOneAbstractEdgeTo
void removeOneAbstractEdgeTo(CallGraphNode *Callee)
Removes one edge associated with a null callsite from this node to the specified callee function.
Definition: CallGraph.cpp:246
std
Definition: BitVector.h:838
llvm::GlobalValue::hasDLLExportStorageClass
bool hasDLLExportStorageClass() const
Definition: GlobalValue.h:262
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::GlobalValue::getComdat
const Comdat * getComdat() const
Definition: Globals.cpp:172
llvm::AnalysisManager::getCachedResult
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:798
llvm::createInternalizePass
ModulePass * createInternalizePass(std::function< bool(const GlobalValue &)> MustPreserveGV)
createInternalizePass - This pass loops over all of the functions in the input module,...
Definition: Internalize.cpp:315
llvm::cl::value_desc
Definition: CommandLine.h:424
llvm::StringMap::count
size_type count(StringRef Key) const
count - Return 1 if the element is in the map, 0 otherwise.
Definition: StringMap.h:244
llvm::initializeInternalizeLegacyPassPass
void initializeInternalizeLegacyPassPass(PassRegistry &)
CallGraph.h
llvm::ErrorOr::get
reference get()
Definition: ErrorOr.h:150
ModuleUtils.h
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::try_emplace
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
Definition: DenseMap.h:222
llvm::ErrorOr
Represents either an error or a value T.
Definition: ErrorOr.h:56
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::cl::desc
Definition: CommandLine.h:414
raw_ostream.h
llvm::GlobalValue::setVisibility
void setVisibility(VisibilityTypes V)
Definition: GlobalValue.h:235
InitializePasses.h
Debug.h
APIList
static cl::list< std::string > APIList("internalize-public-api-list", cl::value_desc("list"), cl::desc("A list of symbol names to preserve"), cl::CommaSeparated)
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37
llvm::cl::list
Definition: CommandLine.h:1642