LLVM  16.0.0git
Internalize.cpp
Go to the documentation of this file.
1 //===-- Internalize.cpp - Mark functions internal -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass loops over all of the functions and variables in the input module.
10 // If the function or variable does not need to be preserved according to the
11 // client supplied callback, it is marked as internal.
12 //
13 // This transformation would not be legal in a regular compilation, but it gets
14 // extra information from the linker about what is safe.
15 //
16 // For example: internalizing a function with external linkage is only safe if
17 // we are told that it is only used from within this module.
18 //
19 //===----------------------------------------------------------------------===//
20 
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GlobPattern.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
36 using namespace llvm;
37 
38 #define DEBUG_TYPE "internalize"
39 
40 STATISTIC(NumAliases, "Number of aliases internalized");
41 STATISTIC(NumFunctions, "Number of functions internalized");
42 STATISTIC(NumGlobals, "Number of global vars internalized");
43 
44 // APIFile - A file which contains a list of symbol glob patterns that should
45 // not be marked external.
47  APIFile("internalize-public-api-file", cl::value_desc("filename"),
48  cl::desc("A file containing list of symbol names to preserve"));
49 
50 // APIList - A list of symbol glob patterns that should not be marked internal.
52  APIList("internalize-public-api-list", cl::value_desc("list"),
53  cl::desc("A list of symbol names to preserve"), cl::CommaSeparated);
54 
55 namespace {
56 // Helper to load an API list to preserve from file and expose it as a functor
57 // for internalization.
58 class PreserveAPIList {
59 public:
60  PreserveAPIList() {
61  if (!APIFile.empty())
62  LoadFile(APIFile);
63  for (StringRef Pattern : APIList)
64  addGlob(Pattern);
65  }
66 
67  bool operator()(const GlobalValue &GV) {
68  return llvm::any_of(
69  ExternalNames, [&](GlobPattern &GP) { return GP.match(GV.getName()); });
70  }
71 
72 private:
73  // Contains the set of symbols loaded from file
74  SmallVector<GlobPattern> ExternalNames;
75 
76  void addGlob(StringRef Pattern) {
77  auto GlobOrErr = GlobPattern::create(Pattern);
78  if (!GlobOrErr) {
79  errs() << "WARNING: when loading pattern: '"
80  << toString(GlobOrErr.takeError()) << "' ignoring";
81  return;
82  }
83  ExternalNames.emplace_back(std::move(*GlobOrErr));
84  }
85 
86  void LoadFile(StringRef Filename) {
87  // Load the APIFile...
89  MemoryBuffer::getFile(Filename);
90  if (!BufOrErr) {
91  errs() << "WARNING: Internalize couldn't load file '" << Filename
92  << "'! Continuing as if it's empty.\n";
93  return; // Just continue as if the file were empty
94  }
95  Buf = std::move(*BufOrErr);
96  for (line_iterator I(*Buf, true), E; I != E; ++I)
97  addGlob(*I);
98  }
99 
100  std::shared_ptr<MemoryBuffer> Buf;
101 };
102 } // end anonymous namespace
103 
104 bool InternalizePass::shouldPreserveGV(const GlobalValue &GV) {
105  // Function must be defined here
106  if (GV.isDeclaration())
107  return true;
108 
109  // Available externally is really just a "declaration with a body".
111  return true;
112 
113  // Assume that dllexported symbols are referenced elsewhere
114  if (GV.hasDLLExportStorageClass())
115  return true;
116 
117  // As the name suggests, externally initialized variables need preserving as
118  // they would be initialized elsewhere externally.
119  if (const auto *G = dyn_cast<GlobalVariable>(&GV))
120  if (G->isExternallyInitialized())
121  return true;
122 
123  // Already local, has nothing to do.
124  if (GV.hasLocalLinkage())
125  return false;
126 
127  // Check some special cases
128  if (AlwaysPreserved.count(GV.getName()))
129  return true;
130 
131  return MustPreserveGV(GV);
132 }
133 
134 bool InternalizePass::maybeInternalize(
136  SmallString<0> ComdatName;
137  if (Comdat *C = GV.getComdat()) {
138  // For GlobalAlias, C is the aliasee object's comdat which may have been
139  // redirected. So ComdatMap may not contain C.
140  if (ComdatMap.lookup(C).External)
141  return false;
142 
143  if (auto *GO = dyn_cast<GlobalObject>(&GV)) {
144  // If a comdat with one member is not externally visible, we can drop it.
145  // Otherwise, the comdat can be used to establish dependencies among the
146  // group of sections. Thus we have to keep the comdat but switch it to
147  // nodeduplicate.
148  // Note: nodeduplicate is not necessary for COFF. wasm doesn't support
149  // nodeduplicate.
150  ComdatInfo &Info = ComdatMap.find(C)->second;
151  if (Info.Size == 1)
152  GO->setComdat(nullptr);
153  else if (!IsWasm)
154  C->setSelectionKind(Comdat::NoDeduplicate);
155  }
156 
157  if (GV.hasLocalLinkage())
158  return false;
159  } else {
160  if (GV.hasLocalLinkage())
161  return false;
162 
163  if (shouldPreserveGV(GV))
164  return false;
165  }
166 
169  return true;
170 }
171 
172 // If GV is part of a comdat and is externally visible, update the comdat size
173 // and keep track of its comdat so that we don't internalize any of its members.
174 void InternalizePass::checkComdat(
176  Comdat *C = GV.getComdat();
177  if (!C)
178  return;
179 
180  ComdatInfo &Info = ComdatMap.try_emplace(C).first->second;
181  ++Info.Size;
182  if (shouldPreserveGV(GV))
183  Info.External = true;
184 }
185 
187  bool Changed = false;
188  CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : nullptr;
189 
191  collectUsedGlobalVariables(M, Used, false);
192 
193  // Collect comdat size and visiblity information for the module.
195  if (!M.getComdatSymbolTable().empty()) {
196  for (Function &F : M)
197  checkComdat(F, ComdatMap);
198  for (GlobalVariable &GV : M.globals())
199  checkComdat(GV, ComdatMap);
200  for (GlobalAlias &GA : M.aliases())
201  checkComdat(GA, ComdatMap);
202  }
203 
204  // We must assume that globals in llvm.used have a reference that not even
205  // the linker can see, so we don't internalize them.
206  // For llvm.compiler.used the situation is a bit fuzzy. The assembler and
207  // linker can drop those symbols. If this pass is running as part of LTO,
208  // one might think that it could just drop llvm.compiler.used. The problem
209  // is that even in LTO llvm doesn't see every reference. For example,
210  // we don't see references from function local inline assembly. To be
211  // conservative, we internalize symbols in llvm.compiler.used, but we
212  // keep llvm.compiler.used so that the symbol is not deleted by llvm.
213  for (GlobalValue *V : Used) {
214  AlwaysPreserved.insert(V->getName());
215  }
216 
217  // Never internalize the llvm.used symbol. It is used to implement
218  // attribute((used)).
219  // FIXME: Shouldn't this just filter on llvm.metadata section??
220  AlwaysPreserved.insert("llvm.used");
221  AlwaysPreserved.insert("llvm.compiler.used");
222 
223  // Never internalize anchors used by the machine module info, else the info
224  // won't find them. (see MachineModuleInfo.)
225  AlwaysPreserved.insert("llvm.global_ctors");
226  AlwaysPreserved.insert("llvm.global_dtors");
227  AlwaysPreserved.insert("llvm.global.annotations");
228 
229  // Never internalize symbols code-gen inserts.
230  // FIXME: We should probably add this (and the __stack_chk_guard) via some
231  // type of call-back in CodeGen.
232  AlwaysPreserved.insert("__stack_chk_fail");
233  if (Triple(M.getTargetTriple()).isOSAIX())
234  AlwaysPreserved.insert("__ssp_canary_word");
235  else
236  AlwaysPreserved.insert("__stack_chk_guard");
237 
238  // Mark all functions not in the api as internal.
239  IsWasm = Triple(M.getTargetTriple()).isOSBinFormatWasm();
240  for (Function &I : M) {
241  if (!maybeInternalize(I, ComdatMap))
242  continue;
243  Changed = true;
244 
245  if (ExternalNode)
246  // Remove a callgraph edge from the external node to this function.
247  ExternalNode->removeOneAbstractEdgeTo((*CG)[&I]);
248 
249  ++NumFunctions;
250  LLVM_DEBUG(dbgs() << "Internalizing func " << I.getName() << "\n");
251  }
252 
253  // Mark all global variables with initializers that are not in the api as
254  // internal as well.
255  for (auto &GV : M.globals()) {
256  if (!maybeInternalize(GV, ComdatMap))
257  continue;
258  Changed = true;
259 
260  ++NumGlobals;
261  LLVM_DEBUG(dbgs() << "Internalized gvar " << GV.getName() << "\n");
262  }
263 
264  // Mark all aliases that are not in the api as internal as well.
265  for (auto &GA : M.aliases()) {
266  if (!maybeInternalize(GA, ComdatMap))
267  continue;
268  Changed = true;
269 
270  ++NumAliases;
271  LLVM_DEBUG(dbgs() << "Internalized alias " << GA.getName() << "\n");
272  }
273 
274  return Changed;
275 }
276 
// Default constructor: the preserve callback is a PreserveAPIList functor,
// which is built from the internalize-public-api-file/-list options.
277 InternalizePass::InternalizePass() : MustPreserveGV(PreserveAPIList()) {}
278 
281  return PreservedAnalyses::all();
282 
285  return PA;
286 }
287 
288 namespace {
289 class InternalizeLegacyPass : public ModulePass {
290  // Client supplied callback to control wheter a symbol must be preserved.
291  std::function<bool(const GlobalValue &)> MustPreserveGV;
292 
293 public:
294  static char ID; // Pass identification, replacement for typeid
295 
296  InternalizeLegacyPass() : ModulePass(ID), MustPreserveGV(PreserveAPIList()) {}
297 
298  InternalizeLegacyPass(std::function<bool(const GlobalValue &)> MustPreserveGV)
299  : ModulePass(ID), MustPreserveGV(std::move(MustPreserveGV)) {
301  }
302 
303  bool runOnModule(Module &M) override {
304  if (skipModule(M))
305  return false;
306 
307  CallGraphWrapperPass *CGPass =
308  getAnalysisIfAvailable<CallGraphWrapperPass>();
309  CallGraph *CG = CGPass ? &CGPass->getCallGraph() : nullptr;
310  return internalizeModule(M, MustPreserveGV, CG);
311  }
312 
313  void getAnalysisUsage(AnalysisUsage &AU) const override {
314  AU.setPreservesCFG();
316  }
317 };
318 }
319 
321 INITIALIZE_PASS(InternalizeLegacyPass, "internalize",
322  "Internalize Global Symbols", false, false)
323 
325  return new InternalizeLegacyPass();
326 }
327 
329  std::function<bool(const GlobalValue &)> MustPreserveGV) {
330  return new InternalizeLegacyPass(std::move(MustPreserveGV));
331 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
MemoryBuffer.h
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::CallGraph::getExternalCallingNode
CallGraphNode * getExternalCallingNode() const
Returns the CallGraphNode which is used to represent undetermined calls into the callgraph.
Definition: CallGraph.h:127
llvm::AArch64PACKey::ID
ID
Definition: AArch64BaseInfo.h:818
llvm::CallGraphAnalysis
An analysis pass to compute the CallGraph for a Module.
Definition: CallGraph.h:304
llvm::line_iterator
A forward iterator which reads text lines from a buffer.
Definition: LineIterator.h:33
llvm::ModulePass
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:248
llvm::Function
Definition: Function.h:60
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::lookup
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:197
Pass.h
llvm::CallGraphWrapperPass::getCallGraph
const CallGraph & getCallGraph() const
The internal CallGraph around which the rest of this interface is wrapped.
Definition: CallGraph.h:358
llvm::Comdat::NoDeduplicate
@ NoDeduplicate
No deduplication is performed.
Definition: Comdat.h:39
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
Statistic.h
llvm::cl::CommaSeparated
@ CommaSeparated
Definition: CommandLine.h:165
llvm::GlobalVariable
Definition: GlobalVariable.h:39
llvm::InternalizePass::internalizeModule
bool internalizeModule(Module &TheModule, CallGraph *CG=nullptr)
Run the internalizer on TheModule, returns true if any changes was made.
Definition: Internalize.cpp:186
llvm::GlobalAlias
Definition: GlobalAlias.h:28
llvm::CallGraph
The basic data container for the call graph of a Module of IR.
Definition: CallGraph.h:72
llvm::logicalview::LVAttributeKind::Filename
@ Filename
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
Module.h
llvm::GlobalValue::DefaultVisibility
@ DefaultVisibility
The GV is visible.
Definition: GlobalValue.h:63
llvm::GlobPattern
Definition: GlobPattern.h:29
llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:891
llvm::StringSet::insert
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition: StringSet.h:34
llvm::Triple::isOSBinFormatWasm
bool isOSBinFormatWasm() const
Tests whether the OS uses the Wasm binary format.
Definition: Triple.h:691
Internalize.h
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::collectUsedGlobalVariables
GlobalVariable * collectUsedGlobalVariables(const Module &M, SmallVectorImpl< GlobalValue * > &Vec, bool CompilerUsed)
Given "llvm.used" or "llvm.compiler.used" as a global name, collect the initializer elements of that ...
Definition: Module.cpp:800
INITIALIZE_PASS
INITIALIZE_PASS(InternalizeLegacyPass, "internalize", "Internalize Global Symbols", false, false) ModulePass *llvm
Definition: Internalize.cpp:321
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
CommandLine.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24
llvm::GlobalValue::isDeclaration
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:266
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::InternalizePass::InternalizePass
InternalizePass()
Definition: Internalize.cpp:277
LineIterator.h
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::CallGraphNode
A node in the call graph for a module.
Definition: CallGraph.h:166
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::GlobalValue::InternalLinkage
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
llvm::Comdat
Definition: Comdat.h:33
llvm::Triple::isOSAIX
bool isOSAIX() const
Tests whether the OS is AIX.
Definition: Triple.h:668
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::SmallString< 0 >
G
const DataFlowGraph & G
Definition: RDFGraph.cpp:200
llvm::cl::opt
Definition: CommandLine.h:1411
llvm::GlobalValue::hasAvailableExternallyLinkage
bool hasAvailableExternallyLinkage() const
Definition: GlobalValue.h:507
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::PreservedAnalyses::preserve
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:173
IPO.h
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::DenseMap
Definition: DenseMap.h:714
llvm::CallGraphWrapperPass
The ModulePass which wraps up a CallGraph and the logic to build it.
Definition: CallGraph.h:347
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::GlobalValue::setLinkage
void setLinkage(LinkageTypes LT)
Definition: GlobalValue.h:532
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1861
llvm::GlobalValue::hasLocalLinkage
bool hasLocalLinkage() const
Definition: GlobalValue.h:523
APIFile
static cl::opt< std::string > APIFile("internalize-public-api-file", cl::value_desc("filename"), cl::desc("A file containing list of symbol names to preserve"))
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:82
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::internalizeModule
bool internalizeModule(Module &TheModule, std::function< bool(const GlobalValue &)> MustPreserveGV, CallGraph *CG=nullptr)
Helper function to internalize functions and variables in a Module.
Definition: Internalize.h:79
Triple.h
llvm::GlobPattern::create
static Expected< GlobPattern > create(StringRef Pat)
Definition: GlobPattern.cpp:107
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1741
llvm::InternalizePass::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: Internalize.cpp:279
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:265
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::MemoryBuffer::getFile
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, Optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Definition: MemoryBuffer.cpp:248
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
GlobPattern.h
StringSet.h
llvm::CallGraphNode::removeOneAbstractEdgeTo
void removeOneAbstractEdgeTo(CallGraphNode *Callee)
Removes one edge associated with a null callsite from this node to the specified callee function.
Definition: CallGraph.cpp:245
std
Definition: BitVector.h:851
llvm::GlobalValue::hasDLLExportStorageClass
bool hasDLLExportStorageClass() const
Definition: GlobalValue.h:277
llvm::toString
const char * toString(DWARFSectionKind Kind)
Definition: DWARFUnitIndex.h:67
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::GlobalValue::getComdat
const Comdat * getComdat() const
Definition: Globals.cpp:176
llvm::AnalysisManager::getCachedResult
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:793
llvm::createInternalizePass
ModulePass * createInternalizePass(std::function< bool(const GlobalValue &)> MustPreserveGV)
createInternalizePass - This pass loops over all of the functions in the input module,...
Definition: Internalize.cpp:328
llvm::cl::value_desc
Definition: CommandLine.h:421
llvm::StringMap::count
size_type count(StringRef Key) const
count - Return 1 if the element is in the map, 0 otherwise.
Definition: StringMap.h:245
llvm::initializeInternalizeLegacyPassPass
void initializeInternalizeLegacyPassPass(PassRegistry &)
CallGraph.h
llvm::Pattern
Definition: FileCheckImpl.h:614
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::try_emplace
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
Definition: DenseMap.h:222
llvm::ErrorOr
Represents either an error or a value T.
Definition: ErrorOr.h:56
llvm::GlobPattern::match
bool match(StringRef S) const
Definition: GlobPattern.cpp:142
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::cl::desc
Definition: CommandLine.h:412
raw_ostream.h
llvm::GlobalValue::setVisibility
void setVisibility(VisibilityTypes V)
Definition: GlobalValue.h:250
InitializePasses.h
Debug.h
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941
APIList
static cl::list< std::string > APIList("internalize-public-api-list", cl::value_desc("list"), cl::desc("A list of symbol names to preserve"), cl::CommaSeparated)
llvm::cl::list
Definition: CommandLine.h:1647