LLVM 19.0.0git
SymbolRewriter.cpp
Go to the documentation of this file.
1//===- SymbolRewriter.cpp - Symbol Rewriter -------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// SymbolRewriter is an LLVM pass which can rewrite symbols transparently
// within existing code. It is implemented as a compiler pass and is configured
// via a YAML configuration file.
12//
13// The YAML configuration file format is as follows:
14//
15// RewriteMapFile := RewriteDescriptors
16// RewriteDescriptors := RewriteDescriptor | RewriteDescriptors
17// RewriteDescriptor := RewriteDescriptorType ':' '{' RewriteDescriptorFields '}'
18// RewriteDescriptorFields := RewriteDescriptorField | RewriteDescriptorFields
19// RewriteDescriptorField := FieldIdentifier ':' FieldValue ','
20// RewriteDescriptorType := Identifier
21// FieldIdentifier := Identifier
22// FieldValue := Identifier
23// Identifier := [0-9a-zA-Z]+
24//
25// Currently, the following descriptor types are supported:
26//
27// - function: (function rewriting)
28// + Source (original name of the function)
29// + Target (explicit transformation)
30// + Transform (pattern transformation)
31// + Naked (boolean, whether the function is undecorated)
32// - global variable: (external linkage global variable rewriting)
33// + Source (original name of externally visible variable)
34// + Target (explicit transformation)
35// + Transform (pattern transformation)
36// - global alias: (global alias rewriting)
37// + Source (original name of the aliased name)
38// + Target (explicit transformation)
39// + Transform (pattern transformation)
40//
41// Note that source and exactly one of [Target, Transform] must be provided
42//
// New rewrite descriptors can be created. Adding a new rewrite descriptor
// involves:
//
//   a) extending the rewrite descriptor kind enumeration
//      (<anonymous>::RewriteDescriptor::RewriteDescriptorType)
//   b) implementing the new descriptor
//      (c.f. <anonymous>::ExplicitRewriteFunctionDescriptor)
//   c) extending the rewrite map parser
//      (<anonymous>::RewriteMapParser::parseEntry)
52//
53// Specify to rewrite the symbols using the `-rewrite-symbols` option, and
54// specify the map file to use for the rewriting via the `-rewrite-map-file`
55// option.
56//
57//===----------------------------------------------------------------------===//
58
61#include "llvm/ADT/StringRef.h"
62#include "llvm/ADT/ilist.h"
64#include "llvm/IR/Comdat.h"
65#include "llvm/IR/Function.h"
66#include "llvm/IR/GlobalAlias.h"
69#include "llvm/IR/Module.h"
70#include "llvm/IR/Value.h"
76#include "llvm/Support/Regex.h"
79#include <memory>
80#include <string>
81#include <vector>
82
83using namespace llvm;
84using namespace SymbolRewriter;
85
86#define DEBUG_TYPE "symbol-rewriter"
87
88static cl::list<std::string> RewriteMapFiles("rewrite-map-file",
89 cl::desc("Symbol Rewrite Map"),
90 cl::value_desc("filename"),
92
93static void rewriteComdat(Module &M, GlobalObject *GO,
94 const std::string &Source,
95 const std::string &Target) {
96 if (Comdat *CD = GO->getComdat()) {
97 auto &Comdats = M.getComdatSymbolTable();
98
99 Comdat *C = M.getOrInsertComdat(Target);
100 C->setSelectionKind(CD->getSelectionKind());
101 GO->setComdat(C);
102
103 Comdats.erase(Comdats.find(Source));
104 }
105}
106
107namespace {
108
109template <RewriteDescriptor::Type DT, typename ValueType,
110 ValueType *(Module::*Get)(StringRef) const>
111class ExplicitRewriteDescriptor : public RewriteDescriptor {
112public:
113 const std::string Source;
114 const std::string Target;
115
116 ExplicitRewriteDescriptor(StringRef S, StringRef T, const bool Naked)
117 : RewriteDescriptor(DT),
118 Source(std::string(Naked ? StringRef("\01" + S.str()) : S)),
119 Target(std::string(T)) {}
120
121 bool performOnModule(Module &M) override;
122
123 static bool classof(const RewriteDescriptor *RD) {
124 return RD->getType() == DT;
125 }
126};
127
128} // end anonymous namespace
129
130template <RewriteDescriptor::Type DT, typename ValueType,
131 ValueType *(Module::*Get)(StringRef) const>
132bool ExplicitRewriteDescriptor<DT, ValueType, Get>::performOnModule(Module &M) {
133 bool Changed = false;
134 if (ValueType *S = (M.*Get)(Source)) {
135 if (GlobalObject *GO = dyn_cast<GlobalObject>(S))
136 rewriteComdat(M, GO, Source, Target);
137
138 if (Value *T = (M.*Get)(Target))
139 S->setValueName(T->getValueName());
140 else
141 S->setName(Target);
142
143 Changed = true;
144 }
145 return Changed;
146}
147
148namespace {
149
150template <RewriteDescriptor::Type DT, typename ValueType,
151 ValueType *(Module::*Get)(StringRef) const,
153 (Module::*Iterator)()>
154class PatternRewriteDescriptor : public RewriteDescriptor {
155public:
156 const std::string Pattern;
157 const std::string Transform;
158
159 PatternRewriteDescriptor(StringRef P, StringRef T)
161 Transform(std::string(T)) {}
162
163 bool performOnModule(Module &M) override;
164
165 static bool classof(const RewriteDescriptor *RD) {
166 return RD->getType() == DT;
167 }
168};
169
170} // end anonymous namespace
171
172template <RewriteDescriptor::Type DT, typename ValueType,
173 ValueType *(Module::*Get)(StringRef) const,
175 (Module::*Iterator)()>
176bool PatternRewriteDescriptor<DT, ValueType, Get, Iterator>::
177performOnModule(Module &M) {
178 bool Changed = false;
179 for (auto &C : (M.*Iterator)()) {
180 std::string Error;
181
182 std::string Name = Regex(Pattern).sub(Transform, C.getName(), &Error);
183 if (!Error.empty())
184 report_fatal_error(Twine("unable to transforn ") + C.getName() + " in " +
185 M.getModuleIdentifier() + ": " + Error);
186
187 if (C.getName() == Name)
188 continue;
189
190 if (GlobalObject *GO = dyn_cast<GlobalObject>(&C))
191 rewriteComdat(M, GO, std::string(C.getName()), Name);
192
193 if (Value *V = (M.*Get)(Name))
194 C.setValueName(V->getValueName());
195 else
196 C.setName(Name);
197
198 Changed = true;
199 }
200 return Changed;
201}
202
203namespace {
204
205/// Represents a rewrite for an explicitly named (function) symbol. Both the
206/// source function name and target function name of the transformation are
207/// explicitly spelt out.
208using ExplicitRewriteFunctionDescriptor =
209 ExplicitRewriteDescriptor<RewriteDescriptor::Type::Function, Function,
211
212/// Represents a rewrite for an explicitly named (global variable) symbol. Both
213/// the source variable name and target variable name are spelt out. This
214/// applies only to module level variables.
215using ExplicitRewriteGlobalVariableDescriptor =
216 ExplicitRewriteDescriptor<RewriteDescriptor::Type::GlobalVariable,
218
219/// Represents a rewrite for an explicitly named global alias. Both the source
220/// and target name are explicitly spelt out.
221using ExplicitRewriteNamedAliasDescriptor =
222 ExplicitRewriteDescriptor<RewriteDescriptor::Type::NamedAlias, GlobalAlias,
224
225/// Represents a rewrite for a regular expression based pattern for functions.
226/// A pattern for the function name is provided and a transformation for that
227/// pattern to determine the target function name create the rewrite rule.
228using PatternRewriteFunctionDescriptor =
229 PatternRewriteDescriptor<RewriteDescriptor::Type::Function, Function,
231
232/// Represents a rewrite for a global variable based upon a matching pattern.
233/// Each global variable matching the provided pattern will be transformed as
234/// described in the transformation pattern for the target. Applies only to
235/// module level variables.
236using PatternRewriteGlobalVariableDescriptor =
237 PatternRewriteDescriptor<RewriteDescriptor::Type::GlobalVariable,
240
241/// PatternRewriteNamedAliasDescriptor - represents a rewrite for global
242/// aliases which match a given pattern. The provided transformation will be
243/// applied to each of the matching names.
244using PatternRewriteNamedAliasDescriptor =
245 PatternRewriteDescriptor<RewriteDescriptor::Type::NamedAlias, GlobalAlias,
247
248} // end anonymous namespace
249
250bool RewriteMapParser::parse(const std::string &MapFile,
253 MemoryBuffer::getFile(MapFile);
254
255 if (!Mapping)
256 report_fatal_error(Twine("unable to read rewrite map '") + MapFile +
257 "': " + Mapping.getError().message());
258
259 if (!parse(*Mapping, DL))
260 report_fatal_error(Twine("unable to parse rewrite map '") + MapFile + "'");
261
262 return true;
263}
264
265bool RewriteMapParser::parse(std::unique_ptr<MemoryBuffer> &MapFile,
267 SourceMgr SM;
268 yaml::Stream YS(MapFile->getBuffer(), SM);
269
270 for (auto &Document : YS) {
271 yaml::MappingNode *DescriptorList;
272
273 // ignore empty documents
274 if (isa<yaml::NullNode>(Document.getRoot()))
275 continue;
276
277 DescriptorList = dyn_cast<yaml::MappingNode>(Document.getRoot());
278 if (!DescriptorList) {
279 YS.printError(Document.getRoot(), "DescriptorList node must be a map");
280 return false;
281 }
282
283 for (auto &Descriptor : *DescriptorList)
284 if (!parseEntry(YS, Descriptor, DL))
285 return false;
286 }
287
288 return true;
289}
290
291bool RewriteMapParser::parseEntry(yaml::Stream &YS, yaml::KeyValueNode &Entry,
295 SmallString<32> KeyStorage;
296 StringRef RewriteType;
297
298 Key = dyn_cast<yaml::ScalarNode>(Entry.getKey());
299 if (!Key) {
300 YS.printError(Entry.getKey(), "rewrite type must be a scalar");
301 return false;
302 }
303
304 Value = dyn_cast<yaml::MappingNode>(Entry.getValue());
305 if (!Value) {
306 YS.printError(Entry.getValue(), "rewrite descriptor must be a map");
307 return false;
308 }
309
310 RewriteType = Key->getValue(KeyStorage);
311 if (RewriteType.equals("function"))
312 return parseRewriteFunctionDescriptor(YS, Key, Value, DL);
313 else if (RewriteType.equals("global variable"))
314 return parseRewriteGlobalVariableDescriptor(YS, Key, Value, DL);
315 else if (RewriteType.equals("global alias"))
316 return parseRewriteGlobalAliasDescriptor(YS, Key, Value, DL);
317
318 YS.printError(Entry.getKey(), "unknown rewrite type");
319 return false;
320}
321
322bool RewriteMapParser::
323parseRewriteFunctionDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
324 yaml::MappingNode *Descriptor,
326 bool Naked = false;
327 std::string Source;
328 std::string Target;
329 std::string Transform;
330
331 for (auto &Field : *Descriptor) {
334 SmallString<32> KeyStorage;
335 SmallString<32> ValueStorage;
336 StringRef KeyValue;
337
338 Key = dyn_cast<yaml::ScalarNode>(Field.getKey());
339 if (!Key) {
340 YS.printError(Field.getKey(), "descriptor key must be a scalar");
341 return false;
342 }
343
344 Value = dyn_cast<yaml::ScalarNode>(Field.getValue());
345 if (!Value) {
346 YS.printError(Field.getValue(), "descriptor value must be a scalar");
347 return false;
348 }
349
350 KeyValue = Key->getValue(KeyStorage);
351 if (KeyValue.equals("source")) {
352 std::string Error;
353
354 Source = std::string(Value->getValue(ValueStorage));
355 if (!Regex(Source).isValid(Error)) {
356 YS.printError(Field.getKey(), "invalid regex: " + Error);
357 return false;
358 }
359 } else if (KeyValue.equals("target")) {
360 Target = std::string(Value->getValue(ValueStorage));
361 } else if (KeyValue.equals("transform")) {
362 Transform = std::string(Value->getValue(ValueStorage));
363 } else if (KeyValue.equals("naked")) {
364 std::string Undecorated;
365
366 Undecorated = std::string(Value->getValue(ValueStorage));
367 Naked = StringRef(Undecorated).lower() == "true" || Undecorated == "1";
368 } else {
369 YS.printError(Field.getKey(), "unknown key for function");
370 return false;
371 }
372 }
373
374 if (Transform.empty() == Target.empty()) {
375 YS.printError(Descriptor,
376 "exactly one of transform or target must be specified");
377 return false;
378 }
379
380 // TODO see if there is a more elegant solution to selecting the rewrite
381 // descriptor type
382 if (!Target.empty())
383 DL->push_back(std::make_unique<ExplicitRewriteFunctionDescriptor>(
384 Source, Target, Naked));
385 else
386 DL->push_back(
387 std::make_unique<PatternRewriteFunctionDescriptor>(Source, Transform));
388
389 return true;
390}
391
392bool RewriteMapParser::
393parseRewriteGlobalVariableDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
394 yaml::MappingNode *Descriptor,
396 std::string Source;
397 std::string Target;
398 std::string Transform;
399
400 for (auto &Field : *Descriptor) {
403 SmallString<32> KeyStorage;
404 SmallString<32> ValueStorage;
405 StringRef KeyValue;
406
407 Key = dyn_cast<yaml::ScalarNode>(Field.getKey());
408 if (!Key) {
409 YS.printError(Field.getKey(), "descriptor Key must be a scalar");
410 return false;
411 }
412
413 Value = dyn_cast<yaml::ScalarNode>(Field.getValue());
414 if (!Value) {
415 YS.printError(Field.getValue(), "descriptor value must be a scalar");
416 return false;
417 }
418
419 KeyValue = Key->getValue(KeyStorage);
420 if (KeyValue.equals("source")) {
421 std::string Error;
422
423 Source = std::string(Value->getValue(ValueStorage));
424 if (!Regex(Source).isValid(Error)) {
425 YS.printError(Field.getKey(), "invalid regex: " + Error);
426 return false;
427 }
428 } else if (KeyValue.equals("target")) {
429 Target = std::string(Value->getValue(ValueStorage));
430 } else if (KeyValue.equals("transform")) {
431 Transform = std::string(Value->getValue(ValueStorage));
432 } else {
433 YS.printError(Field.getKey(), "unknown Key for Global Variable");
434 return false;
435 }
436 }
437
438 if (Transform.empty() == Target.empty()) {
439 YS.printError(Descriptor,
440 "exactly one of transform or target must be specified");
441 return false;
442 }
443
444 if (!Target.empty())
445 DL->push_back(std::make_unique<ExplicitRewriteGlobalVariableDescriptor>(
446 Source, Target,
447 /*Naked*/ false));
448 else
449 DL->push_back(std::make_unique<PatternRewriteGlobalVariableDescriptor>(
450 Source, Transform));
451
452 return true;
453}
454
455bool RewriteMapParser::
456parseRewriteGlobalAliasDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
457 yaml::MappingNode *Descriptor,
459 std::string Source;
460 std::string Target;
461 std::string Transform;
462
463 for (auto &Field : *Descriptor) {
466 SmallString<32> KeyStorage;
467 SmallString<32> ValueStorage;
468 StringRef KeyValue;
469
470 Key = dyn_cast<yaml::ScalarNode>(Field.getKey());
471 if (!Key) {
472 YS.printError(Field.getKey(), "descriptor key must be a scalar");
473 return false;
474 }
475
476 Value = dyn_cast<yaml::ScalarNode>(Field.getValue());
477 if (!Value) {
478 YS.printError(Field.getValue(), "descriptor value must be a scalar");
479 return false;
480 }
481
482 KeyValue = Key->getValue(KeyStorage);
483 if (KeyValue.equals("source")) {
484 std::string Error;
485
486 Source = std::string(Value->getValue(ValueStorage));
487 if (!Regex(Source).isValid(Error)) {
488 YS.printError(Field.getKey(), "invalid regex: " + Error);
489 return false;
490 }
491 } else if (KeyValue.equals("target")) {
492 Target = std::string(Value->getValue(ValueStorage));
493 } else if (KeyValue.equals("transform")) {
494 Transform = std::string(Value->getValue(ValueStorage));
495 } else {
496 YS.printError(Field.getKey(), "unknown key for Global Alias");
497 return false;
498 }
499 }
500
501 if (Transform.empty() == Target.empty()) {
502 YS.printError(Descriptor,
503 "exactly one of transform or target must be specified");
504 return false;
505 }
506
507 if (!Target.empty())
508 DL->push_back(std::make_unique<ExplicitRewriteNamedAliasDescriptor>(
509 Source, Target,
510 /*Naked*/ false));
511 else
512 DL->push_back(std::make_unique<PatternRewriteNamedAliasDescriptor>(
513 Source, Transform));
514
515 return true;
516}
517
519 if (!runImpl(M))
520 return PreservedAnalyses::all();
521
523}
524
526 bool Changed;
527
528 Changed = false;
529 for (auto &Descriptor : Descriptors)
530 Changed |= Descriptor->performOnModule(M);
531
532 return Changed;
533}
534
535void RewriteSymbolPass::loadAndParseMapFiles() {
536 const std::vector<std::string> MapFiles(RewriteMapFiles);
538
539 for (const auto &MapFile : MapFiles)
540 Parser.parse(MapFile, &Descriptors);
541}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
std::string Name
Provides ErrorOr<T> smart pointer.
static bool runImpl(Function &F, const TargetLowering &TLI)
Module.h This file contains the declarations for the Module class.
#define P(N)
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
This file defines the SmallString class.
static cl::list< std::string > RewriteMapFiles("rewrite-map-file", cl::desc("Symbol Rewrite Map"), cl::value_desc("filename"), cl::Hidden)
static void rewriteComdat(Module &M, GlobalObject *GO, const std::string &Source, const std::string &Target)
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:321
Represents either an error or a value T.
Definition: ErrorOr.h:56
std::error_code getError() const
Definition: ErrorOr.h:152
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
void setComdat(Comdat *C)
Definition: Globals.cpp:197
const Comdat * getComdat() const
Definition: GlobalObject.h:129
void push_back(MachineInstr *MI)
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
Definition: Module.cpp:191
iterator_range< iterator > functions()
Definition: Module.h:721
iterator_range< alias_iterator > aliases()
Definition: Module.h:739
iterator_range< global_iterator > globals()
Definition: Module.h:699
GlobalVariable * getGlobalVariable(StringRef Name) const
Look up the specified global variable in the module symbol table.
Definition: Module.h:446
GlobalAlias * getNamedAlias(StringRef Name) const
Return the global alias in the module with the specified name, of arbitrary type.
Definition: Module.cpp:249
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:115
std::string sub(StringRef Repl, StringRef String, std::string *Error=nullptr) const
sub - Return the result of replacing the first match of the regex in String with the Repl string.
Definition: Regex.cpp:137
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
This owns the files read by a parser, handles include stacks, and handles diagnostic wrangling.
Definition: SourceMgr.h:31
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
bool equals(StringRef RHS) const
equals - Check for string equality, this is more efficient than compare() when the relative ordering ...
Definition: StringRef.h:164
std::string lower() const
Definition: StringRef.cpp:111
The basic entity representing a rewrite operation.
virtual bool performOnModule(Module &M)=0
bool parse(const std::string &MapFile, RewriteDescriptorList *Descriptors)
Target - Wrapper for Target specific information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
LLVM Value Representation.
Definition: Value.h:74
A range adaptor for a pair of iterators.
A key and value pair.
Definition: YAMLParser.h:290
Represents a YAML map created from either a block map for a flow map.
Definition: YAMLParser.h:419
A scalar node is an opaque datum that can be presented as a series of zero or more Unicode scalar val...
Definition: YAMLParser.h:212
This class represents a YAML stream potentially containing multiple documents.
Definition: YAMLParser.h:86
void printError(Node *N, const Twine &Msg, SourceMgr::DiagKind Kind=SourceMgr::DK_Error)
This file defines classes to implement an intrusive doubly linked list class (i.e.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
This file contains the declaration of the Comdat class, which represents a single COMDAT in LLVM.
Key
PAL metadata keys.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
std::list< std::unique_ptr< RewriteDescriptor > > RewriteDescriptorList
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
Definition: regcomp.c:192