LLVM 23.0.0git
BitcodeReader.h
Go to the documentation of this file.
1//===- llvm/Bitcode/BitcodeReader.h - Bitcode reader ------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This header defines interfaces to read LLVM bitcode files/streams.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_BITCODE_BITCODEREADER_H
14#define LLVM_BITCODE_BITCODEREADER_H
15
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/StringRef.h"
19#include "llvm/IR/GlobalValue.h"
21#include "llvm/Support/Endian.h"
22#include "llvm/Support/Error.h"
25#include <cstdint>
26#include <memory>
27#include <optional>
28#include <string>
29#include <system_error>
30#include <vector>
31namespace llvm {
32
33class LLVMContext;
34class Module;
35class MemoryBuffer;
36class Metadata;
38class Type;
39class Value;
40
41// Callback to override the data layout string of an imported bitcode module.
42// The first argument is the target triple, the second argument the data layout
43// string from the input, or a default string. It will be used if the callback
44// returns std::nullopt.
45typedef std::function<std::optional<std::string>(StringRef, StringRef)>
47
48typedef std::function<Type *(unsigned)> GetTypeByIDTy;
49
50typedef std::function<unsigned(unsigned, unsigned)> GetContainedTypeIDTy;
51
52typedef std::function<void(Value *, unsigned, GetTypeByIDTy,
55
56typedef std::function<void(Metadata **, unsigned, GetTypeByIDTy,
59
60// These functions are for converting Expected/Error values to
61// ErrorOr/std::error_code for compatibility with legacy clients. FIXME:
62// Remove these functions once no longer needed by the C and libLTO APIs.
63
65 Error Err);
66
67template <typename T>
69 if (!Val)
71 return std::move(*Val);
72}
73
75 std::optional<DataLayoutCallbackFuncTy> DataLayout;
76 /// The ValueType callback is called for every function definition or
77 /// declaration and allows accessing the type information, also behind
78 /// pointers. This can be useful, when the opaque pointer upgrade cleans all
79 /// type information behind pointers.
80 /// The second argument to ValueTypeCallback is the type ID of the
81 /// function, the two passed functions can be used to extract type
82 /// information.
83 std::optional<ValueTypeCallbackTy> ValueType;
84 /// The MDType callback is called for every value in metadata.
85 std::optional<MDTypeCallbackTy> MDType;
86
87 /// If true, do not auto-upgrade debug intrinsic calls (llvm.dbg.*) to
88 /// non-instruction debug records during bitcode read. This flag allows
89 /// direct manipulation of the old intrinsic-form debug info; beware that
90 /// LLVM does not support using these intrinsics any more. The caller is
91 /// responsible for performing the upgrade manually (e.g. via
92 /// Module::convertToNewDbgValues()).
94
95 ParserCallbacks() = default;
98};
99
100 struct BitcodeFileContents;
101
102 /// Basic information extracted from a bitcode module to be used for LTO.
109
110 /// Represents a module in a bitcode file.
111 class BitcodeModule {
112 // This covers the identification (if present) and module blocks.
113 ArrayRef<uint8_t> Buffer;
114 StringRef ModuleIdentifier;
115
116 // The string table used to interpret this module.
117 StringRef Strtab;
118
119 // The bitstream location of the IDENTIFICATION_BLOCK.
120 uint64_t IdentificationBit;
121
122 // The bitstream location of this module's MODULE_BLOCK.
123 uint64_t ModuleBit;
124
125 BitcodeModule(ArrayRef<uint8_t> Buffer, StringRef ModuleIdentifier,
126 uint64_t IdentificationBit, uint64_t ModuleBit)
127 : Buffer(Buffer), ModuleIdentifier(ModuleIdentifier),
128 IdentificationBit(IdentificationBit), ModuleBit(ModuleBit) {}
129
130 // Calls the ctor.
133
135 getModuleImpl(LLVMContext &Context, bool MaterializeAll,
136 bool ShouldLazyLoadMetadata, bool IsImporting,
137 ParserCallbacks Callbacks = {});
138
139 public:
141 return StringRef((const char *)Buffer.begin(), Buffer.size());
142 }
143
144 StringRef getStrtab() const { return Strtab; }
145
146 StringRef getModuleIdentifier() const { return ModuleIdentifier; }
147
148 // Assign a new module identifier to this bitcode module.
150 ModuleIdentifier = ModuleId;
151 }
152
153 /// Read the bitcode module and prepare for lazy deserialization of function
154 /// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well.
155 /// If IsImporting is true, this module is being parsed for ThinLTO
156 /// importing into another module.
158 getLazyModule(LLVMContext &Context, bool ShouldLazyLoadMetadata,
159 bool IsImporting, ParserCallbacks Callbacks = {});
160
161 /// Read the entire bitcode module and return it.
162 LLVM_ABI Expected<std::unique_ptr<Module>>
163 parseModule(LLVMContext &Context, ParserCallbacks Callbacks = {});
164
165 /// Returns information about the module to be used for LTO: whether to
166 /// compile with ThinLTO, and whether it has a summary.
167 LLVM_ABI Expected<BitcodeLTOInfo> getLTOInfo();
168
169 /// Parse the specified bitcode buffer, returning the module summary index.
170 LLVM_ABI Expected<std::unique_ptr<ModuleSummaryIndex>> getSummary();
171
172 /// Parse the specified bitcode buffer and merge its module summary index
173 /// into CombinedIndex.
175 readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath,
176 std::function<bool(GlobalValue::GUID)> IsPrevailing = nullptr);
177 };
178
180 std::vector<BitcodeModule> Mods;
182 };
183
184 /// Returns the contents of a bitcode file. This includes the raw contents of
185 /// the symbol table embedded in the bitcode file. Clients which require a
186 /// symbol table should prefer to use irsymtab::read instead of this function
187 /// because it creates a reader for the irsymtab and handles upgrading bitcode
188 /// files without a symbol table or with an old symbol table.
191
192 /// Returns a list of modules in the specified bitcode buffer.
195
196 /// Read the header of the specified bitcode buffer and prepare for lazy
197 /// deserialization of function bodies. If ShouldLazyLoadMetadata is true,
198 /// lazily load metadata as well. If IsImporting is true, this module is
199 /// being parsed for ThinLTO importing into another module.
202 bool ShouldLazyLoadMetadata = false,
203 bool IsImporting = false,
204 ParserCallbacks Callbacks = {});
205
206 /// Like getLazyBitcodeModule, except that the module takes ownership of
207 /// the memory buffer if successful. If successful, this moves Buffer. On
208 /// error, this *does not* move Buffer. If IsImporting is true, this module is
209 /// being parsed for ThinLTO importing into another module.
210 LLVM_ABI Expected<std::unique_ptr<Module>> getOwningLazyBitcodeModule(
211 std::unique_ptr<MemoryBuffer> &&Buffer, LLVMContext &Context,
212 bool ShouldLazyLoadMetadata = false, bool IsImporting = false,
213 ParserCallbacks Callbacks = {});
214
215 /// Read the header of the specified bitcode buffer and extract just the
216 /// triple information. If successful, this returns a string. On error, this
217 /// returns "".
219
220 /// Return true if \p Buffer contains a bitcode file with ObjC code (category
221 /// or class) in it.
224
225 /// Read the header of the specified bitcode buffer and extract just the
226 /// producer string information. If successful, this returns a string. On
227 /// error, this returns "".
230
231 /// Read the specified bitcode file, returning the module.
234 ParserCallbacks Callbacks = {});
235
236 /// Returns LTO information for the specified bitcode file.
238
239 /// Parse the specified bitcode buffer, returning the module summary index.
242
243 /// Parse the specified bitcode buffer and merge the index into CombinedIndex.
245 ModuleSummaryIndex &CombinedIndex);
246
247 /// Parse the module summary index out of an IR file and return the module
248 /// summary index object if found, or an empty summary if not. If Path refers
249 /// to an empty file and IgnoreEmptyThinLTOIndexFile is true, then
250 /// this function will return nullptr.
253 bool IgnoreEmptyThinLTOIndexFile = false);
254
/// isBitcodeWrapper - Return true if the given bytes are the magic bytes
/// for an LLVM IR bitcode wrapper.
///
/// \param BufPtr Start of the buffer to inspect.
/// \param BufEnd One past the last readable byte of the buffer.
/// \returns true only if the range [BufPtr, BufEnd) holds at least four bytes
/// and the first four are 0xDE 0xC0 0x17 0x0B.
inline bool isBitcodeWrapper(const unsigned char *BufPtr,
                             const unsigned char *BufEnd) {
  // All four magic bytes are compared below, so four bytes must be readable.
  // Checking only for a non-empty range would let a 1-3 byte buffer that
  // starts with the magic prefix be read past BufEnd.
  if (BufEnd - BufPtr < 4)
    return false;

  // See if you can find the hidden message in the magic bytes :-).
  // (Hint: it's a little-endian encoding.)
  return BufPtr[0] == 0xDE && BufPtr[1] == 0xC0 && BufPtr[2] == 0x17 &&
         BufPtr[3] == 0x0B;
}
267
/// isRawBitcode - Return true if the given bytes are the magic bytes for
/// raw LLVM IR bitcode (without a wrapper).
///
/// \param BufPtr Start of the buffer to inspect.
/// \param BufEnd One past the last readable byte of the buffer.
/// \returns true only if the range [BufPtr, BufEnd) holds at least four bytes
/// and the first four are 'B' 'C' 0xC0 0xDE.
inline bool isRawBitcode(const unsigned char *BufPtr,
                         const unsigned char *BufEnd) {
  // All four magic bytes are compared below, so four bytes must be readable.
  // Checking only for a non-empty range would let a 1-3 byte buffer that
  // starts with the magic prefix be read past BufEnd.
  if (BufEnd - BufPtr < 4)
    return false;

  // These bytes sort of have a hidden message, but it's not in
  // little-endian this time, and it's a little redundant.
  return BufPtr[0] == 'B' && BufPtr[1] == 'C' && BufPtr[2] == 0xc0 &&
         BufPtr[3] == 0xde;
}
280
281 /// isBitcode - Return true if the given bytes are the magic bytes for
282 /// LLVM IR bitcode, either with or without a wrapper.
283 inline bool isBitcode(const unsigned char *BufPtr,
284 const unsigned char *BufEnd) {
285 return isBitcodeWrapper(BufPtr, BufEnd) ||
286 isRawBitcode(BufPtr, BufEnd);
287 }
288
289 /// SkipBitcodeWrapperHeader - Some systems wrap bc files with a special
290 /// header for padding or other reasons. The format of this header is:
291 ///
292 /// struct bc_header {
293 /// uint32_t Magic; // 0x0B17C0DE
294 /// uint32_t Version; // Version, currently always 0.
295 /// uint32_t BitcodeOffset; // Offset to traditional bitcode file.
296 /// uint32_t BitcodeSize; // Size of traditional bitcode file.
297 /// ... potentially other gunk ...
298 /// };
299 ///
300 /// This function is called when we find a file with a matching magic number.
301 /// In this case, skip down to the subsection of the file that is actually a
302 /// BC file.
303 /// If 'VerifyBufferSize' is true, check that the buffer is large enough to
304 /// contain the whole bitcode file.
305 inline bool SkipBitcodeWrapperHeader(const unsigned char *&BufPtr,
306 const unsigned char *&BufEnd,
307 bool VerifyBufferSize) {
308 // Must contain the offset and size field!
309 if (unsigned(BufEnd - BufPtr) < BWH_SizeField + 4)
310 return true;
311
313 unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]);
314 uint64_t BitcodeOffsetEnd = (uint64_t)Offset + (uint64_t)Size;
315
316 // Verify that Offset+Size fits in the file.
317 if (VerifyBufferSize && BitcodeOffsetEnd > uint64_t(BufEnd-BufPtr))
318 return true;
319 BufPtr += Offset;
320 BufEnd = BufPtr+Size;
321 return false;
322 }
323
324 LLVM_ABI APInt readWideAPInt(ArrayRef<uint64_t> Vals, unsigned TypeBits);
325
326 LLVM_ABI const std::error_category &BitcodeErrorCategory();
327 enum class BitcodeError { CorruptedBitcode = 1 };
328 inline std::error_code make_error_code(BitcodeError E) {
329 return std::error_code(static_cast<int>(E), BitcodeErrorCategory());
330 }
331
332} // end namespace llvm
333
334namespace std {
335
336template <> struct is_error_code_enum<llvm::BitcodeError> : std::true_type {};
337
338} // end namespace std
339
340#endif // LLVM_BITCODE_BITCODEREADER_H
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_ABI
Definition Compiler.h:215
Provides ErrorOr<T> smart pointer.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
void setModuleIdentifier(llvm::StringRef ModuleId)
LLVM_ABI friend Expected< BitcodeFileContents > getBitcodeFileContents(MemoryBufferRef Buffer)
Returns the contents of a bitcode file.
StringRef getModuleIdentifier() const
LLVM_ABI Expected< std::unique_ptr< ModuleSummaryIndex > > getSummary()
Parse the specified bitcode buffer, returning the module summary index.
LLVM_ABI Expected< BitcodeLTOInfo > getLTOInfo()
Returns information about the module to be used for LTO: whether to compile with ThinLTO,...
StringRef getBuffer() const
LLVM_ABI Error readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath, std::function< bool(GlobalValue::GUID)> IsPrevailing=nullptr)
Parse the specified bitcode buffer and merge its module summary index into CombinedIndex.
LLVM_ABI Expected< std::unique_ptr< Module > > parseModule(LLVMContext &Context, ParserCallbacks Callbacks={})
Read the entire bitcode module and return it.
StringRef getStrtab() const
LLVM_ABI Expected< std::unique_ptr< Module > > getLazyModule(LLVMContext &Context, bool ShouldLazyLoadMetadata, bool IsImporting, ParserCallbacks Callbacks={})
Read the bitcode module and prepare for lazy deserialization of function bodies.
Represents either an error or a value T.
Definition ErrorOr.h:56
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
Tagged union holding either a T or a Error.
Definition Error.h:485
Error takeError()
Take ownership of the stored error.
Definition Error.h:612
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
This interface provides simple read-only access to a block of memory, and provides simple methods for...
Root of the metadata hierarchy.
Definition Metadata.h:64
Class to hold module path string table and global value map, and encapsulate methods for operating on...
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM Value Representation.
Definition Value.h:75
uint32_t read32le(const void *P)
Definition Endian.h:432
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:558
LLVM_ABI const std::error_category & BitcodeErrorCategory()
LLVM_ABI Expected< std::unique_ptr< Module > > parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, ParserCallbacks Callbacks={})
Read the specified bitcode file, returning the module.
std::error_code make_error_code(BitcodeError E)
std::function< Type *(unsigned)> GetTypeByIDTy
LLVM_ABI Expected< bool > isBitcodeContainingObjCCategory(MemoryBufferRef Buffer)
Return true if Buffer contains a bitcode file with ObjC code (category or class) in it.
std::function< unsigned(unsigned, unsigned)> GetContainedTypeIDTy
@ BWH_OffsetField
@ BWH_SizeField
LLVM_ABI Expected< std::string > getBitcodeTargetTriple(MemoryBufferRef Buffer)
Read the header of the specified bitcode buffer and extract just the triple information.
LLVM_ABI Expected< BitcodeFileContents > getBitcodeFileContents(MemoryBufferRef Buffer)
Returns the contents of a bitcode file.
ErrorOr< T > expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected< T > Val)
bool isRawBitcode(const unsigned char *BufPtr, const unsigned char *BufEnd)
isRawBitcode - Return true if the given bytes are the magic bytes for raw LLVM IR bitcode (without a ...
LLVM_ABI Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
LLVM_ABI Expected< std::string > getBitcodeProducerString(MemoryBufferRef Buffer)
Read the header of the specified bitcode buffer and extract just the producer string information.
LLVM_ABI Expected< std::unique_ptr< Module > > getLazyBitcodeModule(MemoryBufferRef Buffer, LLVMContext &Context, bool ShouldLazyLoadMetadata=false, bool IsImporting=false, ParserCallbacks Callbacks={})
Read the header of the specified bitcode buffer and prepare for lazy deserialization of function bodi...
std::function< void(Metadata **, unsigned, GetTypeByIDTy, GetContainedTypeIDTy)> MDTypeCallbackTy
LLVM_ABI Expected< std::vector< BitcodeModule > > getBitcodeModuleList(MemoryBufferRef Buffer)
Returns a list of modules in the specified bitcode buffer.
LLVM_ABI Expected< BitcodeLTOInfo > getBitcodeLTOInfo(MemoryBufferRef Buffer)
Returns LTO information for the specified bitcode file.
bool SkipBitcodeWrapperHeader(const unsigned char *&BufPtr, const unsigned char *&BufEnd, bool VerifyBufferSize)
SkipBitcodeWrapperHeader - Some systems wrap bc files with a special header for padding or other reas...
bool isBitcodeWrapper(const unsigned char *BufPtr, const unsigned char *BufEnd)
isBitcodeWrapper - Return true if the given bytes are the magic bytes for an LLVM IR bitcode wrapper.
LLVM_ABI APInt readWideAPInt(ArrayRef< uint64_t > Vals, unsigned TypeBits)
std::function< void(Value *, unsigned, GetTypeByIDTy, GetContainedTypeIDTy)> ValueTypeCallbackTy
std::function< std::optional< std::string >(StringRef, StringRef)> DataLayoutCallbackFuncTy
bool isBitcode(const unsigned char *BufPtr, const unsigned char *BufEnd)
isBitcode - Return true if the given bytes are the magic bytes for LLVM IR bitcode,...
LLVM_ABI Error readModuleSummaryIndex(MemoryBufferRef Buffer, ModuleSummaryIndex &CombinedIndex)
Parse the specified bitcode buffer and merge the index into CombinedIndex.
LLVM_ABI Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndexForFile(StringRef Path, bool IgnoreEmptyThinLTOIndexFile=false)
Parse the module summary index out of an IR file and return the module summary index object if found,...
LLVM_ABI Expected< std::unique_ptr< Module > > getOwningLazyBitcodeModule(std::unique_ptr< MemoryBuffer > &&Buffer, LLVMContext &Context, bool ShouldLazyLoadMetadata=false, bool IsImporting=false, ParserCallbacks Callbacks={})
Like getLazyBitcodeModule, except that the module takes ownership of the memory buffer if successful.
LLVM_ABI std::error_code errorToErrorCodeAndEmitErrors(LLVMContext &Ctx, Error Err)
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:860
std::vector< BitcodeModule > Mods
Basic information extracted from a bitcode module to be used for LTO.
std::optional< ValueTypeCallbackTy > ValueType
The ValueType callback is called for every function definition or declaration and allows accessing th...
std::optional< DataLayoutCallbackFuncTy > DataLayout
ParserCallbacks(DataLayoutCallbackFuncTy DataLayout)
std::optional< MDTypeCallbackTy > MDType
The MDType callback is called for every value in metadata.
bool SkipDebugIntrinsicUpgrade
If true, do not auto-upgrade debug intrinsic calls (llvm.dbg.