LLVM  11.0.0git
GsymReader.h
Go to the documentation of this file.
1 //===- GsymReader.h ---------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H
10 #define LLVM_DEBUGINFO_GSYM_GSYMREADER_H
11 
12 
13 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/Support/Endian.h"
21 #include "llvm/Support/ErrorOr.h"
22 
23 #include <inttypes.h>
24 #include <memory>
25 #include <stdint.h>
26 #include <string>
27 #include <vector>
28 
29 namespace llvm {
30 class MemoryBuffer;
31 class raw_ostream;
32 
33 namespace gsym {
34 
35 /// GsymReader is used to read GSYM data from a file or buffer.
36 ///
37 /// This class is optimized for very quick lookups when the endianness matches
38 /// the host system. The Header, address table, address info offsets, and file
39 /// table are designed to be mmap'ed as read only into memory and used without
40 /// any parsing needed. If the endianness doesn't match, we swap these objects
41 /// and tables into GsymReader::SwappedData and then point our header and
42 /// ArrayRefs to this swapped internal data.
43 ///
44 /// GsymReader objects must use one of the static functions to create an
45 /// instance: GsymReader::openFile(...) and GsymReader::copyBuffer(...).
46 
47 class GsymReader {
48  GsymReader(std::unique_ptr<MemoryBuffer> Buffer);
50 
51  std::unique_ptr<MemoryBuffer> MemBuffer;
52  StringRef GsymBytes;
54  const Header *Hdr = nullptr;
55  ArrayRef<uint8_t> AddrOffsets;
56  ArrayRef<uint32_t> AddrInfoOffsets;
57  ArrayRef<FileEntry> Files;
58  StringTable StrTab;
59  /// When the GSYM file's endianness doesn't match the host system then
60  /// we must decode all data structures that need to be swapped into
61  /// local storage and point the ArrayRef objects above to these swapped
62  /// copies.
63  struct SwappedData {
64  Header Hdr;
65  std::vector<uint8_t> AddrOffsets;
66  std::vector<uint32_t> AddrInfoOffsets;
67  std::vector<FileEntry> Files;
68  };
69  std::unique_ptr<SwappedData> Swap;
70 
71 public:
72  GsymReader(GsymReader &&RHS);
73  ~GsymReader();
74 
75  /// Construct a GsymReader from a file on disk.
76  ///
77  /// \param Path The path of the GSYM file to read.
78  /// \returns An expected GsymReader that contains the object or an error
79  /// object that indicates reason for failing to read the GSYM.
81 
82  /// Construct a GsymReader from a buffer.
83  ///
84  /// \param Bytes A set of bytes that will be copied and owned by the
85  /// returned object on success.
86  /// \returns An expected GsymReader that contains the object or an error
87  /// object that indicates reason for failing to read the GSYM.
89 
90  /// Access the GSYM header.
91  /// \returns A native endian version of the GSYM header.
92  const Header &getHeader() const;
93 
94  /// Get the full function info for an address.
95  ///
96  /// This should be called when a client will store a copy of the complete
97  /// FunctionInfo for a given address. For one off lookups, use the lookup()
98  /// function below.
99  ///
100  /// Symbolication server processes might want to parse the entire function
101  /// info for a given address and cache it if the process stays around to
102  /// service many symbolication addresses, like for parsing profiling
103  /// information.
104  ///
105  /// \param Addr A virtual address from the original object file to lookup.
106  ///
107  /// \returns An expected FunctionInfo that contains the function info object
108  /// or an error object that indicates reason for failing to lookup the
109  /// address.
110  llvm::Expected<FunctionInfo> getFunctionInfo(uint64_t Addr) const;
111 
112  /// Look up an address in a GSYM.
113  ///
114  /// Lookup just the information needed for a specific address \a Addr. This
115  /// function is faster than calling getFunctionInfo() as it will only return
116  /// information that pertains to \a Addr and allows the parsing to skip any
117  /// extra information encoded for other addresses. For example the line table
118  /// parsing can stop when a matching LineEntry has been found, and the
119  /// InlineInfo can stop parsing early once a match has been found and also
120  /// skip information that doesn't match. This avoids memory allocations and
121  /// is much faster for lookups.
122  ///
123  /// \param Addr A virtual address from the original object file to lookup.
124  /// \returns An expected LookupResult that contains only the information
125  /// needed for the current address, or an error object that indicates reason
126  /// for failing to lookup the address.
127  llvm::Expected<LookupResult> lookup(uint64_t Addr) const;
128 
129  /// Get a string from the string table.
130  ///
131  /// \param Offset The string table offset for the string to retrieve.
132  /// \returns The string from the string table.
133  StringRef getString(uint32_t Offset) const { return StrTab[Offset]; }
134 
135  /// Get the a file entry for the suppplied file index.
136  ///
137  /// Used to convert any file indexes in the FunctionInfo data back into
138  /// files. This function can be used for iteration, but is more commonly used
139  /// for random access when doing lookups.
140  ///
141  /// \param Index An index into the file table.
142  /// \returns An optional FileInfo that will be valid if the file index is
143  /// valid, or llvm::None if the file index is out of bounds,
145  if (Index < Files.size())
146  return Files[Index];
147  return llvm::None;
148  }
149 
150  /// Dump the entire Gsym data contained in this object.
151  ///
152  /// \param OS The output stream to dump to.
153  void dump(raw_ostream &OS);
154 
155  /// Dump a FunctionInfo object.
156  ///
157  /// This function will convert any string table indexes and file indexes
158  /// into human readable format.
159  ///
160  /// \param OS The output stream to dump to.
161  ///
162  /// \param FI The object to dump.
163  void dump(raw_ostream &OS, const FunctionInfo &FI);
164 
165  /// Dump a LineTable object.
166  ///
167  /// This function will convert any string table indexes and file indexes
168  /// into human readable format.
169  ///
170  ///
171  /// \param OS The output stream to dump to.
172  ///
173  /// \param LT The object to dump.
174  void dump(raw_ostream &OS, const LineTable &LT);
175 
176  /// Dump a InlineInfo object.
177  ///
178  /// This function will convert any string table indexes and file indexes
179  /// into human readable format.
180  ///
181  /// \param OS The output stream to dump to.
182  ///
183  /// \param II The object to dump.
184  ///
185  /// \param Indent The indentation as number of spaces. Used for recursive
186  /// dumping.
187  void dump(raw_ostream &OS, const InlineInfo &II, uint32_t Indent = 0);
188 
189  /// Dump a FileEntry object.
190  ///
191  /// This function will convert any string table indexes into human readable
192  /// format.
193  ///
194  /// \param OS The output stream to dump to.
195  ///
196  /// \param FE The object to dump.
197  void dump(raw_ostream &OS, Optional<FileEntry> FE);
198 
199  /// Get the number of addresses in this Gsym file.
201  return Hdr->NumAddresses;
202  }
203 
204  /// Gets an address from the address table.
205  ///
206  /// Addresses are stored as offsets from the gsym::Header::BaseAddress.
207  ///
208  /// \param Index An index into the address table.
209  /// \returns A resolved virtual address for the address in the address table
210  /// or llvm::None if Index is out of bounds.
211  Optional<uint64_t> getAddress(size_t Index) const;
212 
213 protected:
214 
215  /// Get an appropriate address info offsets array.
216  ///
217  /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8
218  /// byte offsets from the The gsym::Header::BaseAddress. The table is stored
219  /// internally as a array of bytes that are in the correct endianness. When
220  /// we access this table we must get an array that matches those sizes. This
221  /// templatized helper function is used when accessing address offsets in the
222  /// AddrOffsets member variable.
223  ///
224  /// \returns An ArrayRef of an appropriate address offset size.
225  template <class T> ArrayRef<T>
226  getAddrOffsets() const {
227  return ArrayRef<T>(reinterpret_cast<const T *>(AddrOffsets.data()),
228  AddrOffsets.size()/sizeof(T));
229  }
230 
231  /// Get an appropriate address from the address table.
232  ///
233  /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8
234  /// byte address offsets from the The gsym::Header::BaseAddress. The table is
235  /// stored internally as a array of bytes that are in the correct endianness.
236  /// In order to extract an address from the address table we must access the
237  /// address offset using the correct size and then add it to the BaseAddress
238  /// in the header.
239  ///
240  /// \param Index An index into the AddrOffsets array.
241  /// \returns An virtual address that matches the original object file for the
242  /// address as the specified index, or llvm::None if Index is out of bounds.
243  template <class T> Optional<uint64_t>
244  addressForIndex(size_t Index) const {
245  ArrayRef<T> AIO = getAddrOffsets<T>();
246  if (Index < AIO.size())
247  return AIO[Index] + Hdr->BaseAddress;
248  return llvm::None;
249  }
250  /// Lookup an address offset in the AddrOffsets table.
251  ///
252  /// Given an address offset, look it up using a binary search of the
253  /// AddrOffsets table.
254  ///
255  /// \param AddrOffset An address offset, that has already been computed by
256  /// subtracting the gsym::Header::BaseAddress.
257  /// \returns The matching address offset index. This index will be used to
258  /// extract the FunctionInfo data's offset from the AddrInfoOffsets array.
259  template <class T>
260  llvm::Optional<uint64_t> getAddressOffsetIndex(const uint64_t AddrOffset) const {
261  ArrayRef<T> AIO = getAddrOffsets<T>();
262  const auto Begin = AIO.begin();
263  const auto End = AIO.end();
264  auto Iter = std::lower_bound(Begin, End, AddrOffset);
265  // Watch for addresses that fall between the gsym::Header::BaseAddress and
266  // the first address offset.
267  if (Iter == Begin && AddrOffset < *Begin)
268  return llvm::None;
269  if (Iter == End || AddrOffset < *Iter)
270  --Iter;
271  return std::distance(Begin, Iter);
272  }
273 
274  /// Create a GSYM from a memory buffer.
275  ///
276  /// Called by both openFile() and copyBuffer(), this function does all of the
277  /// work of parsing the GSYM file and returning an error.
278  ///
279  /// \param MemBuffer A memory buffer that will transfer ownership into the
280  /// GsymReader.
281  /// \returns An expected GsymReader that contains the object or an error
282  /// object that indicates reason for failing to read the GSYM.
284  create(std::unique_ptr<MemoryBuffer> &MemBuffer);
285 
286 
287  /// Given an address, find the address index.
288  ///
289  /// Binary search the address table and find the matching address index.
290  ///
291  /// \param Addr A virtual address that matches the original object file
292  /// to lookup.
293  /// \returns An index into the address table. This index can be used to
294  /// extract the FunctionInfo data's offset from the AddrInfoOffsets array.
295  /// Returns an error if the address isn't in the GSYM with details of why.
296  Expected<uint64_t> getAddressIndex(const uint64_t Addr) const;
297 
298  /// Given an address index, get the offset for the FunctionInfo.
299  ///
300  /// Looking up an address is done by finding the corresponding address
301  /// index for the address. This index is then used to get the offset of the
302  /// FunctionInfo data that we will decode using this function.
303  ///
304  /// \param Index An index into the address table.
305  /// \returns An optional GSYM data offset for the offset of the FunctionInfo
306  /// that needs to be decoded.
307  Optional<uint64_t> getAddressInfoOffset(size_t Index) const;
308 };
309 
310 } // namespace gsym
311 } // namespace llvm
312 
313 #endif // #ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H
GsymReader is used to read GSYM data from a file or buffer.
Definition: GsymReader.h:47
static llvm::Expected< GsymReader > copyBuffer(StringRef Bytes)
Construct a GsymReader from a buffer.
Definition: GsymReader.cpp:44
const Header & getHeader() const
Access the GSYM header.
Definition: GsymReader.cpp:201
This class represents lattice values for constants.
Definition: AllocatorList.h:23
iterator begin() const
Definition: ArrayRef.h:144
Inline information stores the name of the inline function along with an array of address ranges...
Definition: InlineInfo.h:61
llvm::Optional< uint64_t > getAddressOffsetIndex(const uint64_t AddrOffset) const
Lookup an address offset in the AddrOffsets table.
Definition: GsymReader.h:260
Function information in GSYM files encodes information for one contiguous address range...
Definition: FunctionInfo.h:88
uint64_t BaseAddress
The 64 bit base address that all address offsets in the address offsets table are relative to...
Definition: Header.h:62
Tagged union holding either a T or a Error.
Definition: APFloat.h:42
Definition: regcomp.c:192
Optional< FileEntry > getFile(uint32_t Index) const
Get the file entry for the supplied file index.
Definition: GsymReader.h:144
StringRef getString(uint32_t Offset) const
Get a string from the string table.
Definition: GsymReader.h:133
llvm::Expected< FunctionInfo > getFunctionInfo(uint64_t Addr) const
Get the full function info for an address.
Definition: GsymReader.cpp:257
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1594
Expected< uint64_t > getAddressIndex(const uint64_t Addr) const
Given an address, find the address index.
Definition: GsymReader.cpp:227
Optional< uint64_t > getAddress(size_t Index) const
Gets an address from the address table.
Definition: GsymReader.cpp:209
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:156
LineTable class contains deserialized versions of line tables for each function's address ranges...
Definition: LineTable.h:118
String tables in GSYM files are required to start with an empty string at offset zero.
Definition: StringTable.h:24
static llvm::Expected< GsymReader > openFile(StringRef Path)
Construct a GsymReader from a file on disk.
Definition: GsymReader.cpp:34
const T * data() const
Definition: ArrayRef.h:153
llvm::Expected< LookupResult > lookup(uint64_t Addr) const
Look up an address in a GSYM.
Definition: GsymReader.cpp:279
iterator end() const
Definition: ArrayRef.h:145
The GSYM header.
Definition: Header.h:45
static llvm::Expected< llvm::gsym::GsymReader > create(std::unique_ptr< MemoryBuffer > &MemBuffer)
Create a GSYM from a memory buffer.
Definition: GsymReader.cpp:50
Optional< uint64_t > addressForIndex(size_t Index) const
Get an appropriate address from the address table.
Definition: GsymReader.h:244
void dump(raw_ostream &OS)
Dump the entire Gsym data contained in this object.
Definition: GsymReader.cpp:294
uint32_t getNumAddresses() const
Get the number of addresses in this Gsym file.
Definition: GsymReader.h:200
Provides ErrorOr<T> smart pointer.
Lightweight error class with error context and mandatory checking.
Definition: Error.h:157
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:46
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
ArrayRef< T > getAddrOffsets() const
Get an appropriate address info offsets array.
Definition: GsymReader.h:226
uint32_t NumAddresses
The number of addresses stored in the address offsets table.
Definition: Header.h:64
Optional< uint64_t > getAddressInfoOffset(size_t Index) const
Given an address index, get the offset for the FunctionInfo.
Definition: GsymReader.cpp:219