LLVM 18.0.0git
GsymReader.cpp
Go to the documentation of this file.
1//===- GsymReader.cpp -----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10
11#include <assert.h>
12#include <inttypes.h>
13#include <stdio.h>
14#include <stdlib.h>
15
22
23using namespace llvm;
24using namespace gsym;
25
26GsymReader::GsymReader(std::unique_ptr<MemoryBuffer> Buffer) :
27 MemBuffer(std::move(Buffer)),
28 Endian(support::endian::system_endianness()) {}
29
// Move constructor: compiler-generated member-wise move, defined out of line
// in this translation unit.
30 GsymReader::GsymReader(GsymReader &&RHS) = default;
31
// Destructor: compiler-generated, defined out of line in this translation
// unit.
32GsymReader::~GsymReader() = default;
33
35 // Open the input file and return an appropriate error if needed.
38 auto Err = BuffOrErr.getError();
39 if (Err)
40 return llvm::errorCodeToError(Err);
41 return create(BuffOrErr.get());
42}
43
45 auto MemBuffer = MemoryBuffer::getMemBufferCopy(Bytes, "GSYM bytes");
46 return create(MemBuffer);
47}
48
50GsymReader::create(std::unique_ptr<MemoryBuffer> &MemBuffer) {
51 if (!MemBuffer)
52 return createStringError(std::errc::invalid_argument,
53 "invalid memory buffer");
54 GsymReader GR(std::move(MemBuffer));
55 llvm::Error Err = GR.parse();
56 if (Err)
57 return std::move(Err);
58 return std::move(GR);
59}
60
62GsymReader::parse() {
63 BinaryStreamReader FileData(MemBuffer->getBuffer(),
65 // Check for the magic bytes. This file format is designed to be mmap'ed
66 // into a process and accessed as read only. This is done for performance
67 // and efficiency for symbolicating and parsing GSYM data.
68 if (FileData.readObject(Hdr))
69 return createStringError(std::errc::invalid_argument,
70 "not enough data for a GSYM header");
71
72 const auto HostByteOrder = support::endian::system_endianness();
73 switch (Hdr->Magic) {
74 case GSYM_MAGIC:
75 Endian = HostByteOrder;
76 break;
77 case GSYM_CIGAM:
78 // This is a GSYM file, but not native endianness.
80 Swap.reset(new SwappedData);
81 break;
82 default:
83 return createStringError(std::errc::invalid_argument,
84 "not a GSYM file");
85 }
86
87 bool DataIsLittleEndian = HostByteOrder != support::little;
88 // Read a correctly byte swapped header if we need to.
89 if (Swap) {
90 DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4);
91 if (auto ExpectedHdr = Header::decode(Data))
92 Swap->Hdr = ExpectedHdr.get();
93 else
94 return ExpectedHdr.takeError();
95 Hdr = &Swap->Hdr;
96 }
97
98 // Detect errors in the header and report any that are found. If we make it
99 // past this without errors, we know we have a good magic value, a supported
100 // version number, verified address offset size and a valid UUID size.
101 if (Error Err = Hdr->checkForError())
102 return Err;
103
104 if (!Swap) {
105 // This is the native endianness case that is most common and optimized for
106 // efficient lookups. Here we just grab pointers to the native data and
107 // use ArrayRef objects to allow efficient read only access.
108
109 // Read the address offsets.
110 if (FileData.padToAlignment(Hdr->AddrOffSize) ||
111 FileData.readArray(AddrOffsets,
112 Hdr->NumAddresses * Hdr->AddrOffSize))
113 return createStringError(std::errc::invalid_argument,
114 "failed to read address table");
115
116 // Read the address info offsets.
117 if (FileData.padToAlignment(4) ||
118 FileData.readArray(AddrInfoOffsets, Hdr->NumAddresses))
119 return createStringError(std::errc::invalid_argument,
120 "failed to read address info offsets table");
121
122 // Read the file table.
123 uint32_t NumFiles = 0;
124 if (FileData.readInteger(NumFiles) || FileData.readArray(Files, NumFiles))
125 return createStringError(std::errc::invalid_argument,
126 "failed to read file table");
127
128 // Get the string table.
129 FileData.setOffset(Hdr->StrtabOffset);
130 if (FileData.readFixedString(StrTab.Data, Hdr->StrtabSize))
131 return createStringError(std::errc::invalid_argument,
132 "failed to read string table");
133} else {
134 // This is the non native endianness case that is not common and not
135 // optimized for lookups. Here we decode the important tables into local
136 // storage and then set the ArrayRef objects to point to these swapped
137 // copies of the read only data so lookups can be as efficient as possible.
138 DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4);
139
140 // Read the address offsets.
141 uint64_t Offset = alignTo(sizeof(Header), Hdr->AddrOffSize);
142 Swap->AddrOffsets.resize(Hdr->NumAddresses * Hdr->AddrOffSize);
143 switch (Hdr->AddrOffSize) {
144 case 1:
145 if (!Data.getU8(&Offset, Swap->AddrOffsets.data(), Hdr->NumAddresses))
146 return createStringError(std::errc::invalid_argument,
147 "failed to read address table");
148 break;
149 case 2:
150 if (!Data.getU16(&Offset,
151 reinterpret_cast<uint16_t *>(Swap->AddrOffsets.data()),
152 Hdr->NumAddresses))
153 return createStringError(std::errc::invalid_argument,
154 "failed to read address table");
155 break;
156 case 4:
157 if (!Data.getU32(&Offset,
158 reinterpret_cast<uint32_t *>(Swap->AddrOffsets.data()),
159 Hdr->NumAddresses))
160 return createStringError(std::errc::invalid_argument,
161 "failed to read address table");
162 break;
163 case 8:
164 if (!Data.getU64(&Offset,
165 reinterpret_cast<uint64_t *>(Swap->AddrOffsets.data()),
166 Hdr->NumAddresses))
167 return createStringError(std::errc::invalid_argument,
168 "failed to read address table");
169 }
170 AddrOffsets = ArrayRef<uint8_t>(Swap->AddrOffsets);
171
172 // Read the address info offsets.
173 Offset = alignTo(Offset, 4);
174 Swap->AddrInfoOffsets.resize(Hdr->NumAddresses);
175 if (Data.getU32(&Offset, Swap->AddrInfoOffsets.data(), Hdr->NumAddresses))
176 AddrInfoOffsets = ArrayRef<uint32_t>(Swap->AddrInfoOffsets);
177 else
178 return createStringError(std::errc::invalid_argument,
179 "failed to read address table");
180 // Read the file table.
181 const uint32_t NumFiles = Data.getU32(&Offset);
182 if (NumFiles > 0) {
183 Swap->Files.resize(NumFiles);
184 if (Data.getU32(&Offset, &Swap->Files[0].Dir, NumFiles*2))
185 Files = ArrayRef<FileEntry>(Swap->Files);
186 else
187 return createStringError(std::errc::invalid_argument,
188 "failed to read file table");
189 }
190 // Get the string table.
191 StrTab.Data = MemBuffer->getBuffer().substr(Hdr->StrtabOffset,
192 Hdr->StrtabSize);
193 if (StrTab.Data.empty())
194 return createStringError(std::errc::invalid_argument,
195 "failed to read string table");
196 }
197 return Error::success();
198
199}
200
202 // The only way to get a GsymReader is from GsymReader::openFile(...) or
203 // GsymReader::copyBuffer() and the header must be valid and initialized to
204 // a valid pointer value, so the assert below should not trigger.
205 assert(Hdr);
206 return *Hdr;
207}
208
209std::optional<uint64_t> GsymReader::getAddress(size_t Index) const {
210 switch (Hdr->AddrOffSize) {
211 case 1: return addressForIndex<uint8_t>(Index);
212 case 2: return addressForIndex<uint16_t>(Index);
213 case 4: return addressForIndex<uint32_t>(Index);
214 case 8: return addressForIndex<uint64_t>(Index);
215 }
216 return std::nullopt;
217}
218
219std::optional<uint64_t> GsymReader::getAddressInfoOffset(size_t Index) const {
220 const auto NumAddrInfoOffsets = AddrInfoOffsets.size();
221 if (Index < NumAddrInfoOffsets)
222 return AddrInfoOffsets[Index];
223 return std::nullopt;
224}
225
228 if (Addr >= Hdr->BaseAddress) {
229 const uint64_t AddrOffset = Addr - Hdr->BaseAddress;
230 std::optional<uint64_t> AddrOffsetIndex;
231 switch (Hdr->AddrOffSize) {
232 case 1:
233 AddrOffsetIndex = getAddressOffsetIndex<uint8_t>(AddrOffset);
234 break;
235 case 2:
236 AddrOffsetIndex = getAddressOffsetIndex<uint16_t>(AddrOffset);
237 break;
238 case 4:
239 AddrOffsetIndex = getAddressOffsetIndex<uint32_t>(AddrOffset);
240 break;
241 case 8:
242 AddrOffsetIndex = getAddressOffsetIndex<uint64_t>(AddrOffset);
243 break;
244 default:
245 return createStringError(std::errc::invalid_argument,
246 "unsupported address offset size %u",
247 Hdr->AddrOffSize);
248 }
249 if (AddrOffsetIndex)
250 return *AddrOffsetIndex;
251 }
252 return createStringError(std::errc::invalid_argument,
253 "address 0x%" PRIx64 " is not in GSYM", Addr);
254
255}
256
259 if (!AddressIndex)
260 return AddressIndex.takeError();
261 // Address info offsets size should have been checked in parse().
262 assert(*AddressIndex < AddrInfoOffsets.size());
263 auto AddrInfoOffset = AddrInfoOffsets[*AddressIndex];
264 DataExtractor Data(MemBuffer->getBuffer().substr(AddrInfoOffset), Endian, 4);
265 if (std::optional<uint64_t> OptAddr = getAddress(*AddressIndex)) {
266 auto ExpectedFI = FunctionInfo::decode(Data, *OptAddr);
267 if (ExpectedFI) {
268 if (ExpectedFI->Range.contains(Addr) || ExpectedFI->Range.size() == 0)
269 return ExpectedFI;
270 return createStringError(std::errc::invalid_argument,
271 "address 0x%" PRIx64 " is not in GSYM", Addr);
272 }
273 }
274 return createStringError(std::errc::invalid_argument,
275 "failed to extract address[%" PRIu64 "]",
276 *AddressIndex);
277}
278
281 if (!AddressIndex)
282 return AddressIndex.takeError();
283 // Address info offsets size should have been checked in parse().
284 assert(*AddressIndex < AddrInfoOffsets.size());
285 auto AddrInfoOffset = AddrInfoOffsets[*AddressIndex];
286 DataExtractor Data(MemBuffer->getBuffer().substr(AddrInfoOffset), Endian, 4);
287 if (std::optional<uint64_t> OptAddr = getAddress(*AddressIndex))
288 return FunctionInfo::lookup(Data, *this, *OptAddr, Addr);
289 return createStringError(std::errc::invalid_argument,
290 "failed to extract address[%" PRIu64 "]",
291 *AddressIndex);
292}
293
295 const auto &Header = getHeader();
296 // Dump the GSYM header.
297 OS << Header << "\n";
298 // Dump the address table.
299 OS << "Address Table:\n";
300 OS << "INDEX OFFSET";
301
302 switch (Hdr->AddrOffSize) {
303 case 1: OS << "8 "; break;
304 case 2: OS << "16"; break;
305 case 4: OS << "32"; break;
306 case 8: OS << "64"; break;
307 default: OS << "??"; break;
308 }
309 OS << " (ADDRESS)\n";
310 OS << "====== =============================== \n";
311 for (uint32_t I = 0; I < Header.NumAddresses; ++I) {
312 OS << format("[%4u] ", I);
313 switch (Hdr->AddrOffSize) {
314 case 1: OS << HEX8(getAddrOffsets<uint8_t>()[I]); break;
315 case 2: OS << HEX16(getAddrOffsets<uint16_t>()[I]); break;
316 case 4: OS << HEX32(getAddrOffsets<uint32_t>()[I]); break;
317 case 8: OS << HEX32(getAddrOffsets<uint64_t>()[I]); break;
318 default: break;
319 }
320 OS << " (" << HEX64(*getAddress(I)) << ")\n";
321 }
322 // Dump the address info offsets table.
323 OS << "\nAddress Info Offsets:\n";
324 OS << "INDEX Offset\n";
325 OS << "====== ==========\n";
326 for (uint32_t I = 0; I < Header.NumAddresses; ++I)
327 OS << format("[%4u] ", I) << HEX32(AddrInfoOffsets[I]) << "\n";
328 // Dump the file table.
329 OS << "\nFiles:\n";
330 OS << "INDEX DIRECTORY BASENAME PATH\n";
331 OS << "====== ========== ========== ==============================\n";
332 for (uint32_t I = 0; I < Files.size(); ++I) {
333 OS << format("[%4u] ", I) << HEX32(Files[I].Dir) << ' '
334 << HEX32(Files[I].Base) << ' ';
335 dump(OS, getFile(I));
336 OS << "\n";
337 }
338 OS << "\n" << StrTab << "\n";
339
340 for (uint32_t I = 0; I < Header.NumAddresses; ++I) {
341 OS << "FunctionInfo @ " << HEX32(AddrInfoOffsets[I]) << ": ";
342 if (auto FI = getFunctionInfo(*getAddress(I)))
343 dump(OS, *FI);
344 else
345 logAllUnhandledErrors(FI.takeError(), OS, "FunctionInfo:");
346 }
347}
348
350 OS << FI.Range << " \"" << getString(FI.Name) << "\"\n";
351 if (FI.OptLineTable)
352 dump(OS, *FI.OptLineTable);
353 if (FI.Inline)
354 dump(OS, *FI.Inline);
355}
356
358 OS << "LineTable:\n";
359 for (auto &LE: LT) {
360 OS << " " << HEX64(LE.Addr) << ' ';
361 if (LE.File)
362 dump(OS, getFile(LE.File));
363 OS << ':' << LE.Line << '\n';
364 }
365}
366
368 if (Indent == 0)
369 OS << "InlineInfo:\n";
370 else
371 OS.indent(Indent);
372 OS << II.Ranges << ' ' << getString(II.Name);
373 if (II.CallFile != 0) {
374 if (auto File = getFile(II.CallFile)) {
375 OS << " called from ";
376 dump(OS, File);
377 OS << ':' << II.CallLine;
378 }
379 }
380 OS << '\n';
381 for (const auto &ChildII: II.Children)
382 dump(OS, ChildII, Indent + 2);
383}
384
385void GsymReader::dump(raw_ostream &OS, std::optional<FileEntry> FE) {
386 if (FE) {
387 // IF we have the file from index 0, then don't print anything
388 if (FE->Dir == 0 && FE->Base == 0)
389 return;
390 StringRef Dir = getString(FE->Dir);
391 StringRef Base = getString(FE->Base);
392 if (!Dir.empty()) {
393 OS << Dir;
394 if (Dir.contains('\\') && !Dir.contains('/'))
395 OS << '\\';
396 else
397 OS << '/';
398 }
399 if (!Base.empty()) {
400 OS << Base;
401 }
402 if (!Dir.empty() || !Base.empty())
403 return;
404 }
405 OS << "<invalid-file>";
406}
uint64_t Addr
#define HEX64(v)
Definition: ExtractRanges.h:21
#define HEX32(v)
Definition: ExtractRanges.h:20
#define I(x, y, z)
Definition: MD5.cpp:58
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
endianness Endian
raw_pwrite_stream & OS
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
Provides read only access to a subclass of BinaryStream.
Represents either an error or a value T.
Definition: ErrorOr.h:56
reference get()
Definition: ErrorOr.h:149
std::error_code getError() const
Definition: ErrorOr.h:152
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
static ErrorSuccess success()
Create a success value.
Definition: Error.h:334
Tagged union holding either a T or a Error.
Definition: Error.h:474
Error takeError()
Take ownership of the stored error.
Definition: Error.h:601
static std::unique_ptr< MemoryBuffer > getMemBufferCopy(StringRef InputData, const Twine &BufferName="")
Open the specified memory range as a MemoryBuffer, copying the contents and taking ownership of it.
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFileOrSTDIN(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, or open stdin if the Filename is "-".
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
GsymReader is used to read GSYM data from a file or buffer.
Definition: GsymReader.h:44
std::optional< FileEntry > getFile(uint32_t Index) const
Get the file entry for the supplied file index.
Definition: GsymReader.h:141
void dump(raw_ostream &OS)
Dump the entire Gsym data contained in this object.
Definition: GsymReader.cpp:294
static llvm::Expected< GsymReader > openFile(StringRef Path)
Construct a GsymReader from a file on disk.
Definition: GsymReader.cpp:34
std::optional< uint64_t > getAddress(size_t Index) const
Gets an address from the address table.
Definition: GsymReader.cpp:209
std::optional< uint64_t > getAddressInfoOffset(size_t Index) const
Given an address index, get the offset for the FunctionInfo.
Definition: GsymReader.cpp:219
StringRef getString(uint32_t Offset) const
Get a string from the string table.
Definition: GsymReader.h:130
llvm::Expected< FunctionInfo > getFunctionInfo(uint64_t Addr) const
Get the full function info for an address.
Definition: GsymReader.cpp:257
const Header & getHeader() const
Access the GSYM header.
Definition: GsymReader.cpp:201
Expected< uint64_t > getAddressIndex(const uint64_t Addr) const
Given an address, find the address index.
Definition: GsymReader.cpp:227
llvm::Expected< LookupResult > lookup(uint64_t Addr) const
Look up an address in a GSYM.
Definition: GsymReader.cpp:279
static llvm::Expected< GsymReader > copyBuffer(StringRef Bytes)
Construct a GsymReader from a buffer.
Definition: GsymReader.cpp:44
static llvm::Expected< llvm::gsym::GsymReader > create(std::unique_ptr< MemoryBuffer > &MemBuffer)
Create a GSYM from a memory buffer.
Definition: GsymReader.cpp:50
LineTable class contains deserialized versions of line tables for each function's address ranges.
Definition: LineTable.h:118
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
raw_ostream & indent(unsigned NumSpaces)
indent - Insert 'NumSpaces' spaces.
constexpr uint32_t GSYM_MAGIC
Definition: Header.h:24
constexpr uint32_t GSYM_CIGAM
Definition: Header.h:25
constexpr endianness system_endianness()
Definition: Endian.h:44
constexpr bool IsBigEndianHost
Definition: SwapByteOrder.h:54
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:440
void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner={})
Log all errors (if any) in E to OS.
Definition: Error.cpp:65
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition: Error.h:1244
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:125
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1854
Error errorCodeToError(std::error_code EC)
Helper for converting an std::error_code to a Error.
Definition: Error.cpp:103
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
Function information in GSYM files encodes information for one contiguous address range.
Definition: FunctionInfo.h:89
std::optional< InlineInfo > Inline
Definition: FunctionInfo.h:93
static llvm::Expected< LookupResult > lookup(DataExtractor &Data, const GsymReader &GR, uint64_t FuncAddr, uint64_t Addr)
Lookup an address within a FunctionInfo object's data stream.
uint32_t Name
String table offset in the string table.
Definition: FunctionInfo.h:91
std::optional< LineTable > OptLineTable
Definition: FunctionInfo.h:92
static llvm::Expected< FunctionInfo > decode(DataExtractor &Data, uint64_t BaseAddr)
Decode an object from a binary data stream.
The GSYM header.
Definition: Header.h:45
uint8_t AddrOffSize
The size in bytes of each address offset in the address offsets table.
Definition: Header.h:56
static llvm::Expected< Header > decode(DataExtractor &Data)
Decode an object from a binary data stream.
Definition: Header.cpp:64
uint32_t Magic
The magic bytes should be set to GSYM_MAGIC.
Definition: Header.h:49
uint32_t StrtabOffset
The file relative offset of the start of the string table for strings contained in the GSYM file.
Definition: Header.h:72
uint32_t StrtabSize
The size in bytes of the string table.
Definition: Header.h:80
llvm::Error checkForError() const
Check if a header is valid and return an error if anything is wrong.
Definition: Header.cpp:41
uint32_t NumAddresses
The number of addresses stored in the address offsets table.
Definition: Header.h:64
uint64_t BaseAddress
The 64 bit base address that all address offsets in the address offsets table are relative to.
Definition: Header.h:62
Inline information stores the name of the inline function along with an array of address ranges.
Definition: InlineInfo.h:59
std::vector< InlineInfo > Children
Definition: InlineInfo.h:65
AddressRanges Ranges
Definition: InlineInfo.h:64
uint32_t CallFile
1 based file index in the file table.
Definition: InlineInfo.h:62
uint32_t CallLine
Source line number.
Definition: InlineInfo.h:63
uint32_t Name
String table offset in the string table.
Definition: InlineInfo.h:61