LLVM 19.0.0git
DwarfTransformer.cpp
Go to the documentation of this file.
1//===- DwarfTransformer.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include <thread>
10#include <unordered_set>
11
15#include "llvm/Support/Error.h"
18
25
26#include <optional>
27
28using namespace llvm;
29using namespace gsym;
30
33 const char *CompDir;
34 std::vector<uint32_t> FileCache;
36 uint8_t AddrSize = 0;
37
40 CompDir = CU->getCompilationDir();
41 FileCache.clear();
42 if (LineTable)
43 FileCache.assign(LineTable->Prologue.FileNames.size() + 1, UINT32_MAX);
44 DWARFDie Die = CU->getUnitDIE();
45 Language = dwarf::toUnsigned(Die.find(dwarf::DW_AT_language), 0);
46 AddrSize = CU->getAddressByteSize();
47 }
48
49 /// Return true if Addr is the highest address for a given compile unit. The
50 /// highest address is encoded as -1, or all ones in the address. These high
51 /// addresses are used by some linkers to indicate that a function has been
52 /// dead stripped or didn't end up in the linked executable.
54 if (AddrSize == 4)
55 return Addr == UINT32_MAX;
56 else if (AddrSize == 8)
57 return Addr == UINT64_MAX;
58 return false;
59 }
60
61 /// Convert a DWARF compile unit file index into a GSYM global file index.
62 ///
63 /// Each compile unit in DWARF has its own file table in the line table
64 /// prologue. GSYM has a single large file table that applies to all files
65 /// from all of the info in a GSYM file. This function converts between the
66 /// two and caches and DWARF CU file index that has already been converted so
67 /// the first client that asks for a compile unit file index will end up
68 /// doing the conversion, and subsequent clients will get the cached GSYM
69 /// index.
70 std::optional<uint32_t> DWARFToGSYMFileIndex(GsymCreator &Gsym,
71 uint32_t DwarfFileIdx) {
72 if (!LineTable || DwarfFileIdx >= FileCache.size())
73 return std::nullopt;
74 uint32_t &GsymFileIdx = FileCache[DwarfFileIdx];
75 if (GsymFileIdx != UINT32_MAX)
76 return GsymFileIdx;
77 std::string File;
78 if (LineTable->getFileNameByIndex(
79 DwarfFileIdx, CompDir,
80 DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, File))
81 GsymFileIdx = Gsym.insertFile(File);
82 else
83 GsymFileIdx = 0;
84 return GsymFileIdx;
85 }
86};
87
88
90 if (DWARFDie SpecDie =
91 Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_specification)) {
92 if (DWARFDie SpecParent = GetParentDeclContextDIE(SpecDie))
93 return SpecParent;
94 }
95 if (DWARFDie AbstDie =
96 Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_abstract_origin)) {
97 if (DWARFDie AbstParent = GetParentDeclContextDIE(AbstDie))
98 return AbstParent;
99 }
100
101 // We never want to follow parent for inlined subroutine - that would
102 // give us information about where the function is inlined, not what
103 // function is inlined
104 if (Die.getTag() == dwarf::DW_TAG_inlined_subroutine)
105 return DWARFDie();
106
107 DWARFDie ParentDie = Die.getParent();
108 if (!ParentDie)
109 return DWARFDie();
110
111 switch (ParentDie.getTag()) {
112 case dwarf::DW_TAG_namespace:
113 case dwarf::DW_TAG_structure_type:
114 case dwarf::DW_TAG_union_type:
115 case dwarf::DW_TAG_class_type:
116 case dwarf::DW_TAG_subprogram:
117 return ParentDie; // Found parent decl context DIE
118 case dwarf::DW_TAG_lexical_block:
119 return GetParentDeclContextDIE(ParentDie);
120 default:
121 break;
122 }
123
124 return DWARFDie();
125}
126
127/// Get the GsymCreator string table offset for the qualified name for the
128/// DIE passed in. This function will avoid making copies of any strings in
129/// the GsymCreator when possible. We don't need to copy a string when the
130/// string comes from our .debug_str section or is an inlined string in the
131/// .debug_info. If we create a qualified name string in this function by
132/// combining multiple strings in the DWARF string table or info, we will make
133/// a copy of the string when we add it to the string table.
134static std::optional<uint32_t>
136 // If the dwarf has mangled name, use mangled name
137 if (auto LinkageName = Die.getLinkageName()) {
138 // We have seen cases where the linkage name is actually empty.
139 if (strlen(LinkageName) > 0)
140 return Gsym.insertString(LinkageName, /* Copy */ false);
141 }
142
143 StringRef ShortName(Die.getName(DINameKind::ShortName));
144 if (ShortName.empty())
145 return std::nullopt;
146
147 // For C++ and ObjC, prepend names of all parent declaration contexts
148 if (!(Language == dwarf::DW_LANG_C_plus_plus ||
149 Language == dwarf::DW_LANG_C_plus_plus_03 ||
150 Language == dwarf::DW_LANG_C_plus_plus_11 ||
151 Language == dwarf::DW_LANG_C_plus_plus_14 ||
152 Language == dwarf::DW_LANG_ObjC_plus_plus ||
153 // This should not be needed for C, but we see C++ code marked as C
154 // in some binaries. This should not hurt, so let's do it for C as well
155 Language == dwarf::DW_LANG_C))
156 return Gsym.insertString(ShortName, /* Copy */ false);
157
158 // Some GCC optimizations create functions with names ending with .isra.<num>
159 // or .part.<num> and those names are just DW_AT_name, not DW_AT_linkage_name
160 // If it looks like it could be the case, don't add any prefix
161 if (ShortName.starts_with("_Z") &&
162 (ShortName.contains(".isra.") || ShortName.contains(".part.")))
163 return Gsym.insertString(ShortName, /* Copy */ false);
164
165 DWARFDie ParentDeclCtxDie = GetParentDeclContextDIE(Die);
166 if (ParentDeclCtxDie) {
167 std::string Name = ShortName.str();
168 while (ParentDeclCtxDie) {
169 StringRef ParentName(ParentDeclCtxDie.getName(DINameKind::ShortName));
170 if (!ParentName.empty()) {
171 // "lambda" names are wrapped in < >. Replace with { }
172 // to be consistent with demangled names and not to confuse with
173 // templates
174 if (ParentName.front() == '<' && ParentName.back() == '>')
175 Name = "{" + ParentName.substr(1, ParentName.size() - 2).str() + "}" +
176 "::" + Name;
177 else
178 Name = ParentName.str() + "::" + Name;
179 }
180 ParentDeclCtxDie = GetParentDeclContextDIE(ParentDeclCtxDie);
181 }
182 // Copy the name since we created a new name in a std::string.
183 return Gsym.insertString(Name, /* Copy */ true);
184 }
185 // Don't copy the name since it exists in the DWARF object file.
186 return Gsym.insertString(ShortName, /* Copy */ false);
187}
188
190 bool CheckChildren = true;
191 switch (Die.getTag()) {
192 case dwarf::DW_TAG_subprogram:
193 // Don't look into functions within functions.
194 CheckChildren = Depth == 0;
195 break;
196 case dwarf::DW_TAG_inlined_subroutine:
197 return true;
198 default:
199 break;
200 }
201 if (!CheckChildren)
202 return false;
203 for (DWARFDie ChildDie : Die.children()) {
204 if (hasInlineInfo(ChildDie, Depth + 1))
205 return true;
206 }
207 return false;
208}
209
210static AddressRanges
212 AddressRanges Ranges;
213 for (const DWARFAddressRange &DwarfRange : DwarfRanges) {
214 if (DwarfRange.LowPC < DwarfRange.HighPC)
215 Ranges.insert({DwarfRange.LowPC, DwarfRange.HighPC});
216 }
217 return Ranges;
218}
219
221 CUInfo &CUI, DWARFDie Die, uint32_t Depth,
222 FunctionInfo &FI, InlineInfo &Parent,
223 const AddressRanges &AllParentRanges,
224 bool &WarnIfEmpty) {
225 if (!hasInlineInfo(Die, Depth))
226 return;
227
228 dwarf::Tag Tag = Die.getTag();
229 if (Tag == dwarf::DW_TAG_inlined_subroutine) {
230 // create new InlineInfo and append to parent.children
231 InlineInfo II;
232 AddressRanges AllInlineRanges;
234 if (RangesOrError) {
235 AllInlineRanges = ConvertDWARFRanges(RangesOrError.get());
236 uint32_t EmptyCount = 0;
237 for (const AddressRange &InlineRange : AllInlineRanges) {
238 // Check for empty inline range in case inline function was outlined
239 // or has no code
240 if (InlineRange.empty()) {
241 ++EmptyCount;
242 } else {
243 if (Parent.Ranges.contains(InlineRange)) {
244 II.Ranges.insert(InlineRange);
245 } else {
246 // Only warn if the current inline range is not within any of all
247 // of the parent ranges. If we have a DW_TAG_subprogram with multiple
248 // ranges we will emit a FunctionInfo for each range of that
249 // function that only emits information within the current range,
250 // so we only want to emit an error if the DWARF has issues, not
251 // when a range currently just isn't in the range we are currently
252 // parsing for.
253 if (AllParentRanges.contains(InlineRange)) {
254 WarnIfEmpty = false;
255 } else
256 Out.Report("Function DIE has uncontained address range",
257 [&](raw_ostream &OS) {
258 OS << "error: inlined function DIE at "
259 << HEX32(Die.getOffset()) << " has a range ["
260 << HEX64(InlineRange.start()) << " - "
261 << HEX64(InlineRange.end())
262 << ") that isn't contained in "
263 << "any parent address ranges, this inline range "
264 "will be "
265 "removed.\n";
266 });
267 }
268 }
269 }
270 // If we have all empty ranges for the inlines, then don't warn if we
271 // have an empty InlineInfo at the top level as all inline functions
272 // were elided.
273 if (EmptyCount == AllInlineRanges.size())
274 WarnIfEmpty = false;
275 }
276 if (II.Ranges.empty())
277 return;
278
279 if (auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym))
280 II.Name = *NameIndex;
281 const uint64_t DwarfFileIdx = dwarf::toUnsigned(
282 Die.findRecursively(dwarf::DW_AT_call_file), UINT32_MAX);
283 std::optional<uint32_t> OptGSymFileIdx =
284 CUI.DWARFToGSYMFileIndex(Gsym, DwarfFileIdx);
285 if (OptGSymFileIdx) {
286 II.CallFile = OptGSymFileIdx.value();
287 II.CallLine = dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_line), 0);
288 // parse all children and append to parent
289 for (DWARFDie ChildDie : Die.children())
290 parseInlineInfo(Gsym, Out, CUI, ChildDie, Depth + 1, FI, II,
291 AllInlineRanges, WarnIfEmpty);
292 Parent.Children.emplace_back(std::move(II));
293 } else
294 Out.Report(
295 "Inlined function die has invlaid file index in DW_AT_call_file",
296 [&](raw_ostream &OS) {
297 OS << "error: inlined function DIE at " << HEX32(Die.getOffset())
298 << " has an invalid file index " << DwarfFileIdx
299 << " in its DW_AT_call_file attribute, this inline entry and "
300 "all "
301 << "children will be removed.\n";
302 });
303 return;
304 }
305 if (Tag == dwarf::DW_TAG_subprogram || Tag == dwarf::DW_TAG_lexical_block) {
306 // skip this Die and just recurse down
307 for (DWARFDie ChildDie : Die.children())
308 parseInlineInfo(Gsym, Out, CUI, ChildDie, Depth + 1, FI, Parent,
309 AllParentRanges, WarnIfEmpty);
310 }
311}
312
314 DWARFDie Die, GsymCreator &Gsym,
315 FunctionInfo &FI) {
316 std::vector<uint32_t> RowVector;
317 const uint64_t StartAddress = FI.startAddress();
318 const uint64_t EndAddress = FI.endAddress();
319 const uint64_t RangeSize = EndAddress - StartAddress;
320 const object::SectionedAddress SecAddress{
322
323
324 if (!CUI.LineTable->lookupAddressRange(SecAddress, RangeSize, RowVector)) {
325 // If we have a DW_TAG_subprogram but no line entries, fall back to using
326 // the DW_AT_decl_file and DW_AT_decl_line if we have both attributes.
327 std::string FilePath = Die.getDeclFile(
328 DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath);
329 if (FilePath.empty()) {
330 // If we had a DW_AT_decl_file, but got no file then we need to emit a
331 // warning.
332 Out.Report("Invalid file index in DW_AT_decl_file", [&](raw_ostream &OS) {
333 const uint64_t DwarfFileIdx = dwarf::toUnsigned(
334 Die.findRecursively(dwarf::DW_AT_decl_file), UINT32_MAX);
335 OS << "error: function DIE at " << HEX32(Die.getOffset())
336 << " has an invalid file index " << DwarfFileIdx
337 << " in its DW_AT_decl_file attribute, unable to create a single "
338 << "line entry from the DW_AT_decl_file/DW_AT_decl_line "
339 << "attributes.\n";
340 });
341 return;
342 }
343 if (auto Line =
344 dwarf::toUnsigned(Die.findRecursively({dwarf::DW_AT_decl_line}))) {
345 LineEntry LE(StartAddress, Gsym.insertFile(FilePath), *Line);
346 FI.OptLineTable = LineTable();
347 FI.OptLineTable->push(LE);
348 }
349 return;
350 }
351
352 FI.OptLineTable = LineTable();
353 DWARFDebugLine::Row PrevRow;
354 for (uint32_t RowIndex : RowVector) {
355 // Take file number and line/column from the row.
356 const DWARFDebugLine::Row &Row = CUI.LineTable->Rows[RowIndex];
357 std::optional<uint32_t> OptFileIdx =
358 CUI.DWARFToGSYMFileIndex(Gsym, Row.File);
359 if (!OptFileIdx) {
360 Out.Report(
361 "Invalid file index in DWARF line table", [&](raw_ostream &OS) {
362 OS << "error: function DIE at " << HEX32(Die.getOffset()) << " has "
363 << "a line entry with invalid DWARF file index, this entry will "
364 << "be removed:\n";
365 Row.dumpTableHeader(OS, /*Indent=*/0);
366 Row.dump(OS);
367 OS << "\n";
368 });
369 continue;
370 }
371 const uint32_t FileIdx = OptFileIdx.value();
372 uint64_t RowAddress = Row.Address.Address;
373 // Watch out for a RowAddress that is in the middle of a line table entry
374 // in the DWARF. If we pass an address in between two line table entries
375 // we will get a RowIndex for the previous valid line table row which won't
376 // be contained in our function. This is usually a bug in the DWARF due to
377 // linker problems or LTO or other DWARF re-linking so it is worth emitting
378 // an error, but not worth stopping the creation of the GSYM.
379 if (!FI.Range.contains(RowAddress)) {
380 if (RowAddress < FI.Range.start()) {
381 Out.Report("Start address lies between valid Row table entries",
382 [&](raw_ostream &OS) {
383 OS << "error: DIE has a start address whose LowPC is "
384 "between the "
385 "line table Row["
386 << RowIndex << "] with address " << HEX64(RowAddress)
387 << " and the next one.\n";
389 });
390 RowAddress = FI.Range.start();
391 } else {
392 continue;
393 }
394 }
395
396 LineEntry LE(RowAddress, FileIdx, Row.Line);
397 if (RowIndex != RowVector[0] && Row.Address < PrevRow.Address) {
398 // We have seen full duplicate line tables for functions in some
399 // DWARF files. Watch for those here by checking the last
400 // row was the function's end address (HighPC) and that the
401 // current line table entry's address is the same as the first
402 // line entry we already have in our "function_info.Lines". If
403 // so break out after printing a warning.
404 auto FirstLE = FI.OptLineTable->first();
405 if (FirstLE && *FirstLE == LE)
406 // if (Log && !Gsym.isQuiet()) { TODO <-- This looks weird
407 Out.Report("Duplicate line table detected", [&](raw_ostream &OS) {
408 OS << "warning: duplicate line table detected for DIE:\n";
410 });
411 else
412 Out.Report("Non-monotonically increasing addresses",
413 [&](raw_ostream &OS) {
414 OS << "error: line table has addresses that do not "
415 << "monotonically increase:\n";
416 for (uint32_t RowIndex2 : RowVector)
417 CUI.LineTable->Rows[RowIndex2].dump(OS);
419 });
420 break;
421 }
422
423 // Skip multiple line entries for the same file and line.
424 auto LastLE = FI.OptLineTable->last();
425 if (LastLE && LastLE->File == FileIdx && LastLE->Line == Row.Line)
426 continue;
427 // Only push a row if it isn't an end sequence. End sequence markers are
428 // included for the last address in a function or the last contiguous
429 // address in a sequence.
430 if (Row.EndSequence) {
431 // End sequence means that the next line entry could have a lower address
432 // than the previous entries. So we clear the previous row so we don't
433 // trigger the line table error about addresses that do not monotonically
434 // increase.
435 PrevRow = DWARFDebugLine::Row();
436 } else {
437 FI.OptLineTable->push(LE);
438 PrevRow = Row;
439 }
440 }
441 // If no line table rows were added, clear the line table so we don't encode
442 // one in the GSYM file.
443 if (FI.OptLineTable->empty())
444 FI.OptLineTable = std::nullopt;
445}
446
447void DwarfTransformer::handleDie(OutputAggregator &Out, CUInfo &CUI,
448 DWARFDie Die) {
449 switch (Die.getTag()) {
450 case dwarf::DW_TAG_subprogram: {
452 if (!RangesOrError) {
453 consumeError(RangesOrError.takeError());
454 break;
455 }
456 const DWARFAddressRangesVector &Ranges = RangesOrError.get();
457 if (Ranges.empty())
458 break;
459 auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym);
460 if (!NameIndex) {
461 Out.Report("Function has no name", [&](raw_ostream &OS) {
462 OS << "error: function at " << HEX64(Die.getOffset())
463 << " has no name\n ";
465 });
466 break;
467 }
468 // All ranges for the subprogram DIE in case it has multiple. We need to
469 // pass this down into parseInlineInfo so we don't warn about inline
470 // ranges that are not in the current subrange of a function when they
471 // actually are in another subrange. We do this because when a function
472 // has discontiguous ranges, we create multiple function entries with only
473 // the info for that range contained inside of it.
474 AddressRanges AllSubprogramRanges = ConvertDWARFRanges(Ranges);
475
476 // Create a function_info for each range
477 for (const DWARFAddressRange &Range : Ranges) {
478 // The low PC must be less than the high PC. Many linkers don't remove
479 // DWARF for functions that don't get linked into the final executable.
480 // If both the high and low pc have relocations, linkers will often set
481 // the address values for both to the same value to indicate the function
482 // has been removed. Other linkers have been known to set one or both
483 // PC values to a UINT32_MAX for 4 byte addresses and UINT64_MAX for 8
484 // byte addresses to indicate the function isn't valid. The check below
485 // tries to watch for these cases and abort if it runs into them.
486 if (Range.LowPC >= Range.HighPC || CUI.isHighestAddress(Range.LowPC))
487 break;
488
489 // Many linkers can't remove DWARF and might set the LowPC to zero. Since
490 // high PC can be an offset from the low PC in more recent DWARF versions
491 // we need to watch for a zero'ed low pc which we do using ValidTextRanges
492 // below.
493 if (!Gsym.IsValidTextAddress(Range.LowPC)) {
494 // We expect zero and -1 to be invalid addresses in DWARF depending
495 // on the linker of the DWARF. This indicates a function was stripped
496 // and the debug info wasn't able to be stripped from the DWARF. If
497 // the LowPC isn't zero or -1, then we should emit an error.
498 if (Range.LowPC != 0) {
499 if (!Gsym.isQuiet()) {
500 // Unexpected invalid address, emit a warning
501 Out.Report("Address range starts outside executable section",
502 [&](raw_ostream &OS) {
503 OS << "warning: DIE has an address range whose "
504 "start address "
505 "is not in any executable sections ("
506 << *Gsym.GetValidTextRanges()
507 << ") and will not be processed:\n";
509 });
510 }
511 }
512 break;
513 }
514
515 FunctionInfo FI;
516 FI.Range = {Range.LowPC, Range.HighPC};
517 FI.Name = *NameIndex;
518 if (CUI.LineTable)
519 convertFunctionLineTable(Out, CUI, Die, Gsym, FI);
520
521 if (hasInlineInfo(Die, 0)) {
522 FI.Inline = InlineInfo();
523 FI.Inline->Name = *NameIndex;
524 FI.Inline->Ranges.insert(FI.Range);
525 bool WarnIfEmpty = true;
526 parseInlineInfo(Gsym, Out, CUI, Die, 0, FI, *FI.Inline,
527 AllSubprogramRanges, WarnIfEmpty);
528 // Make sure we at least got some valid inline info other than just
529 // the top level function. If we didn't then remove the inline info
530 // from the function info. We have seen cases where LTO tries to modify
531 // the DWARF for functions and it messes up the address ranges for
532 // the inline functions so it is no longer valid.
533 //
534 // By checking if there are any valid children on the top level inline
535 // information object, we will know if we got anything valid from the
536 // debug info.
537 if (FI.Inline->Children.empty()) {
538 if (WarnIfEmpty && !Gsym.isQuiet())
539 Out.Report("DIE contains inline functions with no valid ranges",
540 [&](raw_ostream &OS) {
541 OS << "warning: DIE contains inline function "
542 "information that has no valid ranges, removing "
543 "inline information:\n";
545 });
546 FI.Inline = std::nullopt;
547 }
548 }
549 Gsym.addFunctionInfo(std::move(FI));
550 }
551 } break;
552 default:
553 break;
554 }
555 for (DWARFDie ChildDie : Die.children())
556 handleDie(Out, CUI, ChildDie);
557}
558
560 size_t NumBefore = Gsym.getNumFunctionInfos();
561 auto getDie = [&](DWARFUnit &DwarfUnit) -> DWARFDie {
562 DWARFDie ReturnDie = DwarfUnit.getUnitDIE(false);
563 if (DwarfUnit.getDWOId()) {
564 DWARFUnit *DWOCU = DwarfUnit.getNonSkeletonUnitDIE(false).getDwarfUnit();
565 if (!DWOCU->isDWOUnit())
566 Out.Report(
567 "warning: Unable to retrieve DWO .debug_info section for some "
568 "object files. (Remove the --quiet flag for full output)",
569 [&](raw_ostream &OS) {
570 std::string DWOName = dwarf::toString(
571 DwarfUnit.getUnitDIE().find(
572 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
573 "");
574 OS << "warning: Unable to retrieve DWO .debug_info section for "
575 << DWOName << "\n";
576 });
577 else {
578 ReturnDie = DWOCU->getUnitDIE(false);
579 }
580 }
581 return ReturnDie;
582 };
583 if (NumThreads == 1) {
584 // Parse all DWARF data from this thread, use the same string/file table
585 // for everything
586 for (const auto &CU : DICtx.compile_units()) {
587 DWARFDie Die = getDie(*CU);
588 CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get()));
589 handleDie(Out, CUI, Die);
590 }
591 } else {
592 // LLVM Dwarf parser is not thread-safe and we need to parse all DWARF up
593 // front before we start accessing any DIEs since there might be
594 // cross compile unit references in the DWARF. If we don't do this we can
595 // end up crashing.
596
597 // We need to call getAbbreviations sequentially first so that getUnitDIE()
598 // only works with its local data.
599 for (const auto &CU : DICtx.compile_units())
600 CU->getAbbreviations();
601
602 // Now parse all DIEs in case we have cross compile unit references in a
603 // thread pool.
604 DefaultThreadPool pool(hardware_concurrency(NumThreads));
605 for (const auto &CU : DICtx.compile_units())
606 pool.async([&CU]() { CU->getUnitDIE(false /*CUDieOnly*/); });
607 pool.wait();
608
609 // Now convert all DWARF to GSYM in a thread pool.
610 std::mutex LogMutex;
611 for (const auto &CU : DICtx.compile_units()) {
612 DWARFDie Die = getDie(*CU);
613 if (Die) {
614 CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get()));
615 pool.async([this, CUI, &LogMutex, Out, Die]() mutable {
616 std::string storage;
617 raw_string_ostream StrStream(storage);
618 OutputAggregator ThreadOut(Out.GetOS() ? &StrStream : nullptr);
619 handleDie(ThreadOut, CUI, Die);
620 // Print ThreadLogStorage lines into an actual stream under a lock
621 std::lock_guard<std::mutex> guard(LogMutex);
622 if (Out.GetOS()) {
623 StrStream.flush();
624 Out << storage;
625 }
626 Out.Merge(ThreadOut);
627 });
628 }
629 }
630 pool.wait();
631 }
632 size_t FunctionsAddedCount = Gsym.getNumFunctionInfos() - NumBefore;
633 Out << "Loaded " << FunctionsAddedCount << " functions from DWARF.\n";
634 return Error::success();
635}
636
638 OutputAggregator &Out) {
639 Out << "Verifying GSYM file \"" << GsymPath << "\":\n";
640
641 auto Gsym = GsymReader::openFile(GsymPath);
642 if (!Gsym)
643 return Gsym.takeError();
644
645 auto NumAddrs = Gsym->getNumAddresses();
647 DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath,
648 DILineInfoSpecifier::FunctionNameKind::LinkageName);
649 std::string gsymFilename;
650 for (uint32_t I = 0; I < NumAddrs; ++I) {
651 auto FuncAddr = Gsym->getAddress(I);
652 if (!FuncAddr)
653 return createStringError(std::errc::invalid_argument,
654 "failed to extract address[%i]", I);
655
656 auto FI = Gsym->getFunctionInfo(*FuncAddr);
657 if (!FI)
658 return createStringError(
659 std::errc::invalid_argument,
660 "failed to extract function info for address 0x%" PRIu64, *FuncAddr);
661
662 for (auto Addr = *FuncAddr; Addr < *FuncAddr + FI->size(); ++Addr) {
663 const object::SectionedAddress SectAddr{
665 auto LR = Gsym->lookup(Addr);
666 if (!LR)
667 return LR.takeError();
668
669 auto DwarfInlineInfos =
670 DICtx.getInliningInfoForAddress(SectAddr, DLIS);
671 uint32_t NumDwarfInlineInfos = DwarfInlineInfos.getNumberOfFrames();
672 if (NumDwarfInlineInfos == 0) {
673 DwarfInlineInfos.addFrame(
674 DICtx.getLineInfoForAddress(SectAddr, DLIS));
675 }
676
677 // Check for 1 entry that has no file and line info
678 if (NumDwarfInlineInfos == 1 &&
679 DwarfInlineInfos.getFrame(0).FileName == "<invalid>") {
680 DwarfInlineInfos = DIInliningInfo();
681 NumDwarfInlineInfos = 0;
682 }
683 if (NumDwarfInlineInfos > 0 &&
684 NumDwarfInlineInfos != LR->Locations.size()) {
685 if (Out.GetOS()) {
686 raw_ostream &Log = *Out.GetOS();
687 Log << "error: address " << HEX64(Addr) << " has "
688 << NumDwarfInlineInfos << " DWARF inline frames and GSYM has "
689 << LR->Locations.size() << "\n";
690 Log << " " << NumDwarfInlineInfos << " DWARF frames:\n";
691 for (size_t Idx = 0; Idx < NumDwarfInlineInfos; ++Idx) {
692 const auto &dii = DwarfInlineInfos.getFrame(Idx);
693 Log << " [" << Idx << "]: " << dii.FunctionName << " @ "
694 << dii.FileName << ':' << dii.Line << '\n';
695 }
696 Log << " " << LR->Locations.size() << " GSYM frames:\n";
697 for (size_t Idx = 0, count = LR->Locations.size(); Idx < count;
698 ++Idx) {
699 const auto &gii = LR->Locations[Idx];
700 Log << " [" << Idx << "]: " << gii.Name << " @ " << gii.Dir
701 << '/' << gii.Base << ':' << gii.Line << '\n';
702 }
703 DwarfInlineInfos = DICtx.getInliningInfoForAddress(SectAddr, DLIS);
704 Gsym->dump(Log, *FI);
705 }
706 continue;
707 }
708
709 for (size_t Idx = 0, count = LR->Locations.size(); Idx < count;
710 ++Idx) {
711 const auto &gii = LR->Locations[Idx];
712 if (Idx < NumDwarfInlineInfos) {
713 const auto &dii = DwarfInlineInfos.getFrame(Idx);
714 gsymFilename = LR->getSourceFile(Idx);
715 // Verify function name
716 if (dii.FunctionName.find(gii.Name.str()) != 0)
717 Out << "error: address " << HEX64(Addr) << " DWARF function \""
718 << dii.FunctionName.c_str()
719 << "\" doesn't match GSYM function \"" << gii.Name << "\"\n";
720
721 // Verify source file path
722 if (dii.FileName != gsymFilename)
723 Out << "error: address " << HEX64(Addr) << " DWARF path \""
724 << dii.FileName.c_str() << "\" doesn't match GSYM path \""
725 << gsymFilename.c_str() << "\"\n";
726 // Verify source file line
727 if (dii.Line != gii.Line)
728 Out << "error: address " << HEX64(Addr) << " DWARF line "
729 << dii.Line << " != GSYM line " << gii.Line << "\n";
730 }
731 }
732 }
733 }
734 return Error::success();
735}
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static void parseInlineInfo(GsymCreator &Gsym, OutputAggregator &Out, CUInfo &CUI, DWARFDie Die, uint32_t Depth, FunctionInfo &FI, InlineInfo &Parent, const AddressRanges &AllParentRanges, bool &WarnIfEmpty)
static bool hasInlineInfo(DWARFDie Die, uint32_t Depth)
static AddressRanges ConvertDWARFRanges(const DWARFAddressRangesVector &DwarfRanges)
static std::optional< uint32_t > getQualifiedNameIndex(DWARFDie &Die, uint64_t Language, GsymCreator &Gsym)
Get the GsymCreator string table offset for the qualified name for the DIE passed in.
static DWARFDie GetParentDeclContextDIE(DWARFDie &Die)
static void convertFunctionLineTable(OutputAggregator &Out, CUInfo &CUI, DWARFDie Die, GsymCreator &Gsym, FunctionInfo &FI)
uint64_t Addr
std::string Name
#define HEX64(v)
Definition: ExtractRanges.h:20
#define HEX32(v)
Definition: ExtractRanges.h:19
#define I(x, y, z)
Definition: MD5.cpp:58
raw_pwrite_stream & OS
A class that represents an address range.
Definition: AddressRanges.h:22
uint64_t start() const
Definition: AddressRanges.h:28
bool contains(uint64_t Addr) const
Definition: AddressRanges.h:32
bool contains(uint64_t Addr) const
Definition: AddressRanges.h:66
The AddressRanges class helps normalize address range collections.
Collection::const_iterator insert(AddressRange Range)
A format-neutral container for inlined code description.
Definition: DIContext.h:92
DWARFContext This data structure is the top level entity that deals with dwarf debug information pars...
Definition: DWARFContext.h:48
DIInliningInfo getInliningInfoForAddress(object::SectionedAddress Address, DILineInfoSpecifier Specifier=DILineInfoSpecifier()) override
DILineInfo getLineInfoForAddress(object::SectionedAddress Address, DILineInfoSpecifier Specifier=DILineInfoSpecifier()) override
compile_unit_range compile_units()
Get compile units in this context.
Definition: DWARFContext.h:188
const DWARFDebugLine::LineTable * getLineTableForUnit(DWARFUnit *U)
Get a pointer to a parsed line table corresponding to a compile unit.
Utility class that carries the DWARF compile/type unit and the debug info entry in an object.
Definition: DWARFDie.h:42
uint64_t getOffset() const
Get the absolute offset into the debug info or types section.
Definition: DWARFDie.h:66
Expected< DWARFAddressRangesVector > getAddressRanges() const
Get the address ranges for this DIE.
Definition: DWARFDie.cpp:378
iterator_range< iterator > children() const
Definition: DWARFDie.h:395
DWARFDie getAttributeValueAsReferencedDie(dwarf::Attribute Attr) const
Extract the specified attribute from this DIE as the referenced DIE.
Definition: DWARFDie.cpp:307
DWARFDie getParent() const
Get the parent of this DIE object.
Definition: DWARFDie.cpp:637
std::optional< DWARFFormValue > find(dwarf::Attribute Attr) const
Extract the specified attribute from this DIE.
Definition: DWARFDie.cpp:250
std::optional< DWARFFormValue > findRecursively(ArrayRef< dwarf::Attribute > Attrs) const
Extract the first value of any attribute in Attrs from this DIE and recurse into any DW_AT_specificat...
Definition: DWARFDie.cpp:274
const char * getName(DINameKind Kind) const
Return the DIE name resolving DW_AT_specification or DW_AT_abstract_origin references if necessary.
Definition: DWARFDie.cpp:445
std::string getDeclFile(DILineInfoSpecifier::FileLineInfoKind Kind) const
Definition: DWARFDie.cpp:477
dwarf::Tag getTag() const
Definition: DWARFDie.h:71
const char * getLinkageName() const
Return the DIE linkage name resolving DW_AT_specification or DW_AT_abstract_origin references if nece...
Definition: DWARFDie.cpp:463
void dump(raw_ostream &OS, unsigned indent=0, DIDumpOptions DumpOpts=DIDumpOptions()) const
Dump the DIE and all of its attributes to the supplied stream.
Definition: DWARFDie.cpp:577
DWARFDie getUnitDIE(bool ExtractUnitDIEOnly=true)
Definition: DWARFUnit.h:443
bool isDWOUnit() const
Definition: DWARFUnit.h:318
This dwarf writer support class manages information associated with a source file.
Definition: DwarfUnit.h:35
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
static ErrorSuccess success()
Create a success value.
Definition: Error.h:334
Tagged union holding either a T or a Error.
Definition: Error.h:474
Error takeError()
Take ownership of the stored error.
Definition: Error.h:601
reference get()
Returns a reference to the stored T value.
Definition: Error.h:571
A non-threaded implementation.
Definition: ThreadPool.h:218
void wait() override
Blocking wait for all the tasks to execute first.
Definition: ThreadPool.cpp:201
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:222
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:567
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
char back() const
back - Get the last character in the string.
Definition: StringRef.h:146
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
char front() const
front - Get the first character in the string.
Definition: StringRef.h:140
auto async(Function &&F, Args &&...ArgList)
Asynchronous submission of a task to the pool.
Definition: ThreadPool.h:78
llvm::Error convert(uint32_t NumThreads, OutputAggregator &OS)
Extract the DWARF from the supplied object file and convert it into the Gsym format in the GsymCreator object that is passed in.
llvm::Error verify(StringRef GsymPath, OutputAggregator &OS)
GsymCreator is used to emit GSYM data to a stand alone file or section within a file.
Definition: GsymCreator.h:134
void addFunctionInfo(FunctionInfo &&FI)
Add a function info to this GSYM creator.
uint32_t insertString(StringRef S, bool Copy=true)
Insert a string into the GSYM string table.
const std::optional< AddressRanges > GetValidTextRanges() const
Get the valid text ranges.
Definition: GsymCreator.h:397
bool isQuiet() const
Whether the transformation should be quiet, i.e. not output warnings.
Definition: GsymCreator.h:438
uint32_t insertFile(StringRef Path, sys::path::Style Style=sys::path::Style::native)
Insert a file into this GSYM creator.
Definition: GsymCreator.cpp:29
size_t getNumFunctionInfos() const
Get the current number of FunctionInfo objects contained in this object.
bool IsValidTextAddress(uint64_t Addr) const
Check if an address is a valid code address.
static llvm::Expected< GsymReader > openFile(StringRef Path)
Construct a GsymReader from a file on disk.
Definition: GsymReader.cpp:33
LineTable class contains deserialized versions of line tables for each function's address ranges.
Definition: LineTable.h:118
size_t size() const
Definition: LineTable.h:193
void Report(StringRef s, std::function< void(raw_ostream &o)> detailCallback)
raw_ostream * GetOS() const
void Merge(const OutputAggregator &other)
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:660
#define UINT64_MAX
Definition: DataTypes.h:77
std::optional< const char * > toString(const std::optional< DWARFFormValue > &V)
Take an optional DWARFFormValue and try to extract a string value from it.
std::optional< uint64_t > toUnsigned(const std::optional< DWARFFormValue > &V)
Take an optional DWARFFormValue and try to extract an unsigned constant.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
ThreadPoolStrategy hardware_concurrency(unsigned ThreadCount=0)
Returns a default thread strategy where all available hardware resources are to be used, except for those initially excluded by an affinity mask.
Definition: Threading.h:185
std::vector< DWARFAddressRange > DWARFAddressRangesVector
DWARFAddressRangesVector - represents a set of absolute address ranges.
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition: Error.h:1258
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the given range.
Definition: STLExtras.h:1914
std::function< Expected< AddStreamFn >(unsigned Task, StringRef Key, const Twine &ModuleName)> FileCache
This is the type of a file cache.
Definition: Caching.h:58
void consumeError(Error Err)
Consume a Error without doing anything.
Definition: Error.h:1041
static DIDumpOptions getForSingleDIE()
Return default option set for printing a single DIE without children.
Definition: DIContext.h:214
Controls which fields of DILineInfo container should be filled with data.
Definition: DIContext.h:144
bool lookupAddressRange(object::SectionedAddress Address, uint64_t Size, std::vector< uint32_t > &Result) const
Standard .debug_line state machine structure.
object::SectionedAddress Address
The program-counter value corresponding to a machine instruction generated by the compiler and section index pointing to the section containing this PC.
const DWARFDebugLine::LineTable * LineTable
std::optional< uint32_t > DWARFToGSYMFileIndex(GsymCreator &Gsym, uint32_t DwarfFileIdx)
Convert a DWARF compile unit file index into a GSYM global file index.
CUInfo(DWARFContext &DICtx, DWARFCompileUnit *CU)
bool isHighestAddress(uint64_t Addr) const
Return true if Addr is the highest address for a given compile unit.
std::vector< uint32_t > FileCache
Function information in GSYM files encodes information for one contiguous address range.
Definition: FunctionInfo.h:88
std::optional< InlineInfo > Inline
Definition: FunctionInfo.h:92
uint64_t startAddress() const
Definition: FunctionInfo.h:185
uint64_t endAddress() const
Definition: FunctionInfo.h:186
uint64_t size() const
Definition: FunctionInfo.h:187
uint32_t Name
String table offset in the string table.
Definition: FunctionInfo.h:90
std::optional< LineTable > OptLineTable
Definition: FunctionInfo.h:91
Inline information stores the name of the inline function along with an array of address ranges.
Definition: InlineInfo.h:59
std::vector< InlineInfo > Children
Definition: InlineInfo.h:65
AddressRanges Ranges
Definition: InlineInfo.h:64
uint32_t CallFile
1 based file index in the file table.
Definition: InlineInfo.h:62
uint32_t CallLine
Source line number.
Definition: InlineInfo.h:63
uint32_t Name
String table offset in the string table.
Definition: InlineInfo.h:61
Line entries are used to encode the line tables in FunctionInfo objects.
Definition: LineEntry.h:22
static const uint64_t UndefSection
Definition: ObjectFile.h:146