LLVM 22.0.0git
Symbolize.cpp
Go to the documentation of this file.
1//===-- LLVMSymbolize.cpp -------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Implementation for LLVM symbolization library.
10//
11//===----------------------------------------------------------------------===//
12
14
15#include "llvm/ADT/STLExtras.h"
24#include "llvm/Object/Archive.h"
25#include "llvm/Object/BuildID.h"
26#include "llvm/Object/COFF.h"
28#include "llvm/Object/MachO.h"
30#include "llvm/Support/CRC.h"
33#include "llvm/Support/Errc.h"
36#include "llvm/Support/Path.h"
37#include <cassert>
38#include <cstring>
39
40namespace llvm {
41namespace codeview {
42union DebugInfo;
43}
44namespace symbolize {
45
47
49 : Opts(Opts),
50 BIDFetcher(std::make_unique<BuildIDFetcher>(Opts.DebugFileDirectory)) {}
51
53
54template <typename T>
56LLVMSymbolizer::symbolizeCodeCommon(const T &ModuleSpecifier,
57 object::SectionedAddress ModuleOffset) {
58
59 auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier);
60 if (!InfoOrErr)
61 return InfoOrErr.takeError();
62
63 SymbolizableModule *Info = *InfoOrErr;
64
65 // A null module means an error has already been reported. Return an empty
66 // result.
67 if (!Info)
68 return DILineInfo();
69
70 // If the user is giving us relative addresses, add the preferred base of the
71 // object to the offset before we do the query. It's what DIContext expects.
72 if (Opts.RelativeAddresses)
73 ModuleOffset.Address += Info->getModulePreferredBase();
74
75 DILineInfo LineInfo = Info->symbolizeCode(
76 ModuleOffset,
78 Opts.SkipLineZero),
79 Opts.UseSymbolTable);
80 if (Opts.Demangle)
81 LineInfo.FunctionName = DemangleName(LineInfo.FunctionName, Info);
82 return LineInfo;
83}
84
87 object::SectionedAddress ModuleOffset) {
88 return symbolizeCodeCommon(Obj, ModuleOffset);
89}
90
93 object::SectionedAddress ModuleOffset) {
94 return symbolizeCodeCommon(ModuleName, ModuleOffset);
95}
96
99 object::SectionedAddress ModuleOffset) {
100 return symbolizeCodeCommon(BuildID, ModuleOffset);
101}
102
103template <typename T>
104Expected<DIInliningInfo> LLVMSymbolizer::symbolizeInlinedCodeCommon(
105 const T &ModuleSpecifier, object::SectionedAddress ModuleOffset) {
106 auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier);
107 if (!InfoOrErr)
108 return InfoOrErr.takeError();
109
110 SymbolizableModule *Info = *InfoOrErr;
111
112 // A null module means an error has already been reported. Return an empty
113 // result.
114 if (!Info)
115 return DIInliningInfo();
116
117 // If the user is giving us relative addresses, add the preferred base of the
118 // object to the offset before we do the query. It's what DIContext expects.
119 if (Opts.RelativeAddresses)
120 ModuleOffset.Address += Info->getModulePreferredBase();
121
122 DIInliningInfo InlinedContext = Info->symbolizeInlinedCode(
123 ModuleOffset,
125 Opts.SkipLineZero),
126 Opts.UseSymbolTable);
127 if (Opts.Demangle) {
128 for (int i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) {
129 auto *Frame = InlinedContext.getMutableFrame(i);
130 Frame->FunctionName = DemangleName(Frame->FunctionName, Info);
131 }
132 }
133 return InlinedContext;
134}
135
136Expected<DIInliningInfo>
138 object::SectionedAddress ModuleOffset) {
139 return symbolizeInlinedCodeCommon(Obj, ModuleOffset);
140}
141
144 object::SectionedAddress ModuleOffset) {
145 return symbolizeInlinedCodeCommon(ModuleName, ModuleOffset);
146}
147
150 object::SectionedAddress ModuleOffset) {
151 return symbolizeInlinedCodeCommon(BuildID, ModuleOffset);
152}
153
154template <typename T>
156LLVMSymbolizer::symbolizeDataCommon(const T &ModuleSpecifier,
157 object::SectionedAddress ModuleOffset) {
158
159 auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier);
160 if (!InfoOrErr)
161 return InfoOrErr.takeError();
162
163 SymbolizableModule *Info = *InfoOrErr;
164 // A null module means an error has already been reported. Return an empty
165 // result.
166 if (!Info)
167 return DIGlobal();
168
169 // If the user is giving us relative addresses, add the preferred base of
170 // the object to the offset before we do the query. It's what DIContext
171 // expects.
172 if (Opts.RelativeAddresses)
173 ModuleOffset.Address += Info->getModulePreferredBase();
174
175 DIGlobal Global = Info->symbolizeData(ModuleOffset);
176 if (Opts.Demangle)
177 Global.Name = DemangleName(Global.Name, Info);
178 return Global;
179}
180
183 object::SectionedAddress ModuleOffset) {
184 return symbolizeDataCommon(Obj, ModuleOffset);
185}
186
189 object::SectionedAddress ModuleOffset) {
190 return symbolizeDataCommon(ModuleName, ModuleOffset);
191}
192
195 object::SectionedAddress ModuleOffset) {
196 return symbolizeDataCommon(BuildID, ModuleOffset);
197}
198
199template <typename T>
201LLVMSymbolizer::symbolizeFrameCommon(const T &ModuleSpecifier,
202 object::SectionedAddress ModuleOffset) {
203 auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier);
204 if (!InfoOrErr)
205 return InfoOrErr.takeError();
206
207 SymbolizableModule *Info = *InfoOrErr;
208 // A null module means an error has already been reported. Return an empty
209 // result.
210 if (!Info)
211 return std::vector<DILocal>();
212
213 // If the user is giving us relative addresses, add the preferred base of
214 // the object to the offset before we do the query. It's what DIContext
215 // expects.
216 if (Opts.RelativeAddresses)
217 ModuleOffset.Address += Info->getModulePreferredBase();
218
219 return Info->symbolizeFrame(ModuleOffset);
220}
221
224 object::SectionedAddress ModuleOffset) {
225 return symbolizeFrameCommon(Obj, ModuleOffset);
226}
227
230 object::SectionedAddress ModuleOffset) {
231 return symbolizeFrameCommon(ModuleName, ModuleOffset);
232}
233
236 object::SectionedAddress ModuleOffset) {
237 return symbolizeFrameCommon(BuildID, ModuleOffset);
238}
239
240template <typename T>
242LLVMSymbolizer::findSymbolCommon(const T &ModuleSpecifier, StringRef Symbol,
244 auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier);
245 if (!InfoOrErr)
246 return InfoOrErr.takeError();
247
248 SymbolizableModule *Info = *InfoOrErr;
249 std::vector<DILineInfo> Result;
250
251 // A null module means an error has already been reported. Return an empty
252 // result.
253 if (!Info)
254 return Result;
255
256 for (object::SectionedAddress A : Info->findSymbol(Symbol, Offset)) {
257 DILineInfo LineInfo = Info->symbolizeCode(
259 Opts.UseSymbolTable);
260 if (LineInfo.FileName != DILineInfo::BadString) {
261 if (Opts.Demangle)
262 LineInfo.FunctionName = DemangleName(LineInfo.FunctionName, Info);
263 Result.push_back(std::move(LineInfo));
264 }
265 }
266
267 return Result;
268}
269
270Expected<std::vector<DILineInfo>>
273 return findSymbolCommon(Obj, Symbol, Offset);
274}
275
279 return findSymbolCommon(ModuleName, Symbol, Offset);
280}
281
285 return findSymbolCommon(BuildID, Symbol, Offset);
286}
287
289 ObjectFileCache.clear();
290 LRUBinaries.clear();
291 CacheSize = 0;
292 BinaryForPath.clear();
293 ObjectPairForPathArch.clear();
294 Modules.clear();
295 BuildIDPaths.clear();
296}
297
298namespace {
299
300// For Path="/path/to/foo" and Basename="foo" assume that debug info is in
301// /path/to/foo.dSYM/Contents/Resources/DWARF/foo.
302// For Path="/path/to/bar.dSYM" and Basename="foo" assume that debug info is in
303// /path/to/bar.dSYM/Contents/Resources/DWARF/foo.
304std::string getDarwinDWARFResourceForPath(const std::string &Path,
305 const std::string &Basename) {
306 SmallString<16> ResourceName = StringRef(Path);
307 if (sys::path::extension(Path) != ".dSYM") {
308 ResourceName += ".dSYM";
309 }
310 sys::path::append(ResourceName, "Contents", "Resources", "DWARF");
311 sys::path::append(ResourceName, Basename);
312 return std::string(ResourceName);
313}
314
315bool checkFileCRC(StringRef Path, uint32_t CRCHash) {
316 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
318 if (!MB)
319 return false;
320 return CRCHash == llvm::crc32(arrayRefFromStringRef(MB.get()->getBuffer()));
321}
322
323bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName,
324 uint32_t &CRCHash) {
325 if (!Obj)
326 return false;
327 for (const SectionRef &Section : Obj->sections()) {
328 StringRef Name;
329 consumeError(Section.getName().moveInto(Name));
330
331 Name = Name.substr(Name.find_first_not_of("._"));
332 if (Name == "gnu_debuglink") {
333 Expected<StringRef> ContentsOrErr = Section.getContents();
334 if (!ContentsOrErr) {
335 consumeError(ContentsOrErr.takeError());
336 return false;
337 }
338 DataExtractor DE(*ContentsOrErr, Obj->isLittleEndian(), 0);
339 uint64_t Offset = 0;
340 if (const char *DebugNameStr = DE.getCStr(&Offset)) {
341 // 4-byte align the offset.
342 Offset = (Offset + 3) & ~0x3;
343 if (DE.isValidOffsetForDataOfSize(Offset, 4)) {
344 DebugName = DebugNameStr;
345 CRCHash = DE.getU32(&Offset);
346 return true;
347 }
348 }
349 break;
350 }
351 }
352 return false;
353}
354
355bool darwinDsymMatchesBinary(const MachOObjectFile *DbgObj,
356 const MachOObjectFile *Obj) {
357 ArrayRef<uint8_t> dbg_uuid = DbgObj->getUuid();
358 ArrayRef<uint8_t> bin_uuid = Obj->getUuid();
359 if (dbg_uuid.empty() || bin_uuid.empty())
360 return false;
361 return !memcmp(dbg_uuid.data(), bin_uuid.data(), dbg_uuid.size());
362}
363
364} // end anonymous namespace
365
366ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath,
367 const MachOObjectFile *MachExeObj,
368 const std::string &ArchName) {
369 // On Darwin we may find DWARF in separate object file in
370 // resource directory.
371 std::vector<std::string> DsymPaths;
372 StringRef Filename = sys::path::filename(ExePath);
373 DsymPaths.push_back(
374 getDarwinDWARFResourceForPath(ExePath, std::string(Filename)));
375 for (const auto &Path : Opts.DsymHints) {
376 DsymPaths.push_back(
377 getDarwinDWARFResourceForPath(Path, std::string(Filename)));
378 }
379 for (const auto &Path : DsymPaths) {
380 auto DbgObjOrErr = getOrCreateObject(Path, ArchName);
381 if (!DbgObjOrErr) {
382 // Ignore errors, the file might not exist.
383 consumeError(DbgObjOrErr.takeError());
384 continue;
385 }
386 ObjectFile *DbgObj = DbgObjOrErr.get();
387 if (!DbgObj)
388 continue;
389 const MachOObjectFile *MachDbgObj = dyn_cast<const MachOObjectFile>(DbgObj);
390 if (!MachDbgObj)
391 continue;
392 if (darwinDsymMatchesBinary(MachDbgObj, MachExeObj))
393 return DbgObj;
394 }
395 return nullptr;
396}
397
398ObjectFile *LLVMSymbolizer::lookUpDebuglinkObject(const std::string &Path,
399 const ObjectFile *Obj,
400 const std::string &ArchName) {
401 std::string DebuglinkName;
402 uint32_t CRCHash;
403 std::string DebugBinaryPath;
404 if (!getGNUDebuglinkContents(Obj, DebuglinkName, CRCHash))
405 return nullptr;
406 if (!findDebugBinary(Path, DebuglinkName, CRCHash, DebugBinaryPath))
407 return nullptr;
408 auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName);
409 if (!DbgObjOrErr) {
410 // Ignore errors, the file might not exist.
411 consumeError(DbgObjOrErr.takeError());
412 return nullptr;
413 }
414 return DbgObjOrErr.get();
415}
416
417ObjectFile *LLVMSymbolizer::lookUpBuildIDObject(const std::string &Path,
418 const ELFObjectFileBase *Obj,
419 const std::string &ArchName) {
420 auto BuildID = getBuildID(Obj);
421 if (BuildID.size() < 2)
422 return nullptr;
423 std::string DebugBinaryPath;
424 if (!getOrFindDebugBinary(BuildID, DebugBinaryPath))
425 return nullptr;
426 auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName);
427 if (!DbgObjOrErr) {
428 consumeError(DbgObjOrErr.takeError());
429 return nullptr;
430 }
431 return DbgObjOrErr.get();
432}
433
434bool LLVMSymbolizer::findDebugBinary(const std::string &OrigPath,
435 const std::string &DebuglinkName,
436 uint32_t CRCHash, std::string &Result) {
437 SmallString<16> OrigDir(OrigPath);
439 SmallString<16> DebugPath = OrigDir;
440 // Try relative/path/to/original_binary/debuglink_name
441 llvm::sys::path::append(DebugPath, DebuglinkName);
442 if (checkFileCRC(DebugPath, CRCHash)) {
443 Result = std::string(DebugPath);
444 return true;
445 }
446 // Try relative/path/to/original_binary/.debug/debuglink_name
447 DebugPath = OrigDir;
448 llvm::sys::path::append(DebugPath, ".debug", DebuglinkName);
449 if (checkFileCRC(DebugPath, CRCHash)) {
450 Result = std::string(DebugPath);
451 return true;
452 }
453 // Make the path absolute so that lookups will go to
454 // "/usr/lib/debug/full/path/to/debug", not
455 // "/usr/lib/debug/to/debug"
457 if (!Opts.FallbackDebugPath.empty()) {
458 // Try <FallbackDebugPath>/absolute/path/to/original_binary/debuglink_name
459 DebugPath = Opts.FallbackDebugPath;
460 } else {
461#if defined(__NetBSD__)
462 // Try /usr/libdata/debug/absolute/path/to/original_binary/debuglink_name
463 DebugPath = "/usr/libdata/debug";
464#else
465 // Try /usr/lib/debug/absolute/path/to/original_binary/debuglink_name
466 DebugPath = "/usr/lib/debug";
467#endif
468 }
470 DebuglinkName);
471 if (checkFileCRC(DebugPath, CRCHash)) {
472 Result = std::string(DebugPath);
473 return true;
474 }
475 return false;
476}
477
479 return StringRef(reinterpret_cast<const char *>(BuildID.data()),
480 BuildID.size());
481}
482
483bool LLVMSymbolizer::getOrFindDebugBinary(const ArrayRef<uint8_t> BuildID,
484 std::string &Result) {
485 StringRef BuildIDStr = getBuildIDStr(BuildID);
486 auto I = BuildIDPaths.find(BuildIDStr);
487 if (I != BuildIDPaths.end()) {
488 Result = I->second;
489 return true;
490 }
491 if (!BIDFetcher)
492 return false;
493 if (std::optional<std::string> Path = BIDFetcher->fetch(BuildID)) {
494 Result = *Path;
495 auto InsertResult = BuildIDPaths.insert({BuildIDStr, Result});
496 assert(InsertResult.second);
497 (void)InsertResult;
498 return true;
499 }
500
501 return false;
502}
503
504std::string LLVMSymbolizer::lookUpGsymFile(const std::string &Path) {
505 if (Opts.DisableGsym)
506 return {};
507
508 auto CheckGsymFile = [](const llvm::StringRef &GsymPath) {
509 sys::fs::file_status Status;
510 std::error_code EC = llvm::sys::fs::status(GsymPath, Status);
511 return !EC && !llvm::sys::fs::is_directory(Status);
512 };
513
514 // First, look beside the binary file
515 if (const auto GsymPath = Path + ".gsym"; CheckGsymFile(GsymPath))
516 return GsymPath;
517
518 // Then, look in the directories specified by GsymFileDirectory
519
520 for (const auto &Directory : Opts.GsymFileDirectory) {
521 SmallString<16> GsymPath = llvm::StringRef{Directory};
523 llvm::sys::path::filename(Path) + ".gsym");
524
525 if (CheckGsymFile(GsymPath))
526 return static_cast<std::string>(GsymPath);
527 }
528
529 return {};
530}
531
532Expected<LLVMSymbolizer::ObjectPair>
533LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path,
534 const std::string &ArchName) {
535 auto I = ObjectPairForPathArch.find(std::make_pair(Path, ArchName));
536 if (I != ObjectPairForPathArch.end()) {
537 recordAccess(BinaryForPath.find(Path)->second);
538 return I->second;
539 }
540
541 auto ObjOrErr = getOrCreateObject(Path, ArchName);
542 if (!ObjOrErr) {
543 ObjectPairForPathArch.emplace(std::make_pair(Path, ArchName),
544 ObjectPair(nullptr, nullptr));
545 return ObjOrErr.takeError();
546 }
547
548 ObjectFile *Obj = ObjOrErr.get();
549 assert(Obj != nullptr);
550 ObjectFile *DbgObj = nullptr;
551
552 if (auto MachObj = dyn_cast<const MachOObjectFile>(Obj))
553 DbgObj = lookUpDsymFile(Path, MachObj, ArchName);
554 else if (auto ELFObj = dyn_cast<const ELFObjectFileBase>(Obj))
555 DbgObj = lookUpBuildIDObject(Path, ELFObj, ArchName);
556 if (!DbgObj)
557 DbgObj = lookUpDebuglinkObject(Path, Obj, ArchName);
558 if (!DbgObj)
559 DbgObj = Obj;
560 ObjectPair Res = std::make_pair(Obj, DbgObj);
561 auto Pair =
562 ObjectPairForPathArch.emplace(std::make_pair(Path, ArchName), Res);
563 std::string FullDbgObjKey;
564 auto It = ObjectToArchivePath.find(DbgObj);
565 if (It != ObjectToArchivePath.end()) {
566 StringRef ArchivePath = It->second;
567 StringRef MemberName = sys::path::filename(DbgObj->getFileName());
568 FullDbgObjKey = (ArchivePath + "(" + MemberName + ")").str();
569 } else {
570 FullDbgObjKey = DbgObj->getFileName().str();
571 }
572 BinaryForPath.find(FullDbgObjKey)
573 ->second.pushEvictor(
574 [this, I = Pair.first]() { ObjectPairForPathArch.erase(I); });
575 return Res;
576}
577
578Expected<object::Binary *>
579LLVMSymbolizer::loadOrGetBinary(const std::string &ArchivePathKey,
580 std::optional<StringRef> FullPathKey) {
581 // If no separate cache key is provided, use the archive path itself.
582 std::string FullPathKeyStr =
583 FullPathKey ? FullPathKey->str() : ArchivePathKey;
584 auto Pair = BinaryForPath.emplace(FullPathKeyStr, OwningBinary<Binary>());
585 if (!Pair.second) {
586 recordAccess(Pair.first->second);
587 return Pair.first->second->getBinary();
588 }
589
590 Expected<OwningBinary<Binary>> BinOrErr = createBinary(ArchivePathKey);
591 if (!BinOrErr)
592 return BinOrErr.takeError();
593
594 CachedBinary &CachedBin = Pair.first->second;
595 CachedBin = std::move(*BinOrErr);
596 CachedBin.pushEvictor([this, I = Pair.first]() { BinaryForPath.erase(I); });
597 LRUBinaries.push_back(CachedBin);
598 CacheSize += CachedBin.size();
599 return CachedBin->getBinary();
600}
601
602Expected<ObjectFile *> LLVMSymbolizer::findOrCacheObject(
603 const ContainerCacheKey &Key,
604 llvm::function_ref<Expected<std::unique_ptr<ObjectFile>>()> Loader,
605 const std::string &PathForBinaryCache) {
606 auto It = ObjectFileCache.find(Key);
607 if (It != ObjectFileCache.end())
608 return It->second.get();
609
610 Expected<std::unique_ptr<ObjectFile>> ObjOrErr = Loader();
611 if (!ObjOrErr) {
612 ObjectFileCache.emplace(Key, std::unique_ptr<ObjectFile>());
613 return ObjOrErr.takeError();
614 }
615
616 ObjectFile *Res = ObjOrErr->get();
617 auto NewEntry = ObjectFileCache.emplace(Key, std::move(*ObjOrErr));
618 auto CacheIter = BinaryForPath.find(PathForBinaryCache);
619 if (CacheIter != BinaryForPath.end())
620 CacheIter->second.pushEvictor(
621 [this, Iter = NewEntry.first]() { ObjectFileCache.erase(Iter); });
622 return Res;
623}
624
625Expected<ObjectFile *> LLVMSymbolizer::getOrCreateObjectFromArchive(
626 StringRef ArchivePath, StringRef MemberName, StringRef ArchName,
627 StringRef FullPath) {
628 Expected<object::Binary *> BinOrErr =
629 loadOrGetBinary(ArchivePath.str(), FullPath);
630 if (!BinOrErr)
631 return BinOrErr.takeError();
632 object::Binary *Bin = *BinOrErr;
633
635 if (!Archive)
636 return createStringError(std::errc::invalid_argument,
637 "'%s' is not a valid archive",
638 ArchivePath.str().c_str());
639
640 Error Err = Error::success();
641 for (auto &Child : Archive->children(Err, /*SkipInternal=*/true)) {
642 Expected<StringRef> NameOrErr = Child.getName();
643 if (!NameOrErr) {
644 // TODO: Report this as a warning to the client. Consider adding a
645 // callback mechanism to report warning-level issues.
646 consumeError(NameOrErr.takeError());
647 continue;
648 }
649 if (*NameOrErr == MemberName) {
650 Expected<std::unique_ptr<object::Binary>> MemberOrErr =
651 Child.getAsBinary();
652 if (!MemberOrErr) {
653 // TODO: Report this as a warning to the client. Consider adding a
654 // callback mechanism to report warning-level issues.
655 consumeError(MemberOrErr.takeError());
656 continue;
657 }
658
659 std::unique_ptr<object::Binary> Binary = std::move(*MemberOrErr);
660 if (auto *Obj = dyn_cast<object::ObjectFile>(Binary.get())) {
661 ObjectToArchivePath[Obj] = ArchivePath.str();
662 Triple::ArchType ObjArch = Obj->makeTriple().getArch();
663 Triple RequestedTriple;
664 RequestedTriple.setArch(Triple::getArchTypeForLLVMName(ArchName));
665 if (ObjArch != RequestedTriple.getArch())
666 continue;
667
668 ContainerCacheKey CacheKey{ArchivePath.str(), MemberName.str(),
669 ArchName.str()};
670 Expected<ObjectFile *> Res = findOrCacheObject(
671 CacheKey,
672 [O = std::unique_ptr<ObjectFile>(
673 Obj)]() mutable -> Expected<std::unique_ptr<ObjectFile>> {
674 return std::move(O);
675 },
676 ArchivePath.str());
677 Binary.release();
678 return Res;
679 }
680 }
681 }
682 if (Err)
683 return std::move(Err);
684 return createStringError(std::errc::invalid_argument,
685 "no matching member '%s' with arch '%s' in '%s'",
686 MemberName.str().c_str(), ArchName.str().c_str(),
687 ArchivePath.str().c_str());
688}
689
690Expected<ObjectFile *>
691LLVMSymbolizer::getOrCreateObject(const std::string &Path,
692 const std::string &ArchName) {
693 // First check for archive(member) format - more efficient to check closing
694 // paren first.
695 if (!Path.empty() && Path.back() == ')') {
696 size_t OpenParen = Path.rfind('(', Path.size() - 1);
697 if (OpenParen != std::string::npos) {
698 StringRef ArchivePath = StringRef(Path).substr(0, OpenParen);
699 StringRef MemberName =
700 StringRef(Path).substr(OpenParen + 1, Path.size() - OpenParen - 2);
701 return getOrCreateObjectFromArchive(ArchivePath, MemberName, ArchName,
702 Path);
703 }
704 }
705
706 Expected<object::Binary *> BinOrErr = loadOrGetBinary(Path);
707 if (!BinOrErr)
708 return BinOrErr.takeError();
709 object::Binary *Bin = *BinOrErr;
710
711 if (MachOUniversalBinary *UB = dyn_cast_or_null<MachOUniversalBinary>(Bin)) {
712 ContainerCacheKey CacheKey{Path, "", ArchName};
713 return findOrCacheObject(
714 CacheKey,
715 [UB, ArchName]() -> Expected<std::unique_ptr<ObjectFile>> {
716 return UB->getMachOObjectForArch(ArchName);
717 },
718 Path);
719 }
720 if (Bin->isObject()) {
721 return cast<ObjectFile>(Bin);
722 }
723 return errorCodeToError(object_error::arch_not_found);
724}
725
726Expected<SymbolizableModule *>
727LLVMSymbolizer::createModuleInfo(const ObjectFile *Obj,
728 std::unique_ptr<DIContext> Context,
729 StringRef ModuleName) {
730 auto InfoOrErr = SymbolizableObjectFile::create(Obj, std::move(Context),
731 Opts.UntagAddresses);
732 std::unique_ptr<SymbolizableModule> SymMod;
733 if (InfoOrErr)
734 SymMod = std::move(*InfoOrErr);
735 auto InsertResult = Modules.insert(
736 std::make_pair(std::string(ModuleName), std::move(SymMod)));
737 assert(InsertResult.second);
738 if (!InfoOrErr)
739 return InfoOrErr.takeError();
740 return InsertResult.first->second.get();
741}
742
743Expected<SymbolizableModule *>
745 StringRef BinaryName = ModuleName;
746 StringRef ArchName = Opts.DefaultArch;
747 size_t ColonPos = ModuleName.find_last_of(':');
748 // Verify that the substring after the colon forms a valid arch name.
749 if (ColonPos != std::string::npos) {
750 StringRef ArchStr = ModuleName.substr(ColonPos + 1);
751 if (Triple(ArchStr).getArch() != Triple::UnknownArch) {
752 BinaryName = ModuleName.substr(0, ColonPos);
753 ArchName = ArchStr;
754 }
755 }
756
757 auto I = Modules.find(ModuleName);
758 if (I != Modules.end()) {
759 recordAccess(BinaryForPath.find(BinaryName)->second);
760 return I->second.get();
761 }
762
763 auto ObjectsOrErr =
764 getOrCreateObjectPair(std::string{BinaryName}, std::string{ArchName});
765 if (!ObjectsOrErr) {
766 // Failed to find valid object file.
767 Modules.emplace(ModuleName, std::unique_ptr<SymbolizableModule>());
768 return ObjectsOrErr.takeError();
769 }
770 ObjectPair Objects = ObjectsOrErr.get();
771
772 std::unique_ptr<DIContext> Context;
773 // If this is a COFF object containing PDB info and not containing DWARF
774 // section, use a PDBContext to symbolize. Otherwise, use DWARF.
775 // Create a DIContext to symbolize as follows:
776 // - If there is a GSYM file, create a GsymContext.
777 // - Otherwise, if this is a COFF object containing PDB info, create a
778 // PDBContext.
779 // - Otherwise, create a DWARFContext.
780 const auto GsymFile = lookUpGsymFile(BinaryName.str());
781 if (!GsymFile.empty()) {
782 auto ReaderOrErr = gsym::GsymReader::openFile(GsymFile);
783
784 if (ReaderOrErr) {
785 std::unique_ptr<gsym::GsymReader> Reader =
786 std::make_unique<gsym::GsymReader>(std::move(*ReaderOrErr));
787
788 Context = std::make_unique<gsym::GsymContext>(std::move(Reader));
789 }
790 }
791 if (!Context) {
792 if (auto CoffObject = dyn_cast<COFFObjectFile>(Objects.first)) {
794 StringRef PDBFileName;
795 auto EC = CoffObject->getDebugPDBInfo(DebugInfo, PDBFileName);
796 // Use DWARF if there are DWARF sections.
797 bool HasDwarf = llvm::any_of(
798 Objects.first->sections(), [](SectionRef Section) -> bool {
799 if (Expected<StringRef> SectionName = Section.getName())
800 return SectionName.get() == ".debug_info";
801 return false;
802 });
803 if (!EC && !HasDwarf && DebugInfo != nullptr && !PDBFileName.empty()) {
804 using namespace pdb;
805 std::unique_ptr<IPDBSession> Session;
806
807 PDB_ReaderType ReaderType =
808 Opts.UseDIA ? PDB_ReaderType::DIA : PDB_ReaderType::Native;
809 if (auto Err = loadDataForEXE(ReaderType, Objects.first->getFileName(),
810 Session)) {
811 Modules.emplace(ModuleName, std::unique_ptr<SymbolizableModule>());
812 // Return along the PDB filename to provide more context
813 return createFileError(PDBFileName, std::move(Err));
814 }
815 Context.reset(new PDBContext(*CoffObject, std::move(Session)));
816 }
817 }
818 }
819 if (!Context)
820 Context = DWARFContext::create(
822 nullptr, Opts.DWPName);
823 auto ModuleOrErr =
824 createModuleInfo(Objects.first, std::move(Context), ModuleName);
825 if (ModuleOrErr) {
826 auto I = Modules.find(ModuleName);
827 BinaryForPath.find(BinaryName)->second.pushEvictor([this, I]() {
828 Modules.erase(I);
829 });
830 }
831 return ModuleOrErr;
832}
833
834// For BPF programs .BTF.ext section contains line numbers information,
835// use it if regular DWARF is not available (e.g. for stripped binary).
836static bool useBTFContext(const ObjectFile &Obj) {
837 return Obj.makeTriple().isBPF() && !Obj.hasDebugInfo() &&
839}
840
843 StringRef ObjName = Obj.getFileName();
844 auto I = Modules.find(ObjName);
845 if (I != Modules.end())
846 return I->second.get();
847
848 std::unique_ptr<DIContext> Context;
849 if (useBTFContext(Obj))
850 Context = BTFContext::create(Obj);
851 else
852 Context = DWARFContext::create(Obj);
853 // FIXME: handle COFF object with PDB info to use PDBContext
854 return createModuleInfo(&Obj, std::move(Context), ObjName);
855}
856
857Expected<SymbolizableModule *>
859 std::string Path;
860 if (!getOrFindDebugBinary(BuildID, Path)) {
862 "could not find build ID");
863 }
864 return getOrCreateModuleInfo(Path);
865}
866
867namespace {
868
869// Undo these various manglings for Win32 extern "C" functions:
870// cdecl - _foo
871// stdcall - _foo@12
872// fastcall - @foo@12
873// vectorcall - foo@@12
874// These are all different linkage names for 'foo'.
875StringRef demanglePE32ExternCFunc(StringRef SymbolName) {
876 char Front = SymbolName.empty() ? '\0' : SymbolName[0];
877
878 // Remove any '@[0-9]+' suffix.
879 bool HasAtNumSuffix = false;
880 if (Front != '?') {
881 size_t AtPos = SymbolName.rfind('@');
882 if (AtPos != StringRef::npos &&
883 all_of(drop_begin(SymbolName, AtPos + 1), isDigit)) {
884 SymbolName = SymbolName.substr(0, AtPos);
885 HasAtNumSuffix = true;
886 }
887 }
888
889 // Remove any ending '@' for vectorcall.
890 bool IsVectorCall = false;
891 if (HasAtNumSuffix && SymbolName.ends_with("@")) {
892 SymbolName = SymbolName.drop_back();
893 IsVectorCall = true;
894 }
895
896 // If not vectorcall, remove any '_' or '@' prefix.
897 if (!IsVectorCall && (Front == '_' || Front == '@'))
898 SymbolName = SymbolName.drop_front();
899
900 return SymbolName;
901}
902
903} // end anonymous namespace
904
905std::string
907 const SymbolizableModule *DbiModuleDescriptor) {
908 std::string Result;
909 if (nonMicrosoftDemangle(Name, Result))
910 return Result;
911
912 if (Name.starts_with('?')) {
913 // Only do MSVC C++ demangling on symbols starting with '?'.
914 int status = 0;
915 char *DemangledName = microsoftDemangle(
916 Name, nullptr, &status,
919 if (status != 0)
920 return std::string{Name};
921 Result = DemangledName;
922 free(DemangledName);
923 return Result;
924 }
925
926 if (DbiModuleDescriptor && DbiModuleDescriptor->isWin32Module()) {
927 std::string DemangledCName(demanglePE32ExternCFunc(Name));
928 // On i386 Windows, the C name mangling for different calling conventions
929 // may also be applied on top of the Itanium or Rust name mangling.
930 if (nonMicrosoftDemangle(DemangledCName, Result))
931 return Result;
932 return DemangledCName;
933 }
934 return std::string{Name};
935}
936
937void LLVMSymbolizer::recordAccess(CachedBinary &Bin) {
938 if (Bin->getBinary())
939 LRUBinaries.splice(LRUBinaries.end(), LRUBinaries, Bin.getIterator());
940}
941
943 // Evict the LRU binary until the max cache size is reached or there's <= 1
944 // item in the cache. The MRU binary is always kept to avoid thrashing if it's
945 // larger than the cache size.
946 while (CacheSize > Opts.MaxCacheSize && !LRUBinaries.empty() &&
947 std::next(LRUBinaries.begin()) != LRUBinaries.end()) {
948 CachedBinary &Bin = LRUBinaries.front();
949 CacheSize -= Bin.size();
950 LRUBinaries.pop_front();
951 Bin.evict();
952 }
953}
954
955void CachedBinary::pushEvictor(std::function<void()> NewEvictor) {
956 if (Evictor) {
957 this->Evictor = [OldEvictor = std::move(this->Evictor),
958 NewEvictor = std::move(NewEvictor)]() {
959 NewEvictor();
960 OldEvictor();
961 };
962 } else {
963 this->Evictor = std::move(NewEvictor);
964 }
965}
966
967} // namespace symbolize
968} // namespace llvm
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file declares a library for handling Build IDs and using them to find debug info.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
#define I(x, y, z)
Definition MD5.cpp:57
Merge contiguous icmps into a memcmp
#define T
This file contains some templates that are useful if you are working with the STL at all.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
static std::unique_ptr< BTFContext > create(const object::ObjectFile &Obj, std::function< void(Error)> ErrorHandler=WithColor::defaultErrorHandler)
static LLVM_ABI bool hasBTFSections(const ObjectFile &Obj)
A format-neutral container for inlined code description.
Definition DIContext.h:94
static std::unique_ptr< DWARFContext > create(const object::ObjectFile &Obj, ProcessDebugRelocations RelocAction=ProcessDebugRelocations::Process, const LoadedObjectInfo *L=nullptr, std::string DWPName="", std::function< void(Error)> RecoverableErrorHandler=WithColor::defaultErrorHandler, std::function< void(Error)> WarningHandler=WithColor::defaultWarningHandler, bool ThreadSafe=false)
static ErrorSuccess success()
Create a success value.
Definition Error.h:336
Tagged union holding either a T or a Error.
Definition Error.h:485
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFileOrSTDIN(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, or open stdin if the Filename is "-".
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
iterator end()
Definition StringMap.h:224
iterator find(StringRef Key)
Definition StringMap.h:237
bool insert(MapEntryTy *KeyValue)
insert - Insert the specified key/value pair into the map.
Definition StringMap.h:321
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
static constexpr size_t npos
Definition StringRef.h:57
std::string str() const
str - Get the contents as an std::string.
Definition StringRef.h:225
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:143
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
@ UnknownArch
Definition Triple.h:50
static LLVM_ABI ArchType getArchTypeForLLVMName(StringRef Str)
The canonical type for the given LLVM architecture name (e.g., "x86").
Definition Triple.cpp:446
static LLVM_ABI llvm::Expected< GsymReader > openFile(StringRef Path)
Construct a GsymReader from a file on disk.
BuildIDFetcher searches local cache directories for debug info.
Definition BuildID.h:40
This class is the base class for all object file types.
Definition ObjectFile.h:231
This is a value type class that represents a single section in the list of sections in the object fil...
Definition ObjectFile.h:83
LLVM_ABI void pushEvictor(std::function< void()> Evictor)
static LLVM_ABI std::string DemangleName(StringRef Name, const SymbolizableModule *DbiModuleDescriptor)
LLVM_ABI Expected< std::vector< DILineInfo > > findSymbol(const ObjectFile &Obj, StringRef Symbol, uint64_t Offset)
LLVM_ABI Expected< DIInliningInfo > symbolizeInlinedCode(const ObjectFile &Obj, object::SectionedAddress ModuleOffset)
LLVM_ABI Expected< DILineInfo > symbolizeCode(const ObjectFile &Obj, object::SectionedAddress ModuleOffset)
Definition Symbolize.cpp:86
LLVM_ABI Expected< DIGlobal > symbolizeData(const ObjectFile &Obj, object::SectionedAddress ModuleOffset)
LLVM_ABI Expected< std::vector< DILocal > > symbolizeFrame(const ObjectFile &Obj, object::SectionedAddress ModuleOffset)
LLVM_ABI Expected< SymbolizableModule * > getOrCreateModuleInfo(StringRef ModuleName)
Returns a SymbolizableModule or an error if loading debug info failed.
virtual bool isWin32Module() const =0
static Expected< std::unique_ptr< SymbolizableObjectFile > > create(const object::ObjectFile *Obj, std::unique_ptr< DIContext > DICtx, bool UntagAddresses)
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
SmallVector< uint8_t, 10 > BuildID
A build ID in binary form.
Definition BuildID.h:26
LLVM_ABI BuildIDRef getBuildID(const ObjectFile *Obj)
Returns the build ID, if any, contained in the given object file.
Definition BuildID.cpp:70
LLVM_ABI Expected< std::unique_ptr< Binary > > createBinary(MemoryBufferRef Source, LLVMContext *Context=nullptr, bool InitContent=true)
Create a Binary from Source, autodetecting the file type.
Definition Binary.cpp:45
static bool useBTFContext(const ObjectFile &Obj)
static StringRef getBuildIDStr(ArrayRef< uint8_t > BuildID)
LLVM_ABI std::error_code make_absolute(SmallVectorImpl< char > &path)
Make path an absolute path.
Definition Path.cpp:958
LLVM_ABI std::error_code status(const Twine &path, file_status &result, bool follow=true)
Get file status as if by POSIX stat().
LLVM_ABI bool is_directory(const basic_file_status &status)
Does status represent a directory?
Definition Path.cpp:1101
LLVM_ABI void remove_filename(SmallVectorImpl< char > &path, Style style=Style::native)
Remove the last component from path unless it is the root dir.
Definition Path.cpp:475
LLVM_ABI StringRef filename(StringRef path LLVM_LIFETIME_BOUND, Style style=Style::native)
Get filename.
Definition Path.cpp:578
LLVM_ABI void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")
Append to path.
Definition Path.cpp:457
LLVM_ABI StringRef extension(StringRef path LLVM_LIFETIME_BOUND, Style style=Style::native)
Get extension.
Definition Path.cpp:591
LLVM_ABI StringRef relative_path(StringRef path LLVM_LIFETIME_BOUND, Style style=Style::native)
Get relative path.
Definition Path.cpp:414
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:532
DEMANGLE_ABI bool nonMicrosoftDemangle(std::string_view MangledName, std::string &Result, bool CanHaveLeadingDot=true, bool ParseParams=true)
Definition Demangle.cpp:50
Error createFileError(const Twine &F, Error E)
Concatenate a source file path and/or name with an Error.
Definition Error.h:1399
ArrayRef< CharT > arrayRefFromStringRef(StringRef Input)
Construct an array ref of CharT elements from a string ref.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
Definition Casting.h:732
constexpr bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1305
@ no_such_file_or_directory
Definition Errc.h:65
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool isDigit(char C)
Checks if character C is one of the 10 decimal digits.
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
@ Global
Append to llvm.global_dtors.
LLVM_ABI uint32_t crc32(ArrayRef< uint8_t > Data)
Definition CRC.cpp:101
DEMANGLE_ABI char * microsoftDemangle(std::string_view mangled_name, size_t *n_read, int *status, MSDemangleFlags Flags=MSDF_None)
Demangles the Microsoft symbol pointed at by mangled_name and returns it.
MSDemangleFlags
Definition Demangle.h:40
@ MSDF_NoReturnType
Definition Demangle.h:45
@ MSDF_NoMemberType
Definition Demangle.h:46
@ MSDF_NoCallingConvention
Definition Demangle.h:44
@ MSDF_NoAccessSpecifier
Definition Demangle.h:43
constexpr bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1748
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI Error errorCodeToError(std::error_code EC)
Helper for converting a std::error_code to an Error.
Definition Error.cpp:111
void consumeError(Error Err)
Consume an Error without doing anything.
Definition Error.h:1083
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:870
Container for description of a global variable.
Definition DIContext.h:120
Controls which fields of DILineInfo container should be filled with data.
Definition DIContext.h:146
A format-neutral container for source line information.
Definition DIContext.h:32
static constexpr const char *const BadString
Definition DIContext.h:35