LLVM 23.0.0git
LVBinaryReader.cpp
Go to the documentation of this file.
1//===-- LVBinaryReader.cpp ------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements the LVBinaryReader class.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/Support/Errc.h"
17
18using namespace llvm;
19using namespace llvm::logicalview;
20
21#define DEBUG_TYPE "BinaryReader"
22
23// Function names extracted from the object symbol table.
25 LVSectionIndex SectionIndex) {
26 std::string SymbolName(Name);
27 auto [It, Inserted] =
28 SymbolNames.try_emplace(SymbolName, Function, 0, SectionIndex, false);
29 if (!Inserted) {
30 // Update a recorded entry with its logical scope and section index.
31 It->second.Scope = Function;
32 if (SectionIndex)
33 It->second.SectionIndex = SectionIndex;
34 }
35
36 if (Function && It->second.IsComdat)
37 Function->setIsComdat();
38
39 LLVM_DEBUG({ print(dbgs()); });
40}
41
43 LVSectionIndex SectionIndex, bool IsComdat) {
44 std::string SymbolName(Name);
45 auto [It, Inserted] = SymbolNames.try_emplace(SymbolName, nullptr, Address,
46 SectionIndex, IsComdat);
47 if (!Inserted)
48 // Update a recorded symbol name with its address.
49 It->second.Address = Address;
50
51 LVScope *Function = It->second.Scope;
52 if (Function && IsComdat)
53 Function->setIsComdat();
54 LLVM_DEBUG({ print(dbgs()); });
55}
56
59 StringRef Name = Function->getLinkageName();
60 if (Name.empty())
61 Name = Function->getName();
62 std::string SymbolName(Name);
63
64 if (SymbolName.empty())
65 return SectionIndex;
66
67 auto It = SymbolNames.find(SymbolName);
68 if (It == SymbolNames.end())
69 return SectionIndex;
70
71 // Update a recorded entry with its logical scope, only if the scope has
72 // ranges. That is the case when in DWARF there are 2 DIEs connected via
73 // the DW_AT_specification.
74 if (Function->getHasRanges()) {
75 It->second.Scope = Function;
76 SectionIndex = It->second.SectionIndex;
77 } else {
78 SectionIndex = UndefinedSectionIndex;
79 }
80
81 if (It->second.IsComdat)
82 Function->setIsComdat();
83
84 LLVM_DEBUG({ print(dbgs()); });
85 return SectionIndex;
86}
87
90 LVSymbolNames::iterator Iter = SymbolNames.find(Name);
91 return Iter != SymbolNames.end() ? Iter->second : Empty;
92}
94 LVSymbolNames::iterator Iter = SymbolNames.find(Name);
95 return Iter != SymbolNames.end() ? Iter->second.Address : 0;
96}
98 LVSymbolNames::iterator Iter = SymbolNames.find(Name);
99 return Iter != SymbolNames.end() ? Iter->second.SectionIndex
101}
103 LVSymbolNames::iterator Iter = SymbolNames.find(Name);
104 return Iter != SymbolNames.end() ? Iter->second.IsComdat : false;
105}
106
108 OS << "Symbol Table\n";
109 for (LVSymbolNames::reference Entry : SymbolNames) {
110 LVSymbolTableEntry &SymbolName = Entry.second;
111 LVScope *Scope = SymbolName.Scope;
112 LVOffset Offset = Scope ? Scope->getOffset() : 0;
113 OS << "Index: " << hexValue(SymbolName.SectionIndex, 5)
114 << " Comdat: " << (SymbolName.IsComdat ? "Y" : "N")
115 << " Scope: " << hexValue(Offset)
116 << " Address: " << hexValue(SymbolName.Address)
117 << " Name: " << Entry.first << "\n";
118 }
119}
120
122 LVSectionIndex SectionIndex) {
123 SymbolTable.add(Name, Function, SectionIndex);
124}
126 LVSectionIndex SectionIndex,
127 bool IsComdat) {
128 SymbolTable.add(Name, Address, SectionIndex, IsComdat);
129}
133
135 return SymbolTable.getEntry(Name);
136}
138 return SymbolTable.getAddress(Name);
139}
141 return SymbolTable.getIndex(Name);
142}
144 return SymbolTable.getIsComdat(Name);
145}
146
148 for (const object::SectionRef &Section : Obj.sections()) {
149 LLVM_DEBUG({
150 Expected<StringRef> SectionNameOrErr = Section.getName();
152 if (!SectionNameOrErr)
153 consumeError(SectionNameOrErr.takeError());
154 else
155 Name = *SectionNameOrErr;
156 dbgs() << "Index: " << format_decimal(Section.getIndex(), 3) << ", "
157 << "Address: " << hexValue(Section.getAddress()) << ", "
158 << "Size: " << hexValue(Section.getSize()) << ", "
159 << "Name: " << Name << "\n";
160 dbgs() << "isCompressed: " << Section.isCompressed() << ", "
161 << "isText: " << Section.isText() << ", "
162 << "isData: " << Section.isData() << ", "
163 << "isBSS: " << Section.isBSS() << ", "
164 << "isVirtual: " << Section.isVirtual() << "\n";
165 dbgs() << "isBitcode: " << Section.isBitcode() << ", "
166 << "isStripped: " << Section.isStripped() << ", "
167 << "isBerkeleyText: " << Section.isBerkeleyText() << ", "
168 << "isBerkeleyData: " << Section.isBerkeleyData() << ", "
169 << "isDebugSection: " << Section.isDebugSection() << "\n";
170 dbgs() << "\n";
171 });
172
173 if (!Section.isText() || Section.isVirtual() || !Section.getSize())
174 continue;
175
176 // Record section information required for symbol resolution.
177 // Note: The section index returned by 'getIndex()' is one based.
178 Sections.emplace(Section.getIndex(), Section);
179 addSectionAddress(Section);
180
181 // Identify the ".text" section.
182 Expected<StringRef> SectionNameOrErr = Section.getName();
183 if (!SectionNameOrErr) {
184 consumeError(SectionNameOrErr.takeError());
185 continue;
186 }
187 if (*SectionNameOrErr == ".text" || *SectionNameOrErr == "CODE" ||
188 *SectionNameOrErr == ".code") {
189 DotTextSectionIndex = Section.getIndex();
190 // If the object is WebAssembly, update the address offset that
191 // will be added to DWARF DW_AT_* attributes.
192 if (Obj.isWasm())
193 WasmCodeSectionOffset = Section.getAddress();
194 }
195 }
196
197 // Process the symbol table.
198 mapRangeAddress(Obj);
199
200 LLVM_DEBUG({
201 dbgs() << "\nSections Information:\n";
202 for (LVSections::reference Entry : Sections) {
203 LVSectionIndex SectionIndex = Entry.first;
204 const object::SectionRef Section = Entry.second;
205 Expected<StringRef> SectionNameOrErr = Section.getName();
206 if (!SectionNameOrErr)
207 consumeError(SectionNameOrErr.takeError());
208 dbgs() << "\nIndex: " << format_decimal(SectionIndex, 3)
209 << " Name: " << *SectionNameOrErr << "\n"
210 << "Size: " << hexValue(Section.getSize()) << "\n"
211 << "VirtualAddress: " << hexValue(VirtualAddress) << "\n"
212 << "SectionAddress: " << hexValue(Section.getAddress()) << "\n";
213 }
214 dbgs() << "\nObject Section Information:\n";
215 for (LVSectionAddresses::const_reference Entry : SectionAddresses)
216 dbgs() << "[" << hexValue(Entry.first) << ":"
217 << hexValue(Entry.first + Entry.second.getSize())
218 << "] Size: " << hexValue(Entry.second.getSize()) << "\n";
219 });
220}
221
223 ErrorOr<uint64_t> ImageBase = COFFObj.getImageBase();
224 if (ImageBase)
225 ImageBaseAddress = ImageBase.get();
226
227 LLVM_DEBUG({
228 dbgs() << "ImageBaseAddress: " << hexValue(ImageBaseAddress) << "\n";
229 });
230
232
233 for (const object::SectionRef &Section : COFFObj.sections()) {
234 if (!Section.isText() || Section.isVirtual() || !Section.getSize())
235 continue;
236
237 const object::coff_section *COFFSection = COFFObj.getCOFFSection(Section);
238 VirtualAddress = COFFSection->VirtualAddress;
239 bool IsComdat = (COFFSection->Characteristics & Flags) == Flags;
240
241 // Record section information required for symbol resolution.
242 // Note: The section index returned by 'getIndex()' is zero based.
243 Sections.emplace(Section.getIndex() + 1, Section);
244 addSectionAddress(Section);
245
246 // Additional initialization on the specific object format.
247 mapRangeAddress(COFFObj, Section, IsComdat);
248 }
249
250 LLVM_DEBUG({
251 dbgs() << "\nSections Information:\n";
252 for (LVSections::reference Entry : Sections) {
253 LVSectionIndex SectionIndex = Entry.first;
254 const object::SectionRef Section = Entry.second;
255 const object::coff_section *COFFSection = COFFObj.getCOFFSection(Section);
256 Expected<StringRef> SectionNameOrErr = Section.getName();
257 if (!SectionNameOrErr)
258 consumeError(SectionNameOrErr.takeError());
259 dbgs() << "\nIndex: " << format_decimal(SectionIndex, 3)
260 << " Name: " << *SectionNameOrErr << "\n"
261 << "Size: " << hexValue(Section.getSize()) << "\n"
262 << "VirtualAddress: " << hexValue(VirtualAddress) << "\n"
263 << "SectionAddress: " << hexValue(Section.getAddress()) << "\n"
264 << "PointerToRawData: " << hexValue(COFFSection->PointerToRawData)
265 << "\n"
266 << "SizeOfRawData: " << hexValue(COFFSection->SizeOfRawData)
267 << "\n";
268 }
269 dbgs() << "\nObject Section Information:\n";
270 for (LVSectionAddresses::const_reference Entry : SectionAddresses)
271 dbgs() << "[" << hexValue(Entry.first) << ":"
272 << hexValue(Entry.first + Entry.second.getSize())
273 << "] Size: " << hexValue(Entry.second.getSize()) << "\n";
274 });
275}
276
278 StringRef TheFeatures,
279 StringRef TheCPU) {
280 Triple TheTriple(TripleName);
281 std::string TargetLookupError;
282 const Target *TheTarget =
283 TargetRegistry::lookupTarget(TheTriple, TargetLookupError);
284 if (!TheTarget)
285 return createStringError(errc::invalid_argument, TargetLookupError.c_str());
286
287 // Register information.
288 MCRegisterInfo *RegisterInfo = TheTarget->createMCRegInfo(TheTriple);
289 if (!RegisterInfo)
291 "no register info for target " + TripleName);
292 MRI.reset(RegisterInfo);
293
294 // Assembler properties and features.
295 MCAsmInfo *AsmInfo(TheTarget->createMCAsmInfo(*MRI, TheTriple, MCOptions));
296 if (!AsmInfo)
298 "no assembly info for target " + TripleName);
299 MAI.reset(AsmInfo);
300
301 // Target subtargets.
302 MCSubtargetInfo *SubtargetInfo(
303 TheTarget->createMCSubtargetInfo(TheTriple, TheCPU, TheFeatures));
304 if (!SubtargetInfo)
306 "no subtarget info for target " + TripleName);
307 STI.reset(SubtargetInfo);
308
309 // Instructions Info.
310 MCInstrInfo *InstructionInfo(TheTarget->createMCInstrInfo());
311 if (!InstructionInfo)
313 "no instruction info for target " + TripleName);
314 MII.reset(InstructionInfo);
315
316 MC = std::make_unique<MCContext>(Triple(TheTriple), *MAI, *MRI, *STI);
317
318 // Disassembler.
319 MCDisassembler *DisAsm(TheTarget->createMCDisassembler(*STI, *MC));
320 if (!DisAsm)
322 "no disassembler for target " + TripleName);
323 MD.reset(DisAsm);
324
325 MCInstPrinter *InstructionPrinter(TheTarget->createMCInstPrinter(
326 Triple(TheTriple), AsmInfo->getAssemblerDialect(), *MAI, *MII, *MRI));
327 if (!InstructionPrinter)
329 "no target assembly language printer for target " +
330 TripleName);
331 MIP.reset(InstructionPrinter);
332 InstructionPrinter->setPrintImmHex(true);
333
334 return Error::success();
335}
336
339 LVSectionIndex SectionIndex) {
340 // Return the 'text' section with the code for this logical scope.
341 // COFF: SectionIndex is zero. Use 'SectionAddresses' data.
342 // ELF: SectionIndex is the section index in the file.
343 if (SectionIndex) {
344 LVSections::iterator Iter = Sections.find(SectionIndex);
345 if (Iter == Sections.end()) {
347 "invalid section index for: '%s'",
348 Scope->getName().str().c_str());
349 }
350 const object::SectionRef Section = Iter->second;
351 return std::make_pair(Section.getAddress(), Section);
352 }
353
354 // Ensure a valid starting address for the public names.
355 LVSectionAddresses::const_iterator Iter =
356 SectionAddresses.upper_bound(Address);
357 if (Iter == SectionAddresses.begin())
359 "invalid section address for: '%s'",
360 Scope->getName().str().c_str());
361
362 // Get section that contains the code for this function.
363 Iter = SectionAddresses.lower_bound(Address);
364 if (Iter != SectionAddresses.begin())
365 --Iter;
366 return std::make_pair(Iter->first, Iter->second);
367}
368
370 LVSectionIndex SectionIndex,
371 const LVNameInfo &NameInfo) {
372 assert(Scope && "Scope is null.");
373
374 // Skip stripped functions.
375 if (Scope->getIsDiscarded())
376 return Error::success();
377
378 // Find associated address and size for the given function entry point.
379 LVAddress Address = NameInfo.first;
380 uint64_t Size = NameInfo.second;
381
382 LLVM_DEBUG({
383 dbgs() << "\nPublic Name instructions: '" << Scope->getName() << "' / '"
384 << Scope->getLinkageName() << "'\n"
385 << "DIE Offset: " << hexValue(Scope->getOffset()) << " Range: ["
386 << hexValue(Address) << ":" << hexValue(Address + Size) << "]\n";
387 });
388
390 getSection(Scope, Address, SectionIndex);
391 if (!SectionOrErr)
392 return SectionOrErr.takeError();
393 const object::SectionRef Section = (*SectionOrErr).second;
394 uint64_t SectionAddress = (*SectionOrErr).first;
395
396 Expected<StringRef> SectionContentsOrErr = Section.getContents();
397 if (!SectionContentsOrErr)
398 return SectionOrErr.takeError();
399
400 // There are cases where the section size is smaller than the [LowPC,HighPC]
401 // range; it causes us to decode invalid addresses. The recorded size in the
402 // logical scope is one less than the real size.
403 LLVM_DEBUG({
404 dbgs() << " Size: " << hexValue(Size)
405 << ", Section Size: " << hexValue(Section.getSize()) << "\n";
406 });
407 Size = std::min(Size + 1, Section.getSize());
408
409 ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(*SectionContentsOrErr);
410 uint64_t Offset = Address - SectionAddress;
411 if (Offset > Bytes.size()) {
412 LLVM_DEBUG({
413 dbgs() << "offset (" << hexValue(Offset) << ") is beyond section size ("
414 << hexValue(Bytes.size()) << "); malformed input?\n";
415 });
416 return createStringError(
418 "Failed to parse instructions; offset beyond section size");
419 }
420 uint8_t const *Begin = Bytes.data() + Offset;
421 uint8_t const *End = Bytes.data() + Offset + Size;
422
423 LLVM_DEBUG({
424 Expected<StringRef> SectionNameOrErr = Section.getName();
425 if (!SectionNameOrErr)
426 consumeError(SectionNameOrErr.takeError());
427 else
428 dbgs() << "Section Index: " << hexValue(Section.getIndex()) << " ["
429 << hexValue((uint64_t)Section.getAddress()) << ":"
430 << hexValue((uint64_t)Section.getAddress() + Section.getSize(), 10)
431 << "] Name: '" << *SectionNameOrErr << "'\n"
432 << "Begin: " << hexValue((uint64_t)Begin)
433 << ", End: " << hexValue((uint64_t)End) << "\n";
434 });
435
436 // Address for first instruction line.
437 LVAddress FirstAddress = Address;
438 auto InstructionsSP = std::make_unique<LVLines>();
439 LVLines &Instructions = *InstructionsSP;
440 DiscoveredLines.emplace_back(std::move(InstructionsSP));
441
442 while (Begin < End) {
444 uint64_t BytesConsumed = 0;
445 SmallVector<char, 64> InsnStr;
448 MD->getInstruction(Instruction, BytesConsumed,
449 ArrayRef<uint8_t>(Begin, End), Address, outs());
450 switch (S) {
452 LLVM_DEBUG({ dbgs() << "Invalid instruction\n"; });
453 if (BytesConsumed == 0)
454 // Skip invalid bytes
455 BytesConsumed = 1;
456 break;
458 LLVM_DEBUG({ dbgs() << "Potentially undefined instruction:"; });
459 [[fallthrough]];
461 std::string Buffer;
462 raw_string_ostream Stream(Buffer);
463 StringRef AnnotationsStr = Annotations.str();
464 MIP->printInst(&Instruction, Address, AnnotationsStr, *STI, Stream);
465 LLVM_DEBUG({
466 std::string BufferCodes;
467 raw_string_ostream StreamCodes(BufferCodes);
468 StreamCodes << format_bytes(
469 ArrayRef<uint8_t>(Begin, Begin + BytesConsumed), std::nullopt, 16,
470 16);
471 dbgs() << "[" << hexValue((uint64_t)Begin) << "] "
472 << "Size: " << format_decimal(BytesConsumed, 2) << " ("
473 << formatv("{0}",
474 fmt_align(StreamCodes.str(), AlignStyle::Left, 32))
475 << ") " << hexValue((uint64_t)Address) << ": " << Stream.str()
476 << "\n";
477 });
478 // Here we add logical lines to the Instructions. Later on,
479 // the 'processLines()' function will move each created logical line
480 // to its enclosing logical scope, using the debug ranges information
481 // and they will be released when its scope parent is deleted.
482 LVLineAssembler *Line = createLineAssembler();
483 Line->setAddress(Address);
484 Line->setName(StringRef(Stream.str()).trim());
485 Instructions.push_back(Line);
486 break;
487 }
488 }
489 Address += BytesConsumed;
490 Begin += BytesConsumed;
491 }
492
493 LLVM_DEBUG({
494 size_t Index = 0;
495 dbgs() << "\nSectionIndex: " << format_decimal(SectionIndex, 3)
496 << " Scope DIE: " << hexValue(Scope->getOffset()) << "\n"
497 << "Address: " << hexValue(FirstAddress)
498 << formatv(" - Collected instructions lines: {0}\n",
499 Instructions.size());
500 for (const LVLine *Line : Instructions)
501 dbgs() << format_decimal(++Index, 5) << ": "
502 << hexValue(Line->getOffset()) << ", (" << Line->getName()
503 << ")\n";
504 });
505
506 // The scope in the assembler names is linked to its own instructions.
507 ScopeInstructions.add(SectionIndex, Scope, &Instructions);
508 AssemblerMappings.add(SectionIndex, FirstAddress, Scope);
509
510 return Error::success();
511}
512
514 LVSectionIndex SectionIndex) {
515 if (!options().getPrintInstructions())
516 return Error::success();
517
518 LVNameInfo Name = CompileUnit->findPublicName(Function);
519 if (Name.first != LVAddress(UINT64_MAX))
520 return createInstructions(Function, SectionIndex, Name);
521
522 return Error::success();
523}
524
526 if (!options().getPrintInstructions())
527 return Error::success();
528
529 LLVM_DEBUG({
530 size_t Index = 1;
531 dbgs() << "\nPublic Names (Scope):\n";
532 for (LVPublicNames::const_reference Name : CompileUnit->getPublicNames()) {
533 LVScope *Scope = Name.first;
534 const LVNameInfo &NameInfo = Name.second;
535 LVAddress Address = NameInfo.first;
536 uint64_t Size = NameInfo.second;
537 dbgs() << format_decimal(Index++, 5) << ": "
538 << "DIE Offset: " << hexValue(Scope->getOffset()) << " Range: ["
539 << hexValue(Address) << ":" << hexValue(Address + Size) << "] "
540 << "Name: '" << Scope->getName() << "' / '"
541 << Scope->getLinkageName() << "'\n";
542 }
543 });
544
545 // For each public name in the current compile unit, create the line
546 // records that represent the executable instructions.
547 for (LVPublicNames::const_reference Name : CompileUnit->getPublicNames()) {
548 LVScope *Scope = Name.first;
549 // The symbol table extracted from the object file always contains a
550 // non-empty name (linkage name). However, the logical scope does not
551 // guarantee to have a name for the linkage name (main is one case).
552 // For those cases, set the linkage name the same as the name.
553 if (!Scope->getLinkageNameIndex())
554 Scope->setLinkageName(Scope->getName());
555 LVSectionIndex SectionIndex = getSymbolTableIndex(Scope->getLinkageName());
556 if (Error Err = createInstructions(Scope, SectionIndex, Name.second))
557 return Err;
558 }
559
560 return Error::success();
561}
562
563// During the traversal of the debug information sections, we created the
564// logical lines representing the disassembled instructions from the text
565// section and the logical lines representing the line records from the
566// debug line section. Using the ranges associated with the logical scopes,
567// we will allocate those logical lines to their logical scopes.
569 LVSectionIndex SectionIndex,
570 LVScope *Function) {
571 assert(DebugLines && "DebugLines is null.");
572
573 // Just return if this compilation unit does not have any line records
574 // and no instruction lines were created.
575 if (DebugLines->empty() && !options().getPrintInstructions())
576 return;
577
578 // Merge the debug lines and instruction lines using their text address;
579 // the logical line representing the debug line record is followed by the
580 // line(s) representing the disassembled instructions, whose addresses are
581 // equal or greater than the line address and less than the address of the
582 // next debug line record.
583 LLVM_DEBUG({
584 size_t Index = 1;
585 size_t PerLine = 4;
586 dbgs() << formatv("\nProcess debug lines: {0}\n", DebugLines->size());
587 for (const LVLine *Line : *DebugLines) {
588 dbgs() << format_decimal(Index, 5) << ": " << hexValue(Line->getOffset())
589 << ", (" << Line->getLineNumber() << ")"
590 << ((Index % PerLine) ? " " : "\n");
591 ++Index;
592 }
593 dbgs() << ((Index % PerLine) ? "\n" : "");
594 });
595
596 bool TraverseLines = true;
597 LVLines::iterator Iter = DebugLines->begin();
598 while (TraverseLines && Iter != DebugLines->end()) {
599 uint64_t DebugAddress = (*Iter)->getAddress();
600
601 // Get the function with an entry point that matches this line and
602 // its associated assembler entries. In the case of COMDAT, the input
603 // 'Function' is not null. Use it to find its address ranges.
604 LVScope *Scope = Function;
605 if (!Function) {
606 Scope = AssemblerMappings.find(SectionIndex, DebugAddress);
607 if (!Scope) {
608 ++Iter;
609 continue;
610 }
611 }
612
613 // Get the associated instructions for the found 'Scope'.
614 LVLines InstructionLines;
615 LVLines *Lines = ScopeInstructions.find(SectionIndex, Scope);
616 if (Lines)
617 InstructionLines = std::move(*Lines);
618
619 LLVM_DEBUG({
620 size_t Index = 0;
621 dbgs() << "\nSectionIndex: " << format_decimal(SectionIndex, 3)
622 << " Scope DIE: " << hexValue(Scope->getOffset()) << "\n"
623 << formatv("Process instruction lines: {0}\n",
624 InstructionLines.size());
625 for (const LVLine *Line : InstructionLines)
626 dbgs() << format_decimal(++Index, 5) << ": "
627 << hexValue(Line->getOffset()) << ", (" << Line->getName()
628 << ")\n";
629 });
630
631 // Continue with next debug line if there are no instruction lines.
632 if (InstructionLines.empty()) {
633 ++Iter;
634 continue;
635 }
636
637 for (LVLine *InstructionLine : InstructionLines) {
638 uint64_t InstructionAddress = InstructionLine->getAddress();
639 LLVM_DEBUG({
640 dbgs() << "Instruction address: " << hexValue(InstructionAddress)
641 << "\n";
642 });
643 if (TraverseLines) {
644 while (Iter != DebugLines->end()) {
645 DebugAddress = (*Iter)->getAddress();
646 LLVM_DEBUG({
647 bool IsDebug = (*Iter)->getIsLineDebug();
648 dbgs() << "Line " << (IsDebug ? "dbg:" : "ins:") << " ["
649 << hexValue(DebugAddress) << "]";
650 if (IsDebug)
651 dbgs() << formatv(" {0}", (*Iter)->getLineNumber());
652 dbgs() << "\n";
653 });
654 // Instruction address before debug line.
655 if (InstructionAddress < DebugAddress) {
656 LLVM_DEBUG({
657 dbgs() << "Inserted instruction address: "
658 << hexValue(InstructionAddress) << " before line: "
659 << formatv("{0}", (*Iter)->getLineNumber()) << " ["
660 << hexValue(DebugAddress) << "]\n";
661 });
662 Iter = DebugLines->insert(Iter, InstructionLine);
663 // The returned iterator points to the inserted instruction.
664 // Skip it and point to the line acting as reference.
665 ++Iter;
666 break;
667 }
668 ++Iter;
669 }
670 if (Iter == DebugLines->end()) {
671 // We have reached the end of the source lines and the current
672 // instruction line address is greater than the last source line.
673 TraverseLines = false;
674 DebugLines->push_back(InstructionLine);
675 }
676 } else {
677 DebugLines->push_back(InstructionLine);
678 }
679 }
680 }
681
682 LLVM_DEBUG({
683 dbgs() << formatv("Lines after merge: {0}\n", DebugLines->size());
684 size_t Index = 0;
685 for (const LVLine *Line : *DebugLines) {
686 dbgs() << format_decimal(++Index, 5) << ": "
687 << hexValue(Line->getOffset()) << ", ("
688 << ((Line->getIsLineDebug())
689 ? Line->lineNumberAsStringStripped(/*ShowZero=*/true)
690 : Line->getName())
691 << ")\n";
692 }
693 });
694
695 // If this compilation unit does not have line records, traverse its scopes
696 // and take any collected instruction lines as the working set in order
697 // to move them to their associated scope.
698 if (DebugLines->empty()) {
699 if (const LVScopes *Scopes = CompileUnit->getScopes())
700 for (LVScope *Scope : *Scopes) {
701 LVLines *Lines = ScopeInstructions.find(Scope);
702 if (Lines) {
703
704 LLVM_DEBUG({
705 size_t Index = 0;
706 dbgs() << "\nSectionIndex: " << format_decimal(SectionIndex, 3)
707 << " Scope DIE: " << hexValue(Scope->getOffset()) << "\n"
708 << formatv("Instruction lines: {0}\n", Lines->size());
709 for (const LVLine *Line : *Lines)
710 dbgs() << format_decimal(++Index, 5) << ": "
711 << hexValue(Line->getOffset()) << ", (" << Line->getName()
712 << ")\n";
713 });
714
715 if (Scope->getIsArtificial()) {
716 // Add the instruction lines to their artificial scope.
717 for (LVLine *Line : *Lines)
718 Scope->addElement(Line);
719 } else {
720 DebugLines->append(*Lines);
721 }
722 Lines->clear();
723 }
724 }
725 }
726
727 LVRange *ScopesWithRanges = getSectionRanges(SectionIndex);
728 ScopesWithRanges->startSearch();
729
730 // Process collected lines.
731 LVScope *Scope;
732 for (LVLine *Line : *DebugLines) {
733 // Using the current line address, get its associated lexical scope and
734 // add the line information to it.
735 Scope = ScopesWithRanges->getEntry(Line->getAddress());
736 if (!Scope) {
737 // If missing scope, use the compile unit.
738 Scope = CompileUnit;
739 LLVM_DEBUG({
740 dbgs() << "Adding line to CU: " << hexValue(Line->getOffset()) << ", ("
741 << ((Line->getIsLineDebug())
742 ? Line->lineNumberAsStringStripped(/*ShowZero=*/true)
743 : Line->getName())
744 << ")\n";
745 });
746 }
747
748 // Add line object to scope.
749 Scope->addElement(Line);
750
751 // Report any line zero.
752 if (options().getWarningLines() && Line->getIsLineDebug() &&
753 !Line->getLineNumber())
754 CompileUnit->addLineZero(Line);
755
756 // Some compilers generate ranges in the compile unit; other compilers
757 // only DW_AT_low_pc/DW_AT_high_pc. In order to correctly map global
758 // variables, we need to generate the map ranges for the compile unit.
759 // If we use the ranges stored at the scope level, there are cases where
760 // the address referenced by a symbol location, is not in the enclosing
761 // scope, but in an outer one. By using the ranges stored in the compile
762 // unit, we can catch all those addresses.
763 if (Line->getIsLineDebug())
764 CompileUnit->addMapping(Line, SectionIndex);
765
766 // Resolve any given pattern.
768 }
769
770 ScopesWithRanges->endSearch();
771}
772
774 LVSectionIndex SectionIndex) {
775 assert(DebugLines && "DebugLines is null.");
776 if (DebugLines->empty() && !ScopeInstructions.findMap(SectionIndex))
777 return;
778
779 // If the Compile Unit does not contain comdat functions, use the whole
780 // set of debug lines, as the addresses don't have conflicts.
781 if (!CompileUnit->getHasComdatScopes()) {
782 processLines(DebugLines, SectionIndex, nullptr);
783 return;
784 }
785
786 // Find the indexes for the lines whose address is zero.
787 std::vector<size_t> AddressZero;
789 *DebugLines, [](LVLine *Line) { return !Line->getAddress(); });
790 while (It != std::end(*DebugLines)) {
791 AddressZero.emplace_back(std::distance(std::begin(*DebugLines), It));
792 It = std::find_if(std::next(It), std::end(*DebugLines),
793 [](LVLine *Line) { return !Line->getAddress(); });
794 }
795
796 // If the set of debug lines does not contain any line with address zero,
797 // use the whole set. It means we are dealing with an initialization
798 // section from a fully linked binary.
799 if (AddressZero.empty()) {
800 processLines(DebugLines, SectionIndex, nullptr);
801 return;
802 }
803
804 // The Compile unit contains comdat functions. Traverse the collected
805 // debug lines and identify logical groups based on their start and
806 // address. Each group starts with a zero address.
807 // Begin, End, Address, IsDone.
808 using LVBucket = std::tuple<size_t, size_t, LVAddress, bool>;
809 std::vector<LVBucket> Buckets;
810
812 size_t Begin = 0;
813 size_t End = 0;
814 size_t Index = 0;
815 for (Index = 0; Index < AddressZero.size() - 1; ++Index) {
816 Begin = AddressZero[Index];
817 End = AddressZero[Index + 1] - 1;
818 Address = (*DebugLines)[End]->getAddress();
819 Buckets.emplace_back(Begin, End, Address, false);
820 }
821
822 // Add the last bucket.
823 if (Index) {
824 Begin = AddressZero[Index];
825 End = DebugLines->size() - 1;
826 Address = (*DebugLines)[End]->getAddress();
827 Buckets.emplace_back(Begin, End, Address, false);
828 }
829
830 LLVM_DEBUG({
831 dbgs() << "\nDebug Lines buckets: " << Buckets.size() << "\n";
832 for (LVBucket &Bucket : Buckets) {
833 dbgs() << "Begin: " << format_decimal(std::get<0>(Bucket), 5) << ", "
834 << "End: " << format_decimal(std::get<1>(Bucket), 5) << ", "
835 << "Address: " << hexValue(std::get<2>(Bucket)) << "\n";
836 }
837 });
838
839 // Traverse the sections and buckets looking for matches on the section
840 // sizes. In the unlikely event of different buckets with the same size
841 // process them in order and mark them as done.
842 LVLines Group;
843 for (LVSections::reference Entry : Sections) {
844 LVSectionIndex SectionIndex = Entry.first;
845 const object::SectionRef Section = Entry.second;
846 uint64_t Size = Section.getSize();
847 LLVM_DEBUG({
848 dbgs() << "\nSection Index: " << format_decimal(SectionIndex, 3)
849 << " , Section Size: " << hexValue(Section.getSize())
850 << " , Section Address: " << hexValue(Section.getAddress())
851 << "\n";
852 });
853
854 for (LVBucket &Bucket : Buckets) {
855 if (std::get<3>(Bucket))
856 // Already done for previous section.
857 continue;
858 if (Size == std::get<2>(Bucket)) {
859 // We have a match on the section size.
860 Group.clear();
861 LVLines::iterator IterStart = DebugLines->begin() + std::get<0>(Bucket);
862 LVLines::iterator IterEnd =
863 DebugLines->begin() + std::get<1>(Bucket) + 1;
864 for (LVLines::iterator Iter = IterStart; Iter < IterEnd; ++Iter)
865 Group.push_back(*Iter);
866 processLines(&Group, SectionIndex, /*Function=*/nullptr);
867 std::get<3>(Bucket) = true;
868 break;
869 }
870 }
871 }
872}
873
874// Traverse the scopes for the given 'Function' looking for any inlined
875// scopes with inlined lines, which are found in 'CUInlineeLines'.
877 LVScope *Function) {
879 std::function<void(LVScope * Parent)> FindInlinedScopes =
880 [&](LVScope *Parent) {
881 if (const LVScopes *Scopes = Parent->getScopes())
882 for (LVScope *Scope : *Scopes) {
883 LVInlineeLine::iterator Iter = CUInlineeLines.find(Scope);
884 if (Iter != CUInlineeLines.end())
885 InlineeIters.push_back(Iter);
886 FindInlinedScopes(Scope);
887 }
888 };
889
890 // Find all inlined scopes for the given 'Function'.
891 FindInlinedScopes(Function);
892 for (LVInlineeLine::iterator InlineeIter : InlineeIters) {
893 LVScope *Scope = InlineeIter->first;
894 addToSymbolTable(Scope->getLinkageName(), Scope, SectionIndex);
895
896 // TODO: Convert this into a reference.
897 LVLines *InlineeLines = InlineeIter->second.get();
898 LLVM_DEBUG({
899 dbgs() << "Inlined lines for: " << Scope->getName() << "\n";
900 for (const LVLine *Line : *InlineeLines)
901 dbgs() << "[" << hexValue(Line->getAddress()) << "] "
902 << Line->getLineNumber() << "\n";
903 dbgs() << formatv("Debug lines: {0}\n", CULines.size());
904 for (const LVLine *Line : CULines)
905 dbgs() << "Line address: " << hexValue(Line->getOffset()) << ", ("
906 << Line->getLineNumber() << ")\n";
907 ;
908 });
909
910 // The inlined lines must be merged using its address, in order to keep
911 // the real order of the instructions. The inlined lines are mixed with
912 // the other non-inlined lines.
913 if (InlineeLines->size()) {
914 // First address of inlinee code.
915 uint64_t InlineeStart = (InlineeLines->front())->getAddress();
916 LVLines::iterator Iter =
917 llvm::find_if(CULines, [&](LVLine *Item) -> bool {
918 return Item->getAddress() == InlineeStart;
919 });
920 if (Iter != CULines.end()) {
921 // 'Iter' points to the line where the inlined function is called.
922 // Emulate the DW_AT_call_line attribute.
923 Scope->setCallLineNumber((*Iter)->getLineNumber());
924 // Mark the referenced line as the start of the inlined function.
925 // Skip the first line during the insertion, as the address and
926 // line number are the same. Otherwise we have to erase and insert.
927 (*Iter)->setLineNumber((*InlineeLines->begin())->getLineNumber());
928 ++Iter;
929 CULines.insert(Iter, InlineeLines->begin() + 1, InlineeLines->end());
930 }
931 }
932
933 // Remove this set of lines from the container; each inlined function
934 // creates an unique set of lines. Remove only the created container.
935 CUInlineeLines.erase(InlineeIter);
936 InlineeLines->clear();
937 }
938 LLVM_DEBUG({
939 dbgs() << "Merged Inlined lines for: " << Function->getName() << "\n";
940 dbgs() << formatv("Debug lines: {0}\n", CULines.size());
941 for (const LVLine *Line : CULines)
942 dbgs() << "Line address: " << hexValue(Line->getOffset()) << ", ("
943 << Line->getLineNumber() << ")\n";
944 ;
945 });
946}
947
949 OS << "LVBinaryReader\n";
950 LLVM_DEBUG(dbgs() << "PrintReader\n");
951}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define LLVM_DEBUG(...)
Definition Debug.h:114
LocallyHashedType DenseMapInfo< LocallyHashedType >::Empty
Annotations lets you mark points and ranges inside source code, for tests:
Definition Annotations.h:53
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
const T * data() const
Definition ArrayRef.h:138
Represents either an error or a value T.
Definition ErrorOr.h:56
reference get()
Definition ErrorOr.h:149
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static ErrorSuccess success()
Create a success value.
Definition Error.h:336
Tagged union holding either a T or a Error.
Definition Error.h:485
Error takeError()
Take ownership of the stored error.
Definition Error.h:612
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition MCAsmInfo.h:66
unsigned getAssemblerDialect() const
Definition MCAsmInfo.h:580
Superclass for all disassemblers.
DecodeStatus
Ternary decode status.
This is an instance of a target assembly language printer that converts an MCInst to valid target ass...
void setPrintImmHex(bool Value)
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
Interface to description of machine instruction set.
Definition MCInstrInfo.h:27
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Generic base class for all target subtargets.
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
StringRef trim(char Char) const
Return string with consecutive Char characters starting from the left and right removed.
Definition StringRef.h:844
Target - Wrapper for Target specific information.
MCRegisterInfo * createMCRegInfo(const Triple &TT) const
Create a MCRegisterInfo implementation.
MCAsmInfo * createMCAsmInfo(const MCRegisterInfo &MRI, const Triple &TheTriple, const MCTargetOptions &Options) const
Create a MCAsmInfo implementation for the specified target triple.
MCDisassembler * createMCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) const
MCInstPrinter * createMCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, const MCRegisterInfo &MRI) const
MCSubtargetInfo * createMCSubtargetInfo(const Triple &TheTriple, StringRef CPU, StringRef Features) const
createMCSubtargetInfo - Create a MCSubtargetInfo implementation.
MCInstrInfo * createMCInstrInfo() const
createMCInstrInfo - Create a MCInstrInfo implementation.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
const LVSymbolTableEntry & getSymbolTableEntry(StringRef Name)
LVSectionIndex updateSymbolTable(LVScope *Function)
Expected< std::pair< LVSectionIndex, object::SectionRef > > getSection(LVScope *Scope, LVAddress Address, LVSectionIndex SectionIndex)
std::unique_ptr< MCContext > MC
void includeInlineeLines(LVSectionIndex SectionIndex, LVScope *Function)
std::unique_ptr< const MCInstrInfo > MII
Error loadGenericTargetInfo(StringRef TheTriple, StringRef TheFeatures, StringRef TheCPU)
LVAddress getSymbolTableAddress(StringRef Name)
void print(raw_ostream &OS) const
std::unique_ptr< const MCSubtargetInfo > STI
void addToSymbolTable(StringRef Name, LVScope *Function, LVSectionIndex SectionIndex=0)
virtual void mapRangeAddress(const object::ObjectFile &Obj)
void processLines(LVLines *DebugLines, LVSectionIndex SectionIndex)
void mapVirtualAddress(const object::ObjectFile &Obj)
std::unique_ptr< const MCAsmInfo > MAI
LVSectionIndex getSymbolTableIndex(StringRef Name)
bool getSymbolTableIsComdat(StringRef Name)
std::unique_ptr< const MCRegisterInfo > MRI
std::unique_ptr< const MCDisassembler > MD
std::unique_ptr< MCInstPrinter > MIP
uint64_t getAddress() const
Definition LVLine.h:76
void resolvePatternMatch(LVLine *Line)
Definition LVOptions.h:609
LVScope * getEntry(LVAddress Address) const
Definition LVRange.cpp:84
LVRange * getSectionRanges(LVSectionIndex SectionIndex)
Definition LVReader.cpp:208
LVSectionIndex getDotTextSectionIndex() const
Definition LVReader.h:297
LVScopeCompileUnit * CompileUnit
Definition LVReader.h:149
LVSectionIndex DotTextSectionIndex
Definition LVReader.h:152
void addElement(LVElement *Element)
Definition LVScope.cpp:122
const LVScopes * getScopes() const
Definition LVScope.h:210
LVSectionIndex getIndex(StringRef Name)
LVAddress getAddress(StringRef Name)
void add(StringRef Name, LVScope *Function, LVSectionIndex SectionIndex=0)
LVSectionIndex update(LVScope *Function)
const LVSymbolTableEntry & getEntry(StringRef Name)
const coff_section * getCOFFSection(const SectionRef &Section) const
This class is the base class for all object file types.
Definition ObjectFile.h:231
section_iterator_range sections() const
Definition ObjectFile.h:331
This is a value type class that represents a single section in the list of sections in the object fil...
Definition ObjectFile.h:83
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
std::string & str()
Returns the string's reference.
A raw_ostream that writes to an SmallVector or SmallString.
#define UINT64_MAX
Definition DataTypes.h:77
@ IMAGE_SCN_CNT_CODE
Definition COFF.h:303
@ IMAGE_SCN_LNK_COMDAT
Definition COFF.h:309
FormattedNumber hexValue(uint64_t N, unsigned Width=HEX_WIDTH, bool Upper=false)
Definition LVSupport.h:123
std::pair< LVAddress, uint64_t > LVNameInfo
Definition LVScope.h:30
LVReader & getReader()
Definition LVReader.h:360
uint64_t LVOffset
Definition LVObject.h:39
LVPatterns & patterns()
Definition LVOptions.h:645
constexpr LVSectionIndex UndefinedSectionIndex
Definition LVReader.h:29
SmallVector< LVScope *, 8 > LVScopes
Definition LVObject.h:80
uint64_t LVSectionIndex
Definition LVObject.h:35
SmallVector< LVLine *, 8 > LVLines
Definition LVObject.h:77
uint64_t LVAddress
Definition LVObject.h:36
LVOptions & options()
Definition LVOptions.h:448
This is an optimization pass for GlobalISel generic memory operations.
FormattedNumber format_decimal(int64_t N, unsigned Width)
format_decimal - Output N as a right justified, fixed-width decimal.
Definition Format.h:216
ArrayRef< CharT > arrayRefFromStringRef(StringRef Input)
Construct an array ref of chars from a string ref.
LLVM_ABI raw_fd_ostream & outs()
This returns a reference to a raw_fd_ostream for standard output.
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1321
@ bad_address
Definition Errc.h:38
@ invalid_argument
Definition Errc.h:56
auto formatv(bool Validate, const char *Fmt, Ts &&...Vals)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
support::detail::AlignAdapter< T > fmt_align(T &&Item, AlignStyle Where, size_t Amount, char Fill=' ')
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
void consumeError(Error Err)
Consume a Error without doing anything.
Definition Error.h:1106
FormattedBytes format_bytes(ArrayRef< uint8_t > Bytes, std::optional< uint64_t > FirstByteOffset=std::nullopt, uint32_t NumPerLine=16, uint8_t ByteGroupSize=4, uint32_t IndentLevel=0, bool Upper=false)
Definition Format.h:245
static LLVM_ABI const Target * lookupTarget(const Triple &TheTriple, std::string &Error)
lookupTarget - Lookup a target based on a target triple.
support::ulittle32_t VirtualAddress
Definition COFF.h:452