43#define DEBUG_TYPE "memprof"
48template <
class T = u
int64_t>
inline T alignedRead(
const char *
Ptr) {
49 static_assert(std::is_pod<T>::value,
"Not a pod type.");
50 assert(
reinterpret_cast<size_t>(
Ptr) %
sizeof(
T) == 0 &&
"Unaligned Read");
51 return *
reinterpret_cast<const T *
>(
Ptr);
54Error checkBuffer(
const MemoryBuffer &Buffer) {
58 if (Buffer.getBufferSize() == 0)
61 if (Buffer.getBufferSize() <
sizeof(Header)) {
68 const char *Next = Buffer.getBufferStart();
69 while (Next < Buffer.getBufferEnd()) {
70 auto *
H =
reinterpret_cast<const Header *
>(Next);
71 if (
H->Version != MEMPROF_RAW_VERSION) {
75 TotalSize +=
H->TotalSize;
79 if (Buffer.getBufferSize() != TotalSize) {
86 using namespace support;
89 endian::readNext<uint64_t, llvm::endianness::little>(
Ptr);
92 Items.
push_back(*
reinterpret_cast<const SegmentEntry *
>(
93 Ptr +
I *
sizeof(SegmentEntry)));
99readMemInfoBlocks(
const char *
Ptr) {
100 using namespace support;
103 endian::readNext<uint64_t, llvm::endianness::little>(
Ptr);
107 endian::readNext<uint64_t, llvm::endianness::little>(
Ptr);
108 const MemInfoBlock MIB = *
reinterpret_cast<const MemInfoBlock *
>(
Ptr);
111 Ptr +=
sizeof(MemInfoBlock);
117 using namespace support;
120 endian::readNext<uint64_t, llvm::endianness::little>(
Ptr);
125 endian::readNext<uint64_t, llvm::endianness::little>(
Ptr);
127 endian::readNext<uint64_t, llvm::endianness::little>(
Ptr);
129 SmallVector<uint64_t> CallStack;
130 CallStack.reserve(NumPCs);
131 for (
uint64_t J = 0; J < NumPCs; J++) {
133 endian::readNext<uint64_t, llvm::endianness::little>(
Ptr));
136 Items[StackId] = CallStack;
145 for (
const auto &[Id, Stack] :
From) {
146 auto I = To.find(Id);
151 if (Stack !=
I->second)
158Error report(Error
E,
const StringRef
Context) {
163bool isRuntimePath(
const StringRef Path) {
167 return Filename.equals(
"memprof_malloc_linux.cpp") ||
168 Filename.equals(
"memprof_interceptors.cpp") ||
169 Filename.equals(
"memprof_new_delete.cpp");
172std::string getBuildIdString(
const SegmentEntry &Entry) {
174 if (Entry.BuildIdSize == 0)
178 raw_string_ostream
OS(Str);
179 for (
size_t I = 0;
I < Entry.BuildIdSize;
I++) {
189 : IdToFrame(
std::
move(FrameIdMap)),
190 FunctionProfileData(
std::
move(ProfData)) {
195 for (
auto &AS :
Record.AllocSites) {
200 for (
auto &CS :
Record.CallSites) {
202 Record.CallSiteIds.push_back(CSId);
212 if (std::error_code EC = BufferOr.getError())
215 std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
216 return create(std::move(Buffer), ProfiledBinary, KeepName);
221 const StringRef ProfiledBinary,
bool KeepName) {
222 if (
Error E = checkBuffer(*Buffer))
223 return report(std::move(
E), Buffer->getBufferIdentifier());
225 if (ProfiledBinary.
empty()) {
227 const std::vector<std::string> BuildIds =
peekBuildIds(Buffer.get());
228 std::string ErrorMessage(
229 R
"(Path to profiled binary is empty, expected binary with one of the following build ids:
231 for (
const auto &Id : BuildIds) {
232 ErrorMessage +=
"\n BuildId: ";
242 return report(BinaryOr.takeError(), ProfiledBinary);
246 std::unique_ptr<RawMemProfReader> Reader(
248 if (
Error E = Reader->initialize(std::move(Buffer))) {
251 return std::move(Reader);
259 std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
269 return Magic == MEMPROF_RAW_MAGIC_64;
273 uint64_t NumAllocFunctions = 0, NumMibInfo = 0;
275 const size_t NumAllocSites = KV.second.AllocSites.size();
276 if (NumAllocSites > 0) {
278 NumMibInfo += NumAllocSites;
282 OS <<
"MemprofProfile:\n";
284 OS <<
" Version: " << MEMPROF_RAW_VERSION <<
"\n";
285 OS <<
" NumSegments: " << SegmentInfo.
size() <<
"\n";
286 OS <<
" NumMibInfo: " << NumMibInfo <<
"\n";
287 OS <<
" NumAllocFunctions: " << NumAllocFunctions <<
"\n";
288 OS <<
" NumStackOffsets: " << StackMap.
size() <<
"\n";
290 OS <<
" Segments:\n";
291 for (
const auto &Entry : SegmentInfo) {
293 OS <<
" BuildId: " << getBuildIdString(Entry) <<
"\n";
294 OS <<
" Start: 0x" << llvm::utohexstr(Entry.Start) <<
"\n";
295 OS <<
" End: 0x" << llvm::utohexstr(Entry.End) <<
"\n";
296 OS <<
" Offset: 0x" << llvm::utohexstr(Entry.Offset) <<
"\n";
300 for (
const auto &[GUID,
Record] : *
this) {
302 OS <<
" FunctionGUID: " << GUID <<
"\n";
307Error RawMemProfReader::initialize(std::unique_ptr<MemoryBuffer> DataBuffer) {
308 const StringRef FileName = Binary.getBinary()->getFileName();
310 auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Binary.getBinary());
312 return report(make_error<StringError>(
Twine(
"Not an ELF file: "),
320 auto *Elf64LEObject = llvm::cast<llvm::object::ELF64LEObjectFile>(ElfObject);
322 auto PHdrsOr = ElfFile.program_headers();
325 make_error<StringError>(
Twine(
"Could not read program headers: "),
329 int NumExecutableSegments = 0;
330 for (
const auto &Phdr : *PHdrsOr) {
335 if (++NumExecutableSegments > 1) {
337 make_error<StringError>(
338 "Expect only one executable load segment in the binary",
347 PreferredTextSegmentAddress = Phdr.p_vaddr;
348 assert(Phdr.p_vaddr == (Phdr.p_vaddr & ~(0x1000 - 1U)) &&
349 "Expect p_vaddr to always be page aligned");
350 assert(Phdr.p_offset == 0 &&
"Expect p_offset = 0 for symbolization.");
355 auto Triple = ElfObject->makeTriple();
357 return report(make_error<StringError>(Twine(
"Unsupported target: ") +
358 Triple.getArchName(),
363 if (Error
E = readRawProfile(std::move(DataBuffer)))
366 if (Error
E = setupForSymbolization())
369 auto *
Object = cast<object::ObjectFile>(Binary.getBinary());
374 Object, std::move(Context),
false);
376 return report(SOFOr.takeError(), FileName);
377 auto Symbolizer = std::move(SOFOr.get());
383 if (Error
E = symbolizeAndFilterStackFrames(std::move(Symbolizer)))
386 return mapRawProfileToRecords();
389Error RawMemProfReader::setupForSymbolization() {
390 auto *
Object = cast<object::ObjectFile>(Binary.getBinary());
392 if (BinaryId.empty())
393 return make_error<StringError>(Twine(
"No build id found in binary ") +
394 Binary.getBinary()->getFileName(),
398 for (
const auto &Entry : SegmentInfo) {
400 if (BinaryId == SegmentId) {
403 if (++NumMatched > 1) {
404 return make_error<StringError>(
405 "We expect only one executable segment in the profiled binary",
408 ProfiledTextSegmentStart = Entry.Start;
409 ProfiledTextSegmentEnd = Entry.End;
412 assert(NumMatched != 0 &&
"No matching executable segments in segment info.");
413 assert((PreferredTextSegmentAddress == 0 ||
414 (PreferredTextSegmentAddress == ProfiledTextSegmentStart)) &&
415 "Expect text segment address to be 0 or equal to profiled text "
420Error RawMemProfReader::mapRawProfileToRecords() {
426 PerFunctionCallSites;
430 for (
const auto &[StackId, MIB] : CallstackProfileData) {
431 auto It = StackMap.
find(StackId);
432 if (It == StackMap.
end())
433 return make_error<InstrProfError>(
435 "memprof callstack record does not contain id: " + Twine(StackId));
439 Callstack.
reserve(It->getSecond().size());
442 for (
size_t I = 0;
I < Addresses.
size();
I++) {
445 "Address not found in SymbolizedFrame map");
446 const SmallVector<FrameId> &Frames = SymbolizedFrame[
Address];
449 "The last frame should not be inlined");
454 for (
size_t J = 0; J < Frames.size(); J++) {
455 if (
I == 0 && J == 0)
462 PerFunctionCallSites[Guid].
insert(&Frames);
466 Callstack.
append(Frames.begin(), Frames.end());
474 for (
size_t I = 0; ;
I++) {
478 IndexedMemProfRecord &Record =
Result.first->second;
479 Record.AllocSites.emplace_back(Callstack, CSId, MIB);
481 if (!
F.IsInlineFrame)
487 for (
const auto &[Id, Locs] : PerFunctionCallSites) {
491 IndexedMemProfRecord &Record =
Result.first->second;
492 for (LocationPtr Loc : Locs) {
495 Record.CallSites.push_back(*Loc);
496 Record.CallSiteIds.push_back(CSId);
505Error RawMemProfReader::symbolizeAndFilterStackFrames(
506 std::unique_ptr<llvm::symbolize::SymbolizableModule> Symbolizer) {
508 const DILineInfoSpecifier Specifier(
509 DILineInfoSpecifier::FileLineInfoKind::RawValue,
510 DILineInfoSpecifier::FunctionNameKind::LinkageName);
518 for (
auto &Entry : StackMap) {
519 for (
const uint64_t VAddr : Entry.getSecond()) {
523 if (SymbolizedFrame.count(VAddr) > 0 ||
527 Expected<DIInliningInfo> DIOr = Symbolizer->symbolizeInlinedCode(
528 getModuleOffset(VAddr), Specifier,
false);
530 return DIOr.takeError();
531 DIInliningInfo DI = DIOr.get();
535 isRuntimePath(DI.getFrame(0).FileName)) {
536 AllVAddrsToDiscard.
insert(VAddr);
540 for (
size_t I = 0, NumFrames = DI.getNumberOfFrames();
I < NumFrames;
542 const auto &DIFrame = DI.getFrame(
I);
545 const Frame
F(Guid, DIFrame.Line - DIFrame.StartLine, DIFrame.Column,
552 if (KeepSymbolName) {
553 StringRef CanonicalName =
555 DIFrame.FunctionName);
556 GuidToSymbolName.
insert({Guid, CanonicalName.str()});
561 SymbolizedFrame[VAddr].push_back(Hash);
565 auto &CallStack = Entry.getSecond();
569 if (CallStack.empty())
570 EntriesToErase.
push_back(Entry.getFirst());
574 for (
const uint64_t Id : EntriesToErase) {
576 CallstackProfileData.erase(Id);
579 if (StackMap.empty())
580 return make_error<InstrProfError>(
582 "no entries in callstack map after symbolization");
587std::vector<std::string>
596 std::vector<std::string> BuildIds;
598 while (Next < DataBuffer->getBufferEnd()) {
599 auto *Header =
reinterpret_cast<const memprof::Header *
>(Next);
602 readSegmentEntries(Next + Header->SegmentOffset);
604 for (
const auto &Entry : Entries) {
605 const std::string Id = getBuildIdString(Entry);
608 BuildIds.push_back(Id);
612 Next += Header->TotalSize;
617Error RawMemProfReader::readRawProfile(
618 std::unique_ptr<MemoryBuffer> DataBuffer) {
619 const char *Next = DataBuffer->getBufferStart();
621 while (Next < DataBuffer->getBufferEnd()) {
622 auto *Header =
reinterpret_cast<const memprof::Header *
>(Next);
627 readSegmentEntries(Next + Header->SegmentOffset);
628 if (!SegmentInfo.empty() && SegmentInfo != Entries) {
632 return make_error<InstrProfError>(
634 "memprof raw profile has different segment information");
636 SegmentInfo.assign(Entries.begin(), Entries.end());
641 for (
const auto &[Id, MIB] : readMemInfoBlocks(Next + Header->MIBOffset)) {
642 if (CallstackProfileData.count(Id)) {
643 CallstackProfileData[Id].Merge(MIB);
645 CallstackProfileData[Id] = MIB;
651 const CallStackMap CSM = readStackInfo(Next + Header->StackOffset);
652 if (StackMap.empty()) {
655 if (mergeStackMap(CSM, StackMap))
656 return make_error<InstrProfError>(
658 "memprof raw profile got different call stack for same id");
661 Next += Header->TotalSize;
667object::SectionedAddress
668RawMemProfReader::getModuleOffset(
const uint64_t VirtualAddress) {
669 if (VirtualAddress > ProfiledTextSegmentStart &&
670 VirtualAddress <= ProfiledTextSegmentEnd) {
676 VirtualAddress + PreferredTextSegmentAddress - ProfiledTextSegmentStart;
677 return object::SectionedAddress{AdjustedAddress};
682 return object::SectionedAddress{VirtualAddress};
691 auto IdToFrameCallback = [
this](
const FrameId Id) {
693 if (!this->KeepSymbolName)
695 auto Iter = this->GuidToSymbolName.
find(
F.Function);
697 F.SymbolName =
Iter->getSecond();
BlockVerifier::State From
This file declares a library for handling Build IDs and using them to find debug info.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file defines the DenseMap class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallSet class.
This file defines the SmallVector class.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
size_t size() const
size - Get the array size.
static std::unique_ptr< DWARFContext > create(const object::ObjectFile &Obj, ProcessDebugRelocations RelocAction=ProcessDebugRelocations::Process, const LoadedObjectInfo *L=nullptr, std::string DWPName="", std::function< void(Error)> RecoverableErrorHandler=WithColor::defaultErrorHandler, std::function< void(Error)> WarningHandler=WithColor::defaultWarningHandler, bool ThreadSafe=false)
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Implements a dense probed hash-table based set.
Lightweight error class with error context and mandatory checking.
static ErrorSuccess success()
Create a success value.
Tagged union holding either a T or a Error.
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
This class implements a map that also provides access to all stored values in a deterministic order.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
This interface provides simple read-only access to a block of memory, and provides simple methods for reading files and standard input into a memory buffer.
size_t getBufferSize() const
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFileOrSTDIN(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, or open stdin if the Filename is "-".
const char * getBufferStart() const
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less than N).
bool contains(const T &V) const
Check if the SmallSet contains the given element.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
constexpr bool empty() const
empty - Check if the string is empty.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
const Frame & idToFrame(const FrameId Id) const
virtual Error readNextRecord(GuidMemProfRecordPair &GuidRecord, std::function< const Frame(const FrameId)> Callback=nullptr)
llvm::DenseMap< FrameId, Frame > IdToFrame
llvm::MapVector< GlobalValue::GUID, IndexedMemProfRecord >::iterator Iter
llvm::MapVector< GlobalValue::GUID, IndexedMemProfRecord > FunctionProfileData
std::pair< GlobalValue::GUID, MemProfRecord > GuidMemProfRecordPair
llvm::DenseMap< CallStackId, llvm::SmallVector< FrameId > > CSIdToCallStack
void printYAML(raw_ostream &OS)
static Expected< std::unique_ptr< RawMemProfReader > > create(const Twine &Path, StringRef ProfiledBinary, bool KeepName=false)
static std::vector< std::string > peekBuildIds(MemoryBuffer *DataBuffer)
virtual Error readNextRecord(GuidMemProfRecordPair &GuidRecord, std::function< const Frame(const FrameId)> Callback) override
static bool hasFormat(const MemoryBuffer &DataBuffer)
This class implements an extremely fast bulk output stream that can only output to a stream.
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
static Expected< std::unique_ptr< SymbolizableObjectFile > > create(const object::ObjectFile *Obj, std::unique_ptr< DIContext > DICtx, bool UntagAddresses)
CallStackId hashCallStack(ArrayRef< FrameId > CS)
llvm::DenseMap< uint64_t, llvm::SmallVector< uint64_t > > CallStackMap
void verifyFunctionProfileData(const llvm::MapVector< GlobalValue::GUID, IndexedMemProfRecord > &FunctionProfileData)
BuildIDRef getBuildID(const ObjectFile *Obj)
Returns the build ID, if any, contained in the given object file.
ArrayRef< uint8_t > BuildIDRef
A reference to a BuildID in binary form.
Expected< std::unique_ptr< Binary > > createBinary(MemoryBufferRef Source, LLVMContext *Context=nullptr, bool InitContent=true)
Create a Binary from Source, autodetecting the file type.
StringRef filename(StringRef path, Style style=Style::native)
Get filename.
This is an optimization pass for GlobalISel generic memory operations.
std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no sensible translation to std::error_code exists.
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Error joinErrors(Error E1, Error E2)
Concatenate errors.
FormattedNumber format_hex_no_prefix(uint64_t N, unsigned Width, bool Upper=false)
format_hex_no_prefix - Output N as a fixed width hexadecimal.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent to: C.erase(remove_if(C, pred), C.end());
Error errorCodeToError(std::error_code EC)
Helper for converting an std::error_code to a Error.
Implement std::hash so that hash_code can be used in STL containers.
static constexpr const char *const BadString
GlobalValue::GUID Function
static GlobalValue::GUID getGUID(const StringRef FunctionName)