LLVM 22.0.0git
MemProfUse.cpp
Go to the documentation of this file.
1//===- MemProfUse.cpp - memory allocation profile use pass --*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the MemProfUsePass which reads memory profiling data
10// and uses it to add metadata to instructions to guide optimization.
11//
12//===----------------------------------------------------------------------===//
13
16#include "llvm/ADT/Statistic.h"
17#include "llvm/ADT/StringRef.h"
23#include "llvm/IR/Function.h"
25#include "llvm/IR/Module.h"
30#include "llvm/Support/BLAKE3.h"
32#include "llvm/Support/Debug.h"
36#include <map>
37#include <set>
38
39using namespace llvm;
40using namespace llvm::memprof;
41
42#define DEBUG_TYPE "memprof"
43
44namespace llvm {
48} // namespace llvm
49
50// By default disable matching of allocation profiles onto operator new calls
51// that already explicitly pass a hot/cold hint, since we don't currently
52// override these hints anyway.
54 "memprof-match-hot-cold-new",
56 "Match allocation profiles onto existing hot/cold operator new calls"),
57 cl::Hidden, cl::init(false));
58
59static cl::opt<bool>
60 ClPrintMemProfMatchInfo("memprof-print-match-info",
61 cl::desc("Print matching stats for each allocation "
62 "context in this module's profiles"),
63 cl::Hidden, cl::init(false));
64
65static cl::opt<bool>
66 PrintFunctionGuids("memprof-print-function-guids",
67 cl::desc("Print function GUIDs computed for matching"),
68 cl::Hidden, cl::init(false));
69
70static cl::opt<bool>
71 SalvageStaleProfile("memprof-salvage-stale-profile",
72 cl::desc("Salvage stale MemProf profile"),
73 cl::init(false), cl::Hidden);
74
76 "memprof-attach-calleeguids",
78 "Attach calleeguids as value profile metadata for indirect calls."),
79 cl::init(true), cl::Hidden);
80
82 "memprof-matching-cold-threshold", cl::init(100), cl::Hidden,
83 cl::desc("Min percent of cold bytes matched to hint allocation cold"));
84
86 "memprof-annotate-static-data-prefix", cl::init(false), cl::Hidden,
87 cl::desc("If true, annotate the static data section prefix"));
88
89// Matching statistics
90STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
91STATISTIC(NumOfMemProfMismatch,
92 "Number of functions having mismatched memory profile hash.");
93STATISTIC(NumOfMemProfFunc, "Number of functions having valid memory profile.");
94STATISTIC(NumOfMemProfAllocContextProfiles,
95 "Number of alloc contexts in memory profile.");
96STATISTIC(NumOfMemProfCallSiteProfiles,
97 "Number of callsites in memory profile.");
98STATISTIC(NumOfMemProfMatchedAllocContexts,
99 "Number of matched memory profile alloc contexts.");
100STATISTIC(NumOfMemProfMatchedAllocs,
101 "Number of matched memory profile allocs.");
102STATISTIC(NumOfMemProfMatchedCallSites,
103 "Number of matched memory profile callsites.");
104STATISTIC(NumOfMemProfHotGlobalVars,
105 "Number of global vars annotated with 'hot' section prefix.");
106STATISTIC(NumOfMemProfColdGlobalVars,
107 "Number of global vars annotated with 'unlikely' section prefix.");
108STATISTIC(NumOfMemProfUnknownGlobalVars,
109 "Number of global vars with unknown hotness (no section prefix).");
110STATISTIC(NumOfMemProfExplicitSectionGlobalVars,
111 "Number of global vars with user-specified section (not annotated).");
112
114 ArrayRef<uint64_t> InlinedCallStack,
115 LLVMContext &Ctx) {
116 I.setMetadata(LLVMContext::MD_callsite,
117 buildCallstackMetadata(InlinedCallStack, Ctx));
118}
119
121 uint32_t Column) {
124 HashBuilder.add(Function, LineOffset, Column);
126 uint64_t Id;
127 std::memcpy(&Id, Hash.data(), sizeof(Hash));
128 return Id;
129}
130
134
136 return getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(),
137 AllocInfo->Info.getAllocCount(),
138 AllocInfo->Info.getTotalLifetime());
139}
140
143 uint64_t FullStackId) {
144 SmallVector<uint64_t> StackIds;
145 for (const auto &StackFrame : AllocInfo->CallStack)
146 StackIds.push_back(computeStackId(StackFrame));
148 std::vector<ContextTotalSize> ContextSizeInfo;
150 auto TotalSize = AllocInfo->Info.getTotalSize();
151 assert(TotalSize);
152 assert(FullStackId != 0);
153 ContextSizeInfo.push_back({FullStackId, TotalSize});
154 }
155 AllocTrie.addCallStack(AllocType, StackIds, std::move(ContextSizeInfo));
156 return AllocType;
157}
158
159// Return true if InlinedCallStack, computed from a call instruction's debug
160// info, is a prefix of ProfileCallStack, a list of Frames from profile data
161// (either the allocation data or a callsite).
162static bool
164 ArrayRef<uint64_t> InlinedCallStack) {
165 return ProfileCallStack.size() >= InlinedCallStack.size() &&
166 llvm::equal(ProfileCallStack.take_front(InlinedCallStack.size()),
167 InlinedCallStack, [](const Frame &F, uint64_t StackId) {
168 return computeStackId(F) == StackId;
169 });
170}
171
172static bool isAllocationWithHotColdVariant(const Function *Callee,
173 const TargetLibraryInfo &TLI) {
174 if (!Callee)
175 return false;
176 LibFunc Func;
177 if (!TLI.getLibFunc(*Callee, Func))
178 return false;
179 switch (Func) {
180 case LibFunc_Znwm:
181 case LibFunc_ZnwmRKSt9nothrow_t:
182 case LibFunc_ZnwmSt11align_val_t:
183 case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:
184 case LibFunc_Znam:
185 case LibFunc_ZnamRKSt9nothrow_t:
186 case LibFunc_ZnamSt11align_val_t:
187 case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:
188 case LibFunc_size_returning_new:
189 case LibFunc_size_returning_new_aligned:
190 return true;
191 case LibFunc_Znwm12__hot_cold_t:
192 case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t:
193 case LibFunc_ZnwmSt11align_val_t12__hot_cold_t:
194 case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
195 case LibFunc_Znam12__hot_cold_t:
196 case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t:
197 case LibFunc_ZnamSt11align_val_t12__hot_cold_t:
198 case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
199 case LibFunc_size_returning_new_hot_cold:
200 case LibFunc_size_returning_new_aligned_hot_cold:
202 default:
203 return false;
204 }
205}
206
208 AnnotationKind Kind) {
210 "Should not handle AnnotationOK here");
211 SmallString<32> Reason;
212 switch (Kind) {
214 ++NumOfMemProfExplicitSectionGlobalVars;
215 Reason.append("explicit section name");
216 break;
218 Reason.append("linker declaration");
219 break;
221 Reason.append("name starts with `llvm.`");
222 break;
223 default:
224 llvm_unreachable("Unexpected annotation kind");
225 }
226 LLVM_DEBUG(dbgs() << "Skip annotation for " << GVar.getName() << " due to "
227 << Reason << ".\n");
228}
229
234
237 function_ref<bool(uint64_t)> IsPresentInProfile) {
239
240 auto GetOffset = [](const DILocation *DIL) {
241 return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
242 0xffff;
243 };
244
245 for (Function &F : M) {
246 if (F.isDeclaration())
247 continue;
248
249 for (auto &BB : F) {
250 for (auto &I : BB) {
252 continue;
253
254 auto *CB = dyn_cast<CallBase>(&I);
255 auto *CalledFunction = CB->getCalledFunction();
256 // Disregard indirect calls and intrinsics.
257 if (!CalledFunction || CalledFunction->isIntrinsic())
258 continue;
259
260 StringRef CalleeName = CalledFunction->getName();
261 // True if we are calling a heap allocation function that supports
262 // hot/cold variants.
263 bool IsAlloc = isAllocationWithHotColdVariant(CalledFunction, TLI);
264 // True for the first iteration below, indicating that we are looking at
265 // a leaf node.
266 bool IsLeaf = true;
267 for (const DILocation *DIL = I.getDebugLoc(); DIL;
268 DIL = DIL->getInlinedAt()) {
269 StringRef CallerName = DIL->getSubprogramLinkageName();
270 assert(!CallerName.empty() &&
271 "Be sure to enable -fdebug-info-for-profiling");
272 uint64_t CallerGUID = memprof::getGUID(CallerName);
273 uint64_t CalleeGUID = memprof::getGUID(CalleeName);
274 // Pretend that we are calling a function with GUID == 0 if we are
275 // in the inline stack leading to a heap allocation function.
276 if (IsAlloc) {
277 if (IsLeaf) {
278 // For leaf nodes, set CalleeGUID to 0 without consulting
279 // IsPresentInProfile.
280 CalleeGUID = 0;
281 } else if (!IsPresentInProfile(CalleeGUID)) {
282 // In addition to the leaf case above, continue to set CalleeGUID
283 // to 0 as long as we don't see CalleeGUID in the profile.
284 CalleeGUID = 0;
285 } else {
286 // Once we encounter a callee that exists in the profile, stop
287 // setting CalleeGUID to 0.
288 IsAlloc = false;
289 }
290 }
291
292 LineLocation Loc = {GetOffset(DIL), DIL->getColumn()};
293 Calls[CallerGUID].emplace_back(Loc, CalleeGUID);
294 CalleeName = CallerName;
295 IsLeaf = false;
296 }
297 }
298 }
299 }
300
301 // Sort each call list by the source location.
302 for (auto &[CallerGUID, CallList] : Calls) {
303 llvm::sort(CallList);
304 CallList.erase(llvm::unique(CallList), CallList.end());
305 }
306
307 return Calls;
308}
309
312 const TargetLibraryInfo &TLI) {
314
316 MemProfReader->getMemProfCallerCalleePairs();
318 extractCallsFromIR(M, TLI, [&](uint64_t GUID) {
319 return CallsFromProfile.contains(GUID);
320 });
321
322 // Compute an undrift map for each CallerGUID.
323 for (const auto &[CallerGUID, IRAnchors] : CallsFromIR) {
324 auto It = CallsFromProfile.find(CallerGUID);
325 if (It == CallsFromProfile.end())
326 continue;
327 const auto &ProfileAnchors = It->second;
328
329 LocToLocMap Matchings;
331 ProfileAnchors, IRAnchors, std::equal_to<GlobalValue::GUID>(),
332 [&](LineLocation A, LineLocation B) { Matchings.try_emplace(A, B); });
333 [[maybe_unused]] bool Inserted =
334 UndriftMaps.try_emplace(CallerGUID, std::move(Matchings)).second;
335
336 // The insertion must succeed because we visit each GUID exactly once.
337 assert(Inserted);
338 }
339
340 return UndriftMaps;
341}
342
343// Given a MemProfRecord, undrift all the source locations present in the
344// record in place.
345static void
347 memprof::MemProfRecord &MemProfRec) {
348 // Undrift a call stack in place.
349 auto UndriftCallStack = [&](std::vector<Frame> &CallStack) {
350 for (auto &F : CallStack) {
351 auto I = UndriftMaps.find(F.Function);
352 if (I == UndriftMaps.end())
353 continue;
354 auto J = I->second.find(LineLocation(F.LineOffset, F.Column));
355 if (J == I->second.end())
356 continue;
357 auto &NewLoc = J->second;
358 F.LineOffset = NewLoc.LineOffset;
359 F.Column = NewLoc.Column;
360 }
361 };
362
363 for (auto &AS : MemProfRec.AllocSites)
364 UndriftCallStack(AS.CallStack);
365
366 for (auto &CS : MemProfRec.CallSites)
367 UndriftCallStack(CS.Frames);
368}
369
370// Helper function to process CalleeGuids and create value profile metadata
372 ArrayRef<GlobalValue::GUID> CalleeGuids) {
373 if (!ClMemProfAttachCalleeGuids || CalleeGuids.empty())
374 return;
375
376 if (I.getMetadata(LLVMContext::MD_prof)) {
377 uint64_t Unused;
378 // TODO: When merging is implemented, increase this to a typical ICP value
379 // (e.g., 3-6). For now, we only need to check if existing data exists, so 1
380 // is sufficient.
381 auto ExistingVD = getValueProfDataFromInst(I, IPVK_IndirectCallTarget,
382 /*MaxNumValueData=*/1, Unused);
383 // We don't know how to merge value profile data yet.
384 if (!ExistingVD.empty()) {
385 return;
386 }
387 }
388
390 uint64_t TotalCount = 0;
391
392 for (const GlobalValue::GUID CalleeGUID : CalleeGuids) {
393 InstrProfValueData VD;
394 VD.Value = CalleeGUID;
395 // For MemProf, we don't have actual call counts, so we assign
396 // a weight of 1 to each potential target.
397 // TODO: Consider making this weight configurable or increasing it to
398 // improve effectiveness for ICP.
399 VD.Count = 1;
400 VDs.push_back(VD);
401 TotalCount += VD.Count;
402 }
403
404 if (!VDs.empty()) {
405 annotateValueSite(M, I, VDs, TotalCount, IPVK_IndirectCallTarget,
406 VDs.size());
407 }
408}
409
410static void
412 ArrayRef<uint64_t> InlinedCallStack, LLVMContext &Ctx,
413 OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize,
414 const std::set<const AllocationInfo *> &AllocInfoSet,
415 std::map<std::pair<uint64_t, unsigned>, AllocMatchInfo>
416 &FullStackIdToAllocMatchInfo) {
417 // TODO: Remove this once the profile creation logic deduplicates contexts
418 // that are the same other than the IsInlineFrame bool. Until then, keep the
419 // largest.
420 DenseMap<uint64_t, const AllocationInfo *> UniqueFullContextIdAllocInfo;
421 for (auto *AllocInfo : AllocInfoSet) {
422 auto FullStackId = computeFullStackId(AllocInfo->CallStack);
423 auto [It, Inserted] =
424 UniqueFullContextIdAllocInfo.insert({FullStackId, AllocInfo});
425 // If inserted entry, done.
426 if (Inserted)
427 continue;
428 // Keep the larger one, or the noncold one if they are the same size.
429 auto CurSize = It->second->Info.getTotalSize();
430 auto NewSize = AllocInfo->Info.getTotalSize();
431 if ((CurSize > NewSize) ||
432 (CurSize == NewSize &&
434 continue;
435 It->second = AllocInfo;
436 }
437 // We may match this instruction's location list to multiple MIB
438 // contexts. Add them to a Trie specialized for trimming the contexts to
439 // the minimal needed to disambiguate contexts with unique behavior.
440 CallStackTrie AllocTrie(&ORE, MaxColdSize);
441 uint64_t TotalSize = 0;
442 uint64_t TotalColdSize = 0;
443 for (auto &[FullStackId, AllocInfo] : UniqueFullContextIdAllocInfo) {
444 // Check the full inlined call stack against this one.
445 // If we found and thus matched all frames on the call, include
446 // this MIB.
448 InlinedCallStack)) {
449 NumOfMemProfMatchedAllocContexts++;
450 auto AllocType = addCallStack(AllocTrie, AllocInfo, FullStackId);
451 TotalSize += AllocInfo->Info.getTotalSize();
453 TotalColdSize += AllocInfo->Info.getTotalSize();
454 // Record information about the allocation if match info printing
455 // was requested.
457 assert(FullStackId != 0);
458 FullStackIdToAllocMatchInfo[std::make_pair(FullStackId,
459 InlinedCallStack.size())] = {
460 AllocInfo->Info.getTotalSize(), AllocType};
461 }
462 ORE.emit(
463 OptimizationRemark(DEBUG_TYPE, "MemProfUse", CI)
464 << ore::NV("AllocationCall", CI) << " in function "
465 << ore::NV("Caller", CI->getFunction())
466 << " matched alloc context with alloc type "
468 << " total size " << ore::NV("Size", AllocInfo->Info.getTotalSize())
469 << " full context id " << ore::NV("Context", FullStackId)
470 << " frame count " << ore::NV("Frames", InlinedCallStack.size()));
471 }
472 }
473 // If the threshold for the percent of cold bytes is less than 100%,
474 // and not all bytes are cold, see if we should still hint this
475 // allocation as cold without context sensitivity.
476 if (TotalColdSize < TotalSize && MinMatchedColdBytePercent < 100 &&
477 TotalColdSize * 100 >= MinMatchedColdBytePercent * TotalSize) {
478 AllocTrie.addSingleAllocTypeAttribute(CI, AllocationType::Cold, "dominant");
479 return;
480 }
481
482 // We might not have matched any to the full inlined call stack.
483 // But if we did, create and attach metadata, or a function attribute if
484 // all contexts have identical profiled behavior.
485 if (!AllocTrie.empty()) {
486 NumOfMemProfMatchedAllocs++;
487 // MemprofMDAttached will be false if a function attribute was
488 // attached.
489 bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI);
490 assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof));
491 if (MemprofMDAttached) {
492 // Add callsite metadata for the instruction's location list so that
493 // it is simpler later on to identify which part of the MIB contexts
494 // are from this particular instruction (including during inlining,
495 // when the callsite metadata will be updated appropriately).
496 // FIXME: can this be changed to strip out the matching stack
497 // context ids from the MIB contexts and not add any callsite
498 // metadata here to save space?
499 addCallsiteMetadata(I, InlinedCallStack, Ctx);
500 }
501 }
502}
503
504// Helper struct for maintaining refs to callsite data. As an alternative we
505// could store a pointer to the CallSiteInfo struct but we also need the frame
506// index. Using ArrayRefs instead makes it a little easier to read.
508 // Subset of frames for the corresponding CallSiteInfo.
510 // Potential targets for indirect calls.
512};
513
514static void handleCallSite(Instruction &I, const Function *CalledFunction,
515 ArrayRef<uint64_t> InlinedCallStack,
516 const std::vector<CallSiteEntry> &CallSiteEntries,
517 Module &M,
518 std::set<std::vector<uint64_t>> &MatchedCallSites,
520 auto &Ctx = M.getContext();
521 // Set of Callee GUIDs to attach to indirect calls. We accumulate all of them
522 // to support cases where the instruction's inlined frames match multiple call
523 // site entries, which can happen if the profile was collected from a binary
524 // where this instruction was eventually inlined into multiple callers.
526 bool CallsiteMDAdded = false;
527 for (const auto &CallSiteEntry : CallSiteEntries) {
528 // If we found and thus matched all frames on the call, create and
529 // attach call stack metadata.
531 InlinedCallStack)) {
532 NumOfMemProfMatchedCallSites++;
533 // Only need to find one with a matching call stack and add a single
534 // callsite metadata.
535 if (!CallsiteMDAdded) {
536 addCallsiteMetadata(I, InlinedCallStack, Ctx);
537
538 // Accumulate call site matching information upon request.
540 std::vector<uint64_t> CallStack;
541 append_range(CallStack, InlinedCallStack);
542 MatchedCallSites.insert(std::move(CallStack));
543 }
544 ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemProfUse", &I)
545 << ore::NV("CallSite", &I) << " in function "
546 << ore::NV("Caller", I.getFunction())
547 << " matched callsite with frame count "
548 << ore::NV("Frames", InlinedCallStack.size()));
549
550 // If this is a direct call, we're done.
551 if (CalledFunction)
552 break;
553 CallsiteMDAdded = true;
554 }
555
556 assert(!CalledFunction && "Didn't expect direct call");
557
558 // Collect Callee GUIDs from all matching CallSiteEntries.
561 }
562 }
563 // Try to attach indirect call metadata if possible.
564 addVPMetadata(M, I, CalleeGuids.getArrayRef());
565}
566
567static void readMemprof(Module &M, Function &F,
569 const TargetLibraryInfo &TLI,
570 std::map<std::pair<uint64_t, unsigned>, AllocMatchInfo>
571 &FullStackIdToAllocMatchInfo,
572 std::set<std::vector<uint64_t>> &MatchedCallSites,
574 OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize) {
575 auto &Ctx = M.getContext();
576 // Previously we used getIRPGOFuncName() here. If F is local linkage,
577 // getIRPGOFuncName() returns FuncName with prefix 'FileName;'. But
578 // llvm-profdata uses FuncName in dwarf to create GUID which doesn't
579 // contain FileName's prefix. As a result, local linkage functions could
580 // not find their MemProfRecord. So we use getName() now.
581 // 'unique-internal-linkage-names' can make MemProf work better for local
582 // linkage functions.
583 auto FuncName = F.getName();
584 auto FuncGUID = Function::getGUIDAssumingExternalLinkage(FuncName);
586 errs() << "MemProf: Function GUID " << FuncGUID << " is " << FuncName
587 << "\n";
588 std::optional<memprof::MemProfRecord> MemProfRec;
589 auto Err = MemProfReader->getMemProfRecord(FuncGUID).moveInto(MemProfRec);
590 if (Err) {
591 handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
592 auto Err = IPE.get();
593 bool SkipWarning = false;
594 LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName
595 << ": ");
597 NumOfMemProfMissing++;
598 SkipWarning = !PGOWarnMissing;
599 LLVM_DEBUG(dbgs() << "unknown function");
600 } else if (Err == instrprof_error::hash_mismatch) {
601 NumOfMemProfMismatch++;
602 SkipWarning =
605 (F.hasComdat() ||
607 LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
608 }
609
610 if (SkipWarning)
611 return;
612
613 std::string Msg = (IPE.message() + Twine(" ") + F.getName().str() +
614 Twine(" Hash = ") + std::to_string(FuncGUID))
615 .str();
616
617 Ctx.diagnose(
618 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
619 });
620 return;
621 }
622
623 NumOfMemProfFunc++;
624
625 // If requested, undrift MemProfRecord so that the source locations in it
626 // match those in the IR.
628 undriftMemProfRecord(UndriftMaps, *MemProfRec);
629
630 // Detect if there are non-zero column numbers in the profile. If not,
631 // treat all column numbers as 0 when matching (i.e. ignore any non-zero
632 // columns in the IR). The profiled binary might have been built with
633 // column numbers disabled, for example.
634 bool ProfileHasColumns = false;
635
636 // Build maps of the location hash to all profile data with that leaf location
637 // (allocation info and the callsites).
638 std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
639
640 // For the callsites we need to record slices of the frame array (see comments
641 // below where the map entries are added) along with their CalleeGuids.
642 std::map<uint64_t, std::vector<CallSiteEntry>> LocHashToCallSites;
643 for (auto &AI : MemProfRec->AllocSites) {
644 NumOfMemProfAllocContextProfiles++;
645 // Associate the allocation info with the leaf frame. The later matching
646 // code will match any inlined call sequences in the IR with a longer prefix
647 // of call stack frames.
648 uint64_t StackId = computeStackId(AI.CallStack[0]);
649 LocHashToAllocInfo[StackId].insert(&AI);
650 ProfileHasColumns |= AI.CallStack[0].Column;
651 }
652 for (auto &CS : MemProfRec->CallSites) {
653 NumOfMemProfCallSiteProfiles++;
654 // Need to record all frames from leaf up to and including this function,
655 // as any of these may or may not have been inlined at this point.
656 unsigned Idx = 0;
657 for (auto &StackFrame : CS.Frames) {
658 uint64_t StackId = computeStackId(StackFrame);
659 ArrayRef<Frame> FrameSlice = ArrayRef<Frame>(CS.Frames).drop_front(Idx++);
660 ArrayRef<GlobalValue::GUID> CalleeGuids(CS.CalleeGuids);
661 LocHashToCallSites[StackId].push_back({FrameSlice, CalleeGuids});
662
663 ProfileHasColumns |= StackFrame.Column;
664 // Once we find this function, we can stop recording.
665 if (StackFrame.Function == FuncGUID)
666 break;
667 }
668 assert(Idx <= CS.Frames.size() && CS.Frames[Idx - 1].Function == FuncGUID);
669 }
670
671 auto GetOffset = [](const DILocation *DIL) {
672 return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
673 0xffff;
674 };
675
676 // Now walk the instructions, looking up the associated profile data using
677 // debug locations.
678 for (auto &BB : F) {
679 for (auto &I : BB) {
680 if (I.isDebugOrPseudoInst())
681 continue;
682 // We are only interested in calls (allocation or interior call stack
683 // context calls).
684 auto *CI = dyn_cast<CallBase>(&I);
685 if (!CI)
686 continue;
687 auto *CalledFunction = CI->getCalledFunction();
688 if (CalledFunction && CalledFunction->isIntrinsic())
689 continue;
690 // List of call stack ids computed from the location hashes on debug
691 // locations (leaf to inlined at root).
692 SmallVector<uint64_t, 8> InlinedCallStack;
693 // Was the leaf location found in one of the profile maps?
694 bool LeafFound = false;
695 // If leaf was found in a map, iterators pointing to its location in both
696 // of the maps. It might exist in neither, one, or both (the latter case
697 // can happen because we don't currently have discriminators to
698 // distinguish the case when a single line/col maps to both an allocation
699 // and another callsite).
700 auto AllocInfoIter = LocHashToAllocInfo.end();
701 auto CallSitesIter = LocHashToCallSites.end();
702 for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr;
703 DIL = DIL->getInlinedAt()) {
704 // Use C++ linkage name if possible. Need to compile with
705 // -fdebug-info-for-profiling to get linkage name.
706 StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
707 if (Name.empty())
708 Name = DIL->getScope()->getSubprogram()->getName();
709 auto CalleeGUID = Function::getGUIDAssumingExternalLinkage(Name);
710 auto StackId = computeStackId(CalleeGUID, GetOffset(DIL),
711 ProfileHasColumns ? DIL->getColumn() : 0);
712 // Check if we have found the profile's leaf frame. If yes, collect
713 // the rest of the call's inlined context starting here. If not, see if
714 // we find a match further up the inlined context (in case the profile
715 // was missing debug frames at the leaf).
716 if (!LeafFound) {
717 AllocInfoIter = LocHashToAllocInfo.find(StackId);
718 CallSitesIter = LocHashToCallSites.find(StackId);
719 if (AllocInfoIter != LocHashToAllocInfo.end() ||
720 CallSitesIter != LocHashToCallSites.end())
721 LeafFound = true;
722 }
723 if (LeafFound)
724 InlinedCallStack.push_back(StackId);
725 }
726 // If leaf not in either of the maps, skip inst.
727 if (!LeafFound)
728 continue;
729
730 // First add !memprof metadata from allocation info, if we found the
731 // instruction's leaf location in that map, and if the rest of the
732 // instruction's locations match the prefix Frame locations on an
733 // allocation context with the same leaf.
734 if (AllocInfoIter != LocHashToAllocInfo.end() &&
735 // Only consider allocations which support hinting.
736 isAllocationWithHotColdVariant(CI->getCalledFunction(), TLI))
737 handleAllocSite(I, CI, InlinedCallStack, Ctx, ORE, MaxColdSize,
738 AllocInfoIter->second, FullStackIdToAllocMatchInfo);
739 else if (CallSitesIter != LocHashToCallSites.end())
740 // Otherwise, add callsite metadata. If we reach here then we found the
741 // instruction's leaf location in the callsites map and not the
742 // allocation map.
743 handleCallSite(I, CalledFunction, InlinedCallStack,
744 CallSitesIter->second, M, MatchedCallSites, ORE);
745 }
746 }
747}
748
749MemProfUsePass::MemProfUsePass(std::string MemoryProfileFile,
751 : MemoryProfileFileName(MemoryProfileFile), FS(FS) {
752 if (!FS)
753 this->FS = vfs::getRealFileSystem();
754}
755
757 // Return immediately if the module doesn't contain any function or global
758 // variables.
759 if (M.empty() && M.globals().empty())
760 return PreservedAnalyses::all();
761
762 LLVM_DEBUG(dbgs() << "Read in memory profile:\n");
763 auto &Ctx = M.getContext();
764 auto ReaderOrErr = IndexedInstrProfReader::create(MemoryProfileFileName, *FS);
765 if (Error E = ReaderOrErr.takeError()) {
766 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
767 Ctx.diagnose(
768 DiagnosticInfoPGOProfile(MemoryProfileFileName.data(), EI.message()));
769 });
770 return PreservedAnalyses::all();
771 }
772
773 std::unique_ptr<IndexedInstrProfReader> MemProfReader =
774 std::move(ReaderOrErr.get());
775 if (!MemProfReader) {
776 Ctx.diagnose(DiagnosticInfoPGOProfile(
777 MemoryProfileFileName.data(), StringRef("Cannot get MemProfReader")));
778 return PreservedAnalyses::all();
779 }
780
781 if (!MemProfReader->hasMemoryProfile()) {
782 Ctx.diagnose(DiagnosticInfoPGOProfile(MemoryProfileFileName.data(),
783 "Not a memory profile"));
784 return PreservedAnalyses::all();
785 }
786
787 const bool Changed =
788 annotateGlobalVariables(M, MemProfReader->getDataAccessProfileData());
789
790 // If the module doesn't contain any function, return after we process all
791 // global variables.
792 if (M.empty())
794
795 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
796
797 TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(*M.begin());
800 UndriftMaps = computeUndriftMap(M, MemProfReader.get(), TLI);
801
802 // Map from the stack hash and matched frame count of each allocation context
803 // in the function profiles to the total profiled size (bytes) and allocation
804 // type.
805 std::map<std::pair<uint64_t, unsigned>, AllocMatchInfo>
806 FullStackIdToAllocMatchInfo;
807
808 // Set of the matched call sites, each expressed as a sequence of an inline
809 // call stack.
810 std::set<std::vector<uint64_t>> MatchedCallSites;
811
812 uint64_t MaxColdSize = 0;
813 if (auto *MemProfSum = MemProfReader->getMemProfSummary())
814 MaxColdSize = MemProfSum->getMaxColdTotalSize();
815
816 for (auto &F : M) {
817 if (F.isDeclaration())
818 continue;
819
820 const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
821 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
822 readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo,
823 MatchedCallSites, UndriftMaps, ORE, MaxColdSize);
824 }
825
827 for (const auto &[IdLengthPair, Info] : FullStackIdToAllocMatchInfo) {
828 auto [Id, Length] = IdLengthPair;
829 errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType)
830 << " context with id " << Id << " has total profiled size "
831 << Info.TotalSize << " is matched with " << Length << " frames\n";
832 }
833
834 for (const auto &CallStack : MatchedCallSites) {
835 errs() << "MemProf callsite match for inline call stack";
836 for (uint64_t StackId : CallStack)
837 errs() << " " << StackId;
838 errs() << "\n";
839 }
840 }
841
843}
844
845bool MemProfUsePass::annotateGlobalVariables(
846 Module &M, const memprof::DataAccessProfData *DataAccessProf) {
847 if (!AnnotateStaticDataSectionPrefix || M.globals().empty())
848 return false;
849
850 if (!DataAccessProf) {
851 M.addModuleFlag(Module::Warning, "EnableDataAccessProf", 0U);
852 M.getContext().diagnose(DiagnosticInfoPGOProfile(
853 MemoryProfileFileName.data(),
854 StringRef("Data access profiles not found in memprof. Ignore "
855 "-memprof-annotate-static-data-prefix."),
856 DS_Warning));
857 return false;
858 }
859 M.addModuleFlag(Module::Warning, "EnableDataAccessProf", 1U);
860
861 bool Changed = false;
862 // Iterate all global variables in the module and annotate them based on
863 // data access profiles. Note it's up to the linker to decide how to map input
864 // sections to output sections, and one conservative practice is to map
865 // unlikely-prefixed ones to unlikely output section, and map the rest
866 // (hot-prefixed or prefix-less) to the canonical output section.
867 for (GlobalVariable &GVar : M.globals()) {
868 assert(!GVar.getSectionPrefix().has_value() &&
869 "GVar shouldn't have section prefix yet");
870 auto Kind = llvm::memprof::getAnnotationKind(GVar);
873 continue;
874 }
875
876 StringRef Name = GVar.getName();
877 // Skip string literals as their mangled names don't stay stable across
878 // binary releases.
879 // TODO: Track string content hash in the profiles and compute it inside the
880 // compiler to categorize the hotness of string literals.
881 if (Name.starts_with(".str")) {
882 LLVM_DEBUG(dbgs() << "Skip annotating string literal " << Name << "\n");
883 continue;
884 }
885
886 // DataAccessProfRecord's get* methods will canonicalize the name under the
887 // hood before looking it up, so optimizer doesn't need to do it.
888 std::optional<DataAccessProfRecord> Record =
889 DataAccessProf->getProfileRecord(Name);
890 // Annotate a global variable as hot if it has non-zero sampled count, and
891 // annotate it as cold if it's seen in the profiled binary
892 // file but doesn't have any access sample.
893 // For logging, optimization remark emitter requires a llvm::Function, but
894 // it's not well defined how to associate a global variable with a function.
895 // So we just print out the static data section prefix in LLVM_DEBUG.
896 if (Record && Record->AccessCount > 0) {
897 ++NumOfMemProfHotGlobalVars;
898 Changed |= GVar.setSectionPrefix("hot");
899 LLVM_DEBUG(dbgs() << "Global variable " << Name
900 << " is annotated as hot\n");
901 } else if (DataAccessProf->isKnownColdSymbol(Name)) {
902 ++NumOfMemProfColdGlobalVars;
903 Changed |= GVar.setSectionPrefix("unlikely");
904 Changed = true;
905 LLVM_DEBUG(dbgs() << "Global variable " << Name
906 << " is annotated as unlikely\n");
907 } else {
908 ++NumOfMemProfUnknownGlobalVars;
909 LLVM_DEBUG(dbgs() << "Global variable " << Name << " is not annotated\n");
910 }
911 }
912
913 return Changed;
914}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define DEBUG_TYPE
Module.h This file contains the declarations for the Module class.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static void addCallsiteMetadata(Instruction &I, ArrayRef< uint64_t > InlinedCallStack, LLVMContext &Ctx)
static bool isAllocationWithHotColdVariant(const Function *Callee, const TargetLibraryInfo &TLI)
static cl::opt< bool > ClMemProfAttachCalleeGuids("memprof-attach-calleeguids", cl::desc("Attach calleeguids as value profile metadata for indirect calls."), cl::init(true), cl::Hidden)
static void HandleUnsupportedAnnotationKinds(GlobalVariable &GVar, AnnotationKind Kind)
static void undriftMemProfRecord(const DenseMap< uint64_t, LocToLocMap > &UndriftMaps, memprof::MemProfRecord &MemProfRec)
static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset, uint32_t Column)
static void handleCallSite(Instruction &I, const Function *CalledFunction, ArrayRef< uint64_t > InlinedCallStack, const std::vector< CallSiteEntry > &CallSiteEntries, Module &M, std::set< std::vector< uint64_t > > &MatchedCallSites, OptimizationRemarkEmitter &ORE)
static cl::opt< bool > ClPrintMemProfMatchInfo("memprof-print-match-info", cl::desc("Print matching stats for each allocation " "context in this module's profiles"), cl::Hidden, cl::init(false))
static void addVPMetadata(Module &M, Instruction &I, ArrayRef< GlobalValue::GUID > CalleeGuids)
static cl::opt< bool > PrintFunctionGuids("memprof-print-function-guids", cl::desc("Print function GUIDs computed for matching"), cl::Hidden, cl::init(false))
static cl::opt< bool > AnnotateStaticDataSectionPrefix("memprof-annotate-static-data-prefix", cl::init(false), cl::Hidden, cl::desc("If true, annotate the static data section prefix"))
static cl::opt< bool > SalvageStaleProfile("memprof-salvage-stale-profile", cl::desc("Salvage stale MemProf profile"), cl::init(false), cl::Hidden)
static cl::opt< unsigned > MinMatchedColdBytePercent("memprof-matching-cold-threshold", cl::init(100), cl::Hidden, cl::desc("Min percent of cold bytes matched to hint allocation cold"))
static cl::opt< bool > ClMemProfMatchHotColdNew("memprof-match-hot-cold-new", cl::desc("Match allocation profiles onto existing hot/cold operator new calls"), cl::Hidden, cl::init(false))
static AllocationType addCallStack(CallStackTrie &AllocTrie, const AllocationInfo *AllocInfo, uint64_t FullStackId)
static void readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader, const TargetLibraryInfo &TLI, std::map< std::pair< uint64_t, unsigned >, AllocMatchInfo > &FullStackIdToAllocMatchInfo, std::set< std::vector< uint64_t > > &MatchedCallSites, DenseMap< uint64_t, LocToLocMap > &UndriftMaps, OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize)
static void handleAllocSite(Instruction &I, CallBase *CI, ArrayRef< uint64_t > InlinedCallStack, LLVMContext &Ctx, OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize, const std::set< const AllocationInfo * > &AllocInfoSet, std::map< std::pair< uint64_t, unsigned >, AllocMatchInfo > &FullStackIdToAllocMatchInfo)
static bool stackFrameIncludesInlinedCallStack(ArrayRef< Frame > ProfileCallStack, ArrayRef< uint64_t > InlinedCallStack)
AllocType
FunctionAnalysisManager FAM
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
Defines the virtual file system interface vfs::FileSystem.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
ArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition ArrayRef.h:219
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition ArrayRef.h:195
iterator end() const
Definition ArrayRef.h:131
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
iterator begin() const
Definition ArrayRef.h:130
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:256
iterator end()
Definition DenseMap.h:81
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition DenseMap.h:169
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
Diagnostic information for the PGO profiler.
Base class for error info classes.
Definition Error.h:44
virtual std::string message() const
Return the error message as a string.
Definition Error.h:52
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
Definition Globals.cpp:77
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
@ AvailableExternallyLinkage
Available for inspection, not emission.
Definition GlobalValue.h:54
HashResultTy< HasherT_ > final()
Forward to HasherT::final() if available.
Definition HashBuilder.h:64
Interface to help hash various types through a hasher type.
std::enable_if_t< hashbuilder_detail::IsHashableData< T >::value, HashBuilder & > add(T Value)
Implement hashing for hashable data types, e.g. integral or enum values.
Reader for the indexed binary instrprof format.
static Expected< std::unique_ptr< IndexedInstrProfReader > > create(const Twine &Path, vfs::FileSystem &FS, const Twine &RemappingPath="")
Factory method to create an indexed reader.
instrprof_error get() const
Definition InstrProf.h:464
std::string message() const override
Return the error message as a string.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
LLVM_ABI MemProfUsePass(std::string MemoryProfileFile, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr)
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
@ Warning
Emits a warning if two values disagree.
Definition Module.h:124
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for applied optimization remarks.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
A vector that has set insertion semantics.
Definition SetVector.h:57
ArrayRef< value_type > getArrayRef() const
Definition SetVector.h:91
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition SmallString.h:68
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:143
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
An efficient, type-erasing, non-owning reference to a callable.
Class to build a trie of call stack contexts for a particular profiled allocation call,...
LLVM_ABI void addCallStack(AllocationType AllocType, ArrayRef< uint64_t > StackIds, std::vector< ContextTotalSize > ContextSizeInfo={})
Add a call stack context with the given allocation type to the Trie.
LLVM_ABI void addSingleAllocTypeAttribute(CallBase *CI, AllocationType AT, StringRef Descriptor)
Add an attribute for the given allocation type to the call instruction.
LLVM_ABI bool buildAndAttachMIBMetadata(CallBase *CI)
Build and attach the minimal necessary MIB metadata.
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
Encapsulates the data access profile data and the methods to operate on it.
LLVM_ABI std::optional< DataAccessProfRecord > getProfileRecord(const SymbolHandleRef SymID) const
Returns a profile record for SymbolID, or std::nullopt if there isn't a record.
LLVM_ABI bool isKnownColdSymbol(const SymbolHandleRef SymID) const
Returns true if SymID is seen in profiled binaries and cold.
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
initializer< Ty > init(const Ty &Val)
LLVM_ABI DenseMap< uint64_t, LocToLocMap > computeUndriftMap(Module &M, IndexedInstrProfReader *MemProfReader, const TargetLibraryInfo &TLI)
LLVM_ABI MDNode * buildCallstackMetadata(ArrayRef< uint64_t > CallStack, LLVMContext &Ctx)
Build callstack metadata from the provided list of call stack ids.
LLVM_ABI AllocationType getAllocType(uint64_t TotalLifetimeAccessDensity, uint64_t AllocCount, uint64_t TotalLifetime)
Return the allocation type for a given set of memory profile values.
LLVM_ABI bool recordContextSizeInfoForAnalysis()
Whether we need to record the context size info in the alloc trie used to build metadata.
std::unordered_map< LineLocation, LineLocation, LineLocationHash > LocToLocMap
Definition MemProfUse.h:65
LLVM_ABI uint64_t computeFullStackId(ArrayRef< Frame > CallStack)
Helper to generate a single hash id for a given callstack, used for emitting matching statistics and ...
LLVM_ABI DenseMap< uint64_t, SmallVector< CallEdgeTy, 0 > > extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI, function_ref< bool(uint64_t)> IsPresentInProfile=[](uint64_t) { return true;})
AnnotationKind getAnnotationKind(const GlobalVariable &GV)
Returns the annotation kind of the global variable GV.
LLVM_ABI GlobalValue::GUID getGUID(const StringRef FunctionName)
Definition MemProf.cpp:344
LLVM_ABI std::string getAllocTypeAttributeString(AllocationType Type)
Returns the string to use in attributes with the given type.
DiagnosticInfoOptimizationBase::Argument NV
LLVM_ABI IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets a vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
@ Length
Definition DWP.cpp:532
std::array< uint8_t, NumBytes > BLAKE3Result
The constant LLVM_BLAKE3_OUT_LEN provides the default output length, 32 bytes, which is recommended f...
Definition BLAKE3.h:35
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
void handleAllErrors(Error E, HandlerTs &&... Handlers)
Behaves the same as handleErrors, except that by contract all errors must be handled by the given han...
Definition Error.h:990
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2148
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
cl::opt< bool > PGOWarnMissing
auto unique(Range &&R, Predicate P)
Definition STLExtras.h:2088
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1634
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI SmallVector< InstrProfValueData, 4 > getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst and returns them if Inst is annotated with value profile dat...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
cl::opt< bool > NoPGOWarnMismatch
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
cl::opt< bool > SalvageStaleProfile("salvage-stale-profile", cl::Hidden, cl::init(false), cl::desc("Salvage stale profile by fuzzy matching and use the remapped " "location for sample profile query."))
void longestCommonSequence(AnchorList AnchorList1, AnchorList AnchorList2, llvm::function_ref< bool(const Function &, const Function &)> FunctionMatchesProfile, llvm::function_ref< void(Loc, Loc)> InsertMatching)
ArrayRef(const T &OneElt) -> ArrayRef< T >
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition STLExtras.h:2100
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
cl::opt< bool > NoPGOWarnMismatchComdatWeak
uint64_t TotalSize
AllocationType AllocType
ArrayRef< GlobalValue::GUID > CalleeGuids
ArrayRef< Frame > Frames
Summary of memprof metadata on allocations.
GlobalValue::GUID Function
Definition MemProf.h:245
uint32_t LineOffset
Definition MemProf.h:250
llvm::SmallVector< CallSiteInfo > CallSites
Definition MemProf.h:522
llvm::SmallVector< AllocationInfo > AllocSites
Definition MemProf.h:520