41#define DEBUG_TYPE "memprof"
53 "memprof-match-hot-cold-new",
55 "Match allocation profiles onto existing hot/cold operator new calls"),
60 cl::desc(
"Print matching stats for each allocation "
61 "context in this module's profiles"),
66 cl::desc(
"Salvage stale MemProf profile"),
70 "memprof-attach-calleeguids",
72 "Attach calleeguids as value profile metadata for indirect calls."),
77 cl::desc(
"Min percent of cold bytes matched to hint allocation cold"));
81 cl::desc(
"If true, annotate the static data section prefix"));
84STATISTIC(NumOfMemProfMissing,
"Number of functions without memory profile.");
86 "Number of functions having mismatched memory profile hash.");
87STATISTIC(NumOfMemProfFunc,
"Number of functions having valid memory profile.");
89 "Number of alloc contexts in memory profile.");
91 "Number of callsites in memory profile.");
93 "Number of matched memory profile alloc contexts.");
95 "Number of matched memory profile allocs.");
97 "Number of matched memory profile callsites.");
99 "Number of global vars annotated with 'hot' section prefix.");
101 "Number of global vars annotated with 'unlikely' section prefix.");
103 "Number of global vars with unknown hotness (no section prefix).");
105 "Number of global vars with user-specified section (not annotated).");
110 I.setMetadata(LLVMContext::MD_callsite,
121 std::memcpy(&Id, Hash.data(),
sizeof(Hash));
133 for (
const auto &StackFrame :
AllocInfo->CallStack)
138 std::vector<ContextTotalSize> ContextSizeInfo;
140 auto TotalSize =
AllocInfo->Info.getTotalSize();
143 ContextSizeInfo.push_back({FullStackId, TotalSize});
155 return ProfileCallStack.
size() >= InlinedCallStack.
size() &&
158 return computeStackId(F) == StackId;
171 case LibFunc_ZnwmRKSt9nothrow_t:
172 case LibFunc_ZnwmSt11align_val_t:
173 case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:
175 case LibFunc_ZnamRKSt9nothrow_t:
176 case LibFunc_ZnamSt11align_val_t:
177 case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:
178 case LibFunc_size_returning_new:
179 case LibFunc_size_returning_new_aligned:
181 case LibFunc_Znwm12__hot_cold_t:
182 case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t:
183 case LibFunc_ZnwmSt11align_val_t12__hot_cold_t:
184 case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
185 case LibFunc_Znam12__hot_cold_t:
186 case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t:
187 case LibFunc_ZnamSt11align_val_t12__hot_cold_t:
188 case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
189 case LibFunc_size_returning_new_hot_cold:
190 case LibFunc_size_returning_new_aligned_hot_cold:
208 return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
213 if (
F.isDeclaration())
222 auto *CalledFunction = CB->getCalledFunction();
224 if (!CalledFunction || CalledFunction->isIntrinsic())
227 StringRef CalleeName = CalledFunction->getName();
235 DIL = DIL->getInlinedAt()) {
236 StringRef CallerName = DIL->getSubprogramLinkageName();
238 "Be sure to enable -fdebug-info-for-profiling");
248 }
else if (!IsPresentInProfile(CalleeGUID)) {
260 Calls[CallerGUID].emplace_back(
Loc, CalleeGUID);
261 CalleeName = CallerName;
269 for (
auto &[CallerGUID, CallList] : Calls) {
286 return CallsFromProfile.
contains(GUID);
290 for (
const auto &[CallerGUID, IRAnchors] : CallsFromIR) {
291 auto It = CallsFromProfile.
find(CallerGUID);
292 if (It == CallsFromProfile.
end())
294 const auto &ProfileAnchors = It->second;
298 ProfileAnchors, IRAnchors, std::equal_to<GlobalValue::GUID>(),
300 [[maybe_unused]]
bool Inserted =
301 UndriftMaps.
try_emplace(CallerGUID, std::move(Matchings)).second;
316 auto UndriftCallStack = [&](std::vector<Frame> &
CallStack) {
318 auto I = UndriftMaps.
find(
F.Function);
319 if (
I == UndriftMaps.
end())
322 if (J ==
I->second.end())
324 auto &NewLoc = J->second;
325 F.LineOffset = NewLoc.LineOffset;
326 F.Column = NewLoc.Column;
331 UndriftCallStack(AS.CallStack);
334 UndriftCallStack(CS.Frames);
343 if (
I.getMetadata(LLVMContext::MD_prof)) {
351 if (!ExistingVD.empty()) {
360 InstrProfValueData VD;
361 VD.Value = CalleeGUID;
368 TotalCount += VD.Count;
381 const std::set<const AllocationInfo *> &AllocInfoSet,
383 &FullStackIdToAllocMatchInfo) {
396 NumOfMemProfMatchedAllocContexts++;
401 TotalSize +=
AllocInfo->Info.getTotalSize();
403 TotalColdSize +=
AllocInfo->Info.getTotalSize();
408 FullStackIdToAllocMatchInfo[std::make_pair(FullStackId,
409 InlinedCallStack.
size())] = {
426 if (!AllocTrie.
empty()) {
427 NumOfMemProfMatchedAllocs++;
431 assert(MemprofMDAttached ==
I.hasMetadata(LLVMContext::MD_memprof));
432 if (MemprofMDAttached) {
473 const std::unordered_set<CallSiteEntry, CallSiteEntryHash> &CallSiteEntries,
474 Module &M, std::set<std::vector<uint64_t>> &MatchedCallSites) {
475 auto &Ctx = M.getContext();
481 NumOfMemProfMatchedCallSites++;
495 MatchedCallSites.insert(std::move(
CallStack));
506 &FullStackIdToAllocMatchInfo,
507 std::set<std::vector<uint64_t>> &MatchedCallSites,
510 auto &Ctx = M.getContext();
518 auto FuncName =
F.getName();
520 std::optional<memprof::MemProfRecord> MemProfRec;
521 auto Err =
MemProfReader->getMemProfRecord(FuncGUID).moveInto(MemProfRec);
524 auto Err = IPE.
get();
525 bool SkipWarning =
false;
526 LLVM_DEBUG(
dbgs() <<
"Error in reading profile for Func " << FuncName
529 NumOfMemProfMissing++;
533 NumOfMemProfMismatch++;
539 LLVM_DEBUG(
dbgs() <<
"hash mismatch (skip=" << SkipWarning <<
")");
545 std::string Msg = (IPE.
message() +
Twine(
" ") +
F.getName().str() +
546 Twine(
" Hash = ") + std::to_string(FuncGUID))
566 bool ProfileHasColumns =
false;
570 std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
574 std::map<uint64_t, std::unordered_set<CallSiteEntry, CallSiteEntryHash>>
576 for (
auto &AI : MemProfRec->AllocSites) {
577 NumOfMemProfAllocContextProfiles++;
582 LocHashToAllocInfo[StackId].insert(&AI);
583 ProfileHasColumns |= AI.CallStack[0].Column;
585 for (
auto &CS : MemProfRec->CallSites) {
586 NumOfMemProfCallSiteProfiles++;
590 for (
auto &StackFrame : CS.Frames) {
594 LocHashToCallSites[StackId].insert({FrameSlice, CalleeGuids});
596 ProfileHasColumns |= StackFrame.Column;
598 if (StackFrame.Function == FuncGUID)
601 assert(Idx <= CS.Frames.size() && CS.Frames[Idx - 1].Function == FuncGUID);
605 return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
613 if (
I.isDebugOrPseudoInst())
620 auto *CalledFunction = CI->getCalledFunction();
621 if (CalledFunction && CalledFunction->isIntrinsic())
627 bool LeafFound =
false;
633 auto AllocInfoIter = LocHashToAllocInfo.
end();
634 auto CallSitesIter = LocHashToCallSites.end();
635 for (
const DILocation *DIL =
I.getDebugLoc(); DIL !=
nullptr;
636 DIL = DIL->getInlinedAt()) {
639 StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
641 Name = DIL->getScope()->getSubprogram()->getName();
644 ProfileHasColumns ? DIL->getColumn() : 0);
650 AllocInfoIter = LocHashToAllocInfo.find(StackId);
651 CallSitesIter = LocHashToCallSites.find(StackId);
652 if (AllocInfoIter != LocHashToAllocInfo.end() ||
653 CallSitesIter != LocHashToCallSites.end())
667 if (AllocInfoIter != LocHashToAllocInfo.end() &&
671 AllocInfoIter->second, FullStackIdToAllocMatchInfo);
672 else if (CallSitesIter != LocHashToCallSites.end())
677 CallSitesIter->second, M, MatchedCallSites);
684 : MemoryProfileFileName(MemoryProfileFile), FS(FS) {
692 if (M.empty() && M.globals().empty())
696 auto &Ctx = M.getContext();
698 if (
Error E = ReaderOrErr.takeError()) {
707 std::move(ReaderOrErr.get());
710 MemoryProfileFileName.data(),
StringRef(
"Cannot get MemProfReader")));
716 "Not a memory profile"));
721 annotateGlobalVariables(M,
MemProfReader->getDataAccessProfileData());
739 FullStackIdToAllocMatchInfo;
743 std::set<std::vector<uint64_t>> MatchedCallSites;
747 MaxColdSize = MemProfSum->getMaxColdTotalSize();
750 if (
F.isDeclaration())
756 MatchedCallSites, UndriftMaps, ORE, MaxColdSize);
760 for (
const auto &[IdLengthPair, Info] : FullStackIdToAllocMatchInfo) {
761 auto [Id,
Length] = IdLengthPair;
763 <<
" context with id " << Id <<
" has total profiled size "
764 << Info.TotalSize <<
" is matched with " <<
Length <<
" frames\n";
767 for (
const auto &
CallStack : MatchedCallSites) {
768 errs() <<
"MemProf callsite match for inline call stack";
770 errs() <<
" " << StackId;
788 if (Attrs.hasAttribute(
"bss-section") || Attrs.hasAttribute(
"data-section") ||
789 Attrs.hasAttribute(
"relro-section") ||
790 Attrs.hasAttribute(
"rodata-section"))
795bool MemProfUsePass::annotateGlobalVariables(
800 if (!DataAccessProf) {
801 M.getContext().diagnose(DiagnosticInfoPGOProfile(
802 MemoryProfileFileName.data(),
803 StringRef(
"Data access profiles not found in memprof. Ignore "
804 "-memprof-annotate-static-data-prefix."),
815 for (GlobalVariable &GVar :
M.globals()) {
816 assert(!GVar.getSectionPrefix().has_value() &&
817 "GVar shouldn't have section prefix yet");
818 if (GVar.isDeclarationForLinker())
822 ++NumOfMemProfExplicitSectionGlobalVars;
824 <<
" has explicit section name. Skip annotating.\n");
828 StringRef
Name = GVar.getName();
833 if (
Name.starts_with(
".str")) {
835 LLVM_DEBUG(
dbgs() <<
"Skip annotating string literal " << Name <<
"\n");
841 std::optional<DataAccessProfRecord> Record =
849 if (Record && Record->AccessCount > 0) {
850 ++NumOfMemProfHotGlobalVars;
851 GVar.setSectionPrefix(
"hot");
854 <<
" is annotated as hot\n");
856 ++NumOfMemProfColdGlobalVars;
857 GVar.setSectionPrefix(
"unlikely");
860 <<
" is annotated as unlikely\n");
862 ++NumOfMemProfUnknownGlobalVars;
863 LLVM_DEBUG(
dbgs() <<
"Global variable " << Name <<
" is not annotated\n");
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Module.h This file contains the declarations for the Module class.
static void handleCallSite(Instruction &I, const Function *CalledFunction, ArrayRef< uint64_t > InlinedCallStack, const std::unordered_set< CallSiteEntry, CallSiteEntryHash > &CallSiteEntries, Module &M, std::set< std::vector< uint64_t > > &MatchedCallSites)
static void addCallsiteMetadata(Instruction &I, ArrayRef< uint64_t > InlinedCallStack, LLVMContext &Ctx)
static bool isAllocationWithHotColdVariant(const Function *Callee, const TargetLibraryInfo &TLI)
static cl::opt< bool > ClMemProfAttachCalleeGuids("memprof-attach-calleeguids", cl::desc("Attach calleeguids as value profile metadata for indirect calls."), cl::init(true), cl::Hidden)
static void undriftMemProfRecord(const DenseMap< uint64_t, LocToLocMap > &UndriftMaps, memprof::MemProfRecord &MemProfRec)
static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset, uint32_t Column)
static cl::opt< bool > ClPrintMemProfMatchInfo("memprof-print-match-info", cl::desc("Print matching stats for each allocation " "context in this module's profiles"), cl::Hidden, cl::init(false))
static void addVPMetadata(Module &M, Instruction &I, ArrayRef< GlobalValue::GUID > CalleeGuids)
static cl::opt< bool > AnnotateStaticDataSectionPrefix("memprof-annotate-static-data-prefix", cl::init(false), cl::Hidden, cl::desc("If true, annotate the static data section prefix"))
static cl::opt< bool > SalvageStaleProfile("memprof-salvage-stale-profile", cl::desc("Salvage stale MemProf profile"), cl::init(false), cl::Hidden)
static cl::opt< unsigned > MinMatchedColdBytePercent("memprof-matching-cold-threshold", cl::init(100), cl::Hidden, cl::desc("Min percent of cold bytes matched to hint allocation cold"))
static bool hasExplicitSectionName(const GlobalVariable &GVar)
static cl::opt< bool > ClMemProfMatchHotColdNew("memprof-match-hot-cold-new", cl::desc("Match allocation profiles onto existing hot/cold operator new calls"), cl::Hidden, cl::init(false))
static AllocationType addCallStack(CallStackTrie &AllocTrie, const AllocationInfo *AllocInfo, uint64_t FullStackId)
static void readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader, const TargetLibraryInfo &TLI, std::map< std::pair< uint64_t, unsigned >, AllocMatchInfo > &FullStackIdToAllocMatchInfo, std::set< std::vector< uint64_t > > &MatchedCallSites, DenseMap< uint64_t, LocToLocMap > &UndriftMaps, OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize)
static void handleAllocSite(Instruction &I, CallBase *CI, ArrayRef< uint64_t > InlinedCallStack, LLVMContext &Ctx, OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize, const std::set< const AllocationInfo * > &AllocInfoSet, std::map< std::pair< uint64_t, unsigned >, AllocMatchInfo > &FullStackIdToAllocMatchInfo)
static bool stackFrameIncludesInlinedCallStack(ArrayRef< Frame > ProfileCallStack, ArrayRef< uint64_t > InlinedCallStack)
FunctionAnalysisManager FAM
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Defines the virtual file system interface vfs::FileSystem.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
ArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Diagnostic information for the PGO profiler.
Base class for error info classes.
virtual std::string message() const
Return the error message as a string.
Lightweight error class with error context and mandatory checking.
bool hasSection() const
Check if this global has a custom object file section.
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
@ AvailableExternallyLinkage
Available for inspection, not emission.
AttributeSet getAttributes() const
Return the attribute set for this global.
HashResultTy< HasherT_ > final()
Forward to HasherT::final() if available.
Interface to help hash various types through a hasher type.
std::enable_if_t< hashbuilder_detail::IsHashableData< T >::value, HashBuilder & > add(T Value)
Implement hashing for hashable data types, e.g. integral or enum values.
Reader for the indexed binary instrprof format.
static Expected< std::unique_ptr< IndexedInstrProfReader > > create(const Twine &Path, vfs::FileSystem &FS, const Twine &RemappingPath="")
Factory method to create an indexed reader.
instrprof_error get() const
std::string message() const override
Return the error message as a string.
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
This is an important class for using LLVM in a threaded context.
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
LLVM_ABI MemProfUsePass(std::string MemoryProfileFile, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr)
A Module instance is used to store all the information related to an LLVM module.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
constexpr bool empty() const
empty - Check if the string is empty.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
An efficient, type-erasing, non-owning reference to a callable.
Class to build a trie of call stack contexts for a particular profiled allocation call,...
LLVM_ABI void addCallStack(AllocationType AllocType, ArrayRef< uint64_t > StackIds, std::vector< ContextTotalSize > ContextSizeInfo={})
Add a call stack context with the given allocation type to the Trie.
LLVM_ABI void addSingleAllocTypeAttribute(CallBase *CI, AllocationType AT, StringRef Descriptor)
Add an attribute for the given allocation type to the call instruction.
LLVM_ABI bool buildAndAttachMIBMetadata(CallBase *CI)
Build and attach the minimal necessary MIB metadata.
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
Encapsulates the data access profile data and the methods to operate on it.
LLVM_ABI std::optional< DataAccessProfRecord > getProfileRecord(const SymbolHandleRef SymID) const
Returns a profile record for SymbolID, or std::nullopt if there isn't a record.
LLVM_ABI bool isKnownColdSymbol(const SymbolHandleRef SymID) const
Returns true if SymID is seen in profiled binaries and cold.
initializer< Ty > init(const Ty &Val)
LLVM_ABI DenseMap< uint64_t, LocToLocMap > computeUndriftMap(Module &M, IndexedInstrProfReader *MemProfReader, const TargetLibraryInfo &TLI)
LLVM_ABI MDNode * buildCallstackMetadata(ArrayRef< uint64_t > CallStack, LLVMContext &Ctx)
Build callstack metadata from the provided list of call stack ids.
LLVM_ABI AllocationType getAllocType(uint64_t TotalLifetimeAccessDensity, uint64_t AllocCount, uint64_t TotalLifetime)
Return the allocation type for a given set of memory profile values.
LLVM_ABI bool recordContextSizeInfoForAnalysis()
Whether we need to record the context size info in the alloc trie used to build metadata.
std::unordered_map< LineLocation, LineLocation, LineLocationHash > LocToLocMap
LLVM_ABI uint64_t computeFullStackId(ArrayRef< Frame > CallStack)
Helper to generate a single hash id for a given callstack, used for emitting matching statistics and ...
LLVM_ABI DenseMap< uint64_t, SmallVector< CallEdgeTy, 0 > > extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI, function_ref< bool(uint64_t)> IsPresentInProfile=[](uint64_t) { return true;})
LLVM_ABI GlobalValue::GUID getGUID(const StringRef FunctionName)
LLVM_ABI std::string getAllocTypeAttributeString(AllocationType Type)
Returns the string to use in attributes with the given type.
LLVM_ABI IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets an vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
std::array< uint8_t, NumBytes > BLAKE3Result
The constant LLVM_BLAKE3_OUT_LEN provides the default output length, 32 bytes, which is recommended f...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
void handleAllErrors(Error E, HandlerTs &&... Handlers)
Behaves the same as handleErrors, except that by contract all errors must be handled by the given han...
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
cl::opt< bool > PGOWarnMissing
auto unique(Range &&R, Predicate P)
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI SmallVector< InstrProfValueData, 4 > getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst and returns them if Inst is annotated with value profile dat...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
cl::opt< bool > NoPGOWarnMismatch
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
void longestCommonSequence(AnchorList AnchorList1, AnchorList AnchorList2, llvm::function_ref< bool(const Function &, const Function &)> FunctionMatchesProfile, llvm::function_ref< void(Loc, Loc)> InsertMatching)
ArrayRef(const T &OneElt) -> ArrayRef< T >
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
cl::opt< bool > NoPGOWarnMismatchComdatWeak
size_t operator()(const CallSiteEntry &Entry) const
ArrayRef< GlobalValue::GUID > CalleeGuids
bool operator==(const CallSiteEntry &Other) const
Summary of memprof metadata on allocations.
GlobalValue::GUID Function
llvm::SmallVector< CallSiteInfo > CallSites
llvm::SmallVector< AllocationInfo > AllocSites