19using namespace sampleprof;
21#define DEBUG_TYPE "sample-profile-matcher"
27void SampleProfileMatcher::findIRAnchors(
28 const Function &
F, std::map<LineLocation, StringRef> &IRAnchors) {
32 auto FindTopLevelInlinedCallsite = [](
const DILocation *DIL) {
33 assert((DIL && DIL->getInlinedAt()) &&
"No inlined callsite");
37 DIL = DIL->getInlinedAt();
38 }
while (DIL->getInlinedAt());
42 StringRef CalleeName = PrevDIL->getSubprogramLinkageName();
43 return std::make_pair(Callsite, CalleeName);
46 auto GetCanonicalCalleeName = [](
const CallBase *CB) {
47 StringRef CalleeName = UnknownIndirectCallee;
48 if (
Function *Callee = CB->getCalledFunction())
63 if (DIL->getInlinedAt()) {
64 IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
68 if (
const auto *CB = dyn_cast<CallBase>(&
I)) {
70 if (!isa<IntrinsicInst>(&
I))
71 CalleeName = GetCanonicalCalleeName(CB);
73 IRAnchors.emplace(
LineLocation(Probe->Id, 0), CalleeName);
80 if (!isa<CallBase>(&
I) || isa<IntrinsicInst>(&
I))
83 if (DIL->getInlinedAt()) {
84 IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
88 StringRef CalleeName = GetCanonicalCalleeName(dyn_cast<CallBase>(&
I));
89 IRAnchors.emplace(Callsite, CalleeName);
96void SampleProfileMatcher::findProfileAnchors(
98 std::map<
LineLocation, std::unordered_set<FunctionId>> &ProfileAnchors) {
99 auto isInvalidLineOffset = [](
uint32_t LineOffset) {
100 return LineOffset & 0x8000;
103 for (
const auto &
I :
FS.getBodySamples()) {
107 for (
const auto &
I :
I.second.getCallTargets()) {
109 ProfileAnchors.try_emplace(Loc, std::unordered_set<FunctionId>());
110 Ret.first->second.insert(
I.first);
114 for (
const auto &
I :
FS.getCallsiteSamples()) {
118 const auto &CalleeMap =
I.second;
119 for (
const auto &
I : CalleeMap) {
121 ProfileAnchors.try_emplace(Loc, std::unordered_set<FunctionId>());
122 Ret.first->second.insert(
I.first);
144void SampleProfileMatcher::runStaleProfileMatching(
145 const Function &
F,
const std::map<LineLocation, StringRef> &IRAnchors,
146 const std::map<
LineLocation, std::unordered_set<FunctionId>>
151 assert(IRToProfileLocationMap.empty() &&
152 "Run stale profile matching only once per function");
154 std::unordered_map<FunctionId, std::set<LineLocation>> CalleeToCallsitesMap;
155 for (
const auto &
I : ProfileAnchors) {
156 const auto &Loc =
I.first;
157 const auto &Callees =
I.second;
159 if (Callees.size() == 1) {
161 const auto &Candidates = CalleeToCallsitesMap.try_emplace(
162 CalleeName, std::set<LineLocation>());
163 Candidates.first->second.insert(Loc);
170 IRToProfileLocationMap.insert({
From, To});
174 int32_t LocationDelta = 0;
177 for (
const auto &
IR : IRAnchors) {
178 const auto &Loc =
IR.first;
179 auto CalleeName =
IR.second;
180 bool IsMatchedAnchor =
false;
182 if (!CalleeName.
empty()) {
183 auto CandidateAnchors =
185 if (CandidateAnchors != CalleeToCallsitesMap.end() &&
186 !CandidateAnchors->second.empty()) {
187 auto CI = CandidateAnchors->second.begin();
188 const auto Candidate = *CI;
189 CandidateAnchors->second.erase(CI);
190 InsertMatching(Loc, Candidate);
192 <<
" is matched from " << Loc <<
" to " << Candidate
194 LocationDelta = Candidate.LineOffset - Loc.
LineOffset;
200 for (
size_t I = (LastMatchedNonAnchors.
size() + 1) / 2;
201 I < LastMatchedNonAnchors.
size();
I++) {
202 const auto &
L = LastMatchedNonAnchors[
I];
203 uint32_t CandidateLineOffset =
L.LineOffset + LocationDelta;
204 LineLocation Candidate(CandidateLineOffset,
L.Discriminator);
205 InsertMatching(L, Candidate);
207 <<
" to " << Candidate <<
"\n");
210 IsMatchedAnchor =
true;
211 LastMatchedNonAnchors.
clear();
216 if (!IsMatchedAnchor) {
219 InsertMatching(Loc, Candidate);
221 << Candidate <<
"\n");
227void SampleProfileMatcher::runOnFunction(
Function &
F) {
234 const auto *FSFlattened = getFlattenedSamplesFor(
F);
241 std::map<LineLocation, StringRef> IRAnchors;
242 findIRAnchors(
F, IRAnchors);
245 std::map<LineLocation, std::unordered_set<FunctionId>> ProfileAnchors;
246 findProfileAnchors(*FSFlattened, ProfileAnchors);
250 recordCallsiteMatchStates(
F, IRAnchors, ProfileAnchors,
nullptr);
262 F.addFnAttr(
"profile-checksum-mismatch");
266 auto &IRToProfileLocationMap = getIRToProfileLocationMap(
F);
267 runStaleProfileMatching(
F, IRAnchors, ProfileAnchors,
268 IRToProfileLocationMap);
271 recordCallsiteMatchStates(
F, IRAnchors, ProfileAnchors,
272 &IRToProfileLocationMap);
276void SampleProfileMatcher::recordCallsiteMatchStates(
277 const Function &
F,
const std::map<LineLocation, StringRef> &IRAnchors,
278 const std::map<
LineLocation, std::unordered_set<FunctionId>>
281 bool IsPostMatch = IRToProfileLocationMap !=
nullptr;
282 auto &CallsiteMatchStates =
285 auto MapIRLocToProfileLoc = [&](
const LineLocation &IRLoc) {
287 if (!IRToProfileLocationMap)
289 const auto &ProfileLoc = IRToProfileLocationMap->find(IRLoc);
290 if (ProfileLoc != IRToProfileLocationMap->end())
291 return ProfileLoc->second;
296 for (
const auto &
I : IRAnchors) {
299 const auto &ProfileLoc = MapIRLocToProfileLoc(
I.first);
300 const auto &IRCalleeName =
I.second;
301 const auto &It = ProfileAnchors.find(ProfileLoc);
302 if (It == ProfileAnchors.end())
304 const auto &Callees = It->second;
306 bool IsCallsiteMatched =
false;
311 if (IRCalleeName == SampleProfileMatcher::UnknownIndirectCallee)
312 IsCallsiteMatched =
true;
313 else if (Callees.size() == 1 && Callees.count(
getRepInFormat(IRCalleeName)))
314 IsCallsiteMatched =
true;
316 if (IsCallsiteMatched) {
317 auto It = CallsiteMatchStates.find(ProfileLoc);
318 if (It == CallsiteMatchStates.end())
319 CallsiteMatchStates.emplace(ProfileLoc, MatchState::InitialMatch);
320 else if (IsPostMatch) {
321 if (It->second == MatchState::InitialMatch)
322 It->second = MatchState::UnchangedMatch;
323 else if (It->second == MatchState::InitialMismatch)
324 It->second = MatchState::RecoveredMismatch;
331 for (
const auto &
I : ProfileAnchors) {
332 const auto &Loc =
I.first;
333 [[maybe_unused]]
const auto &Callees =
I.second;
334 assert(!Callees.empty() &&
"Callees should not be empty");
335 auto It = CallsiteMatchStates.find(Loc);
336 if (It == CallsiteMatchStates.end())
337 CallsiteMatchStates.emplace(Loc, MatchState::InitialMismatch);
338 else if (IsPostMatch) {
341 if (It->second == MatchState::InitialMismatch)
342 It->second = MatchState::UnchangedMismatch;
343 else if (It->second == MatchState::InitialMatch)
344 It->second = MatchState::RemovedMatch;
349void SampleProfileMatcher::countMismatchedFuncSamples(
const FunctionSamples &FS,
351 const auto *FuncDesc = ProbeManager->
getDesc(
FS.getGUID());
358 NumStaleProfileFunc++;
363 MismatchedFunctionSamples +=
FS.getTotalSamples();
372 for (
const auto &
I :
FS.getCallsiteSamples())
373 for (
const auto &CS :
I.second)
374 countMismatchedFuncSamples(CS.second,
false);
377void SampleProfileMatcher::countMismatchedCallsiteSamples(
379 auto It = FuncCallsiteMatchStates.find(
FS.getFuncName());
381 if (It == FuncCallsiteMatchStates.end() || It->second.empty())
383 const auto &CallsiteMatchStates = It->second;
386 auto It = CallsiteMatchStates.find(Loc);
387 if (It == CallsiteMatchStates.end())
388 return MatchState::Unknown;
392 auto AttributeMismatchedSamples = [&](
const enum MatchState &State,
394 if (isMismatchState(State))
395 MismatchedCallsiteSamples += Samples;
396 else if (State == MatchState::RecoveredMismatch)
397 RecoveredCallsiteSamples += Samples;
402 for (
const auto &
I :
FS.getBodySamples())
403 AttributeMismatchedSamples(findMatchState(
I.first),
I.second.getSamples());
406 for (
const auto &
I :
FS.getCallsiteSamples()) {
407 auto State = findMatchState(
I.first);
409 for (
const auto &CS :
I.second)
410 CallsiteSamples += CS.second.getTotalSamples();
411 AttributeMismatchedSamples(State, CallsiteSamples);
413 if (isMismatchState(State))
419 for (
const auto &CS :
I.second)
420 countMismatchedCallsiteSamples(CS.second);
424void SampleProfileMatcher::countMismatchCallsites(
const FunctionSamples &FS) {
425 auto It = FuncCallsiteMatchStates.find(
FS.getFuncName());
427 if (It == FuncCallsiteMatchStates.end() || It->second.empty())
429 const auto &MatchStates = It->second;
430 [[maybe_unused]]
bool OnInitialState =
431 isInitialState(MatchStates.begin()->second);
432 for (
const auto &
I : MatchStates) {
433 TotalProfiledCallsites++;
435 (OnInitialState ? isInitialState(
I.second) : isFinalState(
I.second)) &&
436 "Profile matching state is inconsistent");
438 if (isMismatchState(
I.second))
439 NumMismatchedCallsites++;
440 else if (
I.second == MatchState::RecoveredMismatch)
441 NumRecoveredCallsites++;
445void SampleProfileMatcher::computeAndReportProfileStaleness() {
450 for (
const auto &
F : M) {
461 TotalFunctionSamples +=
FS->getTotalSamples();
465 countMismatchedFuncSamples(*FS,
true);
468 countMismatchCallsites(*FS);
469 countMismatchedCallsiteSamples(*FS);
474 errs() <<
"(" << NumStaleProfileFunc <<
"/" << TotalProfiledFunc
475 <<
") of functions' profile are invalid and ("
476 << MismatchedFunctionSamples <<
"/" << TotalFunctionSamples
477 <<
") of samples are discarded due to function hash mismatch.\n";
479 errs() <<
"(" << (NumMismatchedCallsites + NumRecoveredCallsites) <<
"/"
480 << TotalProfiledCallsites
481 <<
") of callsites' profile are invalid and ("
482 << (MismatchedCallsiteSamples + RecoveredCallsiteSamples) <<
"/"
483 << TotalFunctionSamples
484 <<
") of samples are discarded due to callsite location mismatch.\n";
485 errs() <<
"(" << NumRecoveredCallsites <<
"/"
486 << (NumRecoveredCallsites + NumMismatchedCallsites)
487 <<
") of callsites and (" << RecoveredCallsiteSamples <<
"/"
488 << (RecoveredCallsiteSamples + MismatchedCallsiteSamples)
489 <<
") of samples are recovered by stale profile matching.\n";
498 ProfStatsVec.
emplace_back(
"NumStaleProfileFunc", NumStaleProfileFunc);
499 ProfStatsVec.
emplace_back(
"TotalProfiledFunc", TotalProfiledFunc);
501 MismatchedFunctionSamples);
502 ProfStatsVec.
emplace_back(
"TotalFunctionSamples", TotalFunctionSamples);
505 ProfStatsVec.
emplace_back(
"NumMismatchedCallsites", NumMismatchedCallsites);
506 ProfStatsVec.
emplace_back(
"NumRecoveredCallsites", NumRecoveredCallsites);
507 ProfStatsVec.
emplace_back(
"TotalProfiledCallsites", TotalProfiledCallsites);
509 MismatchedCallsiteSamples);
511 RecoveredCallsiteSamples);
513 auto *MD = MDB.createLLVMStats(ProfStatsVec);
514 auto *NMD =
M.getOrInsertNamedMetadata(
"llvm.stats");
528 distributeIRToProfileLocationMap();
530 computeAndReportProfileStaleness();
533void SampleProfileMatcher::distributeIRToProfileLocationMap(
535 const auto ProfileMappings = FuncMappings.
find(FS.getFuncName());
536 if (ProfileMappings != FuncMappings.
end()) {
537 FS.setIRToProfileLocationMap(&(ProfileMappings->second));
542 for (
auto &FS : Callees.second) {
543 distributeIRToProfileLocationMap(FS.second);
550void SampleProfileMatcher::distributeIRToProfileLocationMap() {
552 distributeIRToProfileLocationMap(
I.second);
BlockVerifier::State From
Legalize the Machine IR a function s Machine IR
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
cl::opt< bool > ReportProfileStaleness
cl::opt< bool > SalvageStaleProfile
cl::opt< bool > PersistProfileStaleness
This file provides the interface for SampleProfileMatcher.
cl::opt< bool > PersistProfileStaleness("persist-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute stale profile statistical metrics and write it into the " "native object file(.llvm_stats section)."))
cl::opt< bool > ReportProfileStaleness("report-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute and report stale profile statistical metrics."))
cl::opt< bool > SalvageStaleProfile("salvage-stale-profile", cl::Hidden, cl::init(false), cl::desc("Salvage stale profile by fuzzy matching and use the remapped " "location for sample profile query."))
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
static bool isAvailableExternallyLinkage(LinkageTypes Linkage)
This is an important class for using LLVM in a threaded context.
bool profileIsHashMismatched(const PseudoProbeDescriptor &FuncDesc, const FunctionSamples &Samples) const
bool profileIsValid(const Function &F, const FunctionSamples &Samples) const
const PseudoProbeDescriptor * getDesc(uint64_t GUID) const
reference emplace_back(ArgTypes &&... Args)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
iterator find(StringRef Key)
StringRef - Represent a constant reference to a string, i.e.
This class represents a function that is read from a sample profile.
Representation of the samples collected for a function.
static bool ProfileIsProbeBased
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
static bool ProfileIsFS
If this profile uses flow sensitive discriminators.
static LineLocation getCallSiteIdentifier(const DILocation *DIL, bool ProfileIsFS=false)
Returns a unique call site identifier for a given debug location of a call instruction.
static void flattenProfile(SampleProfileMap &ProfileMap, bool ProfileIsCS=false)
SampleProfileMap & getProfiles()
Return all the profiles.
FunctionSamples * getSamplesFor(const Function &F)
Return the samples collected for function F.
static FunctionId getRepInFormat(StringRef Name)
Get the proper representation of a string according to whether the current Format uses MD5 to represe...
std::unordered_map< LineLocation, LineLocation, LineLocationHash > LocToLocMap
std::map< LineLocation, FunctionSamplesMap > CallsiteSampleMap
This is an optimization pass for GlobalISel generic memory operations.
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
std::optional< PseudoProbe > extractProbe(const Instruction &Inst)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
static bool skipProfileForFunction(const Function &F)
Represents the relative location of an instruction.