89#include <system_error>
94using namespace sampleprof;
97#define DEBUG_TYPE "sample-profile"
98#define CSINLINE_DEBUG DEBUG_TYPE "-inline"
101 "Number of functions inlined with context sensitive profile");
103 "Number of functions not inlined with context sensitive profile");
105 "Number of functions with CFG mismatched profile");
106STATISTIC(NumMatchedProfile,
"Number of functions with CFG matched profile");
108 "Number of inlined callsites with a partial distribution factor");
111 "Number of functions with FDO inline stopped due to min size limit");
113 "Number of functions with FDO inline stopped due to max size limit");
115 NumCSInlinedHitGrowthLimit,
116 "Number of functions with FDO inline stopped due to growth size limit");
133 cl::desc(
"Salvage stale profile by fuzzy matching and use the remapped "
134 "location for sample profile query."));
138 cl::desc(
"Compute and report stale profile statistical metrics."));
142 cl::desc(
"Compute stale profile statistical metrics and write it into the "
143 "native object file(.llvm_stats section)."));
147 cl::desc(
"If the sample profile is accurate, we will mark all un-sampled "
148 "callsite and function as having 0 samples. Otherwise, treat "
149 "un-sampled callsites and functions conservatively as unknown. "));
153 cl::desc(
"If the sample profile is accurate, we will mark all un-sampled "
154 "branches and calls as having 0 samples. Otherwise, treat "
155 "them conservatively as unknown. "));
// Help text for this option; the adjacent literals are concatenated.
159 cl::desc(
"For symbols in profile symbol list, regard their profiles to "
160 "be accurate. It may be overridden by profile-sample-accurate. "));
164 cl::desc(
"Merge past inlinee's profile to outline version if sample "
165 "profile loader decided not to inline a call site. It will "
166 "only be enabled when top-down order of profile loading is "
171 cl::desc(
"Do profile annotation and inlining for functions in top-down "
172 "order of call graph during sample profile loading. It only "
173 "works for new pass manager. "));
177 cl::desc(
"Process functions in a top-down order "
178 "defined by the profiled call graph when "
179 "-sample-profile-top-down-load is on."));
183 cl::desc(
"Inline cold call sites in profile loader if it's beneficial "
// Help text for this option; the adjacent literals are concatenated.
191 cl::desc(
"If true, artificially skip inline transformation in sample-loader "
192 "pass, and merge (or scale) profiles (as configured by "
193 "--sample-profile-merge-inlinee)."));
198 cl::desc(
"Sort profiled recursion by edge weights."));
// Help text for the size growth ratio limit option.
202 cl::desc(
"The size growth ratio limit for priority-based sample profile "
203 "loader inlining."));
// Help text for the lower bound of the size growth limit.
207 cl::desc(
"The lower bound of size growth limit for "
208 "priority-based sample profile loader inlining."));
// Help text for the upper bound of the size growth limit.
212 cl::desc(
"The upper bound of size growth limit for "
213 "priority-based sample profile loader inlining."));
// Help text for the hot callsite threshold option.
217 cl::desc(
"Hot callsite threshold for priority-based sample profile loader "
222 cl::desc(
"Threshold for inlining cold callsites"));
// Help text for the indirect-call-promotion relative hotness threshold.
228 "Relative hotness percentage threshold for indirect "
229 "call promotion in priority-based sample profile loader inlining."));
234 "Skip relative hotness check for ICP up to given number of targets."));
237 "sample-profile-prioritized-inline",
cl::Hidden,
// Adjacent string literals are concatenated by the compiler, so the first
// sentence must end with a space to keep the two sentences apart in the
// rendered help text.
239 cl::desc(
"Use call site prioritized inlining for sample profile loader. "
240 "Currently only CSSPGO is supported."));
245 cl::desc(
"Use the preinliner decisions stored in profile context."));
248 "sample-profile-recursive-inline",
cl::Hidden,
250 cl::desc(
"Allow sample loader inliner to inline recursive calls."));
255 "Optimization remarks file containing inline remarks to be replayed "
256 "by inlining from sample profile loader."),
260 "sample-profile-inline-replay-scope",
261 cl::init(ReplayInlinerSettings::Scope::Function),
263 "Replay on functions that have remarks associated "
264 "with them (default)"),
265 clEnumValN(ReplayInlinerSettings::Scope::Module,
"Module",
266 "Replay on the entire module")),
267 cl::desc(
"Whether inline replay should be applied to the entire "
268 "Module or just the Functions (default) that are present as "
269 "callers in remarks during sample profile inlining."),
273 "sample-profile-inline-replay-fallback",
274 cl::init(ReplayInlinerSettings::Fallback::Original),
277 ReplayInlinerSettings::Fallback::Original,
"Original",
278 "All decisions not in replay send to original advisor (default)"),
279 clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline,
280 "AlwaysInline",
"All decisions not in replay are inlined"),
281 clEnumValN(ReplayInlinerSettings::Fallback::NeverInline,
"NeverInline",
282 "All decisions not in replay are not inlined")),
283 cl::desc(
"How sample profile inline replay treats sites that don't come "
284 "from the replay. Original: defers to original advisor, "
285 "AlwaysInline: inline all sites not in replay, NeverInline: "
286 "inline no sites not in replay"),
290 "sample-profile-inline-replay-format",
291 cl::init(CallSiteFormat::Format::LineColumnDiscriminator),
293 clEnumValN(CallSiteFormat::Format::Line,
"Line",
"<Line Number>"),
294 clEnumValN(CallSiteFormat::Format::LineColumn,
"LineColumn",
295 "<Line Number>:<Column Number>"),
296 clEnumValN(CallSiteFormat::Format::LineDiscriminator,
297 "LineDiscriminator",
"<Line Number>.<Discriminator>"),
298 clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator,
299 "LineColumnDiscriminator",
300 "<Line Number>:<Column Number>.<Discriminator> (default)")),
305 cl::desc(
"Max number of promotions for a single indirect "
306 "call callsite in sample profile loader"));
310 cl::desc(
"Ignore existing branch weights on IR and always overwrite."));
314 cl::desc(
"Annotate LTO phase (prelink / postlink), or main (no LTO) for "
315 "sample-profile inline pass name."));
// A pair of basic-block pointers. generateMDProfMetadata builds one as
// (BB, Succ) via std::make_pair.
325using Edge = std::pair<const BasicBlock *, const BasicBlock *>;
330class GUIDToFuncNameMapper {
335 CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) {
339 for (
const auto &
F : CurrentModule) {
341 CurrentGUIDToFuncNameMap.insert(
342 {Function::getGUID(OrigName), OrigName});
352 if (CanonName != OrigName)
353 CurrentGUIDToFuncNameMap.insert(
354 {Function::getGUID(CanonName), CanonName});
358 SetGUIDToFuncNameMapForAll(&CurrentGUIDToFuncNameMap);
361 ~GUIDToFuncNameMapper() {
365 CurrentGUIDToFuncNameMap.clear();
369 SetGUIDToFuncNameMapForAll(
nullptr);
374 std::queue<FunctionSamples *> FSToUpdate;
376 FSToUpdate.push(&IFS.second);
379 while (!FSToUpdate.empty()) {
382 FS->GUIDToFuncNameMap = Map;
383 for (
const auto &ICS : FS->getCallsiteSamples()) {
385 for (
const auto &IFS : FSMap) {
387 FSToUpdate.push(&FS);
399struct InlineCandidate {
409 float CallsiteDistribution;
413struct CandidateComparer {
414 bool operator()(
const InlineCandidate &LHS,
const InlineCandidate &RHS) {
415 if (
LHS.CallsiteCount !=
RHS.CallsiteCount)
416 return LHS.CallsiteCount <
RHS.CallsiteCount;
420 assert(LCS && RCS &&
"Expect non-null FunctionSamples");
431using CandidateQueue =
436class SampleProfileMatcher {
447 uint64_t TotalProfiledCallsites = 0;
448 uint64_t NumMismatchedCallsites = 0;
449 uint64_t MismatchedCallsiteSamples = 0;
453 uint64_t MismatchedFuncHashSamples = 0;
458 static constexpr const char *UnknownIndirectCallee =
459 "unknown.indirect.callee";
464 :
M(
M), Reader(Reader), ProbeManager(ProbeManager){};
470 auto It = FlattenedProfiles.
find(CanonFName);
471 if (It != FlattenedProfiles.end())
477 std::map<LineLocation, StringRef> &IRAnchors);
481 void countProfileMismatches(
483 const std::map<LineLocation, StringRef> &IRAnchors,
485 void countProfileCallsiteMismatches(
487 const std::map<LineLocation, StringRef> &IRAnchors,
494 return Ret.first->second;
496 void distributeIRToProfileLocationMap();
498 void runStaleProfileMatching(
499 const Function &
F,
const std::map<LineLocation, StringRef> &IRAnchors,
519 GetAC(
std::
move(GetAssumptionCache)),
520 GetTTI(
std::
move(GetTargetTransformInfo)), GetTLI(
std::
move(GetTLI)),
538 std::vector<const FunctionSamples *>
545 bool tryPromoteAndInlineCandidate(
551 std::optional<InlineCost> getExternalInlineAdvisorCost(
CallBase &CB);
552 bool getExternalInlineAdvisorShouldInline(
CallBase &CB);
553 InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
554 bool getInlineCandidate(InlineCandidate *NewCandidate,
CallBase *CB);
556 tryInlineCandidate(InlineCandidate &Candidate,
559 inlineHotFunctionsWithPriority(
Function &
F,
563 void emitOptimizationRemarksForInlineCandidates(
566 void promoteMergeNotInlinedContextSamples(
570 std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(
Module &M);
571 void generateMDProfMetadata(
Function &
F);
584 std::unique_ptr<SampleContextTracker> ContextTracker;
592 const std::string AnnotatedPassName;
596 std::unique_ptr<ProfileSymbolList> PSL;
607 struct NotInlinedProfileInfo {
625 bool ProfAccForSymsInList;
628 std::unique_ptr<InlineAdvisor> ExternalInlineAdvisor;
631 std::unique_ptr<SampleProfileMatcher> MatchingManager;
634 const char *getAnnotatedRemarkPassName()
const {
635 return AnnotatedPassName.c_str();
648 const std::vector<const BasicBlockT *> &BasicBlocks,
650 for (
auto &Jump :
Func.Jumps) {
651 const auto *BB = BasicBlocks[Jump.Source];
652 const auto *Succ = BasicBlocks[Jump.Target];
656 if (Successors[BB].
size() == 2 && Successors[BB].back() == Succ) {
657 if (isa<InvokeInst>(TI)) {
658 Jump.IsUnlikely =
true;
664 if (isa<UnreachableInst>(SuccTI)) {
665 Jump.IsUnlikely =
true;
686 return getProbeWeight(Inst);
690 return std::error_code();
695 if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst) || isa<PHINode>(Inst))
696 return std::error_code();
705 if (
const auto *CB = dyn_cast<CallBase>(&Inst))
706 if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
709 return getInstWeightImpl(Inst);
725SampleProfileLoader::findCalleeFunctionSamples(
const CallBase &Inst)
const {
733 CalleeName =
Callee->getName();
736 return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName);
749std::vector<const FunctionSamples *>
750SampleProfileLoader::findIndirectCallFunctionSamples(
753 std::vector<const FunctionSamples *>
R;
760 assert(L && R &&
"Expect non-null FunctionSamples");
761 if (
L->getHeadSamplesEstimate() !=
R->getHeadSamplesEstimate())
762 return L->getHeadSamplesEstimate() >
R->getHeadSamplesEstimate();
769 ContextTracker->getIndirectCalleeContextSamplesFor(DIL);
770 if (CalleeSamples.empty())
776 for (
const auto *
const FS : CalleeSamples) {
777 Sum +=
FS->getHeadSamplesEstimate();
789 auto T =
FS->findCallTargetMapAt(CallSite);
792 for (
const auto &T_C :
T.get())
797 for (
const auto &NameFS : *M) {
798 Sum += NameFS.second.getHeadSamplesEstimate();
799 R.push_back(&NameFS.second);
807SampleProfileLoader::findFunctionSamples(
const Instruction &Inst)
const {
818 auto it = DILocation2SampleMap.try_emplace(DIL,
nullptr);
821 it.first->second = ContextTracker->getContextSamplesFor(DIL);
824 Samples->findFunctionSamples(DIL, Reader->
getRemapper());
826 return it.first->second;
839 std::unique_ptr<InstrProfValueData[]> ValueData =
843 ValueData.get(), NumVals, TotalCount,
true);
849 unsigned NumPromoted = 0;
857 if (ValueData[
I].
Value == Function::getGUID(Candidate))
886 std::unique_ptr<InstrProfValueData[]> ValueData =
890 ValueData.get(), NumVals, OldSum,
true);
896 "If sum is 0, assume only one element in CallTargets "
897 "with count being NOMORE_ICP_MAGICNUM");
901 ValueCountMap[ValueData[
I].
Value] = ValueData[
I].Count;
908 OldSum -= Pair.first->second;
918 ValueCountMap[ValueData[
I].Value] = ValueData[
I].Count;
922 for (
const auto &
Data : CallTargets) {
929 assert(Sum >=
Data.Count &&
"Sum should never be less than Data.Count");
935 for (
const auto &ValueCount : ValueCountMap) {
937 InstrProfValueData{ValueCount.first, ValueCount.second});
941 [](
const InstrProfValueData &L,
const InstrProfValueData &R) {
942 if (L.Count != R.Count)
943 return L.Count > R.Count;
944 return L.Value > R.Value;
950 NewCallTargets, Sum, IPVK_IndirectCallTarget, MaxMDCount);
963bool SampleProfileLoader::tryPromoteAndInlineCandidate(
974 auto CalleeFunctionName = Candidate.CalleeSamples->getFuncName();
979 auto &CI = *Candidate.CallInstr;
983 const char *Reason =
"Callee function not available";
990 if (!
R->getValue()->isDeclaration() &&
R->getValue()->getSubprogram() &&
991 R->getValue()->hasFnAttribute(
"use-sample-profile") &&
1000 CI,
R->getValue(), Candidate.CallsiteCount, Sum,
false, ORE);
1002 Sum -= Candidate.CallsiteCount;
1015 Candidate.CallInstr = DI;
1016 if (isa<CallInst>(DI) || isa<InvokeInst>(DI)) {
1017 bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
1022 *DI,
static_cast<float>(Candidate.CallsiteCount) / SumOrigin);
1029 << Candidate.CalleeSamples->getFuncName() <<
" because "
1040 if (Callee ==
nullptr)
1049 if (
Cost.isAlways())
1055void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
1058 for (
auto *
I : Candidates) {
1059 Function *CalledFunction =
I->getCalledFunction();
1060 if (CalledFunction) {
1062 "InlineAttempt",
I->getDebugLoc(),
1064 <<
"previous inlining reattempted for "
1065 << (
Hot ?
"hotness: '" :
"size: '")
1066 <<
ore::NV(
"Callee", CalledFunction) <<
"' into '"
1072void SampleProfileLoader::findExternalInlineCandidate(
1079 if (CB && getExternalInlineAdvisorShouldInline(*CB)) {
1107 std::queue<ContextTrieNode *> CalleeList;
1108 CalleeList.push(Caller);
1109 while (!CalleeList.empty()) {
1129 if (!Func ||
Func->isDeclaration())
1135 for (
const auto &TS : BS.second.getCallTargets())
1136 if (TS.getValue() > Threshold) {
1139 if (!Callee ||
Callee->isDeclaration())
1147 for (
auto &Child :
Node->getAllChildContext()) {
1149 CalleeList.push(CalleeNode);
1176bool SampleProfileLoader::inlineHotFunctions(
1180 assert((!ProfAccForSymsInList ||
1182 !
F.hasFnAttribute(
"profile-sample-accurate"))) &&
1183 "ProfAccForSymsInList should be false when profile-sample-accurate "
1187 bool Changed =
false;
1188 bool LocalChanged =
true;
1189 while (LocalChanged) {
1190 LocalChanged =
false;
1192 for (
auto &BB :
F) {
1196 for (
auto &
I : BB) {
1198 if (
auto *CB = dyn_cast<CallBase>(&
I)) {
1199 if (!isa<IntrinsicInst>(
I)) {
1200 if ((FS = findCalleeFunctionSamples(*CB))) {
1202 "GUIDToFuncNameMap has to be populated");
1204 if (
FS->getHeadSamplesEstimate() > 0 ||
1206 LocalNotInlinedCallSites.
insert({CB,
FS});
1209 else if (shouldInlineColdCallee(*CB))
1211 }
else if (getExternalInlineAdvisorShouldInline(*CB)) {
1217 if (
Hot || ExternalInlineAdvisor) {
1219 emitOptimizationRemarksForInlineCandidates(AllCandidates,
F,
true);
1222 emitOptimizationRemarksForInlineCandidates(ColdCandidates,
F,
false);
1226 Function *CalledFunction =
I->getCalledFunction();
1227 InlineCandidate Candidate = {
I, LocalNotInlinedCallSites.
lookup(
I),
1231 if (CalledFunction == &
F)
1233 if (
I->isIndirectCall()) {
1235 for (
const auto *FS : findIndirectCallFunctionSamples(*
I, Sum)) {
1237 if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1238 findExternalInlineCandidate(
I, FS, InlinedGUIDs, SymbolMap,
1239 PSI->getOrCompHotCountThreshold());
1245 Candidate = {
I,
FS,
FS->getHeadSamplesEstimate(), 1.0};
1246 if (tryPromoteAndInlineCandidate(
F, Candidate, SumOrigin, Sum)) {
1247 LocalNotInlinedCallSites.
erase(
I);
1248 LocalChanged =
true;
1251 }
else if (CalledFunction && CalledFunction->
getSubprogram() &&
1253 if (tryInlineCandidate(Candidate)) {
1254 LocalNotInlinedCallSites.
erase(
I);
1255 LocalChanged =
true;
1257 }
else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1258 findExternalInlineCandidate(
I, findCalleeFunctionSamples(*
I),
1259 InlinedGUIDs, SymbolMap,
1260 PSI->getOrCompHotCountThreshold());
1263 Changed |= LocalChanged;
1269 promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites,
F);
1273bool SampleProfileLoader::tryInlineCandidate(
1280 CallBase &CB = *Candidate.CallInstr;
1282 assert(CalledFunction &&
"Expect a callee with definition");
1287 if (
Cost.isNever()) {
1289 "InlineFail", DLoc, BB)
1290 <<
"incompatible inlining");
1298 IFI.UpdateProfile =
false;
1301 if (!
IR.isSuccess())
1306 Cost,
true, getAnnotatedRemarkPassName());
1309 if (InlinedCallSites) {
1310 InlinedCallSites->
clear();
1311 for (
auto &
I : IFI.InlinedCallSites)
1316 ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
1326 if (Candidate.CallsiteDistribution < 1) {
1327 for (
auto &
I : IFI.InlinedCallSites) {
1330 Candidate.CallsiteDistribution);
1332 NumDuplicatedInlinesite++;
1338bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
1340 assert(CB &&
"Expect non-null call instruction");
1342 if (isa<IntrinsicInst>(CB))
1346 const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB);
1349 if (!CalleeSamples && !getExternalInlineAdvisorShouldInline(*CB))
1353 if (std::optional<PseudoProbe> Probe =
extractProbe(*CB))
1354 Factor = Probe->Factor;
1358 *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
1362std::optional<InlineCost>
1363SampleProfileLoader::getExternalInlineAdvisorCost(
CallBase &CB) {
1364 std::unique_ptr<InlineAdvice> Advice =
nullptr;
1365 if (ExternalInlineAdvisor) {
1366 Advice = ExternalInlineAdvisor->getAdvice(CB);
1368 if (!Advice->isInliningRecommended()) {
1369 Advice->recordUnattemptedInlining();
1372 Advice->recordInlining();
1380bool SampleProfileLoader::getExternalInlineAdvisorShouldInline(
CallBase &CB) {
1381 std::optional<InlineCost>
Cost = getExternalInlineAdvisorCost(CB);
1386SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
1387 if (std::optional<InlineCost> ReplayCost =
1388 getExternalInlineAdvisorCost(*Candidate.CallInstr))
1394 if (Candidate.CallsiteCount > PSI->getHotCountThreshold())
1401 assert(Callee &&
"Expect a definition for inline candidate of direct call");
1414 GetTTI(*Callee), GetAC, GetTLI);
1417 if (
Cost.isNever() ||
Cost.isAlways())
1450bool SampleProfileLoader::inlineHotFunctionsWithPriority(
1454 assert((!ProfAccForSymsInList ||
1456 !
F.hasFnAttribute(
"profile-sample-accurate"))) &&
1457 "ProfAccForSymsInList should be false when profile-sample-accurate "
1462 CandidateQueue CQueue;
1463 InlineCandidate NewCandidate;
1464 for (
auto &BB :
F) {
1465 for (
auto &
I : BB) {
1466 auto *CB = dyn_cast<CallBase>(&
I);
1469 if (getInlineCandidate(&NewCandidate, CB))
1470 CQueue.push(NewCandidate);
1479 "Max inline size limit should not be smaller than min inline size "
1484 if (ExternalInlineAdvisor)
1485 SizeLimit = std::numeric_limits<unsigned>::max();
1490 bool Changed =
false;
1491 while (!CQueue.empty() &&
F.getInstructionCount() <
SizeLimit) {
1492 InlineCandidate Candidate = CQueue.top();
1495 Function *CalledFunction =
I->getCalledFunction();
1497 if (CalledFunction == &
F)
1499 if (
I->isIndirectCall()) {
1501 auto CalleeSamples = findIndirectCallFunctionSamples(*
I, Sum);
1503 Sum *= Candidate.CallsiteDistribution;
1504 unsigned ICPCount = 0;
1505 for (
const auto *FS : CalleeSamples) {
1507 if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1508 findExternalInlineCandidate(
I, FS, InlinedGUIDs, SymbolMap,
1509 PSI->getOrCompHotCountThreshold());
1513 FS->getHeadSamplesEstimate() * Candidate.CallsiteDistribution;
1528 if (!PSI->isHotCount(EntryCountDistributed))
1533 Candidate = {
I,
FS, EntryCountDistributed,
1534 Candidate.CallsiteDistribution};
1535 if (tryPromoteAndInlineCandidate(
F, Candidate, SumOrigin, Sum,
1536 &InlinedCallSites)) {
1537 for (
auto *CB : InlinedCallSites) {
1538 if (getInlineCandidate(&NewCandidate, CB))
1539 CQueue.emplace(NewCandidate);
1543 }
else if (!ContextTracker) {
1544 LocalNotInlinedCallSites.
insert({
I,
FS});
1547 }
else if (CalledFunction && CalledFunction->
getSubprogram() &&
1550 if (tryInlineCandidate(Candidate, &InlinedCallSites)) {
1551 for (
auto *CB : InlinedCallSites) {
1552 if (getInlineCandidate(&NewCandidate, CB))
1553 CQueue.emplace(NewCandidate);
1556 }
else if (!ContextTracker) {
1557 LocalNotInlinedCallSites.
insert({
I, Candidate.CalleeSamples});
1559 }
else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1560 findExternalInlineCandidate(
I, findCalleeFunctionSamples(*
I),
1561 InlinedGUIDs, SymbolMap,
1562 PSI->getOrCompHotCountThreshold());
1566 if (!CQueue.empty()) {
1568 ++NumCSInlinedHitMaxLimit;
1570 ++NumCSInlinedHitMinLimit;
1572 ++NumCSInlinedHitGrowthLimit;
1578 promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites,
F);
1582void SampleProfileLoader::promoteMergeNotInlinedContextSamples(
1586 for (
const auto &Pair : NonInlinedCallSites) {
1589 if (!Callee ||
Callee->isDeclaration())
1594 I->getDebugLoc(),
I->getParent())
1595 <<
"previous inlining not repeated: '" <<
ore::NV(
"Callee", Callee)
1596 <<
"' into '" <<
ore::NV(
"Caller", &
F) <<
"'");
1600 if (
FS->getTotalSamples() == 0 &&
FS->getHeadSamplesEstimate() == 0) {
1614 if (
FS->getHeadSamples() == 0) {
1618 FS->getHeadSamplesEstimate());
1627 OutlineFS = &OutlineFunctionSamples[
1629 OutlineFS->
merge(*FS, 1);
1635 notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
1636 pair.first->second.entryCount +=
FS->getHeadSamplesEstimate();
1654void SampleProfileLoader::generateMDProfMetadata(
Function &
F) {
1657 LLVM_DEBUG(
dbgs() <<
"\nPropagation complete. Setting branch weights\n");
1660 for (
auto &BI :
F) {
1663 if (BlockWeights[BB]) {
1664 for (
auto &
I : *BB) {
1665 if (!isa<CallInst>(
I) && !isa<InvokeInst>(
I))
1676 auto T =
FS->findCallTargetMapAt(CallSite);
1677 if (!
T ||
T.get().empty())
1684 if (Probe->Factor < 1)
1691 for (
const auto &
C :
T.get())
1698 FS->findFunctionSamplesMapAt(CallSite)) {
1699 for (
const auto &NameFS : *M)
1700 Sum += NameFS.second.getHeadSamplesEstimate();
1706 I.setMetadata(LLVMContext::MD_prof,
nullptr);
1707 }
else if (!isa<IntrinsicInst>(&
I)) {
1708 I.setMetadata(LLVMContext::MD_prof,
1709 MDB.createBranchWeights(
1710 {static_cast<uint32_t>(BlockWeights[BB])}));
1716 for (
auto &
I : *BB) {
1717 if (isa<CallInst>(
I) || isa<InvokeInst>(
I)) {
1719 I.setMetadata(LLVMContext::MD_prof,
nullptr);
1721 I.setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(0));
1729 if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI) &&
1730 !isa<IndirectBrInst>(TI))
1736 :
Twine(
"<UNKNOWN LOCATION>"))
1745 std::vector<uint64_t> EdgeIndex;
1750 EdgeIndex[
I] = EdgeMultiplicity[Succ];
1751 EdgeMultiplicity[Succ]++;
1756 Edge
E = std::make_pair(BB, Succ);
1762 if (Weight > std::numeric_limits<uint32_t>::max()) {
1764 Weight = std::numeric_limits<uint32_t>::max();
1773 uint64_t W = Weight / EdgeMultiplicity[Succ];
1775 if (EdgeIndex[
I] < Weight % EdgeMultiplicity[Succ])
1780 if (Weight > MaxWeight) {
1782 MaxDestInst = Succ->getFirstNonPHIOrDbgOrLifetime();
1798 if (MaxWeight > 0 &&
1801 TI->
setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
1804 <<
"most popular destination for conditional branches at "
1805 <<
ore::NV(
"CondBranchesLoc", BranchLoc);
1824bool SampleProfileLoader::emitAnnotations(
Function &
F) {
1825 bool Changed =
false;
1830 dbgs() <<
"Profile is invalid due to CFG mismatch for Function "
1831 <<
F.getName() <<
"\n");
1832 ++NumMismatchedProfile;
1836 ++NumMatchedProfile;
1838 if (getFunctionLoc(
F) == 0)
1842 <<
F.getName() <<
": " << getFunctionLoc(
F) <<
"\n");
1847 Changed |= inlineHotFunctionsWithPriority(
F, InlinedGUIDs);
1849 Changed |= inlineHotFunctions(
F, InlinedGUIDs);
1851 Changed |= computeAndPropagateWeights(
F, InlinedGUIDs);
1854 generateMDProfMetadata(
F);
1856 emitCoverageRemarks(
F);
1860std::unique_ptr<ProfiledCallGraph>
1861SampleProfileLoader::buildProfiledCallGraph(
Module &M) {
1862 std::unique_ptr<ProfiledCallGraph> ProfiledCG;
1864 ProfiledCG = std::make_unique<ProfiledCallGraph>(*ContextTracker);
1866 ProfiledCG = std::make_unique<ProfiledCallGraph>(Reader->
getProfiles());
1872 if (
F.isDeclaration() || !
F.hasFnAttribute(
"use-sample-profile"))
1880std::vector<Function *>
1882 std::vector<Function *> FunctionOrderList;
1883 FunctionOrderList.reserve(
M.size());
1886 errs() <<
"WARNING: -use-profiled-call-graph ignored, should be used "
1887 "together with -sample-profile-top-down-load.\n";
1900 if (!
F.isDeclaration() &&
F.hasFnAttribute(
"use-sample-profile"))
1901 FunctionOrderList.push_back(&
F);
1902 return FunctionOrderList;
1955 std::unique_ptr<ProfiledCallGraph> ProfiledCG = buildProfiledCallGraph(M);
1964 for (
auto *
Node : Range) {
1966 if (
F && !
F->isDeclaration() &&
F->hasFnAttribute(
"use-sample-profile"))
1967 FunctionOrderList.push_back(
F);
1977 if (!
F.isDeclaration() &&
F.hasFnAttribute(
"use-sample-profile"))
1978 FunctionOrderList.push_back(&
F);
1984 std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
1987 dbgs() <<
"Function processing order:\n";
1988 for (
auto F : FunctionOrderList) {
1989 dbgs() <<
F->getName() <<
"\n";
1993 return FunctionOrderList;
1996bool SampleProfileLoader::doInitialization(
Module &M,
1998 auto &Ctx =
M.getContext();
2001 Filename, Ctx, *FS, FSDiscriminatorPass::Base, RemappingFilename);
2002 if (std::error_code EC = ReaderOrErr.getError()) {
2003 std::string
Msg =
"Could not open profile: " +
EC.message();
2007 Reader = std::move(ReaderOrErr.get());
2008 Reader->
setSkipFlatProf(LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink);
2012 if (std::error_code EC = Reader->
read()) {
2013 std::string
Msg =
"profile reading failed: " +
EC.message();
2021 ProfAccForSymsInList =
2023 if (ProfAccForSymsInList) {
2024 NamesInProfile.clear();
2026 NamesInProfile.insert(NameTable->begin(), NameTable->end());
2027 CoverageTracker.setProfAccForSymsInList(
true);
2032 M, *
FAM, Ctx,
nullptr,
2037 false,
InlineContext{LTOPhase, InlinePass::ReplaySampleProfileInliner});
2087 ContextTracker = std::make_unique<SampleContextTracker>(
2093 ProbeManager = std::make_unique<PseudoProbeManager>(M);
2096 "Pseudo-probe-based profile requires SampleProfileProbePass";
2106 std::make_unique<SampleProfileMatcher>(M, *Reader, ProbeManager.get());
2112void SampleProfileMatcher::findIRAnchors(
2113 const Function &
F, std::map<LineLocation, StringRef> &IRAnchors) {
2117 auto FindTopLevelInlinedCallsite = [](
const DILocation *DIL) {
2118 assert((DIL && DIL->getInlinedAt()) &&
"No inlined callsite");
2122 DIL = DIL->getInlinedAt();
2123 }
while (DIL->getInlinedAt());
2126 StringRef CalleeName = PrevDIL->getSubprogramLinkageName();
2127 return std::make_pair(Callsite, CalleeName);
2130 auto GetCanonicalCalleeName = [](
const CallBase *CB) {
2131 StringRef CalleeName = UnknownIndirectCallee;
2138 for (
auto &BB :
F) {
2139 for (
auto &
I : BB) {
2147 if (DIL->getInlinedAt()) {
2148 IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
2152 if (
const auto *CB = dyn_cast<CallBase>(&
I)) {
2154 if (!isa<IntrinsicInst>(&
I))
2155 CalleeName = GetCanonicalCalleeName(CB);
2157 IRAnchors.emplace(
LineLocation(Probe->Id, 0), CalleeName);
2164 if (!isa<CallBase>(&
I) || isa<IntrinsicInst>(&
I))
2167 if (DIL->getInlinedAt()) {
2168 IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
2171 StringRef CalleeName = GetCanonicalCalleeName(dyn_cast<CallBase>(&
I));
2172 IRAnchors.emplace(Callsite, CalleeName);
2179void SampleProfileMatcher::countMismatchedSamples(
const FunctionSamples &FS) {
2180 const auto *FuncDesc = ProbeManager->
getDesc(
FS.getName());
2186 MismatchedFuncHashSamples +=
FS.getTotalSamples();
2189 for (
const auto &
I :
FS.getCallsiteSamples())
2190 for (
const auto &CS :
I.second)
2191 countMismatchedSamples(CS.second);
2194void SampleProfileMatcher::countProfileMismatches(
2196 const std::map<LineLocation, StringRef> &IRAnchors,
2198 [[maybe_unused]]
bool IsFuncHashMismatch =
false;
2200 TotalFuncHashSamples +=
FS.getTotalSamples();
2201 TotalProfiledFunc++;
2202 const auto *FuncDesc = ProbeManager->
getDesc(
F);
2205 NumMismatchedFuncHash++;
2206 IsFuncHashMismatch =
true;
2208 countMismatchedSamples(FS);
2212 uint64_t FuncMismatchedCallsites = 0;
2213 uint64_t FuncProfiledCallsites = 0;
2214 countProfileCallsiteMismatches(FS, IRAnchors, ProfileAnchors,
2215 FuncMismatchedCallsites,
2216 FuncProfiledCallsites);
2217 TotalProfiledCallsites += FuncProfiledCallsites;
2218 NumMismatchedCallsites += FuncMismatchedCallsites;
2221 FuncMismatchedCallsites)
2222 dbgs() <<
"Function checksum is matched but there are "
2223 << FuncMismatchedCallsites <<
"/" << FuncProfiledCallsites
2224 <<
" mismatched callsites.\n";
2228void SampleProfileMatcher::countProfileCallsiteMismatches(
2230 const std::map<LineLocation, StringRef> &IRAnchors,
2236 for (
const auto &
I : ProfileAnchors) {
2237 const auto &Loc =
I.first;
2238 const auto &Callees =
I.second;
2239 assert(!Callees.empty() &&
"Callees should not be empty");
2242 const auto &
IR = IRAnchors.find(Loc);
2243 if (
IR != IRAnchors.end())
2244 IRCalleeName =
IR->second;
2248 auto CTM =
FS.findCallTargetMapAt(Loc);
2250 for (
const auto &
I : CTM.get())
2251 CallsiteSamples +=
I.second;
2253 const auto *FSMap =
FS.findFunctionSamplesMapAt(Loc);
2255 for (
const auto &
I : *FSMap)
2256 CallsiteSamples +=
I.second.getTotalSamples();
2259 bool CallsiteIsMatched =
false;
2264 if (IRCalleeName == UnknownIndirectCallee)
2265 CallsiteIsMatched =
true;
2266 else if (Callees.size() == 1 && Callees.count(IRCalleeName))
2267 CallsiteIsMatched =
true;
2269 FuncProfiledCallsites++;
2270 TotalCallsiteSamples += CallsiteSamples;
2271 if (!CallsiteIsMatched) {
2272 FuncMismatchedCallsites++;
2273 MismatchedCallsiteSamples += CallsiteSamples;
2278void SampleProfileMatcher::findProfileAnchors(
2281 auto isInvalidLineOffset = [](
uint32_t LineOffset) {
2282 return LineOffset & 0x8000;
2285 for (
const auto &
I :
FS.getBodySamples()) {
2289 for (
const auto &
I :
I.second.getCallTargets()) {
2291 Ret.first->second.insert(
I.first());
2295 for (
const auto &
I :
FS.getCallsiteSamples()) {
2299 const auto &CalleeMap =
I.second;
2300 for (
const auto &
I : CalleeMap) {
2302 Ret.first->second.insert(
I.first);
2324void SampleProfileMatcher::runStaleProfileMatching(
2325 const Function &
F,
const std::map<LineLocation, StringRef> &IRAnchors,
2330 assert(IRToProfileLocationMap.empty() &&
2331 "Run stale profile matching only once per function");
2334 for (
const auto &
I : ProfileAnchors) {
2335 const auto &Loc =
I.first;
2336 const auto &Callees =
I.second;
2338 if (Callees.size() == 1) {
2340 const auto &Candidates = CalleeToCallsitesMap.
try_emplace(
2341 CalleeName, std::set<LineLocation>());
2342 Candidates.first->second.insert(Loc);
2349 IRToProfileLocationMap.insert({
From, To});
2353 int32_t LocationDelta = 0;
2356 for (
const auto &
IR : IRAnchors) {
2357 const auto &Loc =
IR.first;
2359 bool IsMatchedAnchor =
false;
2361 if (!CalleeName.
empty()) {
2362 auto CandidateAnchors = CalleeToCallsitesMap.
find(CalleeName);
2363 if (CandidateAnchors != CalleeToCallsitesMap.
end() &&
2364 !CandidateAnchors->second.empty()) {
2365 auto CI = CandidateAnchors->second.begin();
2366 const auto Candidate = *CI;
2367 CandidateAnchors->second.erase(CI);
2368 InsertMatching(Loc, Candidate);
2370 <<
" is matched from " << Loc <<
" to " << Candidate
2372 LocationDelta = Candidate.LineOffset - Loc.
LineOffset;
2378 for (
size_t I = (LastMatchedNonAnchors.
size() + 1) / 2;
2379 I < LastMatchedNonAnchors.
size();
I++) {
2380 const auto &
L = LastMatchedNonAnchors[
I];
2381 uint32_t CandidateLineOffset =
L.LineOffset + LocationDelta;
2382 LineLocation Candidate(CandidateLineOffset,
L.Discriminator);
2383 InsertMatching(L, Candidate);
2384 LLVM_DEBUG(
dbgs() <<
"Location is rematched backwards from " << L
2385 <<
" to " << Candidate <<
"\n");
2388 IsMatchedAnchor =
true;
2389 LastMatchedNonAnchors.
clear();
2394 if (!IsMatchedAnchor) {
2397 InsertMatching(Loc, Candidate);
2398 LLVM_DEBUG(
dbgs() <<
"Location is matched from " << Loc <<
" to "
2399 << Candidate <<
"\n");
2405void SampleProfileMatcher::runOnFunction(
const Function &
F) {
2412 const auto *FSFlattened = getFlattenedSamplesFor(
F);
2419 std::map<LineLocation, StringRef> IRAnchors;
2420 findIRAnchors(
F, IRAnchors);
2423 std::map<LineLocation, StringSet<>> ProfileAnchors;
2424 findProfileAnchors(*FSFlattened, ProfileAnchors);
2434 countProfileMismatches(
F, *FS, IRAnchors, ProfileAnchors);
2443 runStaleProfileMatching(
F, IRAnchors, ProfileAnchors,
2444 getIRToProfileLocationMap(
F));
2448void SampleProfileMatcher::runOnModule() {
2452 if (
F.isDeclaration() || !
F.hasFnAttribute(
"use-sample-profile"))
2457 distributeIRToProfileLocationMap();
2461 errs() <<
"(" << NumMismatchedFuncHash <<
"/" << TotalProfiledFunc <<
")"
2462 <<
" of functions' profile are invalid and "
2463 <<
" (" << MismatchedFuncHashSamples <<
"/" << TotalFuncHashSamples
2465 <<
" of samples are discarded due to function hash mismatch.\n";
2467 errs() <<
"(" << NumMismatchedCallsites <<
"/" << TotalProfiledCallsites
2469 <<
" of callsites' profile are invalid and "
2470 <<
"(" << MismatchedCallsiteSamples <<
"/" << TotalCallsiteSamples
2472 <<
" of samples are discarded due to callsite location mismatch.\n";
2481 ProfStatsVec.
emplace_back(
"NumMismatchedFuncHash", NumMismatchedFuncHash);
2482 ProfStatsVec.
emplace_back(
"TotalProfiledFunc", TotalProfiledFunc);
2484 MismatchedFuncHashSamples);
2485 ProfStatsVec.
emplace_back(
"TotalFuncHashSamples", TotalFuncHashSamples);
2488 ProfStatsVec.
emplace_back(
"NumMismatchedCallsites", NumMismatchedCallsites);
2489 ProfStatsVec.
emplace_back(
"TotalProfiledCallsites", TotalProfiledCallsites);
2491 MismatchedCallsiteSamples);
2492 ProfStatsVec.
emplace_back(
"TotalCallsiteSamples", TotalCallsiteSamples);
2494 auto *MD = MDB.createLLVMStats(ProfStatsVec);
2495 auto *NMD =
M.getOrInsertNamedMetadata(
"llvm.stats");
2496 NMD->addOperand(MD);
2500void SampleProfileMatcher::distributeIRToProfileLocationMap(
2502 const auto ProfileMappings = FuncMappings.
find(
FS.getName());
2503 if (ProfileMappings != FuncMappings.
end()) {
2504 FS.setIRToProfileLocationMap(&(ProfileMappings->second));
2507 for (
auto &Inlinees :
FS.getCallsiteSamples()) {
2508 for (
auto FS : Inlinees.second) {
2509 distributeIRToProfileLocationMap(
FS.second);
2516void SampleProfileMatcher::distributeIRToProfileLocationMap() {
2518 distributeIRToProfileLocationMap(
I.second);
2525 GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
2528 if (
M.getProfileSummary(
false) ==
nullptr) {
2535 TotalCollectedSamples +=
I.second.getTotalSamples();
2539 for (
const auto &N_F :
M.getValueSymbolTable()) {
2541 Function *
F = dyn_cast<Function>(N_F.getValue());
2542 if (
F ==
nullptr || OrigName.
empty())
2546 if (OrigName != NewName && !NewName.
empty()) {
2553 r.first->second =
nullptr;
2558 if (
auto MapName = Remapper->lookUpNameInProfile(OrigName)) {
2559 if (*MapName != OrigName && !MapName->empty())
2565 "No empty StringRef should be added in SymbolMap");
2569 MatchingManager->runOnModule();
2572 bool retval =
false;
2573 for (
auto *
F : buildFunctionOrder(M, CG)) {
2575 clearFunctionData();
2581 for (
const std::pair<Function *, NotInlinedProfileInfo> &pair :
2589 LLVM_DEBUG(
dbgs() <<
"\n\nProcessing Function " <<
F.getName() <<
"\n");
2590 DILocation2SampleMap.clear();
2601 initialEntryCount = 0;
2604 ProfAccForSymsInList =
false;
2606 CoverageTracker.setProfAccForSymsInList(ProfAccForSymsInList);
2612 if (ProfAccForSymsInList) {
2614 if (PSL->contains(
F.getName()))
2615 initialEntryCount = 0;
2628 if (NamesInProfile.count(CanonName))
2629 initialEntryCount = -1;
2634 if (!
F.getEntryCount())
2636 std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
2643 OwnedORE = std::make_unique<OptimizationRemarkEmitter>(&
F);
2644 ORE = OwnedORE.get();
2648 Samples = ContextTracker->getBaseSamplesFor(
F);
2655 auto It = OutlineFunctionSamples.find(CanonName);
2656 if (It != OutlineFunctionSamples.end()) {
2657 Samples = &It->second;
2658 }
else if (
auto Remapper = Reader->
getRemapper()) {
2659 if (
auto RemppedName = Remapper->lookUpNameInProfile(CanonName)) {
2660 It = OutlineFunctionSamples.find(*RemppedName);
2661 if (It != OutlineFunctionSamples.end())
2662 Samples = &It->second;
2668 if (Samples && !Samples->
empty())
2669 return emitAnnotations(
F);
2675 : ProfileFileName(File), ProfileRemappingFileName(RemappingFile),
2676 LTOPhase(LTOPhase), FS(
std::
move(FS)) {}
2696 SampleProfileLoader SampleLoader(
2699 : ProfileRemappingFileName,
2700 LTOPhase, FS, GetAssumptionCache, GetTTI, GetTLI);
2702 if (!SampleLoader.doInitialization(M, &
FAM))
2707 if (!SampleLoader.runOnModule(M, &AM, PSI, CG))
This file defines the StringMap class.
BlockVerifier::State From
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
static bool runOnFunction(Function &F, bool PostInlining)
Provides ErrorOr<T> smart pointer.
static cl::opt< unsigned > SizeLimit("eif-limit", cl::init(6), cl::Hidden, cl::desc("Size limit in Hexagon early if-conversion"))
Implements a lazy call graph analysis and related passes for the new pass manager.
Legalize a function's Machine IR
This file implements a map that provides insertion order iteration.
static const Function * getCalledFunction(const Value *V, bool &IsNoBuiltin)
Module.h This file contains the declarations for the Module class.
FunctionAnalysisManager FAM
This header defines various interfaces for pass management in LLVM.
This file defines the PriorityQueue class.
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file provides the interface for context-sensitive profile tracker used by CSSPGO.
This file provides the interface for the sampled PGO profile loader base implementation.
This file provides the utility functions for the sampled PGO loader base implementation.
This file provides the interface for the pseudo probe implementation for AutoFDO.
static cl::opt< std::string > SampleProfileFile("sample-profile-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile file loaded by -sample-profile"), cl::Hidden)
static cl::opt< bool > ProfileSampleBlockAccurate("profile-sample-block-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "branches and calls as having 0 samples. Otherwise, treat " "them conservatively as unknown. "))
static cl::opt< unsigned > MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden, cl::desc("Max number of promotions for a single indirect " "call callsite in sample profile loader"))
static cl::opt< ReplayInlinerSettings::Fallback > ProfileInlineReplayFallback("sample-profile-inline-replay-fallback", cl::init(ReplayInlinerSettings::Fallback::Original), cl::values(clEnumValN(ReplayInlinerSettings::Fallback::Original, "Original", "All decisions not in replay send to original advisor (default)"), clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline, "AlwaysInline", "All decisions not in replay are inlined"), clEnumValN(ReplayInlinerSettings::Fallback::NeverInline, "NeverInline", "All decisions not in replay are not inlined")), cl::desc("How sample profile inline replay treats sites that don't come " "from the replay. Original: defers to original advisor, " "AlwaysInline: inline all sites not in replay, NeverInline: " "inline no sites not in replay"), cl::Hidden)
static cl::opt< bool > OverwriteExistingWeights("overwrite-existing-weights", cl::Hidden, cl::init(false), cl::desc("Ignore existing branch weights on IR and always overwrite."))
static void updateIDTMetaData(Instruction &Inst, const SmallVectorImpl< InstrProfValueData > &CallTargets, uint64_t Sum)
Update indirect call target profile metadata for Inst.
static cl::opt< bool > AnnotateSampleProfileInlinePhase("annotate-sample-profile-inline-phase", cl::Hidden, cl::init(false), cl::desc("Annotate LTO phase (prelink / postlink), or main (no LTO) for " "sample-profile inline pass name."))
static cl::opt< std::string > ProfileInlineReplayFile("sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"), cl::desc("Optimization remarks file containing inline remarks to be replayed " "by inlining from sample profile loader."), cl::Hidden)
static cl::opt< bool > ProfileMergeInlinee("sample-profile-merge-inlinee", cl::Hidden, cl::init(true), cl::desc("Merge past inlinee's profile to outline version if sample " "profile loader decided not to inline a call site. It will " "only be enabled when top-down order of profile loading is " "enabled. "))
static cl::opt< bool > PersistProfileStaleness("persist-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute stale profile statistical metrics and write it into the " "native object file(.llvm_stats section)."))
static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate)
Check whether the indirect call promotion history of Inst allows the promotion for Candidate.
static SmallVector< InstrProfValueData, 2 > GetSortedValueDataFromCallTargets(const SampleRecord::CallTargetMap &M)
Returns the sorted CallTargetMap M by count in descending order.
static cl::opt< bool > UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden, cl::desc("Process functions in a top-down order " "defined by the profiled call graph when " "-sample-profile-top-down-load is on."))
static cl::opt< ReplayInlinerSettings::Scope > ProfileInlineReplayScope("sample-profile-inline-replay-scope", cl::init(ReplayInlinerSettings::Scope::Function), cl::values(clEnumValN(ReplayInlinerSettings::Scope::Function, "Function", "Replay on functions that have remarks associated " "with them (default)"), clEnumValN(ReplayInlinerSettings::Scope::Module, "Module", "Replay on the entire module")), cl::desc("Whether inline replay should be applied to the entire " "Module or just the Functions (default) that are present as " "callers in remarks during sample profile inlining."), cl::Hidden)
static cl::opt< unsigned > ProfileICPRelativeHotness("sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25), cl::desc("Relative hotness percentage threshold for indirect " "call promotion in proirity-based sample profile loader inlining."))
Function::ProfileCount ProfileCount
static cl::opt< unsigned > ProfileICPRelativeHotnessSkip("sample-profile-icp-relative-hotness-skip", cl::Hidden, cl::init(1), cl::desc("Skip relative hotness check for ICP up to given number of targets."))
static cl::opt< bool > ReportProfileStaleness("report-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute and report stale profile statistical metrics."))
static cl::opt< bool > UsePreInlinerDecision("sample-profile-use-preinliner", cl::Hidden, cl::desc("Use the preinliner decisions stored in profile context."))
static cl::opt< bool > ProfileAccurateForSymsInList("profile-accurate-for-symsinlist", cl::Hidden, cl::init(true), cl::desc("For symbols in profile symbol list, regard their profiles to " "be accurate. It may be overriden by profile-sample-accurate. "))
static cl::opt< bool > DisableSampleLoaderInlining("disable-sample-loader-inlining", cl::Hidden, cl::init(false), cl::desc("If true, artifically skip inline transformation in sample-loader " "pass, and merge (or scale) profiles (as configured by " "--sample-profile-merge-inlinee)."))
static cl::opt< bool > ProfileSizeInline("sample-profile-inline-size", cl::Hidden, cl::init(false), cl::desc("Inline cold call sites in profile loader if it's beneficial " "for code size."))
static cl::opt< bool > SalvageStaleProfile("salvage-stale-profile", cl::Hidden, cl::init(false), cl::desc("Salvage stale profile by fuzzy matching and use the remapped " "location for sample profile query."))
static cl::opt< bool > ProfileTopDownLoad("sample-profile-top-down-load", cl::Hidden, cl::init(true), cl::desc("Do profile annotation and inlining for functions in top-down " "order of call graph during sample profile loading. It only " "works for new pass manager. "))
static cl::opt< bool > ProfileSampleAccurate("profile-sample-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "callsite and function as having 0 samples. Otherwise, treat " "un-sampled callsites and functions conservatively as unknown. "))
static cl::opt< bool > AllowRecursiveInline("sample-profile-recursive-inline", cl::Hidden, cl::desc("Allow sample loader inliner to inline recursive calls."))
static cl::opt< CallSiteFormat::Format > ProfileInlineReplayFormat("sample-profile-inline-replay-format", cl::init(CallSiteFormat::Format::LineColumnDiscriminator), cl::values(clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"), clEnumValN(CallSiteFormat::Format::LineColumn, "LineColumn", "<Line Number>:<Column Number>"), clEnumValN(CallSiteFormat::Format::LineDiscriminator, "LineDiscriminator", "<Line Number>.<Discriminator>"), clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator, "LineColumnDiscriminator", "<Line Number>:<Column Number>.<Discriminator> (default)")), cl::desc("How sample profile inline replay file is formatted"), cl::Hidden)
static cl::opt< std::string > SampleProfileRemappingFile("sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden)
static cl::opt< bool > CallsitePrioritizedInline("sample-profile-prioritized-inline", cl::Hidden, cl::desc("Use call site prioritized inlining for sample profile loader." "Currently only CSSPGO is supported."))
This file provides the interface for the sampled PGO loader pass.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Defines the virtual file system interface vfs::FileSystem.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
This class represents a function call, abstracting a target machine's calling convention.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Implements a dense probed hash-table based set.
Diagnostic information for the sample profiler.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Represents either an error or a value T.
Class to represent profile counts.
DISubprogram * getSubprogram() const
Get the attached subprogram.
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
static bool isAvailableExternallyLinkage(LinkageTypes Linkage)
Module * getParent()
Get the module that this global value is contained inside of...
Represents the cost of inlining a function.
static InlineCost getNever(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost getAlways(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost get(int Cost, int Threshold, int StaticBonus=0)
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
InlineResult is basically true or false.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
bool extractProfTotalWeight(uint64_t &TotalVal) const
Retrieve total raw weight values of a branch.
const BasicBlock * getParent() const
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
This is an important class for using LLVM in a threaded context.
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
An analysis pass which computes the call graph for a module.
A node in the call graph.
A RefSCC of the call graph.
An SCC of the call graph.
A lazily constructed view of the call graph of a module.
iterator_range< postorder_ref_scc_iterator > postorder_ref_sccs()
This class implements a map that also provides access to all stored values in a deterministic order.
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
ValueT lookup(const KeyT &Key) const
A Module instance is used to store all the information related to an LLVM module.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
Metadata * getMD(LLVMContext &Context, bool AddPartialField=true, bool AddPartialProfileRatioField=true)
Return summary information as metadata.
bool profileIsHashMismatched(const PseudoProbeDescriptor &FuncDesc, const FunctionSamples &Samples) const
bool moduleIsProbed(const Module &M) const
bool profileIsValid(const Function &F, const FunctionSamples &Samples) const
const PseudoProbeDescriptor * getDesc(uint64_t GUID) const
Sample profile inference pass.
void computeDominanceAndLoopInfo(FunctionT &F)
virtual ErrorOr< uint64_t > getInstWeight(const InstructionT &Inst)
Get the weight for an instruction.
virtual const FunctionSamples * findFunctionSamples(const InstructionT &I) const
Get the FunctionSamples for an instruction.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
SampleProfileLoaderPass(std::string File="", std::string RemappingFile="", ThinOrFullLTOPhase LTOPhase=ThinOrFullLTOPhase::None, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
iterator find(StringRef Key)
std::pair< iterator, bool > try_emplace(StringRef Key, ArgsTy &&...Args)
Emplace a new element for the specified key into the map if the key isn't already in the map.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
constexpr bool empty() const
empty - Check if the string is empty.
StringSet - A wrapper for StringMap that provides set-like functionality.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM Value Representation.
StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
Representation of the samples collected for a function.
static uint64_t getGUID(StringRef Name)
void findInlinedFunctions(DenseSet< GlobalValue::GUID > &S, const StringMap< Function * > &SymbolMap, uint64_t Threshold) const
Recursively traverses all children, if the total sample count of the corresponding function is no les...
static bool ProfileIsProbeBased
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
StringRef getFuncName() const
Return the original function name.
SampleContext & getContext() const
sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight=1)
Merge the samples in Other into this one.
void SetContextSynthetic()
static LineLocation getCallSiteIdentifier(const DILocation *DIL, bool ProfileIsFS=false)
Returns a unique call site identifier for a given debug location of a call instruction.
uint64_t getHeadSamplesEstimate() const
Return an estimate of the sample count of the function entry basic block.
StringRef getName() const
Return the function name.
const BodySampleMap & getBodySamples() const
Return all the samples collected in the body of the function.
static bool UseMD5
Whether the profile uses MD5 to represent string.
static void flattenProfile(SampleProfileMap &ProfileMap, bool ProfileIsCS=false)
bool hasAttribute(ContextAttributeMask A)
This class provides operator overloads to the map container using MD5 as the key type,...
iterator find(const SampleContext &Ctx)
Sample-based profile reader.
SampleProfileMap & getProfiles()
Return all the profiles.
bool profileIsProbeBased() const
Whether input profile is based on pseudo probes.
FunctionSamples * getSamplesFor(const Function &F)
Return the samples collected for function F.
bool profileIsPreInlined() const
Whether input profile contains ShouldBeInlined contexts.
std::error_code read()
The interface to read sample profiles from the associated file.
SampleProfileReaderItaniumRemapper * getRemapper()
void setModule(const Module *Mod)
virtual std::vector< StringRef > * getNameTable()
It includes all the names that have samples either in outline instance or inline instance.
ProfileSummary & getSummary() const
Return the profile summary.
bool profileIsCS() const
Whether input profile is fully context-sensitive.
virtual void setSkipFlatProf(bool Skip)
Don't read profile without context if the flag is set.
static ErrorOr< std::unique_ptr< SampleProfileReader > > create(const std::string Filename, LLVMContext &C, vfs::FileSystem &FS, FSDiscriminatorPass P=FSDiscriminatorPass::Base, const std::string RemapFilename="")
Create a sample profile reader appropriate to the file format.
virtual std::unique_ptr< ProfileSymbolList > getProfileSymbolList()
static const SortedCallTargetSet SortCallTargets(const CallTargetMap &Targets)
Sort call targets in descending order of call frequency.
static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets, float DistributionFactor)
Prorate call targets by a distribution factor.
Enumerate the SCCs of a directed graph in reverse topological order of the SCC DAG.
bool isAtEnd() const
Direct loop termination test which is more efficient than comparison with end().
Sort the nodes of a directed SCC in the decreasing order of the edge weights.
const CustomOperand< const MCSubtargetInfo & > Msg[]
@ C
The default llvm calling convention, compatible with C.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
void checkExpectAnnotations(Instruction &I, const ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
DenseMap< SymbolStringPtr, ExecutorSymbolDef > SymbolMap
A map from symbol names (as SymbolStringPtrs) to JITSymbols (address/flags pairs).
DiagnosticInfoOptimizationBase::Argument NV
CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< FuncNode * > Func
std::unordered_map< LineLocation, LineLocation, LineLocationHash > LocToLocMap
@ ContextDuplicatedIntoBase
std::map< std::string, FunctionSamples, std::less<> > FunctionSamplesMap
bool callsiteIsHot(const FunctionSamples *CallsiteFS, ProfileSummaryInfo *PSI, bool ProfAccForSymsInList)
Return true if the given callsite is hot with respect to the hot cutoff threshold.
IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets a vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
static bool isIndirectCall(const MachineInstr &MI)
bool getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, InstrProfValueData ValueData[], uint32_t &ActualNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst which is annotated with value profile meta data.
bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
cl::opt< int > ProfileInlineLimitMin("sample-profile-inline-limit-min", cl::Hidden, cl::init(100), cl::desc("The lower bound of size growth limit for " "proirity-based sample profile loader inlining."))
bool succ_empty(const Instruction *I)
cl::opt< int > ProfileInlineGrowthLimit("sample-profile-inline-growth-limit", cl::Hidden, cl::init(12), cl::desc("The size growth ratio limit for proirity-based sample profile " "loader inlining."))
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
void setProbeDistributionFactor(Instruction &Inst, float Factor)
std::string AnnotateInlinePassName(InlineContext IC)
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
cl::opt< bool > SampleProfileUseProfi
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
void sort(IteratorTy Start, IteratorTy End)
llvm::cl::opt< bool > UseIterativeBFIInference
std::optional< PseudoProbe > extractProbe(const Instruction &Inst)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void emitInlinedIntoBasedOnCost(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, const BasicBlock *Block, const Function &Callee, const Function &Caller, const InlineCost &IC, bool ForProfileContext=false, const char *PassName=nullptr)
Emit ORE message based on cost (default heuristic).
cl::opt< bool > SortProfiledSCC("sort-profiled-scc-member", cl::init(true), cl::Hidden, cl::desc("Sort profiled recursion by edge weights."))
std::unique_ptr< InlineAdvisor > getReplayInlineAdvisor(Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context, std::unique_ptr< InlineAdvisor > OriginalAdvisor, const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks, InlineContext IC)
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
cl::opt< int > ProfileInlineLimitMax("sample-profile-inline-limit-max", cl::Hidden, cl::init(10000), cl::desc("The upper bound of size growth limit for " "proirity-based sample profile loader inlining."))
cl::opt< int > SampleHotCallSiteThreshold("sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000), cl::desc("Hot callsite threshold for proirity-based sample profile loader " "inlining."))
void updateProfileCallee(Function *Callee, int64_t EntryDelta, const ValueMap< const Value *, WeakTrackingVH > *VMap=nullptr)
Updates profile information by adjusting the entry count by adding EntryDelta then scaling callsite i...
InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes=false, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, Function *ForwardVarArgsTo=nullptr)
This function inlines the called function into the basic block of the caller.
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
cl::opt< bool > EnableExtTspBlockPlacement
const uint64_t NOMORE_ICP_MAGICNUM
Magic number in the value profile metadata showing a target has been promoted for the instruction and...
cl::opt< int > SampleColdCallSiteThreshold("sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining cold callsites"))
Implement std::hash so that hash_code can be used in STL containers.
Used in the streaming interface as the general argument type.
A wrapper of binary function with basic blocks and jumps.
Provides context on when an inline advisor is constructed in the pipeline (e.g., link phase,...
Thresholds to tune inline cost analysis.
std::optional< bool > AllowRecursiveCall
Indicate whether we allow inlining for recursive call.
std::optional< bool > ComputeFullInlineCost
Compute inline cost even when the cost has exceeded the threshold.
Represents the relative location of an instruction.