91#include <system_error>
96using namespace sampleprof;
99#define DEBUG_TYPE "sample-profile"
100#define CSINLINE_DEBUG DEBUG_TYPE "-inline"
103 "Number of functions inlined with context sensitive profile");
105 "Number of functions not inlined with context sensitive profile");
107 "Number of functions with CFG mismatched profile");
108STATISTIC(NumMatchedProfile,
"Number of functions with CFG matched profile");
110 "Number of inlined callsites with a partial distribution factor");
113 "Number of functions with FDO inline stopped due to min size limit");
115 "Number of functions with FDO inline stopped due to max size limit");
117 NumCSInlinedHitGrowthLimit,
118 "Number of functions with FDO inline stopped due to growth size limit");
135 cl::desc(
"Salvage stale profile by fuzzy matching and use the remapped "
136 "location for sample profile query."));
140 cl::desc(
"Compute and report stale profile statistical metrics."));
144 cl::desc(
"Compute stale profile statistical metrics and write it into the "
145 "native object file(.llvm_stats section)."));
149 cl::desc(
"If the sample profile is accurate, we will mark all un-sampled "
150 "callsite and function as having 0 samples. Otherwise, treat "
151 "un-sampled callsites and functions conservatively as unknown. "));
155 cl::desc(
"If the sample profile is accurate, we will mark all un-sampled "
156 "branches and calls as having 0 samples. Otherwise, treat "
157 "them conservatively as unknown. "));
161 cl::desc(
"For symbols in profile symbol list, regard their profiles to "
162 "be accurate. It may be overriden by profile-sample-accurate. "));
166 cl::desc(
"Merge past inlinee's profile to outline version if sample "
167 "profile loader decided not to inline a call site. It will "
168 "only be enabled when top-down order of profile loading is "
173 cl::desc(
"Do profile annotation and inlining for functions in top-down "
174 "order of call graph during sample profile loading. It only "
175 "works for new pass manager. "));
179 cl::desc(
"Process functions in a top-down order "
180 "defined by the profiled call graph when "
181 "-sample-profile-top-down-load is on."));
185 cl::desc(
"Inline cold call sites in profile loader if it's beneficial "
193 cl::desc(
"If true, artifically skip inline transformation in sample-loader "
194 "pass, and merge (or scale) profiles (as configured by "
195 "--sample-profile-merge-inlinee)."));
200 cl::desc(
"Sort profiled recursion by edge weights."));
204 cl::desc(
"The size growth ratio limit for proirity-based sample profile "
205 "loader inlining."));
209 cl::desc(
"The lower bound of size growth limit for "
210 "proirity-based sample profile loader inlining."));
214 cl::desc(
"The upper bound of size growth limit for "
215 "proirity-based sample profile loader inlining."));
219 cl::desc(
"Hot callsite threshold for proirity-based sample profile loader "
224 cl::desc(
"Threshold for inlining cold callsites"));
230 "Relative hotness percentage threshold for indirect "
231 "call promotion in proirity-based sample profile loader inlining."));
236 "Skip relative hotness check for ICP up to given number of targets."));
240 cl::desc(
"A function is considered hot for staleness error check if its "
241 "total sample count is above the specified percentile"));
245 cl::desc(
"Skip the check if the number of hot functions is smaller than "
246 "the specified number."));
250 cl::desc(
"Reject the profile if the mismatch percent is higher than the "
254 "sample-profile-prioritized-inline",
cl::Hidden,
256 cl::desc(
"Use call site prioritized inlining for sample profile loader."
257 "Currently only CSSPGO is supported."));
262 cl::desc(
"Use the preinliner decisions stored in profile context."));
265 "sample-profile-recursive-inline",
cl::Hidden,
267 cl::desc(
"Allow sample loader inliner to inline recursive calls."));
272 "Optimization remarks file containing inline remarks to be replayed "
273 "by inlining from sample profile loader."),
277 "sample-profile-inline-replay-scope",
278 cl::init(ReplayInlinerSettings::Scope::Function),
280 "Replay on functions that have remarks associated "
281 "with them (default)"),
282 clEnumValN(ReplayInlinerSettings::Scope::Module,
"Module",
283 "Replay on the entire module")),
284 cl::desc(
"Whether inline replay should be applied to the entire "
285 "Module or just the Functions (default) that are present as "
286 "callers in remarks during sample profile inlining."),
290 "sample-profile-inline-replay-fallback",
291 cl::init(ReplayInlinerSettings::Fallback::Original),
294 ReplayInlinerSettings::Fallback::Original,
"Original",
295 "All decisions not in replay send to original advisor (default)"),
296 clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline,
297 "AlwaysInline",
"All decisions not in replay are inlined"),
298 clEnumValN(ReplayInlinerSettings::Fallback::NeverInline,
"NeverInline",
299 "All decisions not in replay are not inlined")),
300 cl::desc(
"How sample profile inline replay treats sites that don't come "
301 "from the replay. Original: defers to original advisor, "
302 "AlwaysInline: inline all sites not in replay, NeverInline: "
303 "inline no sites not in replay"),
307 "sample-profile-inline-replay-format",
308 cl::init(CallSiteFormat::Format::LineColumnDiscriminator),
310 clEnumValN(CallSiteFormat::Format::Line,
"Line",
"<Line Number>"),
311 clEnumValN(CallSiteFormat::Format::LineColumn,
"LineColumn",
312 "<Line Number>:<Column Number>"),
313 clEnumValN(CallSiteFormat::Format::LineDiscriminator,
314 "LineDiscriminator",
"<Line Number>.<Discriminator>"),
315 clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator,
316 "LineColumnDiscriminator",
317 "<Line Number>:<Column Number>.<Discriminator> (default)")),
322 cl::desc(
"Max number of promotions for a single indirect "
323 "call callsite in sample profile loader"));
327 cl::desc(
"Ignore existing branch weights on IR and always overwrite."));
331 cl::desc(
"Annotate LTO phase (prelink / postlink), or main (no LTO) for "
332 "sample-profile inline pass name."));
342using Edge = std::pair<const BasicBlock *, const BasicBlock *>;
347class GUIDToFuncNameMapper {
352 CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) {
356 for (
const auto &
F : CurrentModule) {
358 CurrentGUIDToFuncNameMap.insert(
359 {Function::getGUID(OrigName), OrigName});
369 if (CanonName != OrigName)
370 CurrentGUIDToFuncNameMap.insert(
371 {Function::getGUID(CanonName), CanonName});
375 SetGUIDToFuncNameMapForAll(&CurrentGUIDToFuncNameMap);
378 ~GUIDToFuncNameMapper() {
382 CurrentGUIDToFuncNameMap.clear();
386 SetGUIDToFuncNameMapForAll(
nullptr);
391 std::queue<FunctionSamples *> FSToUpdate;
393 FSToUpdate.push(&IFS.second);
396 while (!FSToUpdate.empty()) {
399 FS->GUIDToFuncNameMap = Map;
400 for (
const auto &ICS : FS->getCallsiteSamples()) {
402 for (
const auto &IFS : FSMap) {
404 FSToUpdate.push(&FS);
416struct InlineCandidate {
426 float CallsiteDistribution;
430struct CandidateComparer {
431 bool operator()(
const InlineCandidate &LHS,
const InlineCandidate &RHS) {
432 if (
LHS.CallsiteCount !=
RHS.CallsiteCount)
433 return LHS.CallsiteCount <
RHS.CallsiteCount;
437 assert(LCS && RCS &&
"Expect non-null FunctionSamples");
448using CandidateQueue =
467 GetAC(
std::
move(GetAssumptionCache)),
468 GetTTI(
std::
move(GetTargetTransformInfo)), GetTLI(
std::
move(GetTLI)),
486 std::vector<const FunctionSamples *>
492 bool tryPromoteAndInlineCandidate(
498 std::optional<InlineCost> getExternalInlineAdvisorCost(
CallBase &CB);
499 bool getExternalInlineAdvisorShouldInline(
CallBase &CB);
500 InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
501 bool getInlineCandidate(InlineCandidate *NewCandidate,
CallBase *CB);
503 tryInlineCandidate(InlineCandidate &Candidate,
506 inlineHotFunctionsWithPriority(
Function &
F,
510 void emitOptimizationRemarksForInlineCandidates(
513 void promoteMergeNotInlinedContextSamples(
517 std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(
Module &M);
518 void generateMDProfMetadata(
Function &
F);
533 std::unique_ptr<SampleContextTracker> ContextTracker;
541 const std::string AnnotatedPassName;
545 std::unique_ptr<ProfileSymbolList> PSL;
556 struct NotInlinedProfileInfo {
579 bool ProfAccForSymsInList;
582 std::unique_ptr<InlineAdvisor> ExternalInlineAdvisor;
585 std::unique_ptr<SampleProfileMatcher> MatchingManager;
588 const char *getAnnotatedRemarkPassName()
const {
589 return AnnotatedPassName.c_str();
602 const std::vector<const BasicBlockT *> &BasicBlocks,
604 for (
auto &Jump :
Func.Jumps) {
605 const auto *BB = BasicBlocks[Jump.Source];
606 const auto *Succ = BasicBlocks[Jump.Target];
610 if (Successors[BB].
size() == 2 && Successors[BB].back() == Succ) {
611 if (isa<InvokeInst>(TI)) {
612 Jump.IsUnlikely =
true;
618 if (isa<UnreachableInst>(SuccTI)) {
619 Jump.IsUnlikely =
true;
640 return getProbeWeight(Inst);
644 return std::error_code();
649 if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst) || isa<PHINode>(Inst))
650 return std::error_code();
659 if (
const auto *CB = dyn_cast<CallBase>(&Inst))
660 if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
663 return getInstWeightImpl(Inst);
679SampleProfileLoader::findCalleeFunctionSamples(
const CallBase &Inst)
const {
687 CalleeName =
Callee->getName();
690 return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName);
697 CalleeName, Reader->getRemapper());
703std::vector<const FunctionSamples *>
704SampleProfileLoader::findIndirectCallFunctionSamples(
707 std::vector<const FunctionSamples *>
R;
714 assert(L && R &&
"Expect non-null FunctionSamples");
715 if (
L->getHeadSamplesEstimate() !=
R->getHeadSamplesEstimate())
716 return L->getHeadSamplesEstimate() >
R->getHeadSamplesEstimate();
717 return L->getGUID() <
R->getGUID();
722 ContextTracker->getIndirectCalleeContextSamplesFor(DIL);
723 if (CalleeSamples.empty())
729 for (
const auto *
const FS : CalleeSamples) {
730 Sum +=
FS->getHeadSamplesEstimate();
743 if (
auto T =
FS->findCallTargetMapAt(CallSite))
744 for (
const auto &T_C : *
T)
749 for (
const auto &NameFS : *M) {
750 Sum += NameFS.second.getHeadSamplesEstimate();
751 R.push_back(&NameFS.second);
759SampleProfileLoader::findFunctionSamples(
const Instruction &Inst)
const {
770 auto it = DILocation2SampleMap.try_emplace(DIL,
nullptr);
773 it.first->second = ContextTracker->getContextSamplesFor(DIL);
776 Samples->findFunctionSamples(DIL, Reader->getRemapper());
778 return it.first->second;
791 std::unique_ptr<InstrProfValueData[]> ValueData =
795 ValueData.get(), NumVals, TotalCount,
true);
801 unsigned NumPromoted = 0;
809 if (ValueData[
I].
Value == Function::getGUID(Candidate))
838 std::unique_ptr<InstrProfValueData[]> ValueData =
842 ValueData.get(), NumVals, OldSum,
true);
848 "If sum is 0, assume only one element in CallTargets "
849 "with count being NOMORE_ICP_MAGICNUM");
853 ValueCountMap[ValueData[
I].
Value] = ValueData[
I].Count;
860 OldSum -= Pair.first->second;
870 ValueCountMap[ValueData[
I].Value] = ValueData[
I].Count;
874 for (
const auto &Data : CallTargets) {
875 auto Pair = ValueCountMap.
try_emplace(Data.Value, Data.Count);
881 assert(Sum >= Data.Count &&
"Sum should never be less than Data.Count");
887 for (
const auto &ValueCount : ValueCountMap) {
889 InstrProfValueData{ValueCount.first, ValueCount.second});
893 [](
const InstrProfValueData &L,
const InstrProfValueData &R) {
894 if (L.Count != R.Count)
895 return L.Count > R.Count;
896 return L.Value > R.Value;
902 NewCallTargets, Sum, IPVK_IndirectCallTarget, MaxMDCount);
915bool SampleProfileLoader::tryPromoteAndInlineCandidate(
926 auto CalleeFunctionName = Candidate.CalleeSamples->getFunction();
931 auto &CI = *Candidate.CallInstr;
935 const char *Reason =
"Callee function not available";
942 if (!
R->second->isDeclaration() &&
R->second->getSubprogram() &&
943 R->second->hasFnAttribute(
"use-sample-profile") &&
952 CI,
R->second, Candidate.CallsiteCount, Sum,
false, ORE);
954 Sum -= Candidate.CallsiteCount;
967 Candidate.CallInstr = DI;
968 if (isa<CallInst>(DI) || isa<InvokeInst>(DI)) {
969 bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
974 *DI,
static_cast<float>(Candidate.CallsiteCount) / SumOrigin);
982 Candidate.CallInstr->getName())<<
" because "
993 if (Callee ==
nullptr)
1002 if (
Cost.isAlways())
1008void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
1011 for (
auto *
I : Candidates) {
1012 Function *CalledFunction =
I->getCalledFunction();
1013 if (CalledFunction) {
1015 "InlineAttempt",
I->getDebugLoc(),
1017 <<
"previous inlining reattempted for "
1018 << (
Hot ?
"hotness: '" :
"size: '")
1019 <<
ore::NV(
"Callee", CalledFunction) <<
"' into '"
1025void SampleProfileLoader::findExternalInlineCandidate(
1031 if (CB && getExternalInlineAdvisorShouldInline(*CB)) {
1062 std::queue<ContextTrieNode *> CalleeList;
1063 CalleeList.push(Caller);
1064 while (!CalleeList.empty()) {
1083 if (!Func ||
Func->isDeclaration())
1089 for (
const auto &TS : BS.second.getCallTargets())
1090 if (TS.second > Threshold) {
1092 if (!Callee ||
Callee->isDeclaration())
1093 InlinedGUIDs.
insert(TS.first.getHashCode());
1100 for (
auto &Child :
Node->getAllChildContext()) {
1102 CalleeList.push(CalleeNode);
1129bool SampleProfileLoader::inlineHotFunctions(
1133 assert((!ProfAccForSymsInList ||
1135 !
F.hasFnAttribute(
"profile-sample-accurate"))) &&
1136 "ProfAccForSymsInList should be false when profile-sample-accurate "
1140 bool Changed =
false;
1141 bool LocalChanged =
true;
1142 while (LocalChanged) {
1143 LocalChanged =
false;
1145 for (
auto &BB :
F) {
1149 for (
auto &
I : BB) {
1151 if (
auto *CB = dyn_cast<CallBase>(&
I)) {
1152 if (!isa<IntrinsicInst>(
I)) {
1153 if ((FS = findCalleeFunctionSamples(*CB))) {
1155 "GUIDToFuncNameMap has to be populated");
1157 if (
FS->getHeadSamplesEstimate() > 0 ||
1159 LocalNotInlinedCallSites.
insert({CB,
FS});
1162 else if (shouldInlineColdCallee(*CB))
1164 }
else if (getExternalInlineAdvisorShouldInline(*CB)) {
1170 if (
Hot || ExternalInlineAdvisor) {
1172 emitOptimizationRemarksForInlineCandidates(AllCandidates,
F,
true);
1175 emitOptimizationRemarksForInlineCandidates(ColdCandidates,
F,
false);
1179 Function *CalledFunction =
I->getCalledFunction();
1180 InlineCandidate Candidate = {
I, LocalNotInlinedCallSites.
lookup(
I),
1184 if (CalledFunction == &
F)
1186 if (
I->isIndirectCall()) {
1188 for (
const auto *FS : findIndirectCallFunctionSamples(*
I, Sum)) {
1190 if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1191 findExternalInlineCandidate(
I, FS, InlinedGUIDs,
1192 PSI->getOrCompHotCountThreshold());
1198 Candidate = {
I,
FS,
FS->getHeadSamplesEstimate(), 1.0};
1199 if (tryPromoteAndInlineCandidate(
F, Candidate, SumOrigin, Sum)) {
1200 LocalNotInlinedCallSites.
erase(
I);
1201 LocalChanged =
true;
1204 }
else if (CalledFunction && CalledFunction->
getSubprogram() &&
1206 if (tryInlineCandidate(Candidate)) {
1207 LocalNotInlinedCallSites.
erase(
I);
1208 LocalChanged =
true;
1210 }
else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1211 findExternalInlineCandidate(
I, findCalleeFunctionSamples(*
I),
1213 PSI->getOrCompHotCountThreshold());
1216 Changed |= LocalChanged;
1222 promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites,
F);
1226bool SampleProfileLoader::tryInlineCandidate(
1233 CallBase &CB = *Candidate.CallInstr;
1235 assert(CalledFunction &&
"Expect a callee with definition");
1240 if (
Cost.isNever()) {
1242 "InlineFail", DLoc, BB)
1243 <<
"incompatible inlining");
1251 IFI.UpdateProfile =
false;
1254 if (!
IR.isSuccess())
1259 Cost,
true, getAnnotatedRemarkPassName());
1262 if (InlinedCallSites) {
1263 InlinedCallSites->
clear();
1264 for (
auto &
I : IFI.InlinedCallSites)
1269 ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
1279 if (Candidate.CallsiteDistribution < 1) {
1280 for (
auto &
I : IFI.InlinedCallSites) {
1283 Candidate.CallsiteDistribution);
1285 NumDuplicatedInlinesite++;
1291bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
1293 assert(CB &&
"Expect non-null call instruction");
1295 if (isa<IntrinsicInst>(CB))
1299 const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB);
1302 if (!CalleeSamples && !getExternalInlineAdvisorShouldInline(*CB))
1306 if (std::optional<PseudoProbe> Probe =
extractProbe(*CB))
1307 Factor = Probe->Factor;
1311 *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
1315std::optional<InlineCost>
1316SampleProfileLoader::getExternalInlineAdvisorCost(
CallBase &CB) {
1317 std::unique_ptr<InlineAdvice> Advice =
nullptr;
1318 if (ExternalInlineAdvisor) {
1319 Advice = ExternalInlineAdvisor->getAdvice(CB);
1321 if (!Advice->isInliningRecommended()) {
1322 Advice->recordUnattemptedInlining();
1325 Advice->recordInlining();
1333bool SampleProfileLoader::getExternalInlineAdvisorShouldInline(
CallBase &CB) {
1334 std::optional<InlineCost>
Cost = getExternalInlineAdvisorCost(CB);
1339SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
1340 if (std::optional<InlineCost> ReplayCost =
1341 getExternalInlineAdvisorCost(*Candidate.CallInstr))
1347 if (Candidate.CallsiteCount > PSI->getHotCountThreshold())
1354 assert(Callee &&
"Expect a definition for inline candidate of direct call");
1367 GetTTI(*Callee), GetAC, GetTLI);
1370 if (
Cost.isNever() ||
Cost.isAlways())
1403bool SampleProfileLoader::inlineHotFunctionsWithPriority(
1407 assert((!ProfAccForSymsInList ||
1409 !
F.hasFnAttribute(
"profile-sample-accurate"))) &&
1410 "ProfAccForSymsInList should be false when profile-sample-accurate "
1415 CandidateQueue CQueue;
1416 InlineCandidate NewCandidate;
1417 for (
auto &BB :
F) {
1418 for (
auto &
I : BB) {
1419 auto *CB = dyn_cast<CallBase>(&
I);
1422 if (getInlineCandidate(&NewCandidate, CB))
1423 CQueue.push(NewCandidate);
1432 "Max inline size limit should not be smaller than min inline size "
1437 if (ExternalInlineAdvisor)
1438 SizeLimit = std::numeric_limits<unsigned>::max();
1443 bool Changed =
false;
1444 while (!CQueue.empty() &&
F.getInstructionCount() <
SizeLimit) {
1445 InlineCandidate Candidate = CQueue.top();
1448 Function *CalledFunction =
I->getCalledFunction();
1450 if (CalledFunction == &
F)
1452 if (
I->isIndirectCall()) {
1454 auto CalleeSamples = findIndirectCallFunctionSamples(*
I, Sum);
1456 Sum *= Candidate.CallsiteDistribution;
1457 unsigned ICPCount = 0;
1458 for (
const auto *FS : CalleeSamples) {
1460 if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1461 findExternalInlineCandidate(
I, FS, InlinedGUIDs,
1462 PSI->getOrCompHotCountThreshold());
1466 FS->getHeadSamplesEstimate() * Candidate.CallsiteDistribution;
1481 if (!PSI->isHotCount(EntryCountDistributed))
1486 Candidate = {
I,
FS, EntryCountDistributed,
1487 Candidate.CallsiteDistribution};
1488 if (tryPromoteAndInlineCandidate(
F, Candidate, SumOrigin, Sum,
1489 &InlinedCallSites)) {
1490 for (
auto *CB : InlinedCallSites) {
1491 if (getInlineCandidate(&NewCandidate, CB))
1492 CQueue.emplace(NewCandidate);
1496 }
else if (!ContextTracker) {
1497 LocalNotInlinedCallSites.
insert({
I,
FS});
1500 }
else if (CalledFunction && CalledFunction->
getSubprogram() &&
1503 if (tryInlineCandidate(Candidate, &InlinedCallSites)) {
1504 for (
auto *CB : InlinedCallSites) {
1505 if (getInlineCandidate(&NewCandidate, CB))
1506 CQueue.emplace(NewCandidate);
1509 }
else if (!ContextTracker) {
1510 LocalNotInlinedCallSites.
insert({
I, Candidate.CalleeSamples});
1512 }
else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1513 findExternalInlineCandidate(
I, findCalleeFunctionSamples(*
I),
1515 PSI->getOrCompHotCountThreshold());
1519 if (!CQueue.empty()) {
1521 ++NumCSInlinedHitMaxLimit;
1523 ++NumCSInlinedHitMinLimit;
1525 ++NumCSInlinedHitGrowthLimit;
1531 promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites,
F);
1535void SampleProfileLoader::promoteMergeNotInlinedContextSamples(
1539 for (
const auto &Pair : NonInlinedCallSites) {
1542 if (!Callee ||
Callee->isDeclaration())
1547 I->getDebugLoc(),
I->getParent())
1548 <<
"previous inlining not repeated: '" <<
ore::NV(
"Callee", Callee)
1549 <<
"' into '" <<
ore::NV(
"Caller", &
F) <<
"'");
1553 if (
FS->getTotalSamples() == 0 &&
FS->getHeadSamplesEstimate() == 0) {
1567 if (
FS->getHeadSamples() == 0) {
1571 FS->getHeadSamplesEstimate());
1580 OutlineFS = &OutlineFunctionSamples[
1582 OutlineFS->
merge(*FS, 1);
1588 notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
1589 pair.first->second.entryCount +=
FS->getHeadSamplesEstimate();
1600 InstrProfValueData{
I.first.getHashCode(),
I.second});
1607void SampleProfileLoader::generateMDProfMetadata(
Function &
F) {
1610 LLVM_DEBUG(
dbgs() <<
"\nPropagation complete. Setting branch weights\n");
1613 for (
auto &BI :
F) {
1616 if (BlockWeights[BB]) {
1617 for (
auto &
I : *BB) {
1618 if (!isa<CallInst>(
I) && !isa<InvokeInst>(
I))
1630 FS->findCallTargetMapAt(CallSite);
1631 if (!
T ||
T.get().empty())
1638 if (Probe->Factor < 1)
1645 for (
const auto &
C :
T.get())
1652 FS->findFunctionSamplesMapAt(CallSite)) {
1653 for (
const auto &NameFS : *M)
1654 Sum += NameFS.second.getHeadSamplesEstimate();
1660 I.setMetadata(LLVMContext::MD_prof,
nullptr);
1661 }
else if (!isa<IntrinsicInst>(&
I)) {
1668 for (
auto &
I : *BB) {
1669 if (isa<CallInst>(
I) || isa<InvokeInst>(
I)) {
1670 if (cast<CallBase>(
I).isIndirectCall()) {
1671 I.setMetadata(LLVMContext::MD_prof,
nullptr);
1682 if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI) &&
1683 !isa<IndirectBrInst>(TI))
1689 :
Twine(
"<UNKNOWN LOCATION>"))
1698 std::vector<uint64_t> EdgeIndex;
1703 EdgeIndex[
I] = EdgeMultiplicity[Succ];
1704 EdgeMultiplicity[Succ]++;
1709 Edge E = std::make_pair(BB, Succ);
1715 if (Weight > std::numeric_limits<uint32_t>::max()) {
1717 Weight = std::numeric_limits<uint32_t>::max();
1726 uint64_t W = Weight / EdgeMultiplicity[Succ];
1728 if (EdgeIndex[
I] < Weight % EdgeMultiplicity[Succ])
1733 if (Weight > MaxWeight) {
1735 MaxDestInst = Succ->getFirstNonPHIOrDbgOrLifetime();
1751 if (MaxWeight > 0 &&
1757 <<
"most popular destination for conditional branches at "
1758 <<
ore::NV(
"CondBranchesLoc", BranchLoc);
1777bool SampleProfileLoader::emitAnnotations(
Function &
F) {
1778 bool Changed =
false;
1782 if (!ProbeManager->getDesc(
F))
1783 dbgs() <<
"Probe descriptor missing for Function " <<
F.getName()
1787 if (ProbeManager->profileIsValid(
F, *Samples)) {
1788 ++NumMatchedProfile;
1790 ++NumMismatchedProfile;
1792 dbgs() <<
"Profile is invalid due to CFG mismatch for Function "
1793 <<
F.getName() <<
"\n");
1798 if (getFunctionLoc(
F) == 0)
1802 <<
F.getName() <<
": " << getFunctionLoc(
F) <<
"\n");
1807 Changed |= inlineHotFunctionsWithPriority(
F, InlinedGUIDs);
1809 Changed |= inlineHotFunctions(
F, InlinedGUIDs);
1811 Changed |= computeAndPropagateWeights(
F, InlinedGUIDs);
1814 generateMDProfMetadata(
F);
1816 emitCoverageRemarks(
F);
1820std::unique_ptr<ProfiledCallGraph>
1821SampleProfileLoader::buildProfiledCallGraph(
Module &M) {
1822 std::unique_ptr<ProfiledCallGraph> ProfiledCG;
1824 ProfiledCG = std::make_unique<ProfiledCallGraph>(*ContextTracker);
1826 ProfiledCG = std::make_unique<ProfiledCallGraph>(Reader->getProfiles());
1834 ProfiledCG->addProfiledFunction(
1841std::vector<Function *>
1843 std::vector<Function *> FunctionOrderList;
1844 FunctionOrderList.reserve(
M.size());
1847 errs() <<
"WARNING: -use-profiled-call-graph ignored, should be used "
1848 "together with -sample-profile-top-down-load.\n";
1862 FunctionOrderList.push_back(&
F);
1863 return FunctionOrderList;
1916 std::unique_ptr<ProfiledCallGraph> ProfiledCG = buildProfiledCallGraph(M);
1925 for (
auto *
Node : Range) {
1928 FunctionOrderList.push_back(
F);
1939 FunctionOrderList.push_back(&
F);
1945 std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
1948 dbgs() <<
"Function processing order:\n";
1949 for (
auto F : FunctionOrderList) {
1950 dbgs() <<
F->getName() <<
"\n";
1954 return FunctionOrderList;
1957bool SampleProfileLoader::doInitialization(
Module &M,
1959 auto &Ctx =
M.getContext();
1962 Filename, Ctx, *FS, FSDiscriminatorPass::Base, RemappingFilename);
1963 if (std::error_code EC = ReaderOrErr.getError()) {
1964 std::string
Msg =
"Could not open profile: " +
EC.message();
1968 Reader = std::move(ReaderOrErr.get());
1969 Reader->setSkipFlatProf(LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink);
1972 Reader->setModule(&M);
1973 if (std::error_code EC = Reader->read()) {
1974 std::string
Msg =
"profile reading failed: " +
EC.message();
1979 PSL = Reader->getProfileSymbolList();
1982 ProfAccForSymsInList =
1984 if (ProfAccForSymsInList) {
1985 NamesInProfile.clear();
1986 GUIDsInProfile.clear();
1987 if (
auto NameTable = Reader->getNameTable()) {
1989 for (
auto Name : *NameTable)
1990 GUIDsInProfile.insert(
Name.getHashCode());
1992 for (
auto Name : *NameTable)
1993 NamesInProfile.insert(
Name.stringRef());
1996 CoverageTracker.setProfAccForSymsInList(
true);
2001 M, *
FAM, Ctx,
nullptr,
2006 false,
InlineContext{LTOPhase, InlinePass::ReplaySampleProfileInliner});
2010 if (Reader->profileIsCS() || Reader->profileIsPreInlined() ||
2011 Reader->profileIsProbeBased()) {
2027 if (Reader->profileIsPreInlined()) {
2037 if (Reader->profileIsProbeBased() &&
2042 if (!Reader->profileIsCS()) {
2054 if (Reader->profileIsCS()) {
2056 ContextTracker = std::make_unique<SampleContextTracker>(
2057 Reader->getProfiles(), &GUIDToFuncNameMap);
2061 if (Reader->profileIsProbeBased()) {
2062 ProbeManager = std::make_unique<PseudoProbeManager>(M);
2063 if (!ProbeManager->moduleIsProbed(M)) {
2065 "Pseudo-probe-based profile requires SampleProfileProbePass";
2074 MatchingManager = std::make_unique<SampleProfileMatcher>(
2075 M, *Reader, ProbeManager.get(), LTOPhase);
2091bool SampleProfileLoader::rejectHighStalenessProfile(
2094 "Only support for probe-based profile");
2097 for (
const auto &
I : Profiles) {
2098 const auto &
FS =
I.second;
2099 const auto *FuncDesc = ProbeManager->getDesc(
FS.getGUID());
2105 FS.getTotalSamples()))
2109 if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS))
2110 NumMismatchedFunc++;
2118 if (NumMismatchedFunc * 100 >=
2120 auto &Ctx =
M.getContext();
2122 "The input profile significantly mismatches current source code. "
2123 "Please recollect profile to avoid performance regression.";
2133 GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
2136 if (
M.getProfileSummary(
false) ==
nullptr) {
2137 M.setProfileSummary(Reader->getSummary().getMD(
M.getContext()),
2143 rejectHighStalenessProfile(M, PSI, Reader->getProfiles()))
2147 for (
const auto &
I : Reader->getProfiles())
2148 TotalCollectedSamples +=
I.second.getTotalSamples();
2150 auto Remapper = Reader->getRemapper();
2152 for (
const auto &N_F :
M.getValueSymbolTable()) {
2154 Function *
F = dyn_cast<Function>(N_F.getValue());
2155 if (
F ==
nullptr || OrigName.
empty())
2159 if (OrigName != NewName && !NewName.
empty()) {
2166 r.first->second =
nullptr;
2171 if (
auto MapName = Remapper->lookUpNameInProfile(OrigName)) {
2172 if (*MapName != OrigName && !MapName->empty())
2178 "No empty StringRef should be added in SymbolMap");
2182 MatchingManager->runOnModule();
2183 MatchingManager->clearMatchingData();
2186 bool retval =
false;
2187 for (
auto *
F : buildFunctionOrder(M, CG)) {
2189 clearFunctionData();
2195 for (
const std::pair<Function *, NotInlinedProfileInfo> &pair :
2203 LLVM_DEBUG(
dbgs() <<
"\n\nProcessing Function " <<
F.getName() <<
"\n");
2204 DILocation2SampleMap.clear();
2215 initialEntryCount = 0;
2218 ProfAccForSymsInList =
false;
2220 CoverageTracker.setProfAccForSymsInList(ProfAccForSymsInList);
2226 if (ProfAccForSymsInList) {
2228 if (PSL->contains(
F.getName()))
2229 initialEntryCount = 0;
2243 GUIDsInProfile.count(Function::getGUID(CanonName))) ||
2245 initialEntryCount = -1;
2250 if (!
F.getEntryCount())
2252 std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
2259 OwnedORE = std::make_unique<OptimizationRemarkEmitter>(&
F);
2260 ORE = OwnedORE.get();
2264 Samples = ContextTracker->getBaseSamplesFor(
F);
2266 Samples = Reader->getSamplesFor(
F);
2271 auto It = OutlineFunctionSamples.find(
FunctionId(CanonName));
2272 if (It != OutlineFunctionSamples.end()) {
2273 Samples = &It->second;
2274 }
else if (
auto Remapper = Reader->getRemapper()) {
2275 if (
auto RemppedName = Remapper->lookUpNameInProfile(CanonName)) {
2276 It = OutlineFunctionSamples.find(
FunctionId(*RemppedName));
2277 if (It != OutlineFunctionSamples.end())
2278 Samples = &It->second;
2284 if (Samples && !Samples->
empty())
2285 return emitAnnotations(
F);
2291 : ProfileFileName(File), ProfileRemappingFileName(RemappingFile),
2292 LTOPhase(LTOPhase), FS(
std::
move(FS)) {}
2312 SampleProfileLoader SampleLoader(
2315 : ProfileRemappingFileName,
2316 LTOPhase, FS, GetAssumptionCache, GetTTI, GetTLI);
2318 if (!SampleLoader.doInitialization(M, &
FAM))
2323 if (!SampleLoader.runOnModule(M, &AM, PSI, CG))
This file defines the StringMap class.
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
static bool runOnFunction(Function &F, bool PostInlining)
Provides ErrorOr<T> smart pointer.
static cl::opt< unsigned > SizeLimit("eif-limit", cl::init(6), cl::Hidden, cl::desc("Size limit in Hexagon early if-conversion"))
Implements a lazy call graph analysis and related passes for the new pass manager.
Legalize the Machine IR a function's Machine IR
This file implements a map that provides insertion order iteration.
static const Function * getCalledFunction(const Value *V, bool &IsNoBuiltin)
Module.h This file contains the declarations for the Module class.
FunctionAnalysisManager FAM
This header defines various interfaces for pass management in LLVM.
This file defines the PriorityQueue class.
This file contains the declarations for profiling metadata utility functions.
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file provides the interface for context-sensitive profile tracker used by CSSPGO.
This file provides the interface for the sampled PGO profile loader base implementation.
This file provides the utility functions for the sampled PGO loader base implementation.
This file provides the interface for SampleProfileMatcher.
This file provides the interface for the pseudo probe implementation for AutoFDO.
static cl::opt< std::string > SampleProfileFile("sample-profile-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile file loaded by -sample-profile"), cl::Hidden)
static cl::opt< unsigned > MinfuncsForStalenessError("min-functions-for-staleness-error", cl::Hidden, cl::init(50), cl::desc("Skip the check if the number of hot functions is smaller than " "the specified number."))
static cl::opt< bool > ProfileSampleBlockAccurate("profile-sample-block-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "branches and calls as having 0 samples. Otherwise, treat " "them conservatively as unknown. "))
static cl::opt< unsigned > PrecentMismatchForStalenessError("precent-mismatch-for-staleness-error", cl::Hidden, cl::init(80), cl::desc("Reject the profile if the mismatch percent is higher than the " "given number."))
static cl::opt< unsigned > MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden, cl::desc("Max number of promotions for a single indirect " "call callsite in sample profile loader"))
static cl::opt< ReplayInlinerSettings::Fallback > ProfileInlineReplayFallback("sample-profile-inline-replay-fallback", cl::init(ReplayInlinerSettings::Fallback::Original), cl::values(clEnumValN(ReplayInlinerSettings::Fallback::Original, "Original", "All decisions not in replay send to original advisor (default)"), clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline, "AlwaysInline", "All decisions not in replay are inlined"), clEnumValN(ReplayInlinerSettings::Fallback::NeverInline, "NeverInline", "All decisions not in replay are not inlined")), cl::desc("How sample profile inline replay treats sites that don't come " "from the replay. Original: defers to original advisor, " "AlwaysInline: inline all sites not in replay, NeverInline: " "inline no sites not in replay"), cl::Hidden)
static cl::opt< bool > OverwriteExistingWeights("overwrite-existing-weights", cl::Hidden, cl::init(false), cl::desc("Ignore existing branch weights on IR and always overwrite."))
static void updateIDTMetaData(Instruction &Inst, const SmallVectorImpl< InstrProfValueData > &CallTargets, uint64_t Sum)
Update indirect call target profile metadata for Inst.
static cl::opt< bool > AnnotateSampleProfileInlinePhase("annotate-sample-profile-inline-phase", cl::Hidden, cl::init(false), cl::desc("Annotate LTO phase (prelink / postlink), or main (no LTO) for " "sample-profile inline pass name."))
static cl::opt< std::string > ProfileInlineReplayFile("sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"), cl::desc("Optimization remarks file containing inline remarks to be replayed " "by inlining from sample profile loader."), cl::Hidden)
static cl::opt< bool > ProfileMergeInlinee("sample-profile-merge-inlinee", cl::Hidden, cl::init(true), cl::desc("Merge past inlinee's profile to outline version if sample " "profile loader decided not to inline a call site. It will " "only be enabled when top-down order of profile loading is " "enabled. "))
cl::opt< bool > PersistProfileStaleness("persist-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute stale profile statistical metrics and write it into the " "native object file(.llvm_stats section)."))
static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate)
Check whether the indirect call promotion history of Inst allows the promotion for Candidate.
static SmallVector< InstrProfValueData, 2 > GetSortedValueDataFromCallTargets(const SampleRecord::CallTargetMap &M)
Returns the sorted CallTargetMap M by count in descending order.
static cl::opt< bool > UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden, cl::desc("Process functions in a top-down order " "defined by the profiled call graph when " "-sample-profile-top-down-load is on."))
static cl::opt< ReplayInlinerSettings::Scope > ProfileInlineReplayScope("sample-profile-inline-replay-scope", cl::init(ReplayInlinerSettings::Scope::Function), cl::values(clEnumValN(ReplayInlinerSettings::Scope::Function, "Function", "Replay on functions that have remarks associated " "with them (default)"), clEnumValN(ReplayInlinerSettings::Scope::Module, "Module", "Replay on the entire module")), cl::desc("Whether inline replay should be applied to the entire " "Module or just the Functions (default) that are present as " "callers in remarks during sample profile inlining."), cl::Hidden)
static cl::opt< unsigned > ProfileICPRelativeHotness("sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25), cl::desc("Relative hotness percentage threshold for indirect " "call promotion in proirity-based sample profile loader inlining."))
Function::ProfileCount ProfileCount
static cl::opt< unsigned > ProfileICPRelativeHotnessSkip("sample-profile-icp-relative-hotness-skip", cl::Hidden, cl::init(1), cl::desc("Skip relative hotness check for ICP up to given number of targets."))
cl::opt< bool > ReportProfileStaleness("report-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute and report stale profile statistical metrics."))
static cl::opt< bool > UsePreInlinerDecision("sample-profile-use-preinliner", cl::Hidden, cl::desc("Use the preinliner decisions stored in profile context."))
static cl::opt< bool > ProfileAccurateForSymsInList("profile-accurate-for-symsinlist", cl::Hidden, cl::init(true), cl::desc("For symbols in profile symbol list, regard their profiles to " "be accurate. It may be overriden by profile-sample-accurate. "))
static cl::opt< bool > DisableSampleLoaderInlining("disable-sample-loader-inlining", cl::Hidden, cl::init(false), cl::desc("If true, artifically skip inline transformation in sample-loader " "pass, and merge (or scale) profiles (as configured by " "--sample-profile-merge-inlinee)."))
static cl::opt< bool > ProfileSizeInline("sample-profile-inline-size", cl::Hidden, cl::init(false), cl::desc("Inline cold call sites in profile loader if it's beneficial " "for code size."))
cl::opt< bool > SalvageStaleProfile("salvage-stale-profile", cl::Hidden, cl::init(false), cl::desc("Salvage stale profile by fuzzy matching and use the remapped " "location for sample profile query."))
static cl::opt< bool > ProfileTopDownLoad("sample-profile-top-down-load", cl::Hidden, cl::init(true), cl::desc("Do profile annotation and inlining for functions in top-down " "order of call graph during sample profile loading. It only " "works for new pass manager. "))
static cl::opt< bool > ProfileSampleAccurate("profile-sample-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "callsite and function as having 0 samples. Otherwise, treat " "un-sampled callsites and functions conservatively as unknown. "))
static cl::opt< bool > AllowRecursiveInline("sample-profile-recursive-inline", cl::Hidden, cl::desc("Allow sample loader inliner to inline recursive calls."))
static cl::opt< CallSiteFormat::Format > ProfileInlineReplayFormat("sample-profile-inline-replay-format", cl::init(CallSiteFormat::Format::LineColumnDiscriminator), cl::values(clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"), clEnumValN(CallSiteFormat::Format::LineColumn, "LineColumn", "<Line Number>:<Column Number>"), clEnumValN(CallSiteFormat::Format::LineDiscriminator, "LineDiscriminator", "<Line Number>.<Discriminator>"), clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator, "LineColumnDiscriminator", "<Line Number>:<Column Number>.<Discriminator> (default)")), cl::desc("How sample profile inline replay file is formatted"), cl::Hidden)
static cl::opt< std::string > SampleProfileRemappingFile("sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden)
static cl::opt< unsigned > HotFuncCutoffForStalenessError("hot-func-cutoff-for-staleness-error", cl::Hidden, cl::init(800000), cl::desc("A function is considered hot for staleness error check if its " "total sample count is above the specified percentile"))
static cl::opt< bool > CallsitePrioritizedInline("sample-profile-prioritized-inline", cl::Hidden, cl::desc("Use call site prioritized inlining for sample profile loader." "Currently only CSSPGO is supported."))
This file provides the interface for the sampled PGO loader pass.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Defines the virtual file system interface vfs::FileSystem.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
This class represents a function call, abstracting a target machine's calling convention.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Implements a dense probed hash-table based set.
Diagnostic information for the sample profiler.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Represents either an error or a value T.
Class to represent profile counts.
DISubprogram * getSubprogram() const
Get the attached subprogram.
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Module * getParent()
Get the module that this global value is contained inside of...
Represents the cost of inlining a function.
static InlineCost getNever(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost getAlways(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost get(int Cost, int Threshold, int StaticBonus=0)
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
InlineResult is basically true or false.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
bool extractProfTotalWeight(uint64_t &TotalVal) const
Retrieve total raw weight values of a branch.
const BasicBlock * getParent() const
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
This is an important class for using LLVM in a threaded context.
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
An analysis pass which computes the call graph for a module.
A node in the call graph.
A RefSCC of the call graph.
An SCC of the call graph.
A lazily constructed view of the call graph of a module.
iterator_range< postorder_ref_scc_iterator > postorder_ref_sccs()
This class implements a map that also provides access to all stored values in a deterministic order.
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
ValueT lookup(const KeyT &Key) const
A Module instance is used to store all the information related to an LLVM module.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
void refresh()
If no summary is present, attempt to refresh.
bool isHotCountNthPercentile(int PercentileCutoff, uint64_t C) const
Returns true if count C is considered hot with regard to a given hot percentile cutoff value.
Sample profile inference pass.
void computeDominanceAndLoopInfo(FunctionT &F)
virtual ErrorOr< uint64_t > getInstWeight(const InstructionT &Inst)
Get the weight for an instruction.
virtual const FunctionSamples * findFunctionSamples(const InstructionT &I) const
Get the FunctionSamples for an instruction.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
SampleProfileLoaderPass(std::string File="", std::string RemappingFile="", ThinOrFullLTOPhase LTOPhase=ThinOrFullLTOPhase::None, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
constexpr bool empty() const
empty - Check if the string is empty.
StringSet - A wrapper for StringMap that provides set-like functionality.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM Value Representation.
StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
This class represents a function that is read from a sample profile.
Representation of the samples collected for a function.
void findInlinedFunctions(DenseSet< GlobalValue::GUID > &S, const HashKeyMap< std::unordered_map, FunctionId, Function * > &SymbolMap, uint64_t Threshold) const
Recursively traverses all children, if the total sample count of the corresponding function is no les...
FunctionId getFunction() const
Return the function name.
static bool ProfileIsProbeBased
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
SampleContext & getContext() const
sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight=1)
Merge the samples in Other into this one.
void SetContextSynthetic()
static LineLocation getCallSiteIdentifier(const DILocation *DIL, bool ProfileIsFS=false)
Returns a unique call site identifier for a given debug location of a call instruction.
uint64_t getHeadSamplesEstimate() const
Return an estimate of the sample count of the function entry basic block.
uint64_t getGUID() const
Return the GUID of the context's name.
const BodySampleMap & getBodySamples() const
Return all the samples collected in the body of the function.
static bool UseMD5
Whether the profile uses MD5 to represent string.
This class is a wrapper to associative container MapT<KeyT, ValueT> using the hash value of the origi...
bool hasAttribute(ContextAttributeMask A)
This class provides operator overloads to the map container using MD5 as the key type,...
Sample-based profile reader.
static ErrorOr< std::unique_ptr< SampleProfileReader > > create(const std::string Filename, LLVMContext &C, vfs::FileSystem &FS, FSDiscriminatorPass P=FSDiscriminatorPass::Base, const std::string RemapFilename="")
Create a sample profile reader appropriate to the file format.
std::unordered_map< FunctionId, uint64_t > CallTargetMap
static const SortedCallTargetSet SortCallTargets(const CallTargetMap &Targets)
Sort call targets in descending order of call frequency.
static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets, float DistributionFactor)
Prorate call targets by a distribution factor.
Enumerate the SCCs of a directed graph in reverse topological order of the SCC DAG.
bool isAtEnd() const
Direct loop termination test which is more efficient than comparison with end().
Sort the nodes of a directed SCC in the decreasing order of the edge weights.
const CustomOperand< const MCSubtargetInfo & > Msg[]
@ C
The default llvm calling convention, compatible with C.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
void checkExpectAnnotations(Instruction &I, const ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
DenseMap< SymbolStringPtr, ExecutorSymbolDef > SymbolMap
A map from symbol names (as SymbolStringPtrs) to JITSymbols (address/flags pairs).
DiagnosticInfoOptimizationBase::Argument NV
CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< FuncNode * > Func
static FunctionId getRepInFormat(StringRef Name)
Get the proper representation of a string according to whether the current Format uses MD5 to represe...
@ ContextDuplicatedIntoBase
std::map< FunctionId, FunctionSamples > FunctionSamplesMap
bool callsiteIsHot(const FunctionSamples *CallsiteFS, ProfileSummaryInfo *PSI, bool ProfAccForSymsInList)
Return true if the given callsite is hot with respect to the hot cutoff threshold.
IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets a vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
bool getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, InstrProfValueData ValueData[], uint32_t &ActualNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst which is annotated with value profile meta data.
bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
cl::opt< int > ProfileInlineLimitMin
bool succ_empty(const Instruction *I)
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
void setProbeDistributionFactor(Instruction &Inst, float Factor)
void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
std::string AnnotateInlinePassName(InlineContext IC)
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
cl::opt< bool > SampleProfileUseProfi
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
void sort(IteratorTy Start, IteratorTy End)
llvm::cl::opt< bool > UseIterativeBFIInference
std::optional< PseudoProbe > extractProbe(const Instruction &Inst)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void emitInlinedIntoBasedOnCost(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, const BasicBlock *Block, const Function &Callee, const Function &Caller, const InlineCost &IC, bool ForProfileContext=false, const char *PassName=nullptr)
Emit ORE message based on cost (default heuristic).
std::unique_ptr< InlineAdvisor > getReplayInlineAdvisor(Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context, std::unique_ptr< InlineAdvisor > OriginalAdvisor, const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks, InlineContext IC)
cl::opt< int > SampleHotCallSiteThreshold
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
void updateProfileCallee(Function *Callee, int64_t EntryDelta, const ValueMap< const Value *, WeakTrackingVH > *VMap=nullptr)
Updates profile information by adjusting the entry count by adding EntryDelta then scaling callsite i...
cl::opt< int > SampleColdCallSiteThreshold
InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes=false, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, Function *ForwardVarArgsTo=nullptr)
This function inlines the called function into the basic block of the caller.
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
static bool skipProfileForFunction(const Function &F)
cl::opt< bool > SortProfiledSCC
cl::opt< int > ProfileInlineLimitMax
cl::opt< bool > EnableExtTspBlockPlacement
const uint64_t NOMORE_ICP_MAGICNUM
Magic number in the value profile metadata showing a target has been promoted for the instruction and...
cl::opt< int > ProfileInlineGrowthLimit
Implement std::hash so that hash_code can be used in STL containers.
Used in the streaming interface as the general argument type.
A wrapper of binary function with basic blocks and jumps.
Provides context on when an inline advisor is constructed in the pipeline (e.g., link phase,...
Thresholds to tune inline cost analysis.
std::optional< bool > AllowRecursiveCall
Indicate whether we allow inlining for recursive calls.
std::optional< bool > ComputeFullInlineCost
Compute inline cost even when the cost has exceeded the threshold.