90#include <system_error>
98#define DEBUG_TYPE "sample-profile"
99#define CSINLINE_DEBUG DEBUG_TYPE "-inline"
102 "Number of functions inlined with context sensitive profile");
104 "Number of functions not inlined with context sensitive profile");
106 "Number of functions with CFG mismatched profile");
107STATISTIC(NumMatchedProfile,
"Number of functions with CFG matched profile");
109 "Number of inlined callsites with a partial distribution factor");
112 "Number of functions with FDO inline stopped due to min size limit");
114 "Number of functions with FDO inline stopped due to max size limit");
116 NumCSInlinedHitGrowthLimit,
117 "Number of functions with FDO inline stopped due to growth size limit");
136 cl::desc(
"Salvage stale profile by fuzzy matching and use the remapped "
137 "location for sample profile query."));
140 cl::desc(
"Salvage unused profile by matching with new "
141 "functions on call graph."));
145 cl::desc(
"Compute and report stale profile statistical metrics."));
149 cl::desc(
"Compute stale profile statistical metrics and write it into the "
150 "native object file(.llvm_stats section)."));
154 cl::desc(
"If the sample profile is accurate, we will mark all un-sampled "
155 "callsite and function as having 0 samples. Otherwise, treat "
156 "un-sampled callsites and functions conservatively as unknown. "));
160 cl::desc(
"If the sample profile is accurate, we will mark all un-sampled "
161 "branches and calls as having 0 samples. Otherwise, treat "
162 "them conservatively as unknown. "));
166 cl::desc(
"For symbols in profile symbol list, regard their profiles to "
167 "be accurate. It may be overridden by profile-sample-accurate. "));
171 cl::desc(
"Merge past inlinee's profile to outline version if sample "
172 "profile loader decided not to inline a call site. It will "
173 "only be enabled when top-down order of profile loading is "
178 cl::desc(
"Do profile annotation and inlining for functions in top-down "
179 "order of call graph during sample profile loading. It only "
180 "works for new pass manager. "));
184 cl::desc(
"Process functions in a top-down order "
185 "defined by the profiled call graph when "
186 "-sample-profile-top-down-load is on."));
190 cl::desc(
"Inline cold call sites in profile loader if it's beneficial "
199 "If true, artificially skip inline transformation in sample-loader "
200 "pass, and merge (or scale) profiles (as configured by "
201 "--sample-profile-merge-inlinee)."));
205 cl::desc(
"Sort profiled recursion by edge weights."));
209 cl::desc(
"The size growth ratio limit for proirity-based sample profile "
210 "loader inlining."));
214 cl::desc(
"The lower bound of size growth limit for "
215 "proirity-based sample profile loader inlining."));
219 cl::desc(
"The upper bound of size growth limit for "
220 "proirity-based sample profile loader inlining."));
224 cl::desc(
"Hot callsite threshold for proirity-based sample profile loader "
229 cl::desc(
"Threshold for inlining cold callsites"));
235 "Relative hotness percentage threshold for indirect "
236 "call promotion in proirity-based sample profile loader inlining."));
241 "Skip relative hotness check for ICP up to given number of targets."));
245 cl::desc(
"A function is considered hot for staleness error check if its "
246 "total sample count is above the specified percentile"));
250 cl::desc(
"Skip the check if the number of hot functions is smaller than "
251 "the specified number."));
255 cl::desc(
"Reject the profile if the mismatch percent is higher than the "
259 "sample-profile-prioritized-inline",
cl::Hidden,
260 cl::desc(
"Use call site prioritized inlining for sample profile loader. "
261 "Currently only CSSPGO is supported."));
265 cl::desc(
"Use the preinliner decisions stored in profile context."));
268 "sample-profile-recursive-inline",
cl::Hidden,
269 cl::desc(
"Allow sample loader inliner to inline recursive calls."));
273 cl::desc(
"Remove pseudo-probe after sample profile annotation."));
278 "Optimization remarks file containing inline remarks to be replayed "
279 "by inlining from sample profile loader."),
283 "sample-profile-inline-replay-scope",
286 "Replay on functions that have remarks associated "
287 "with them (default)"),
289 "Replay on the entire module")),
290 cl::desc(
"Whether inline replay should be applied to the entire "
291 "Module or just the Functions (default) that are present as "
292 "callers in remarks during sample profile inlining."),
296 "sample-profile-inline-replay-fallback",
301 "All decisions not in replay send to original advisor (default)"),
303 "AlwaysInline",
"All decisions not in replay are inlined"),
305 "All decisions not in replay are not inlined")),
306 cl::desc(
"How sample profile inline replay treats sites that don't come "
307 "from the replay. Original: defers to original advisor, "
308 "AlwaysInline: inline all sites not in replay, NeverInline: "
309 "inline no sites not in replay"),
313 "sample-profile-inline-replay-format",
318 "<Line Number>:<Column Number>"),
320 "LineDiscriminator",
"<Line Number>.<Discriminator>"),
322 "LineColumnDiscriminator",
323 "<Line Number>:<Column Number>.<Discriminator> (default)")),
328 cl::desc(
"Max number of promotions for a single indirect "
329 "call callsite in sample profile loader"));
333 cl::desc(
"Ignore existing branch weights on IR and always overwrite."));
337 cl::desc(
"Annotate LTO phase (prelink / postlink), or main (no LTO) for "
338 "sample-profile inline pass name."));
/// An edge between two basic blocks, stored as an ordered pair of block
/// pointers. Later in this file an Edge is built as (BB, Succ) and used as the
/// key when reading EdgeWeights.
348using Edge = std::pair<const BasicBlock *, const BasicBlock *>;
/// Scoped helper that fills a GUID-to-function-name map on construction and
/// tears it down on destruction.
///
/// Visible behaviour:
///  - The constructor walks the functions of CurrentModule and inserts entries
///    into CurrentGUIDToFuncNameMap; a second insert is made when CanonName
///    differs from OrigName. It then passes the map's address to
///    SetGUIDToFuncNameMapForAll.
///  - The destructor clears the map and calls
///    SetGUIDToFuncNameMapForAll(nullptr).
///  - The worklist code at the end assigns Map to the GUIDToFuncNameMap field
///    of every FunctionSamples it dequeues.
///
/// NOTE(review): this listing is missing lines inside the class (constructor
/// head, any early-exit guards, the arguments passed to insert, and the head
/// of the worklist function) — confirm details against the full file.
353class GUIDToFuncNameMapper {
358 CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) {
362 for (
const auto &
F : CurrentModule) {
364 CurrentGUIDToFuncNameMap.insert(
375 if (CanonName != OrigName)
376 CurrentGUIDToFuncNameMap.insert(
381 SetGUIDToFuncNameMapForAll(&CurrentGUIDToFuncNameMap);
// Destructor: empty the map, then reset the pointer handed out by the
// constructor by passing nullptr to the same setter.
384 ~GUIDToFuncNameMapper() {
388 CurrentGUIDToFuncNameMap.clear();
392 SetGUIDToFuncNameMapForAll(
nullptr);
// Worklist of FunctionSamples whose GUIDToFuncNameMap field still has to be
// set; entries reached through getCallsiteSamples() are queued as well.
397 std::queue<FunctionSamples *> FSToUpdate;
399 FSToUpdate.push(&IFS.second);
402 while (!FSToUpdate.empty()) {
405 FS->GUIDToFuncNameMap = Map;
406 for (
const auto &ICS : FS->getCallsiteSamples()) {
408 for (
const auto &IFS : FSMap) {
410 FSToUpdate.push(&FS);
/// Record describing one call site that is being considered for inlining.
///
/// Elsewhere in this file it is brace-initialised in the order
/// {call instruction, callee samples, call-site count, distribution factor},
/// and its call instruction is accessed as the CallInstr member.
/// NOTE(review): the CallInstr declaration itself is not among the lines
/// visible in this listing.
422struct InlineCandidate {
// Profile samples of the callee; read-only from the candidate's side.
424 const FunctionSamples *CalleeSamples;
// Sample count attributed to this call site; CandidateComparer compares it
// first.
429 uint64_t CallsiteCount;
// Scaling factor for the call site. Call sites with a value below 1 get
// special handling after inlining (see tryInlineCandidate).
432 float CallsiteDistribution;
/// Ordering functor over InlineCandidate.
///
/// When the two call-site counts differ, the candidate with the smaller
/// CallsiteCount compares as "less". For equal counts the comparison goes on
/// to look at the callee samples of both sides (LCS / RCS).
/// NOTE(review): the tie-breaking logic after LCS/RCS are loaded is on lines
/// missing from this listing — confirm against the full file.
436struct CandidateComparer {
437 bool operator()(
const InlineCandidate &
LHS,
const InlineCandidate &
RHS) {
438 if (
LHS.CallsiteCount !=
RHS.CallsiteCount)
439 return LHS.CallsiteCount <
RHS.CallsiteCount;
441 const FunctionSamples *LCS =
LHS.CalleeSamples;
442 const FunctionSamples *RCS =
RHS.CalleeSamples;
457using CandidateQueue =
470 IntrusiveRefCntPtr<vfs::FileSystem> FS,
471 std::function<AssumptionCache &(Function &)> GetAssumptionCache,
472 std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo,
473 std::function<
const TargetLibraryInfo &(Function &)> GetTLI,
474 LazyCallGraph &CG,
bool DisableSampleProfileInlining,
475 bool UseFlattenedProfile)
478 GetAC(std::
move(GetAssumptionCache)),
479 GetTTI(std::
move(GetTargetTransformInfo)), GetTLI(std::
move(GetTLI)),
480 CG(CG), LTOPhase(LTOPhase),
485 DisableSampleProfileInlining(DisableSampleProfileInlining),
486 UseFlattenedProfile(UseFlattenedProfile) {}
490 ProfileSummaryInfo *_PSI);
494 bool emitAnnotations(Function &
F);
495 ErrorOr<uint64_t> getInstWeight(
const Instruction &
I)
override;
496 const FunctionSamples *findCalleeFunctionSamples(
const CallBase &
I)
const;
497 const FunctionSamples *
498 findFunctionSamples(
const Instruction &
I)
const override;
499 std::vector<const FunctionSamples *>
500 findIndirectCallFunctionSamples(
const Instruction &
I, uint64_t &Sum)
const;
501 void findExternalInlineCandidate(CallBase *CB,
const FunctionSamples *Samples,
502 DenseSet<GlobalValue::GUID> &InlinedGUIDs,
505 bool tryPromoteAndInlineCandidate(
506 Function &
F, InlineCandidate &Candidate, uint64_t SumOrigin,
509 bool inlineHotFunctions(Function &
F,
510 DenseSet<GlobalValue::GUID> &InlinedGUIDs);
511 std::optional<InlineCost> getExternalInlineAdvisorCost(CallBase &CB);
512 bool getExternalInlineAdvisorShouldInline(CallBase &CB);
513 InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
514 bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB);
516 tryInlineCandidate(InlineCandidate &Candidate,
519 inlineHotFunctionsWithPriority(Function &
F,
520 DenseSet<GlobalValue::GUID> &InlinedGUIDs);
522 bool shouldInlineColdCallee(CallBase &CallInst);
523 void emitOptimizationRemarksForInlineCandidates(
524 const SmallVectorImpl<CallBase *> &Candidates,
const Function &
F,
526 void promoteMergeNotInlinedContextSamples(
527 MapVector<CallBase *, const FunctionSamples *> NonInlinedCallSites,
529 std::vector<Function *> buildFunctionOrder(
Module &M, LazyCallGraph &CG);
530 std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(
Module &M);
531 void generateMDProfMetadata(Function &
F);
532 bool rejectHighStalenessProfile(
Module &M, ProfileSummaryInfo *PSI,
533 const SampleProfileMap &Profiles);
534 void removePseudoProbeInstsDiscriminator(
Module &M);
540 HashKeyMap<std::unordered_map, FunctionId, Function *> SymbolMap;
544 HashKeyMap<std::unordered_map, FunctionId, FunctionId> FuncNameToProfNameMap;
546 std::function<AssumptionCache &(
Function &)> GetAC;
547 std::function<TargetTransformInfo &(
Function &)> GetTTI;
548 std::function<
const TargetLibraryInfo &(
Function &)> GetTLI;
552 std::unique_ptr<SampleContextTracker> ContextTracker;
560 const std::string AnnotatedPassName;
564 std::shared_ptr<ProfileSymbolList> PSL;
569 struct NotInlinedProfileInfo {
572 DenseMap<Function *, NotInlinedProfileInfo> notInlinedCallInfo;
576 DenseMap<uint64_t, StringRef> GUIDToFuncNameMap;
580 StringSet<> NamesInProfile;
585 llvm::DenseSet<uint64_t> GUIDsInProfile;
592 bool ProfAccForSymsInList;
594 bool DisableSampleProfileInlining;
596 bool UseFlattenedProfile;
599 std::unique_ptr<InlineAdvisor> ExternalInlineAdvisor;
602 std::unique_ptr<SampleProfileMatcher> MatchingManager;
/// Returns the AnnotatedPassName member as a C string. The pointer refers to
/// the member string's own storage, so it must not outlive this object.
/// Callers in this file pass it as the pass name when building optimization
/// remarks.
605 const char *getAnnotatedRemarkPassName()
const {
606 return AnnotatedPassName.c_str();
613inline bool SampleProfileInference<Function>::isExit(
const BasicBlock *BB) {
618inline void SampleProfileInference<Function>::findUnlikelyJumps(
619 const std::vector<const BasicBlockT *> &BasicBlocks,
621 for (
auto &Jump :
Func.Jumps) {
622 const auto *BB = BasicBlocks[Jump.Source];
623 const auto *Succ = BasicBlocks[Jump.Target];
627 const auto &Succs = Successors[BB];
628 if (Succs.size() == 2 && Succs.back() == Succ) {
630 Jump.IsUnlikely =
true;
637 Jump.IsUnlikely =
true;
658 return getProbeWeight(Inst);
662 return std::error_code();
668 return std::error_code();
678 if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
681 return getInstWeightImpl(Inst);
/// Look up the profile samples recorded for the callee of call \p Inst.
///
/// Two lookup paths are visible:
///  - through ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName);
///  - otherwise starting from findFunctionSamples(Inst) and passing
///    CalleeName, the reader's remapper and FuncNameToProfNameMap to a lookup
///    call whose head is not shown.
///
/// CalleeName starts out empty and is set from Callee->getName() on the
/// visible assignment.
/// NOTE(review): the conditions that choose between the paths, and any
/// null-result handling, are on lines missing from this listing.
696const FunctionSamples *
697SampleProfileLoader::findCalleeFunctionSamples(
const CallBase &Inst)
const {
703 StringRef CalleeName;
705 CalleeName =
Callee->getName();
708 return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName);
710 const FunctionSamples *
FS = findFunctionSamples(Inst);
715 CalleeName, Reader->getRemapper(),
716 &FuncNameToProfNameMap);
/// Gather the FunctionSamples associated with the possible targets of the
/// call \p Inst, collecting them in the local vector R.
///
/// \p Sum is an in/out accumulator: the visible loops add each candidate's
/// getHeadSamplesEstimate() to it.
///
/// Two sources of candidates are visible:
///  - ContextTracker->getIndirectCalleeContextSamplesFor(DIL);
///  - the samples found via findFunctionSamples(Inst), using its call-target
///    map and the per-call-site FunctionSamples map (*M).
///
/// NOTE(review): the conditions selecting the source, the point where
/// FSCompare is applied and the return statements are on lines missing from
/// this listing.
722std::vector<const FunctionSamples *>
723SampleProfileLoader::findIndirectCallFunctionSamples(
724 const Instruction &Inst, uint64_t &Sum)
const {
726 std::vector<const FunctionSamples *>
R;
// Comparator over FunctionSamples pointers (both must be non-null): a larger
// head-sample estimate orders first; equal estimates are ordered by smaller
// GUID. Note that its parameter R shadows the result vector R inside the
// lambda.
732 auto FSCompare = [](
const FunctionSamples *
L,
const FunctionSamples *
R) {
733 assert(L && R &&
"Expect non-null FunctionSamples");
734 if (
L->getHeadSamplesEstimate() !=
R->getHeadSamplesEstimate())
735 return L->getHeadSamplesEstimate() >
R->getHeadSamplesEstimate();
736 return L->getGUID() <
R->getGUID();
741 ContextTracker->getIndirectCalleeContextSamplesFor(DIL);
742 if (CalleeSamples.empty())
748 for (
const auto *
const FS : CalleeSamples) {
749 Sum +=
FS->getHeadSamplesEstimate();
756 const FunctionSamples *
FS = findFunctionSamples(Inst);
762 if (
auto T =
FS->findCallTargetMapAt(CallSite))
763 for (
const auto &T_C : *
T)
768 for (
const auto &NameFS : *M) {
769 Sum += NameFS.second.getHeadSamplesEstimate();
770 R.push_back(&NameFS.second);
/// Return the FunctionSamples that apply to instruction \p Inst, using
/// DILocation2SampleMap as a per-debug-location cache.
///
/// try_emplace(DIL, nullptr) either finds the existing entry for DIL or
/// creates one holding nullptr. The entry is then filled from
/// ContextTracker->getContextSamplesFor(DIL) or from
/// Samples->findFunctionSamples(DIL, remapper, &FuncNameToProfNameMap), and
/// the cached pointer is returned.
///
/// NOTE(review): how DIL is derived from Inst, and the conditions guarding the
/// two assignments (presumably "entry was newly inserted" and "context
/// tracker in use"), are on lines missing from this listing — confirm.
777const FunctionSamples *
778SampleProfileLoader::findFunctionSamples(
const Instruction &Inst)
const {
789 auto it = DILocation2SampleMap.try_emplace(DIL,
nullptr);
792 it.first->second = ContextTracker->getContextSamplesFor(DIL);
794 it.first->second = Samples->findFunctionSamples(
795 DIL, Reader->getRemapper(), &FuncNameToProfNameMap);
797 return it.first->second;
813 if (ValueData.empty())
816 unsigned NumPromoted = 0;
817 for (
const auto &V : ValueData) {
859 "If sum is 0, assume only one element in CallTargets "
860 "with count being NOMORE_ICP_MAGICNUM");
862 for (
const auto &V : ValueData)
863 ValueCountMap[V.Value] = V.Count;
869 OldSum -= Pair.first->second;
876 for (
const auto &V : ValueData) {
878 ValueCountMap[V.Value] = V.Count;
881 for (
const auto &
Data : CallTargets) {
888 assert(Sum >=
Data.Count &&
"Sum should never be less than Data.Count");
894 for (
const auto &ValueCount : ValueCountMap) {
896 InstrProfValueData{ValueCount.first, ValueCount.second});
900 [](
const InstrProfValueData &L,
const InstrProfValueData &R) {
901 return std::tie(L.Count, L.Value) > std::tie(R.Count, R.Value);
907 NewCallTargets, Sum, IPVK_IndirectCallTarget, MaxMDCount);
920bool SampleProfileLoader::tryPromoteAndInlineCandidate(
921 Function &
F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum,
924 if (DisableSampleProfileInlining)
931 auto CalleeFunctionName = Candidate.CalleeSamples->
getFunction();
932 auto R = SymbolMap.find(CalleeFunctionName);
933 if (R == SymbolMap.end() || !
R->second)
936 auto &CI = *Candidate.CallInstr;
940 const char *Reason =
"Callee function not available";
947 if (!
R->second->isDeclaration() &&
R->second->getSubprogram() &&
948 R->second->hasFnAttribute(
"use-sample-profile") &&
953 Function::getGUIDAssumingExternalLinkage(
R->second->getName()),
958 CI,
R->second, Candidate.CallsiteCount, Sum,
false, ORE);
960 Sum -= Candidate.CallsiteCount;
973 Candidate.CallInstr = DI;
975 bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
980 *DI,
static_cast<float>(Candidate.CallsiteCount) / SumOrigin);
988 Candidate.CallInstr->
getName())<<
" because "
994bool SampleProfileLoader::shouldInlineColdCallee(CallBase &CallInst) {
999 if (Callee ==
nullptr)
1008 if (
Cost.isAlways())
1014void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
1015 const SmallVectorImpl<CallBase *> &Candidates,
const Function &
F,
1017 for (
auto *
I : Candidates) {
1018 Function *CalledFunction =
I->getCalledFunction();
1019 if (CalledFunction) {
1020 ORE->emit(OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(),
1021 "InlineAttempt",
I->getDebugLoc(),
1023 <<
"previous inlining reattempted for "
1024 << (
Hot ?
"hotness: '" :
"size: '")
1025 <<
ore::NV(
"Callee", CalledFunction) <<
"' into '"
1031void SampleProfileLoader::findExternalInlineCandidate(
1032 CallBase *CB,
const FunctionSamples *Samples,
1033 DenseSet<GlobalValue::GUID> &InlinedGUIDs, uint64_t Threshold) {
1037 if (CB && getExternalInlineAdvisorShouldInline(*CB)) {
1041 InlinedGUIDs.
insert(Function::getGUIDAssumingExternalLinkage(
1067 ContextTrieNode *
Caller = ContextTracker->getContextNodeForProfile(Samples);
1068 std::queue<ContextTrieNode *> CalleeList;
1069 CalleeList.push(Caller);
1070 while (!CalleeList.empty()) {
1071 ContextTrieNode *
Node = CalleeList.front();
1073 FunctionSamples *CalleeSample =
Node->getFunctionSamples();
1089 if (!Func ||
Func->isDeclaration())
1095 for (
const auto &TS : BS.second.getCallTargets())
1096 if (TS.second > Threshold) {
1098 if (!Callee ||
Callee->isDeclaration())
1099 InlinedGUIDs.
insert(TS.first.getHashCode());
1106 for (
auto &Child :
Node->getAllChildContext()) {
1107 ContextTrieNode *CalleeNode = &Child.second;
1108 CalleeList.push(CalleeNode);
1135bool SampleProfileLoader::inlineHotFunctions(
1136 Function &
F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
1139 assert((!ProfAccForSymsInList ||
1141 !
F.hasFnAttribute(
"profile-sample-accurate"))) &&
1142 "ProfAccForSymsInList should be false when profile-sample-accurate "
1145 MapVector<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
1147 bool LocalChanged =
true;
1148 while (LocalChanged) {
1149 LocalChanged =
false;
1151 for (
auto &BB :
F) {
1155 for (
auto &
I : BB) {
1156 const FunctionSamples *
FS =
nullptr;
1159 if ((FS = findCalleeFunctionSamples(*CB))) {
1161 "GUIDToFuncNameMap has to be populated");
1163 if (
FS->getHeadSamplesEstimate() > 0 ||
1165 LocalNotInlinedCallSites.
insert({CB,
FS});
1168 else if (shouldInlineColdCallee(*CB))
1170 }
else if (getExternalInlineAdvisorShouldInline(*CB)) {
1176 if (
Hot || ExternalInlineAdvisor) {
1178 emitOptimizationRemarksForInlineCandidates(AllCandidates,
F,
true);
1181 emitOptimizationRemarksForInlineCandidates(ColdCandidates,
F,
false);
1184 for (CallBase *
I : CIS) {
1185 Function *CalledFunction =
I->getCalledFunction();
1186 InlineCandidate Candidate = {
I, LocalNotInlinedCallSites.
lookup(
I),
1190 if (CalledFunction == &
F)
1192 if (
I->isIndirectCall()) {
1194 for (
const auto *FS : findIndirectCallFunctionSamples(*
I, Sum)) {
1195 uint64_t SumOrigin = Sum;
1196 if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1197 findExternalInlineCandidate(
I, FS, InlinedGUIDs,
1198 PSI->getOrCompHotCountThreshold());
1204 Candidate = {
I,
FS,
FS->getHeadSamplesEstimate(), 1.0};
1205 if (tryPromoteAndInlineCandidate(
F, Candidate, SumOrigin, Sum)) {
1206 LocalNotInlinedCallSites.
erase(
I);
1207 LocalChanged =
true;
1210 }
else if (CalledFunction && CalledFunction->
getSubprogram() &&
1212 if (tryInlineCandidate(Candidate)) {
1213 LocalNotInlinedCallSites.
erase(
I);
1214 LocalChanged =
true;
1216 }
else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1217 findExternalInlineCandidate(
I, findCalleeFunctionSamples(*
I),
1219 PSI->getOrCompHotCountThreshold());
1228 promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites,
F);
1232bool SampleProfileLoader::tryInlineCandidate(
1236 if (DisableSampleProfileInlining)
1239 CallBase &CB = *Candidate.CallInstr;
1241 assert(CalledFunction &&
"Expect a callee with definition");
1245 InlineCost
Cost = shouldInlineCandidate(Candidate);
1246 if (
Cost.isNever()) {
1247 ORE->emit(OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(),
1248 "InlineFail", DLoc, BB)
1249 <<
"incompatible inlining");
1256 InlineFunctionInfo IFI(GetAC);
1257 IFI.UpdateProfile =
false;
1260 if (!
IR.isSuccess())
1265 Cost,
true, getAnnotatedRemarkPassName());
1268 if (InlinedCallSites) {
1269 InlinedCallSites->
clear();
1274 ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
1284 if (Candidate.CallsiteDistribution < 1) {
1285 for (
auto &
I : IFI.InlinedCallSites) {
1288 Candidate.CallsiteDistribution);
1290 NumDuplicatedInlinesite++;
1296bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
1298 assert(CB &&
"Expect non-null call instruction");
1304 const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB);
1307 if (!CalleeSamples && !getExternalInlineAdvisorShouldInline(*CB))
1311 if (std::optional<PseudoProbe> Probe =
extractProbe(*CB))
1312 Factor = Probe->Factor;
1314 uint64_t CallsiteCount =
1316 *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
/// Ask the external inline advisor, if one is installed, about call site
/// \p CB.
///
/// When ExternalInlineAdvisor is set, its advice for CB is fetched with
/// getAdvice(CB). If the advice does not recommend inlining,
/// recordUnattemptedInlining() is called on it; recordInlining() is called on
/// the other visible path.
///
/// NOTE(review): the InlineCost values returned on each path, and the result
/// when no advisor is installed, are on lines missing from this listing.
1320std::optional<InlineCost>
1321SampleProfileLoader::getExternalInlineAdvisorCost(CallBase &CB) {
1322 std::unique_ptr<InlineAdvice> Advice =
nullptr;
1323 if (ExternalInlineAdvisor) {
1324 Advice = ExternalInlineAdvisor->getAdvice(CB);
1326 if (!Advice->isInliningRecommended()) {
1327 Advice->recordUnattemptedInlining();
1330 Advice->recordInlining();
/// Boolean wrapper around getExternalInlineAdvisorCost(): obtains the optional
/// cost for call site \p CB.
/// NOTE(review): the expression that turns the optional cost into the bool
/// result is on a line missing from this listing.
1338bool SampleProfileLoader::getExternalInlineAdvisorShouldInline(CallBase &CB) {
1339 std::optional<InlineCost>
Cost = getExternalInlineAdvisorCost(CB);
1344SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
1345 if (std::optional<InlineCost> ReplayCost =
1346 getExternalInlineAdvisorCost(*Candidate.CallInstr))
1352 if (Candidate.CallsiteCount > PSI->getHotCountThreshold())
1359 assert(Callee &&
"Expect a definition for inline candidate of direct call");
1372 GetTTI(*Callee), GetAC, GetTLI);
1375 if (
Cost.isNever() ||
Cost.isAlways())
1409bool SampleProfileLoader::inlineHotFunctionsWithPriority(
1410 Function &
F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
1413 assert((!ProfAccForSymsInList ||
1415 !
F.hasFnAttribute(
"profile-sample-accurate"))) &&
1416 "ProfAccForSymsInList should be false when profile-sample-accurate "
1421 CandidateQueue CQueue;
1422 InlineCandidate NewCandidate;
1423 for (
auto &BB :
F) {
1424 for (
auto &
I : BB) {
1428 if (getInlineCandidate(&NewCandidate, CB))
1429 CQueue.push(NewCandidate);
1438 "Max inline size limit should not be smaller than min inline size "
1443 if (ExternalInlineAdvisor)
1444 SizeLimit = std::numeric_limits<unsigned>::max();
1446 MapVector<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
1450 while (!CQueue.empty() &&
F.getInstructionCount() <
SizeLimit) {
1451 InlineCandidate Candidate = CQueue.top();
1453 CallBase *
I = Candidate.CallInstr;
1454 Function *CalledFunction =
I->getCalledFunction();
1456 if (CalledFunction == &
F)
1458 if (
I->isIndirectCall()) {
1460 auto CalleeSamples = findIndirectCallFunctionSamples(*
I, Sum);
1461 uint64_t SumOrigin = Sum;
1462 Sum *= Candidate.CallsiteDistribution;
1463 unsigned ICPCount = 0;
1464 for (
const auto *FS : CalleeSamples) {
1466 if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1467 findExternalInlineCandidate(
I, FS, InlinedGUIDs,
1468 PSI->getOrCompHotCountThreshold());
1471 uint64_t EntryCountDistributed =
1472 FS->getHeadSamplesEstimate() * Candidate.CallsiteDistribution;
1487 if (!PSI->isHotCount(EntryCountDistributed))
1492 Candidate = {
I,
FS, EntryCountDistributed,
1493 Candidate.CallsiteDistribution};
1494 if (tryPromoteAndInlineCandidate(
F, Candidate, SumOrigin, Sum,
1495 &InlinedCallSites)) {
1496 for (
auto *CB : InlinedCallSites) {
1497 if (getInlineCandidate(&NewCandidate, CB))
1498 CQueue.emplace(NewCandidate);
1502 }
else if (!ContextTracker) {
1503 LocalNotInlinedCallSites.
insert({
I,
FS});
1506 }
else if (CalledFunction && CalledFunction->
getSubprogram() &&
1509 if (tryInlineCandidate(Candidate, &InlinedCallSites)) {
1510 for (
auto *CB : InlinedCallSites) {
1511 if (getInlineCandidate(&NewCandidate, CB))
1512 CQueue.emplace(NewCandidate);
1515 }
else if (!ContextTracker) {
1516 LocalNotInlinedCallSites.
insert({
I, Candidate.CalleeSamples});
1518 }
else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1519 findExternalInlineCandidate(
I, findCalleeFunctionSamples(*
I),
1521 PSI->getOrCompHotCountThreshold());
1525 if (!CQueue.empty()) {
1527 ++NumCSInlinedHitMaxLimit;
1529 ++NumCSInlinedHitMinLimit;
1531 ++NumCSInlinedHitGrowthLimit;
1537 promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites,
F);
1541void SampleProfileLoader::promoteMergeNotInlinedContextSamples(
1542 MapVector<CallBase *, const FunctionSamples *> NonInlinedCallSites,
1543 const Function &
F) {
1545 for (
const auto &Pair : NonInlinedCallSites) {
1546 CallBase *
I = Pair.first;
1548 if (!Callee ||
Callee->isDeclaration())
1552 OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(),
"NotInline",
1553 I->getDebugLoc(),
I->getParent())
1554 <<
"previous inlining not repeated: '" <<
ore::NV(
"Callee", Callee)
1555 <<
"' into '" <<
ore::NV(
"Caller", &
F) <<
"'");
1558 const FunctionSamples *
FS = Pair.second;
1559 if (
FS->getTotalSamples() == 0 &&
FS->getHeadSamplesEstimate() == 0) {
1573 if (
FS->getHeadSamples() == 0) {
1576 const_cast<FunctionSamples *
>(
FS)->addHeadSamples(
1577 FS->getHeadSamplesEstimate());
1582 FunctionSamples *OutlineFS = Reader->getSamplesFor(*Callee);
1586 OutlineFS = &OutlineFunctionSamples[
1588 OutlineFS->
merge(*FS, 1);
1594 notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
1595 pair.first->second.entryCount +=
FS->getHeadSamplesEstimate();
1606 InstrProfValueData{
I.first.getHashCode(),
I.second});
1613void SampleProfileLoader::generateMDProfMetadata(Function &
F) {
1616 LLVM_DEBUG(
dbgs() <<
"\nPropagation complete. Setting branch weights\n");
1617 LLVMContext &Ctx =
F.getContext();
1619 for (
auto &BI :
F) {
1622 if (BlockWeights[BB]) {
1623 for (
auto &
I : *BB) {
1630 const DILocation *DIL = DLoc;
1631 const FunctionSamples *
FS = findFunctionSamples(
I);
1635 ErrorOr<SampleRecord::CallTargetMap>
T =
1636 FS->findCallTargetMapAt(CallSite);
1637 if (!
T ||
T.get().empty())
1644 if (Probe->Factor < 1)
1651 for (
const auto &
C :
T.get())
1658 FS->findFunctionSamplesMapAt(CallSite)) {
1659 for (
const auto &NameFS : *M)
1660 Sum += NameFS.second.getHeadSamplesEstimate();
1666 I.setMetadata(LLVMContext::MD_prof,
nullptr);
1669 I, ArrayRef<uint32_t>{
static_cast<uint32_t
>(BlockWeights[BB])},
1676 for (
auto &
I : *BB) {
1679 I.setMetadata(LLVMContext::MD_prof,
nullptr);
1697 << ((BranchLoc) ? Twine(BranchLoc.
getLine())
1698 : Twine(
"<UNKNOWN LOCATION>"))
1700 SmallVector<uint32_t, 4> Weights;
1701 uint32_t MaxWeight = 0;
1706 DenseMap<const BasicBlock *, uint64_t> EdgeMultiplicity;
1707 std::vector<uint64_t> EdgeIndex;
1712 EdgeIndex[
I] = EdgeMultiplicity[Succ];
1713 EdgeMultiplicity[Succ]++;
1718 Edge E = std::make_pair(BB, Succ);
1719 uint64_t Weight = EdgeWeights[
E];
1724 if (Weight > std::numeric_limits<uint32_t>::max()) {
1726 Weight = std::numeric_limits<uint32_t>::max();
1731 Weights.
push_back(
static_cast<uint32_t
>(
1732 Weight == std::numeric_limits<uint32_t>::max() ? Weight
1737 uint64_t
W = Weight / EdgeMultiplicity[Succ];
1739 if (EdgeIndex[
I] < Weight % EdgeMultiplicity[Succ])
1741 Weights.
push_back(
static_cast<uint32_t
>(W));
1744 if (Weight > MaxWeight) {
1746 MaxDestInst = &*Succ->getFirstNonPHIOrDbgOrLifetime();
1753 uint64_t TempWeight;
1762 if (MaxWeight > 0 &&
1767 return OptimizationRemark(
DEBUG_TYPE,
"PopularDest", MaxDestInst)
1768 <<
"most popular destination for conditional branches at "
1769 <<
ore::NV(
"CondBranchesLoc", BranchLoc);
/// Apply the loaded sample profile to function \p F.
///
/// Steps visible in this listing, in order:
///  1. Probe checks: a debug message when ProbeManager has no descriptor for
///     F, then ProbeManager->profileIsValid(F, *Samples) decides whether
///     NumMatchedProfile or NumMismatchedProfile is incremented (a debug
///     message reports the CFG mismatch).
///  2. A check of getFunctionLoc(F) == 0, followed by a debug print of the
///     function name and its location.
///  3. Inlining via inlineHotFunctionsWithPriority() or inlineHotFunctions(),
///     both filling InlinedGUIDs.
///  4. computeAndPropagateWeights(F, InlinedGUIDs).
///  5. generateMDProfMetadata(F) and emitCoverageRemarks(F).
///
/// The results of the inlining and propagation calls are OR-ed into Changed.
/// NOTE(review): the declaration of Changed, the conditions guarding each
/// step, the early returns and the final return are on lines missing from
/// this listing.
1788bool SampleProfileLoader::emitAnnotations(Function &
F) {
1793 if (!ProbeManager->getDesc(
F))
1794 dbgs() <<
"Probe descriptor missing for Function " <<
F.getName()
1798 if (ProbeManager->profileIsValid(
F, *Samples)) {
1799 ++NumMatchedProfile;
1801 ++NumMismatchedProfile;
1803 dbgs() <<
"Profile is invalid due to CFG mismatch for Function "
1804 <<
F.getName() <<
"\n");
1809 if (getFunctionLoc(
F) == 0)
1813 <<
F.getName() <<
": " << getFunctionLoc(
F) <<
"\n");
1816 DenseSet<GlobalValue::GUID> InlinedGUIDs;
1818 Changed |= inlineHotFunctionsWithPriority(
F, InlinedGUIDs);
1820 Changed |= inlineHotFunctions(
F, InlinedGUIDs);
1822 Changed |= computeAndPropagateWeights(
F, InlinedGUIDs);
1825 generateMDProfMetadata(
F);
1827 emitCoverageRemarks(
F);
/// Build the ProfiledCallGraph for module \p M.
///
/// The graph is constructed either from *ContextTracker or from
/// Reader->getProfiles(); afterwards the functions of M are visited and
/// addProfiledFunction() is called on the graph.
///
/// NOTE(review): the condition selecting the constructor, any filtering of
/// functions inside the loop, the argument passed to addProfiledFunction and
/// the return statement are on lines missing from this listing.
1831std::unique_ptr<ProfiledCallGraph>
1832SampleProfileLoader::buildProfiledCallGraph(
Module &M) {
1833 std::unique_ptr<ProfiledCallGraph> ProfiledCG;
1835 ProfiledCG = std::make_unique<ProfiledCallGraph>(*ContextTracker);
1837 ProfiledCG = std::make_unique<ProfiledCallGraph>(Reader->getProfiles());
1842 for (Function &
F : M) {
1845 ProfiledCG->addProfiledFunction(
1852std::vector<Function *>
1853SampleProfileLoader::buildFunctionOrder(
Module &M, LazyCallGraph &CG) {
1854 std::vector<Function *> FunctionOrderList;
1855 FunctionOrderList.reserve(
M.size());
1858 errs() <<
"WARNING: -use-profiled-call-graph ignored, should be used "
1859 "together with -sample-profile-top-down-load.\n";
1871 for (Function &
F : M)
1873 FunctionOrderList.push_back(&
F);
1874 return FunctionOrderList;
1927 std::unique_ptr<ProfiledCallGraph> ProfiledCG = buildProfiledCallGraph(M);
1928 scc_iterator<ProfiledCallGraph *> CGI =
scc_begin(ProfiledCG.get());
1933 scc_member_iterator<ProfiledCallGraph *>
SI(*CGI);
1936 for (
auto *Node :
Range) {
1939 FunctionOrderList.push_back(
F);
1943 std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
1948 dbgs() <<
"Function processing order:\n";
1949 for (
auto F : FunctionOrderList) {
1950 dbgs() <<
F->getName() <<
"\n";
1954 return FunctionOrderList;
1957bool SampleProfileLoader::doInitialization(
Module &M,
1959 auto &Ctx =
M.getContext();
1962 Filename, Ctx, *FS, FSDiscriminatorPass::Base, RemappingFilename);
1963 if (std::error_code EC = ReaderOrErr.getError()) {
1964 std::string Msg =
"Could not open profile: " +
EC.message();
1965 Ctx.
diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1968 Reader = std::move(ReaderOrErr.get());
1969 Reader->setSkipFlatProf(LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink);
1972 Reader->setModule(&M);
1973 if (std::error_code EC = Reader->read()) {
1974 std::string Msg =
"profile reading failed: " +
EC.message();
1975 Ctx.
diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1979 PSL = Reader->getProfileSymbolList();
1984 if (UseFlattenedProfile)
1986 Reader->profileIsCS());
1989 ProfAccForSymsInList =
1991 if (ProfAccForSymsInList) {
1992 NamesInProfile.
clear();
1993 GUIDsInProfile.
clear();
1994 if (
auto NameTable = Reader->getNameTable()) {
1996 for (
auto Name : *NameTable)
1999 for (
auto Name : *NameTable)
2003 CoverageTracker.setProfAccForSymsInList(
true);
2008 M, *
FAM, Ctx,
nullptr,
2013 false, InlineContext{LTOPhase, InlinePass::ReplaySampleProfileInliner});
2017 if (Reader->profileIsCS() || Reader->profileIsPreInlined() ||
2018 Reader->profileIsProbeBased()) {
2034 if (Reader->profileIsPreInlined()) {
2044 if (Reader->profileIsProbeBased()) {
2051 if (!Reader->profileIsCS()) {
2063 if (Reader->profileIsCS()) {
2065 ContextTracker = std::make_unique<SampleContextTracker>(
2066 Reader->getProfiles(), &GUIDToFuncNameMap);
2070 if (Reader->profileIsProbeBased()) {
2071 ProbeManager = std::make_unique<PseudoProbeManager>(M);
2072 if (!ProbeManager->moduleIsProbed(M)) {
2074 "Pseudo-probe-based profile requires SampleProfileProbePass";
2075 Ctx.
diagnose(DiagnosticInfoSampleProfile(
M.getModuleIdentifier(), Msg,
2083 MatchingManager = std::make_unique<SampleProfileMatcher>(
2084 M, *Reader, CG, ProbeManager.get(), LTOPhase, SymbolMap, PSL,
2085 FuncNameToProfNameMap);
/// Check whether the profile mismatches the current code too much.
///
/// Walks every entry of \p Profiles, fetches the probe descriptor for the
/// entry's GUID from ProbeManager, and increments NumMismatchedFunc for each
/// one where ProbeManager->profileIsHashMismatched(*FuncDesc, FS) holds.
/// When NumMismatchedFunc * 100 reaches the threshold expression, a
/// DiagnosticInfoSampleProfile carrying the module identifier and the
/// "significantly mismatches" message is emitted through the module's
/// LLVMContext.
///
/// The leading assertion message says only probe-based profiles are
/// supported here.
/// NOTE(review): how TotalHotFunc is updated, the hotness filter that uses
/// FS.getTotalSamples(), the right-hand side of the threshold comparison and
/// the return values are on lines missing from this listing.
2101bool SampleProfileLoader::rejectHighStalenessProfile(
2102 Module &M, ProfileSummaryInfo *PSI,
const SampleProfileMap &Profiles) {
2104 "Only support for probe-based profile");
2105 uint64_t TotalHotFunc = 0;
2106 uint64_t NumMismatchedFunc = 0;
2107 for (
const auto &
I : Profiles) {
2108 const auto &
FS =
I.second;
2109 const auto *FuncDesc = ProbeManager->getDesc(
FS.getGUID());
2115 FS.getTotalSamples()))
2119 if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS))
2120 NumMismatchedFunc++;
2128 if (NumMismatchedFunc * 100 >=
2130 auto &Ctx =
M.getContext();
2132 "The input profile significantly mismatches current source code. "
2133 "Please recollect profile to avoid performance regression.";
2134 Ctx.
diagnose(DiagnosticInfoSampleProfile(
M.getModuleIdentifier(), Msg));
/// Post-processing over module \p M, visiting every instruction of every
/// basic block.
///
/// Instructions selected for removal are first collected in InstsToDel and
/// erased afterwards in a separate loop with eraseFromParent(), rather than
/// being erased while the block is being iterated. For instructions that
/// carry a DILocation, a DwarfDiscriminator value is computed.
///
/// NOTE(review): the loop over the functions of M, the test that selects
/// instructions for deletion (by the function's name, presumably pseudo-probe
/// instructions) and what is done with DwarfDiscriminator are on lines
/// missing from this listing.
2140void SampleProfileLoader::removePseudoProbeInstsDiscriminator(
Module &M) {
2142 std::vector<Instruction *> InstsToDel;
2143 for (
auto &BB :
F) {
2144 for (
auto &
I : BB) {
2146 InstsToDel.push_back(&
I);
2148 if (
const DILocation *DIL =
I.getDebugLoc().get()) {
2152 std::optional<uint32_t> DwarfDiscriminator =
2161 for (
auto *
I : InstsToDel)
2162 I->eraseFromParent();
2167 ProfileSummaryInfo *_PSI) {
2168 GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
2171 if (
M.getProfileSummary(
false) ==
nullptr) {
2172 M.setProfileSummary(Reader->getSummary().getMD(
M.getContext()),
2178 rejectHighStalenessProfile(M, PSI, Reader->getProfiles()))
2181 auto Remapper = Reader->getRemapper();
2183 for (
const auto &N_F :
M.getValueSymbolTable()) {
2184 StringRef OrigName = N_F.getKey();
2186 if (
F ==
nullptr || OrigName.
empty())
2188 SymbolMap[FunctionId(OrigName)] =
F;
2190 if (OrigName != NewName && !NewName.
empty()) {
2191 auto r = SymbolMap.emplace(FunctionId(NewName),
F);
2197 r.first->second =
nullptr;
2202 if (
auto MapName = Remapper->lookUpNameInProfile(OrigName)) {
2203 if (*MapName != OrigName && !MapName->empty())
2204 SymbolMap.emplace(FunctionId(*MapName),
F);
2212 MatchingManager->runOnModule();
2213 MatchingManager->clearMatchingData();
2215 assert(SymbolMap.count(FunctionId()) == 0 &&
2216 "No empty StringRef should be added in SymbolMap");
2218 "FuncNameToProfNameMap is not empty when --salvage-unused-profile is "
2221 bool retval =
false;
2222 for (
auto *
F : buildFunctionOrder(M, CG)) {
2224 clearFunctionData();
2230 for (
const std::pair<Function *, NotInlinedProfileInfo> &pair :
2236 removePseudoProbeInstsDiscriminator(M);
2238 M.eraseNamedMetadata(FuncInfo);
2244bool SampleProfileLoader::runOnFunction(Function &
F,
2246 LLVM_DEBUG(
dbgs() <<
"\n\nProcessing Function " <<
F.getName() <<
"\n");
2247 DILocation2SampleMap.clear();
2252 uint64_t initialEntryCount = -1;
2258 initialEntryCount = 0;
2261 ProfAccForSymsInList =
false;
2263 CoverageTracker.setProfAccForSymsInList(ProfAccForSymsInList);
2269 if (ProfAccForSymsInList) {
2271 if (PSL->contains(
F.getName()))
2272 initialEntryCount = 0;
2286 GUIDsInProfile.
count(
2287 Function::getGUIDAssumingExternalLinkage(CanonName))) ||
2289 initialEntryCount = -1;
2294 if (!
F.getEntryCount())
2296 std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
2302 Samples = ContextTracker->getBaseSamplesFor(
F);
2304 Samples = Reader->getSamplesFor(
F);
2309 auto It = OutlineFunctionSamples.find(FunctionId(CanonName));
2310 if (It != OutlineFunctionSamples.end()) {
2311 Samples = &It->second;
2312 }
else if (
auto Remapper = Reader->getRemapper()) {
2313 if (
auto RemppedName = Remapper->lookUpNameInProfile(CanonName)) {
2314 It = OutlineFunctionSamples.find(FunctionId(*RemppedName));
2315 if (It != OutlineFunctionSamples.end())
2316 Samples = &It->second;
2322 if (Samples && !Samples->
empty())
2323 return emitAnnotations(
F);
2329 bool UseFlattenedProfile)
2330 : ProfileFileName(File), ProfileRemappingFileName(RemappingFile),
2331 LTOPhase(LTOPhase), FS(
std::
move(FS)),
2332 DisableSampleProfileInlining(DisableSampleProfileInlining),
2333 UseFlattenedProfile(UseFlattenedProfile) {}
2354 SampleProfileLoader SampleLoader(
2357 : ProfileRemappingFileName,
2358 LTOPhase, FS, GetAssumptionCache, GetTTI, GetTLI, CG,
2359 DisableSampleProfileInlining, UseFlattenedProfile);
2360 if (!SampleLoader.doInitialization(M, &
FAM))
2364 if (!SampleLoader.runOnModule(M, AM, PSI))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
static bool runOnFunction(Function &F, bool PostInlining)
Provides ErrorOr<T> smart pointer.
static cl::opt< unsigned > SizeLimit("eif-limit", cl::init(6), cl::Hidden, cl::desc("Size limit in Hexagon early if-conversion"))
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
static LVReader * CurrentReader
Implements a lazy call graph analysis and related passes for the new pass manager.
Legalize the Machine IR a function's Machine IR
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
static cl::opt< bool > SalvageStaleProfile("memprof-salvage-stale-profile", cl::desc("Salvage stale MemProf profile"), cl::init(false), cl::Hidden)
static const Function * getCalledFunction(const Value *V)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
FunctionAnalysisManager FAM
This file defines the PriorityQueue class.
This file contains the declarations for profiling metadata utility functions.
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
std::pair< BasicBlock *, BasicBlock * > Edge
This file provides the interface for context-sensitive profile tracker used by CSSPGO.
This file provides the interface for the sampled PGO profile loader base implementation.
This file provides the utility functions for the sampled PGO loader base implementation.
This file provides the interface for SampleProfileMatcher.
This file provides the interface for the pseudo probe implementation for AutoFDO.
static cl::opt< unsigned > MinfuncsForStalenessError("min-functions-for-staleness-error", cl::Hidden, cl::init(50), cl::desc("Skip the check if the number of hot functions is smaller than " "the specified number."))
static cl::opt< unsigned > PrecentMismatchForStalenessError("precent-mismatch-for-staleness-error", cl::Hidden, cl::init(80), cl::desc("Reject the profile if the mismatch percent is higher than the " "given number."))
static cl::opt< bool > RemoveProbeAfterProfileAnnotation("sample-profile-remove-probe", cl::Hidden, cl::init(false), cl::desc("Remove pseudo-probe after sample profile annotation."))
static cl::opt< ReplayInlinerSettings::Fallback > ProfileInlineReplayFallback("sample-profile-inline-replay-fallback", cl::init(ReplayInlinerSettings::Fallback::Original), cl::values(clEnumValN(ReplayInlinerSettings::Fallback::Original, "Original", "All decisions not in replay send to original advisor (default)"), clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline, "AlwaysInline", "All decisions not in replay are inlined"), clEnumValN(ReplayInlinerSettings::Fallback::NeverInline, "NeverInline", "All decisions not in replay are not inlined")), cl::desc("How sample profile inline replay treats sites that don't come " "from the replay. Original: defers to original advisor, " "AlwaysInline: inline all sites not in replay, NeverInline: " "inline no sites not in replay"), cl::Hidden)
static cl::opt< bool > OverwriteExistingWeights("overwrite-existing-weights", cl::Hidden, cl::init(false), cl::desc("Ignore existing branch weights on IR and always overwrite."))
static void updateIDTMetaData(Instruction &Inst, const SmallVectorImpl< InstrProfValueData > &CallTargets, uint64_t Sum)
Update indirect call target profile metadata for Inst.
static cl::opt< bool > AnnotateSampleProfileInlinePhase("annotate-sample-profile-inline-phase", cl::Hidden, cl::init(false), cl::desc("Annotate LTO phase (prelink / postlink), or main (no LTO) for " "sample-profile inline pass name."))
static cl::opt< std::string > ProfileInlineReplayFile("sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"), cl::desc("Optimization remarks file containing inline remarks to be replayed " "by inlining from sample profile loader."), cl::Hidden)
static cl::opt< bool > CallsitePrioritizedInline("sample-profile-prioritized-inline", cl::Hidden, cl::desc("Use call site prioritized inlining for sample profile loader. " "Currently only CSSPGO is supported."))
static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate)
Check whether the indirect call promotion history of Inst allows the promotion for Candidate.
static SmallVector< InstrProfValueData, 2 > GetSortedValueDataFromCallTargets(const SampleRecord::CallTargetMap &M)
Returns the sorted CallTargetMap M by count in descending order.
static cl::opt< ReplayInlinerSettings::Scope > ProfileInlineReplayScope("sample-profile-inline-replay-scope", cl::init(ReplayInlinerSettings::Scope::Function), cl::values(clEnumValN(ReplayInlinerSettings::Scope::Function, "Function", "Replay on functions that have remarks associated " "with them (default)"), clEnumValN(ReplayInlinerSettings::Scope::Module, "Module", "Replay on the entire module")), cl::desc("Whether inline replay should be applied to the entire " "Module or just the Functions (default) that are present as " "callers in remarks during sample profile inlining."), cl::Hidden)
static cl::opt< unsigned > ProfileICPRelativeHotness("sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25), cl::desc("Relative hotness percentage threshold for indirect " "call promotion in proirity-based sample profile loader inlining."))
static cl::opt< unsigned > ProfileICPRelativeHotnessSkip("sample-profile-icp-relative-hotness-skip", cl::Hidden, cl::init(1), cl::desc("Skip relative hotness check for ICP up to given number of targets."))
static cl::opt< bool > UsePreInlinerDecision("sample-profile-use-preinliner", cl::Hidden, cl::desc("Use the preinliner decisions stored in profile context."))
static cl::opt< bool > AllowRecursiveInline("sample-profile-recursive-inline", cl::Hidden, cl::desc("Allow sample loader inliner to inline recursive calls."))
static cl::opt< CallSiteFormat::Format > ProfileInlineReplayFormat("sample-profile-inline-replay-format", cl::init(CallSiteFormat::Format::LineColumnDiscriminator), cl::values(clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"), clEnumValN(CallSiteFormat::Format::LineColumn, "LineColumn", "<Line Number>:<Column Number>"), clEnumValN(CallSiteFormat::Format::LineDiscriminator, "LineDiscriminator", "<Line Number>.<Discriminator>"), clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator, "LineColumnDiscriminator", "<Line Number>:<Column Number>.<Discriminator> (default)")), cl::desc("How sample profile inline replay file is formatted"), cl::Hidden)
static cl::opt< unsigned > HotFuncCutoffForStalenessError("hot-func-cutoff-for-staleness-error", cl::Hidden, cl::init(800000), cl::desc("A function is considered hot for staleness error check if its " "total sample count is above the specified percentile"))
This file provides the interface for the sampled PGO loader pass.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Defines the virtual file system interface vfs::FileSystem.
bool empty() const
Returns true if the analysis manager has an empty results cache.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
static bool isPseudoProbeDiscriminator(unsigned Discriminator)
const DILocation * cloneWithDiscriminator(unsigned Discriminator) const
Returns a new DILocation with updated Discriminator.
LLVM_ABI unsigned getLine() const
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Represents either an error or a value T.
Class to represent profile counts.
DISubprogram * getSubprogram() const
Get the attached subprogram.
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
static InlineCost getNever(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost getAlways(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost get(int Cost, int Threshold, int StaticBonus=0)
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI bool extractProfTotalWeight(uint64_t &TotalVal) const
Retrieve total raw weight values of a branch.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
An analysis pass which computes the call graph for a module.
A lazily constructed view of the call graph of a module.
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
ValueT lookup(const KeyT &Key) const
A Module instance is used to store all the information related to an LLVM module.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
LLVM_ABI void refresh(std::unique_ptr< ProfileSummary > &&Other=nullptr)
If a summary is provided as argument, use that.
LLVM_ABI bool isHotCountNthPercentile(int PercentileCutoff, uint64_t C) const
Returns true if count C is considered hot with regard to a given hot percentile cutoff value.
void computeDominanceAndLoopInfo(FunctionT &F)
PostDominatorTreePtrT PDT
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
LLVM_ABI SampleProfileLoaderPass(std::string File="", std::string RemappingFile="", ThinOrFullLTOPhase LTOPhase=ThinOrFullLTOPhase::None, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr, bool DisableSampleProfileInlining=false, bool UseFlattenedProfile=false)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
size_type count(StringRef Key) const
count - Return 1 if the element is in the map, 0 otherwise.
StringRef - Represent a constant reference to a string, i.e.
constexpr bool empty() const
empty - Check if the string is empty.
std::pair< typename Base::iterator, bool > insert(StringRef key)
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
LLVM Value Representation.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
int getNumOccurrences() const
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
const ParentTy * getParent() const
Representation of the samples collected for a function.
void findInlinedFunctions(DenseSet< GlobalValue::GUID > &S, const HashKeyMap< std::unordered_map, FunctionId, Function * > &SymbolMap, uint64_t Threshold) const
Recursively traverses all children, if the total sample count of the corresponding function is no les...
static LLVM_ABI bool ProfileIsCS
FunctionId getFunction() const
Return the function name.
static LLVM_ABI bool ProfileIsProbeBased
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
void setContextSynthetic()
SampleContext & getContext() const
sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight=1)
Merge the samples in Other into this one.
static LLVM_ABI LineLocation getCallSiteIdentifier(const DILocation *DIL, bool ProfileIsFS=false)
Returns a unique call site identifier for a given debug location of a call instruction.
uint64_t getHeadSamplesEstimate() const
Return an estimate of the sample count of the function entry basic block.
uint64_t getGUID() const
Return the GUID of the context's name.
const BodySampleMap & getBodySamples() const
Return all the samples collected in the body of the function.
static LLVM_ABI bool UseMD5
Whether the profile uses MD5 to represent string.
static void flattenProfile(SampleProfileMap &ProfileMap, bool ProfileIsCS=false)
bool hasAttribute(ContextAttributeMask A)
Sample-based profile reader.
static LLVM_ABI ErrorOr< std::unique_ptr< SampleProfileReader > > create(StringRef Filename, LLVMContext &C, vfs::FileSystem &FS, FSDiscriminatorPass P=FSDiscriminatorPass::Base, StringRef RemapFilename="")
Create a sample profile reader appropriate to the file format.
std::unordered_map< FunctionId, uint64_t > CallTargetMap
static const SortedCallTargetSet sortCallTargets(const CallTargetMap &Targets)
Sort call targets in descending order of call frequency.
static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets, float DistributionFactor)
Prorate call targets by a distribution factor.
bool isAtEnd() const
Direct loop termination test which is more efficient than comparison with end().
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
void checkExpectAnnotations(Instruction &I, const ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
DiagnosticInfoOptimizationBase::Argument NV
LLVM_ABI CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< NodeBase * > Node
NodeAddr< FuncNode * > Func
static FunctionId getRepInFormat(StringRef Name)
Get the proper representation of a string according to whether the current Format uses MD5 to represe...
@ ContextDuplicatedIntoBase
std::map< FunctionId, FunctionSamples > FunctionSamplesMap
bool callsiteIsHot(const FunctionSamples *CallsiteFS, ProfileSummaryInfo *PSI, bool ProfAccForSymsInList)
Return true if the given callsite is hot with respect to the hot cutoff threshold.
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
LLVM_ABI IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets a vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
static bool isIndirectCall(const MachineInstr &MI)
cl::opt< bool > ReportProfileStaleness("report-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute and report stale profile statistical metrics."))
cl::opt< bool > PersistProfileStaleness("persist-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute stale profile statistical metrics and write it into the " "native object file(.llvm_stats section)."))
LLVM_ABI bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
LLVM_ABI cl::opt< int > ProfileInlineLimitMin
bool succ_empty(const Instruction *I)
LLVM_ABI InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes=false, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, Function *ForwardVarArgsTo=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
This function inlines the called function into the basic block of the caller.
static cl::opt< bool > ProfileAccurateForSymsInList("profile-accurate-for-symsinlist", cl::Hidden, cl::init(true), cl::desc("For symbols in profile symbol list, regard their profiles to " "be accurate. It may be overridden by profile-sample-accurate. "))
static cl::opt< bool > ProfileMergeInlinee("sample-profile-merge-inlinee", cl::Hidden, cl::init(true), cl::desc("Merge past inlinee's profile to outline version if sample " "profile loader decided not to inline a call site. It will " "only be enabled when top-down order of profile loading is " "enabled. "))
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
static cl::opt< bool > DisableSampleLoaderInlining("disable-sample-loader-inlining", cl::Hidden, cl::init(false), cl::desc("If true, artificially skip inline transformation in sample-loader " "pass, and merge (or scale) profiles (as configured by " "--sample-profile-merge-inlinee)."))
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
static cl::opt< bool > UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden, cl::desc("Process functions in a top-down order " "defined by the profiled call graph when " "-sample-profile-top-down-load is on."))
static cl::opt< bool > ProfileSampleAccurate("profile-sample-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "callsite and function as having 0 samples. Otherwise, treat " "un-sampled callsites and functions conservatively as unknown. "))
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
static void buildTopDownFuncOrder(LazyCallGraph &CG, std::vector< Function * > &FunctionOrderList)
LLVM_ABI void setProbeDistributionFactor(Instruction &Inst, float Factor)
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
static cl::opt< bool > ProfileSampleBlockAccurate("profile-sample-block-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "branches and calls as having 0 samples. Otherwise, treat " "them conservatively as unknown. "))
LLVM_ABI std::string AnnotateInlinePassName(InlineContext IC)
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
cl::opt< bool > SampleProfileUseProfi
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
void sort(IteratorTy Start, IteratorTy End)
llvm::cl::opt< bool > UseIterativeBFIInference
LLVM_ABI std::optional< PseudoProbe > extractProbe(const Instruction &Inst)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI void emitInlinedIntoBasedOnCost(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, const BasicBlock *Block, const Function &Callee, const Function &Caller, const InlineCost &IC, bool ForProfileContext=false, const char *PassName=nullptr)
Emit ORE message based on cost (default heuristic).
FunctionAddr VTableAddr Count
Function::ProfileCount ProfileCount
static cl::opt< std::string > SampleProfileRemappingFile("sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden)
LLVM_ABI SmallVector< InstrProfValueData, 4 > getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst and return them if Inst is annotated with value profile dat...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
std::unique_ptr< InlineAdvisor > getReplayInlineAdvisor(Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context, std::unique_ptr< InlineAdvisor > OriginalAdvisor, const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks, InlineContext IC)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI cl::opt< int > SampleHotCallSiteThreshold
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr, function_ref< EphemeralValuesCache &(Function &)> GetEphValuesCache=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
LLVM_ABI void updateProfileCallee(Function *Callee, int64_t EntryDelta, const ValueMap< const Value *, WeakTrackingVH > *VMap=nullptr)
Updates profile information by adjusting the entry count by adding EntryDelta then scaling callsite i...
cl::opt< bool > SalvageStaleProfile("salvage-stale-profile", cl::Hidden, cl::init(false), cl::desc("Salvage stale profile by fuzzy matching and use the remapped " "location for sample profile query."))
LLVM_ABI cl::opt< int > SampleColdCallSiteThreshold
LLVM_ABI InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
static bool skipProfileForFunction(const Function &F)
LLVM_ABI cl::opt< bool > SortProfiledSCC
static cl::opt< std::string > SampleProfileFile("sample-profile-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile file loaded by -sample-profile"), cl::Hidden)
cl::opt< bool > SalvageUnusedProfile("salvage-unused-profile", cl::Hidden, cl::init(false), cl::desc("Salvage unused profile by matching with new " "functions on call graph."))
static cl::opt< bool > ProfileTopDownLoad("sample-profile-top-down-load", cl::Hidden, cl::init(true), cl::desc("Do profile annotation and inlining for functions in top-down " "order of call graph during sample profile loading. It only " "works for new pass manager. "))
static cl::opt< unsigned > MaxNumPromotions("icp-max-prom", cl::init(3), cl::Hidden, cl::desc("Max number of promotions for a single indirect " "call callsite"))
LLVM_ABI cl::opt< int > ProfileInlineLimitMax
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
cl::opt< bool > EnableExtTspBlockPlacement
const uint64_t NOMORE_ICP_MAGICNUM
Magic number in the value profile metadata showing a target has been promoted for the instruction and...
LLVM_ABI cl::opt< int > ProfileInlineGrowthLimit
static cl::opt< bool > ProfileSizeInline("sample-profile-inline-size", cl::Hidden, cl::init(false), cl::desc("Inline cold call sites in profile loader if it's beneficial " "for code size."))
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
constexpr const char * PseudoProbeDescMetadataName
Implement std::hash so that hash_code can be used in STL containers.
A wrapper of binary function with basic blocks and jumps.
std::optional< bool > AllowRecursiveCall
Indicate whether we allow inlining for recursive call.
std::optional< bool > ComputeFullInlineCost
Compute inline cost even when the cost has exceeded the threshold.
static std::optional< uint32_t > extractDwarfBaseDiscriminator(uint32_t Value)