91#include <system_error>
96using namespace sampleprof;
99#define DEBUG_TYPE "sample-profile"
100#define CSINLINE_DEBUG DEBUG_TYPE "-inline"
103 "Number of functions inlined with context sensitive profile");
105 "Number of functions not inlined with context sensitive profile");
107 "Number of functions with CFG mismatched profile");
108STATISTIC(NumMatchedProfile,
"Number of functions with CFG matched profile");
110 "Number of inlined callsites with a partial distribution factor");
113 "Number of functions with FDO inline stopped due to min size limit");
115 "Number of functions with FDO inline stopped due to max size limit");
117 NumCSInlinedHitGrowthLimit,
118 "Number of functions with FDO inline stopped due to growth size limit");
135 cl::desc(
"Compute and report stale profile statistical metrics."));
139 cl::desc(
"Compute stale profile statistical metrics and write it into the "
140 "native object file(.llvm_stats section)."));
144 cl::desc(
"If the sample profile is accurate, we will mark all un-sampled "
145 "callsite and function as having 0 samples. Otherwise, treat "
146 "un-sampled callsites and functions conservatively as unknown. "));
150 cl::desc(
"If the sample profile is accurate, we will mark all un-sampled "
151 "branches and calls as having 0 samples. Otherwise, treat "
152 "them conservatively as unknown. "));
156 cl::desc(
"For symbols in profile symbol list, regard their profiles to "
157 "be accurate. It may be overriden by profile-sample-accurate. "));
161 cl::desc(
"Merge past inlinee's profile to outline version if sample "
162 "profile loader decided not to inline a call site. It will "
163 "only be enabled when top-down order of profile loading is "
168 cl::desc(
"Do profile annotation and inlining for functions in top-down "
169 "order of call graph during sample profile loading. It only "
170 "works for new pass manager. "));
174 cl::desc(
"Process functions in a top-down order "
175 "defined by the profiled call graph when "
176 "-sample-profile-top-down-load is on."));
180 cl::desc(
"Inline cold call sites in profile loader if it's beneficial "
188 cl::desc(
"If true, artifically skip inline transformation in sample-loader "
189 "pass, and merge (or scale) profiles (as configured by "
190 "--sample-profile-merge-inlinee)."));
195 cl::desc(
"Sort profiled recursion by edge weights."));
199 cl::desc(
"The size growth ratio limit for proirity-based sample profile "
200 "loader inlining."));
204 cl::desc(
"The lower bound of size growth limit for "
205 "proirity-based sample profile loader inlining."));
209 cl::desc(
"The upper bound of size growth limit for "
210 "proirity-based sample profile loader inlining."));
214 cl::desc(
"Hot callsite threshold for proirity-based sample profile loader "
219 cl::desc(
"Threshold for inlining cold callsites"));
225 "Relative hotness percentage threshold for indirect "
226 "call promotion in proirity-based sample profile loader inlining."));
231 "Skip relative hotness check for ICP up to given number of targets."));
234 "sample-profile-prioritized-inline",
cl::Hidden,
236 cl::desc(
"Use call site prioritized inlining for sample profile loader."
237 "Currently only CSSPGO is supported."));
242 cl::desc(
"Use the preinliner decisions stored in profile context."));
245 "sample-profile-recursive-inline",
cl::Hidden,
247 cl::desc(
"Allow sample loader inliner to inline recursive calls."));
252 "Optimization remarks file containing inline remarks to be replayed "
253 "by inlining from sample profile loader."),
257 "sample-profile-inline-replay-scope",
258 cl::init(ReplayInlinerSettings::Scope::Function),
260 "Replay on functions that have remarks associated "
261 "with them (default)"),
262 clEnumValN(ReplayInlinerSettings::Scope::Module,
"Module",
263 "Replay on the entire module")),
264 cl::desc(
"Whether inline replay should be applied to the entire "
265 "Module or just the Functions (default) that are present as "
266 "callers in remarks during sample profile inlining."),
270 "sample-profile-inline-replay-fallback",
271 cl::init(ReplayInlinerSettings::Fallback::Original),
274 ReplayInlinerSettings::Fallback::Original,
"Original",
275 "All decisions not in replay send to original advisor (default)"),
276 clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline,
277 "AlwaysInline",
"All decisions not in replay are inlined"),
278 clEnumValN(ReplayInlinerSettings::Fallback::NeverInline,
"NeverInline",
279 "All decisions not in replay are not inlined")),
280 cl::desc(
"How sample profile inline replay treats sites that don't come "
281 "from the replay. Original: defers to original advisor, "
282 "AlwaysInline: inline all sites not in replay, NeverInline: "
283 "inline no sites not in replay"),
287 "sample-profile-inline-replay-format",
288 cl::init(CallSiteFormat::Format::LineColumnDiscriminator),
290 clEnumValN(CallSiteFormat::Format::Line,
"Line",
"<Line Number>"),
291 clEnumValN(CallSiteFormat::Format::LineColumn,
"LineColumn",
292 "<Line Number>:<Column Number>"),
293 clEnumValN(CallSiteFormat::Format::LineDiscriminator,
294 "LineDiscriminator",
"<Line Number>.<Discriminator>"),
295 clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator,
296 "LineColumnDiscriminator",
297 "<Line Number>:<Column Number>.<Discriminator> (default)")),
302 cl::desc(
"Max number of promotions for a single indirect "
303 "call callsite in sample profile loader"));
307 cl::desc(
"Ignore existing branch weights on IR and always overwrite."));
311 cl::desc(
"Annotate LTO phase (prelink / postlink), or main (no LTO) for "
312 "sample-profile inline pass name."));
322using Edge = std::pair<const BasicBlock *, const BasicBlock *>;
327class GUIDToFuncNameMapper {
332 CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) {
336 for (
const auto &
F : CurrentModule) {
338 CurrentGUIDToFuncNameMap.insert(
339 {Function::getGUID(OrigName), OrigName});
349 if (CanonName != OrigName)
350 CurrentGUIDToFuncNameMap.insert(
351 {Function::getGUID(CanonName), CanonName});
355 SetGUIDToFuncNameMapForAll(&CurrentGUIDToFuncNameMap);
358 ~GUIDToFuncNameMapper() {
362 CurrentGUIDToFuncNameMap.clear();
366 SetGUIDToFuncNameMapForAll(
nullptr);
371 std::queue<FunctionSamples *> FSToUpdate;
373 FSToUpdate.push(&IFS.second);
376 while (!FSToUpdate.empty()) {
379 FS->GUIDToFuncNameMap = Map;
380 for (
const auto &ICS : FS->getCallsiteSamples()) {
382 for (
const auto &IFS : FSMap) {
384 FSToUpdate.push(&FS);
396struct InlineCandidate {
406 float CallsiteDistribution;
410struct CandidateComparer {
411 bool operator()(
const InlineCandidate &LHS,
const InlineCandidate &RHS) {
412 if (
LHS.CallsiteCount !=
RHS.CallsiteCount)
413 return LHS.CallsiteCount <
RHS.CallsiteCount;
417 assert(LCS && RCS &&
"Expect non-null FunctionSamples");
428using CandidateQueue =
433class SampleProfileMatcher {
439 uint64_t TotalProfiledCallsites = 0;
440 uint64_t NumMismatchedCallsites = 0;
441 uint64_t MismatchedCallsiteSamples = 0;
445 uint64_t MismatchedFuncHashSamples = 0;
451 :
M(
M), Reader(Reader), ProbeManager(ProbeManager) {}
452 void detectProfileMismatch();
461class SampleProfileLoader final
472 GetAC(
std::
move(GetAssumptionCache)),
473 GetTTI(
std::
move(GetTargetTransformInfo)), GetTLI(
std::
move(GetTLI)),
492 std::vector<const FunctionSamples *>
499 bool tryPromoteAndInlineCandidate(
505 std::optional<InlineCost> getExternalInlineAdvisorCost(
CallBase &CB);
506 bool getExternalInlineAdvisorShouldInline(
CallBase &CB);
507 InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
508 bool getInlineCandidate(InlineCandidate *NewCandidate,
CallBase *CB);
510 tryInlineCandidate(InlineCandidate &Candidate,
513 inlineHotFunctionsWithPriority(
Function &
F,
517 void emitOptimizationRemarksForInlineCandidates(
520 void promoteMergeNotInlinedContextSamples(
524 std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(
Module &M);
525 void generateMDProfMetadata(
Function &
F);
538 std::unique_ptr<SampleContextTracker> ContextTracker;
546 const std::string AnnotatedPassName;
550 std::unique_ptr<ProfileSymbolList> PSL;
561 struct NotInlinedProfileInfo {
579 bool ProfAccForSymsInList;
582 std::unique_ptr<InlineAdvisor> ExternalInlineAdvisor;
585 std::unique_ptr<PseudoProbeManager> ProbeManager;
588 std::unique_ptr<SampleProfileMatcher> MatchingManager;
591 const char *getAnnotatedRemarkPassName()
const {
592 return AnnotatedPassName.c_str();
599 return getProbeWeight(Inst);
603 return std::error_code();
608 if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst) || isa<PHINode>(Inst))
609 return std::error_code();
618 if (
const auto *CB = dyn_cast<CallBase>(&Inst))
619 if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
622 return getInstWeightImpl(Inst);
630 "Profile is not pseudo probe based");
635 return std::error_code();
657 if (
const auto *CB = dyn_cast<CallBase>(&Inst))
658 if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
663 uint64_t Samples =
R.get() * Probe->Factor;
664 bool FirstMark = CoverageTracker.markSamplesUsed(FS, Probe->Id, 0, Samples);
669 Remark <<
" samples from profile (ProbeId=";
673 Remark <<
", OriginalSamples=";
680 <<
" - weight: " <<
R.get() <<
" - factor: "
681 <<
format(
"%0.2f", Probe->Factor) <<
")\n");
700SampleProfileLoader::findCalleeFunctionSamples(
const CallBase &Inst)
const {
708 CalleeName =
Callee->getName();
711 return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName);
718 CalleeName, Reader->getRemapper());
724std::vector<const FunctionSamples *>
725SampleProfileLoader::findIndirectCallFunctionSamples(
728 std::vector<const FunctionSamples *>
R;
735 assert(L && R &&
"Expect non-null FunctionSamples");
736 if (
L->getHeadSamplesEstimate() !=
R->getHeadSamplesEstimate())
737 return L->getHeadSamplesEstimate() >
R->getHeadSamplesEstimate();
744 ContextTracker->getIndirectCalleeContextSamplesFor(DIL);
745 if (CalleeSamples.empty())
751 for (
const auto *
const FS : CalleeSamples) {
752 Sum +=
FS->getHeadSamplesEstimate();
764 auto T =
FS->findCallTargetMapAt(CallSite);
767 for (
const auto &T_C :
T.get())
772 for (
const auto &NameFS : *M) {
773 Sum += NameFS.second.getHeadSamplesEstimate();
774 R.push_back(&NameFS.second);
782SampleProfileLoader::findFunctionSamples(
const Instruction &Inst)
const {
793 auto it = DILocation2SampleMap.try_emplace(DIL,
nullptr);
796 it.first->second = ContextTracker->getContextSamplesFor(DIL);
799 Samples->findFunctionSamples(DIL, Reader->getRemapper());
801 return it.first->second;
814 std::unique_ptr<InstrProfValueData[]> ValueData =
818 ValueData.get(), NumVals, TotalCount,
true);
824 unsigned NumPromoted = 0;
832 if (ValueData[
I].
Value == Function::getGUID(Candidate))
861 std::unique_ptr<InstrProfValueData[]> ValueData =
865 ValueData.get(), NumVals, OldSum,
true);
871 "If sum is 0, assume only one element in CallTargets "
872 "with count being NOMORE_ICP_MAGICNUM");
876 ValueCountMap[ValueData[
I].
Value] = ValueData[
I].Count;
883 OldSum -= Pair.first->second;
893 ValueCountMap[ValueData[
I].Value] = ValueData[
I].Count;
897 for (
const auto &
Data : CallTargets) {
904 assert(Sum >=
Data.Count &&
"Sum should never be less than Data.Count");
910 for (
const auto &ValueCount : ValueCountMap) {
912 InstrProfValueData{ValueCount.first, ValueCount.second});
916 [](
const InstrProfValueData &L,
const InstrProfValueData &R) {
917 if (L.Count != R.Count)
918 return L.Count > R.Count;
919 return L.Value > R.Value;
925 NewCallTargets, Sum, IPVK_IndirectCallTarget, MaxMDCount);
938bool SampleProfileLoader::tryPromoteAndInlineCandidate(
949 auto CalleeFunctionName = Candidate.CalleeSamples->getFuncName();
954 auto &CI = *Candidate.CallInstr;
958 const char *Reason =
"Callee function not available";
965 if (!
R->getValue()->isDeclaration() &&
R->getValue()->getSubprogram() &&
966 R->getValue()->hasFnAttribute(
"use-sample-profile") &&
975 CI,
R->getValue(), Candidate.CallsiteCount, Sum,
false, ORE);
977 Sum -= Candidate.CallsiteCount;
990 Candidate.CallInstr = DI;
991 if (isa<CallInst>(DI) || isa<InvokeInst>(DI)) {
992 bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
997 *DI,
static_cast<float>(Candidate.CallsiteCount) / SumOrigin);
1004 << Candidate.CalleeSamples->getFuncName() <<
" because "
1024 if (
Cost.isAlways())
1030void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
1033 for (
auto *
I : Candidates) {
1034 Function *CalledFunction =
I->getCalledFunction();
1035 if (CalledFunction) {
1037 "InlineAttempt",
I->getDebugLoc(),
1039 <<
"previous inlining reattempted for "
1040 << (Hot ?
"hotness: '" :
"size: '")
1041 <<
ore::NV(
"Callee", CalledFunction) <<
"' into '"
1047void SampleProfileLoader::findExternalInlineCandidate(
1054 if (CB && getExternalInlineAdvisorShouldInline(*CB)) {
1066 assert(Samples &&
"expect non-null caller profile");
1076 std::queue<ContextTrieNode *> CalleeList;
1077 CalleeList.push(Caller);
1078 while (!CalleeList.empty()) {
1098 if (!Func ||
Func->isDeclaration())
1104 for (
const auto &TS : BS.second.getCallTargets())
1105 if (TS.getValue() > Threshold) {
1116 for (
auto &Child :
Node->getAllChildContext()) {
1118 CalleeList.push(CalleeNode);
1145bool SampleProfileLoader::inlineHotFunctions(
1149 assert((!ProfAccForSymsInList ||
1151 !
F.hasFnAttribute(
"profile-sample-accurate"))) &&
1152 "ProfAccForSymsInList should be false when profile-sample-accurate "
1156 bool Changed =
false;
1157 bool LocalChanged =
true;
1158 while (LocalChanged) {
1159 LocalChanged =
false;
1161 for (
auto &BB :
F) {
1165 for (
auto &
I : BB) {
1167 if (
auto *CB = dyn_cast<CallBase>(&
I)) {
1168 if (!isa<IntrinsicInst>(
I)) {
1169 if ((FS = findCalleeFunctionSamples(*CB))) {
1171 "GUIDToFuncNameMap has to be populated");
1173 if (
FS->getHeadSamplesEstimate() > 0 ||
1175 LocalNotInlinedCallSites.
insert({CB,
FS});
1178 else if (shouldInlineColdCallee(*CB))
1180 }
else if (getExternalInlineAdvisorShouldInline(*CB)) {
1186 if (Hot || ExternalInlineAdvisor) {
1188 emitOptimizationRemarksForInlineCandidates(AllCandidates,
F,
true);
1191 emitOptimizationRemarksForInlineCandidates(ColdCandidates,
F,
false);
1195 Function *CalledFunction =
I->getCalledFunction();
1196 InlineCandidate Candidate = {
I, LocalNotInlinedCallSites.
lookup(
I),
1200 if (CalledFunction == &
F)
1202 if (
I->isIndirectCall()) {
1204 for (
const auto *FS : findIndirectCallFunctionSamples(*
I, Sum)) {
1206 if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1207 findExternalInlineCandidate(
I, FS, InlinedGUIDs, SymbolMap,
1208 PSI->getOrCompHotCountThreshold());
1214 Candidate = {
I,
FS,
FS->getHeadSamplesEstimate(), 1.0};
1215 if (tryPromoteAndInlineCandidate(
F, Candidate, SumOrigin, Sum)) {
1216 LocalNotInlinedCallSites.
erase(
I);
1217 LocalChanged =
true;
1220 }
else if (CalledFunction && CalledFunction->
getSubprogram() &&
1222 if (tryInlineCandidate(Candidate)) {
1223 LocalNotInlinedCallSites.
erase(
I);
1224 LocalChanged =
true;
1226 }
else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1227 findExternalInlineCandidate(
I, findCalleeFunctionSamples(*
I),
1228 InlinedGUIDs, SymbolMap,
1229 PSI->getOrCompHotCountThreshold());
1232 Changed |= LocalChanged;
1238 promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites,
F);
1242bool SampleProfileLoader::tryInlineCandidate(
1249 CallBase &CB = *Candidate.CallInstr;
1251 assert(CalledFunction &&
"Expect a callee with definition");
1256 if (
Cost.isNever()) {
1258 "InlineFail", DLoc, BB)
1259 <<
"incompatible inlining");
1267 IFI.UpdateProfile =
false;
1270 if (!
IR.isSuccess())
1275 Cost,
true, getAnnotatedRemarkPassName());
1278 if (InlinedCallSites) {
1279 InlinedCallSites->
clear();
1280 for (
auto &
I : IFI.InlinedCallSites)
1285 ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
1295 if (Candidate.CallsiteDistribution < 1) {
1296 for (
auto &
I : IFI.InlinedCallSites) {
1299 Candidate.CallsiteDistribution);
1301 NumDuplicatedInlinesite++;
1307bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
1309 assert(CB &&
"Expect non-null call instruction");
1311 if (isa<IntrinsicInst>(CB))
1315 const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB);
1318 if (!CalleeSamples && !getExternalInlineAdvisorShouldInline(*CB))
1322 if (std::optional<PseudoProbe> Probe =
extractProbe(*CB))
1323 Factor = Probe->Factor;
1327 *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
1331std::optional<InlineCost>
1332SampleProfileLoader::getExternalInlineAdvisorCost(
CallBase &CB) {
1333 std::unique_ptr<InlineAdvice> Advice =
nullptr;
1334 if (ExternalInlineAdvisor) {
1335 Advice = ExternalInlineAdvisor->getAdvice(CB);
1337 if (!Advice->isInliningRecommended()) {
1338 Advice->recordUnattemptedInlining();
1341 Advice->recordInlining();
1349bool SampleProfileLoader::getExternalInlineAdvisorShouldInline(
CallBase &CB) {
1350 std::optional<InlineCost>
Cost = getExternalInlineAdvisorCost(CB);
1355SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
1356 if (std::optional<InlineCost> ReplayCost =
1357 getExternalInlineAdvisorCost(*Candidate.CallInstr))
1363 if (Candidate.CallsiteCount > PSI->getHotCountThreshold())
1370 assert(
Callee &&
"Expect a definition for inline candidate of direct call");
1383 GetTTI(*
Callee), GetAC, GetTLI);
1386 if (
Cost.isNever() ||
Cost.isAlways())
1419bool SampleProfileLoader::inlineHotFunctionsWithPriority(
1423 assert((!ProfAccForSymsInList ||
1425 !
F.hasFnAttribute(
"profile-sample-accurate"))) &&
1426 "ProfAccForSymsInList should be false when profile-sample-accurate "
1431 CandidateQueue CQueue;
1432 InlineCandidate NewCandidate;
1433 for (
auto &BB :
F) {
1434 for (
auto &
I : BB) {
1435 auto *CB = dyn_cast<CallBase>(&
I);
1438 if (getInlineCandidate(&NewCandidate, CB))
1439 CQueue.push(NewCandidate);
1448 "Max inline size limit should not be smaller than min inline size "
1453 if (ExternalInlineAdvisor)
1454 SizeLimit = std::numeric_limits<unsigned>::max();
1459 bool Changed =
false;
1460 while (!CQueue.empty() &&
F.getInstructionCount() <
SizeLimit) {
1461 InlineCandidate Candidate = CQueue.top();
1464 Function *CalledFunction =
I->getCalledFunction();
1466 if (CalledFunction == &
F)
1468 if (
I->isIndirectCall()) {
1470 auto CalleeSamples = findIndirectCallFunctionSamples(*
I, Sum);
1472 Sum *= Candidate.CallsiteDistribution;
1473 unsigned ICPCount = 0;
1474 for (
const auto *FS : CalleeSamples) {
1476 if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1477 findExternalInlineCandidate(
I, FS, InlinedGUIDs, SymbolMap,
1478 PSI->getOrCompHotCountThreshold());
1482 FS->getHeadSamplesEstimate() * Candidate.CallsiteDistribution;
1497 if (!PSI->isHotCount(EntryCountDistributed))
1502 Candidate = {
I,
FS, EntryCountDistributed,
1503 Candidate.CallsiteDistribution};
1504 if (tryPromoteAndInlineCandidate(
F, Candidate, SumOrigin, Sum,
1505 &InlinedCallSites)) {
1506 for (
auto *CB : InlinedCallSites) {
1507 if (getInlineCandidate(&NewCandidate, CB))
1508 CQueue.emplace(NewCandidate);
1512 }
else if (!ContextTracker) {
1513 LocalNotInlinedCallSites.
insert({
I,
FS});
1516 }
else if (CalledFunction && CalledFunction->
getSubprogram() &&
1519 if (tryInlineCandidate(Candidate, &InlinedCallSites)) {
1520 for (
auto *CB : InlinedCallSites) {
1521 if (getInlineCandidate(&NewCandidate, CB))
1522 CQueue.emplace(NewCandidate);
1525 }
else if (!ContextTracker) {
1526 LocalNotInlinedCallSites.
insert({
I, Candidate.CalleeSamples});
1528 }
else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1529 findExternalInlineCandidate(
I, findCalleeFunctionSamples(*
I),
1530 InlinedGUIDs, SymbolMap,
1531 PSI->getOrCompHotCountThreshold());
1535 if (!CQueue.empty()) {
1537 ++NumCSInlinedHitMaxLimit;
1539 ++NumCSInlinedHitMinLimit;
1541 ++NumCSInlinedHitGrowthLimit;
1547 promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites,
F);
1551void SampleProfileLoader::promoteMergeNotInlinedContextSamples(
1555 for (
const auto &Pair : NonInlinedCallSites) {
1563 I->getDebugLoc(),
I->getParent())
1564 <<
"previous inlining not repeated: '" <<
ore::NV(
"Callee",
Callee)
1565 <<
"' into '" <<
ore::NV(
"Caller", &
F) <<
"'");
1569 if (
FS->getTotalSamples() == 0 &&
FS->getHeadSamplesEstimate() == 0) {
1583 if (
FS->getHeadSamples() == 0) {
1587 FS->getHeadSamplesEstimate());
1593 OutlineFS->
merge(*FS, 1);
1599 notInlinedCallInfo.try_emplace(
Callee, NotInlinedProfileInfo{0});
1600 pair.first->second.entryCount +=
FS->getHeadSamplesEstimate();
1618void SampleProfileLoader::generateMDProfMetadata(
Function &
F) {
1621 LLVM_DEBUG(
dbgs() <<
"\nPropagation complete. Setting branch weights\n");
1624 for (
auto &BI :
F) {
1627 if (BlockWeights[BB]) {
1628 for (
auto &
I : *BB) {
1629 if (!isa<CallInst>(
I) && !isa<InvokeInst>(
I))
1640 auto T =
FS->findCallTargetMapAt(CallSite);
1641 if (!
T ||
T.get().empty())
1648 if (Probe->Factor < 1)
1655 for (
const auto &
C :
T.get())
1662 FS->findFunctionSamplesMapAt(CallSite)) {
1663 for (
const auto &NameFS : *M)
1664 Sum += NameFS.second.getHeadSamplesEstimate();
1670 I.setMetadata(LLVMContext::MD_prof,
nullptr);
1671 }
else if (!isa<IntrinsicInst>(&
I)) {
1672 I.setMetadata(LLVMContext::MD_prof,
1673 MDB.createBranchWeights(
1674 {static_cast<uint32_t>(BlockWeights[BB])}));
1680 for (
auto &
I : *BB) {
1681 if (isa<CallInst>(
I) || isa<InvokeInst>(
I)) {
1683 I.setMetadata(LLVMContext::MD_prof,
nullptr);
1685 I.setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(0));
1693 if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI) &&
1694 !isa<IndirectBrInst>(TI))
1700 :
Twine(
"<UNKNOWN LOCATION>"))
1709 std::vector<uint64_t> EdgeIndex;
1714 EdgeIndex[
I] = EdgeMultiplicity[Succ];
1715 EdgeMultiplicity[Succ]++;
1720 Edge
E = std::make_pair(BB, Succ);
1726 if (Weight > std::numeric_limits<uint32_t>::max()) {
1728 Weight = std::numeric_limits<uint32_t>::max();
1737 uint64_t W = Weight / EdgeMultiplicity[Succ];
1739 if (EdgeIndex[
I] < Weight % EdgeMultiplicity[Succ])
1744 if (Weight > MaxWeight) {
1746 MaxDestInst = Succ->getFirstNonPHIOrDbgOrLifetime();
1762 if (MaxWeight > 0 &&
1765 TI->
setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
1768 <<
"most popular destination for conditional branches at "
1769 <<
ore::NV(
"CondBranchesLoc", BranchLoc);
1788bool SampleProfileLoader::emitAnnotations(
Function &
F) {
1789 bool Changed =
false;
1792 if (!ProbeManager->profileIsValid(
F, *Samples)) {
1794 dbgs() <<
"Profile is invalid due to CFG mismatch for Function "
1796 ++NumMismatchedProfile;
1799 ++NumMatchedProfile;
1801 if (getFunctionLoc(
F) == 0)
1805 <<
F.getName() <<
": " << getFunctionLoc(
F) <<
"\n");
1810 Changed |= inlineHotFunctionsWithPriority(
F, InlinedGUIDs);
1812 Changed |= inlineHotFunctions(
F, InlinedGUIDs);
1814 Changed |= computeAndPropagateWeights(
F, InlinedGUIDs);
1817 generateMDProfMetadata(
F);
1819 emitCoverageRemarks(
F);
1823std::unique_ptr<ProfiledCallGraph>
1824SampleProfileLoader::buildProfiledCallGraph(
Module &M) {
1825 std::unique_ptr<ProfiledCallGraph> ProfiledCG;
1827 ProfiledCG = std::make_unique<ProfiledCallGraph>(*ContextTracker);
1829 ProfiledCG = std::make_unique<ProfiledCallGraph>(Reader->getProfiles());
1835 if (
F.isDeclaration() || !
F.hasFnAttribute(
"use-sample-profile"))
1843std::vector<Function *>
1845 std::vector<Function *> FunctionOrderList;
1846 FunctionOrderList.reserve(
M.size());
1849 errs() <<
"WARNING: -use-profiled-call-graph ignored, should be used "
1850 "together with -sample-profile-top-down-load.\n";
1863 if (!
F.isDeclaration() &&
F.hasFnAttribute(
"use-sample-profile"))
1864 FunctionOrderList.push_back(&
F);
1865 return FunctionOrderList;
1918 std::unique_ptr<ProfiledCallGraph> ProfiledCG = buildProfiledCallGraph(M);
1927 for (
auto *
Node : Range) {
1929 if (
F && !
F->isDeclaration() &&
F->hasFnAttribute(
"use-sample-profile"))
1930 FunctionOrderList.push_back(
F);
1940 if (!
F.isDeclaration() &&
F.hasFnAttribute(
"use-sample-profile"))
1941 FunctionOrderList.push_back(&
F);
1947 std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
1950 dbgs() <<
"Function processing order:\n";
1951 for (
auto F : FunctionOrderList) {
1952 dbgs() <<
F->getName() <<
"\n";
1956 return FunctionOrderList;
1959bool SampleProfileLoader::doInitialization(
Module &M,
1961 auto &Ctx =
M.getContext();
1964 Filename, Ctx, *FS, FSDiscriminatorPass::Base, RemappingFilename);
1965 if (std::error_code EC = ReaderOrErr.getError()) {
1966 std::string
Msg =
"Could not open profile: " +
EC.message();
1970 Reader = std::move(ReaderOrErr.get());
1971 Reader->setSkipFlatProf(LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink);
1974 Reader->setModule(&M);
1975 if (std::error_code EC = Reader->read()) {
1976 std::string
Msg =
"profile reading failed: " +
EC.message();
1981 PSL = Reader->getProfileSymbolList();
1984 ProfAccForSymsInList =
1986 if (ProfAccForSymsInList) {
1987 NamesInProfile.clear();
1988 if (
auto NameTable = Reader->getNameTable())
1989 NamesInProfile.insert(NameTable->begin(), NameTable->end());
1990 CoverageTracker.setProfAccForSymsInList(
true);
1995 M, *
FAM, Ctx,
nullptr,
2000 false,
InlineContext{LTOPhase, InlinePass::ReplaySampleProfileInliner});
2004 if (Reader->profileIsCS() || Reader->profileIsPreInlined() ||
2005 Reader->profileIsProbeBased()) {
2021 if (Reader->profileIsPreInlined()) {
2026 if (!Reader->profileIsCS()) {
2038 if (Reader->profileIsCS()) {
2040 ContextTracker = std::make_unique<SampleContextTracker>(
2041 Reader->getProfiles(), &GUIDToFuncNameMap);
2045 if (Reader->profileIsProbeBased()) {
2046 ProbeManager = std::make_unique<PseudoProbeManager>(M);
2047 if (!ProbeManager->moduleIsProbed(M)) {
2049 "Pseudo-probe-based profile requires SampleProfileProbePass";
2058 std::make_unique<SampleProfileMatcher>(M, *Reader, ProbeManager.get());
2064void SampleProfileMatcher::detectProfileMismatch(
const Function &
F,
2068 TotalFuncHashSamples += Count;
2069 TotalProfiledFunc++;
2070 if (!ProbeManager->profileIsValid(
F, FS)) {
2071 MismatchedFuncHashSamples += Count;
2072 NumMismatchedFuncHash++;
2077 std::unordered_set<LineLocation, LineLocationHash> MatchedCallsiteLocs;
2081 for (
auto &BB :
F) {
2082 for (
auto &
I : BB) {
2083 if (!isa<CallBase>(&
I) || isa<IntrinsicInst>(&
I))
2086 const auto *CB = dyn_cast<CallBase>(&
I);
2087 if (
auto &DLoc =
I.getDebugLoc()) {
2094 const auto CTM =
FS.findCallTargetMapAt(IRCallsite);
2095 const auto CallsiteFS =
FS.findFunctionSamplesMapAt(IRCallsite);
2098 if (CalleeName.
empty()) {
2103 if ((CTM && !CTM->empty()) || (CallsiteFS && !CallsiteFS->empty()))
2104 MatchedCallsiteLocs.insert(IRCallsite);
2107 if ((CTM && CTM->count(CalleeName)) ||
2108 (CallsiteFS && CallsiteFS->count(CalleeName)))
2109 MatchedCallsiteLocs.insert(IRCallsite);
2115 auto isInvalidLineOffset = [](
uint32_t LineOffset) {
2116 return LineOffset & 0x8000;
2121 for (
auto &
I :
FS.getBodySamples()) {
2127 if (!
I.second.getCallTargets().empty()) {
2128 TotalCallsiteSamples += Count;
2129 TotalProfiledCallsites++;
2130 if (!MatchedCallsiteLocs.count(Loc)) {
2131 MismatchedCallsiteSamples += Count;
2132 NumMismatchedCallsites++;
2137 for (
auto &
I :
FS.getCallsiteSamples()) {
2143 for (
auto &FM :
I.second) {
2144 Count += FM.second.getHeadSamplesEstimate();
2146 TotalCallsiteSamples += Count;
2147 TotalProfiledCallsites++;
2148 if (!MatchedCallsiteLocs.count(Loc)) {
2149 MismatchedCallsiteSamples += Count;
2150 NumMismatchedCallsites++;
2155void SampleProfileMatcher::detectProfileMismatch() {
2157 if (
F.isDeclaration() || !
F.hasFnAttribute(
"use-sample-profile"))
2162 detectProfileMismatch(
F, *FS);
2167 errs() <<
"(" << NumMismatchedFuncHash <<
"/" << TotalProfiledFunc <<
")"
2168 <<
" of functions' profile are invalid and "
2169 <<
" (" << MismatchedFuncHashSamples <<
"/" << TotalFuncHashSamples
2171 <<
" of samples are discarded due to function hash mismatch.\n";
2173 errs() <<
"(" << NumMismatchedCallsites <<
"/" << TotalProfiledCallsites
2175 <<
" of callsites' profile are invalid and "
2176 <<
"(" << MismatchedCallsiteSamples <<
"/" << TotalCallsiteSamples
2178 <<
" of samples are discarded due to callsite location mismatch.\n";
2187 ProfStatsVec.
emplace_back(
"NumMismatchedFuncHash", NumMismatchedFuncHash);
2188 ProfStatsVec.
emplace_back(
"TotalProfiledFunc", TotalProfiledFunc);
2190 MismatchedFuncHashSamples);
2191 ProfStatsVec.
emplace_back(
"TotalFuncHashSamples", TotalFuncHashSamples);
2194 ProfStatsVec.
emplace_back(
"NumMismatchedCallsites", NumMismatchedCallsites);
2195 ProfStatsVec.
emplace_back(
"TotalProfiledCallsites", TotalProfiledCallsites);
2197 MismatchedCallsiteSamples);
2198 ProfStatsVec.
emplace_back(
"TotalCallsiteSamples", TotalCallsiteSamples);
2200 auto *MD = MDB.createLLVMStats(ProfStatsVec);
2201 auto *NMD =
M.getOrInsertNamedMetadata(
"llvm.stats");
2202 NMD->addOperand(MD);
2209 GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
2212 if (
M.getProfileSummary(
false) ==
nullptr) {
2213 M.setProfileSummary(Reader->getSummary().getMD(
M.getContext()),
2218 for (
const auto &
I : Reader->getProfiles())
2219 TotalCollectedSamples +=
I.second.getTotalSamples();
2221 auto Remapper = Reader->getRemapper();
2223 for (
const auto &N_F :
M.getValueSymbolTable()) {
2225 Function *
F = dyn_cast<Function>(N_F.getValue());
2226 if (
F ==
nullptr || OrigName.
empty())
2230 if (OrigName != NewName && !NewName.
empty()) {
2237 r.first->second =
nullptr;
2242 if (
auto MapName = Remapper->lookUpNameInProfile(OrigName)) {
2243 if (*MapName != OrigName && !MapName->empty())
2249 "No empty StringRef should be added in SymbolMap");
2252 MatchingManager->detectProfileMismatch();
2254 bool retval =
false;
2255 for (
auto *
F : buildFunctionOrder(M, CG)) {
2257 clearFunctionData();
2263 for (
const std::pair<Function *, NotInlinedProfileInfo> &pair :
2271 LLVM_DEBUG(
dbgs() <<
"\n\nProcessing Function " <<
F.getName() <<
"\n");
2272 DILocation2SampleMap.clear();
2283 initialEntryCount = 0;
2286 ProfAccForSymsInList =
false;
2288 CoverageTracker.setProfAccForSymsInList(ProfAccForSymsInList);
2294 if (ProfAccForSymsInList) {
2296 if (PSL->contains(
F.getName()))
2297 initialEntryCount = 0;
2310 if (NamesInProfile.count(CanonName))
2311 initialEntryCount = -1;
2316 if (!
F.getEntryCount())
2318 std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
2325 OwnedORE = std::make_unique<OptimizationRemarkEmitter>(&
F);
2326 ORE = OwnedORE.get();
2330 Samples = ContextTracker->getBaseSamplesFor(
F);
2332 Samples = Reader->getSamplesFor(
F);
2334 if (Samples && !Samples->
empty())
2335 return emitAnnotations(
F);
2341 : ProfileFileName(File), ProfileRemappingFileName(RemappingFile),
2342 LTOPhase(LTOPhase), FS(
std::
move(FS)) {}
2362 SampleProfileLoader SampleLoader(
2365 : ProfileRemappingFileName,
2366 LTOPhase, FS, GetAssumptionCache, GetTTI, GetTLI);
2368 if (!SampleLoader.doInitialization(M, &
FAM))
2373 if (!SampleLoader.runOnModule(M, &AM, PSI, CG))
This file defines the StringMap class.
amdgpu Simplify well known AMD library false FunctionCallee Callee
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
static bool runOnFunction(Function &F, bool PostInlining)
Provides ErrorOr<T> smart pointer.
static cl::opt< unsigned > SizeLimit("eif-limit", cl::init(6), cl::Hidden, cl::desc("Size limit in Hexagon early if-conversion"))
Implements a lazy call graph analysis and related passes for the new pass manager.
Statically lint checks LLVM IR
This file implements a map that provides insertion order iteration.
static const Function * getCalledFunction(const Value *V, bool &IsNoBuiltin)
Module.h This file contains the declarations for the Module class.
FunctionAnalysisManager FAM
This header defines various interfaces for pass management in LLVM.
This file defines the PriorityQueue class.
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file provides the interface for context-sensitive profile tracker used by CSSPGO.
This file provides the interface for the sampled PGO profile loader base implementation.
This file provides the utility functions for the sampled PGO loader base implementation.
This file provides the interface for the pseudo probe implementation for AutoFDO.
static cl::opt< std::string > SampleProfileFile("sample-profile-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile file loaded by -sample-profile"), cl::Hidden)
static cl::opt< bool > ProfileSampleBlockAccurate("profile-sample-block-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "branches and calls as having 0 samples. Otherwise, treat " "them conservatively as unknown. "))
static cl::opt< unsigned > MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden, cl::desc("Max number of promotions for a single indirect " "call callsite in sample profile loader"))
static cl::opt< ReplayInlinerSettings::Fallback > ProfileInlineReplayFallback("sample-profile-inline-replay-fallback", cl::init(ReplayInlinerSettings::Fallback::Original), cl::values(clEnumValN(ReplayInlinerSettings::Fallback::Original, "Original", "All decisions not in replay send to original advisor (default)"), clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline, "AlwaysInline", "All decisions not in replay are inlined"), clEnumValN(ReplayInlinerSettings::Fallback::NeverInline, "NeverInline", "All decisions not in replay are not inlined")), cl::desc("How sample profile inline replay treats sites that don't come " "from the replay. Original: defers to original advisor, " "AlwaysInline: inline all sites not in replay, NeverInline: " "inline no sites not in replay"), cl::Hidden)
static cl::opt< bool > OverwriteExistingWeights("overwrite-existing-weights", cl::Hidden, cl::init(false), cl::desc("Ignore existing branch weights on IR and always overwrite."))
static void updateIDTMetaData(Instruction &Inst, const SmallVectorImpl< InstrProfValueData > &CallTargets, uint64_t Sum)
Update indirect call target profile metadata for Inst.
static cl::opt< bool > AnnotateSampleProfileInlinePhase("annotate-sample-profile-inline-phase", cl::Hidden, cl::init(false), cl::desc("Annotate LTO phase (prelink / postlink), or main (no LTO) for " "sample-profile inline pass name."))
static cl::opt< std::string > ProfileInlineReplayFile("sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"), cl::desc("Optimization remarks file containing inline remarks to be replayed " "by inlining from sample profile loader."), cl::Hidden)
static cl::opt< bool > ProfileMergeInlinee("sample-profile-merge-inlinee", cl::Hidden, cl::init(true), cl::desc("Merge past inlinee's profile to outline version if sample " "profile loader decided not to inline a call site. It will " "only be enabled when top-down order of profile loading is " "enabled. "))
static cl::opt< bool > PersistProfileStaleness("persist-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute stale profile statistical metrics and write it into the " "native object file(.llvm_stats section)."))
static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate)
Check whether the indirect call promotion history of Inst allows the promotion for Candidate.
static SmallVector< InstrProfValueData, 2 > GetSortedValueDataFromCallTargets(const SampleRecord::CallTargetMap &M)
Returns the sorted CallTargetMap M by count in descending order.
static cl::opt< bool > UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden, cl::desc("Process functions in a top-down order " "defined by the profiled call graph when " "-sample-profile-top-down-load is on."))
static cl::opt< ReplayInlinerSettings::Scope > ProfileInlineReplayScope("sample-profile-inline-replay-scope", cl::init(ReplayInlinerSettings::Scope::Function), cl::values(clEnumValN(ReplayInlinerSettings::Scope::Function, "Function", "Replay on functions that have remarks associated " "with them (default)"), clEnumValN(ReplayInlinerSettings::Scope::Module, "Module", "Replay on the entire module")), cl::desc("Whether inline replay should be applied to the entire " "Module or just the Functions (default) that are present as " "callers in remarks during sample profile inlining."), cl::Hidden)
static cl::opt< unsigned > ProfileICPRelativeHotness("sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25), cl::desc("Relative hotness percentage threshold for indirect " "call promotion in proirity-based sample profile loader inlining."))
Function::ProfileCount ProfileCount
static cl::opt< unsigned > ProfileICPRelativeHotnessSkip("sample-profile-icp-relative-hotness-skip", cl::Hidden, cl::init(1), cl::desc("Skip relative hotness check for ICP up to given number of targets."))
static cl::opt< bool > ReportProfileStaleness("report-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute and report stale profile statistical metrics."))
static cl::opt< bool > UsePreInlinerDecision("sample-profile-use-preinliner", cl::Hidden, cl::desc("Use the preinliner decisions stored in profile context."))
static cl::opt< bool > ProfileAccurateForSymsInList("profile-accurate-for-symsinlist", cl::Hidden, cl::init(true), cl::desc("For symbols in profile symbol list, regard their profiles to " "be accurate. It may be overriden by profile-sample-accurate. "))
static cl::opt< bool > DisableSampleLoaderInlining("disable-sample-loader-inlining", cl::Hidden, cl::init(false), cl::desc("If true, artifically skip inline transformation in sample-loader " "pass, and merge (or scale) profiles (as configured by " "--sample-profile-merge-inlinee)."))
static cl::opt< bool > ProfileSizeInline("sample-profile-inline-size", cl::Hidden, cl::init(false), cl::desc("Inline cold call sites in profile loader if it's beneficial " "for code size."))
static cl::opt< bool > ProfileTopDownLoad("sample-profile-top-down-load", cl::Hidden, cl::init(true), cl::desc("Do profile annotation and inlining for functions in top-down " "order of call graph during sample profile loading. It only " "works for new pass manager. "))
static cl::opt< bool > ProfileSampleAccurate("profile-sample-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "callsite and function as having 0 samples. Otherwise, treat " "un-sampled callsites and functions conservatively as unknown. "))
static cl::opt< bool > AllowRecursiveInline("sample-profile-recursive-inline", cl::Hidden, cl::desc("Allow sample loader inliner to inline recursive calls."))
static cl::opt< CallSiteFormat::Format > ProfileInlineReplayFormat("sample-profile-inline-replay-format", cl::init(CallSiteFormat::Format::LineColumnDiscriminator), cl::values(clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"), clEnumValN(CallSiteFormat::Format::LineColumn, "LineColumn", "<Line Number>:<Column Number>"), clEnumValN(CallSiteFormat::Format::LineDiscriminator, "LineDiscriminator", "<Line Number>.<Discriminator>"), clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator, "LineColumnDiscriminator", "<Line Number>:<Column Number>.<Discriminator> (default)")), cl::desc("How sample profile inline replay file is formatted"), cl::Hidden)
static cl::opt< std::string > SampleProfileRemappingFile("sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden)
static cl::opt< bool > CallsitePrioritizedInline("sample-profile-prioritized-inline", cl::Hidden, cl::desc("Use call site prioritized inlining for sample profile loader." "Currently only CSSPGO is supported."))
This file provides the interface for the sampled PGO loader pass.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Defines the virtual file system interface vfs::FileSystem.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
This class represents a function call, abstracting a target machine's calling convention.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Implements a dense probed hash-table based set.
Diagnostic information for the sample profiler.
Represents either an error or a value T.
Class to represent profile counts.
DISubprogram * getSubprogram() const
Get the attached subprogram.
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Module * getParent()
Get the module that this global value is contained inside of...
Represents the cost of inlining a function.
static InlineCost getNever(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost getAlways(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost get(int Cost, int Threshold, int StaticBonus=0)
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
InlineResult is basically true or false.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
bool extractProfTotalWeight(uint64_t &TotalVal) const
Retrieve total raw weight values of a branch.
const BasicBlock * getParent() const
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
This is an important class for using LLVM in a threaded context.
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
An analysis pass which computes the call graph for a module.
A node in the call graph.
A RefSCC of the call graph.
An SCC of the call graph.
A lazily constructed view of the call graph of a module.
iterator_range< postorder_ref_scc_iterator > postorder_ref_sccs()
This class implements a map that also provides access to all stored values in a deterministic order.
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
ValueT lookup(const KeyT &Key) const
A Module instance is used to store all the information related to an LLVM module.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
virtual ErrorOr< uint64_t > getInstWeight(const InstructionT &Inst)
Get the weight for an instruction.
virtual const FunctionSamples * findFunctionSamples(const InstructionT &I) const
Get the FunctionSamples for an instruction.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
SampleProfileLoaderPass(std::string File="", std::string RemappingFile="", ThinOrFullLTOPhase LTOPhase=ThinOrFullLTOPhase::None, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
StringRef - Represent a constant reference to a string, i.e.
constexpr bool empty() const
empty - Check if the string is empty.
StringSet - A wrapper for StringMap that provides set-like functionality.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM Value Representation.
StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
Representation of the samples collected for a function.
static uint64_t getGUID(StringRef Name)
void findInlinedFunctions(DenseSet< GlobalValue::GUID > &S, const StringMap< Function * > &SymbolMap, uint64_t Threshold) const
Recursively traverses all children, if the total sample count of the corresponding function is no les...
static bool ProfileIsProbeBased
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
StringRef getFuncName() const
Return the original function name.
SampleContext & getContext() const
sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight=1)
Merge the samples in Other into this one.
void SetContextSynthetic()
static LineLocation getCallSiteIdentifier(const DILocation *DIL, bool ProfileIsFS=false)
Returns a unique call site identifier for a given debug location of a call instruction.
uint64_t getHeadSamplesEstimate() const
Return an estimate of the sample count of the function entry basic block.
StringRef getName() const
Return the function name.
const BodySampleMap & getBodySamples() const
Return all the samples collected in the body of the function.
static bool UseMD5
Whether the profile uses MD5 to represent string.
bool hasAttribute(ContextAttributeMask A)
Sample-based profile reader.
static ErrorOr< std::unique_ptr< SampleProfileReader > > create(const std::string Filename, LLVMContext &C, vfs::FileSystem &FS, FSDiscriminatorPass P=FSDiscriminatorPass::Base, const std::string RemapFilename="")
Create a sample profile reader appropriate to the file format.
static const SortedCallTargetSet SortCallTargets(const CallTargetMap &Targets)
Sort call targets in descending order of call frequency.
static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets, float DistributionFactor)
Prorate call targets by a distribution factor.
Enumerate the SCCs of a directed graph in reverse topological order of the SCC DAG.
bool isAtEnd() const
Direct loop termination test which is more efficient than comparison with end().
Sort the nodes of a directed SCC in the decreasing order of the edge weights.
const CustomOperand< const MCSubtargetInfo & > Msg[]
@ C
The default llvm calling convention, compatible with C.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
void checkExpectAnnotations(Instruction &I, const ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
DenseMap< SymbolStringPtr, JITEvaluatedSymbol > SymbolMap
A map from symbol names (as SymbolStringPtrs) to JITSymbols (address/flags pairs).
DiagnosticInfoOptimizationBase::Argument NV
CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
@ ContextDuplicatedIntoBase
std::map< std::string, FunctionSamples, std::less<> > FunctionSamplesMap
bool callsiteIsHot(const FunctionSamples *CallsiteFS, ProfileSummaryInfo *PSI, bool ProfAccForSymsInList)
Return true if the given callsite is hot with respect to the hot cutoff threshold.
IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets a vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
static bool isIndirectCall(const MachineInstr &MI)
bool getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, InstrProfValueData ValueData[], uint32_t &ActualNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst which is annotated with value profile meta data.
bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
cl::opt< int > ProfileInlineLimitMin("sample-profile-inline-limit-min", cl::Hidden, cl::init(100), cl::desc("The lower bound of size growth limit for " "proirity-based sample profile loader inlining."))
cl::opt< int > ProfileInlineGrowthLimit("sample-profile-inline-growth-limit", cl::Hidden, cl::init(12), cl::desc("The size growth ratio limit for proirity-based sample profile " "loader inlining."))
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
void setProbeDistributionFactor(Instruction &Inst, float Factor)
std::string AnnotateInlinePassName(InlineContext IC)
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
cl::opt< bool > SampleProfileUseProfi
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
void sort(IteratorTy Start, IteratorTy End)
llvm::cl::opt< bool > UseIterativeBFIInference
std::optional< PseudoProbe > extractProbe(const Instruction &Inst)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void emitInlinedIntoBasedOnCost(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, const BasicBlock *Block, const Function &Callee, const Function &Caller, const InlineCost &IC, bool ForProfileContext=false, const char *PassName=nullptr)
Emit ORE message based on cost (default heuristic).
cl::opt< bool > SortProfiledSCC("sort-profiled-scc-member", cl::init(true), cl::Hidden, cl::desc("Sort profiled recursion by edge weights."))
std::unique_ptr< InlineAdvisor > getReplayInlineAdvisor(Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context, std::unique_ptr< InlineAdvisor > OriginalAdvisor, const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks, InlineContext IC)
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
cl::opt< int > ProfileInlineLimitMax("sample-profile-inline-limit-max", cl::Hidden, cl::init(10000), cl::desc("The upper bound of size growth limit for " "proirity-based sample profile loader inlining."))
cl::opt< int > SampleHotCallSiteThreshold("sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000), cl::desc("Hot callsite threshold for proirity-based sample profile loader " "inlining."))
void updateProfileCallee(Function *Callee, int64_t EntryDelta, const ValueMap< const Value *, WeakTrackingVH > *VMap=nullptr)
Updates profile information by adjusting the entry count by adding EntryDelta then scaling callsite i...
InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes=false, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, Function *ForwardVarArgsTo=nullptr)
This function inlines the called function into the basic block of the caller.
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
cl::opt< bool > EnableExtTspBlockPlacement
const uint64_t NOMORE_ICP_MAGICNUM
Magic number in the value profile metadata showing a target has been promoted for the instruction and...
cl::opt< int > SampleColdCallSiteThreshold("sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining cold callsites"))
Used in the streaming interface as the general argument type.
Provides context on when an inline advisor is constructed in the pipeline (e.g., link phase,...
Thresholds to tune inline cost analysis.
std::optional< bool > AllowRecursiveCall
Indicate whether we allow inlining for recursive call.
std::optional< bool > ComputeFullInlineCost
Compute inline cost even when the cost has exceeded the threshold.
Represents the relative location of an instruction.