Go to the documentation of this file.
88 #include <system_error>
93 using namespace sampleprof;
96 #define DEBUG_TYPE "sample-profile"
97 #define CSINLINE_DEBUG DEBUG_TYPE "-inline"
100 "Number of functions inlined with context sensitive profile");
102 "Number of functions not inlined with context sensitive profile");
104 "Number of functions with CFG mismatched profile");
105 STATISTIC(NumMatchedProfile,
"Number of functions with CFG matched profile");
107 "Number of inlined callsites with a partial distribution factor");
110 "Number of functions with FDO inline stopped due to min size limit");
112 "Number of functions with FDO inline stopped due to max size limit");
114 NumCSInlinedHitGrowthLimit,
115 "Number of functions with FDO inline stopped due to growth size limit");
132 cl::desc(
"If the sample profile is accurate, we will mark all un-sampled "
133 "callsite and function as having 0 samples. Otherwise, treat "
134 "un-sampled callsites and functions conservatively as unknown. "));
138 cl::desc(
"If the sample profile is accurate, we will mark all un-sampled "
139 "branches and calls as having 0 samples. Otherwise, treat "
140 "them conservatively as unknown. "));
144 cl::desc(
"For symbols in profile symbol list, regard their profiles to "
145 "be accurate. It may be overriden by profile-sample-accurate. "));
149 cl::desc(
"Merge past inlinee's profile to outline version if sample "
150 "profile loader decided not to inline a call site. It will "
151 "only be enabled when top-down order of profile loading is "
156 cl::desc(
"Do profile annotation and inlining for functions in top-down "
157 "order of call graph during sample profile loading. It only "
158 "works for new pass manager. "));
162 cl::desc(
"Process functions in a top-down order "
163 "defined by the profiled call graph when "
164 "-sample-profile-top-down-load is on."));
167 cl::desc(
"Sort profiled recursion by edge weights."));
171 cl::desc(
"Inline cold call sites in profile loader if it's beneficial "
179 cl::desc(
"If true, artifically skip inline transformation in sample-loader "
180 "pass, and merge (or scale) profiles (as configured by "
181 "--sample-profile-merge-inlinee)."));
185 cl::desc(
"The size growth ratio limit for proirity-based sample profile "
186 "loader inlining."));
190 cl::desc(
"The lower bound of size growth limit for "
191 "proirity-based sample profile loader inlining."));
195 cl::desc(
"The upper bound of size growth limit for "
196 "proirity-based sample profile loader inlining."));
200 cl::desc(
"Hot callsite threshold for proirity-based sample profile loader "
205 cl::desc(
"Threshold for inlining cold callsites"));
210 "Relative hotness percentage threshold for indirect "
211 "call promotion in proirity-based sample profile loader inlining."));
216 "Skip relative hotness check for ICP up to given number of targets."));
219 "sample-profile-prioritized-inline",
cl::Hidden,
221 cl::desc(
"Use call site prioritized inlining for sample profile loader."
222 "Currently only CSSPGO is supported."));
227 cl::desc(
"Use the preinliner decisions stored in profile context."));
230 "sample-profile-recursive-inline",
cl::Hidden,
232 cl::desc(
"Allow sample loader inliner to inline recursive calls."));
237 "Optimization remarks file containing inline remarks to be replayed "
238 "by inlining from sample profile loader."),
242 "sample-profile-inline-replay-scope",
245 "Replay on functions that have remarks associated "
246 "with them (default)"),
248 "Replay on the entire module")),
249 cl::desc(
"Whether inline replay should be applied to the entire "
250 "Module or just the Functions (default) that are present as "
251 "callers in remarks during sample profile inlining."),
255 "sample-profile-inline-replay-fallback",
260 "All decisions not in replay send to original advisor (default)"),
262 "AlwaysInline",
"All decisions not in replay are inlined"),
264 "All decisions not in replay are not inlined")),
265 cl::desc(
"How sample profile inline replay treats sites that don't come "
266 "from the replay. Original: defers to original advisor, "
267 "AlwaysInline: inline all sites not in replay, NeverInline: "
268 "inline no sites not in replay"),
272 "sample-profile-inline-replay-format",
277 "<Line Number>:<Column Number>"),
279 "LineDiscriminator",
"<Line Number>.<Discriminator>"),
281 "LineColumnDiscriminator",
282 "<Line Number>:<Column Number>.<Discriminator> (default)")),
287 cl::desc(
"Max number of promotions for a single indirect "
288 "call callsite in sample profile loader"));
292 cl::desc(
"Ignore existing branch weights on IR and always overwrite."));
296 cl::desc(
"Annotate LTO phase (prelink / postlink), or main (no LTO) for "
297 "sample-profile inline pass name."));
305 using Edge = std::pair<const BasicBlock *, const BasicBlock *>;
310 class GUIDToFuncNameMapper {
314 : CurrentReader(Reader), CurrentModule(
M),
315 CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) {
316 if (!CurrentReader.useMD5())
319 for (
const auto &
F : CurrentModule) {
321 CurrentGUIDToFuncNameMap.insert(
332 if (CanonName != OrigName)
333 CurrentGUIDToFuncNameMap.insert(
338 SetGUIDToFuncNameMapForAll(&CurrentGUIDToFuncNameMap);
341 ~GUIDToFuncNameMapper() {
342 if (!CurrentReader.useMD5())
345 CurrentGUIDToFuncNameMap.clear();
349 SetGUIDToFuncNameMapForAll(
nullptr);
354 std::queue<FunctionSamples *> FSToUpdate;
355 for (
auto &IFS : CurrentReader.getProfiles()) {
356 FSToUpdate.push(&IFS.second);
359 while (!FSToUpdate.empty()) {
362 FS->GUIDToFuncNameMap =
Map;
363 for (
const auto &ICS :
FS->getCallsiteSamples()) {
365 for (
auto &IFS : FSMap) {
367 FSToUpdate.push(&
FS);
379 struct InlineCandidate {
389 float CallsiteDistribution;
393 struct CandidateComparer {
394 bool operator()(
const InlineCandidate &
LHS,
const InlineCandidate &
RHS) {
395 if (
LHS.CallsiteCount !=
RHS.CallsiteCount)
396 return LHS.CallsiteCount <
RHS.CallsiteCount;
400 assert(LCS && RCS &&
"Expect non-null FunctionSamples");
411 using CandidateQueue =
420 class SampleProfileLoader final
429 GetAC(
std::
move(GetAssumptionCache)),
430 GetTTI(
std::
move(GetTargetTransformInfo)), GetTLI(
std::
move(GetTLI)),
448 findFunctionSamples(
const Instruction &
I)
const override;
449 std::vector<const FunctionSamples *>
456 bool tryPromoteAndInlineCandidate(
463 bool getExternalInlineAdvisorShouldInline(
CallBase &CB);
464 InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
465 bool getInlineCandidate(InlineCandidate *NewCandidate,
CallBase *CB);
467 tryInlineCandidate(InlineCandidate &Candidate,
470 inlineHotFunctionsWithPriority(
Function &
F,
474 void emitOptimizationRemarksForInlineCandidates(
477 void promoteMergeNotInlinedContextSamples(
481 std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(
CallGraph &CG);
482 void generateMDProfMetadata(
Function &
F);
495 std::unique_ptr<SampleContextTracker> ContextTracker;
503 const std::string AnnotatedPassName;
507 std::unique_ptr<ProfileSymbolList> PSL;
518 struct NotInlinedProfileInfo {
536 bool ProfAccForSymsInList;
539 std::unique_ptr<InlineAdvisor> ExternalInlineAdvisor;
542 std::unique_ptr<PseudoProbeManager> ProbeManager;
545 const char *getAnnotatedRemarkPassName()
const {
546 return AnnotatedPassName.c_str();
550 class SampleProfileLoaderLegacyPass :
public ModulePass {
555 SampleProfileLoaderLegacyPass(
561 return ACT->getAssumptionCache(
F);
564 return TTIWP->getTTI(
F);
567 return TLIWP->getTLI(
F);
573 void dump() { SampleLoader.dump(); }
575 bool doInitialization(
Module &M)
override {
576 return SampleLoader.doInitialization(M);
579 StringRef getPassName()
const override {
return "Sample profile pass"; }
580 bool runOnModule(
Module &M)
override;
590 SampleProfileLoader SampleLoader;
600 return getProbeWeight(Inst);
604 return std::error_code();
609 if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst) || isa<PHINode>(Inst))
610 return std::error_code();
619 if (
const auto *CB = dyn_cast<CallBase>(&Inst))
620 if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
623 return getInstWeightImpl(Inst);
631 "Profile is not pseudo probe based");
636 return std::error_code();
658 if (
const auto *CB = dyn_cast<CallBase>(&Inst))
659 if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
665 bool FirstMark = CoverageTracker.markSamplesUsed(
FS, Probe->
Id, 0, Samples);
670 Remark <<
" samples from profile (ProbeId=";
674 Remark <<
", OriginalSamples=";
681 <<
" - weight: " <<
R.get() <<
" - factor: "
701 SampleProfileLoader::findCalleeFunctionSamples(
const CallBase &Inst)
const {
709 CalleeName =
Callee->getName();
712 return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName);
725 std::vector<const FunctionSamples *>
726 SampleProfileLoader::findIndirectCallFunctionSamples(
729 std::vector<const FunctionSamples *>
R;
736 assert(L && R &&
"Expect non-null FunctionSamples");
745 ContextTracker->getIndirectCalleeContextSamplesFor(DIL);
746 if (CalleeSamples.empty())
752 for (
const auto *
const FS : CalleeSamples) {
753 Sum +=
FS->getEntrySamples();
765 auto T =
FS->findCallTargetMapAt(CallSite);
768 for (
const auto &T_C :
T.get())
773 for (
const auto &NameFS : *M) {
774 Sum += NameFS.second.getEntrySamples();
775 R.push_back(&NameFS.second);
783 SampleProfileLoader::findFunctionSamples(
const Instruction &Inst)
const {
794 auto it = DILocation2SampleMap.try_emplace(DIL,
nullptr);
797 it.first->second = ContextTracker->getContextSamplesFor(DIL);
800 Samples->findFunctionSamples(DIL, Reader->
getRemapper());
802 return it.first->second;
815 std::unique_ptr<InstrProfValueData[]> ValueData =
819 ValueData.get(), NumVals, TotalCount,
true);
825 unsigned NumPromoted = 0;
862 std::unique_ptr<InstrProfValueData[]> ValueData =
866 ValueData.get(), NumVals, OldSum,
true);
870 assert((CallTargets.size() == 1 &&
872 "If sum is 0, assume only one element in CallTargets "
873 "with count being NOMORE_ICP_MAGICNUM");
877 ValueCountMap[ValueData[
I].
Value] = ValueData[
I].Count;
884 OldSum -= Pair.first->second;
894 ValueCountMap[ValueData[
I].Value] = ValueData[
I].Count;
898 for (
const auto &
Data : CallTargets) {
905 assert(Sum >=
Data.Count &&
"Sum should never be less than Data.Count");
911 for (
const auto &ValueCount : ValueCountMap) {
913 InstrProfValueData{ValueCount.first, ValueCount.second});
917 [](
const InstrProfValueData &L,
const InstrProfValueData &R) {
918 if (L.Count != R.Count)
919 return L.Count > R.Count;
920 return L.Value > R.Value;
926 NewCallTargets, Sum, IPVK_IndirectCallTarget, MaxMDCount);
939 bool SampleProfileLoader::tryPromoteAndInlineCandidate(
950 auto CalleeFunctionName = Candidate.CalleeSamples->getFuncName();
955 auto &CI = *Candidate.CallInstr;
959 const char *Reason =
"Callee function not available";
966 if (!
R->getValue()->isDeclaration() &&
R->getValue()->getSubprogram() &&
967 R->getValue()->hasFnAttribute(
"use-sample-profile") &&
976 CI,
R->getValue(), Candidate.CallsiteCount, Sum,
false, ORE);
978 Sum -= Candidate.CallsiteCount;
991 Candidate.CallInstr = DI;
992 if (isa<CallInst>(DI) || isa<InvokeInst>(DI)) {
993 bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
998 *DI,
static_cast<float>(Candidate.CallsiteCount) / SumOrigin);
1005 << Candidate.CalleeSamples->getFuncName() <<
" because "
1016 if (Callee ==
nullptr)
1031 void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
1034 for (
auto I : Candidates) {
1035 Function *CalledFunction =
I->getCalledFunction();
1036 if (CalledFunction) {
1038 "InlineAttempt",
I->getDebugLoc(),
1040 <<
"previous inlining reattempted for "
1041 << (Hot ?
"hotness: '" :
"size: '")
1042 <<
ore::NV(
"Callee", CalledFunction) <<
"' into '"
1048 void SampleProfileLoader::findExternalInlineCandidate(
1055 if (CB && getExternalInlineAdvisorShouldInline(*CB)) {
1067 assert(Samples &&
"expect non-null caller profile");
1077 ContextTracker->getContextFor(Samples->
getContext());
1078 std::queue<ContextTrieNode *> CalleeList;
1079 CalleeList.push(Caller);
1080 while (!CalleeList.empty()) {
1100 if (!Func ||
Func->isDeclaration())
1106 for (
const auto &TS : BS.second.getCallTargets())
1107 if (TS.getValue() > Threshold) {
1110 if (!Callee ||
Callee->isDeclaration())
1118 for (
auto &Child : Node->getAllChildContext()) {
1120 CalleeList.push(CalleeNode);
1147 bool SampleProfileLoader::inlineHotFunctions(
1151 assert((!ProfAccForSymsInList ||
1153 !
F.hasFnAttribute(
"profile-sample-accurate"))) &&
1154 "ProfAccForSymsInList should be false when profile-sample-accurate "
1158 bool Changed =
false;
1159 bool LocalChanged =
true;
1160 while (LocalChanged) {
1161 LocalChanged =
false;
1163 for (
auto &
BB :
F) {
1167 for (
auto &
I :
BB.getInstList()) {
1169 if (
auto *CB = dyn_cast<CallBase>(&
I)) {
1170 if (!isa<IntrinsicInst>(
I)) {
1171 if ((
FS = findCalleeFunctionSamples(*CB))) {
1173 "GUIDToFuncNameMap has to be populated");
1174 AllCandidates.push_back(CB);
1179 else if (shouldInlineColdCallee(*CB))
1180 ColdCandidates.push_back(CB);
1181 }
else if (getExternalInlineAdvisorShouldInline(*CB)) {
1182 AllCandidates.push_back(CB);
1187 if (Hot || ExternalInlineAdvisor) {
1188 CIS.
insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end());
1189 emitOptimizationRemarksForInlineCandidates(AllCandidates,
F,
true);
1191 CIS.
insert(CIS.begin(), ColdCandidates.begin(), ColdCandidates.end());
1192 emitOptimizationRemarksForInlineCandidates(ColdCandidates,
F,
false);
1196 Function *CalledFunction =
I->getCalledFunction();
1197 InlineCandidate Candidate = {
I, LocalNotInlinedCallSites.
lookup(
I),
1201 if (CalledFunction == &
F)
1203 if (
I->isIndirectCall()) {
1205 for (
const auto *
FS : findIndirectCallFunctionSamples(*
I, Sum)) {
1208 findExternalInlineCandidate(
I,
FS, InlinedGUIDs,
SymbolMap,
1209 PSI->getOrCompHotCountThreshold());
1215 Candidate = {
I,
FS,
FS->getEntrySamples(), 1.0};
1216 if (tryPromoteAndInlineCandidate(
F, Candidate, SumOrigin, Sum)) {
1217 LocalNotInlinedCallSites.
erase(
I);
1218 LocalChanged =
true;
1221 }
else if (CalledFunction && CalledFunction->
getSubprogram() &&
1223 if (tryInlineCandidate(Candidate)) {
1224 LocalNotInlinedCallSites.
erase(
I);
1225 LocalChanged =
true;
1228 findExternalInlineCandidate(
I, findCalleeFunctionSamples(*
I),
1230 PSI->getOrCompHotCountThreshold());
1233 Changed |= LocalChanged;
1239 promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites,
F);
1243 bool SampleProfileLoader::tryInlineCandidate(
1250 CallBase &CB = *Candidate.CallInstr;
1252 assert(CalledFunction &&
"Expect a callee with definition");
1256 InlineCost Cost = shouldInlineCandidate(Candidate);
1259 "InlineFail", DLoc,
BB)
1260 <<
"incompatible inlining");
1268 IFI.UpdateProfile =
false;
1278 Cost,
true, getAnnotatedRemarkPassName());
1281 if (InlinedCallSites) {
1282 InlinedCallSites->
clear();
1283 for (
auto &
I : IFI.InlinedCallSites)
1284 InlinedCallSites->push_back(
I);
1288 ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
1298 if (Candidate.CallsiteDistribution < 1) {
1299 for (
auto &
I : IFI.InlinedCallSites) {
1302 Candidate.CallsiteDistribution);
1304 NumDuplicatedInlinesite++;
1310 bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
1312 assert(CB &&
"Expect non-null call instruction");
1314 if (isa<IntrinsicInst>(CB))
1318 const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB);
1321 if (!CalleeSamples && !getExternalInlineAdvisorShouldInline(*CB))
1330 *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
1335 SampleProfileLoader::getExternalInlineAdvisorCost(
CallBase &CB) {
1336 std::unique_ptr<InlineAdvice> Advice =
nullptr;
1337 if (ExternalInlineAdvisor) {
1338 Advice = ExternalInlineAdvisor->getAdvice(CB);
1340 if (!Advice->isInliningRecommended()) {
1341 Advice->recordUnattemptedInlining();
1344 Advice->recordInlining();
1352 bool SampleProfileLoader::getExternalInlineAdvisorShouldInline(
CallBase &CB) {
1354 return Cost ? !!Cost.
getValue() :
false;
1358 SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
1360 getExternalInlineAdvisorCost(*Candidate.CallInstr))
1361 return ReplayCost.getValue();
1366 if (Candidate.CallsiteCount > PSI->getHotCountThreshold())
1373 assert(Callee &&
"Expect a definition for inline candidate of direct call");
1386 GetTTI(*Callee), GetAC, GetTLI);
1422 bool SampleProfileLoader::inlineHotFunctionsWithPriority(
1426 assert((!ProfAccForSymsInList ||
1428 !
F.hasFnAttribute(
"profile-sample-accurate"))) &&
1429 "ProfAccForSymsInList should be false when profile-sample-accurate "
1434 CandidateQueue CQueue;
1435 InlineCandidate NewCandidate;
1436 for (
auto &
BB :
F) {
1437 for (
auto &
I :
BB.getInstList()) {
1438 auto *CB = dyn_cast<CallBase>(&
I);
1441 if (getInlineCandidate(&NewCandidate, CB))
1442 CQueue.push(NewCandidate);
1451 "Max inline size limit should not be smaller than min inline size "
1456 if (ExternalInlineAdvisor)
1462 bool Changed =
false;
1463 while (!CQueue.empty() &&
F.getInstructionCount() <
SizeLimit) {
1464 InlineCandidate Candidate = CQueue.top();
1467 Function *CalledFunction =
I->getCalledFunction();
1469 if (CalledFunction == &
F)
1471 if (
I->isIndirectCall()) {
1473 auto CalleeSamples = findIndirectCallFunctionSamples(*
I, Sum);
1475 Sum *= Candidate.CallsiteDistribution;
1476 unsigned ICPCount = 0;
1477 for (
const auto *
FS : CalleeSamples) {
1480 findExternalInlineCandidate(
I,
FS, InlinedGUIDs,
SymbolMap,
1481 PSI->getOrCompHotCountThreshold());
1485 FS->getEntrySamples() * Candidate.CallsiteDistribution;
1500 if (!PSI->isHotCount(EntryCountDistributed))
1505 Candidate = {
I,
FS, EntryCountDistributed,
1506 Candidate.CallsiteDistribution};
1507 if (tryPromoteAndInlineCandidate(
F, Candidate, SumOrigin, Sum,
1508 &InlinedCallSites)) {
1509 for (
auto *CB : InlinedCallSites) {
1510 if (getInlineCandidate(&NewCandidate, CB))
1511 CQueue.emplace(NewCandidate);
1515 }
else if (!ContextTracker) {
1519 }
else if (CalledFunction && CalledFunction->
getSubprogram() &&
1522 if (tryInlineCandidate(Candidate, &InlinedCallSites)) {
1523 for (
auto *CB : InlinedCallSites) {
1524 if (getInlineCandidate(&NewCandidate, CB))
1525 CQueue.emplace(NewCandidate);
1528 }
else if (!ContextTracker) {
1529 LocalNotInlinedCallSites.
try_emplace(
I, Candidate.CalleeSamples);
1532 findExternalInlineCandidate(
I, findCalleeFunctionSamples(*
I),
1534 PSI->getOrCompHotCountThreshold());
1538 if (!CQueue.empty()) {
1540 ++NumCSInlinedHitMaxLimit;
1542 ++NumCSInlinedHitMinLimit;
1544 ++NumCSInlinedHitGrowthLimit;
1550 promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites,
F);
1554 void SampleProfileLoader::promoteMergeNotInlinedContextSamples(
1558 for (
const auto &Pair : NonInlinedCallSites) {
1561 if (!Callee ||
Callee->isDeclaration())
1566 I->getDebugLoc(),
I->getParent())
1567 <<
"previous inlining not repeated: '" <<
ore::NV(
"Callee", Callee)
1568 <<
"' into '" <<
ore::NV(
"Caller", &
F) <<
"'");
1572 if (
FS->getTotalSamples() == 0 &&
FS->getEntrySamples() == 0) {
1586 if (
FS->getHeadSamples() == 0) {
1590 FS->getEntrySamples());
1602 notInlinedCallInfo.
try_emplace(Callee, NotInlinedProfileInfo{0});
1603 pair.first->second.entryCount +=
FS->getEntrySamples();
1621 void SampleProfileLoader::generateMDProfMetadata(
Function &
F) {
1624 LLVM_DEBUG(
dbgs() <<
"\nPropagation complete. Setting branch weights\n");
1627 for (
auto &BI :
F) {
1630 if (BlockWeights[
BB]) {
1631 for (
auto &
I :
BB->getInstList()) {
1632 if (!isa<CallInst>(
I) && !isa<InvokeInst>(
I))
1643 auto T =
FS->findCallTargetMapAt(CallSite);
1644 if (!T ||
T.get().empty())
1658 for (
const auto &
C :
T.get())
1665 FS->findFunctionSamplesMapAt(CallSite)) {
1666 for (
const auto &NameFS : *M)
1667 Sum += NameFS.second.getEntrySamples();
1673 I.setMetadata(LLVMContext::MD_prof,
nullptr);
1674 }
else if (!isa<IntrinsicInst>(&
I)) {
1675 I.setMetadata(LLVMContext::MD_prof,
1676 MDB.createBranchWeights(
1677 {static_cast<uint32_t>(BlockWeights[BB])}));
1683 for (
auto &
I :
BB->getInstList()) {
1684 if (isa<CallInst>(
I) || isa<InvokeInst>(
I)) {
1686 I.setMetadata(LLVMContext::MD_prof,
nullptr);
1688 I.setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(0));
1696 if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI) &&
1697 !isa<IndirectBrInst>(TI))
1703 :
Twine(
"<UNKNOWN LOCATION>"))
1712 std::vector<uint64_t> EdgeIndex;
1717 EdgeIndex[
I] = EdgeMultiplicity[Succ];
1718 EdgeMultiplicity[Succ]++;
1723 Edge
E = std::make_pair(
BB, Succ);
1736 Weights.push_back(
static_cast<uint32_t>(Weight + 1));
1740 uint64_t W = Weight / EdgeMultiplicity[Succ];
1742 if (EdgeIndex[
I] < Weight % EdgeMultiplicity[Succ])
1744 Weights.push_back(
static_cast<uint32_t>(
W));
1747 if (Weight > MaxWeight) {
1749 MaxDestInst = Succ->getFirstNonPHIOrDbgOrLifetime();
1768 if (MaxWeight > 0 &&
1771 TI->
setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
1774 <<
"most popular destination for conditional branches at "
1775 <<
ore::NV(
"CondBranchesLoc", BranchLoc);
1794 bool SampleProfileLoader::emitAnnotations(
Function &
F) {
1795 bool Changed =
false;
1798 if (!ProbeManager->profileIsValid(
F, *Samples)) {
1800 dbgs() <<
"Profile is invalid due to CFG mismatch for Function "
1802 ++NumMismatchedProfile;
1805 ++NumMatchedProfile;
1807 if (getFunctionLoc(
F) == 0)
1811 <<
F.getName() <<
": " << getFunctionLoc(
F) <<
"\n");
1816 Changed |= inlineHotFunctionsWithPriority(
F, InlinedGUIDs);
1818 Changed |= inlineHotFunctions(
F, InlinedGUIDs);
1820 Changed |= computeAndPropagateWeights(
F, InlinedGUIDs);
1823 generateMDProfMetadata(
F);
1825 emitCoverageRemarks(
F);
1832 "Sample Profile loader",
false,
false)
1841 SampleProfileLoader::buildProfiledCallGraph(
CallGraph &CG) {
1842 std::unique_ptr<ProfiledCallGraph> ProfiledCG;
1844 ProfiledCG = std::make_unique<ProfiledCallGraph>(*ContextTracker);
1846 ProfiledCG = std::make_unique<ProfiledCallGraph>(Reader->
getProfiles());
1851 for (
auto &Node : CG) {
1852 const auto *
F = Node.first;
1853 if (!
F ||
F->isDeclaration() || !
F->hasFnAttribute(
"use-sample-profile"))
1861 std::vector<Function *>
1863 std::vector<Function *> FunctionOrderList;
1864 FunctionOrderList.reserve(
M.size());
1867 errs() <<
"WARNING: -use-profiled-call-graph ignored, should be used "
1868 "together with -sample-profile-top-down-load.\n";
1881 if (!
F.isDeclaration() &&
F.hasFnAttribute(
"use-sample-profile"))
1882 FunctionOrderList.push_back(&
F);
1883 return FunctionOrderList;
1938 std::unique_ptr<ProfiledCallGraph> ProfiledCG = buildProfiledCallGraph(*CG);
1947 for (
auto *Node : Range) {
1949 if (
F && !
F->isDeclaration() &&
F->hasFnAttribute(
"use-sample-profile"))
1950 FunctionOrderList.push_back(
F);
1958 auto *
F = Node->getFunction();
1959 if (
F && !
F->isDeclaration() &&
F->hasFnAttribute(
"use-sample-profile"))
1960 FunctionOrderList.push_back(
F);
1967 dbgs() <<
"Function processing order:\n";
1968 for (
auto F :
reverse(FunctionOrderList)) {
1969 dbgs() <<
F->getName() <<
"\n";
1973 std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
1974 return FunctionOrderList;
1977 bool SampleProfileLoader::doInitialization(
Module &M,
1979 auto &Ctx =
M.getContext();
1983 if (std::error_code EC = ReaderOrErr.getError()) {
1984 std::string
Msg =
"Could not open profile: " +
EC.message();
1993 if (std::error_code EC = Reader->
read()) {
1994 std::string
Msg =
"profile reading failed: " +
EC.message();
2002 ProfAccForSymsInList =
2004 if (ProfAccForSymsInList) {
2005 NamesInProfile.
clear();
2007 NamesInProfile.
insert(NameTable->begin(), NameTable->end());
2008 CoverageTracker.setProfAccForSymsInList(
true);
2013 M, *
FAM, Ctx,
nullptr,
2058 ContextTracker = std::make_unique<SampleContextTracker>(
2064 ProbeManager = std::make_unique<PseudoProbeManager>(M);
2065 if (!ProbeManager->moduleIsProbed(M)) {
2067 "Pseudo-probe-based profile requires SampleProfileProbePass";
2078 return new SampleProfileLoaderLegacyPass();
2082 return new SampleProfileLoaderLegacyPass(
Name);
2087 GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
2090 if (
M.getProfileSummary(
false) ==
nullptr) {
2097 TotalCollectedSamples +=
I.second.getTotalSamples();
2101 for (
const auto &N_F :
M.getValueSymbolTable()) {
2103 Function *
F = dyn_cast<Function>(N_F.getValue());
2104 if (
F ==
nullptr || OrigName.
empty())
2108 if (OrigName != NewName && !NewName.
empty()) {
2115 r.first->second =
nullptr;
2120 if (
auto MapName = Remapper->lookUpNameInProfile(OrigName)) {
2121 if (*MapName != OrigName && !MapName->empty())
2127 "No empty StringRef should be added in SymbolMap");
2129 bool retval =
false;
2130 for (
auto F : buildFunctionOrder(M, CG)) {
2132 clearFunctionData();
2138 for (
const std::pair<Function *, NotInlinedProfileInfo> &pair :
2145 bool SampleProfileLoaderLegacyPass::runOnModule(
Module &M) {
2146 ACT = &getAnalysis<AssumptionCacheTracker>();
2147 TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
2148 TLIWP = &getAnalysis<TargetLibraryInfoWrapperPass>();
2150 &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
2151 return SampleLoader.runOnModule(M,
nullptr, PSI,
nullptr);
2155 LLVM_DEBUG(
dbgs() <<
"\n\nProcessing Function " <<
F.getName() <<
"\n");
2156 DILocation2SampleMap.clear();
2167 initialEntryCount = 0;
2170 ProfAccForSymsInList =
false;
2172 CoverageTracker.setProfAccForSymsInList(ProfAccForSymsInList);
2178 if (ProfAccForSymsInList) {
2180 if (PSL->contains(
F.getName()))
2181 initialEntryCount = 0;
2194 if (NamesInProfile.
count(CanonName))
2195 initialEntryCount = -1;
2200 if (!
F.getEntryCount())
2202 std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
2209 OwnedORE = std::make_unique<OptimizationRemarkEmitter>(&
F);
2210 ORE = OwnedORE.get();
2214 Samples = ContextTracker->getBaseSamplesFor(
F);
2218 if (Samples && !Samples->
empty())
2219 return emitAnnotations(
F);
2238 SampleProfileLoader SampleLoader(
2241 : ProfileRemappingFileName,
2242 LTOPhase, GetAssumptionCache, GetTTI, GetTLI);
2244 if (!SampleLoader.doInitialization(
M, &
FAM))
2249 if (!SampleLoader.runOnModule(
M, &AM, PSI, &CG))
A set of analyses that are preserved following a run of a transformation pass.
static cl::opt< bool > AnnotateSampleProfileInlinePhase("annotate-sample-profile-inline-phase", cl::Hidden, cl::init(false), cl::desc("Annotate LTO phase (prelink / postlink), or main (no LTO) for " "sample-profile inline pass name."))
const BodySampleMap & getBodySamples() const
Return all the samples collected in the body of the function.
int getCost() const
Get the inline cost estimate.
Analysis pass providing the TargetTransformInfo.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
cl::opt< bool > EnableExtTspBlockPlacement
This is an optimization pass for GlobalISel generic memory operations.
static cl::opt< bool > AllowRecursiveInline("sample-profile-recursive-inline", cl::Hidden, cl::desc("Allow sample loader inliner to inline recursive calls."))
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
into xmm2 addss xmm2 xmm1 xmm3 addss xmm3 movaps xmm0 unpcklps xmm0 ret seems silly when it could just be one addps Expand libm rounding functions main should enable SSE DAZ mode and other fast SSE modes Think about doing i64 math in SSE regs on x86 This testcase should have no SSE instructions in it
cl::opt< int > ProfileInlineGrowthLimit("sample-profile-inline-growth-limit", cl::Hidden, cl::init(12), cl::desc("The size growth ratio limit for proirity-based sample profile " "loader inlining."))
@ ContextDuplicatedIntoBase
cl::opt< int > ProfileInlineLimitMax("sample-profile-inline-limit-max", cl::Hidden, cl::init(10000), cl::desc("The upper bound of size growth limit for " "proirity-based sample profile loader inlining."))
static bool ProfileIsProbeBased
An analysis pass to compute the CallGraph for a Module.
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
const Function * getParent() const
Return the enclosing method, or null if none.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
@ ThinLTOPostLink
ThinLTO postlink (backend compile) phase.
bool profileIsProbeBased() const
Whether input profile is based on pseudo probes.
bool hasAttribute(ContextAttributeMask A)
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
static cl::opt< unsigned > SizeLimit("eif-limit", cl::init(6), cl::Hidden, cl::desc("Size limit in Hexagon early if-conversion"))
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
DISubprogram * getSubprogram() const
Get the attached subprogram.
static cl::opt< std::string > SampleProfileRemappingFile("sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden)
The basic data container for the call graph of a Module of IR.
FunctionAnalysisManager FAM
static cl::opt< unsigned > ProfileICPRelativeHotnessSkip("sample-profile-icp-relative-hotness-skip", cl::Hidden, cl::init(1), cl::desc("Skip relative hotness check for ICP up to given number of targets."))
bool erase(const KeyT &Val)
void emitInlinedIntoBasedOnCost(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, const BasicBlock *Block, const Function &Callee, const Function &Caller, const InlineCost &IC, bool ForProfileContext=false, const char *PassName=nullptr)
Emit ORE message based on cost (default heuristic).
ModulePass * createSampleProfileLoaderPass()
cl::opt< int > ProfileInlineLimitMin("sample-profile-inline-limit-min", cl::Hidden, cl::init(100), cl::desc("The lower bound of size growth limit for " "proirity-based sample profile loader inlining."))
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static void updateIDTMetaData(Instruction &Inst, const SmallVectorImpl< InstrProfValueData > &CallTargets, uint64_t Sum)
Update indirect call target profile metadata for Inst.
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile", "Sample Profile loader", false, false) INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass
static InlineCost getAlways(const char *Reason, Optional< CostBenefitPair > CostBenefit=None)
static cl::opt< bool > ProfileMergeInlinee("sample-profile-merge-inlinee", cl::Hidden, cl::init(true), cl::desc("Merge past inlinee's profile to outline version if sample " "profile loader decided not to inline a call site. It will " "only be enabled when top-down order of profile loading is " "enabled. "))
Thresholds to tune inline cost analysis.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
DiagnosticInfoOptimizationBase::Argument NV
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
void findInlinedFunctions(DenseSet< GlobalValue::GUID > &S, const StringMap< Function * > &SymbolMap, uint64_t Threshold) const
Recursively traverses all children, if the total sample count of the corresponding function is no les...
std::pair< typename Base::iterator, bool > insert(StringRef key)
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
StringRef getName() const
Return the function name.
std::map< std::string, FunctionSamples, std::less<> > FunctionSamplesMap
void initializeSampleProfileLoaderLegacyPassPass(PassRegistry &)
SampleProfileReaderItaniumRemapper * getRemapper()
Sort the nodes of a directed SCC in the decreasing order of the edge weights.
std::pair< iterator, bool > insert(const ValueT &V)
static cl::opt< ReplayInlinerSettings::Fallback > ProfileInlineReplayFallback("sample-profile-inline-replay-fallback", cl::init(ReplayInlinerSettings::Fallback::Original), cl::values(clEnumValN(ReplayInlinerSettings::Fallback::Original, "Original", "All decisions not in replay send to original advisor (default)"), clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline, "AlwaysInline", "All decisions not in replay are inlined"), clEnumValN(ReplayInlinerSettings::Fallback::NeverInline, "NeverInline", "All decisions not in replay are not inlined")), cl::desc("How sample profile inline replay treats sites that don't come " "from the replay. Original: defers to original advisor, " "AlwaysInline: inline all sites not in replay, NeverInline: " "inline no sites not in replay"), cl::Hidden)
static cl::opt< bool > ProfileSampleBlockAccurate("profile-sample-block-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "branches and calls as having 0 samples. Otherwise, treat " "them conservatively as unknown. "))
static cl::opt< bool > UsePreInlinerDecision("sample-profile-use-preinliner", cl::Hidden, cl::desc("Use the preinliner decisions stored in profile context."))
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Optional< bool > ComputeFullInlineCost
Compute inline cost even when the cost has exceeded the threshold.
LLVM Basic Block Representation.
void SetContextSynthetic()
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
unsigned getNumSuccessors() const
Return the number of successors that this instruction has.
StringRef getFuncName() const
Return the original function name.
bool extractProfTotalWeight(uint64_t &TotalVal) const
Retrieve total raw weight values of a branch.
static cl::opt< bool > DisableSampleLoaderInlining("disable-sample-loader-inlining", cl::Hidden, cl::init(false), cl::desc("If true, artifically skip inline transformation in sample-loader " "pass, and merge (or scale) profiles (as configured by " "--sample-profile-merge-inlinee)."))
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
bool profileIsCS() const
Whether input profile is fully context-sensitive.
cl::opt< bool > SortProfiledSCC("sort-profiled-scc-member", cl::init(true), cl::Hidden, cl::desc("Sort profiled recursion by edge weights."))
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
std::error_code read()
The interface to read sample profiles from the associated file.
(vector float) vec_cmpeq(*A, *B) C
Metadata * getMD(LLVMContext &Context, bool AddPartialField=true, bool AddPartialProfileRatioField=true)
Return summary information as metadata.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Represent the analysis usage information of a pass.
Represents the cost of inlining a function.
void updateProfileCallee(Function *Callee, int64_t EntryDelta, const ValueMap< const Value *, WeakTrackingVH > *VMap=nullptr)
Updates profile information by adjusting the entry count by adding EntryDelta then scaling callsite i...
DenseMap< SymbolStringPtr, JITEvaluatedSymbol > SymbolMap
A map from symbol names (as SymbolStringPtrs) to JITSymbols (address/flags pairs).
static uint64_t getGUID(StringRef Name)
static cl::opt< CallSiteFormat::Format > ProfileInlineReplayFormat("sample-profile-inline-replay-format", cl::init(CallSiteFormat::Format::LineColumnDiscriminator), cl::values(clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"), clEnumValN(CallSiteFormat::Format::LineColumn, "LineColumn", "<Line Number>:<Column Number>"), clEnumValN(CallSiteFormat::Format::LineDiscriminator, "LineDiscriminator", "<Line Number>.<Discriminator>"), clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator, "LineColumnDiscriminator", "<Line Number>:<Column Number>.<Discriminator> (default)")), cl::desc("How sample profile inline replay file is formatted"), cl::Hidden)
Function::ProfileCount ProfileCount
STATISTIC(NumFunctions, "Total number of functions")
int getNumOccurrences() const
void setProbeDistributionFactor(Instruction &Inst, float Factor)
A node in the call graph for a module.
BasicBlock * getSuccessor(unsigned Idx) const
Return the specified successor. This instruction must be a terminator.
static InlineCost get(int Cost, int Threshold)
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
Provides context on when an inline advisor is constructed in the pipeline (e.g., link phase,...
virtual std::vector< StringRef > * getNameTable()
It includes all the names that have samples either in outline instance or inline instance.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
Analysis providing profile information.
Implements a dense probed hash-table based set.
Function::ProfileCount ProfileCount
Used in the streaming interface as the general argument type.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
constexpr LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Module * getParent()
Get the module that this global value is contained inside of...
uint64_t getEntrySamples() const
Return the sample count of the first instruction of the function.
FunctionSamples * getSamplesFor(const Function &F)
Return the samples collected for function F.
static cl::opt< std::string > SampleProfileFile("sample-profile-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile file loaded by -sample-profile"), cl::Hidden)
A function analysis which provides an AssumptionCache.
Enumerate the SCCs of a directed graph in reverse topological order of the SCC DAG.
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
Representation of the samples collected for a function.
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
This is an important class for using LLVM in a threaded context.
static const Function * getCalledFunction(const Value *V, bool &IsNoBuiltin)
cl::opt< bool > SampleProfileUseProfi
initializer< Ty > init(const Ty &Val)
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets, float DistributionFactor)
Prorate call targets by a distribution factor.
iterator find(const_arg_type_t< KeyT > Val)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
constexpr const T & getValue() const &
StandardInstrumentations SI(Debug, VerifyEach)
static bool UseMD5
Whether the profile uses MD5 to represent string.
print Print MemDeps of function
static ErrorOr< std::unique_ptr< SampleProfileReader > > create(const std::string Filename, LLVMContext &C, FSDiscriminatorPass P=FSDiscriminatorPass::Base, const std::string RemapFilename="")
Create a sample profile reader appropriate to the file format.
A Module instance is used to store all the information related to an LLVM module.
static InlineCost getNever(const char *Reason, Optional< CostBenefitPair > CostBenefit=None)
virtual void setSkipFlatProf(bool Skip)
Don't read profile without context if the flag is set.
cl::opt< int > SampleHotCallSiteThreshold("sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000), cl::desc("Hot callsite threshold for proirity-based sample profile loader " "inlining."))
Diagnostic information for the sample profiler.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
StringSet - A wrapper for StringMap that provides set-like functionality.
An immutable pass that tracks lazily created AssumptionCache objects.
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
StringRef - Represent a constant reference to a string, i.e.
A cache of @llvm.assume calls within a function.
virtual std::unique_ptr< ProfileSymbolList > getProfileSymbolList()
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
static bool isIndirectCall(const MachineInstr &MI)
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
ProfileSummary & getSummary() const
Return the profile summary.
const CustomOperand< const MCSubtargetInfo & > Msg[]
@ ReplaySampleProfileInliner
amdgpu Simplify well known AMD library false FunctionCallee Callee
static bool runOnFunction(Function &F, bool PostInlining)
SampleProfileMap & getProfiles()
Return all the profiles.
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
FunctionSamples * getOrCreateSamplesFor(const Function &F)
Return the samples collected for function F, create empty FunctionSamples if it doesn't exist.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
static cl::opt< ReplayInlinerSettings::Scope > ProfileInlineReplayScope("sample-profile-inline-replay-scope", cl::init(ReplayInlinerSettings::Scope::Function), cl::values(clEnumValN(ReplayInlinerSettings::Scope::Function, "Function", "Replay on functions that have remarks associated " "with them (default)"), clEnumValN(ReplayInlinerSettings::Scope::Module, "Module", "Replay on the entire module")), cl::desc("Whether inline replay should be applied to the entire " "Module or just the Functions (default) that are present as " "callers in remarks during sample profile inlining."), cl::Hidden)
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e.
bool callsiteIsHot(const FunctionSamples *CallsiteFS, ProfileSummaryInfo *PSI, bool ProfAccForSymsInList)
Return true if the given callsite is hot with respect to the hot cutoff threshold.
void sort(IteratorTy Start, IteratorTy End)
Provides information about what library functions are available for the current target.
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
static cl::opt< bool > UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden, cl::desc("Process functions in a top-down order " "defined by the profiled call graph when " "-sample-profile-top-down-load is on."))
Sample-based profile reader.
@ None
No LTO/ThinLTO behavior needed.
sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight=1)
Merge the samples in Other into this one.
const uint64_t NOMORE_ICP_MAGICNUM
Magic number in the value profile metadata showing a target has been promoted for the instruction and...
size_type count(StringRef Key) const
count - Return 1 if the element is in the map, 0 otherwise.
void setModule(const Module *Mod)
cl::opt< int > SampleColdCallSiteThreshold("sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining cold callsites"))
Module & getModule() const
Returns the module the call graph corresponds to.
bool profileIsPreInlined() const
Whether input profile contains ShouldBeInlined contexts.
Optional< PseudoProbe > extractProbe(const Instruction &Inst)
SampleContext & getContext() const
static cl::opt< bool > ProfileSampleAccurate("profile-sample-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "callsite and function as having 0 samples. Otherwise, treat " "un-sampled callsites and functions conservatively as unknown. "))
CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
bool isAtEnd() const
Direct loop termination test which is more efficient than comparison with end().
std::string AnnotateInlinePassName(InlineContext IC)
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
sample Sample Profile loader
static const SortedCallTargetSet SortCallTargets(const CallTargetMap &Targets)
Sort call targets in descending order of call frequency.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
static SmallVector< InstrProfValueData, 2 > GetSortedValueDataFromCallTargets(const SampleRecord::CallTargetMap &M)
Returns the entries of CallTargetMap M sorted by count in descending order.
static cl::opt< bool > OverwriteExistingWeights("overwrite-existing-weights", cl::Hidden, cl::init(false), cl::desc("Ignore existing branch weights on IR and always overwrite."))
static cl::opt< bool > ProfileTopDownLoad("sample-profile-top-down-load", cl::Hidden, cl::init(true), cl::desc("Do profile annotation and inlining for functions in top-down " "order of call graph during sample profile loading. It only " "works for new pass manager. "))
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
const BasicBlock * getParent() const
Represents either an error or a value T.
static cl::opt< std::string > ProfileInlineReplayFile("sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"), cl::desc("Optimization remarks file containing inline remarks to be replayed " "by inlining from sample profile loader."), cl::Hidden)
llvm::cl::opt< bool > UseIterativeBFIInference
static cl::opt< bool > ProfileSizeInline("sample-profile-inline-size", cl::Hidden, cl::init(false), cl::desc("Inline cold call sites in profile loader if it's beneficial " "for code size."))
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, Function *ForwardVarArgsTo=nullptr)
This function inlines the called function into the basic block of the caller.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
A container for analyses that lazily runs them and caches their results.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
This class represents a function call, abstracting a target machine's calling convention.
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
bool getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, InstrProfValueData ValueData[], uint32_t &ActualNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst which is annotated with value profile meta data.
AnalysisUsage & addRequired()
void mergeAttributesForInlining(Function &Caller, const Function &Callee)
Merge caller's and callee's attributes.
Class to represent profile counts.
Optional< bool > AllowRecursiveCall
Indicate whether we allow inlining for recursive call.
LLVM Value Representation.
static cl::opt< bool > CallsitePrioritizedInline("sample-profile-prioritized-inline", cl::Hidden, cl::desc("Use call site prioritized inlining for sample profile loader." "Currently only CSSPGO is supported."))
Analysis pass providing the TargetLibraryInfo.
static cl::opt< unsigned > ProfileICPRelativeHotness("sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25), cl::desc("Relative hotness percentage threshold for indirect " "call promotion in proirity-based sample profile loader inlining."))
static cl::opt< unsigned > MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden, cl::desc("Max number of promotions for a single indirect " "call callsite in sample profile loader"))
static cl::opt< bool > ProfileAccurateForSymsInList("profile-accurate-for-symsinlist", cl::Hidden, cl::init(true), cl::desc("For symbols in profile symbol list, regard their profiles to " "be accurate. It may be overriden by profile-sample-accurate. "))
std::unique_ptr< InlineAdvisor > getReplayInlineAdvisor(Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context, std::unique_ptr< InlineAdvisor > OriginalAdvisor, const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks, InlineContext IC)
static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate)
Check whether the indirect call promotion history of Inst allows the promotion for Candidate.
static LineLocation getCallSiteIdentifier(const DILocation *DIL, bool ProfileIsFS=false)
Returns a unique call site identifier for a given debug location of a call instruction.
reference emplace_back(ArgTypes &&... Args)
iterator insert(iterator I, T &&Elt)