LLVM  16.0.0git
SampleProfile.cpp
Go to the documentation of this file.
1 //===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the SampleProfileLoader transformation. This pass
10 // reads a profile file generated by a sampling profiler (e.g. Linux Perf -
11 // http://perf.wiki.kernel.org/) and generates IR metadata to reflect the
12 // profile information in the given profile.
13 //
14 // This pass generates branch weight annotations on the IR:
15 //
16 // - prof: Represents branch weights. This annotation is added to branches
17 // to indicate the weights of each edge coming out of the branch.
18 // The weight of each edge is the weight of the target block for
19 // that edge. The weight of a block B is computed as the maximum
20 // number of samples found in B.
21 //
22 //===----------------------------------------------------------------------===//
23 
25 #include "llvm/ADT/ArrayRef.h"
26 #include "llvm/ADT/DenseMap.h"
27 #include "llvm/ADT/DenseSet.h"
28 #include "llvm/ADT/MapVector.h"
29 #include "llvm/ADT/PriorityQueue.h"
30 #include "llvm/ADT/SCCIterator.h"
31 #include "llvm/ADT/SmallVector.h"
32 #include "llvm/ADT/Statistic.h"
33 #include "llvm/ADT/StringMap.h"
34 #include "llvm/ADT/StringRef.h"
35 #include "llvm/ADT/Twine.h"
46 #include "llvm/IR/BasicBlock.h"
47 #include "llvm/IR/DebugLoc.h"
48 #include "llvm/IR/DiagnosticInfo.h"
49 #include "llvm/IR/Function.h"
50 #include "llvm/IR/GlobalValue.h"
51 #include "llvm/IR/InstrTypes.h"
52 #include "llvm/IR/Instruction.h"
53 #include "llvm/IR/Instructions.h"
54 #include "llvm/IR/IntrinsicInst.h"
55 #include "llvm/IR/LLVMContext.h"
56 #include "llvm/IR/MDBuilder.h"
57 #include "llvm/IR/Module.h"
58 #include "llvm/IR/PassManager.h"
59 #include "llvm/IR/PseudoProbe.h"
61 #include "llvm/InitializePasses.h"
62 #include "llvm/Pass.h"
66 #include "llvm/Support/Casting.h"
68 #include "llvm/Support/Debug.h"
69 #include "llvm/Support/ErrorOr.h"
71 #include "llvm/Transforms/IPO.h"
81 #include <algorithm>
82 #include <cassert>
83 #include <cstdint>
84 #include <functional>
85 #include <limits>
86 #include <map>
87 #include <memory>
88 #include <queue>
89 #include <string>
90 #include <system_error>
91 #include <utility>
92 #include <vector>
93 
94 using namespace llvm;
95 using namespace sampleprof;
96 using namespace llvm::sampleprofutil;
98 #define DEBUG_TYPE "sample-profile"
99 #define CSINLINE_DEBUG DEBUG_TYPE "-inline"
100 
// Pass statistics. Each STATISTIC pairs a counter name with the description
// string given next to it; the first group tracks context-sensitive inlining
// outcomes and CFG profile matching.
101 STATISTIC(NumCSInlined,
102  "Number of functions inlined with context sensitive profile");
103 STATISTIC(NumCSNotInlined,
104  "Number of functions not inlined with context sensitive profile");
105 STATISTIC(NumMismatchedProfile,
106  "Number of functions with CFG mismatched profile");
107 STATISTIC(NumMatchedProfile, "Number of functions with CFG matched profile");
108 STATISTIC(NumDuplicatedInlinesite,
109  "Number of inlined callsites with a partial distribution factor");
110 
// Counters for the reasons FDO inlining stopped on a function because of a
// size limit (min / max / growth).
111 STATISTIC(NumCSInlinedHitMinLimit,
112  "Number of functions with FDO inline stopped due to min size limit");
113 STATISTIC(NumCSInlinedHitMaxLimit,
114  "Number of functions with FDO inline stopped due to max size limit");
115 STATISTIC(
116  NumCSInlinedHitGrowthLimit,
117  "Number of functions with FDO inline stopped due to growth size limit");
118 
119 // Command line option to specify the file to read samples from. This is
120 // mainly used for debugging.
122  "sample-profile-file", cl::init(""), cl::value_desc("filename"),
123  cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);
124 
125 // The named file contains a set of transformations that may have been applied
126 // to the symbol names between the program from which the sample data was
127 // collected and the current program's symbols.
129  "sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"),
130  cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden);
131 
133  "report-profile-staleness", cl::Hidden, cl::init(false),
134  cl::desc("Compute and report stale profile statistical metrics."));
135 
137  "persist-profile-staleness", cl::Hidden, cl::init(false),
138  cl::desc("Compute stale profile statistical metrics and write it into the "
139  "native object file(.llvm_stats section)."));
140 
142  "profile-sample-accurate", cl::Hidden, cl::init(false),
143  cl::desc("If the sample profile is accurate, we will mark all un-sampled "
144  "callsite and function as having 0 samples. Otherwise, treat "
145  "un-sampled callsites and functions conservatively as unknown. "));
146 
148  "profile-sample-block-accurate", cl::Hidden, cl::init(false),
149  cl::desc("If the sample profile is accurate, we will mark all un-sampled "
150  "branches and calls as having 0 samples. Otherwise, treat "
151  "them conservatively as unknown. "));
152 
154  "profile-accurate-for-symsinlist", cl::Hidden, cl::init(true),
155  cl::desc("For symbols in profile symbol list, regard their profiles to "
156  "be accurate. It may be overriden by profile-sample-accurate. "));
157 
159  "sample-profile-merge-inlinee", cl::Hidden, cl::init(true),
160  cl::desc("Merge past inlinee's profile to outline version if sample "
161  "profile loader decided not to inline a call site. It will "
162  "only be enabled when top-down order of profile loading is "
163  "enabled. "));
164 
166  "sample-profile-top-down-load", cl::Hidden, cl::init(true),
167  cl::desc("Do profile annotation and inlining for functions in top-down "
168  "order of call graph during sample profile loading. It only "
169  "works for new pass manager. "));
170 
// Hidden boolean option, enabled by default. Its description ties it to
// -sample-profile-top-down-load; no reader of this flag is visible in this
// part of the file.
171 static cl::opt<bool>
172  UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden,
173  cl::desc("Process functions in a top-down order "
174  "defined by the profiled call graph when "
175  "-sample-profile-top-down-load is on."));
177  SortProfiledSCC("sort-profiled-scc-member", cl::init(true), cl::Hidden,
178  cl::desc("Sort profiled recursion by edge weights."));
179 
181  "sample-profile-inline-size", cl::Hidden, cl::init(false),
182  cl::desc("Inline cold call sites in profile loader if it's beneficial "
183  "for code size."));
184 
185 // Since profiles are consumed by many passes, turning on this option has
186 // side effects. For instance, pre-link SCC inliner would see merged profiles
187 // and inline the hot functions (that are skipped in this pass).
189  "disable-sample-loader-inlining", cl::Hidden, cl::init(false),
190  cl::desc("If true, artifically skip inline transformation in sample-loader "
191  "pass, and merge (or scale) profiles (as configured by "
192  "--sample-profile-merge-inlinee)."));
193 
195  "sample-profile-inline-growth-limit", cl::Hidden, cl::init(12),
196  cl::desc("The size growth ratio limit for proirity-based sample profile "
197  "loader inlining."));
198 
200  "sample-profile-inline-limit-min", cl::Hidden, cl::init(100),
201  cl::desc("The lower bound of size growth limit for "
202  "proirity-based sample profile loader inlining."));
203 
205  "sample-profile-inline-limit-max", cl::Hidden, cl::init(10000),
206  cl::desc("The upper bound of size growth limit for "
207  "proirity-based sample profile loader inlining."));
208 
210  "sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000),
211  cl::desc("Hot callsite threshold for proirity-based sample profile loader "
212  "inlining."));
213 
215  "sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45),
216  cl::desc("Threshold for inlining cold callsites"));
217 
219  "sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25),
220  cl::desc(
221  "Relative hotness percentage threshold for indirect "
222  "call promotion in proirity-based sample profile loader inlining."));
223 
225  "sample-profile-icp-relative-hotness-skip", cl::Hidden, cl::init(1),
226  cl::desc(
227  "Skip relative hotness check for ICP up to given number of targets."));
228 
230  "sample-profile-prioritized-inline", cl::Hidden,
231 
232  cl::desc("Use call site prioritized inlining for sample profile loader."
233  "Currently only CSSPGO is supported."));
234 
236  "sample-profile-use-preinliner", cl::Hidden,
237 
238  cl::desc("Use the preinliner decisions stored in profile context."));
239 
241  "sample-profile-recursive-inline", cl::Hidden,
242 
243  cl::desc("Allow sample loader inliner to inline recursive calls."));
244 
246  "sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"),
247  cl::desc(
248  "Optimization remarks file containing inline remarks to be replayed "
249  "by inlining from sample profile loader."),
250  cl::Hidden);
251 
253  "sample-profile-inline-replay-scope",
256  "Replay on functions that have remarks associated "
257  "with them (default)"),
259  "Replay on the entire module")),
260  cl::desc("Whether inline replay should be applied to the entire "
261  "Module or just the Functions (default) that are present as "
262  "callers in remarks during sample profile inlining."),
263  cl::Hidden);
264 
266  "sample-profile-inline-replay-fallback",
268  cl::values(
269  clEnumValN(
271  "All decisions not in replay send to original advisor (default)"),
273  "AlwaysInline", "All decisions not in replay are inlined"),
275  "All decisions not in replay are not inlined")),
276  cl::desc("How sample profile inline replay treats sites that don't come "
277  "from the replay. Original: defers to original advisor, "
278  "AlwaysInline: inline all sites not in replay, NeverInline: "
279  "inline no sites not in replay"),
280  cl::Hidden);
281 
283  "sample-profile-inline-replay-format",
285  cl::values(
286  clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"),
288  "<Line Number>:<Column Number>"),
290  "LineDiscriminator", "<Line Number>.<Discriminator>"),
292  "LineColumnDiscriminator",
293  "<Line Number>:<Column Number>.<Discriminator> (default)")),
294  cl::desc("How sample profile inline replay file is formatted"), cl::Hidden);
295 
// Cap on the number of promotions recorded for one indirect call site
// (default 3). doesHistoryAllowICP sizes its value-profile buffer with it and
// refuses further promotion once that many NOMORE_ICP_MAGICNUM entries are
// present; updateIDTMetaData returns immediately when it is 0.
296 static cl::opt<unsigned>
297  MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden,
298  cl::desc("Max number of promotions for a single indirect "
299  "call callsite in sample profile loader"));
300 
302  "overwrite-existing-weights", cl::Hidden, cl::init(false),
303  cl::desc("Ignore existing branch weights on IR and always overwrite."));
304 
306  "annotate-sample-profile-inline-phase", cl::Hidden, cl::init(false),
307  cl::desc("Annotate LTO phase (prelink / postlink), or main (no LTO) for "
308  "sample-profile inline pass name."));
309 
311 
312 namespace {
313 
// Basic block -> 64-bit weight.
314 using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
// Basic block -> basic block (presumably the representative of the block's
// equivalence class -- confirm against the users of this alias).
315 using EquivalenceClassMap = DenseMap<const BasicBlock *, const BasicBlock *>;
// A CFG edge expressed as a pair of basic blocks.
316 using Edge = std::pair<const BasicBlock *, const BasicBlock *>;
// Edge -> 64-bit weight.
317 using EdgeWeightMap = DenseMap<Edge, uint64_t>;
318 using BlockEdgeMap =
320 
// Scoped helper for MD5-based profiles: on construction it fills a
// GUID -> function name map from the module and attaches that map to every
// FunctionSamples known to the reader (including nested callsite samples);
// on destruction it clears the map and detaches it again. When the reader
// does not use MD5, both constructor and destructor return without touching
// anything.
//
// NOTE(review): this listing has gaps -- numbers 342 and 384 are absent -- so
// the definition of CanonName and one member declaration are not visible here.
321 class GUIDToFuncNameMapper {
322 public:
  // \param M Module whose functions populate the map.
  // \param Reader Profile reader whose FunctionSamples get the map attached.
  // \param GUIDToFuncNameMap Caller-owned map that is filled here and cleared
  // by the destructor.
323  GUIDToFuncNameMapper(Module &M, SampleProfileReader &Reader,
324  DenseMap<uint64_t, StringRef> &GUIDToFuncNameMap)
325  : CurrentReader(Reader), CurrentModule(M),
326  CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) {
327  if (!CurrentReader.useMD5())
328  return;
329 
330  for (const auto &F : CurrentModule) {
331  StringRef OrigName = F.getName();
  // insert() keeps an existing entry, so the first name seen for a GUID wins.
332  CurrentGUIDToFuncNameMap.insert(
333  {Function::getGUID(OrigName), OrigName});
334 
335  // Local to global var promotion used by optimization like thinlto
336  // will rename the var and add suffix like ".llvm.xxx" to the
337  // original local name. In sample profile, the suffixes of function
338  // names are all stripped. Since it is possible that the mapper is
339  // built in post-thin-link phase and var promotion has been done,
340  // we need to add the substring of function name without the suffix
341  // into the GUIDToFuncNameMap.
  // NOTE(review): listing line 342, which defines CanonName, is absent from
  // this extract.
343  if (CanonName != OrigName)
344  CurrentGUIDToFuncNameMap.insert(
345  {Function::getGUID(CanonName), CanonName});
346  }
347 
348  // Update GUIDToFuncNameMap for each function including inlinees.
349  SetGUIDToFuncNameMapForAll(&CurrentGUIDToFuncNameMap);
350  }
351 
  // Undoes the constructor's work for MD5-based profiles.
352  ~GUIDToFuncNameMapper() {
353  if (!CurrentReader.useMD5())
354  return;
355 
356  CurrentGUIDToFuncNameMap.clear();
357 
358  // Reset GUIDToFuncNameMap of each function as the map is no
359  // longer valid at this point.
360  SetGUIDToFuncNameMapForAll(nullptr);
361  }
362 
363 private:
  // Sets FunctionSamples::GUIDToFuncNameMap to \p Map for every profile of the
  // reader and, breadth-first through a work queue, for every FunctionSamples
  // reachable via getCallsiteSamples().
364  void SetGUIDToFuncNameMapForAll(DenseMap<uint64_t, StringRef> *Map) {
365  std::queue<FunctionSamples *> FSToUpdate;
366  for (auto &IFS : CurrentReader.getProfiles()) {
367  FSToUpdate.push(&IFS.second);
368  }
369 
370  while (!FSToUpdate.empty()) {
371  FunctionSamples *FS = FSToUpdate.front();
372  FSToUpdate.pop();
373  FS->GUIDToFuncNameMap = Map;
374  for (const auto &ICS : FS->getCallsiteSamples()) {
375  const FunctionSamplesMap &FSMap = ICS.second;
376  for (const auto &IFS : FSMap) {
  // The callsite map is iterated through a const reference, hence the
  // const_cast; this inner FS shadows the outer pointer of the same name.
377  FunctionSamples &FS = const_cast<FunctionSamples &>(IFS.second);
378  FSToUpdate.push(&FS);
379  }
380  }
381  }
382  }
383 
  // NOTE(review): listing line 384 is absent from this extract; the
  // CurrentReader member used above is presumably declared there.
385  Module &CurrentModule;
386  DenseMap<uint64_t, StringRef> &CurrentGUIDToFuncNameMap;
387 };
388 
389 // Inline candidate used by iterative callsite prioritized inliner.
// Plain aggregate; the field order below is part of its interface for any
// aggregate initialization.
390 struct InlineCandidate {
  // The call instruction this candidate refers to.
391  CallBase *CallInstr;
  // Profile of the callee. CandidateComparer asserts it is non-null when it
  // has to break a tie between two candidates.
392  const FunctionSamples *CalleeSamples;
393  // Prorated callsite count, which will be used to guide inlining. For example,
394  // if a callsite is duplicated in LTO prelink, then in LTO postlink the two
395  // copies will get their own distribution factors and their prorated counts
396  // will be used to decide if they should be inlined independently.
397  uint64_t CallsiteCount;
398  // Call site distribution factor to prorate the profile samples for a
399  // duplicated callsite. Default value is 1.0.
400  float CallsiteDistribution;
401 };
402 
403 // Inline candidate comparer using call site weight
404 struct CandidateComparer {
405  bool operator()(const InlineCandidate &LHS, const InlineCandidate &RHS) {
406  if (LHS.CallsiteCount != RHS.CallsiteCount)
407  return LHS.CallsiteCount < RHS.CallsiteCount;
408 
409  const FunctionSamples *LCS = LHS.CalleeSamples;
410  const FunctionSamples *RCS = RHS.CalleeSamples;
411  assert(LCS && RCS && "Expect non-null FunctionSamples");
412 
413  // Tie breaker using number of samples try to favor smaller functions first
414  if (LCS->getBodySamples().size() != RCS->getBodySamples().size())
415  return LCS->getBodySamples().size() > RCS->getBodySamples().size();
416 
417  // Tie breaker using GUID so we have stable/deterministic inlining order
418  return LCS->getGUID(LCS->getName()) < RCS->getGUID(RCS->getName());
419  }
420 };
421 
422 using CandidateQueue =
424  CandidateComparer>;
425 
426 // Sample profile matching - fuzzy match.
// Holds references to the module and profile reader plus an optional pseudo
// probe manager, and accumulates the mismatch counters below. Only the
// declarations of detectProfileMismatch are visible in this part of the file.
427 class SampleProfileMatcher {
428  Module &M;
429  SampleProfileReader &Reader;
430  const PseudoProbeManager *ProbeManager;
431 
432  // Profile mismatching statistics. All counters start at zero.
433  uint64_t TotalProfiledCallsite = 0;
434  uint64_t NumMismatchedCallsite = 0;
435  uint64_t MismatchedCallsiteSamples = 0;
436  uint64_t TotalCallsiteSamples = 0;
437  uint64_t TotalProfiledFunc = 0;
438  uint64_t NumMismatchedFuncHash = 0;
439  uint64_t MismatchedFuncHashSamples = 0;
440  uint64_t TotalFuncHashSamples = 0;
441 
442 public:
  // Stores the arguments as-is; \p ProbeManager is kept as a non-owning
  // pointer.
443  SampleProfileMatcher(Module &M, SampleProfileReader &Reader,
444  const PseudoProbeManager *ProbeManager)
445  : M(M), Reader(Reader), ProbeManager(ProbeManager) {}
  // Module-level entry point (defined outside this part of the file).
446  void detectProfileMismatch();
  // Per-function overload taking the function and its profile (defined
  // outside this part of the file).
447  void detectProfileMismatch(const Function &F, const FunctionSamples &FS);
448 };
449 
450 /// Sample profile pass.
451 ///
452 /// This pass reads profile data from the file specified by
453 /// -sample-profile-file and annotates every affected function with the
454 /// profile information found in that file.
///
/// NOTE(review): this listing has gaps inside the class (numbers 463,
/// 468-469, 477, 488, 513, 523, 525-526 and 556 are absent), so part of the
/// constructor's initializer list and several declarations are not visible.
455 class SampleProfileLoader final
456  : public SampleProfileLoaderBaseImpl<BasicBlock> {
457 public:
  /// The three callbacks are moved into the GetAC / GetTTI / GetTLI members;
  /// LTOPhase is stored as given.
458  SampleProfileLoader(
459  StringRef Name, StringRef RemapName, ThinOrFullLTOPhase LTOPhase,
460  std::function<AssumptionCache &(Function &)> GetAssumptionCache,
461  std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo,
462  std::function<const TargetLibraryInfo &(Function &)> GetTLI)
464  GetAC(std::move(GetAssumptionCache)),
465  GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)),
466  LTOPhase(LTOPhase),
467  AnnotatedPassName(AnnotateSampleProfileInlinePhase
470  : CSINLINE_DEBUG) {}
471 
472  bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr);
473  bool runOnModule(Module &M, ModuleAnalysisManager *AM,
474  ProfileSummaryInfo *_PSI, CallGraph *CG);
475 
476 protected:
478  bool emitAnnotations(Function &F);
479  ErrorOr<uint64_t> getInstWeight(const Instruction &I) override;
480  ErrorOr<uint64_t> getProbeWeight(const Instruction &I);
481  const FunctionSamples *findCalleeFunctionSamples(const CallBase &I) const;
482  const FunctionSamples *
483  findFunctionSamples(const Instruction &I) const override;
484  std::vector<const FunctionSamples *>
485  findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
486  void findExternalInlineCandidate(CallBase *CB, const FunctionSamples *Samples,
487  DenseSet<GlobalValue::GUID> &InlinedGUIDs,
489  uint64_t Threshold);
490  // Attempt to promote indirect call and also inline the promoted call
491  bool tryPromoteAndInlineCandidate(
492  Function &F, InlineCandidate &Candidate, uint64_t SumOrigin,
493  uint64_t &Sum, SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
494 
495  bool inlineHotFunctions(Function &F,
496  DenseSet<GlobalValue::GUID> &InlinedGUIDs);
497  std::optional<InlineCost> getExternalInlineAdvisorCost(CallBase &CB);
498  bool getExternalInlineAdvisorShouldInline(CallBase &CB);
499  InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
500  bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB);
501  bool
502  tryInlineCandidate(InlineCandidate &Candidate,
503  SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
504  bool
505  inlineHotFunctionsWithPriority(Function &F,
506  DenseSet<GlobalValue::GUID> &InlinedGUIDs);
507  // Inline cold/small functions in addition to hot ones
508  bool shouldInlineColdCallee(CallBase &CallInst);
509  void emitOptimizationRemarksForInlineCandidates(
510  const SmallVectorImpl<CallBase *> &Candidates, const Function &F,
511  bool Hot);
512  void promoteMergeNotInlinedContextSamples(
514  const Function &F);
515  std::vector<Function *> buildFunctionOrder(Module &M, CallGraph *CG);
516  std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(CallGraph &CG);
517  void generateMDProfMetadata(Function &F);
518 
519  /// Map from function name to Function *. Used to find the function from
520  /// the function name. If the function name contains suffix, additional
521  /// entry is added to map from the stripped name to the function if there
522  /// is one-to-one mapping.
  // NOTE(review): the declaration this comment documents (listing line 523)
  // is absent from this extract.
524 
527  std::function<const TargetLibraryInfo &(Function &)> GetTLI;
528 
529  /// Profile tracker for different context.
530  std::unique_ptr<SampleContextTracker> ContextTracker;
531 
532  /// Flag indicating which LTO/ThinLTO phase the pass is invoked in.
533  ///
534  /// We need to know the LTO phase because for example in ThinLTOPrelink
535  /// phase, in annotation, we should not promote indirect calls. Instead,
536  /// we will mark GUIDs that needs to be annotated to the function.
537  const ThinOrFullLTOPhase LTOPhase;
  /// Pass name returned by getAnnotatedRemarkPassName().
538  const std::string AnnotatedPassName;
539 
540  /// Profile Symbol list tells whether a function name appears in the binary
541  /// used to generate the current profile.
542  std::unique_ptr<ProfileSymbolList> PSL;
543 
544  /// Total number of samples collected in this profile.
545  ///
546  /// This is the sum of all the samples collected in all the functions executed
547  /// at runtime.
548  uint64_t TotalCollectedSamples = 0;
549 
550  // Information recorded when we declined to inline a call site
551  // because we have determined it is too cold is accumulated for
552  // each callee function. Initially this is just the entry count.
553  struct NotInlinedProfileInfo {
554  uint64_t entryCount;
555  };
557 
558  // GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for
559  // all the function symbols defined or declared in current module.
560  DenseMap<uint64_t, StringRef> GUIDToFuncNameMap;
561 
562  // All the Names used in FunctionSamples including outline function
563  // names, inline instance names and call target names.
564  StringSet<> NamesInProfile;
565 
566  // For symbol in profile symbol list, whether to regard their profiles
567  // to be accurate. It is mainly decided by existence of profile symbol
568  // list and -profile-accurate-for-symsinlist flag, but it can be
569  // overridden by -profile-sample-accurate or profile-sample-accurate
570  // attribute.
571  bool ProfAccForSymsInList;
572 
573  // External inline advisor used to replay inline decision from remarks.
574  std::unique_ptr<InlineAdvisor> ExternalInlineAdvisor;
575 
576  // A pseudo probe helper to correlate the imported sample counts.
577  std::unique_ptr<PseudoProbeManager> ProbeManager;
578 
579  // A helper to implement the sample profile matching algorithm.
580  std::unique_ptr<SampleProfileMatcher> MatchingManager;
581 
582 private:
  // Returns AnnotatedPassName as a C string; the pointer is only valid while
  // this object is alive.
583  const char *getAnnotatedRemarkPassName() const {
584  return AnnotatedPassName.c_str();
585  }
586 };
587 } // end anonymous namespace
588 
// Returns the sample weight to use for \p Inst.
//
// Yields an empty std::error_code (i.e. "no weight") when \p Inst has no debug
// location or is a branch, intrinsic or PHI node; yields 0 for a direct call
// whose callee has samples in the profile (see the note at that check);
// otherwise defers to getInstWeightImpl. One early path delegates to
// getProbeWeight instead.
589 ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
  // NOTE(review): listing line 590 is absent from this extract, so the
  // condition guarding this early return is not visible here.
591  return getProbeWeight(Inst);
592 
593  const DebugLoc &DLoc = Inst.getDebugLoc();
594  if (!DLoc)
595  return std::error_code();
596 
597  // Ignore all intrinsics, phinodes and branch instructions.
598  // Branch and phinodes instruction usually contains debug info from sources
599  // outside of the residing basic block, thus we ignore them during annotation.
600  if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst) || isa<PHINode>(Inst))
601  return std::error_code();
602 
603  // For non-CS profile, if a direct call/invoke instruction is inlined in
604  // profile (findCalleeFunctionSamples returns non-empty result), but not
605  // inlined here, it means that the inlined callsite has no sample, thus the
606  // call instruction should have 0 count.
607  // For CS profile, the callsite count of previously inlined callees is
608  // populated with the entry count of the callees.
  // NOTE(review): listing line 609 is absent from this extract; per the
  // comment above it presumably restricts the check below to non-CS
  // profiles -- confirm against the full source.
610  if (const auto *CB = dyn_cast<CallBase>(&Inst))
611  if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
612  return 0;
613 
614  return getInstWeightImpl(Inst);
615 }
616 
// Returns the sample weight of \p Inst for pseudo-probe based profiles.
//
// The result is the sample count recorded for the probe id, multiplied by the
// probe's Factor. The first time a given probe's samples are marked as used,
// an "AppliedSamples" optimization remark is emitted.
617 // Here use error_code to represent: 1) The dangling probe. 2) Ignore the weight
618 // of non-probe instruction. So if all instructions of the BB give error_code,
619 // tell the inference algorithm to infer the BB weight.
620 ErrorOr<uint64_t> SampleProfileLoader::getProbeWeight(const Instruction &Inst) {
  // NOTE(review): listing line 621 (the start of the statement that carries
  // the message below, presumably an assert) is absent from this extract.
622  "Profile is not pseudo probe based");
623  std::optional<PseudoProbe> Probe = extractProbe(Inst);
624  // Ignore the non-probe instruction. If none of the instruction in the BB is
625  // probe, we choose to infer the BB's weight.
626  if (!Probe)
627  return std::error_code();
628 
629  const FunctionSamples *FS = findFunctionSamples(Inst);
630  // If none of the instruction has FunctionSample, we choose to return zero
631  // value sample to indicate the BB is cold. This could happen when the
632  // instruction is from inlinee and no profile data is found.
633  // FIXME: This should not be affected by the source drift issue as 1) if the
634  // newly added function is top-level inliner, it won't match the CFG checksum
635  // in the function profile or 2) if it's the inlinee, the inlinee should have
636  // a profile, otherwise it wouldn't be inlined. For non-probe based profile,
637  // we can improve it by adding a switch for profile-sample-block-accurate for
638  // block level counts in the future.
639  if (!FS)
640  return 0;
641 
642  // For non-CS profile, If a direct call/invoke instruction is inlined in
643  // profile (findCalleeFunctionSamples returns non-empty result), but not
644  // inlined here, it means that the inlined callsite has no sample, thus the
645  // call instruction should have 0 count.
646  // For CS profile, the callsite count of previously inlined callees is
647  // populated with the entry count of the callees.
  // NOTE(review): listing line 648 is absent from this extract; per the
  // comment above it presumably restricts the check below to non-CS
  // profiles -- confirm against the full source.
649  if (const auto *CB = dyn_cast<CallBase>(&Inst))
650  if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
651  return 0;
652 
  // Look up the samples recorded for this probe id (second argument 0).
653  const ErrorOr<uint64_t> &R = FS->findSamplesAt(Probe->Id, 0);
654  if (R) {
  // Scale the raw count by the probe's factor; the product is converted to
  // uint64_t on assignment.
655  uint64_t Samples = R.get() * Probe->Factor;
656  bool FirstMark = CoverageTracker.markSamplesUsed(FS, Probe->Id, 0, Samples);
657  if (FirstMark) {
658  ORE->emit([&]() {
659  OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "AppliedSamples", &Inst);
660  Remark << "Applied " << ore::NV("NumSamples", Samples);
661  Remark << " samples from profile (ProbeId=";
662  Remark << ore::NV("ProbeId", Probe->Id);
663  Remark << ", Factor=";
664  Remark << ore::NV("Factor", Probe->Factor);
665  Remark << ", OriginalSamples=";
666  Remark << ore::NV("OriginalSamples", R.get());
667  Remark << ")";
668  return Remark;
669  });
670  }
671  LLVM_DEBUG(dbgs() << " " << Probe->Id << ":" << Inst
672  << " - weight: " << R.get() << " - factor: "
673  << format("%0.2f", Probe->Factor) << ")\n");
674  return Samples;
675  }
  // No samples recorded for the probe: propagate the lookup's error state.
676  return R;
677 }
678 
679 /// Get the FunctionSamples for a call instruction.
680 ///
681 /// The FunctionSamples of a call/invoke instruction \p Inst is the inlined
682 /// instance in which that call instruction is calling to. It contains
683 /// all samples that reside in the inlined instance. We first find the
684 /// inlined instance in which the call instruction is from, then we
685 /// traverse its children to find the callsite with the matching
686 /// location.
687 ///
688 /// \param Inst Call/Invoke instruction to query.
689 ///
690 /// \returns The FunctionSamples pointer to the inlined instance, or nullptr
/// when \p Inst has no debug location or no enclosing FunctionSamples is
/// found for it.
691 const FunctionSamples *
692 SampleProfileLoader::findCalleeFunctionSamples(const CallBase &Inst) const {
693  const DILocation *DIL = Inst.getDebugLoc();
694  if (!DIL) {
695  return nullptr;
696  }
697 
  // CalleeName stays empty when the call has no statically known callee.
698  StringRef CalleeName;
699  if (Function *Callee = Inst.getCalledFunction())
700  CalleeName = Callee->getName();
701 
  // NOTE(review): listing line 702 is absent from this extract, so the
  // condition selecting this context-tracker lookup is not visible here.
703  return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName);
704 
705  const FunctionSamples *FS = findFunctionSamples(Inst);
706  if (FS == nullptr)
707  return nullptr;
708 
709  return FS->findFunctionSamplesAt(FunctionSamples::getCallSiteIdentifier(DIL),
710  CalleeName, Reader->getRemapper());
711 }
712 
713 /// Returns a vector of FunctionSamples that are the indirect call targets
714 /// of \p Inst, sorted by descending head-sample estimate with ties broken
715 /// by ascending GUID. Stores the total call count of the indirect call in
/// \p Sum. \p Sum is left untouched on the early returns that happen before
/// it is reset (no debug location, no callee context samples, or no enclosing
/// FunctionSamples).
716 std::vector<const FunctionSamples *>
717 SampleProfileLoader::findIndirectCallFunctionSamples(
718  const Instruction &Inst, uint64_t &Sum) const {
719  const DILocation *DIL = Inst.getDebugLoc();
720  std::vector<const FunctionSamples *> R;
721 
722  if (!DIL) {
723  return R;
724  }
725 
  // Ordering used for the result: larger head-sample estimate first, then
  // smaller GUID, so the order is deterministic.
726  auto FSCompare = [](const FunctionSamples *L, const FunctionSamples *R) {
727  assert(L && R && "Expect non-null FunctionSamples");
728  if (L->getHeadSamplesEstimate() != R->getHeadSamplesEstimate())
729  return L->getHeadSamplesEstimate() > R->getHeadSamplesEstimate();
730  return FunctionSamples::getGUID(L->getName()) <
731  FunctionSamples::getGUID(R->getName());
732  };
733 
  // NOTE(review): listing line 734 is absent from this extract; it opens the
  // block that ends at listing line 749 and holds its condition.
735  auto CalleeSamples =
736  ContextTracker->getIndirectCalleeContextSamplesFor(DIL);
737  if (CalleeSamples.empty())
738  return R;
739 
740  // For CSSPGO, we only use target context profile's entry count
741  // as that already includes both inlined callee and non-inlined ones.
742  Sum = 0;
743  for (const auto *const FS : CalleeSamples) {
744  Sum += FS->getHeadSamplesEstimate();
745  R.push_back(FS);
746  }
747  llvm::sort(R, FSCompare);
748  return R;
749  }
750 
751  const FunctionSamples *FS = findFunctionSamples(Inst);
752  if (FS == nullptr)
753  return R;
754 
  // NOTE(review): listing line 755, which defines CallSite, is absent from
  // this extract.
756  auto T = FS->findCallTargetMapAt(CallSite);
757  Sum = 0;
  // Sum first accumulates the call-target counts recorded at the call site...
758  if (T)
759  for (const auto &T_C : T.get())
760  Sum += T_C.second;
  // ...then the head-sample estimates of the callees that have their own
  // FunctionSamples at this call site; only those are returned in R.
761  if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(CallSite)) {
762  if (M->empty())
763  return R;
764  for (const auto &NameFS : *M) {
765  Sum += NameFS.second.getHeadSamplesEstimate();
766  R.push_back(&NameFS.second);
767  }
768  llvm::sort(R, FSCompare);
769  }
770  return R;
771 }
772 
// Returns the FunctionSamples that \p Inst belongs to.
//
// Instructions without a debug location map to Samples. Otherwise the result
// is looked up once per DILocation and memoized in DILocation2SampleMap
// (a cached nullptr is returned as-is on later queries).
773 const FunctionSamples *
774 SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
  // NOTE(review): listing line 775 is absent from this extract; it opens the
  // block closed at listing line 779 and holds its condition.
776  std::optional<PseudoProbe> Probe = extractProbe(Inst);
777  if (!Probe)
778  return nullptr;
779  }
780 
781  const DILocation *DIL = Inst.getDebugLoc();
782  if (!DIL)
783  return Samples;
784 
  // try_emplace inserts a nullptr placeholder only when DIL is not cached
  // yet; it.second tells whether the lookup still has to be performed.
785  auto it = DILocation2SampleMap.try_emplace(DIL,nullptr);
786  if (it.second) {
  // NOTE(review): listing line 787 (the `if` paired with the `else` below) is
  // absent from this extract.
788  it.first->second = ContextTracker->getContextSamplesFor(DIL);
789  else
790  it.first->second =
791  Samples->findFunctionSamples(DIL, Reader->getRemapper());
792  }
793  return it.first->second;
794 }
795 
796 /// Check whether the indirect call promotion history of \p Inst allows
797 /// the promotion for \p Candidate.
798 /// If the profile count for the promotion candidate \p Candidate is
799 /// NOMORE_ICP_MAGICNUM, it means \p Candidate has already been promoted
800 /// for \p Inst. If we already have at least MaxNumPromotions
801 /// NOMORE_ICP_MAGICNUM count values in the value profile of \p Inst, we
802 /// cannot promote for \p Inst anymore.
803 static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate) {
804  uint32_t NumVals = 0;
805  uint64_t TotalCount = 0;
806  std::unique_ptr<InstrProfValueData[]> ValueData =
807  std::make_unique<InstrProfValueData[]>(MaxNumPromotions);
808  bool Valid =
809  getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget, MaxNumPromotions,
810  ValueData.get(), NumVals, TotalCount, true);
811  // No valid value profile so no promoted targets have been recorded
812  // before. Ok to do ICP.
813  if (!Valid)
814  return true;
815 
816  unsigned NumPromoted = 0;
817  for (uint32_t I = 0; I < NumVals; I++) {
818  if (ValueData[I].Count != NOMORE_ICP_MAGICNUM)
819  continue;
820 
821  // If the promotion candidate has NOMORE_ICP_MAGICNUM count in the
822  // metadata, it means the candidate has been promoted for this
823  // indirect call.
824  if (ValueData[I].Value == Function::getGUID(Candidate))
825  return false;
826  NumPromoted++;
827  // If already have MaxNumPromotions promotion, don't do it anymore.
828  if (NumPromoted == MaxNumPromotions)
829  return false;
830  }
831  return true;
832 }
833 
834 /// Update indirect call target profile metadata for \p Inst.
835 /// Usually \p Sum is the sum of counts of all the targets for \p Inst.
836 /// If it is 0, it means updateIDTMetaData is used to mark a
837 /// certain target as already promoted. If it is not zero,
838 /// we expect to use it to update the total count in the value profile.
///
/// \param CallTargets Targets to merge into the existing value profile. When
/// \p Sum is 0 it must hold exactly one entry whose count is
/// NOMORE_ICP_MAGICNUM (asserted below).
/// \param Sum Total count to record, or 0 for the "mark as promoted" mode
/// described above.
///
/// The merged targets are sorted by descending count, then descending value,
/// and at most MaxNumPromotions of them are handed to annotateValueSite.
/// Does nothing when MaxNumPromotions is zero.
839 static void
// NOTE(review): listing line 840 is absent from this extract; presumably it
// holds the function name and the leading `Inst` parameter -- confirm against
// the upstream file.
841  const SmallVectorImpl<InstrProfValueData> &CallTargets,
842  uint64_t Sum) {
843  // Bail out early if MaxNumPromotions is zero.
844  // This prevents allocating an array of zero length below.
845  //
846  // Note `updateIDTMetaData` is called in two places so check
847  // `MaxNumPromotions` inside it.
848  if (MaxNumPromotions == 0)
849  return;
850  uint32_t NumVals = 0;
851  // OldSum is the existing total count in the value profile data.
852  uint64_t OldSum = 0;
853  std::unique_ptr<InstrProfValueData[]> ValueData =
854  std::make_unique<InstrProfValueData[]>(MaxNumPromotions);
855  bool Valid =
856  getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget, MaxNumPromotions,
857  ValueData.get(), NumVals, OldSum, true);
858 
     // Maps a target value (GUID) to the count that will be written back.
859  DenseMap<uint64_t, uint64_t> ValueCountMap;
860  if (Sum == 0) {
861  assert((CallTargets.size() == 1 &&
862  CallTargets[0].Count == NOMORE_ICP_MAGICNUM) &&
863  "If sum is 0, assume only one element in CallTargets "
864  "with count being NOMORE_ICP_MAGICNUM");
865  // Initialize ValueCountMap with existing value profile data.
866  if (Valid) {
867  for (uint32_t I = 0; I < NumVals; I++)
868  ValueCountMap[ValueData[I].Value] = ValueData[I].Count;
869  }
870  auto Pair =
871  ValueCountMap.try_emplace(CallTargets[0].Value, CallTargets[0].Count);
872  // If the target already exists in the value profile, decrease the total
873  // count OldSum and reset the target's count to NOMORE_ICP_MAGICNUM.
874  if (!Pair.second) {
875  OldSum -= Pair.first->second;
876  Pair.first->second = NOMORE_ICP_MAGICNUM;
877  }
     // In this mode the total written back is the (possibly reduced) old total.
878  Sum = OldSum;
879  } else {
880  // Initialize ValueCountMap with existing NOMORE_ICP_MAGICNUM
881  // counts in the value profile.
882  if (Valid) {
883  for (uint32_t I = 0; I < NumVals; I++) {
884  if (ValueData[I].Count == NOMORE_ICP_MAGICNUM)
885  ValueCountMap[ValueData[I].Value] = ValueData[I].Count;
886  }
887  }
888 
889  for (const auto &Data : CallTargets) {
890  auto Pair = ValueCountMap.try_emplace(Data.Value, Data.Count);
     // Newly inserted target: its count stays part of Sum.
891  if (Pair.second)
892  continue;
893  // The target represented by Data.Value has already been promoted.
894  // Keep the count as NOMORE_ICP_MAGICNUM in the profile and decrease
895  // Sum by Data.Count.
896  assert(Sum >= Data.Count && "Sum should never be less than Data.Count");
897  Sum -= Data.Count;
898  }
899  }
900 
     // Flatten the map into a vector so it can be sorted.
901  SmallVector<InstrProfValueData, 8> NewCallTargets;
902  for (const auto &ValueCount : ValueCountMap) {
903  NewCallTargets.emplace_back(
904  InstrProfValueData{ValueCount.first, ValueCount.second});
905  }
906 
     // Order by descending count; ties are broken by descending value so the
     // result does not depend on the map's iteration order.
907  llvm::sort(NewCallTargets,
908  [](const InstrProfValueData &L, const InstrProfValueData &R) {
909  if (L.Count != R.Count)
910  return L.Count > R.Count;
911  return L.Value > R.Value;
912  });
913 
     // Cap the number of targets passed on at MaxNumPromotions.
914  uint32_t MaxMDCount =
915  std::min(NewCallTargets.size(), static_cast<size_t>(MaxNumPromotions));
916  annotateValueSite(*Inst.getParent()->getParent()->getParent(), Inst,
917  NewCallTargets, Sum, IPVK_IndirectCallTarget, MaxMDCount);
918 }
919 
920 /// Attempt to promote indirect call and also inline the promoted call.
921 ///
922 /// \param F Caller function.
923 /// \param Candidate ICP and inline candidate.
924 /// \param SumOrigin Original sum of target counts for indirect call before
925 /// promoting given candidate.
926 /// \param Sum Prorated sum of remaining target counts for indirect call
927 /// after promoting given candidate.
928 /// \param InlinedCallSite Output vector for new call sites exposed after
929 /// inlining.
///
/// \returns True only if the call was promoted and the promoted direct call
/// was then inlined; false on every other path.
930 bool SampleProfileLoader::tryPromoteAndInlineCandidate(
931  Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum,
932  SmallVector<CallBase *, 8> *InlinedCallSite) {
933  // Bail out early if sample-loader inliner is disabled.
     // NOTE(review): listing line 934 (the condition guarding this return) is
     // absent from this extract.
935  return false;
936 
937  // Bail out early if MaxNumPromotions is zero.
938  // This prevents allocating an array of zero length in callees below.
939  if (MaxNumPromotions == 0)
940  return false;
     // Resolve the callee named by the candidate's profile to a Function in
     // SymbolMap; give up when there is no entry or the entry is null.
941  auto CalleeFunctionName = Candidate.CalleeSamples->getFuncName();
942  auto R = SymbolMap.find(CalleeFunctionName);
943  if (R == SymbolMap.end() || !R->getValue())
944  return false;
945 
946  auto &CI = *Candidate.CallInstr;
947  if (!doesHistoryAllowICP(CI, R->getValue()->getName()))
948  return false;
949 
950  const char *Reason = "Callee function not available";
951  // R->getValue() != &F is to prevent promoting a recursive call.
952  // If it is a recursive call, we do not inline it as it could bloat
953  // the code exponentially. There are better ways to handle this, e.g.
954  // clone the caller first, and inline the cloned caller if it is
955  // recursive. As llvm does not inline recursive calls, we will
956  // simply ignore it instead of handling it explicitly.
957  if (!R->getValue()->isDeclaration() && R->getValue()->getSubprogram() &&
958  R->getValue()->hasFnAttribute("use-sample-profile") &&
959  R->getValue() != &F && isLegalToPromote(CI, R->getValue(), &Reason)) {
960  // For promoted target, set its value with NOMORE_ICP_MAGICNUM count
961  // in the value profile metadata so the target won't be promoted again.
962  SmallVector<InstrProfValueData, 1> SortedCallTargets = {InstrProfValueData{
963  Function::getGUID(R->getValue()->getName()), NOMORE_ICP_MAGICNUM}};
964  updateIDTMetaData(CI, SortedCallTargets, 0);
965 
     // NOTE(review): DI is the address of the object returned by
     // promoteIndirectCall, so the `if (DI)` test below is always true.
966  auto *DI = &pgo::promoteIndirectCall(
967  CI, R->getValue(), Candidate.CallsiteCount, Sum, false, ORE);
968  if (DI) {
969  Sum -= Candidate.CallsiteCount;
970  // Do not prorate the indirect callsite distribution since the original
971  // distribution will be used to scale down non-promoted profile target
972  // counts later. By doing this we lose track of the real callsite count
973  // for the leftover indirect callsite as a trade off for accurate call
974  // target counts.
975  // TODO: Ideally we would have two separate factors, one for call site
976  // counts and one is used to prorate call target counts.
977  // Do not update the promoted direct callsite distribution at this
978  // point since the original distribution combined with the callee profile
979  // will be used to prorate callsites from the callee if inlined. Once not
980  // inlined, the direct callsite distribution should be prorated so that
981  // it will reflect the real callsite counts.
982  Candidate.CallInstr = DI;
983  if (isa<CallInst>(DI) || isa<InvokeInst>(DI)) {
984  bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
985  if (!Inlined) {
986  // Prorate the direct callsite distribution so that it reflects real
987  // callsite counts.
     // NOTE(review): listing line 988 (the start of this call) is absent
     // from this extract. The factor below divides by SumOrigin; confirm
     // that callers never pass SumOrigin == 0.
989  *DI, static_cast<float>(Candidate.CallsiteCount) / SumOrigin);
990  }
991  return Inlined;
992  }
993  }
994  } else {
995  LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to "
996  << Candidate.CalleeSamples->getFuncName() << " because "
997  << Reason << "\n");
998  }
999  return false;
1000 }
1001 
// Decide whether a cold call site should still be inlined.
// Returns false when ProfileSizeInline is off or Callee is null; otherwise
// follows the InlineCost verdict: never -> false, always -> true, and in all
// other cases compares the cost against SampleColdCallSiteThreshold.
1002 bool SampleProfileLoader::shouldInlineColdCallee(CallBase &CallInst) {
1003  if (!ProfileSizeInline)
1004  return false;
1005 
      // NOTE(review): listing line 1006 (the declaration of `Callee`) is absent
      // from this extract.
1007  if (Callee == nullptr)
1008  return false;
1009 
1010  InlineCost Cost = getInlineCost(CallInst, getInlineParams(), GetTTI(*Callee),
1011  GetAC, GetTLI);
1012 
1013  if (Cost.isNever())
1014  return false;
1015 
1016  if (Cost.isAlways())
1017  return true;
1018 
      // Variable cost: accept only when it does not exceed the cold threshold.
1019  return Cost.getCost() <= SampleColdCallSiteThreshold;
1020 }
1021 
// Emit one "InlineAttempt" analysis remark per candidate that has a known
// (direct) callee; candidates whose getCalledFunction() is null are skipped.
//
// Candidates: call sites about to be considered for inlining.
// F: the caller, reported as "Caller" in the remark.
// Hot: selects the wording of the remark ("hotness" when true, "size" when
// false).
1022 void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
1023  const SmallVectorImpl<CallBase *> &Candidates, const Function &F,
1024  bool Hot) {
1025  for (auto *I : Candidates) {
1026  Function *CalledFunction = I->getCalledFunction();
1027  if (CalledFunction) {
1028  ORE->emit(OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(),
1029  "InlineAttempt", I->getDebugLoc(),
1030  I->getParent())
1031  << "previous inlining reattempted for "
1032  << (Hot ? "hotness: '" : "size: '")
1033  << ore::NV("Callee", CalledFunction) << "' into '"
1034  << ore::NV("Caller", &F) << "'");
1035  }
1036  }
1037 }
1038 
// Collect into InlinedGUIDs the GUIDs of functions that should be imported
// so that profile-guided inlining can be replayed for them.
//
// CB: the call site being considered; may be null (it is checked before use).
// Samples: profile for the callee at this call site; may be null only when
// the external inline advisor asks to inline CB (see the assert below).
// InlinedGUIDs: output set, only ever added to.
// SymbolMap: maps function names to Functions of the current module.
// Threshold: minimum count for a callee or call target to be collected; it
// is forced to 0 when the external advisor asks to inline CB.
1039 void SampleProfileLoader::findExternalInlineCandidate(
1040  CallBase *CB, const FunctionSamples *Samples,
1041  DenseSet<GlobalValue::GUID> &InlinedGUIDs,
1042  const StringMap<Function *> &SymbolMap, uint64_t Threshold) {
1043 
1044  // If ExternalInlineAdvisor wants to inline an external function,
1045  // make sure it's imported.
1046  if (CB && getExternalInlineAdvisorShouldInline(*CB)) {
1047  // Samples may not exist for a replayed function; if so,
1048  // just add the direct GUID and move on.
1049  if (!Samples) {
1050  InlinedGUIDs.insert(
      // NOTE(review): listing line 1051 (the argument of this insert) is
      // absent from this extract.
1052  return;
1053  }
1054  // Otherwise, drop the threshold to import everything that we can.
1055  Threshold = 0;
1056  }
1057 
1058  assert(Samples && "expect non-null caller profile");
1059 
1060  // For AutoFDO profile, retrieve candidate profiles by walking over
1061  // the nested inlinee profiles.
      // NOTE(review): listing line 1062 (the condition opening this branch) is
      // absent from this extract.
1063  Samples->findInlinedFunctions(InlinedGUIDs, SymbolMap, Threshold);
1064  return;
1065  }
1066 
      // Breadth-first walk over the context trie, starting at the node that
      // owns Samples.
1067  ContextTrieNode *Caller = ContextTracker->getContextNodeForProfile(Samples);
1068  std::queue<ContextTrieNode *> CalleeList;
1069  CalleeList.push(Caller);
1070  while (!CalleeList.empty()) {
1071  ContextTrieNode *Node = CalleeList.front();
1072  CalleeList.pop();
1073  FunctionSamples *CalleeSample = Node->getFunctionSamples();
1074  // For CSSPGO profile, retrieve candidate profile by walking over the
1075  // trie built for context profile. Note that also take call targets
1076  // even if callee doesn't have a corresponding context profile.
1077  if (!CalleeSample)
1078  continue;
1079 
1080  // If pre-inliner decision is used, honor that for importing as well.
1081  bool PreInline =
      // NOTE(review): listing lines 1082-1083 (the initializer of PreInline)
      // are absent from this extract.
1084  if (!PreInline && CalleeSample->getHeadSamplesEstimate() < Threshold)
1085  continue;
1086 
1087  StringRef Name = CalleeSample->getFuncName();
1088  Function *Func = SymbolMap.lookup(Name);
1089  // Add to the import list only when it's defined out of module.
1090  if (!Func || Func->isDeclaration())
1091  InlinedGUIDs.insert(FunctionSamples::getGUID(CalleeSample->getName()));
1092 
1093  // Import hot CallTargets, which may not be available in IR because full
1094  // profile annotation cannot be done until backend compilation in ThinLTO.
1095  for (const auto &BS : CalleeSample->getBodySamples())
1096  for (const auto &TS : BS.second.getCallTargets())
1097  if (TS.getValue() > Threshold) {
1098  StringRef CalleeName = CalleeSample->getFuncName(TS.getKey());
1099  const Function *Callee = SymbolMap.lookup(CalleeName);
1100  if (!Callee || Callee->isDeclaration())
1101  InlinedGUIDs.insert(FunctionSamples::getGUID(TS.getKey()));
1102  }
1103 
1104  // Import hot child context profiles associated with callees. Note that
1105  // this may overlap with the call target loop above, but doing it again
1106  // based on the child context profile effectively allows us to use the max
1107  // of entry count and call target count to determine importing.
1108  for (auto &Child : Node->getAllChildContext()) {
1109  ContextTrieNode *CalleeNode = &Child.second;
1110  CalleeList.push(CalleeNode);
1111  }
1112  }
1113 }
1114 
1115 /// Iteratively inline hot callsites of a function.
1116 ///
1117 /// Iteratively traverse all callsites of the function \p F, so as to
1118 /// find callsites with corresponding inline instances.
1119 ///
1120 /// For such callsites,
1121 /// - If it is hot enough, inline the callsite and add the callsites of the
1122 /// callee into the caller. If the call is an indirect call, first promote
1123 /// it to a direct call. Each indirect call is limited to a single target.
1124 ///
1125 /// - If a callsite is not inlined, merge its profile into the outlined
1126 /// version (if --sample-profile-merge-inlinee is true), or scale the
1127 /// counters of standalone function based on the profile of inlined
1128 /// instances (if --sample-profile-merge-inlinee is false).
1129 ///
1130 /// Later passes may consume the updated profiles.
1131 ///
1132 /// \param F function to perform iterative inlining.
1133 /// \param InlinedGUIDs a set to be updated to include all GUIDs that are
1134 /// inlined in the profiled binary.
1135 ///
1136 /// \returns True if any inlining happened.
1137 bool SampleProfileLoader::inlineHotFunctions(
1138  Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
1139  // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
1140  // Profile symbol list is ignored when profile-sample-accurate is on.
1141  assert((!ProfAccForSymsInList ||
      // NOTE(review): listing line 1142 (part of this assert condition) is
      // absent from this extract.
1143  !F.hasFnAttribute("profile-sample-accurate"))) &&
1144  "ProfAccForSymsInList should be false when profile-sample-accurate "
1145  "is enabled");
1146 
      // Call sites that have a callee profile but have not (yet) been inlined,
      // with that profile. Entries are erased when inlining succeeds.
1147  MapVector<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
1148  bool Changed = false;
1149  bool LocalChanged = true;
      // Repeat until a full sweep over F inlines nothing.
1150  while (LocalChanged) {
1151  LocalChanged = false;
      // NOTE(review): listing line 1152 (presumably the declaration of the
      // `CIS` worklist used below) is absent from this extract.
1153  for (auto &BB : F) {
1154  bool Hot = false;
1155  SmallVector<CallBase *, 10> AllCandidates;
1156  SmallVector<CallBase *, 10> ColdCandidates;
1157  for (auto &I : BB) {
1158  const FunctionSamples *FS = nullptr;
1159  if (auto *CB = dyn_cast<CallBase>(&I)) {
1160  if (!isa<IntrinsicInst>(I)) {
1161  if ((FS = findCalleeFunctionSamples(*CB))) {
1162  assert((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) &&
1163  "GUIDToFuncNameMap has to be populated");
1164  AllCandidates.push_back(CB);
1165  if (FS->getHeadSamplesEstimate() > 0 ||
      // NOTE(review): listing line 1166 (the rest of this condition) is
      // absent from this extract.
1167  LocalNotInlinedCallSites.insert({CB, FS});
1168  if (callsiteIsHot(FS, PSI, ProfAccForSymsInList))
1169  Hot = true;
1170  else if (shouldInlineColdCallee(*CB))
1171  ColdCandidates.push_back(CB);
1172  } else if (getExternalInlineAdvisorShouldInline(*CB)) {
      // No profile for this call site, but the external advisor asks for
      // it to be inlined.
1173  AllCandidates.push_back(CB);
1174  }
1175  }
1176  }
1177  }
      // If any call site in the block is hot (or an external advisor is in
      // use), queue every candidate of the block; otherwise queue only the
      // cold candidates accepted by shouldInlineColdCallee.
1178  if (Hot || ExternalInlineAdvisor) {
1179  CIS.insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end());
1180  emitOptimizationRemarksForInlineCandidates(AllCandidates, F, true);
1181  } else {
1182  CIS.insert(CIS.begin(), ColdCandidates.begin(), ColdCandidates.end());
1183  emitOptimizationRemarksForInlineCandidates(ColdCandidates, F, false);
1184  }
1185  }
1186  for (CallBase *I : CIS) {
1187  Function *CalledFunction = I->getCalledFunction();
1188  InlineCandidate Candidate = {I, LocalNotInlinedCallSites.lookup(I),
1189  0 /* dummy count */,
1190  1.0 /* dummy distribution factor */};
1191  // Do not inline recursive calls.
1192  if (CalledFunction == &F)
1193  continue;
1194  if (I->isIndirectCall()) {
      // NOTE(review): Sum has no initializer here, unlike the `Sum = 0` in
      // inlineHotFunctionsWithPriority; presumably
      // findIndirectCallFunctionSamples always assigns it -- confirm.
1195  uint64_t Sum;
1196  for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
1197  uint64_t SumOrigin = Sum;
      // In ThinLTO pre-link only record import candidates; no promotion.
1198  if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1199  findExternalInlineCandidate(I, FS, InlinedGUIDs, SymbolMap,
1200  PSI->getOrCompHotCountThreshold());
1201  continue;
1202  }
1203  if (!callsiteIsHot(FS, PSI, ProfAccForSymsInList))
1204  continue;
1205 
1206  Candidate = {I, FS, FS->getHeadSamplesEstimate(), 1.0};
1207  if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum)) {
1208  LocalNotInlinedCallSites.erase(I);
1209  LocalChanged = true;
1210  }
1211  }
1212  } else if (CalledFunction && CalledFunction->getSubprogram() &&
1213  !CalledFunction->isDeclaration()) {
      // Direct call to a function defined in this module that has a
      // subprogram attached.
1214  if (tryInlineCandidate(Candidate)) {
1215  LocalNotInlinedCallSites.erase(I);
1216  LocalChanged = true;
1217  }
1218  } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1219  findExternalInlineCandidate(I, findCalleeFunctionSamples(*I),
1220  InlinedGUIDs, SymbolMap,
1221  PSI->getOrCompHotCountThreshold());
1222  }
1223  }
1224  Changed |= LocalChanged;
1225  }
1226 
1227  // For CS profile, profile for not inlined context will be merged when
1228  // base profile is being retrieved.
      // NOTE(review): listing line 1229 (presumably a condition guarding the
      // call below) is absent from this extract.
1230  promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites, F);
1231  return Changed;
1232 }
1233 
// Try to inline the direct call held in Candidate.
//
// Candidate: call site plus its callee profile, call site count and
// distribution factor. The call must have a known callee (asserted).
// InlinedCallSites: optional output; when non-null it is cleared and filled
// with the call sites exposed by a successful inlining.
//
// Returns true only when InlineFunction succeeded.
1234 bool SampleProfileLoader::tryInlineCandidate(
1235  InlineCandidate &Candidate, SmallVector<CallBase *, 8> *InlinedCallSites) {
1236  // Do not attempt to inline a candidate if
1237  // --disable-sample-loader-inlining is true.
      // NOTE(review): listing line 1238 (the condition guarding this return) is
      // absent from this extract.
1239  return false;
1240 
1241  CallBase &CB = *Candidate.CallInstr;
1242  Function *CalledFunction = CB.getCalledFunction();
1243  assert(CalledFunction && "Expect a callee with definition");
      // Capture the debug location and parent block now; they are used for
      // remarks after the inlining attempt.
1244  DebugLoc DLoc = CB.getDebugLoc();
1245  BasicBlock *BB = CB.getParent();
1246 
1247  InlineCost Cost = shouldInlineCandidate(Candidate);
1248  if (Cost.isNever()) {
1249  ORE->emit(OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(),
1250  "InlineFail", DLoc, BB)
1251  << "incompatible inlining");
1252  return false;
1253  }
1254 
      // A cost that converts to false is rejected without emitting a remark.
1255  if (!Cost)
1256  return false;
1257 
1258  InlineFunctionInfo IFI(nullptr, GetAC);
1259  IFI.UpdateProfile = false;
1260  InlineResult IR = InlineFunction(CB, IFI,
1261  /*MergeAttributes=*/true);
1262  if (!IR.isSuccess())
1263  return false;
1264 
1265  // The call to InlineFunction erases the call instruction, so we can't pass
      // it here; use the DLoc and BB captured above instead.
1266  emitInlinedIntoBasedOnCost(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(),
1267  Cost, true, getAnnotatedRemarkPassName());
1268 
1269  // Now populate the list of newly exposed call sites.
1270  if (InlinedCallSites) {
1271  InlinedCallSites->clear();
1272  for (auto &I : IFI.InlinedCallSites)
1273  InlinedCallSites->push_back(I);
1274  }
1275 
      // NOTE(review): listing line 1276 (presumably a condition guarding the
      // ContextTracker call below) is absent from this extract.
1277  ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
1278  ++NumCSInlined;
1279 
1280  // Prorate inlined probes for a duplicated inlining callsite which probably
1281  // has a distribution less than 100%. Samples for an inlinee should be
1282  // distributed among the copies of the original callsite based on each
1283  // callsite's distribution factor for counts accuracy. Note that an inlined
1284  // probe may come with its own distribution factor if it has been duplicated
1285  // in the inlinee body. The two factors are multiplied to reflect the
1286  // aggregation of duplication.
1287  if (Candidate.CallsiteDistribution < 1) {
1288  for (auto &I : IFI.InlinedCallSites) {
1289  if (std::optional<PseudoProbe> Probe = extractProbe(*I))
1290  setProbeDistributionFactor(*I, Probe->Factor *
1291  Candidate.CallsiteDistribution);
1292  }
1293  NumDuplicatedInlinesite++;
1294  }
1295 
1296  return true;
1297 }
1298 
// Build an InlineCandidate for the call site CB.
//
// NewCandidate: output; written only when the function returns true.
// CB: call instruction to examine; must be non-null (asserted).
//
// Returns false for intrinsic calls, and for call sites that have neither a
// callee profile nor an "inline" recommendation from the external advisor.
// Note that on success NewCandidate->CalleeSamples may therefore be null.
1299 bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
1300  CallBase *CB) {
1301  assert(CB && "Expect non-null call instruction");
1302 
1303  if (isa<IntrinsicInst>(CB))
1304  return false;
1305 
1306  // Find the callee's profile. For indirect call, find hottest target profile.
1307  const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB);
1308  // If ExternalInlineAdvisor wants to inline this site, do so even
1309  // if Samples are not present.
1310  if (!CalleeSamples && !getExternalInlineAdvisorShouldInline(*CB))
1311  return false;
1312 
      // Distribution factor of this call site: taken from its pseudo probe when
      // one can be extracted, otherwise 1.0.
1313  float Factor = 1.0;
1314  if (std::optional<PseudoProbe> Probe = extractProbe(*CB))
1315  Factor = Probe->Factor;
1316 
      // Scale the callee's estimated head samples by the factor; 0 when there
      // is no profile. The float product is converted back to uint64_t.
1317  uint64_t CallsiteCount =
1318  CalleeSamples ? CalleeSamples->getHeadSamplesEstimate() * Factor : 0;
1319  *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
1320  return true;
1321 }
1322 
// Ask the external inline advisor, if one is set, about the call site CB.
//
// Returns an empty optional when there is no advisor or it gives no advice
// for CB. Otherwise returns InlineCost::getAlways when inlining is
// recommended and InlineCost::getNever when it is not, after recording the
// corresponding outcome on the advice object.
1323 std::optional<InlineCost>
1324 SampleProfileLoader::getExternalInlineAdvisorCost(CallBase &CB) {
1325  std::unique_ptr<InlineAdvice> Advice = nullptr;
1326  if (ExternalInlineAdvisor) {
1327  Advice = ExternalInlineAdvisor->getAdvice(CB);
1328  if (Advice) {
1329  if (!Advice->isInliningRecommended()) {
1330  Advice->recordUnattemptedInlining();
1331  return InlineCost::getNever("not previously inlined");
1332  }
      // NOTE(review): recordInlining() is called here, before any inlining
      // has actually been attempted by the caller.
1333  Advice->recordInlining();
1334  return InlineCost::getAlways("previously inlined");
1335  }
1336  }
1337 
1338  return {};
1339 }
1340 
// Boolean view of getExternalInlineAdvisorCost: returns false when no cost is
// available for CB, otherwise the cost converted to bool.
1341 bool SampleProfileLoader::getExternalInlineAdvisorShouldInline(CallBase &CB) {
1342  std::optional<InlineCost> Cost = getExternalInlineAdvisorCost(CB);
1343  return Cost ? !!Cost.value() : false;
1344 }
1345 
// Compute the InlineCost used to decide whether Candidate is inlined.
//
// Order of precedence, as visible below: a cost from the external advisor
// when it has one for this call site; a never/always verdict from
// getInlineCost; an "always" for candidates marked by the preinliner; and
// otherwise the analyzer's cost paired with a sample-profile threshold.
// The candidate's call must have a known callee (asserted).
1346 InlineCost
1347 SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
1348  if (std::optional<InlineCost> ReplayCost =
1349  getExternalInlineAdvisorCost(*Candidate.CallInstr))
1350  return ReplayCost.value();
1351  // Adjust threshold based on call site hotness, only do this for callsite
1352  // prioritized inliner because otherwise cost-benefit check is done earlier.
1353  int SampleThreshold = SampleColdCallSiteThreshold;
      // NOTE(review): listing line 1354 (the condition opening this block) is
      // absent from this extract.
1355  if (Candidate.CallsiteCount > PSI->getHotCountThreshold())
1356  SampleThreshold = SampleHotCallSiteThreshold;
1357  else if (!ProfileSizeInline)
1358  return InlineCost::getNever("cold callsite");
1359  }
1360 
1361  Function *Callee = Candidate.CallInstr->getCalledFunction();
1362  assert(Callee && "Expect a definition for inline candidate of direct call");
1363 
1364  InlineParams Params = getInlineParams();
1365  // We will ignore the threshold from inline cost, so always get full cost.
1366  Params.ComputeFullInlineCost = true;
      // NOTE(review): listing line 1367 is absent from this extract.
1368  // Checks if there is anything in the reachable portion of the callee at
1369  // this callsite that makes this inlining potentially illegal. Need to
1370  // set ComputeFullInlineCost, otherwise getInlineCost may return early
1371  // when cost exceeds threshold without checking all IRs in the callee.
1372  // The actual cost does not matter because we only check isNever() to
1373  // see if it is legal to inline the callsite.
1374  InlineCost Cost = getInlineCost(*Candidate.CallInstr, Callee, Params,
1375  GetTTI(*Callee), GetAC, GetTLI);
1376 
1377  // Honor always inline and never inline from call analyzer
1378  if (Cost.isNever() || Cost.isAlways())
1379  return Cost;
1380 
1381  // With CSSPGO, the preinliner in llvm-profgen can estimate global inline
1382  // decisions based on hotness as well as accurate function byte sizes for
1383  // given context using function/inlinee sizes from previous build. It
1384  // stores the decision in profile, and also adjust/merge context profile
1385  // aiming at better context-sensitive post-inline profile quality, assuming
1386  // all inline decision estimates are going to be honored by compiler. Here
1387  // we replay that inline decision under `sample-profile-use-preinliner`.
1388  // Note that we don't need to handle negative decision from preinliner as
1389  // context profile for not inlined calls are merged by preinliner already.
1390  if (UsePreInlinerDecision && Candidate.CalleeSamples) {
1391  // Once two nodes are merged due to promotion, we're losing some context
1392  // so the original context-sensitive preinliner decision should be ignored
1393  // for SyntheticContext.
1394  SampleContext &Context = Candidate.CalleeSamples->getContext();
1395  if (!Context.hasState(SyntheticContext) &&
1396  Context.hasAttribute(ContextShouldBeInlined))
1397  return InlineCost::getAlways("preinliner");
1398  }
1399 
1400  // For old FDO inliner, we inline the call site as long as cost is not
1401  // "Never". The cost-benefit check is done earlier.
      // NOTE(review): listing line 1402 (the condition opening this block) is
      // absent from this extract.
1403  return InlineCost::get(Cost.getCost(), INT_MAX);
1404  }
1405 
1406  // Otherwise only use the cost from call analyzer, but overwrite threshold
1407  // with Sample PGO threshold.
1408  return InlineCost::get(Cost.getCost(), SampleThreshold);
1409 }
1410 
// Priority-driven variant of inlineHotFunctions: call sites of F are put in
// a priority queue and processed one at a time while the queue is non-empty
// and F's instruction count stays below a size limit. Call sites exposed by
// a successful inlining are pushed back onto the queue.
//
// F: function to inline into.
// InlinedGUIDs: set updated (via findExternalInlineCandidate) with GUIDs of
// functions to import during ThinLTO pre-link.
//
// Returns true if any inlining happened.
1411 bool SampleProfileLoader::inlineHotFunctionsWithPriority(
1412  Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
1413  // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
1414  // Profile symbol list is ignored when profile-sample-accurate is on.
1415  assert((!ProfAccForSymsInList ||
      // NOTE(review): listing line 1416 (part of this assert condition) is
      // absent from this extract.
1417  !F.hasFnAttribute("profile-sample-accurate"))) &&
1418  "ProfAccForSymsInList should be false when profile-sample-accurate "
1419  "is enabled");
1420 
1421  // Populating worklist with initial call sites from root inliner, along
1422  // with call site weights.
1423  CandidateQueue CQueue;
1424  InlineCandidate NewCandidate;
1425  for (auto &BB : F) {
1426  for (auto &I : BB) {
1427  auto *CB = dyn_cast<CallBase>(&I);
1428  if (!CB)
1429  continue;
1430  if (getInlineCandidate(&NewCandidate, CB))
1431  CQueue.push(NewCandidate);
1432  }
1433  }
1434 
1435  // Cap the size growth from profile guided inlining. This is needed even
1436  // though cost of each inline candidate already accounts for callee size,
1437  // because with top-down inlining, we can grow inliner size significantly
1438  // with large number of smaller inlinees each pass the cost check.
      // NOTE(review): listing lines 1439, 1443-1444 and 1446 (the assert
      // condition and further adjustments of SizeLimit) are absent from this
      // extract.
1440  "Max inline size limit should not be smaller than min inline size "
1441  "limit.");
1442  unsigned SizeLimit = F.getInstructionCount() * ProfileInlineGrowthLimit;
1445  if (ExternalInlineAdvisor)
1447 
      // Call sites that were considered but not inlined, with their profile.
      // Only populated when there is no ContextTracker (see below).
1448  MapVector<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
1449 
1450  // Perform iterative BFS call site prioritized inlining
1451  bool Changed = false;
1452  while (!CQueue.empty() && F.getInstructionCount() < SizeLimit) {
1453  InlineCandidate Candidate = CQueue.top();
1454  CQueue.pop();
1455  CallBase *I = Candidate.CallInstr;
1456  Function *CalledFunction = I->getCalledFunction();
1457 
      // Do not inline recursive calls.
1458  if (CalledFunction == &F)
1459  continue;
1460  if (I->isIndirectCall()) {
1461  uint64_t Sum = 0;
1462  auto CalleeSamples = findIndirectCallFunctionSamples(*I, Sum);
      // SumOrigin keeps the unscaled total; Sum is scaled by this call
      // site's distribution factor.
1463  uint64_t SumOrigin = Sum;
1464  Sum *= Candidate.CallsiteDistribution;
      // Number of targets promoted and inlined at this call site so far.
1465  unsigned ICPCount = 0;
1466  for (const auto *FS : CalleeSamples) {
1467  // TODO: Consider disabling pre-LTO ICP for MonoLTO as well
1468  if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1469  findExternalInlineCandidate(I, FS, InlinedGUIDs, SymbolMap,
1470  PSI->getOrCompHotCountThreshold());
1471  continue;
1472  }
1473  uint64_t EntryCountDistributed =
1474  FS->getHeadSamplesEstimate() * Candidate.CallsiteDistribution;
1475  // In addition to regular inline cost check, we also need to make sure
1476  // ICP isn't introducing excessive speculative checks even if individual
1477  // target looks beneficial to promote and inline. That means we should
1478  // only do ICP when there's a small number of dominant targets.
1479  if (ICPCount >= ProfileICPRelativeHotnessSkip &&
1480  EntryCountDistributed * 100 < SumOrigin * ProfileICPRelativeHotness)
1481  break;
1482  // TODO: Fix CallAnalyzer to handle all indirect calls.
1483  // For indirect call, we don't run CallAnalyzer to get InlineCost
1484  // before actual inlining. This is because we could see two different
1485  // types from the same definition, which makes CallAnalyzer choke as
1486  // it's expecting matching parameter type on both caller and callee
1487  // side. See example from PR18962 for the triggering cases (the bug was
1488  // fixed, but we generate different types).
1489  if (!PSI->isHotCount(EntryCountDistributed))
1490  break;
1491  SmallVector<CallBase *, 8> InlinedCallSites;
1492  // Attach function profile for promoted indirect callee, and update
1493  // call site count for the promoted inline candidate too.
1494  Candidate = {I, FS, EntryCountDistributed,
1495  Candidate.CallsiteDistribution};
1496  if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
1497  &InlinedCallSites)) {
      // Queue the call sites exposed by the inlining.
1498  for (auto *CB : InlinedCallSites) {
1499  if (getInlineCandidate(&NewCandidate, CB))
1500  CQueue.emplace(NewCandidate);
1501  }
1502  ICPCount++;
1503  Changed = true;
1504  } else if (!ContextTracker) {
1505  LocalNotInlinedCallSites.insert({I, FS});
1506  }
1507  }
1508  } else if (CalledFunction && CalledFunction->getSubprogram() &&
1509  !CalledFunction->isDeclaration()) {
1510  SmallVector<CallBase *, 8> InlinedCallSites;
1511  if (tryInlineCandidate(Candidate, &InlinedCallSites)) {
1512  for (auto *CB : InlinedCallSites) {
1513  if (getInlineCandidate(&NewCandidate, CB))
1514  CQueue.emplace(NewCandidate);
1515  }
1516  Changed = true;
1517  } else if (!ContextTracker) {
      // NOTE(review): Candidate.CalleeSamples can be null here when the
      // candidate was accepted by getInlineCandidate only because of the
      // external advisor; the consumer of this map dereferences the
      // profile -- confirm this combination cannot occur.
1518  LocalNotInlinedCallSites.insert({I, Candidate.CalleeSamples});
1519  }
1520  } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1521  findExternalInlineCandidate(I, findCalleeFunctionSamples(*I),
1522  InlinedGUIDs, SymbolMap,
1523  PSI->getOrCompHotCountThreshold());
1524  }
1525  }
1526 
      // Candidates were left unprocessed: record which kind of size limit
      // stopped the loop.
1527  if (!CQueue.empty()) {
1528  if (SizeLimit == (unsigned)ProfileInlineLimitMax)
1529  ++NumCSInlinedHitMaxLimit;
1530  else if (SizeLimit == (unsigned)ProfileInlineLimitMin)
1531  ++NumCSInlinedHitMinLimit;
1532  else
1533  ++NumCSInlinedHitGrowthLimit;
1534  }
1535 
1536  // For CS profile, profile for not inlined context will be merged when
1537  // base profile is being retrieved.
      // NOTE(review): listing line 1538 (presumably a condition guarding the
      // call below) is absent from this extract.
1539  promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites, F);
1540  return Changed;
1541 }
1542 
// For every call site that was profiled as inlined but was not inlined in
// this compilation, emit a "NotInline" remark and account for its profile:
// either merge it into the callee's outlined profile (ProfileMergeInlinee)
// or add its estimated head samples to notInlinedCallInfo for the callee.
// Call sites without a callee definition in this module are skipped.
1543 void SampleProfileLoader::promoteMergeNotInlinedContextSamples(
      // NOTE(review): listing line 1544 (the `NonInlinedCallSites` parameter) is
      // absent from this extract.
1545  const Function &F) {
1546  // Accumulate not inlined callsite information into notInlinedSamples
1547  for (const auto &Pair : NonInlinedCallSites) {
1548  CallBase *I = Pair.first;
1549  Function *Callee = I->getCalledFunction();
1550  if (!Callee || Callee->isDeclaration())
1551  continue;
1552 
1553  ORE->emit(
1554  OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(), "NotInline",
1555  I->getDebugLoc(), I->getParent())
1556  << "previous inlining not repeated: '" << ore::NV("Callee", Callee)
1557  << "' into '" << ore::NV("Caller", &F) << "'");
1558 
1559  ++NumCSNotInlined;
      // NOTE(review): FS is dereferenced without a null check.
      // inlineHotFunctionsWithPriority can insert Candidate.CalleeSamples,
      // which getInlineCandidate allows to be null for external-advisor
      // candidates -- confirm a null entry cannot reach this point.
1560  const FunctionSamples *FS = Pair.second;
      // Nothing to merge for a profile with no samples at all.
1561  if (FS->getTotalSamples() == 0 && FS->getHeadSamplesEstimate() == 0) {
1562  continue;
1563  }
1564 
1565  // Do not merge a context that is already duplicated into the base profile.
1566  if (FS->getContext().hasAttribute(sampleprof::ContextDuplicatedIntoBase))
1567  continue;
1568 
1569  if (ProfileMergeInlinee) {
1570  // A function call can be replicated by optimizations like callsite
1571  // splitting or jump threading and the replicates end up sharing the
1572  // same nested callee profile instead of slicing the original
1573  // inlinee's profile. We want to do the merge exactly once by filtering
1574  // out callee profiles with a non-zero head sample count.
1575  if (FS->getHeadSamples() == 0) {
1576  // Use entry samples as head samples during the merge, as inlinees
1577  // don't have head samples.
      // The const_cast mutates the shared profile; the non-zero head count
      // it leaves behind is what makes the check above skip later copies.
1578  const_cast<FunctionSamples *>(FS)->addHeadSamples(
1579  FS->getHeadSamplesEstimate());
1580 
1581  // Note that we have to do the merge right after processing function.
1582  // This allows OutlineFS's profile to be used for annotation during
1583  // top-down processing of functions' annotation.
1584  FunctionSamples *OutlineFS = Reader->getOrCreateSamplesFor(*Callee);
1585  OutlineFS->merge(*FS, 1);
1586  // Set outlined profile to be synthetic to not bias the inliner.
1587  OutlineFS->SetContextSynthetic();
1588  }
1589  } else {
      // Accumulate the entry count per callee; a new record starts at 0.
1590  auto pair =
1591  notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
1592  pair.first->second.entryCount += FS->getHeadSamplesEstimate();
1593  }
1594  }
1595 }
1596 
1597 /// Returns the sorted CallTargetMap \p M by count in descending order.
/// Each returned InstrProfValueData holds the GUID computed from the target's
/// name (FunctionSamples::getGUID) and the target's count, in the order
/// produced by SampleRecord::SortCallTargets.
// NOTE(review): listing lines 1598-1600 (the signature and the declaration of
// the result vector `R`) are absent from this extract.
1601  for (const auto &I : SampleRecord::SortCallTargets(M)) {
1602  R.emplace_back(
1603  InstrProfValueData{FunctionSamples::getGUID(I.first), I.second});
1604  }
1605  return R;
1606 }
1607 
1608 // Generate MD_prof metadata for every branch instruction using the
1609 // edge weights computed during propagation.
     //
     // For each basic block of \p F this annotates the call instructions first
     // and the block terminator second:
     //  - calls without a statically known callee get value-profile data built
     //    from the call targets the profile records at that call site
     //    (updateIDTMetaData);
     //  - other non-intrinsic calls get a single branch weight equal to the
     //    weight of their block;
     //  - branch, switch and indirectbr terminators with more than one successor
     //    get one weight per successor, taken from EdgeWeights.
1610 void SampleProfileLoader::generateMDProfMetadata(Function &F) {
1611  // Generate MD_prof metadata for every branch instruction using the
1612  // edge weights computed during propagation.
1613  LLVM_DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n");
1614  LLVMContext &Ctx = F.getContext();
1615  MDBuilder MDB(Ctx);
1616  for (auto &BI : F) {
1617  BasicBlock *BB = &BI;
1618 
      // Blocks with a non-zero weight: annotate every call/invoke in the block.
1619  if (BlockWeights[BB]) {
1620  for (auto &I : *BB) {
1621  if (!isa<CallInst>(I) && !isa<InvokeInst>(I))
1622  continue;
      // getCalledFunction() is null: there is no statically known callee, so
      // the targets are looked up in the profile instead.
1623  if (!cast<CallBase>(I).getCalledFunction()) {
1624  const DebugLoc &DLoc = I.getDebugLoc();
1625  if (!DLoc)
1626  continue;
1627  const DILocation *DIL = DLoc;
1628  const FunctionSamples *FS = findFunctionSamples(I);
1629  if (!FS)
1630  continue;
      // NOTE(review): CallSite is defined on a line not shown in this listing.
1632  auto T = FS->findCallTargetMapAt(CallSite);
1633  if (!T || T.get().empty())
1634  continue;
1636  // Prorate the callsite counts based on the pre-ICP distribution
1637  // factor to reflect what is already done to the callsite before
1638  // ICP, such as callsite cloning.
1639  if (std::optional<PseudoProbe> Probe = extractProbe(I)) {
1640  if (Probe->Factor < 1)
1641  T = SampleRecord::adjustCallTargets(T.get(), Probe->Factor);
1642  }
1643  }
1644  SmallVector<InstrProfValueData, 2> SortedCallTargets =
1646  uint64_t Sum = 0;
1647  for (const auto &C : T.get())
1648  Sum += C.second;
1649  // With CSSPGO all indirect call targets are counted towards the
1650  // original indirect call site in the profile, including both
1651  // inlined and non-inlined targets.
1653  if (const FunctionSamplesMap *M =
1654  FS->findFunctionSamplesMapAt(CallSite)) {
1655  for (const auto &NameFS : *M)
1656  Sum += NameFS.second.getHeadSamplesEstimate();
1657  }
1658  }
      // With a zero total there is nothing to record; existing metadata is
      // dropped only when OverwriteExistingWeights is set.
1659  if (Sum)
1660  updateIDTMetaData(I, SortedCallTargets, Sum);
1661  else if (OverwriteExistingWeights)
1662  I.setMetadata(LLVMContext::MD_prof, nullptr);
1663  } else if (!isa<IntrinsicInst>(&I)) {
      // Non-intrinsic call with a known callee: a single weight, the block
      // weight narrowed to 32 bits.
1664  I.setMetadata(LLVMContext::MD_prof,
1665  MDB.createBranchWeights(
1666  {static_cast<uint32_t>(BlockWeights[BB])}));
1667  }
1668  }
      // NOTE(review): the line that opens the following branch is not shown in
      // this listing -- confirm its condition against the original file.
1670  // Set profile metadata (possibly annotated by LTO prelink) to zero or
1671  // clear it for cold code.
1672  for (auto &I : *BB) {
1673  if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
1674  if (cast<CallBase>(I).isIndirectCall())
1675  I.setMetadata(LLVMContext::MD_prof, nullptr);
1676  else
1677  I.setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(0));
1678  }
1679  }
1680  }
1681 
      // Terminator handling: a terminator with exactly one successor, or one
      // that is not a branch/switch/indirectbr, gets no weights.
1682  Instruction *TI = BB->getTerminator();
1683  if (TI->getNumSuccessors() == 1)
1684  continue;
1685  if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI) &&
1686  !isa<IndirectBrInst>(TI))
1687  continue;
1688 
1689  DebugLoc BranchLoc = TI->getDebugLoc();
1690  LLVM_DEBUG(dbgs() << "\nGetting weights for branch at line "
1691  << ((BranchLoc) ? Twine(BranchLoc.getLine())
1692  : Twine("<UNKNOWN LOCATION>"))
1693  << ".\n");
1694  SmallVector<uint32_t, 4> Weights;
1695  uint32_t MaxWeight = 0;
      // MaxDestInst is assigned only when a non-zero edge weight is seen in the
      // loop below, and is read only under the MaxWeight > 0 check further down.
1696  Instruction *MaxDestInst;
1697  // Since profi treats multiple edges (multiway branches) as a single edge,
1698  // we need to distribute the computed weight among the branches. We do
1699  // this by evenly splitting the edge weight among destinations.
1700  DenseMap<const BasicBlock *, uint64_t> EdgeMultiplicity;
1701  std::vector<uint64_t> EdgeIndex;
1702  if (SampleProfileUseProfi) {
      // EdgeIndex[I] is the number of earlier successors with the same
      // destination; EdgeMultiplicity[Succ] ends up as the total count.
1703  EdgeIndex.resize(TI->getNumSuccessors());
1704  for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
1705  const BasicBlock *Succ = TI->getSuccessor(I);
1706  EdgeIndex[I] = EdgeMultiplicity[Succ];
1707  EdgeMultiplicity[Succ]++;
1708  }
1709  }
1710  for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
1711  BasicBlock *Succ = TI->getSuccessor(I);
1712  Edge E = std::make_pair(BB, Succ);
1713  uint64_t Weight = EdgeWeights[E];
1714  LLVM_DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), E));
1715  // Use uint32_t saturated arithmetic to adjust the incoming weights,
1716  // if needed. Sample counts in profiles are 64-bit unsigned values,
1717  // but internally branch weights are expressed as 32-bit values.
1718  if (Weight > std::numeric_limits<uint32_t>::max()) {
1719  LLVM_DEBUG(dbgs() << " (saturated due to uint32_t overflow)");
      // NOTE(review): the statement that presumably clamps Weight is on a line
      // not shown in this listing -- confirm against the original file.
1721  }
1722  if (!SampleProfileUseProfi) {
1723  // Weight is added by one to avoid propagation errors introduced by
1724  // 0 weights.
1725  Weights.push_back(static_cast<uint32_t>(Weight + 1));
1726  } else {
1727  // Profi creates proper weights that do not require "+1" adjustments but
1728  // we evenly split the weight among branches with the same destination.
1729  uint64_t W = Weight / EdgeMultiplicity[Succ];
1730  // Rounding up, if needed, so that first branches are hotter.
1731  if (EdgeIndex[I] < Weight % EdgeMultiplicity[Succ])
1732  W++;
1733  Weights.push_back(static_cast<uint32_t>(W));
1734  }
      // Remember the heaviest successor; its first suitable instruction anchors
      // the "PopularDest" remark emitted below.
1735  if (Weight != 0) {
1736  if (Weight > MaxWeight) {
1737  MaxWeight = Weight;
1738  MaxDestInst = Succ->getFirstNonPHIOrDbgOrLifetime();
1739  }
1740  }
1741  }
1742 
1743  misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false);
1744 
1745  uint64_t TempWeight;
1746  // Only set weights if there is at least one non-zero weight.
1747  // In any other case, let the analyzer set weights.
1748  // Do not set weights if the weights are present unless under
1749  // OverwriteExistingWeights. In ThinLTO, the profile annotation is done
1750  // twice. If the first annotation already set the weights, the second pass
1751  // does not need to set it. With OverwriteExistingWeights, Blocks with zero
1752  // weight should have their existing metadata (possibly annotated by LTO
1753  // prelink) cleared.
1754  if (MaxWeight > 0 &&
1755  (!TI->extractProfTotalWeight(TempWeight) || OverwriteExistingWeights)) {
1756  LLVM_DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
1757  TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
1758  ORE->emit([&]() {
1759  return OptimizationRemark(DEBUG_TYPE, "PopularDest", MaxDestInst)
1760  << "most popular destination for conditional branches at "
1761  << ore::NV("CondBranchesLoc", BranchLoc);
1762  });
1763  } else {
      // NOTE(review): the condition choosing between "CLEARED" and "SKIPPED" is
      // on a line not shown in this listing.
1765  TI->setMetadata(LLVMContext::MD_prof, nullptr);
1766  LLVM_DEBUG(dbgs() << "CLEARED. All branch weights are zero.\n");
1767  } else {
1768  LLVM_DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
1769  }
1770  }
1771  }
1772 }
1773 
1774 /// Once all the branch weights are computed, we emit the MD_prof
1775 /// metadata on BB using the computed values for each of its branches.
     ///
     /// The visible steps are: validate the profile for \p F, run one of the two
     /// sample-profile inliners, compute and propagate weights, generate MD_prof
     /// metadata if any of those steps reported a change, and emit coverage
     /// remarks.
1776 ///
1777 /// \param F The function to query.
1778 ///
1779 /// \returns true if \p F was modified. Returns false, otherwise.
1780 bool SampleProfileLoader::emitAnnotations(Function &F) {
1781  bool Changed = false;
1782 
      // NOTE(review): the condition opening this branch is on a line not shown
      // in this listing; the branch consults ProbeManager, so it presumably
      // covers probe-based profiles -- confirm.
1784  if (!ProbeManager->profileIsValid(F, *Samples)) {
1785  LLVM_DEBUG(
1786  dbgs() << "Profile is invalid due to CFG mismatch for Function "
1787  << F.getName());
1788  ++NumMismatchedProfile;
1789  return false;
1790  }
1791  ++NumMatchedProfile;
1792  } else {
      // Bail out without annotating when getFunctionLoc(F) is 0.
1793  if (getFunctionLoc(F) == 0)
1794  return false;
1795 
1796  LLVM_DEBUG(dbgs() << "Line number for the first instruction in "
1797  << F.getName() << ": " << getFunctionLoc(F) << "\n");
1798  }
1799 
      // InlinedGUIDs is filled by the inliner and handed to weight propagation.
      // NOTE(review): the condition selecting the priority-based inliner is on a
      // line not shown in this listing.
1800  DenseSet<GlobalValue::GUID> InlinedGUIDs;
1802  Changed |= inlineHotFunctionsWithPriority(F, InlinedGUIDs);
1803  else
1804  Changed |= inlineHotFunctions(F, InlinedGUIDs);
1805 
1806  Changed |= computeAndPropagateWeights(F, InlinedGUIDs);
1807 
      // Metadata is generated only when inlining or propagation reported a change.
1808  if (Changed)
1809  generateMDProfMetadata(F);
1810 
1811  emitCoverageRemarks(F);
1812  return Changed;
1813 }
1814 
/// Builds the profiled call graph consumed by buildFunctionOrder.
///
/// The graph is created either from the context tracker or from the reader's
/// profiles; afterwards every function definition in \p CG that carries the
/// "use-sample-profile" attribute is added to it by canonical name.
///
/// \param CG The static call graph whose nodes are enumerated.
///
/// \returns the newly created ProfiledCallGraph.
1815 std::unique_ptr<ProfiledCallGraph>
1816 SampleProfileLoader::buildProfiledCallGraph(CallGraph &CG) {
1817  std::unique_ptr<ProfiledCallGraph> ProfiledCG;
      // NOTE(review): the condition choosing between the two constructors is on
      // a line not shown in this listing.
1819  ProfiledCG = std::make_unique<ProfiledCallGraph>(*ContextTracker);
1820  else
1821  ProfiledCG = std::make_unique<ProfiledCallGraph>(Reader->getProfiles());
1822 
1823  // Add all functions into the profiled call graph even if they are not in
1824  // the profile. This makes sure functions missing from the profile still
1825  // get a chance to be processed.
1826  for (auto &Node : CG) {
1827  const auto *F = Node.first;
      // Skip null entries, declarations, and functions not opted in to sample
      // profiling.
1828  if (!F || F->isDeclaration() || !F->hasFnAttribute("use-sample-profile"))
1829  continue;
1830  ProfiledCG->addProfiledFunction(FunctionSamples::getCanonicalFnName(*F));
1831  }
1832 
1833  return ProfiledCG;
1834 }
1835 
/// Computes the order in which the functions of \p M are processed.
///
/// Only function definitions carrying the "use-sample-profile" attribute are
/// included. When ProfileTopDownLoad is off or \p CG is null, functions are
/// returned in module order and ProfileMergeInlinee is switched off.
/// Otherwise the SCCs of either the profiled call graph or the static call
/// graph are walked, and the collected list is reversed before being returned.
///
/// \param M  The module being annotated.
/// \param CG The static call graph; may be null.
///
/// \returns the functions in processing order.
1836 std::vector<Function *>
1837 SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
1838  std::vector<Function *> FunctionOrderList;
1839  FunctionOrderList.reserve(M.size());
1840 
      // NOTE(review): the condition guarding this warning is on a line not shown
      // in this listing.
1842  errs() << "WARNING: -use-profiled-call-graph ignored, should be used "
1843  "together with -sample-profile-top-down-load.\n";
1844 
1845  if (!ProfileTopDownLoad || CG == nullptr) {
1846  if (ProfileMergeInlinee) {
1847  // Disable ProfileMergeInlinee if profile is not loaded in top down order,
1848  // because the profile for a function may be used for the profile
1849  // annotation of its outline copy before the profile merging of its
1850  // non-inlined inline instances, and that is not how
1851  // ProfileMergeInlinee is supposed to work.
1852  ProfileMergeInlinee = false;
1853  }
1854 
1855  for (Function &F : M)
1856  if (!F.isDeclaration() && F.hasFnAttribute("use-sample-profile"))
1857  FunctionOrderList.push_back(&F);
1858  return FunctionOrderList;
1859  }
1860 
1861  assert(&CG->getModule() == &M);
1862 
      // NOTE(review): the line opening the branch below is not shown in this
      // listing; its body uses the profiled call graph, its "else" the static one.
1865  // Use profiled call edges to augment the top-down order. There are cases
1866  // where the top-down order computed based on the static call graph doesn't
1867  // reflect real execution order. For example
1868  //
1869  // 1. Incomplete static call graph due to unknown indirect call targets.
1870  // Adjusting the order by considering indirect call edges from the
1871  // profile can enable the inlining of indirect call targets by allowing
1872  // the caller to be processed before them.
1873  // 2. Mutual call edges in an SCC. The static processing order computed for
1874  // an SCC may not reflect the call contexts in the context-sensitive
1875  // profile, thus may cause potential inlining to be overlooked. The
1876  // function order in one SCC is being adjusted to a top-down order based
1877  // on the profile to favor more inlining. This is only a problem with CS
1878  // profile.
1879  // 3. Transitive indirect call edges due to inlining. When a callee function
1880  // (say B) is inlined into a caller function (say A) in LTO prelink,
1881  // every call edge originated from the callee B will be transferred to
1882  // the caller A. If any transferred edge (say A->C) is indirect, the
1883  // original profiled indirect edge B->C, even if considered, would not
1884  // enforce a top-down order from the caller A to the potential indirect
1885  // call target C in LTO postlink since the inlined callee B is gone from
1886  // the static call graph.
1887  // 4. #3 can happen even for direct call targets, due to functions defined
1888  // in header files. A header function (say A), when included into source
1889  // files, is defined multiple times but only one definition survives due
1890  // to ODR. Therefore, the LTO prelink inlining done on those dropped
1891  // definitions can be useless based on a local file scope. More
1892  // importantly, the inlinee (say B), once fully inlined to a
1893  // to-be-dropped A, will have no profile to consume when its outlined
1894  // version is compiled. This can lead to a profile-less prelink
1895  // compilation for the outlined version of B which may be called from
1896  // external modules. While this isn't easy to fix, we rely on the
1897  // postlink AutoFDO pipeline to optimize B. Since the surviving copy of
1898  // A can be inlined in its local scope in prelink, it may not exist
1899  // in the merged IR in postlink, and we'll need the profiled call edges
1900  // to enforce a top-down order for the rest of the functions.
1901  //
1902  // Considering those cases, a profiled call graph completely independent of
1903  // the static call graph is constructed based on profile data, where
1904  // function objects are not even needed to handle case #3 and case #4.
1905  //
1906  // Note that static callgraph edges are completely ignored since they
1907  // can be conflicting with profiled edges for cyclic SCCs and may result in
1908  // an SCC order incompatible with profile-defined one. Using strictly
1909  // profile order ensures a maximum inlining experience. On the other hand,
1910  // static call edges are not so important when they don't correspond to a
1911  // context in the profile.
1912 
1913  std::unique_ptr<ProfiledCallGraph> ProfiledCG = buildProfiledCallGraph(*CG);
1914  scc_iterator<ProfiledCallGraph *> CGI = scc_begin(ProfiledCG.get());
1915  while (!CGI.isAtEnd()) {
1916  auto Range = *CGI;
1917  if (SortProfiledSCC) {
1918  // Sort nodes in one SCC based on callsite hotness.
      // NOTE(review): SI is defined on a line not shown in this listing.
1920  Range = *SI;
1921  }
1922  for (auto *Node : Range) {
      // Profiled-graph nodes only carry names; map them back to IR functions.
      // A failed lookup yields nullptr and the node is skipped.
1923  Function *F = SymbolMap.lookup(Node->Name);
1924  if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile"))
1925  FunctionOrderList.push_back(F);
1926  }
1927  ++CGI;
1928  }
1929  } else {
      // NOTE(review): the iterator CGI used in this branch is defined on a line
      // not shown in this listing.
1931  while (!CGI.isAtEnd()) {
1932  for (CallGraphNode *Node : *CGI) {
1933  auto *F = Node->getFunction();
1934  if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile"))
1935  FunctionOrderList.push_back(F);
1936  }
1937  ++CGI;
1938  }
1939  }
1940 
1941  LLVM_DEBUG({
1942  dbgs() << "Function processing order:\n";
1943  for (auto F : reverse(FunctionOrderList)) {
1944  dbgs() << F->getName() << "\n";
1945  }
1946  });
1947 
      // Reverse the collected order before returning it; the debug dump above
      // already prints the list in that reversed order.
1948  std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
1949  return FunctionOrderList;
1950 }
1951 
/// Creates the profile reader, reads the profile, and configures the loader
/// for the kind of profile that was found.
///
/// \param M The module being compiled.
///
/// \returns false if the profile cannot be opened or read, or if a probe-based
/// profile is used with a module that is not probed. Returns true, otherwise.
// NOTE(review): the remainder of the parameter list is on a line not shown in
// this listing; the body reads a pointer named FAM -- confirm its declaration.
1952 bool SampleProfileLoader::doInitialization(Module &M,
1954  auto &Ctx = M.getContext();
1955 
1956  auto ReaderOrErr = SampleProfileReader::create(
1957  Filename, Ctx, FSDiscriminatorPass::Base, RemappingFilename);
1958  if (std::error_code EC = ReaderOrErr.getError()) {
1959  std::string Msg = "Could not open profile: " + EC.message();
1960  Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1961  return false;
1962  }
1963  Reader = std::move(ReaderOrErr.get());
1965  // Set module before reading the profile so reader may be able to only
1966  // read the function profiles which are used by the current module.
1967  Reader->setModule(&M);
1968  if (std::error_code EC = Reader->read()) {
1969  std::string Msg = "profile reading failed: " + EC.message();
1970  Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1971  return false;
1972  }
1973 
1974  PSL = Reader->getProfileSymbolList();
1975 
1976  // While profile-sample-accurate is on, ignore symbol list.
      // NOTE(review): the right-hand side of this assignment is on a line not
      // shown in this listing.
1977  ProfAccForSymsInList =
1979  if (ProfAccForSymsInList) {
      // Rebuild NamesInProfile from the reader's name table, if it has one.
1980  NamesInProfile.clear();
1981  if (auto NameTable = Reader->getNameTable())
1982  NamesInProfile.insert(NameTable->begin(), NameTable->end());
1983  CoverageTracker.setProfAccForSymsInList(true);
1984  }
1985 
      // A replay inline advisor is installed only when FAM is non-null and a
      // replay file was given.
      // NOTE(review): several arguments of this call are on lines not shown in
      // this listing.
1986  if (FAM && !ProfileInlineReplayFile.empty()) {
1987  ExternalInlineAdvisor = getReplayInlineAdvisor(
1988  M, *FAM, Ctx, /*OriginalAdvisor=*/nullptr,
1993  /*EmitRemarks=*/false, InlineContext{LTOPhase, InlinePass::ReplaySampleProfileInliner});
1994  }
1995 
1996  // Apply tweaks if context-sensitive or probe-based profile is available.
1997  if (Reader->profileIsCS() || Reader->profileIsPreInlined() ||
1998  Reader->profileIsProbeBased()) {
      // NOTE(review): each assignment below is preceded in the original by a
      // line not shown in this listing (presumably a guard on whether the option
      // was set explicitly) -- confirm before relying on these being
      // unconditional.
2000  UseIterativeBFIInference = true;
2002  SampleProfileUseProfi = true;
2005  // Enable priority-based inliner and size inline by default for CSSPGO.
2007  ProfileSizeInline = true;
2010  // For CSSPGO, we also allow recursive inline to best use context profile.
2012  AllowRecursiveInline = true;
2013 
2014  if (Reader->profileIsPreInlined()) {
2016  UsePreInlinerDecision = true;
2017  }
2018 
2019  if (!Reader->profileIsCS()) {
2020  // Non-CS profile should be fine without a function size budget for the
2021  // inliner since the contexts in the profile are either all from inlining
2022  // in the previous build or pre-computed by the preinliner with a size
2023  // cap, thus they are bounded.
      // NOTE(review): the statements controlled by the two checks below are on
      // lines not shown in this listing.
2024  if (!ProfileInlineLimitMin.getNumOccurrences())
2026  if (!ProfileInlineLimitMax.getNumOccurrences())
2028  }
2029  }
2030 
2031  if (Reader->profileIsCS()) {
2032  // Tracker for profiles under different context
2033  ContextTracker = std::make_unique<SampleContextTracker>(
2034  Reader->getProfiles(), &GUIDToFuncNameMap);
2035  }
2036 
2037  // Load pseudo probe descriptors for probe-based function samples.
2038  if (Reader->profileIsProbeBased()) {
2039  ProbeManager = std::make_unique<PseudoProbeManager>(M);
      // A probe-based profile on an unprobed module is reported as a warning and
      // aborts initialization.
2040  if (!ProbeManager->moduleIsProbed(M)) {
2041  const char *Msg =
2042  "Pseudo-probe-based profile requires SampleProfileProbePass";
2043  Ctx.diagnose(DiagnosticInfoSampleProfile(M.getModuleIdentifier(), Msg,
2044  DS_Warning));
2045  return false;
2046  }
2047  }
2048 
      // NOTE(review): the condition guarding creation of the matcher is on a
      // line not shown in this listing.
2050  MatchingManager =
2051  std::make_unique<SampleProfileMatcher>(M, *Reader, ProbeManager.get());
2052  }
2053 
2054  return true;
2055 }
2056 
/// Accumulates mismatch statistics between the IR of \p F and its profile
/// \p FS.
///
/// Function-hash counters are updated from ProbeManager->profileIsValid. Then
/// every call-site location recorded in the profile (body samples that have
/// call targets, and call-site samples) is counted, and counted as mismatched
/// when no IR call site was matched at that location.
///
/// \param F  The IR function being checked.
/// \param FS The profile samples looked up for \p F.
2057 void SampleProfileMatcher::detectProfileMismatch(const Function &F,
2058  const FunctionSamples &FS) {
      // NOTE(review): the condition opening this block is on a line not shown in
      // this listing; the block consults ProbeManager, so it presumably applies
      // to probe-based profiles only -- confirm.
2060  uint64_t Count = FS.getTotalSamples();
2061  TotalFuncHashSamples += Count;
2062  TotalProfiledFunc++;
2063  if (!ProbeManager->profileIsValid(F, FS)) {
2064  MismatchedFuncHashSamples += Count;
2065  NumMismatchedFuncHash++;
2066  return;
2067  }
2068  }
2069 
2070  std::unordered_set<LineLocation, LineLocationHash> MatchedCallsiteLocs;
2071 
2072  // Go through all the callsites on the IR and flag the callsite if the target
2073  // name is the same as the one in the profile.
2074  for (auto &BB : F) {
2075  for (auto &I : BB) {
2076  if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I))
2077  continue;
2078 
2079  const auto *CB = dyn_cast<CallBase>(&I);
      // Call sites without a debug location are ignored.
      // NOTE(review): IRCallsite is defined on a line not shown in this listing.
2080  if (auto &DLoc = I.getDebugLoc()) {
2082 
      // CalleeName stays empty when getCalledFunction() returns null; that is
      // what selects the indirect-call case below.
2083  StringRef CalleeName;
2084  if (Function *Callee = CB->getCalledFunction())
2085  CalleeName = FunctionSamples::getCanonicalFnName(Callee->getName());
2086 
2087  const auto CTM = FS.findCallTargetMapAt(IRCallsite);
2088  const auto CallsiteFS = FS.findFunctionSamplesMapAt(IRCallsite);
2089 
2090  // Indirect call case.
2091  if (CalleeName.empty()) {
2092  // Since indirect call does not have the CalleeName, check
2093  // conservatively if callsite in the profile is a callsite location.
2094  // This is to avoid a large number of false positives, since otherwise
2095  // all the indirect call samples will be reported as mismatching.
2096  if ((CTM && !CTM->empty()) || (CallsiteFS && !CallsiteFS->empty()))
2097  MatchedCallsiteLocs.insert(IRCallsite);
2098  } else {
2099  // Check if the call target name is matched for direct call case.
2100  if ((CTM && CTM->count(CalleeName)) ||
2101  (CallsiteFS && CallsiteFS->count(CalleeName)))
2102  MatchedCallsiteLocs.insert(IRCallsite);
2103  }
2104  }
2105  }
2106  }
2107 
      // Locations whose LineOffset has bit 0x8000 set are skipped by both loops
      // below.
2108  auto isInvalidLineOffset = [](uint32_t LineOffset) {
2109  return LineOffset & 0x8000;
2110  };
2111 
2112  // Check if there are any callsites in the profile that do not match any
2113  // IR callsite; those callsite samples will be discarded.
2114  for (auto &I : FS.getBodySamples()) {
2115  const LineLocation &Loc = I.first;
2116  if (isInvalidLineOffset(Loc.LineOffset))
2117  continue;
2118 
2119  uint64_t Count = I.second.getSamples();
      // Only body samples that recorded call targets count as call sites.
2120  if (!I.second.getCallTargets().empty()) {
2121  TotalCallsiteSamples += Count;
2122  TotalProfiledCallsite++;
2123  if (!MatchedCallsiteLocs.count(Loc)) {
2124  MismatchedCallsiteSamples += Count;
2125  NumMismatchedCallsite++;
2126  }
2127  }
2128  }
2129 
2130  for (auto &I : FS.getCallsiteSamples()) {
2131  const LineLocation &Loc = I.first;
2132  if (isInvalidLineOffset(Loc.LineOffset))
2133  continue;
2134 
      // Sum the total samples of every function profile recorded at this
      // location.
2135  uint64_t Count = 0;
2136  for (auto &FM : I.second) {
2137  Count += FM.second.getTotalSamples();
2138  }
2139  TotalCallsiteSamples += Count;
2140  TotalProfiledCallsite++;
2141  if (!MatchedCallsiteLocs.count(Loc)) {
2142  MismatchedCallsiteSamples += Count;
2143  NumMismatchedCallsite++;
2144  }
2145  }
2146 }
2147 
/// Runs per-function mismatch detection over the module and reports the
/// accumulated counters.
///
/// Every function definition in M that carries the "use-sample-profile"
/// attribute and has samples in the reader is checked. The counters are
/// printed to errs() when ReportProfileStaleness is set, and the visible code
/// also records them as an operand of the "llvm.stats" named metadata.
2148 void SampleProfileMatcher::detectProfileMismatch() {
2149  for (auto &F : M) {
2150  if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"))
2151  continue;
2152  FunctionSamples *FS = Reader.getSamplesFor(F);
2153  if (!FS)
2154  continue;
2155  detectProfileMismatch(F, *FS);
2156  }
2157 
2158  if (ReportProfileStaleness) {
      // NOTE(review): the condition guarding the function-hash report is on a
      // line not shown in this listing.
2160  errs() << "(" << NumMismatchedFuncHash << "/" << TotalProfiledFunc << ")"
2161  << " of functions' profile are invalid and "
2162  << " (" << MismatchedFuncHashSamples << "/" << TotalFuncHashSamples
2163  << ")"
2164  << " of samples are discarded due to function hash mismatch.\n";
2165  }
2166  errs() << "(" << NumMismatchedCallsite << "/" << TotalProfiledCallsite
2167  << ")"
2168  << " of callsites' profile are invalid and "
2169  << "(" << MismatchedCallsiteSamples << "/" << TotalCallsiteSamples
2170  << ")"
2171  << " of samples are discarded due to callsite location mismatch.\n";
2172  }
2173 
      // NOTE(review): the condition opening this block, the declaration of
      // ProfStatsVec, and the guard around the function-hash entries are on
      // lines not shown in this listing.
2175  LLVMContext &Ctx = M.getContext();
2176  MDBuilder MDB(Ctx);
2177 
2180  ProfStatsVec.emplace_back("NumMismatchedFuncHash", NumMismatchedFuncHash);
2181  ProfStatsVec.emplace_back("TotalProfiledFunc", TotalProfiledFunc);
2182  ProfStatsVec.emplace_back("MismatchedFuncHashSamples",
2183  MismatchedFuncHashSamples);
2184  ProfStatsVec.emplace_back("TotalFuncHashSamples", TotalFuncHashSamples);
2185  }
2186  ProfStatsVec.emplace_back("MismatchedCallsiteSamples",
2187  MismatchedCallsiteSamples);
2188  ProfStatsVec.emplace_back("TotalCallsiteSamples", TotalCallsiteSamples);
2189 
      // Attach the collected (name, value) pairs to the module as a new operand
      // of the "llvm.stats" named metadata.
2190  auto *MD = MDB.createLLVMStats(ProfStatsVec);
2191  auto *NMD = M.getOrInsertNamedMetadata("llvm.stats");
2192  NMD->addOperand(MD);
2193  }
2194 }
2195 
/// Annotates the functions of \p M with profile data.
///
/// Installs the profile summary on the module if it has none, totals the
/// collected samples, populates SymbolMap (including remapped names), and
/// then calls runOnFunction on every function returned by buildFunctionOrder.
///
/// \param M    The module to annotate.
/// \param AM   Module analysis manager, forwarded to runOnFunction.
/// \param _PSI Profile summary info; stored in PSI.
/// \param CG   Call graph, forwarded to buildFunctionOrder.
///
/// \returns true if any runOnFunction call returned true.
2196 bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
2197  ProfileSummaryInfo *_PSI, CallGraph *CG) {
2198  GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
2199 
2200  PSI = _PSI;
      // Install the reader's summary only when the module has no (non-CS)
      // profile summary yet, then refresh PSI.
      // NOTE(review): the remaining argument of setProfileSummary is on a line
      // not shown in this listing.
2201  if (M.getProfileSummary(/* IsCS */ false) == nullptr) {
2202  M.setProfileSummary(Reader->getSummary().getMD(M.getContext()),
2204  PSI->refresh();
2205  }
2206  // Compute the total number of samples collected in this profile.
2207  for (const auto &I : Reader->getProfiles())
2208  TotalCollectedSamples += I.second.getTotalSamples();
2209 
2210  auto Remapper = Reader->getRemapper();
2211  // Populate the symbol map.
2212  for (const auto &N_F : M.getValueSymbolTable()) {
2213  StringRef OrigName = N_F.getKey();
2214  Function *F = dyn_cast<Function>(N_F.getValue());
      // Only named functions are recorded.
2215  if (F == nullptr || OrigName.empty())
2216  continue;
2217  SymbolMap[OrigName] = F;
      // NOTE(review): NewName is defined on a line not shown in this listing;
      // the comment below calls it the stripped name.
2219  if (OrigName != NewName && !NewName.empty()) {
2220  auto r = SymbolMap.insert(std::make_pair(NewName, F));
2221  // Failing to insert means there is already an entry in SymbolMap,
2222  // thus there are multiple functions that are mapped to the same
2223  // stripped name. In case of such a name conflict, set the value
2224  // to nullptr to avoid confusion.
2225  if (!r.second)
2226  r.first->second = nullptr;
2227  OrigName = NewName;
2228  }
2229  // Insert the remapped names into SymbolMap.
2230  if (Remapper) {
2231  if (auto MapName = Remapper->lookUpNameInProfile(OrigName)) {
2232  if (*MapName != OrigName && !MapName->empty())
2233  SymbolMap.insert(std::make_pair(*MapName, F));
2234  }
2235  }
2236  }
2237  assert(SymbolMap.count(StringRef()) == 0 &&
2238  "No empty StringRef should be added in SymbolMap");
2239 
      // NOTE(review): the condition guarding this call is on a line not shown in
      // this listing.
2241  MatchingManager->detectProfileMismatch();
2242 
2243  bool retval = false;
2244  for (auto *F : buildFunctionOrder(M, CG)) {
2245  assert(!F->isDeclaration());
      // Per-function state is reset before each function is processed.
2246  clearFunctionData();
2247  retval |= runOnFunction(*F, AM);
2248  }
2249 
2250  // Account for cold calls not inlined.
      // NOTE(review): the condition guarding this loop is on a line not shown in
      // this listing.
2252  for (const std::pair<Function *, NotInlinedProfileInfo> &pair :
2253  notInlinedCallInfo)
2254  updateProfileCallee(pair.first, pair.second.entryCount);
2255 
2256  return retval;
2257 }
2258 
// NOTE(review): the signature of this function is on a line not shown in this
// listing. The body reads a function F and an analysis-manager pointer AM;
// runOnModule calls runOnFunction(*F, AM), which presumably is this function.
//
// Chooses an initial entry count for F, sets up the optimization remark
// emitter, looks up the samples for F, and calls emitAnnotations when
// non-empty samples exist. Returns false when there are none.
2260  LLVM_DEBUG(dbgs() << "\n\nProcessing Function " << F.getName() << "\n");
2261  DILocation2SampleMap.clear();
2262  // By default the entry count is initialized to -1, which will be treated
2263  // conservatively by getEntryCount as the same as unknown (None). This is
2264  // to avoid treating newly added code as cold. If we have samples
2265  // this will be overwritten in emitAnnotations.
2266  uint64_t initialEntryCount = -1;
2267 
2268  ProfAccForSymsInList = ProfileAccurateForSymsInList && PSL;
2269  if (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate")) {
2270  // Initialize all the function entry counts to 0. It means all the
2271  // functions without profile will be regarded as cold.
2272  initialEntryCount = 0;
2273  // profile-sample-accurate is a user assertion which has a higher precedence
2274  // than symbol list. When profile-sample-accurate is on, ignore symbol list.
2275  ProfAccForSymsInList = false;
2276  }
2277  CoverageTracker.setProfAccForSymsInList(ProfAccForSymsInList);
2278 
2279  // PSL -- profile symbol list includes all the symbols in sampled binary.
2280  // If ProfileAccurateForSymsInList is enabled, PSL is used to treat
2281  // old functions without samples being cold, without having to worry
2282  // about new and hot functions being mistakenly treated as cold.
2283  if (ProfAccForSymsInList) {
2284  // Initialize the entry count to 0 for functions in the list.
2285  if (PSL->contains(F.getName()))
2286  initialEntryCount = 0;
2287 
2288  // Function in the symbol list but without sample will be regarded as
2289  // cold. To minimize the potential negative performance impact it could
2290  // have, we want to be a little conservative here saying if a function
2291  // shows up in the profile, no matter as outline function, inline instance
2292  // or call targets, treat the function as not being cold. This will handle
2293  // the cases such as most callsites of a function are inlined in sampled
2294  // binary but not inlined in current build (because of source code drift,
2295  // imprecise debug information, or the callsites are all cold individually
2296  // but not cold accumulatively...), so the outline function showing up as
2297  // cold in sampled binary will actually not be cold after current build.
      // NOTE(review): CanonName is defined on a line not shown in this listing.
2299  if (NamesInProfile.count(CanonName))
2300  initialEntryCount = -1;
2301  }
2302 
2303  // Initialize entry count when the function has no existing entry
2304  // count value.
2305  if (!F.getEntryCount())
2306  F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
      // With an analysis manager the emitter comes from it; otherwise a local
      // emitter is created and owned by OwnedORE for the rest of this call.
      // NOTE(review): parts of the AM branch, including the assignment to ORE,
      // are on lines not shown in this listing.
2307  std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
2308  if (AM) {
2309  auto &FAM =
2311  .getManager();
2313  } else {
2314  OwnedORE = std::make_unique<OptimizationRemarkEmitter>(&F);
2315  ORE = OwnedORE.get();
2316  }
2317 
      // NOTE(review): the condition choosing between ContextTracker and Reader
      // is on a line not shown in this listing.
2319  Samples = ContextTracker->getBaseSamplesFor(F);
2320  else
2321  Samples = Reader->getSamplesFor(F);
2322 
      // Annotate only when non-empty samples were found for F.
2323  if (Samples && !Samples->empty())
2324  return emitAnnotations(F);
2325  return false;
2326 }
2327 
2329  ModuleAnalysisManager &AM) {
      // Pass entry point: builds a SampleProfileLoader, initializes it for M and
      // runs it over the module. Returns PreservedAnalyses::all() when
      // initialization fails or runOnModule returns false, and
      // PreservedAnalyses::none() otherwise.
      // NOTE(review): the first line of the signature and the definition of FAM
      // are on lines not shown in this listing.
2332 
      // Callbacks handing per-function analysis results from FAM to the loader.
2333  auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
2334  return FAM.getResult<AssumptionAnalysis>(F);
2335  };
2336  auto GetTTI = [&](Function &F) -> TargetTransformInfo & {
2337  return FAM.getResult<TargetIRAnalysis>(F);
2338  };
      // NOTE(review): the body of GetTLI is on a line not shown in this listing.
2339  auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
2341  };
2342 
      // ProfileFileName / ProfileRemappingFileName take precedence when
      // non-empty; otherwise SampleProfileFile / SampleProfileRemappingFile are
      // used.
2343  SampleProfileLoader SampleLoader(
2344  ProfileFileName.empty() ? SampleProfileFile : ProfileFileName,
2345  ProfileRemappingFileName.empty() ? SampleProfileRemappingFile
2346  : ProfileRemappingFileName,
2347  LTOPhase, GetAssumptionCache, GetTTI, GetTLI);
2348 
2349  if (!SampleLoader.doInitialization(M, &FAM))
2350  return PreservedAnalyses::all();
2351 
      // NOTE(review): PSI and CG are defined on lines not shown in this listing.
2354  if (!SampleLoader.runOnModule(M, &AM, PSI, &CG))
2355  return PreservedAnalyses::all();
2356 
2357  return PreservedAnalyses::none();
2358 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
AnnotateSampleProfileInlinePhase
static cl::opt< bool > AnnotateSampleProfileInlinePhase("annotate-sample-profile-inline-phase", cl::Hidden, cl::init(false), cl::desc("Annotate LTO phase (prelink / postlink), or main (no LTO) for " "sample-profile inline pass name."))
Instrumentation.h
llvm::sampleprof::FunctionSamples::getBodySamples
const BodySampleMap & getBodySamples() const
Return all the samples collected in the body of the function.
Definition: SampleProf.h:922
AssumptionCache.h
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2592
llvm::SampleProfileLoaderPass::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: SampleProfile.cpp:2328
EnableExtTspBlockPlacement
cl::opt< bool > EnableExtTspBlockPlacement
llvm::Instruction::getNumSuccessors
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
Definition: Instruction.cpp:810
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::InlineParams::ComputeFullInlineCost
std::optional< bool > ComputeFullInlineCost
Compute inline cost even when the cost has exceeded the threshold.
Definition: InlineCost.h:233
AllowRecursiveInline
static cl::opt< bool > AllowRecursiveInline("sample-profile-recursive-inline", cl::Hidden, cl::desc("Allow sample loader inliner to inline recursive calls."))
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
it
into xmm2 addss xmm2 xmm1 xmm3 addss xmm3 movaps xmm0 unpcklps xmm0 ret seems silly when it could just be one addps Expand libm rounding functions main should enable SSE DAZ mode and other fast SSE modes Think about doing i64 math in SSE regs on x86 This testcase should have no SSE instructions in it
Definition: README-SSE.txt:81
ProfileInlineGrowthLimit
cl::opt< int > ProfileInlineGrowthLimit("sample-profile-inline-growth-limit", cl::Hidden, cl::init(12), cl::desc("The size growth ratio limit for proirity-based sample profile " "loader inlining."))
llvm::sampleprof::ContextDuplicatedIntoBase
@ ContextDuplicatedIntoBase
Definition: SampleProf.h:451
ProfileInlineLimitMax
cl::opt< int > ProfileInlineLimitMax("sample-profile-inline-limit-max", cl::Hidden, cl::init(10000), cl::desc("The upper bound of size growth limit for " "proirity-based sample profile loader inlining."))
llvm::sampleprof::FunctionSamples::ProfileIsProbeBased
static bool ProfileIsProbeBased
Definition: SampleProf.h:1118
llvm::CallGraphAnalysis
An analysis pass to compute the CallGraph for a Module.
Definition: CallGraph.h:304
llvm::InlineParams::AllowRecursiveCall
std::optional< bool > AllowRecursiveCall
Indicate whether we allow inlining for recursive call.
Definition: InlineCost.h:239
llvm::sampleprof::FunctionSamples::ProfileIsCS
static bool ProfileIsCS
Definition: SampleProf.h:1120
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
IntrinsicInst.h
SCCIterator.h
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:774
llvm::ThinOrFullLTOPhase::ThinLTOPostLink
@ ThinLTOPostLink
ThinLTO postlink (backend compile) phase.
T
llvm::sampleprof::SampleProfileReader::profileIsProbeBased
bool profileIsProbeBased() const
Whether input profile is based on pseudo probes.
Definition: SampleProfReader.h:474
llvm::sampleprof::SampleContext::hasAttribute
bool hasAttribute(ContextAttributeMask A)
Definition: SampleProf.h:588
llvm::Function
Definition: Function.h:60
llvm::DenseMapBase::lookup
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:197
SizeLimit
static cl::opt< unsigned > SizeLimit("eif-limit", cl::init(6), cl::Hidden, cl::desc("Size limit in Hexagon early if-conversion"))
StringRef.h
Pass.h
DEBUG_TYPE
#define DEBUG_TYPE
Definition: SampleProfile.cpp:98
CurrentReader
LVReader * CurrentReader
Definition: LVReader.cpp:153
llvm::InlineFunction
InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes=false, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, Function *ForwardVarArgsTo=nullptr)
This function inlines the called function into the basic block of the caller.
Definition: InlineFunction.cpp:2047
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
Statistic.h
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:266
llvm::SampleProfileLoaderBaseImpl
Definition: SampleProfileLoaderBaseImpl.h:81
llvm::Function::getSubprogram
DISubprogram * getSubprogram() const
Get the attached subprogram.
Definition: Metadata.cpp:1625
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:173
MapVector.h
SampleProfileRemappingFile
static cl::opt< std::string > SampleProfileRemappingFile("sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden)
llvm::InlineCost::getNever
static InlineCost getNever(const char *Reason, Optional< CostBenefitPair > CostBenefit=std::nullopt)
Definition: InlineCost.h:131
OptimizationRemarkEmitter.h
llvm::CallGraph
The basic data container for the call graph of a Module of IR.
Definition: CallGraph.h:72
FAM
FunctionAnalysisManager FAM
Definition: PassBuilderBindings.cpp:59
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
ProfileICPRelativeHotnessSkip
static cl::opt< unsigned > ProfileICPRelativeHotnessSkip("sample-profile-icp-relative-hotness-skip", cl::Hidden, cl::init(1), cl::desc("Skip relative hotness check for ICP up to given number of targets."))
llvm::emitInlinedIntoBasedOnCost
void emitInlinedIntoBasedOnCost(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, const BasicBlock *Block, const Function &Callee, const Function &Caller, const InlineCost &IC, bool ForProfileContext=false, const char *PassName=nullptr)
Emit an ORE message based on cost (default heuristic).
Definition: InlineAdvisor.cpp:502
ProfileInlineLimitMin
cl::opt< int > ProfileInlineLimitMin("sample-profile-inline-limit-min", cl::Hidden, cl::init(100), cl::desc("The lower bound of size growth limit for " "proirity-based sample profile loader inlining."))
llvm::DILocation
Debug location.
Definition: DebugInfoMetadata.h:1599
llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:155
PersistProfileStaleness
static cl::opt< bool > PersistProfileStaleness("persist-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute stale profile statistical metrics and write it into the " "native object file(.llvm_stats section)."))
llvm::sampleprof::ContextShouldBeInlined
@ ContextShouldBeInlined
Definition: SampleProf.h:450
DenseMap.h
updateIDTMetaData
static void updateIDTMetaData(Instruction &Inst, const SmallVectorImpl< InstrProfValueData > &CallTargets, uint64_t Sum)
Update indirect call target profile metadata for Inst.
Definition: SampleProfile.cpp:840
Module.h
ProfileMergeInlinee
static cl::opt< bool > ProfileMergeInlinee("sample-profile-merge-inlinee", cl::Hidden, cl::init(true), cl::desc("Merge past inlinee's profile to outline version if sample " "profile loader decided not to inline a call site. It will " "only be enabled when top-down order of profile loading is " "enabled. "))
llvm::InlineParams
Thresholds to tune inline cost analysis.
Definition: InlineCost.h:206
llvm::DenseMapBase::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
llvm::MapVector
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:37
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::ThinOrFullLTOPhase::ThinLTOPreLink
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:891
llvm::sampleprof::FunctionSamples::findInlinedFunctions
void findInlinedFunctions(DenseSet< GlobalValue::GUID > &S, const StringMap< Function * > &SymbolMap, uint64_t Threshold) const
Recursively traverses all children, if the total sample count of the corresponding function is no les...
Definition: SampleProf.h:987
llvm::CallSiteFormat::Format::LineDiscriminator
@ LineDiscriminator
llvm::sampleprof::FunctionSamples::getName
StringRef getName() const
Return the function name.
Definition: SampleProf.h:1017
llvm::sampleprof::FunctionSamplesMap
std::map< std::string, FunctionSamples, std::less<> > FunctionSamplesMap
Definition: SampleProf.h:710
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::sampleprof::SampleProfileReader::getRemapper
SampleProfileReaderItaniumRemapper * getRemapper()
Definition: SampleProfReader.h:500
llvm::scc_member_iterator
Sort the nodes of a directed SCC in the decreasing order of the edge weights.
Definition: SCCIterator.h:252
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::insert
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
llvm::Data
@ Data
Definition: SIMachineScheduler.h:55
ProfileInlineReplayFallback
static cl::opt< ReplayInlinerSettings::Fallback > ProfileInlineReplayFallback("sample-profile-inline-replay-fallback", cl::init(ReplayInlinerSettings::Fallback::Original), cl::values(clEnumValN(ReplayInlinerSettings::Fallback::Original, "Original", "All decisions not in replay send to original advisor (default)"), clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline, "AlwaysInline", "All decisions not in replay are inlined"), clEnumValN(ReplayInlinerSettings::Fallback::NeverInline, "NeverInline", "All decisions not in replay are not inlined")), cl::desc("How sample profile inline replay treats sites that don't come " "from the replay. Original: defers to original advisor, " "AlwaysInline: inline all sites not in replay, NeverInline: " "inline no sites not in replay"), cl::Hidden)
llvm::ReplayInlinerSettings::Fallback::Original
@ Original
ProfileSampleBlockAccurate
static cl::opt< bool > ProfileSampleBlockAccurate("profile-sample-block-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "branches and calls as having 0 samples. Otherwise, treat " "them conservatively as unknown. "))
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
UsePreInlinerDecision
static cl::opt< bool > UsePreInlinerDecision("sample-profile-use-preinliner", cl::Hidden, cl::desc("Use the preinliner decisions stored in profile context."))
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:265
llvm::Instruction::setMetadata
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1455
llvm::sampleprof::LineLocation::LineOffset
uint32_t LineOffset
Definition: SampleProf.h:302
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::sampleprof::FunctionSamples::SetContextSynthetic
void SetContextSynthetic()
Definition: SampleProf.h:815
Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:66
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Instruction.h
llvm::ThinOrFullLTOPhase
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition: Pass.h:73
CommandLine.h
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
llvm::sampleprof::FunctionSamples::getFuncName
StringRef getFuncName() const
Return the original function name.
Definition: SampleProf.h:1020
BlockFrequencyInfoImpl.h
llvm::Instruction::extractProfTotalWeight
bool extractProfTotalWeight(uint64_t &TotalVal) const
Retrieve total raw weight values of a branch.
Definition: Metadata.cpp:1539
GlobalValue.h
DisableSampleLoaderInlining
static cl::opt< bool > DisableSampleLoaderInlining("disable-sample-loader-inlining", cl::Hidden, cl::init(false), cl::desc("If true, artifically skip inline transformation in sample-loader " "pass, and merge (or scale) profiles (as configured by " "--sample-profile-merge-inlinee)."))
llvm::GlobalValue::isDeclaration
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit.
Definition: Globals.cpp:266
llvm::sampleprof::SampleProfileReader::profileIsCS
bool profileIsCS() const
Whether input profile is fully context-sensitive.
Definition: SampleProfReader.h:477
SortProfiledSCC
cl::opt< bool > SortProfiledSCC("sort-profiled-scc-member", cl::init(true), cl::Hidden, cl::desc("Sort profiled recursion by edge weights."))
llvm::msgpack::Type::Map
@ Map
llvm::getInlineCost
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
Definition: InlineCost.cpp:2822
InlinePriorityMode::Cost
@ Cost
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::DS_Warning
@ DS_Warning
Definition: DiagnosticInfo.h:51
llvm::sampleprof::SampleProfileReader::read
std::error_code read()
The interface to read sample profiles from the associated file.
Definition: SampleProfReader.h:370
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::ProfileSummary::getMD
Metadata * getMD(LLVMContext &Context, bool AddPartialField=true, bool AddPartialProfileRatioField=true)
Return summary information as metadata.
Definition: ProfileSummary.cpp:80
Twine.h
InstrTypes.h
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1396
llvm::InlineCost::get
static InlineCost get(int Cost, int Threshold, int StaticBonus=0)
Definition: InlineCost.h:120
llvm::sampleprof::SyntheticContext
@ SyntheticContext
Definition: SampleProf.h:441
SI
@ SI
Definition: SIInstrInfo.cpp:7985
llvm::InlineCost
Represents the cost of inlining a function.
Definition: InlineCost.h:90
llvm::updateProfileCallee
void updateProfileCallee(Function *Callee, int64_t EntryDelta, const ValueMap< const Value *, WeakTrackingVH > *VMap=nullptr)
Updates profile information by adjusting the entry count by adding EntryDelta then scaling callsite i...
Definition: InlineFunction.cpp:1906
TargetLibraryInfo.h
llvm::sampleprof::FunctionSamples::getHeadSamplesEstimate
uint64_t getHeadSamplesEstimate() const
Return an estimate of the sample count of the function entry basic block.
Definition: SampleProf.h:898
DenseSet.h
llvm::orc::SymbolMap
DenseMap< SymbolStringPtr, JITEvaluatedSymbol > SymbolMap
A map from symbol names (as SymbolStringPtrs) to JITSymbols (address/flags pairs).
Definition: Core.h:113
llvm::sampleprof::FunctionSamples::getGUID
static uint64_t getGUID(StringRef Name)
Definition: SampleProf.h:1144
SampleProf.h
InlineAdvisor.h
ProfileInlineReplayFormat
static cl::opt< CallSiteFormat::Format > ProfileInlineReplayFormat("sample-profile-inline-replay-format", cl::init(CallSiteFormat::Format::LineColumnDiscriminator), cl::values(clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"), clEnumValN(CallSiteFormat::Format::LineColumn, "LineColumn", "<Line Number>:<Column Number>"), clEnumValN(CallSiteFormat::Format::LineDiscriminator, "LineDiscriminator", "<Line Number>.<Discriminator>"), clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator, "LineColumnDiscriminator", "<Line Number>:<Column Number>.<Discriminator> (default)")), cl::desc("How sample profile inline replay file is formatted"), cl::Hidden)
ProfileCount
Function::ProfileCount ProfileCount
Definition: SampleProfile.cpp:97
llvm::CallSiteFormat::Format::LineColumnDiscriminator
@ LineColumnDiscriminator
llvm::pdb::PDB_SymType::Caller
@ Caller
llvm::Instruction
Definition: Instruction.h:42
InstrProf.h
MDBuilder.h
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::ReplayInlinerSettings::Fallback::NeverInline
@ NeverInline
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:402
llvm::setProbeDistributionFactor
void setProbeDistributionFactor(Instruction &Inst, float Factor)
Definition: PseudoProbe.cpp:66
DebugLoc.h
IR
Statically lint checks LLVM IR
Definition: Lint.cpp:746
llvm::Function::PCT_Real
@ PCT_Real
Definition: Function.h:248
llvm::CallGraphNode
A node in the call graph for a module.
Definition: CallGraph.h:166
llvm::getInlineParams
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
Definition: InlineCost.cpp:3101
SampleProfileLoaderBaseUtil.h
llvm::isLegalToPromote
bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
Definition: CallPromotionUtils.cpp:382
llvm::ProfileSummary::PSK_Sample
@ PSK_Sample
Definition: ProfileSummary.h:47
llvm::CallSiteFormat::Format::LineColumn
@ LineColumn
llvm::InlineContext
Provides context on when an inline advisor is constructed in the pipeline (e.g., link phase,...
Definition: InlineAdvisor.h:58
llvm::sampleprof::SampleProfileReader::getNameTable
virtual std::vector< StringRef > * getNameTable()
It includes all the names that have samples either in outline instance or inline instance.
Definition: SampleProfReader.h:488
llvm::sampleprof::SampleContext
Definition: SampleProf.h:509
llvm::StringMap
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
Definition: StringMap.h:110
llvm::PriorityQueue
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Definition: PriorityQueue.h:28
llvm::scc_begin
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
Definition: SCCIterator.h:232
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:40
llvm::sampleprof::FunctionSamples::empty
bool empty() const
Definition: SampleProf.h:880
ValueSymbolTable.h
llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1682
SampleProfile.h
llvm::StringRef::empty
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
llvm::DenseSet
Implements a dense probed hash-table based set.
Definition: DenseSet.h:268
llvm::HighlightColor::Remark
@ Remark
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1411
ReplayInlineAdvisor.h
llvm::ProfileCount
Function::ProfileCount ProfileCount
Definition: SampleProfileLoaderBaseImpl.h:47
llvm::DiagnosticInfoOptimizationBase::Argument
Used in the streaming interface as the general argument type.
Definition: DiagnosticInfo.h:426
llvm::Instruction::getSuccessor
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
Definition: Instruction.cpp:822
llvm::cl::values
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:704
ProfiledCallGraph.h
uint64_t
ProfileSummaryInfo.h
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:652
MisExpect.h
llvm::sampleprof::SampleProfileReader::getSamplesFor
FunctionSamples * getSamplesFor(const Function &F)
Return the samples collected for function F.
Definition: SampleProfReader.h:398
SampleProfileFile
static cl::opt< std::string > SampleProfileFile("sample-profile-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile file loaded by -sample-profile"), cl::Hidden)
llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:173
llvm::scc_iterator
Enumerate the SCCs of a directed graph in reverse topological order of the SCC DAG.
Definition: SCCIterator.h:46
IPO.h
llvm::sampleprof::FunctionSamples
Representation of the samples collected for a function.
Definition: SampleProf.h:718
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::PseudoProbeManager
Definition: SampleProfileProbe.h:85
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
llvm::DenseMap
Definition: DenseMap.h:714
ErrorOr.h
I
#define I(x, y, z)
Definition: MD5.cpp:58
PriorityQueue.h
getCalledFunction
static const Function * getCalledFunction(const Value *V, bool &IsNoBuiltin)
Definition: MemoryBuiltins.cpp:154
llvm::SampleProfileUseProfi
cl::opt< bool > SampleProfileUseProfi
Cloning.h
SampleProfReader.h
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:446
ArrayRef.h
llvm::codeview::FrameProcedureOptions::Inlined
@ Inlined
llvm::sampleprof::SampleRecord::adjustCallTargets
static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets, float DistributionFactor)
Prorate call targets by a distribution factor.
Definition: SampleProf.h:415
llvm::misexpect::checkExpectAnnotations
void checkExpectAnnotations(Instruction &I, const ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
Definition: MisExpect.cpp:202
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::ReplayInlinerSettings::Scope::Module
@ Module
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1861
llvm::X86AS::FS
@ FS
Definition: X86.h:200
llvm::sampleprof::FunctionSamples::UseMD5
static bool UseMD5
Whether the profile uses MD5 to represent string.
Definition: SampleProf.h:1129
llvm::codeview::CompileSym2Flags::EC
@ EC
InlineCost.h
CSINLINE_DEBUG
#define CSINLINE_DEBUG
Definition: SampleProfile.cpp:99
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:82
llvm::sampleprof::SampleProfileReader::create
static ErrorOr< std::unique_ptr< SampleProfileReader > > create(const std::string Filename, LLVMContext &C, FSDiscriminatorPass P=FSDiscriminatorPass::Base, const std::string RemapFilename="")
Create a sample profile reader appropriate to the file format.
Definition: SampleProfReader.cpp:1859
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
SampleProfileProbe.h
llvm::sampleprof::SampleProfileReader::setSkipFlatProf
virtual void setSkipFlatProf(bool Skip)
Don't read profile without context if the flag is set.
Definition: SampleProfReader.h:496
llvm::sampleprof::LineLocation
Represents the relative location of an instruction.
Definition: SampleProf.h:283
SampleHotCallSiteThreshold
cl::opt< int > SampleHotCallSiteThreshold("sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000), cl::desc("Hot callsite threshold for proirity-based sample profile loader " "inlining."))
llvm::DiagnosticInfoSampleProfile
Diagnostic information for the sample profiler.
Definition: DiagnosticInfo.h:235
llvm::ProfileSummaryAnalysis
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Definition: ProfileSummaryInfo.h:212
llvm::StringSet
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:23
llvm::CallSiteFormat::Format::Line
@ Line
llvm::MapVector::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: MapVector.h:118
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::logicalview::LVAttributeKind::Range
@ Range
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
llvm::MapVector::lookup
ValueT lookup(const KeyT &Key) const
Definition: MapVector.h:111
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::extractProbe
std::optional< PseudoProbe > extractProbe(const Instruction &Inst)
Definition: PseudoProbe.cpp:49
llvm::sampleprof::SampleProfileReader::getProfileSymbolList
virtual std::unique_ptr< ProfileSymbolList > getProfileSymbolList()
Definition: SampleProfReader.h:482
uint32_t
clEnumValN
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:679
CallPromotionUtils.h
llvm::ContextTrieNode
Definition: SampleContextTracker.h:35
SampleProfileLoaderBaseImpl.h
llvm::format
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
llvm::OptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition: DiagnosticInfo.h:780
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
llvm::DenseMapBase::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
llvm::isIndirectCall
static bool isIndirectCall(const MachineInstr &MI)
Definition: ARMBaseInstrInfo.h:654
SampleContextTracker.h
llvm::annotateValueSite
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:1017
llvm::sampleprofutil
Definition: SampleProfileLoaderBaseUtil.h:33
llvm::sampleprof::SampleProfileReader::getSummary
ProfileSummary & getSummary() const
Return the profile summary.
Definition: SampleProfReader.h:466
llvm::AMDGPU::SendMsg::Msg
const CustomOperand< const MCSubtargetInfo & > Msg[]
Definition: AMDGPUAsmUtils.cpp:39
llvm::InlinePass::ReplaySampleProfileInliner
@ ReplaySampleProfileInliner
Callee
amdgpu Simplify well known AMD library false FunctionCallee Callee
Definition: AMDGPULibCalls.cpp:187
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:85
llvm::sampleprof::SampleProfileReader::getProfiles
SampleProfileMap & getProfiles()
Return all the profiles.
Definition: SampleProfReader.h:441
llvm::LLVMContext::diagnose
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Definition: LLVMContext.cpp:248
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
llvm::InlineCost::getAlways
static InlineCost getAlways(const char *Reason, Optional< CostBenefitPair > CostBenefit=std::nullopt)
Definition: InlineCost.h:126
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:50
std
Definition: BitVector.h:851
llvm::sampleprof::SampleProfileReader::getOrCreateSamplesFor
FunctionSamples * getOrCreateSamplesFor(const Function &F)
Return the samples collected for function F, create empty FunctionSamples if it doesn't exist.
Definition: SampleProfReader.h:408
llvm::DenseMapBase::end
iterator end()
Definition: DenseMap.h:84
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
ProfileInlineReplayScope
static cl::opt< ReplayInlinerSettings::Scope > ProfileInlineReplayScope("sample-profile-inline-replay-scope", cl::init(ReplayInlinerSettings::Scope::Function), cl::values(clEnumValN(ReplayInlinerSettings::Scope::Function, "Function", "Replay on functions that have remarks associated " "with them (default)"), clEnumValN(ReplayInlinerSettings::Scope::Module, "Module", "Replay on the entire module")), cl::desc("Whether inline replay should be applied to the entire " "Module or just the Functions (default) that are present as " "callers in remarks during sample profile inlining."), cl::Hidden)
llvm::GlobalValue::getGUID
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e.
Definition: GlobalValue.h:591
Casting.h
llvm::sampleprofutil::callsiteIsHot
bool callsiteIsHot(const FunctionSamples *CallsiteFS, ProfileSummaryInfo *PSI, bool ProfAccForSymsInList)
Return true if the given callsite is hot with respect to the hot cutoff threshold.
Definition: SampleProfileLoaderBaseUtil.cpp:68
DiagnosticInfo.h
Function.h
PassManager.h
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:225
llvm::InlineFunctionInfo
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
Definition: Cloning.h:203
UseProfiledCallGraph
static cl::opt< bool > UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden, cl::desc("Process functions in a top-down order " "defined by the profiled call graph when " "-sample-profile-top-down-load is on."))
llvm::pdb::PDB_SymType::CallSite
@ CallSite
llvm::sampleprof::SampleProfileReader
Sample-based profile reader.
Definition: SampleProfReader.h:342
PseudoProbe.h
llvm::sampleprof::FunctionSamples::merge
sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight=1)
Merge the samples in Other into this one.
Definition: SampleProf.h:947
llvm::cl::value_desc
Definition: CommandLine.h:421
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:614
llvm::NOMORE_ICP_MAGICNUM
const uint64_t NOMORE_ICP_MAGICNUM
Magic number in the value profile metadata showing a target has been promoted for the instruction and...
Definition: Metadata.h:57
llvm::sampleprof::SampleProfileReader::setModule
void setModule(const Module *Mod)
Definition: SampleProfReader.h:502
SampleColdCallSiteThreshold
cl::opt< int > SampleColdCallSiteThreshold("sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining cold callsites"))
llvm::CallGraph::getModule
Module & getModule() const
Returns the module the call graph corresponds to.
Definition: CallGraph.h:101
llvm::sampleprof::SampleProfileReader::profileIsPreInlined
bool profileIsPreInlined() const
Whether input profile contains ShouldBeInlined contexts.
Definition: SampleProfReader.h:480
llvm::InlinePass::SampleProfileInliner
@ SampleProfileInliner
llvm::sampleprof::FunctionSamples::getContext
SampleContext & getContext() const
Definition: SampleProf.h:1124
ProfileSampleAccurate
static cl::opt< bool > ProfileSampleAccurate("profile-sample-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "callsite and function as having 0 samples. Otherwise, treat " "un-sampled callsites and functions conservatively as unknown. "))
llvm::pgo::promoteIndirectCall
CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
Definition: IndirectCallPromotion.cpp:244
llvm::MDBuilder
Definition: MDBuilder.h:36
llvm::scc_iterator::isAtEnd
bool isAtEnd() const
Direct loop termination test which is more efficient than comparison with end().
Definition: SCCIterator.h:112
CallGraph.h
llvm::AnnotateInlinePassName
std::string AnnotateInlinePassName(InlineContext IC)
Definition: InlineAdvisor.cpp:580
llvm::DebugLoc::getLine
unsigned getLine() const
Definition: DebugLoc.cpp:24
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:689
llvm::sampleprof::FunctionSamples::getCanonicalFnName
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
Definition: SampleProf.h:1028
Instructions.h
SmallVector.h
llvm::sampleprof::SampleRecord::SortCallTargets
static const SortedCallTargetSet SortCallTargets(const CallTargetMap &Targets)
Sort call targets in descending order of call frequency.
Definition: SampleProf.h:406
llvm::Instruction::getDebugLoc
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:359
ReportProfileStaleness
static cl::opt< bool > ReportProfileStaleness("report-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute and report stale profile statistical metrics."))
GetSortedValueDataFromCallTargets
static SmallVector< InstrProfValueData, 2 > GetSortedValueDataFromCallTargets(const SampleRecord::CallTargetMap &M)
Returns the sorted CallTargetMap M by count in descending order.
Definition: SampleProfile.cpp:1599
OverwriteExistingWeights
static cl::opt< bool > OverwriteExistingWeights("overwrite-existing-weights", cl::Hidden, cl::init(false), cl::desc("Ignore existing branch weights on IR and always overwrite."))
ProfileTopDownLoad
static cl::opt< bool > ProfileTopDownLoad("sample-profile-top-down-load", cl::Hidden, cl::init(true), cl::desc("Do profile annotation and inlining for functions in top-down " "order of call graph during sample profile loading. It only " "works for new pass manager. "))
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::try_emplace
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
Definition: DenseMap.h:222
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::ErrorOr
Represents either an error or a value T.
Definition: ErrorOr.h:56
ProfileInlineReplayFile
static cl::opt< std::string > ProfileInlineReplayFile("sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"), cl::desc("Optimization remarks file containing inline remarks to be replayed " "by inlining from sample profile loader."), cl::Hidden)
llvm::ReplayInlinerSettings::Scope::Function
@ Function
TargetTransformInfo.h
llvm::UseIterativeBFIInference
llvm::cl::opt< bool > UseIterativeBFIInference
ProfileSizeInline
static cl::opt< bool > ProfileSizeInline("sample-profile-inline-size", cl::Hidden, cl::init(false), cl::desc("Inline cold call sites in profile loader if it's beneficial " "for code size."))
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
llvm::reverse
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:485
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1174
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::InnerAnalysisManagerProxy
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:931
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1474
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::getValueProfDataFromInst
bool getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, InstrProfValueData ValueData[], uint32_t &ActualNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst which is annotated with value profile meta data.
Definition: InstrProf.cpp:1062
StringMap.h
LLVMContext.h
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::ReplayInlinerSettings::Fallback::AlwaysInline
@ AlwaysInline
llvm::Function::ProfileCount
Class to represent profile counts.
Definition: Function.h:253
llvm::cl::desc
Definition: CommandLine.h:412
raw_ostream.h
InitializePasses.h
llvm::OptimizationRemarkEmitterAnalysis
Definition: OptimizationRemarkEmitter.h:164
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::InlineResult
InlineResult is basically true or false.
Definition: InlineCost.h:180
CallsitePrioritizedInline
static cl::opt< bool > CallsitePrioritizedInline("sample-profile-prioritized-inline", cl::Hidden, cl::desc("Use call site prioritized inlining for sample profile loader." "Currently only CSSPGO is supported."))
Debug.h
llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:449
llvm::ReplayInlinerSettings
Replay Inliner Setup.
Definition: ReplayInlineAdvisor.h:43
llvm::MapVector::erase
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition: MapVector.h:174
ProfileICPRelativeHotness
static cl::opt< unsigned > ProfileICPRelativeHotness("sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25), cl::desc("Relative hotness percentage threshold for indirect " "call promotion in proirity-based sample profile loader inlining."))
MaxNumPromotions
static cl::opt< unsigned > MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden, cl::desc("Max number of promotions for a single indirect " "call callsite in sample profile loader"))
ProfileAccurateForSymsInList
static cl::opt< bool > ProfileAccurateForSymsInList("profile-accurate-for-symsinlist", cl::Hidden, cl::init(true), cl::desc("For symbols in profile symbol list, regard their profiles to " "be accurate. It may be overriden by profile-sample-accurate. "))
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
llvm::getReplayInlineAdvisor
std::unique_ptr< InlineAdvisor > getReplayInlineAdvisor(Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context, std::unique_ptr< InlineAdvisor > OriginalAdvisor, const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks, InlineContext IC)
Definition: ReplayInlineAdvisor.cpp:80
SpecialSubKind::string
@ string
doesHistoryAllowICP
static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate)
Check whether the indirect call promotion history of Inst allows the promotion for Candidate.
Definition: SampleProfile.cpp:803
llvm::sampleprof::FunctionSamples::getCallSiteIdentifier
static LineLocation getCallSiteIdentifier(const DILocation *DIL, bool ProfileIsFS=false)
Returns a unique call site identifier for a given debug location of a call instruction.
Definition: SampleProf.cpp:221
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941
llvm::SmallVectorImpl::insert
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:809