LLVM  15.0.0git
SampleProfile.cpp
Go to the documentation of this file.
1 //===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the SampleProfileLoader transformation. This pass
10 // reads a profile file generated by a sampling profiler (e.g. Linux Perf -
11 // http://perf.wiki.kernel.org/) and generates IR metadata to reflect the
12 // profile information in the given profile.
13 //
14 // This pass generates branch weight annotations on the IR:
15 //
16 // - prof: Represents branch weights. This annotation is added to branches
17 // to indicate the weights of each edge coming out of the branch.
18 // The weight of each edge is the weight of the target block for
19 // that edge. The weight of a block B is computed as the maximum
20 // number of samples found in B.
21 //
22 //===----------------------------------------------------------------------===//
23 
25 #include "llvm/ADT/ArrayRef.h"
26 #include "llvm/ADT/DenseMap.h"
27 #include "llvm/ADT/DenseSet.h"
28 #include "llvm/ADT/PriorityQueue.h"
29 #include "llvm/ADT/SCCIterator.h"
30 #include "llvm/ADT/SmallVector.h"
31 #include "llvm/ADT/Statistic.h"
32 #include "llvm/ADT/StringMap.h"
33 #include "llvm/ADT/StringRef.h"
34 #include "llvm/ADT/Twine.h"
45 #include "llvm/IR/BasicBlock.h"
46 #include "llvm/IR/DebugLoc.h"
47 #include "llvm/IR/DiagnosticInfo.h"
48 #include "llvm/IR/Function.h"
49 #include "llvm/IR/GlobalValue.h"
50 #include "llvm/IR/InstrTypes.h"
51 #include "llvm/IR/Instruction.h"
52 #include "llvm/IR/Instructions.h"
53 #include "llvm/IR/IntrinsicInst.h"
54 #include "llvm/IR/LLVMContext.h"
55 #include "llvm/IR/MDBuilder.h"
56 #include "llvm/IR/Module.h"
57 #include "llvm/IR/PassManager.h"
58 #include "llvm/IR/PseudoProbe.h"
60 #include "llvm/InitializePasses.h"
61 #include "llvm/Pass.h"
65 #include "llvm/Support/Casting.h"
67 #include "llvm/Support/Debug.h"
68 #include "llvm/Support/ErrorOr.h"
70 #include "llvm/Transforms/IPO.h"
79 #include <algorithm>
80 #include <cassert>
81 #include <cstdint>
82 #include <functional>
83 #include <limits>
84 #include <map>
85 #include <memory>
86 #include <queue>
87 #include <string>
88 #include <system_error>
89 #include <utility>
90 #include <vector>
91 
92 using namespace llvm;
93 using namespace sampleprof;
94 using namespace llvm::sampleprofutil;
96 #define DEBUG_TYPE "sample-profile"
97 #define CSINLINE_DEBUG DEBUG_TYPE "-inline"
98 
// Pass statistics declared with LLVM's STATISTIC macro. They count
// context-sensitive inlining outcomes, functions whose profile matched or
// mismatched the CFG, inlined callsites with a partial distribution factor,
// and the size limits (min / max / growth) that stopped FDO inlining.
99 STATISTIC(NumCSInlined,
100  "Number of functions inlined with context sensitive profile");
101 STATISTIC(NumCSNotInlined,
102  "Number of functions not inlined with context sensitive profile");
103 STATISTIC(NumMismatchedProfile,
104  "Number of functions with CFG mismatched profile");
105 STATISTIC(NumMatchedProfile, "Number of functions with CFG matched profile");
106 STATISTIC(NumDuplicatedInlinesite,
107  "Number of inlined callsites with a partial distribution factor");
108 
109 STATISTIC(NumCSInlinedHitMinLimit,
110  "Number of functions with FDO inline stopped due to min size limit");
111 STATISTIC(NumCSInlinedHitMaxLimit,
112  "Number of functions with FDO inline stopped due to max size limit");
113 STATISTIC(
114  NumCSInlinedHitGrowthLimit,
115  "Number of functions with FDO inline stopped due to growth size limit");
116 
117 // Command line option to specify the file to read samples from. This is
118 // mainly used for debugging.
120  "sample-profile-file", cl::init(""), cl::value_desc("filename"),
121  cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);
122 
123 // The named file contains a set of transformations that may have been applied
124 // to the symbol names between the program from which the sample data was
125 // collected and the current program's symbols.
127  "sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"),
128  cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden);
129 
131  "profile-sample-accurate", cl::Hidden, cl::init(false),
132  cl::desc("If the sample profile is accurate, we will mark all un-sampled "
133  "callsite and function as having 0 samples. Otherwise, treat "
134  "un-sampled callsites and functions conservatively as unknown. "));
135 
137  "profile-sample-block-accurate", cl::Hidden, cl::init(false),
138  cl::desc("If the sample profile is accurate, we will mark all un-sampled "
139  "branches and calls as having 0 samples. Otherwise, treat "
140  "them conservatively as unknown. "));
141 
143  "profile-accurate-for-symsinlist", cl::Hidden, cl::init(true),
144  cl::desc("For symbols in profile symbol list, regard their profiles to "
145  "be accurate. It may be overriden by profile-sample-accurate. "));
146 
148  "sample-profile-merge-inlinee", cl::Hidden, cl::init(true),
149  cl::desc("Merge past inlinee's profile to outline version if sample "
150  "profile loader decided not to inline a call site. It will "
151  "only be enabled when top-down order of profile loading is "
152  "enabled. "));
153 
155  "sample-profile-top-down-load", cl::Hidden, cl::init(true),
156  cl::desc("Do profile annotation and inlining for functions in top-down "
157  "order of call graph during sample profile loading. It only "
158  "works for new pass manager. "));
159 
// Hidden boolean flag -use-profiled-call-graph, initialized to true. Per its
// description it only matters when -sample-profile-top-down-load is on.
160 static cl::opt<bool>
161  UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden,
162  cl::desc("Process functions in a top-down order "
163  "defined by the profiled call graph when "
164  "-sample-profile-top-down-load is on."));
166  SortProfiledSCC("sort-profiled-scc-member", cl::init(true), cl::Hidden,
167  cl::desc("Sort profiled recursion by edge weights."));
168 
170  "sample-profile-inline-size", cl::Hidden, cl::init(false),
171  cl::desc("Inline cold call sites in profile loader if it's beneficial "
172  "for code size."));
173 
174 // Since profiles are consumed by many passes, turning on this option has
175 // side effects. For instance, pre-link SCC inliner would see merged profiles
176 // and inline the hot functions (that are skipped in this pass).
178  "disable-sample-loader-inlining", cl::Hidden, cl::init(false),
179  cl::desc("If true, artifically skip inline transformation in sample-loader "
180  "pass, and merge (or scale) profiles (as configured by "
181  "--sample-profile-merge-inlinee)."));
182 
184  "sample-profile-inline-growth-limit", cl::Hidden, cl::init(12),
185  cl::desc("The size growth ratio limit for proirity-based sample profile "
186  "loader inlining."));
187 
189  "sample-profile-inline-limit-min", cl::Hidden, cl::init(100),
190  cl::desc("The lower bound of size growth limit for "
191  "proirity-based sample profile loader inlining."));
192 
194  "sample-profile-inline-limit-max", cl::Hidden, cl::init(10000),
195  cl::desc("The upper bound of size growth limit for "
196  "proirity-based sample profile loader inlining."));
197 
199  "sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000),
200  cl::desc("Hot callsite threshold for proirity-based sample profile loader "
201  "inlining."));
202 
204  "sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45),
205  cl::desc("Threshold for inlining cold callsites"));
206 
208  "sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25),
209  cl::desc(
210  "Relative hotness percentage threshold for indirect "
211  "call promotion in proirity-based sample profile loader inlining."));
212 
214  "sample-profile-icp-relative-hotness-skip", cl::Hidden, cl::init(1),
215  cl::desc(
216  "Skip relative hotness check for ICP up to given number of targets."));
217 
219  "sample-profile-prioritized-inline", cl::Hidden,
220 
221  cl::desc("Use call site prioritized inlining for sample profile loader."
222  "Currently only CSSPGO is supported."));
223 
225  "sample-profile-use-preinliner", cl::Hidden,
226 
227  cl::desc("Use the preinliner decisions stored in profile context."));
228 
230  "sample-profile-recursive-inline", cl::Hidden,
231 
232  cl::desc("Allow sample loader inliner to inline recursive calls."));
233 
235  "sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"),
236  cl::desc(
237  "Optimization remarks file containing inline remarks to be replayed "
238  "by inlining from sample profile loader."),
239  cl::Hidden);
240 
242  "sample-profile-inline-replay-scope",
245  "Replay on functions that have remarks associated "
246  "with them (default)"),
248  "Replay on the entire module")),
249  cl::desc("Whether inline replay should be applied to the entire "
250  "Module or just the Functions (default) that are present as "
251  "callers in remarks during sample profile inlining."),
252  cl::Hidden);
253 
255  "sample-profile-inline-replay-fallback",
257  cl::values(
258  clEnumValN(
260  "All decisions not in replay send to original advisor (default)"),
262  "AlwaysInline", "All decisions not in replay are inlined"),
264  "All decisions not in replay are not inlined")),
265  cl::desc("How sample profile inline replay treats sites that don't come "
266  "from the replay. Original: defers to original advisor, "
267  "AlwaysInline: inline all sites not in replay, NeverInline: "
268  "inline no sites not in replay"),
269  cl::Hidden);
270 
272  "sample-profile-inline-replay-format",
274  cl::values(
275  clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"),
277  "<Line Number>:<Column Number>"),
279  "LineDiscriminator", "<Line Number>.<Discriminator>"),
281  "LineColumnDiscriminator",
282  "<Line Number>:<Column Number>.<Discriminator> (default)")),
283  cl::desc("How sample profile inline replay file is formatted"), cl::Hidden);
284 
// Hidden unsigned flag -sample-profile-icp-max-prom, initialized to 3. It is
// the capacity of the value-profile buffers read in doesHistoryAllowICP and
// updateIDTMetaData, and the cap on recorded promotions per indirect call.
285 static cl::opt<unsigned>
286  MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden,
287  cl::desc("Max number of promotions for a single indirect "
288  "call callsite in sample profile loader"));
289 
291  "overwrite-existing-weights", cl::Hidden, cl::init(false),
292  cl::desc("Ignore existing branch weights on IR and always overwrite."));
293 
295  "annotate-sample-profile-inline-phase", cl::Hidden, cl::init(false),
296  cl::desc("Annotate LTO phase (prelink / postlink), or main (no LTO) for "
297  "sample-profile inline pass name."));
298 
300 
301 namespace {
302 
// Local container aliases keyed on basic blocks.
// Basic block -> 64-bit weight.
303 using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
// Basic block -> another basic block (presumably the representative of its
// equivalence class - confirm against the users of this alias).
304 using EquivalenceClassMap = DenseMap<const BasicBlock *, const BasicBlock *>;
// A CFG edge expressed as a pair of basic blocks.
305 using Edge = std::pair<const BasicBlock *, const BasicBlock *>;
// Edge -> 64-bit weight.
306 using EdgeWeightMap = DenseMap<Edge, uint64_t>;
307 using BlockEdgeMap =
309 
// Scoped helper that fills a GUID -> function-name map from the functions of a
// module and attaches that map to every FunctionSamples known to the reader.
// The destructor clears the map and detaches it again. Both the constructor
// and the destructor do nothing unless the reader reports useMD5().
310 class GUIDToFuncNameMapper {
311 public:
  // \param M                 Module whose function names populate the map.
  // \param Reader            Profile reader whose FunctionSamples get the map.
  // \param GUIDToFuncNameMap Caller-owned map; held by reference and mutated.
312  GUIDToFuncNameMapper(Module &M, SampleProfileReader &Reader,
313  DenseMap<uint64_t, StringRef> &GUIDToFuncNameMap)
314  : CurrentReader(Reader), CurrentModule(M),
315  CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) {
316  if (!CurrentReader.useMD5())
317  return;
318 
319  for (const auto &F : CurrentModule) {
320  StringRef OrigName = F.getName();
321  CurrentGUIDToFuncNameMap.insert(
322  {Function::getGUID(OrigName), OrigName});
323 
324  // Local to global var promotion used by optimization like thinlto
325  // will rename the var and add suffix like ".llvm.xxx" to the
326  // original local name. In sample profile, the suffixes of function
327  // names are all stripped. Since it is possible that the mapper is
328  // built in post-thin-link phase and var promotion has been done,
329  // we need to add the substring of function name without the suffix
330  // into the GUIDToFuncNameMap.
  // NOTE(review): the declaration of CanonName is elided from this listing;
  // presumably it is the suffix-stripped (canonical) name of F - confirm.
332  if (CanonName != OrigName)
333  CurrentGUIDToFuncNameMap.insert(
334  {Function::getGUID(CanonName), CanonName});
335  }
336 
337  // Update GUIDToFuncNameMap for each function including inlinees.
338  SetGUIDToFuncNameMapForAll(&CurrentGUIDToFuncNameMap);
339  }
340 
  // Clears the shared map and resets the pointer stored in every
  // FunctionSamples to nullptr.
341  ~GUIDToFuncNameMapper() {
342  if (!CurrentReader.useMD5())
343  return;
344 
345  CurrentGUIDToFuncNameMap.clear();
346 
347  // Reset GUIDToFuncNameMap for each function as they're no
348  // longer valid at this point.
349  SetGUIDToFuncNameMapForAll(nullptr);
350  }
351 
352 private:
  // Assigns Map to the GUIDToFuncNameMap member of every FunctionSamples
  // reachable from the reader's profiles. A work queue is seeded with the
  // top-level profiles and then extended with the samples nested under each
  // visited profile's callsites.
353  void SetGUIDToFuncNameMapForAll(DenseMap<uint64_t, StringRef> *Map) {
354  std::queue<FunctionSamples *> FSToUpdate;
355  for (auto &IFS : CurrentReader.getProfiles()) {
356  FSToUpdate.push(&IFS.second);
357  }
358 
359  while (!FSToUpdate.empty()) {
360  FunctionSamples *FS = FSToUpdate.front();
361  FSToUpdate.pop();
362  FS->GUIDToFuncNameMap = Map;
363  for (const auto &ICS : FS->getCallsiteSamples()) {
364  const FunctionSamplesMap &FSMap = ICS.second;
365  for (auto &IFS : FSMap) {
  // Note: this reference named FS shadows the outer FS pointer. The
  // const_cast is needed because the callsite map is iterated as const.
366  FunctionSamples &FS = const_cast<FunctionSamples &>(IFS.second);
367  FSToUpdate.push(&FS);
368  }
369  }
370  }
371  }
372 
  // All three members are references to caller-owned objects.
373  SampleProfileReader &CurrentReader;
374  Module &CurrentModule;
375  DenseMap<uint64_t, StringRef> &CurrentGUIDToFuncNameMap;
376 };
377 
378 // Inline candidate used by iterative callsite prioritized inliner
379 struct InlineCandidate {
380  CallBase *CallInstr;
381  const FunctionSamples *CalleeSamples;
382  // Prorated callsite count, which will be used to guide inlining. For example,
383  // if a callsite is duplicated in LTO prelink, then in LTO postlink the two
384  // copies will get their own distribution factors and their prorated counts
385  // will be used to decide if they should be inlined independently.
386  uint64_t CallsiteCount;
387  // Call site distribution factor to prorate the profile samples for a
388  // duplicated callsite. Default value is 1.0.
389  float CallsiteDistribution;
390 };
391 
392 // Inline candidate comparer using call site weight
393 struct CandidateComparer {
394  bool operator()(const InlineCandidate &LHS, const InlineCandidate &RHS) {
395  if (LHS.CallsiteCount != RHS.CallsiteCount)
396  return LHS.CallsiteCount < RHS.CallsiteCount;
397 
398  const FunctionSamples *LCS = LHS.CalleeSamples;
399  const FunctionSamples *RCS = RHS.CalleeSamples;
400  assert(LCS && RCS && "Expect non-null FunctionSamples");
401 
402  // Tie breaker using number of samples try to favor smaller functions first
403  if (LCS->getBodySamples().size() != RCS->getBodySamples().size())
404  return LCS->getBodySamples().size() > RCS->getBodySamples().size();
405 
406  // Tie breaker using GUID so we have stable/deterministic inlining order
407  return LCS->getGUID(LCS->getName()) < RCS->getGUID(RCS->getName());
408  }
409 };
410 
411 using CandidateQueue =
413  CandidateComparer>;
414 
415 /// Sample profile pass.
416 ///
417 /// This pass reads profile data from the file specified by
418 /// -sample-profile-file and annotates every affected function with the
419 /// profile information found in that file.
420 class SampleProfileLoader final
421  : public SampleProfileLoaderBaseImpl<BasicBlock> {
422 public:
  // Stores the three analysis getters, the LTO phase and the pass name used
  // for remarks. NOTE(review): the base-class initializer and part of the
  // AnnotatedPassName expression are elided from this listing.
423  SampleProfileLoader(
424  StringRef Name, StringRef RemapName, ThinOrFullLTOPhase LTOPhase,
425  std::function<AssumptionCache &(Function &)> GetAssumptionCache,
426  std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo,
427  std::function<const TargetLibraryInfo &(Function &)> GetTLI)
429  GetAC(std::move(GetAssumptionCache)),
430  GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)),
431  LTOPhase(LTOPhase),
432  AnnotatedPassName(AnnotateSampleProfileInlinePhase
435  : CSINLINE_DEBUG) {}
436 
  // Module-level entry points; their definitions are outside this chunk.
437  bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr);
438  bool runOnModule(Module &M, ModuleAnalysisManager *AM,
439  ProfileSummaryInfo *_PSI, CallGraph *CG);
440 
441 protected:
443  bool emitAnnotations(Function &F);
  // Per-instruction weight lookup (overrides the base-class hook) and its
  // pseudo-probe based counterpart.
444  ErrorOr<uint64_t> getInstWeight(const Instruction &I) override;
445  ErrorOr<uint64_t> getProbeWeight(const Instruction &I);
  // Profile lookup helpers for call sites and arbitrary instructions.
446  const FunctionSamples *findCalleeFunctionSamples(const CallBase &I) const;
447  const FunctionSamples *
448  findFunctionSamples(const Instruction &I) const override;
449  std::vector<const FunctionSamples *>
450  findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
451  void findExternalInlineCandidate(CallBase *CB, const FunctionSamples *Samples,
452  DenseSet<GlobalValue::GUID> &InlinedGUIDs,
454  uint64_t Threshold);
455  // Attempt to promote indirect call and also inline the promoted call
456  bool tryPromoteAndInlineCandidate(
457  Function &F, InlineCandidate &Candidate, uint64_t SumOrigin,
458  uint64_t &Sum, SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
459 
460  bool inlineHotFunctions(Function &F,
461  DenseSet<GlobalValue::GUID> &InlinedGUIDs);
462  Optional<InlineCost> getExternalInlineAdvisorCost(CallBase &CB);
463  bool getExternalInlineAdvisorShouldInline(CallBase &CB);
464  InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
465  bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB);
466  bool
467  tryInlineCandidate(InlineCandidate &Candidate,
468  SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
469  bool
470  inlineHotFunctionsWithPriority(Function &F,
471  DenseSet<GlobalValue::GUID> &InlinedGUIDs);
472  // Inline cold/small functions in addition to hot ones
473  bool shouldInlineColdCallee(CallBase &CallInst);
474  void emitOptimizationRemarksForInlineCandidates(
475  const SmallVectorImpl<CallBase *> &Candidates, const Function &F,
476  bool Hot);
477  void promoteMergeNotInlinedContextSamples(
479  const Function &F);
480  std::vector<Function *> buildFunctionOrder(Module &M, CallGraph *CG);
481  std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(CallGraph &CG);
482  void generateMDProfMetadata(Function &F);
483 
484  /// Map from function name to Function *. Used to find the function from
485  /// the function name. If the function name contains suffix, additional
486  /// entry is added to map from the stripped name to the function if there
487  /// is one-to-one mapping.
  // NOTE(review): the member declaration documented above, and the GetAC /
  // GetTTI members initialized by the constructor, are elided from this
  // listing.
489 
492  std::function<const TargetLibraryInfo &(Function &)> GetTLI;
493 
494  /// Profile tracker for different context.
495  std::unique_ptr<SampleContextTracker> ContextTracker;
496 
497  /// Flag indicating which LTO/ThinLTO phase the pass is invoked in.
498  ///
499  /// We need to know the LTO phase because for example in ThinLTOPrelink
500  /// phase, in annotation, we should not promote indirect calls. Instead,
501  /// we will mark GUIDs that needs to be annotated to the function.
502  const ThinOrFullLTOPhase LTOPhase;
  /// Pass name returned by getAnnotatedRemarkPassName().
503  const std::string AnnotatedPassName;
504 
505  /// Profile symbol list tells whether a function name appears in the binary
506  /// used to generate the current profile.
507  std::unique_ptr<ProfileSymbolList> PSL;
508 
509  /// Total number of samples collected in this profile.
510  ///
511  /// This is the sum of all the samples collected in all the functions executed
512  /// at runtime.
513  uint64_t TotalCollectedSamples = 0;
514 
515  // Information recorded when we declined to inline a call site
516  // because we have determined it is too cold is accumulated for
517  // each callee function. Initially this is just the entry count.
518  struct NotInlinedProfileInfo {
519  uint64_t entryCount;
520  };
522 
523  // GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for
524  // all the function symbols defined or declared in current module.
525  DenseMap<uint64_t, StringRef> GUIDToFuncNameMap;
526 
527  // All the Names used in FunctionSamples including outline function
528  // names, inline instance names and call target names.
529  StringSet<> NamesInProfile;
530 
531  // For symbol in profile symbol list, whether to regard their profiles
532  // to be accurate. It is mainly decided by existence of profile symbol
533  // list and -profile-accurate-for-symsinlist flag, but it can be
534  // overridden by -profile-sample-accurate or profile-sample-accurate
535  // attribute.
  // Note: this member has no default initializer.
536  bool ProfAccForSymsInList;
537 
538  // External inline advisor used to replay inline decision from remarks.
539  std::unique_ptr<InlineAdvisor> ExternalInlineAdvisor;
540 
541  // A pseudo probe helper to correlate the imported sample counts.
542  std::unique_ptr<PseudoProbeManager> ProbeManager;
543 
544 private:
  // Returns AnnotatedPassName as a C string; the pointer stays valid for as
  // long as this object lives because the string member is const.
545  const char *getAnnotatedRemarkPassName() const {
546  return AnnotatedPassName.c_str();
547  }
548 };
549 
// Legacy pass manager wrapper (a ModulePass) that owns a SampleProfileLoader
// and forwards doInitialization and dump to it.
550 class SampleProfileLoaderLegacyPass : public ModulePass {
551 public:
552  // Class identification, replacement for typeinfo
553  static char ID;
554 
  // Builds the wrapped loader with three callbacks that fetch per-function
  // analyses through ACT, TTIWP and TLIWP. The lambdas capture by reference
  // and dereference those members only when invoked; the members start out as
  // nullptr, so they must be assigned before the loader uses the callbacks
  // (the assignment is not visible in this chunk).
  // NOTE(review): the constructor's parameter list and body statements are
  // elided from this listing.
555  SampleProfileLoaderLegacyPass(
558  : ModulePass(ID), SampleLoader(
559  Name, SampleProfileRemappingFile, LTOPhase,
560  [&](Function &F) -> AssumptionCache & {
561  return ACT->getAssumptionCache(F);
562  },
563  [&](Function &F) -> TargetTransformInfo & {
564  return TTIWP->getTTI(F);
565  },
566  [&](Function &F) -> TargetLibraryInfo & {
567  return TLIWP->getTLI(F);
568  }) {
571  }
572 
573  void dump() { SampleLoader.dump(); }
574 
  // Delegates to the loader, leaving its FunctionAnalysisManager argument at
  // the default (nullptr).
575  bool doInitialization(Module &M) override {
576  return SampleLoader.doInitialization(M);
577  }
578 
579  StringRef getPassName() const override { return "Sample profile pass"; }
580  bool runOnModule(Module &M) override;
581 
  // NOTE(review): the body of getAnalysisUsage is elided from this listing.
582  void getAnalysisUsage(AnalysisUsage &AU) const override {
587  }
588 
589 private:
590  SampleProfileLoader SampleLoader;
  // Analysis wrappers dereferenced by the constructor's callbacks.
591  AssumptionCacheTracker *ACT = nullptr;
592  TargetTransformInfoWrapperPass *TTIWP = nullptr;
593  TargetLibraryInfoWrapperPass *TLIWP = nullptr;
594 };
595 
596 } // end anonymous namespace
597 
// Returns the sample weight of \p Inst.
//
// Yields a default-constructed std::error_code (i.e. "no weight") when the
// instruction has no debug location or is a branch, intrinsic or PHI node;
// yields 0 for a direct call that has callee samples in the profile; and
// otherwise defers to getInstWeightImpl.
598 ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
// NOTE(review): the condition guarding this early return is elided from this
// listing; presumably it applies only to probe-based profiles - confirm.
600  return getProbeWeight(Inst);
601 
602  const DebugLoc &DLoc = Inst.getDebugLoc();
603  if (!DLoc)
604  return std::error_code();
605 
606  // Ignore all intrinsics, phinodes and branch instructions.
607  // Branch and phinodes instruction usually contains debug info from sources
608  // outside of the residing basic block, thus we ignore them during annotation.
609  if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst) || isa<PHINode>(Inst))
610  return std::error_code();
611 
612  // For non-CS profile, if a direct call/invoke instruction is inlined in
613  // profile (findCalleeFunctionSamples returns non-empty result), but not
614  // inlined here, it means that the inlined callsite has no sample, thus the
615  // call instruction should have 0 count.
616  // For CS profile, the callsite count of previously inlined callees is
617  // populated with the entry count of the callees.
// NOTE(review): one line preceding the check below is elided from this
// listing; per the comment above it presumably restricts the check to non-CS
// profiles - confirm.
619  if (const auto *CB = dyn_cast<CallBase>(&Inst))
620  if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
621  return 0;
622 
623  return getInstWeightImpl(Inst);
624 }
625 
626 // Here use error_code to represent: 1) The dangling probe. 2) Ignore the weight
627 // of non-probe instruction. So if all instructions of the BB give error_code,
628 // tell the inference algorithm to infer the BB weight.
//
// On success the returned value is the sample count recorded for the probe
// multiplied by the probe's Factor. The first time a probe's samples are
// marked as used, an "AppliedSamples" optimization remark is emitted.
629 ErrorOr<uint64_t> SampleProfileLoader::getProbeWeight(const Instruction &Inst) {
// NOTE(review): the head of the assertion whose message follows is elided
// from this listing.
631  "Profile is not pseudo probe based");
632  Optional<PseudoProbe> Probe = extractProbe(Inst);
633  // Ignore the non-probe instruction. If none of the instruction in the BB is
634  // probe, we choose to infer the BB's weight.
635  if (!Probe)
636  return std::error_code();
637 
638  const FunctionSamples *FS = findFunctionSamples(Inst);
639  // If none of the instruction has FunctionSample, we choose to return zero
640  // value sample to indicate the BB is cold. This could happen when the
641  // instruction is from inlinee and no profile data is found.
642  // FIXME: This should not be affected by the source drift issue as 1) if the
643  // newly added function is top-level inliner, it won't match the CFG checksum
644  // in the function profile or 2) if it's the inlinee, the inlinee should have
645  // a profile, otherwise it wouldn't be inlined. For non-probe based profile,
646  // we can improve it by adding a switch for profile-sample-block-accurate for
647  // block level counts in the future.
648  if (!FS)
649  return 0;
650 
651  // For non-CS profile, If a direct call/invoke instruction is inlined in
652  // profile (findCalleeFunctionSamples returns non-empty result), but not
653  // inlined here, it means that the inlined callsite has no sample, thus the
654  // call instruction should have 0 count.
655  // For CS profile, the callsite count of previously inlined callees is
656  // populated with the entry count of the callees.
// NOTE(review): one line preceding the check below is elided from this
// listing; per the comment above it presumably restricts the check to non-CS
// profiles - confirm.
658  if (const auto *CB = dyn_cast<CallBase>(&Inst))
659  if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
660  return 0;
661 
  // Probe samples are looked up by probe id, with 0 as the second key.
662  const ErrorOr<uint64_t> &R = FS->findSamplesAt(Probe->Id, 0);
663  if (R) {
  // Scale the recorded count by the probe's factor.
664  uint64_t Samples = R.get() * Probe->Factor;
665  bool FirstMark = CoverageTracker.markSamplesUsed(FS, Probe->Id, 0, Samples);
  // Only emit the remark the first time this probe's samples are applied.
666  if (FirstMark) {
667  ORE->emit([&]() {
668  OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "AppliedSamples", &Inst);
669  Remark << "Applied " << ore::NV("NumSamples", Samples);
670  Remark << " samples from profile (ProbeId=";
671  Remark << ore::NV("ProbeId", Probe->Id);
672  Remark << ", Factor=";
673  Remark << ore::NV("Factor", Probe->Factor);
674  Remark << ", OriginalSamples=";
675  Remark << ore::NV("OriginalSamples", R.get());
676  Remark << ")";
677  return Remark;
678  });
679  }
680  LLVM_DEBUG(dbgs() << " " << Probe->Id << ":" << Inst
681  << " - weight: " << R.get() << " - factor: "
682  << format("%0.2f", Probe->Factor) << ")\n");
683  return Samples;
684  }
  // No samples recorded for this probe: propagate the lookup's error.
685  return R;
686 }
687 
688 /// Get the FunctionSamples for a call instruction.
689 ///
690 /// The FunctionSamples of a call/invoke instruction \p Inst is the inlined
691 /// instance in which that call instruction is calling to. It contains
692 /// all samples that resides in the inlined instance. We first find the
693 /// inlined instance in which the call instruction is from, then we
694 /// traverse its children to find the callsite with the matching
695 /// location.
696 ///
697 /// \param Inst Call/Invoke instruction to query.
698 ///
699 /// \returns The FunctionSamples pointer to the inlined instance, or nullptr
/// when \p Inst has no debug location or no enclosing FunctionSamples is
/// found.
700 const FunctionSamples *
701 SampleProfileLoader::findCalleeFunctionSamples(const CallBase &Inst) const {
702  const DILocation *DIL = Inst.getDebugLoc();
703  if (!DIL) {
704  return nullptr;
705  }
706 
  // CalleeName stays empty when the call has no directly called function.
707  StringRef CalleeName;
708  if (Function *Callee = Inst.getCalledFunction())
709  CalleeName = Callee->getName();
710 
// NOTE(review): the condition guarding this return is elided from this
// listing; presumably it selects context-sensitive profiles - confirm.
712  return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName);
713 
714  const FunctionSamples *FS = findFunctionSamples(Inst);
715  if (FS == nullptr)
716  return nullptr;
717 
  // Look up the inlined callee at this call site within the enclosing
  // profile, going through the reader's remapper.
718  return FS->findFunctionSamplesAt(FunctionSamples::getCallSiteIdentifier(DIL),
719  CalleeName, Reader->getRemapper());
720 }
721 
722 /// Returns a vector of FunctionSamples that are the indirect call targets
723 /// of \p Inst. The vector is sorted by the total number of samples. Stores
724 /// the total call count of the indirect call in \p Sum.
///
/// Note that \p Sum is left untouched on the early returns taken when
/// \p Inst has no debug location, when the context tracker returns no callee
/// samples, or when no enclosing FunctionSamples is found.
725 std::vector<const FunctionSamples *>
726 SampleProfileLoader::findIndirectCallFunctionSamples(
727  const Instruction &Inst, uint64_t &Sum) const {
728  const DILocation *DIL = Inst.getDebugLoc();
729  std::vector<const FunctionSamples *> R;
730 
731  if (!DIL) {
732  return R;
733  }
734 
  // Orders by descending entry samples; ties are broken by ascending GUID of
  // the function name so the order is deterministic.
735  auto FSCompare = [](const FunctionSamples *L, const FunctionSamples *R) {
736  assert(L && R && "Expect non-null FunctionSamples");
737  if (L->getEntrySamples() != R->getEntrySamples())
738  return L->getEntrySamples() > R->getEntrySamples();
739  return FunctionSamples::getGUID(L->getName()) <
740  FunctionSamples::getGUID(R->getName());
741  };
742 
// NOTE(review): the opening line of the block below is elided from this
// listing; given the CSSPGO comment inside, it presumably tests for a
// context-sensitive profile - confirm.
744  auto CalleeSamples =
745  ContextTracker->getIndirectCalleeContextSamplesFor(DIL);
746  if (CalleeSamples.empty())
747  return R;
748 
749  // For CSSPGO, we only use target context profile's entry count
750  // as that already includes both inlined callee and non-inlined ones.
751  Sum = 0;
752  for (const auto *const FS : CalleeSamples) {
753  Sum += FS->getEntrySamples();
754  R.push_back(FS);
755  }
756  llvm::sort(R, FSCompare);
757  return R;
758  }
759 
760  const FunctionSamples *FS = findFunctionSamples(Inst);
761  if (FS == nullptr)
762  return R;
763 
// NOTE(review): the definition of CallSite is elided from this listing;
// presumably it is the call-site identifier derived from DIL - confirm.
765  auto T = FS->findCallTargetMapAt(CallSite);
766  Sum = 0;
  // Sum first collects the counts of the call targets recorded at the call
  // site, then adds the entry samples of the function samples recorded there.
767  if (T)
768  for (const auto &T_C : T.get())
769  Sum += T_C.second;
770  if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(CallSite)) {
771  if (M->empty())
772  return R;
773  for (const auto &NameFS : *M) {
774  Sum += NameFS.second.getEntrySamples();
775  R.push_back(&NameFS.second);
776  }
777  llvm::sort(R, FSCompare);
778  }
779  return R;
780 }
781 
// Returns the FunctionSamples that \p Inst belongs to.
//
// Without a debug location the member Samples is returned. Otherwise the
// result is looked up once per DILocation and cached in DILocation2SampleMap.
782 const FunctionSamples *
783 SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
// NOTE(review): the opening line of the block below is elided from this
// listing; presumably it tests for a probe-based profile - confirm.
785  Optional<PseudoProbe> Probe = extractProbe(Inst);
786  if (!Probe)
787  return nullptr;
788  }
789 
790  const DILocation *DIL = Inst.getDebugLoc();
791  if (!DIL)
792  return Samples;
793 
  // try_emplace inserts a nullptr placeholder only when DIL is not cached
  // yet; it.second reports whether that insertion happened.
794  auto it = DILocation2SampleMap.try_emplace(DIL,nullptr);
795  if (it.second) {
// NOTE(review): the condition choosing between the two lookups below is
// elided from this listing; presumably it selects the context tracker for
// context-sensitive profiles - confirm.
797  it.first->second = ContextTracker->getContextSamplesFor(DIL);
798  else
799  it.first->second =
800  Samples->findFunctionSamples(DIL, Reader->getRemapper());
801  }
802  return it.first->second;
803 }
804 
805 /// Check whether the indirect call promotion history of \p Inst allows
806 /// the promotion for \p Candidate.
807 /// If the profile count for the promotion candidate \p Candidate is
808 /// NOMORE_ICP_MAGICNUM, it means \p Candidate has already been promoted
809 /// for \p Inst. If we already have at least MaxNumPromotions
810 /// NOMORE_ICP_MAGICNUM count values in the value profile of \p Inst, we
811 /// cannot promote for \p Inst anymore.
812 static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate) {
813  uint32_t NumVals = 0;
814  uint64_t TotalCount = 0;
815  std::unique_ptr<InstrProfValueData[]> ValueData =
816  std::make_unique<InstrProfValueData[]>(MaxNumPromotions);
817  bool Valid =
818  getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget, MaxNumPromotions,
819  ValueData.get(), NumVals, TotalCount, true);
820  // No valid value profile so no promoted targets have been recorded
821  // before. Ok to do ICP.
822  if (!Valid)
823  return true;
824 
825  unsigned NumPromoted = 0;
826  for (uint32_t I = 0; I < NumVals; I++) {
827  if (ValueData[I].Count != NOMORE_ICP_MAGICNUM)
828  continue;
829 
830  // If the promotion candidate has NOMORE_ICP_MAGICNUM count in the
831  // metadata, it means the candidate has been promoted for this
832  // indirect call.
833  if (ValueData[I].Value == Function::getGUID(Candidate))
834  return false;
835  NumPromoted++;
836  // If already have MaxNumPromotions promotion, don't do it anymore.
837  if (NumPromoted == MaxNumPromotions)
838  return false;
839  }
840  return true;
841 }
842 
843 /// Update indirect call target profile metadata for \p Inst.
844 /// Usually \p Sum is the sum of counts of all the targets for \p Inst.
845 /// If it is 0, it means updateIDTMetaData is used to mark a
846 /// certain target to be promoted already. If it is not zero,
847 /// we expect to use it to update the total count in the value profile.
///
/// The function reads any existing IPVK_IndirectCallTarget value profile on
/// \p Inst, combines it with \p CallTargets in a value -> count map, and
/// re-annotates \p Inst with at most MaxNumPromotions entries.
///
/// \param CallTargets New targets. When \p Sum is 0 this must hold exactly one
/// entry whose count is NOMORE_ICP_MAGICNUM (asserted below).
/// \param Sum Total count to record; 0 means "derive the total from the
/// existing value profile".
// NOTE(review): listing line 849 (between the two lines below) is absent from
// this listing; it presumably carries the function name and the parameter
// named Inst -- confirm against the upstream file.
848 static void
850  const SmallVectorImpl<InstrProfValueData> &CallTargets,
851  uint64_t Sum) {
852  // Bail out early if MaxNumPromotions is zero.
853  // This prevents allocating an array of zero length below.
854  //
855  // Note `updateIDTMetaData` is called in two places so check
856  // `MaxNumPromotions` inside it.
857  if (MaxNumPromotions == 0)
858  return;
859  uint32_t NumVals = 0;
860  // OldSum is the existing total count in the value profile data.
861  uint64_t OldSum = 0;
862  std::unique_ptr<InstrProfValueData[]> ValueData =
863  std::make_unique<InstrProfValueData[]>(MaxNumPromotions);
864  bool Valid =
865  getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget, MaxNumPromotions,
866  ValueData.get(), NumVals, OldSum, true);
867 
// Maps a target value to the count that will be written back.
868  DenseMap<uint64_t, uint64_t> ValueCountMap;
869  if (Sum == 0) {
// "Mark as promoted" mode: keep every existing entry and force the single
// incoming target to NOMORE_ICP_MAGICNUM.
870  assert((CallTargets.size() == 1 &&
871  CallTargets[0].Count == NOMORE_ICP_MAGICNUM) &&
872  "If sum is 0, assume only one element in CallTargets "
873  "with count being NOMORE_ICP_MAGICNUM");
874  // Initialize ValueCountMap with existing value profile data.
875  if (Valid) {
876  for (uint32_t I = 0; I < NumVals; I++)
877  ValueCountMap[ValueData[I].Value] = ValueData[I].Count;
878  }
879  auto Pair =
880  ValueCountMap.try_emplace(CallTargets[0].Value, CallTargets[0].Count);
881  // If the target already exists in value profile, decrease the total
882  // count OldSum and reset the target's count to NOMORE_ICP_MAGICNUM.
883  if (!Pair.second) {
884  OldSum -= Pair.first->second;
885  Pair.first->second = NOMORE_ICP_MAGICNUM;
886  }
887  Sum = OldSum;
888  } else {
// "Refresh" mode: only previously promoted entries survive from the old
// profile; everything else comes from CallTargets.
889  // Initialize ValueCountMap with existing NOMORE_ICP_MAGICNUM
890  // counts in the value profile.
891  if (Valid) {
892  for (uint32_t I = 0; I < NumVals; I++) {
893  if (ValueData[I].Count == NOMORE_ICP_MAGICNUM)
894  ValueCountMap[ValueData[I].Value] = ValueData[I].Count;
895  }
896  }
897 
898  for (const auto &Data : CallTargets) {
899  auto Pair = ValueCountMap.try_emplace(Data.Value, Data.Count);
900  if (Pair.second)
901  continue;
902  // The target represented by Data.Value has already been promoted.
903  // Keep the count as NOMORE_ICP_MAGICNUM in the profile and decrease
904  // Sum by Data.Count.
905  assert(Sum >= Data.Count && "Sum should never be less than Data.Count");
906  Sum -= Data.Count;
907  }
908  }
909 
// Flatten the map into a vector so it can be sorted and annotated.
910  SmallVector<InstrProfValueData, 8> NewCallTargets;
911  for (const auto &ValueCount : ValueCountMap) {
912  NewCallTargets.emplace_back(
913  InstrProfValueData{ValueCount.first, ValueCount.second});
914  }
915 
// Order by descending count, ties broken by descending value. Map keys are
// unique, so this comparator yields one well-defined order.
916  llvm::sort(NewCallTargets,
917  [](const InstrProfValueData &L, const InstrProfValueData &R) {
918  if (L.Count != R.Count)
919  return L.Count > R.Count;
920  return L.Value > R.Value;
921  });
922 
// Never write more than MaxNumPromotions entries into the metadata.
923  uint32_t MaxMDCount =
924  std::min(NewCallTargets.size(), static_cast<size_t>(MaxNumPromotions));
925  annotateValueSite(*Inst.getParent()->getParent()->getParent(), Inst,
926  NewCallTargets, Sum, IPVK_IndirectCallTarget, MaxMDCount);
927 }
928 
929 /// Attempt to promote indirect call and also inline the promoted call.
930 ///
931 /// \param F Caller function.
932 /// \param Candidate ICP and inline candidate.
933 /// \param SumOrigin Original sum of target counts for indirect call before
934 /// promoting given candidate.
935 /// \param Sum Prorated sum of remaining target counts for indirect call
936 /// after promoting given candidate.
937 /// \param InlinedCallSite Output vector for new call sites exposed after
938 /// inlining.
///
/// \returns True only when the call was promoted and the resulting direct
/// call was then inlined by tryInlineCandidate; false on every other path.
939 bool SampleProfileLoader::tryPromoteAndInlineCandidate(
940  Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum,
941  SmallVector<CallBase *, 8> *InlinedCallSite) {
942  // Bail out early if sample-loader inliner is disabled.
// NOTE(review): listing line 943 (the condition guarding the return below) is
// absent from this listing -- confirm against the upstream file.
944  return false;
945 
946  // Bail out early if MaxNumPromotions is zero.
947  // This prevents allocating an array of zero length in callees below.
948  if (MaxNumPromotions == 0)
949  return false;
// Resolve the profiled callee name to a function in SymbolMap; give up when
// there is no entry or the entry is null.
950  auto CalleeFunctionName = Candidate.CalleeSamples->getFuncName();
951  auto R = SymbolMap.find(CalleeFunctionName);
952  if (R == SymbolMap.end() || !R->getValue())
953  return false;
954 
955  auto &CI = *Candidate.CallInstr;
956  if (!doesHistoryAllowICP(CI, R->getValue()->getName()))
957  return false;
958 
959  const char *Reason = "Callee function not available";
960  // R->getValue() != &F is to prevent promoting a recursive call.
961  // If it is a recursive call, we do not inline it as it could bloat
962  // the code exponentially. There are ways to handle this better, e.g.
963  // clone the caller first, and inline the cloned caller if it is
964  // recursive. As llvm does not inline recursive calls, we will
965  // simply ignore it instead of handling it explicitly.
966  if (!R->getValue()->isDeclaration() && R->getValue()->getSubprogram() &&
967  R->getValue()->hasFnAttribute("use-sample-profile") &&
968  R->getValue() != &F && isLegalToPromote(CI, R->getValue(), &Reason)) {
969  // For promoted target, set its value with NOMORE_ICP_MAGICNUM count
970  // in the value profile metadata so the target won't be promoted again.
971  SmallVector<InstrProfValueData, 1> SortedCallTargets = {InstrProfValueData{
972  Function::getGUID(R->getValue()->getName()), NOMORE_ICP_MAGICNUM}};
973  updateIDTMetaData(CI, SortedCallTargets, 0);
974 
// NOTE(review): DI is formed by taking the address of the call's result, so
// the `if (DI)` test below looks always-true -- confirm.
975  auto *DI = &pgo::promoteIndirectCall(
976  CI, R->getValue(), Candidate.CallsiteCount, Sum, false, ORE);
977  if (DI) {
978  Sum -= Candidate.CallsiteCount;
979  // Do not prorate the indirect callsite distribution since the original
980  // distribution will be used to scale down non-promoted profile target
981  // counts later. By doing this we lose track of the real callsite count
982  // for the leftover indirect callsite as a trade off for accurate call
983  // target counts.
984  // TODO: Ideally we would have two separate factors, one for call site
985  // counts and one is used to prorate call target counts.
986  // Do not update the promoted direct callsite distribution at this
987  // point since the original distribution combined with the callee profile
988  // will be used to prorate callsites from the callee if inlined. Once not
989  // inlined, the direct callsite distribution should be prorated so that
990  // it will reflect the real callsite counts.
991  Candidate.CallInstr = DI;
992  if (isa<CallInst>(DI) || isa<InvokeInst>(DI)) {
993  bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
994  if (!Inlined) {
995  // Prorate the direct callsite distribution so that it reflects real
996  // callsite counts.
// NOTE(review): listing line 997 (the start of the call whose arguments
// follow) is absent from this listing -- confirm against the upstream file.
998  *DI, static_cast<float>(Candidate.CallsiteCount) / SumOrigin);
999  }
1000  return Inlined;
1001  }
1002  }
1003  } else {
1004  LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to "
1005  << Candidate.CalleeSamples->getFuncName() << " because "
1006  << Reason << "\n");
1007  }
1008  return false;
1009 }
1010 
// Decide whether a call site that is not hot should still be inlined.
// Returns false when ProfileSizeInline is off, when there is no callee, or
// when the inline cost is "never"; returns true for "always"; otherwise
// compares the computed cost against SampleColdCallSiteThreshold.
1011 bool SampleProfileLoader::shouldInlineColdCallee(CallBase &CallInst) {
1012  if (!ProfileSizeInline)
1013  return false;
1014 
// NOTE(review): listing line 1015 is absent from this listing; it presumably
// declares `Callee` used below -- confirm against the upstream file.
1016  if (Callee == nullptr)
1017  return false;
1018 
1019  InlineCost Cost = getInlineCost(CallInst, getInlineParams(), GetTTI(*Callee),
1020  GetAC, GetTLI);
1021 
1022  if (Cost.isNever())
1023  return false;
1024 
1025  if (Cost.isAlways())
1026  return true;
1027 
1028  return Cost.getCost() <= SampleColdCallSiteThreshold;
1029 }
1030 
1031 void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
1032  const SmallVectorImpl<CallBase *> &Candidates, const Function &F,
1033  bool Hot) {
1034  for (auto I : Candidates) {
1035  Function *CalledFunction = I->getCalledFunction();
1036  if (CalledFunction) {
1037  ORE->emit(OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(),
1038  "InlineAttempt", I->getDebugLoc(),
1039  I->getParent())
1040  << "previous inlining reattempted for "
1041  << (Hot ? "hotness: '" : "size: '")
1042  << ore::NV("Callee", CalledFunction) << "' into '"
1043  << ore::NV("Caller", &F) << "'");
1044  }
1045  }
1046 }
1047 
// Collect into InlinedGUIDs the GUIDs of functions that should be imported so
// they can be inlined for call site CB / profile Samples.
//
// CB may be null (it is only consulted when non-null). Samples may be null
// only on the early-return path taken for the external inline advisor;
// otherwise it is asserted to be non-null. Threshold is the minimum count a
// profile or call target needs in order to be considered.
1048 void SampleProfileLoader::findExternalInlineCandidate(
1049  CallBase *CB, const FunctionSamples *Samples,
1050  DenseSet<GlobalValue::GUID> &InlinedGUIDs,
1051  const StringMap<Function *> &SymbolMap, uint64_t Threshold) {
1052 
1053  // If ExternalInlineAdvisor wants to inline an external function
1054  // make sure it's imported
1055  if (CB && getExternalInlineAdvisorShouldInline(*CB)) {
1056  // Samples may not exist for replayed function, if so
1057  // just add the direct GUID and move on
1058  if (!Samples) {
// NOTE(review): listing line 1060 (the argument of the insert call below) is
// absent from this listing -- confirm against the upstream file.
1059  InlinedGUIDs.insert(
1061  return;
1062  }
1063  // Otherwise, drop the threshold to import everything that we can
1064  Threshold = 0;
1065  }
1066 
1067  assert(Samples && "expect non-null caller profile");
1068 
1069  // For AutoFDO profile, retrieve candidate profiles by walking over
1070  // the nested inlinee profiles.
// NOTE(review): listing line 1071 (the condition opening the block closed at
// line 1074) is absent from this listing -- confirm against the upstream file.
1072  Samples->findInlinedFunctions(InlinedGUIDs, SymbolMap, Threshold);
1073  return;
1074  }
1075 
// NOTE(review): listing line 1076 is absent from this listing; it presumably
// declares `Caller`, initialized by the expression below -- confirm.
1077  ContextTracker->getContextFor(Samples->getContext());
// Breadth-first walk over the context trie starting at Caller.
1078  std::queue<ContextTrieNode *> CalleeList;
1079  CalleeList.push(Caller);
1080  while (!CalleeList.empty()) {
1081  ContextTrieNode *Node = CalleeList.front();
1082  CalleeList.pop();
1083  FunctionSamples *CalleeSample = Node->getFunctionSamples();
1084  // For CSSPGO profile, retrieve candidate profile by walking over the
1085  // trie built for context profile. Note that also take call targets
1086  // even if callee doesn't have a corresponding context profile.
1087  if (!CalleeSample)
1088  continue;
1089 
1090  // If pre-inliner decision is used, honor that for importing as well.
// NOTE(review): listing lines 1092-1093 (the initializer of PreInline) are
// absent from this listing -- confirm against the upstream file.
1091  bool PreInline =
1094  if (!PreInline && CalleeSample->getEntrySamples() < Threshold)
1095  continue;
1096 
1097  StringRef Name = CalleeSample->getFuncName();
// NOTE(review): listing line 1098 is absent from this listing; it presumably
// declares `Func` used below -- confirm against the upstream file.
1099  // Add to the import list only when it's defined out of module.
1100  if (!Func || Func->isDeclaration())
1101  InlinedGUIDs.insert(FunctionSamples::getGUID(CalleeSample->getName()));
1102 
1103  // Import hot CallTargets, which may not be available in IR because full
1104  // profile annotation cannot be done until backend compilation in ThinLTO.
1105  for (const auto &BS : CalleeSample->getBodySamples())
1106  for (const auto &TS : BS.second.getCallTargets())
1107  if (TS.getValue() > Threshold) {
1108  StringRef CalleeName = CalleeSample->getFuncName(TS.getKey());
1109  const Function *Callee = SymbolMap.lookup(CalleeName);
1110  if (!Callee || Callee->isDeclaration())
1111  InlinedGUIDs.insert(FunctionSamples::getGUID(TS.getKey()));
1112  }
1113 
1114  // Import hot child context profile associated with callees. Note that this
1115  // may have some overlap with the call target loop above, but doing this
1116  // based on child context profile again effectively allows us to use the max
1117  // of entry count and call target count to determine importing.
1118  for (auto &Child : Node->getAllChildContext()) {
1119  ContextTrieNode *CalleeNode = &Child.second;
1120  CalleeList.push(CalleeNode);
1121  }
1122  }
1123 }
1124 
1125 /// Iteratively inline hot callsites of a function.
1126 ///
1127 /// Iteratively traverse all callsites of the function \p F, so as to
1128 /// find out callsites with corresponding inline instances.
1129 ///
1130 /// For such callsites,
1131 /// - If it is hot enough, inline the callsites and adds callsites of the callee
1132 /// into the caller. If the call is an indirect call, first promote
1133 /// it to direct call. Each indirect call is limited with a single target.
1134 ///
1135 /// - If a callsite is not inlined, merge its profile to the outline
1136 /// version (if --sample-profile-merge-inlinee is true), or scale the
1137 /// counters of standalone function based on the profile of inlined
1138 /// instances (if --sample-profile-merge-inlinee is false).
1139 ///
1140 /// Later passes may consume the updated profiles.
1141 ///
1142 /// \param F function to perform iterative inlining.
1143 /// \param InlinedGUIDs a set to be updated to include all GUIDs that are
1144 /// inlined in the profiled binary.
1145 ///
1146 /// \returns True if there is any inline happened.
1147 bool SampleProfileLoader::inlineHotFunctions(
1148  Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
1149  // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
1150  // Profile symbol list is ignored when profile-sample-accurate is on.
// NOTE(review): listing line 1152 (part of the asserted condition) is absent
// from this listing -- confirm against the upstream file.
1151  assert((!ProfAccForSymsInList ||
1153  !F.hasFnAttribute("profile-sample-accurate"))) &&
1154  "ProfAccForSymsInList should be false when profile-sample-accurate "
1155  "is enabled");
1156 
// Call sites seen with a profile that have not (yet) been inlined; entries
// are erased when inlining succeeds and the remainder is handed to
// promoteMergeNotInlinedContextSamples at the end.
1157  DenseMap<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
1158  bool Changed = false;
1159  bool LocalChanged = true;
// Repeat until a full sweep over F inlines nothing.
1160  while (LocalChanged) {
1161  LocalChanged = false;
// NOTE(review): listing line 1162 is absent from this listing; it presumably
// declares the container `CIS` used below -- confirm against the upstream file.
1163  for (auto &BB : F) {
1164  bool Hot = false;
1165  SmallVector<CallBase *, 10> AllCandidates;
1166  SmallVector<CallBase *, 10> ColdCandidates;
1167  for (auto &I : BB.getInstList()) {
1168  const FunctionSamples *FS = nullptr;
1169  if (auto *CB = dyn_cast<CallBase>(&I)) {
1170  if (!isa<IntrinsicInst>(I)) {
1171  if ((FS = findCalleeFunctionSamples(*CB))) {
1172  assert((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) &&
1173  "GUIDToFuncNameMap has to be populated");
1174  AllCandidates.push_back(CB);
1175  if (FS->getEntrySamples() > 0 || FunctionSamples::ProfileIsCS)
1176  LocalNotInlinedCallSites.try_emplace(CB, FS);
1177  if (callsiteIsHot(FS, PSI, ProfAccForSymsInList))
1178  Hot = true;
1179  else if (shouldInlineColdCallee(*CB))
1180  ColdCandidates.push_back(CB);
1181  } else if (getExternalInlineAdvisorShouldInline(*CB)) {
1182  AllCandidates.push_back(CB);
1183  }
1184  }
1185  }
1186  }
// One hot call site in the block (or an external advisor) makes every
// profiled call site in the block a candidate; otherwise only the cold ones
// that pass shouldInlineColdCallee are taken.
1187  if (Hot || ExternalInlineAdvisor) {
1188  CIS.insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end());
1189  emitOptimizationRemarksForInlineCandidates(AllCandidates, F, true);
1190  } else {
1191  CIS.insert(CIS.begin(), ColdCandidates.begin(), ColdCandidates.end());
1192  emitOptimizationRemarksForInlineCandidates(ColdCandidates, F, false);
1193  }
1194  }
1195  for (CallBase *I : CIS) {
1196  Function *CalledFunction = I->getCalledFunction();
1197  InlineCandidate Candidate = {I, LocalNotInlinedCallSites.lookup(I),
1198  0 /* dummy count */,
1199  1.0 /* dummy distribution factor */};
1200  // Do not inline recursive calls.
1201  if (CalledFunction == &F)
1202  continue;
1203  if (I->isIndirectCall()) {
// Sum is passed to findIndirectCallFunctionSamples; NOTE(review): presumably
// an out-parameter set by that call -- confirm, since it is uninitialized here.
1204  uint64_t Sum;
1205  for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
1206  uint64_t SumOrigin = Sum;
1207  if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1208  findExternalInlineCandidate(I, FS, InlinedGUIDs, SymbolMap,
1209  PSI->getOrCompHotCountThreshold());
1210  continue;
1211  }
1212  if (!callsiteIsHot(FS, PSI, ProfAccForSymsInList))
1213  continue;
1214 
1215  Candidate = {I, FS, FS->getEntrySamples(), 1.0};
1216  if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum)) {
1217  LocalNotInlinedCallSites.erase(I);
1218  LocalChanged = true;
1219  }
1220  }
1221  } else if (CalledFunction && CalledFunction->getSubprogram() &&
1222  !CalledFunction->isDeclaration()) {
1223  if (tryInlineCandidate(Candidate)) {
1224  LocalNotInlinedCallSites.erase(I);
1225  LocalChanged = true;
1226  }
1227  } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1228  findExternalInlineCandidate(I, findCalleeFunctionSamples(*I),
1229  InlinedGUIDs, SymbolMap,
1230  PSI->getOrCompHotCountThreshold());
1231  }
1232  }
1233  Changed |= LocalChanged;
1234  }
1235 
1236  // For CS profile, profile for not inlined context will be merged when
1237  // base profile is being retrieved.
// NOTE(review): listing line 1238 (presumably the condition guarding the call
// below) is absent from this listing -- confirm against the upstream file.
1239  promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites, F);
1240  return Changed;
1241 }
1242 
// Try to inline the direct call held in Candidate.CallInstr.
//
// Returns true when InlineFunction succeeded. When InlinedCallSites is
// non-null it is cleared and refilled with the call sites exposed by the
// inlining (IFI.InlinedCallSites). Returns false when the cost check rejects
// the candidate or when InlineFunction fails.
1243 bool SampleProfileLoader::tryInlineCandidate(
1244  InlineCandidate &Candidate, SmallVector<CallBase *, 8> *InlinedCallSites) {
1245  // Do not attempt to inline a candidate if
1246  // --disable-sample-loader-inlining is true.
// NOTE(review): listing line 1247 (the condition guarding the return below) is
// absent from this listing -- confirm against the upstream file.
1248  return false;
1249 
// Capture everything needed for remarks up front; CB must not be used after
// InlineFunction succeeds.
1250  CallBase &CB = *Candidate.CallInstr;
1251  Function *CalledFunction = CB.getCalledFunction();
1252  assert(CalledFunction && "Expect a callee with definition");
1253  DebugLoc DLoc = CB.getDebugLoc();
1254  BasicBlock *BB = CB.getParent();
1255 
1256  InlineCost Cost = shouldInlineCandidate(Candidate);
1257  if (Cost.isNever()) {
1258  ORE->emit(OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(),
1259  "InlineFail", DLoc, BB)
1260  << "incompatible inlining");
1261  return false;
1262  }
1263 
1264  if (!Cost)
1265  return false;
1266 
1267  InlineFunctionInfo IFI(nullptr, GetAC);
1268  IFI.UpdateProfile = false;
1269  if (!InlineFunction(CB, IFI).isSuccess())
1270  return false;
1271 
1272  // Merge the attributes based on the inlining.
// NOTE(review): listing line 1273 (the start of the call whose last argument
// follows) is absent from this listing -- confirm against the upstream file.
1274  *CalledFunction);
1275 
1276  // The call to InlineFunction erases I, so we can't pass it here.
1277  emitInlinedIntoBasedOnCost(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(),
1278  Cost, true, getAnnotatedRemarkPassName());
1279 
1280  // Now populate the list of newly exposed call sites.
1281  if (InlinedCallSites) {
1282  InlinedCallSites->clear();
1283  for (auto &I : IFI.InlinedCallSites)
1284  InlinedCallSites->push_back(I);
1285  }
1286 
// NOTE(review): listing line 1287 (presumably a condition guarding the
// ContextTracker call below) is absent from this listing -- confirm.
1288  ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
1289  ++NumCSInlined;
1290 
1291  // Prorate inlined probes for a duplicated inlining callsite which probably
1292  // has a distribution less than 100%. Samples for an inlinee should be
1293  // distributed among the copies of the original callsite based on each
1294  // callsite's distribution factor for counts accuracy. Note that an inlined
1295  // probe may come with its own distribution factor if it has been duplicated
1296  // in the inlinee body. The two factors are multiplied to reflect the
1297  // aggregation of duplication.
1298  if (Candidate.CallsiteDistribution < 1) {
1299  for (auto &I : IFI.InlinedCallSites) {
// NOTE(review): listing line 1301 (the start of the statement whose last
// argument follows) is absent from this listing -- confirm.
1300  if (Optional<PseudoProbe> Probe = extractProbe(*I))
1302  Candidate.CallsiteDistribution);
1303  }
1304  NumDuplicatedInlinesite++;
1305  }
1306 
1307  return true;
1308 }
1309 
1310 bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
1311  CallBase *CB) {
1312  assert(CB && "Expect non-null call instruction");
1313 
1314  if (isa<IntrinsicInst>(CB))
1315  return false;
1316 
1317  // Find the callee's profile. For indirect call, find hottest target profile.
1318  const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB);
1319  // If ExternalInlineAdvisor wants to inline this site, do so even
1320  // if Samples are not present.
1321  if (!CalleeSamples && !getExternalInlineAdvisorShouldInline(*CB))
1322  return false;
1323 
1324  float Factor = 1.0;
1325  if (Optional<PseudoProbe> Probe = extractProbe(*CB))
1326  Factor = Probe->Factor;
1327 
1328  uint64_t CallsiteCount =
1329  CalleeSamples ? CalleeSamples->getEntrySamples() * Factor : 0;
1330  *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
1331  return true;
1332 }
1333 
// Ask the external inline advisor, if one is set, about call site CB.
//
// Yields a "never" cost when the advice says inlining is not recommended, an
// "always" cost when it is recommended, and an empty value (`{}`) when there
// is no advisor or the advisor returned no advice. The advice object is told
// which decision was taken via recordUnattemptedInlining / recordInlining.
// NOTE(review): listing line 1334 (the return type) is absent from this
// listing -- confirm against the upstream file.
1335 SampleProfileLoader::getExternalInlineAdvisorCost(CallBase &CB) {
1336  std::unique_ptr<InlineAdvice> Advice = nullptr;
1337  if (ExternalInlineAdvisor) {
1338  Advice = ExternalInlineAdvisor->getAdvice(CB);
1339  if (Advice) {
1340  if (!Advice->isInliningRecommended()) {
1341  Advice->recordUnattemptedInlining();
1342  return InlineCost::getNever("not previously inlined");
1343  }
1344  Advice->recordInlining();
1345  return InlineCost::getAlways("previously inlined");
1346  }
1347  }
1348 
1349  return {};
1350 }
1351 
1352 bool SampleProfileLoader::getExternalInlineAdvisorShouldInline(CallBase &CB) {
1353  Optional<InlineCost> Cost = getExternalInlineAdvisorCost(CB);
1354  return Cost ? !!Cost.getValue() : false;
1355 }
1356 
// Compute the InlineCost used to decide whether Candidate is inlined.
//
// Order of precedence visible below: a cost from the external inline advisor,
// then "never"/"always" from the call analyzer, then a preinliner decision
// recorded in the callee's context, and finally the analyzer's cost paired
// with a sample-profile threshold.
1357 InlineCost
1358 SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
1359  if (Optional<InlineCost> ReplayCost =
1360  getExternalInlineAdvisorCost(*Candidate.CallInstr))
1361  return ReplayCost.getValue();
1362  // Adjust threshold based on call site hotness, only do this for callsite
1363  // prioritized inliner because otherwise cost-benefit check is done earlier.
1364  int SampleThreshold = SampleColdCallSiteThreshold;
// NOTE(review): listing line 1365 (the condition opening the block closed at
// line 1370) is absent from this listing -- confirm against the upstream file.
1366  if (Candidate.CallsiteCount > PSI->getHotCountThreshold())
1367  SampleThreshold = SampleHotCallSiteThreshold;
1368  else if (!ProfileSizeInline)
1369  return InlineCost::getNever("cold callsite");
1370  }
1371 
1372  Function *Callee = Candidate.CallInstr->getCalledFunction();
1373  assert(Callee && "Expect a definition for inline candidate of direct call");
1374 
1375  InlineParams Params = getInlineParams();
1376  // We will ignore the threshold from inline cost, so always get full cost.
1377  Params.ComputeFullInlineCost = true;
// NOTE(review): listing line 1378 is absent from this listing -- confirm
// against the upstream file.
1379  // Checks if there is anything in the reachable portion of the callee at
1380  // this callsite that makes this inlining potentially illegal. Need to
1381  // set ComputeFullInlineCost, otherwise getInlineCost may return early
1382  // when cost exceeds threshold without checking all IRs in the callee.
1383  // The actual cost does not matter because we only check isNever() to
1384  // see if it is legal to inline the callsite.
1385  InlineCost Cost = getInlineCost(*Candidate.CallInstr, Callee, Params,
1386  GetTTI(*Callee), GetAC, GetTLI);
1387 
1388  // Honor always inline and never inline from call analyzer
1389  if (Cost.isNever() || Cost.isAlways())
1390  return Cost;
1391 
1392  // With CSSPGO, the preinliner in llvm-profgen can estimate global inline
1393  // decisions based on hotness as well as accurate function byte sizes for
1394  // given context using function/inlinee sizes from previous build. It
1395  // stores the decision in profile, and also adjust/merge context profile
1396  // aiming at better context-sensitive post-inline profile quality, assuming
1397  // all inline decision estimates are going to be honored by compiler. Here
1398  // we replay that inline decision under `sample-profile-use-preinliner`.
1399  // Note that we don't need to handle negative decision from preinliner as
1400  // context profile for not inlined calls are merged by preinliner already.
1401  if (UsePreInlinerDecision && Candidate.CalleeSamples) {
1402  // Once two nodes are merged due to promotion, we're losing some context
1403  // so the original context-sensitive preinliner decision should be ignored
1404  // for SyntheticContext.
1405  SampleContext &Context = Candidate.CalleeSamples->getContext();
1406  if (!Context.hasState(SyntheticContext) &&
1407  Context.hasAttribute(ContextShouldBeInlined))
1408  return InlineCost::getAlways("preinliner");
1409  }
1410 
1411  // For old FDO inliner, we inline the call site as long as cost is not
1412  // "Never". The cost-benefit check is done earlier.
// NOTE(review): listing line 1413 (the condition opening the block closed at
// line 1415) is absent from this listing -- confirm against the upstream file.
1414  return InlineCost::get(Cost.getCost(), INT_MAX);
1415  }
1416 
1417  // Otherwise only use the cost from call analyzer, but overwrite threshold
1418  // with Sample PGO threshold.
1419  return InlineCost::get(Cost.getCost(), SampleThreshold);
1420 }
1421 
// Priority-queue driven variant of hot call site inlining for function F.
//
// All call sites of F that yield a candidate are pushed into a CandidateQueue;
// candidates are then popped one at a time and inlined (indirect calls are
// promoted first) while F's instruction count stays below SizeLimit. Call
// sites exposed by each successful inlining are pushed back into the queue.
// InlinedGUIDs is filled via findExternalInlineCandidate in the ThinLTO
// pre-link phase. Returns true if anything was inlined.
1422 bool SampleProfileLoader::inlineHotFunctionsWithPriority(
1423  Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
1424  // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
1425  // Profile symbol list is ignored when profile-sample-accurate is on.
// NOTE(review): listing line 1427 (part of the asserted condition) is absent
// from this listing -- confirm against the upstream file.
1426  assert((!ProfAccForSymsInList ||
1428  !F.hasFnAttribute("profile-sample-accurate"))) &&
1429  "ProfAccForSymsInList should be false when profile-sample-accurate "
1430  "is enabled");
1431 
1432  // Populating worklist with initial call sites from root inliner, along
1433  // with call site weights.
1434  CandidateQueue CQueue;
1435  InlineCandidate NewCandidate;
1436  for (auto &BB : F) {
1437  for (auto &I : BB.getInstList()) {
1438  auto *CB = dyn_cast<CallBase>(&I);
1439  if (!CB)
1440  continue;
1441  if (getInlineCandidate(&NewCandidate, CB))
1442  CQueue.push(NewCandidate);
1443  }
1444  }
1445 
1446  // Cap the size growth from profile guided inlining. This is needed even
1447  // though cost of each inline candidate already accounts for callee size,
1448  // because with top-down inlining, we can grow inliner size significantly
1449  // with large number of smaller inlinees each pass the cost check.
// NOTE(review): listing line 1450 (the start of the assertion whose message
// follows) is absent from this listing -- confirm against the upstream file.
1451  "Max inline size limit should not be smaller than min inline size "
1452  "limit.");
1453  unsigned SizeLimit = F.getInstructionCount() * ProfileInlineGrowthLimit;
// NOTE(review): listing lines 1454-1455 and 1457 are absent from this listing;
// judging from the counters at the end of this function they presumably adjust
// SizeLimit against ProfileInlineLimitMax/Min and for the external advisor --
// confirm against the upstream file.
1456  if (ExternalInlineAdvisor)
1458 
1459  DenseMap<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
1460 
1461  // Perform iterative BFS call site prioritized inlining
1462  bool Changed = false;
1463  while (!CQueue.empty() && F.getInstructionCount() < SizeLimit) {
1464  InlineCandidate Candidate = CQueue.top();
1465  CQueue.pop();
1466  CallBase *I = Candidate.CallInstr;
1467  Function *CalledFunction = I->getCalledFunction();
1468 
// Skip direct recursive calls.
1469  if (CalledFunction == &F)
1470  continue;
1471  if (I->isIndirectCall()) {
1472  uint64_t Sum = 0;
1473  auto CalleeSamples = findIndirectCallFunctionSamples(*I, Sum);
// SumOrigin keeps the unscaled total; Sum is scaled by the call site's
// distribution factor.
1474  uint64_t SumOrigin = Sum;
1475  Sum *= Candidate.CallsiteDistribution;
1476  unsigned ICPCount = 0;
1477  for (const auto *FS : CalleeSamples) {
1478  // TODO: Consider disable pre-lTO ICP for MonoLTO as well
1479  if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1480  findExternalInlineCandidate(I, FS, InlinedGUIDs, SymbolMap,
1481  PSI->getOrCompHotCountThreshold());
1482  continue;
1483  }
1484  uint64_t EntryCountDistributed =
1485  FS->getEntrySamples() * Candidate.CallsiteDistribution;
1486  // In addition to regular inline cost check, we also need to make sure
1487  // ICP isn't introducing excessive speculative checks even if individual
1488  // target looks beneficial to promote and inline. That means we should
1489  // only do ICP when there's a small number dominant targets.
1490  if (ICPCount >= ProfileICPRelativeHotnessSkip &&
1491  EntryCountDistributed * 100 < SumOrigin * ProfileICPRelativeHotness)
1492  break;
1493  // TODO: Fix CallAnalyzer to handle all indirect calls.
1494  // For indirect call, we don't run CallAnalyzer to get InlineCost
1495  // before actual inlining. This is because we could see two different
1496  // types from the same definition, which makes CallAnalyzer choke as
1497  // it's expecting matching parameter type on both caller and callee
1498  // side. See example from PR18962 for the triggering cases (the bug was
1499  // fixed, but we generate different types).
1500  if (!PSI->isHotCount(EntryCountDistributed))
1501  break;
1502  SmallVector<CallBase *, 8> InlinedCallSites;
1503  // Attach function profile for promoted indirect callee, and update
1504  // call site count for the promoted inline candidate too.
1505  Candidate = {I, FS, EntryCountDistributed,
1506  Candidate.CallsiteDistribution};
1507  if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
1508  &InlinedCallSites)) {
// Newly exposed call sites become candidates themselves.
1509  for (auto *CB : InlinedCallSites) {
1510  if (getInlineCandidate(&NewCandidate, CB))
1511  CQueue.emplace(NewCandidate);
1512  }
1513  ICPCount++;
1514  Changed = true;
1515  } else if (!ContextTracker) {
1516  LocalNotInlinedCallSites.try_emplace(I, FS);
1517  }
1518  }
1519  } else if (CalledFunction && CalledFunction->getSubprogram() &&
1520  !CalledFunction->isDeclaration()) {
1521  SmallVector<CallBase *, 8> InlinedCallSites;
1522  if (tryInlineCandidate(Candidate, &InlinedCallSites)) {
1523  for (auto *CB : InlinedCallSites) {
1524  if (getInlineCandidate(&NewCandidate, CB))
1525  CQueue.emplace(NewCandidate);
1526  }
1527  Changed = true;
1528  } else if (!ContextTracker) {
1529  LocalNotInlinedCallSites.try_emplace(I, Candidate.CalleeSamples);
1530  }
1531  } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1532  findExternalInlineCandidate(I, findCalleeFunctionSamples(*I),
1533  InlinedGUIDs, SymbolMap,
1534  PSI->getOrCompHotCountThreshold());
1535  }
1536  }
1537 
// Candidates left in the queue mean the loop stopped on the size limit;
// record which kind of limit was in effect.
1538  if (!CQueue.empty()) {
1539  if (SizeLimit == (unsigned)ProfileInlineLimitMax)
1540  ++NumCSInlinedHitMaxLimit;
1541  else if (SizeLimit == (unsigned)ProfileInlineLimitMin)
1542  ++NumCSInlinedHitMinLimit;
1543  else
1544  ++NumCSInlinedHitGrowthLimit;
1545  }
1546 
1547  // For CS profile, profile for not inlined context will be merged when
1548  // base profile is being retrieved.
// NOTE(review): listing line 1549 (presumably the condition guarding the call
// below) is absent from this listing -- confirm against the upstream file.
1550  promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites, F);
1551  return Changed;
1552 }
1553 
// Process the call sites in F that had an inlinee profile but were not
// inlined.
//
// For each such call site with a defined callee, an analysis remark is
// emitted. Then, depending on ProfileMergeInlinee, the inlinee profile is
// either merged into the callee's outlined profile or its entry samples are
// accumulated in notInlinedCallInfo.
// NOTE(review): listing line 1555 (the first parameter, presumably the map
// named NonInlinedCallSites iterated below) is absent from this listing --
// confirm against the upstream file.
1554 void SampleProfileLoader::promoteMergeNotInlinedContextSamples(
1556  const Function &F) {
1557  // Accumulate not inlined callsite information into notInlinedSamples
1558  for (const auto &Pair : NonInlinedCallSites) {
1559  CallBase *I = Pair.getFirst();
1560  Function *Callee = I->getCalledFunction();
1561  if (!Callee || Callee->isDeclaration())
1562  continue;
1563 
1564  ORE->emit(
1565  OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(), "NotInline",
1566  I->getDebugLoc(), I->getParent())
1567  << "previous inlining not repeated: '" << ore::NV("Callee", Callee)
1568  << "' into '" << ore::NV("Caller", &F) << "'");
1569 
1570  ++NumCSNotInlined;
// NOTE(review): FS is dereferenced without a null check; presumably callers
// never store a null profile in the map -- verify against the callers.
1571  const FunctionSamples *FS = Pair.getSecond();
// Nothing to merge for a profile with no samples at all.
1572  if (FS->getTotalSamples() == 0 && FS->getEntrySamples() == 0) {
1573  continue;
1574  }
1575 
1576  // Do not merge a context that is already duplicated into the base profile.
1577  if (FS->getContext().hasAttribute(sampleprof::ContextDuplicatedIntoBase))
1578  continue;
1579 
1580  if (ProfileMergeInlinee) {
1581  // A function call can be replicated by optimizations like callsite
1582  // splitting or jump threading and the replicates end up sharing the
1583  // sample nested callee profile instead of slicing the original
1584  // inlinee's profile. We want to do merge exactly once by filtering out
1585  // callee profiles with a non-zero head sample count.
1586  if (FS->getHeadSamples() == 0) {
1587  // Use entry samples as head samples during the merge, as inlinees
1588  // don't have head samples.
1589  const_cast<FunctionSamples *>(FS)->addHeadSamples(
1590  FS->getEntrySamples());
1591 
1592  // Note that we have to do the merge right after processing function.
1593  // This allows OutlineFS's profile to be used for annotation during
1594  // top-down processing of functions' annotation.
1595  FunctionSamples *OutlineFS = Reader->getOrCreateSamplesFor(*Callee);
1596  OutlineFS->merge(*FS, 1);
1597  // Set outlined profile to be synthetic to not bias the inliner.
1598  OutlineFS->SetContextSynthetic();
1599  }
1600  } else {
// Not merging: just accumulate the entry samples per callee.
1601  auto pair =
1602  notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
1603  pair.first->second.entryCount += FS->getEntrySamples();
1604  }
1605  }
1606 }
1607 
1608 /// Returns the sorted CallTargetMap \p M by count in descending order.
///
/// Each entry produced by SampleRecord::SortCallTargets(M) is converted to an
/// InstrProfValueData holding the GUID of the target name and its count.
// NOTE(review): listing lines 1609-1611 (the signature and, presumably, the
// declaration of the result container `R`) are absent from this listing --
// confirm against the upstream file.
1612  for (const auto &I : SampleRecord::SortCallTargets(M)) {
1613  R.emplace_back(
1614  InstrProfValueData{FunctionSamples::getGUID(I.first), I.second});
1615  }
1616  return R;
1617 }
1618 
1619 // Generate MD_prof metadata for every branch instruction using the
1620 // edge weights computed during propagation.
//
// Walks every basic block of \p F and does two things per block:
//  * Call sites: when BlockWeights[BB] is non-zero, calls without a
//    statically known callee get value-profile metadata through
//    updateIDTMetaData, and the remaining non-intrinsic calls get a single
//    branch weight equal to the block weight narrowed to uint32_t. The
//    fallback loop further down clears MD_prof on indirect calls and sets a
//    zero weight on all other calls.
//  * Terminators: br/switch/indirectbr instructions whose successor count is
//    not exactly one receive one weight per successor, read from EdgeWeights.
//
// NOTE(review): some identifiers used below (for example `CallSite`) have no
// visible declaration and a few braces have no visible opening statement, so
// this listing appears to be missing source lines. Confirm against the
// upstream file before relying on the exact control flow.
1621 void SampleProfileLoader::generateMDProfMetadata(Function &F) {
1622  // Generate MD_prof metadata for every branch instruction using the
1623  // edge weights computed during propagation.
1624  LLVM_DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n");
1625  LLVMContext &Ctx = F.getContext();
1626  MDBuilder MDB(Ctx);
1627  for (auto &BI : F) {
1628  BasicBlock *BB = &BI;
1629 
 // Blocks with a non-zero weight: annotate their call and invoke
 // instructions.
1630  if (BlockWeights[BB]) {
1631  for (auto &I : BB->getInstList()) {
1632  if (!isa<CallInst>(I) && !isa<InvokeInst>(I))
1633  continue;
 // getCalledFunction() returning null means there is no statically known
 // callee, so the call target counts recorded in the profile are used.
1634  if (!cast<CallBase>(I).getCalledFunction()) {
1635  const DebugLoc &DLoc = I.getDebugLoc();
1636  if (!DLoc)
1637  continue;
1638  const DILocation *DIL = DLoc;
1639  const FunctionSamples *FS = findFunctionSamples(I);
1640  if (!FS)
1641  continue;
 // NOTE(review): `CallSite` is not declared in the visible text.
1643  auto T = FS->findCallTargetMapAt(CallSite);
1644  if (!T || T.get().empty())
1645  continue;
1647  // Prorate the callsite counts based on the pre-ICP distribution
1648  // factor to reflect what is already done to the callsite before
1649  // ICP, such as callsite cloning.
1650  if (Optional<PseudoProbe> Probe = extractProbe(I)) {
1651  if (Probe->Factor < 1)
1652  T = SampleRecord::adjustCallTargets(T.get(), Probe->Factor);
1653  }
1654  }
 // NOTE(review): the initializer of SortedCallTargets is not visible here.
1655  SmallVector<InstrProfValueData, 2> SortedCallTargets =
 // Sum is the total count over all recorded call targets at this site.
1657  uint64_t Sum = 0;
1658  for (const auto &C : T.get())
1659  Sum += C.second;
1660  // With CSSPGO all indirect call targets are counted towards the
1661  // original indirect call site in the profile, including both
1662  // inlined and non-inlined targets.
 // The entry samples of every function profile recorded at this call site
 // are added to Sum as well.
1664  if (const FunctionSamplesMap *M =
1665  FS->findFunctionSamplesMapAt(CallSite)) {
1666  for (const auto &NameFS : *M)
1667  Sum += NameFS.second.getEntrySamples();
1668  }
1669  }
 // A zero Sum only clears existing metadata when OverwriteExistingWeights
 // is set; otherwise the instruction is left untouched.
1670  if (Sum)
1671  updateIDTMetaData(I, SortedCallTargets, Sum);
1672  else if (OverwriteExistingWeights)
1673  I.setMetadata(LLVMContext::MD_prof, nullptr);
1674  } else if (!isa<IntrinsicInst>(&I)) {
 // Known callee that is not an intrinsic: attach the block weight as a
 // single-element branch weight (narrowed to uint32_t by the cast).
1675  I.setMetadata(LLVMContext::MD_prof,
1676  MDB.createBranchWeights(
1677  {static_cast<uint32_t>(BlockWeights[BB])}));
1678  }
1679  }
 // NOTE(review): the statement that opens the following branch is not
 // visible in this listing.
1681  // Set profile metadata (possibly annotated by LTO prelink) to zero or
1682  // clear it for cold code.
1683  for (auto &I : BB->getInstList()) {
1684  if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
1685  if (cast<CallBase>(I).isIndirectCall())
1686  I.setMetadata(LLVMContext::MD_prof, nullptr);
1687  else
1688  I.setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(0));
1689  }
1690  }
1691  }
1692 
 // From here on only the block terminator is annotated. Terminators with
 // exactly one successor, and terminators that are not br/switch/indirectbr,
 // are skipped.
1693  Instruction *TI = BB->getTerminator();
1694  if (TI->getNumSuccessors() == 1)
1695  continue;
1696  if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI) &&
1697  !isa<IndirectBrInst>(TI))
1698  continue;
1699 
1700  DebugLoc BranchLoc = TI->getDebugLoc();
1701  LLVM_DEBUG(dbgs() << "\nGetting weights for branch at line "
1702  << ((BranchLoc) ? Twine(BranchLoc.getLine())
1703  : Twine("<UNKNOWN LOCATION>"))
1704  << ".\n");
1705  SmallVector<uint32_t, 4> Weights;
1706  uint32_t MaxWeight = 0;
 // MaxDestInst has no initializer; it is assigned together with MaxWeight
 // below and is only read on the MaxWeight > 0 path.
1707  Instruction *MaxDestInst;
1708  // Since profi treats multiple edges (multiway branches) as a single edge,
1709  // we need to distribute the computed weight among the branches. We do
1710  // this by evenly splitting the edge weight among destinations.
 // EdgeMultiplicity[Succ] counts how many successor slots of TI target Succ;
 // EdgeIndex[I] is the ordinal of slot I among the slots sharing its target.
1711  DenseMap<const BasicBlock *, uint64_t> EdgeMultiplicity;
1712  std::vector<uint64_t> EdgeIndex;
1713  if (SampleProfileUseProfi) {
1714  EdgeIndex.resize(TI->getNumSuccessors());
1715  for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
1716  const BasicBlock *Succ = TI->getSuccessor(I);
1717  EdgeIndex[I] = EdgeMultiplicity[Succ];
1718  EdgeMultiplicity[Succ]++;
1719  }
1720  }
1721  for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
1722  BasicBlock *Succ = TI->getSuccessor(I);
1723  Edge E = std::make_pair(BB, Succ);
1724  uint64_t Weight = EdgeWeights[E];
1725  LLVM_DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), E));
1726  // Use uint32_t saturated arithmetic to adjust the incoming weights,
1727  // if needed. Sample counts in profiles are 64-bit unsigned values,
1728  // but internally branch weights are expressed as 32-bit values.
 // NOTE(review): only the debug print is visible inside this branch; no
 // assignment that clamps Weight appears in this listing. Verify upstream.
1729  if (Weight > std::numeric_limits<uint32_t>::max()) {
1730  LLVM_DEBUG(dbgs() << " (saturated due to uint32_t overflow)");
1732  }
1733  if (!SampleProfileUseProfi) {
1734  // Weight is added by one to avoid propagation errors introduced by
1735  // 0 weights.
 // NOTE(review): if Weight equals UINT32_MAX at this point, Weight + 1
 // narrows to 0 in the cast below. Confirm whether that is intended.
1736  Weights.push_back(static_cast<uint32_t>(Weight + 1));
1737  } else {
1738  // Profi creates proper weights that do not require "+1" adjustments but
1739  // we evenly split the weight among branches with the same destination.
1740  uint64_t W = Weight / EdgeMultiplicity[Succ];
1741  // Rounding up, if needed, so that first branches are hotter.
1742  if (EdgeIndex[I] < Weight % EdgeMultiplicity[Succ])
1743  W++;
1744  Weights.push_back(static_cast<uint32_t>(W));
1745  }
 // Track the largest edge weight and the first non-PHI, non-debug,
 // non-lifetime instruction of its destination for the remark emitted below.
 // The 64-bit Weight is stored into the 32-bit MaxWeight.
1746  if (Weight != 0) {
1747  if (Weight > MaxWeight) {
1748  MaxWeight = Weight;
1749  MaxDestInst = Succ->getFirstNonPHIOrDbgOrLifetime();
1750  }
1751  }
1752  }
1753 
1754  // FIXME: Re-enable for sample profiling after investigating why the sum
1755  // of branch weights can be 0
1756  //
1757  // misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false);
1758 
 // TempWeight only receives the output of extractProfTotalWeight; its value
 // is not used afterwards.
1759  uint64_t TempWeight;
1760  // Only set weights if there is at least one non-zero weight.
1761  // In any other case, let the analyzer set weights.
1762  // Do not set weights if the weights are present unless under
1763  // OverwriteExistingWeights. In ThinLTO, the profile annotation is done
1764  // twice. If the first annotation already set the weights, the second pass
1765  // does not need to set it. With OverwriteExistingWeights, Blocks with zero
1766  // weight should have their existing metadata (possibly annotated by LTO
1767  // prelink) cleared.
1768  if (MaxWeight > 0 &&
1769  (!TI->extractProfTotalWeight(TempWeight) || OverwriteExistingWeights)) {
1770  LLVM_DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
1771  TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
1772  ORE->emit([&]() {
1773  return OptimizationRemark(DEBUG_TYPE, "PopularDest", MaxDestInst)
1774  << "most popular destination for conditional branches at "
1775  << ore::NV("CondBranchesLoc", BranchLoc);
1776  });
1777  } else {
 // NOTE(review): the condition selecting between the CLEARED and SKIPPED
 // paths is not visible in this listing.
1779  TI->setMetadata(LLVMContext::MD_prof, nullptr);
1780  LLVM_DEBUG(dbgs() << "CLEARED. All branch weights are zero.\n");
1781  } else {
1782  LLVM_DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
1783  }
1784  }
1785  }
1786 }
1787 
1788 /// Once all the branch weights are computed, we emit the MD_prof
1789 /// metadata on BB using the computed values for each of its branches.
1790 ///
/// The visible steps are: validate the profile for \p F (or check that the
/// function has a non-zero location), run one of the two hot-function
/// inliners, compute and propagate weights, generate MD_prof metadata when
/// anything changed, and emit coverage remarks.
///
1791 /// \param F The function to query.
1792 ///
1793 /// \returns true if \p F was modified. Returns false, otherwise.
// NOTE(review): the conditions guarding the probe-validity check and the
// choice between the two inliners are not visible in this listing.
1794 bool SampleProfileLoader::emitAnnotations(Function &F) {
1795  bool Changed = false;
1796 
 // A profile that ProbeManager reports as invalid for F is counted as
 // mismatched and the function is left unannotated.
1798  if (!ProbeManager->profileIsValid(F, *Samples)) {
1799  LLVM_DEBUG(
1800  dbgs() << "Profile is invalid due to CFG mismatch for Function "
1801  << F.getName());
1802  ++NumMismatchedProfile;
1803  return false;
1804  }
1805  ++NumMatchedProfile;
1806  } else {
 // On this path a function whose getFunctionLoc() is 0 is not annotated.
1807  if (getFunctionLoc(F) == 0)
1808  return false;
1809 
1810  LLVM_DEBUG(dbgs() << "Line number for the first instruction in "
1811  << F.getName() << ": " << getFunctionLoc(F) << "\n");
1812  }
1813 
 // InlinedGUIDs is filled by the inliner call and then handed to
 // computeAndPropagateWeights.
1814  DenseSet<GlobalValue::GUID> InlinedGUIDs;
1816  Changed |= inlineHotFunctionsWithPriority(F, InlinedGUIDs);
1817  else
1818  Changed |= inlineHotFunctions(F, InlinedGUIDs);
1819 
1820  Changed |= computeAndPropagateWeights(F, InlinedGUIDs);
1821 
 // Metadata is only generated when inlining or weight propagation reported
 // a change.
1822  if (Changed)
1823  generateMDProfMetadata(F);
1824 
1825  emitCoverageRemarks(F);
1826  return Changed;
1827 }
1828 
1830 
// Pass-registration macros for SampleProfileLoaderLegacyPass, using the
// argument string "sample-profile" and the name "Sample Profile loader".
// NOTE(review): the lines between BEGIN and END and the tail of the END
// invocation are not present in this listing.
1831 INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile",
1832  "Sample Profile loader", false, false)
1837 INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",
1839 
// Builds a ProfiledCallGraph either from the context tracker or from the
// profiles held by the reader, then adds every defined function of \p CG
// that carries the "use-sample-profile" attribute.
//
// \param CG The static call graph whose nodes are scanned for functions.
// \returns the newly created profiled call graph.
// NOTE(review): the condition choosing between the two constructors is not
// visible in this listing.
1840 std::unique_ptr<ProfiledCallGraph>
1841 SampleProfileLoader::buildProfiledCallGraph(CallGraph &CG) {
1842  std::unique_ptr<ProfiledCallGraph> ProfiledCG;
1844  ProfiledCG = std::make_unique<ProfiledCallGraph>(*ContextTracker);
1845  else
1846  ProfiledCG = std::make_unique<ProfiledCallGraph>(Reader->getProfiles());
1847 
1848  // Add all functions into the profiled call graph even if they are not in
1849  // the profile. This makes sure functions missing from the profile still
1850  // gets a chance to be processed.
1851  for (auto &Node : CG) {
1852  const auto *F = Node.first;
 // Skip null entries, declarations, and functions that are not marked for
 // sample-profile use.
1853  if (!F || F->isDeclaration() || !F->hasFnAttribute("use-sample-profile"))
1854  continue;
 // Functions are registered under their canonical name.
1855  ProfiledCG->addProfiledFunction(FunctionSamples::getCanonicalFnName(*F));
1856  }
1857 
1858  return ProfiledCG;
1859 }
1860 
// Computes the order in which the functions of \p M are processed.
//
// When ProfileTopDownLoad is off or \p CG is null, the defined functions
// carrying the "use-sample-profile" attribute are returned in module order
// and ProfileMergeInlinee is turned off. Otherwise functions are collected
// by walking SCCs (of the profiled call graph or of the static call graph)
// and the collected list is reversed before it is returned.
//
// \param M The module whose functions are ordered.
// \param CG The static call graph of \p M; may be null.
// \returns the functions to process, in processing order.
// NOTE(review): the conditions that guard the warning below and that select
// the profiled-call-graph branch are not visible in this listing.
1861 std::vector<Function *>
1862 SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
1863  std::vector<Function *> FunctionOrderList;
1864  FunctionOrderList.reserve(M.size());
1865 
1867  errs() << "WARNING: -use-profiled-call-graph ignored, should be used "
1868  "together with -sample-profile-top-down-load.\n";
1869 
1870  if (!ProfileTopDownLoad || CG == nullptr) {
1871  if (ProfileMergeInlinee) {
1872  // Disable ProfileMergeInlinee if profile is not loaded in top down order,
1873  // because the profile for a function may be used for the profile
1874  // annotation of its outline copy before the profile merging of its
1875  // non-inlined inline instances, and that is not the way how
1876  // ProfileMergeInlinee is supposed to work.
1877  ProfileMergeInlinee = false;
1878  }
1879 
 // Module order: keep only definitions marked for sample-profile use.
1880  for (Function &F : M)
1881  if (!F.isDeclaration() && F.hasFnAttribute("use-sample-profile"))
1882  FunctionOrderList.push_back(&F);
1883  return FunctionOrderList;
1884  }
1885 
1886  assert(&CG->getModule() == &M);
1887 
1890  // Use profiled call edges to augment the top-down order. There are cases
1891  // that the top-down order computed based on the static call graph doesn't
1892  // reflect real execution order. For example
1893  //
1894  // 1. Incomplete static call graph due to unknown indirect call targets.
1895  // Adjusting the order by considering indirect call edges from the
1896  // profile can enable the inlining of indirect call targets by allowing
1897  // the caller processed before them.
1898  // 2. Mutual call edges in an SCC. The static processing order computed for
1899  // an SCC may not reflect the call contexts in the context-sensitive
1900  // profile, thus may cause potential inlining to be overlooked. The
1901  // function order in one SCC is being adjusted to a top-down order based
1902  // on the profile to favor more inlining. This is only a problem with CS
1903  // profile.
1904  // 3. Transitive indirect call edges due to inlining. When a callee function
1905  // (say B) is inlined into a caller function (say A) in LTO prelink,
1906  // every call edge originated from the callee B will be transferred to
1907  // the caller A. If any transferred edge (say A->C) is indirect, the
1908  // original profiled indirect edge B->C, even if considered, would not
1909  // enforce a top-down order from the caller A to the potential indirect
1910  // call target C in LTO postlink since the inlined callee B is gone from
1911  // the static call graph.
1912  // 4. #3 can happen even for direct call targets, due to functions defined
1913  // in header files. A header function (say A), when included into source
1914  // files, is defined multiple times but only one definition survives due
1915  // to ODR. Therefore, the LTO prelink inlining done on those dropped
1916  // definitions can be useless based on a local file scope. More
1917  // importantly, the inlinee (say B), once fully inlined to a
1918  // to-be-dropped A, will have no profile to consume when its outlined
1919  // version is compiled. This can lead to a profile-less prelink
1920  // compilation for the outlined version of B which may be called from
1921  // external modules. While this isn't easy to fix, we rely on the
1922  // postlink AutoFDO pipeline to optimize B. Since the survived copy of
1923  // the A can be inlined in its local scope in prelink, it may not exist
1924  // in the merged IR in postlink, and we'll need the profiled call edges
1925  // to enforce a top-down order for the rest of the functions.
1926  //
1927  // Considering those cases, a profiled call graph completely independent of
1928  // the static call graph is constructed based on profile data, where
1929  // function objects are not even needed to handle case #3 and case 4.
1930  //
1931  // Note that static callgraph edges are completely ignored since they
1932  // can be conflicting with profiled edges for cyclic SCCs and may result in
1933  // an SCC order incompatible with profile-defined one. Using strictly
1934  // profile order ensures a maximum inlining experience. On the other hand,
1935  // static call edges are not so important when they don't correspond to a
1936  // context in the profile.
1937 
1938  std::unique_ptr<ProfiledCallGraph> ProfiledCG = buildProfiledCallGraph(*CG);
1939  scc_iterator<ProfiledCallGraph *> CGI = scc_begin(ProfiledCG.get());
1940  while (!CGI.isAtEnd()) {
1941  auto Range = *CGI;
1942  if (SortProfiledSCC) {
1943  // Sort nodes in one SCC based on callsite hotness.
 // NOTE(review): the declaration of `SI` is not visible in this listing.
1945  Range = *SI;
1946  }
 // Profiled call graph nodes are looked up by name in SymbolMap.
1947  for (auto *Node : Range) {
1948  Function *F = SymbolMap.lookup(Node->Name);
1949  if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile"))
1950  FunctionOrderList.push_back(F);
1951  }
1952  ++CGI;
1953  }
1954  } else {
 // NOTE(review): the declaration of `CGI` for this branch is not visible in
 // this listing; the loop iterates over CallGraphNode SCCs.
1956  while (!CGI.isAtEnd()) {
1957  for (CallGraphNode *Node : *CGI) {
1958  auto *F = Node->getFunction();
1959  if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile"))
1960  FunctionOrderList.push_back(F);
1961  }
1962  ++CGI;
1963  }
1964  }
1965 
 // The debug dump walks the list in reverse, which matches the order the
 // list has after the std::reverse below.
1966  LLVM_DEBUG({
1967  dbgs() << "Function processing order:\n";
1968  for (auto F : reverse(FunctionOrderList)) {
1969  dbgs() << F->getName() << "\n";
1970  }
1971  });
1972 
1973  std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
1974  return FunctionOrderList;
1975 }
1976 
// Creates the profile reader for \p M, reads the profile, and configures the
// loader from what the profile reports (context-sensitive, pre-inlined,
// probe-based).
//
// \returns false, after emitting a diagnostic, when the reader cannot be
// created, when reading fails, or when a probe-based profile is used on a
// module that ProbeManager does not report as probed; true otherwise.
// NOTE(review): the second line of the signature and a number of guard lines
// are not present in this listing; `FAM` below is presumably the missing
// second parameter. Confirm against the upstream file.
1977 bool SampleProfileLoader::doInitialization(Module &M,
1979  auto &Ctx = M.getContext();
1980 
1981  auto ReaderOrErr = SampleProfileReader::create(
1982  Filename, Ctx, FSDiscriminatorPass::Base, RemappingFilename);
1983  if (std::error_code EC = ReaderOrErr.getError()) {
1984  std::string Msg = "Could not open profile: " + EC.message();
1985  Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1986  return false;
1987  }
1988  Reader = std::move(ReaderOrErr.get());
1990  // set module before reading the profile so reader may be able to only
1991  // read the function profiles which are used by the current module.
1992  Reader->setModule(&M);
1993  if (std::error_code EC = Reader->read()) {
1994  std::string Msg = "profile reading failed: " + EC.message();
1995  Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1996  return false;
1997  }
1998 
1999  PSL = Reader->getProfileSymbolList();
2000 
2001  // While profile-sample-accurate is on, ignore symbol list.
 // NOTE(review): the right-hand side of this assignment is not visible in
 // this listing.
2002  ProfAccForSymsInList =
 // When the symbol list is honoured, NamesInProfile is rebuilt from the
 // reader's name table (if there is one).
2004  if (ProfAccForSymsInList) {
2005  NamesInProfile.clear();
2006  if (auto NameTable = Reader->getNameTable())
2007  NamesInProfile.insert(NameTable->begin(), NameTable->end());
2008  CoverageTracker.setProfAccForSymsInList(true);
2009  }
2010 
 // An inline-replay advisor is only created when FAM is non-null and a
 // replay file was given.
2011  if (FAM && !ProfileInlineReplayFile.empty()) {
2012  ExternalInlineAdvisor = getReplayInlineAdvisor(
2013  M, *FAM, Ctx, /*OriginalAdvisor=*/nullptr,
2018  /*EmitRemarks=*/false, InlineContext{LTOPhase, InlinePass::ReplaySampleProfileInliner});
2019  }
2020 
2021  // Apply tweaks if context-sensitive or probe-based profile is available.
 // NOTE(review): each assignment below is presumably guarded by a check that
 // the corresponding option was not set explicitly; those guard lines are
 // not visible here.
2022  if (Reader->profileIsCS() || Reader->profileIsPreInlined() ||
2023  Reader->profileIsProbeBased()) {
2025  UseIterativeBFIInference = true;
2027  SampleProfileUseProfi = true;
2030  // Enable priority-based inliner and size inline by default for CSSPGO.
2032  ProfileSizeInline = true;
2035  // For CSSPGO, we also allow recursive inline to best use context profile.
2037  AllowRecursiveInline = true;
2038 
2039  if (Reader->profileIsPreInlined()) {
2041  UsePreInlinerDecision = true;
2042  }
2043 
2044  if (!Reader->profileIsCS()) {
2045  // Non-CS profile should be fine without a function size budget for the
2046  // inliner since the contexts in the profile are either all from inlining
2047  // in the previous build or pre-computed by the preinliner with a size
2048  // cap, thus they are bounded.
 // NOTE(review): the statements controlled by the next two conditions are
 // not visible in this listing.
2049  if (!ProfileInlineLimitMin.getNumOccurrences())
2051  if (!ProfileInlineLimitMax.getNumOccurrences())
2053  }
2054  }
2055 
2056  if (Reader->profileIsCS()) {
2057  // Tracker for profiles under different context
2058  ContextTracker = std::make_unique<SampleContextTracker>(
2059  Reader->getProfiles(), &GUIDToFuncNameMap);
2060  }
2061 
2062  // Load pseudo probe descriptors for probe-based function samples.
2063  if (Reader->profileIsProbeBased()) {
2064  ProbeManager = std::make_unique<PseudoProbeManager>(M);
 // A probe-based profile on a module that is not probed is reported as a
 // warning and initialization fails.
2065  if (!ProbeManager->moduleIsProbed(M)) {
2066  const char *Msg =
2067  "Pseudo-probe-based profile requires SampleProfileProbePass";
2068  Ctx.diagnose(DiagnosticInfoSampleProfile(M.getModuleIdentifier(), Msg,
2069  DS_Warning));
2070  return false;
2071  }
2072  }
2073 
2074  return true;
2075 }
2076 
// Returns a newly allocated, default-constructed
// SampleProfileLoaderLegacyPass.
// NOTE(review): the signature line of this factory is not present in this
// listing.
2078  return new SampleProfileLoaderLegacyPass();
2079 }
2080 
// Returns a newly allocated SampleProfileLoaderLegacyPass constructed from
// `Name`.
// NOTE(review): the signature line, and with it the declaration of `Name`,
// is not present in this listing.
2082  return new SampleProfileLoaderLegacyPass(Name);
2083 }
2084 
// Runs the sample profile loader over every function of \p M.
//
// Steps visible below: install the profile summary on the module when it has
// none, accumulate TotalCollectedSamples, populate SymbolMap from the
// module's value symbol table, process the functions in the order returned
// by buildFunctionOrder, and finally call updateProfileCallee for every
// entry of notInlinedCallInfo.
//
// \param M The module to annotate.
// \param AM Forwarded unchanged to runOnFunction.
// \param _PSI Stored into the PSI member.
// \param CG Forwarded to buildFunctionOrder.
// \returns the OR of the runOnFunction results.
2085 bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
2086  ProfileSummaryInfo *_PSI, CallGraph *CG) {
2087  GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
2088 
2089  PSI = _PSI;
 // Only set a profile summary when the module does not already have one.
 // NOTE(review): the trailing argument(s) of setProfileSummary are not
 // visible in this listing.
2090  if (M.getProfileSummary(/* IsCS */ false) == nullptr) {
2091  M.setProfileSummary(Reader->getSummary().getMD(M.getContext()),
2093  PSI->refresh();
2094  }
2095  // Compute the total number of samples collected in this profile.
2096  for (const auto &I : Reader->getProfiles())
2097  TotalCollectedSamples += I.second.getTotalSamples();
2098 
2099  auto Remapper = Reader->getRemapper();
2100  // Populate the symbol map.
2101  for (const auto &N_F : M.getValueSymbolTable()) {
2102  StringRef OrigName = N_F.getKey();
2103  Function *F = dyn_cast<Function>(N_F.getValue());
 // Non-function symbols and symbols with an empty name are ignored.
2104  if (F == nullptr || OrigName.empty())
2105  continue;
2106  SymbolMap[OrigName] = F;
 // NOTE(review): the declaration of `NewName` is not visible in this
 // listing.
2108  if (OrigName != NewName && !NewName.empty()) {
2109  auto r = SymbolMap.insert(std::make_pair(NewName, F));
2110  // Failing to insert means there is already an entry in SymbolMap,
2111  // thus there are multiple functions that are mapped to the same
2112  // stripped name. In this case of name conflicting, set the value
2113  // to nullptr to avoid confusion.
2114  if (!r.second)
2115  r.first->second = nullptr;
2116  OrigName = NewName;
2117  }
2118  // Insert the remapped names into SymbolMap.
2119  if (Remapper) {
2120  if (auto MapName = Remapper->lookUpNameInProfile(OrigName)) {
2121  if (*MapName != OrigName && !MapName->empty())
2122  SymbolMap.insert(std::make_pair(*MapName, F));
2123  }
2124  }
2125  }
2126  assert(SymbolMap.count(StringRef()) == 0 &&
2127  "No empty StringRef should be added in SymbolMap");
2128 
 // clearFunctionData() is called before each function is processed.
2129  bool retval = false;
2130  for (auto F : buildFunctionOrder(M, CG)) {
2131  assert(!F->isDeclaration());
2132  clearFunctionData();
2133  retval |= runOnFunction(*F, AM);
2134  }
2135 
2136  // Account for cold calls not inlined....
 // NOTE(review): any condition guarding this loop is not visible in this
 // listing.
2138  for (const std::pair<Function *, NotInlinedProfileInfo> &pair :
2139  notInlinedCallInfo)
2140  updateProfileCallee(pair.first, pair.second.entryCount);
2141 
2142  return retval;
2143 }
2144 
2145 bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
2146  ACT = &getAnalysis<AssumptionCacheTracker>();
2147  TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
2148  TLIWP = &getAnalysis<TargetLibraryInfoWrapperPass>();
2149  ProfileSummaryInfo *PSI =
2150  &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
2151  return SampleLoader.runOnModule(M, nullptr, PSI, nullptr);
2152 }
2153 
// Per-function driver: picks the initial entry count for F, selects the
// remark emitter, looks up the samples for F, and calls emitAnnotations when
// non-empty samples exist. Returns false when there are no samples.
// NOTE(review): the signature line is not present in this listing; `F` and
// `AM` are presumably its parameters. Confirm against the upstream file.
2155  LLVM_DEBUG(dbgs() << "\n\nProcessing Function " << F.getName() << "\n");
2156  DILocation2SampleMap.clear();
2157  // By default the entry count is initialized to -1, which will be treated
2158  // conservatively by getEntryCount as the same as unknown (None). This is
2159  // to avoid newly added code to be treated as cold. If we have samples
2160  // this will be overwritten in emitAnnotations.
2161  uint64_t initialEntryCount = -1;
2162 
2163  ProfAccForSymsInList = ProfileAccurateForSymsInList && PSL;
2164  if (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate")) {
2165  // initialize all the function entry counts to 0. It means all the
2166  // functions without profile will be regarded as cold.
2167  initialEntryCount = 0;
2168  // profile-sample-accurate is a user assertion which has a higher precedence
2169  // than symbol list. When profile-sample-accurate is on, ignore symbol list.
2170  ProfAccForSymsInList = false;
2171  }
2172  CoverageTracker.setProfAccForSymsInList(ProfAccForSymsInList);
2173 
2174  // PSL -- profile symbol list include all the symbols in sampled binary.
2175  // If ProfileAccurateForSymsInList is enabled, PSL is used to treat
2176  // old functions without samples being cold, without having to worry
2177  // about new and hot functions being mistakenly treated as cold.
2178  if (ProfAccForSymsInList) {
2179  // Initialize the entry count to 0 for functions in the list.
2180  if (PSL->contains(F.getName()))
2181  initialEntryCount = 0;
2182 
2183  // Function in the symbol list but without sample will be regarded as
2184  // cold. To minimize the potential negative performance impact it could
2185  // have, we want to be a little conservative here saying if a function
2186  // shows up in the profile, no matter as outline function, inline instance
2187  // or call targets, treat the function as not being cold. This will handle
2188  // the cases such as most callsites of a function are inlined in sampled
2189  // binary but not inlined in current build (because of source code drift,
2190  // imprecise debug information, or the callsites are all cold individually
2191  // but not cold accumulatively...), so the outline function showing up as
2192  // cold in sampled binary will actually not be cold after current build.
 // NOTE(review): the declaration of `CanonName` is not visible in this
 // listing.
2194  if (NamesInProfile.count(CanonName))
2195  initialEntryCount = -1;
2196  }
2197 
2198  // Initialize entry count when the function has no existing entry
2199  // count value.
2200  if (!F.getEntryCount())
2201  F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
 // Without an analysis manager a locally owned remark emitter is created and
 // ORE points at it.
 // NOTE(review): the AM branch is only partly visible in this listing.
2202  std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
2203  if (AM) {
2204  auto &FAM =
2206  .getManager();
2208  } else {
2209  OwnedORE = std::make_unique<OptimizationRemarkEmitter>(&F);
2210  ORE = OwnedORE.get();
2211  }
2212 
 // NOTE(review): the condition choosing between the context tracker and the
 // reader is not visible in this listing.
2214  Samples = ContextTracker->getBaseSamplesFor(F);
2215  else
2216  Samples = Reader->getSamplesFor(F);
2217 
 // Only functions with non-empty samples are annotated.
2218  if (Samples && !Samples->empty())
2219  return emitAnnotations(F);
2220  return false;
2221 }
2222 
2224  ModuleAnalysisManager &AM) {
 // New pass manager entry point. Builds a SampleProfileLoader from the pass's
 // file names (falling back to the command-line options when they are empty),
 // initializes it, and runs it on M. Returns PreservedAnalyses::all() when
 // initialization fails or the loader reports no change, and
 // PreservedAnalyses::none() otherwise.
 // NOTE(review): the first signature line and the declarations of `FAM`,
 // `PSI` and `CG` are not present in this listing.
2227 
 // Callbacks handed to the loader to fetch per-function analyses on demand.
2228  auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
2229  return FAM.getResult<AssumptionAnalysis>(F);
2230  };
2231  auto GetTTI = [&](Function &F) -> TargetTransformInfo & {
2232  return FAM.getResult<TargetIRAnalysis>(F);
2233  };
 // NOTE(review): the body of GetTLI is not visible in this listing.
2234  auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
2236  };
2237 
2238  SampleProfileLoader SampleLoader(
2239  ProfileFileName.empty() ? SampleProfileFile : ProfileFileName,
2240  ProfileRemappingFileName.empty() ? SampleProfileRemappingFile
2241  : ProfileRemappingFileName,
2242  LTOPhase, GetAssumptionCache, GetTTI, GetTLI);
2243 
2244  if (!SampleLoader.doInitialization(M, &FAM))
2245  return PreservedAnalyses::all();
2246 
2249  if (!SampleLoader.runOnModule(M, &AM, PSI, &CG))
2250  return PreservedAnalyses::all();
2251 
2252  return PreservedAnalyses::none();
2253 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
AnnotateSampleProfileInlinePhase
static cl::opt< bool > AnnotateSampleProfileInlinePhase("annotate-sample-profile-inline-phase", cl::Hidden, cl::init(false), cl::desc("Annotate LTO phase (prelink / postlink), or main (no LTO) for " "sample-profile inline pass name."))
Instrumentation.h
llvm::InlineCost::isAlways
bool isAlways() const
Definition: InlineCost.h:129
llvm::sampleprof::FunctionSamples::getBodySamples
const BodySampleMap & getBodySamples() const
Return all the samples collected in the body of the function.
Definition: SampleProf.h:920
llvm::InlineCost::getCost
int getCost() const
Get the inline cost estimate.
Definition: InlineCost.h:135
AssumptionCache.h
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2479
llvm::SampleProfileLoaderPass::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: SampleProfile.cpp:2223
EnableExtTspBlockPlacement
cl::opt< bool > EnableExtTspBlockPlacement
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
AllowRecursiveInline
static cl::opt< bool > AllowRecursiveInline("sample-profile-recursive-inline", cl::Hidden, cl::desc("Allow sample loader inliner to inline recursive calls."))
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
it
into xmm2 addss xmm2 xmm1 xmm3 addss xmm3 movaps xmm0 unpcklps xmm0 ret seems silly when it could just be one addps Expand libm rounding functions main should enable SSE DAZ mode and other fast SSE modes Think about doing i64 math in SSE regs on x86 This testcase should have no SSE instructions in it
Definition: README-SSE.txt:81
ProfileInlineGrowthLimit
cl::opt< int > ProfileInlineGrowthLimit("sample-profile-inline-growth-limit", cl::Hidden, cl::init(12), cl::desc("The size growth ratio limit for proirity-based sample profile " "loader inlining."))
llvm::sampleprof::ContextDuplicatedIntoBase
@ ContextDuplicatedIntoBase
Definition: SampleProf.h:443
ProfileInlineLimitMax
cl::opt< int > ProfileInlineLimitMax("sample-profile-inline-limit-max", cl::Hidden, cl::init(10000), cl::desc("The upper bound of size growth limit for " "proirity-based sample profile loader inlining."))
llvm::sampleprof::FunctionSamples::ProfileIsProbeBased
static bool ProfileIsProbeBased
Definition: SampleProf.h:1112
llvm::CallGraphAnalysis
An analysis pass to compute the CallGraph for a Module.
Definition: CallGraph.h:304
llvm::sampleprof::FunctionSamples::ProfileIsCS
static bool ProfileIsCS
Definition: SampleProf.h:1114
llvm::ModulePass
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:248
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
IntrinsicInst.h
SCCIterator.h
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:780
llvm::ThinOrFullLTOPhase::ThinLTOPostLink
@ ThinLTOPostLink
ThinLTO postlink (backend compile) phase.
llvm::sampleprof::SampleProfileReader::profileIsProbeBased
bool profileIsProbeBased() const
Whether input profile is based on pseudo probes.
Definition: SampleProfReader.h:471
llvm::sampleprof::SampleContext::hasAttribute
bool hasAttribute(ContextAttributeMask A)
Definition: SampleProf.h:590
llvm::Function
Definition: Function.h:60
llvm::DenseMapBase::lookup
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:199
SizeLimit
static cl::opt< unsigned > SizeLimit("eif-limit", cl::init(6), cl::Hidden, cl::desc("Size limit in Hexagon early if-conversion"))
StringRef.h
Pass.h
DEBUG_TYPE
#define DEBUG_TYPE
Definition: SampleProfile.cpp:96
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
Statistic.h
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:241
llvm::SampleProfileLoaderBaseImpl
Definition: SampleProfileLoaderBaseImpl.h:81
llvm::Function::getSubprogram
DISubprogram * getSubprogram() const
Get the attached subprogram.
Definition: Metadata.cpp:1571
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
SampleProfileRemappingFile
static cl::opt< std::string > SampleProfileRemappingFile("sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden)
OptimizationRemarkEmitter.h
llvm::CallGraph
The basic data container for the call graph of a Module of IR.
Definition: CallGraph.h:72
FAM
FunctionAnalysisManager FAM
Definition: PassBuilderBindings.cpp:59
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
ProfileICPRelativeHotnessSkip
static cl::opt< unsigned > ProfileICPRelativeHotnessSkip("sample-profile-icp-relative-hotness-skip", cl::Hidden, cl::init(1), cl::desc("Skip relative hotness check for ICP up to given number of targets."))
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::erase
bool erase(const KeyT &Val)
Definition: DenseMap.h:304
llvm::emitInlinedIntoBasedOnCost
void emitInlinedIntoBasedOnCost(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, const BasicBlock *Block, const Function &Callee, const Function &Caller, const InlineCost &IC, bool ForProfileContext=false, const char *PassName=nullptr)
Emit ORE message based in cost (default heuristic).
Definition: InlineAdvisor.cpp:500
llvm::createSampleProfileLoaderPass
ModulePass * createSampleProfileLoaderPass()
Definition: SampleProfile.cpp:2077
ProfileInlineLimitMin
cl::opt< int > ProfileInlineLimitMin("sample-profile-inline-limit-min", cl::Hidden, cl::init(100), cl::desc("The lower bound of size growth limit for " "proirity-based sample profile loader inlining."))
llvm::DILocation
Debug location.
Definition: DebugInfoMetadata.h:1557
llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:155
llvm::sampleprof::ContextShouldBeInlined
@ ContextShouldBeInlined
Definition: SampleProf.h:442
DenseMap.h
updateIDTMetaData
static void updateIDTMetaData(Instruction &Inst, const SmallVectorImpl< InstrProfValueData > &CallTargets, uint64_t Sum)
Update indirect call target profile metadata for Inst.
Definition: SampleProfile.cpp:849
Module.h
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:380
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile", "Sample Profile loader", false, false) INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass
llvm::InlineCost::getAlways
static InlineCost getAlways(const char *Reason, Optional< CostBenefitPair > CostBenefit=None)
Definition: InlineCost.h:117
ProfileMergeInlinee
static cl::opt< bool > ProfileMergeInlinee("sample-profile-merge-inlinee", cl::Hidden, cl::init(true), cl::desc("Merge past inlinee's profile to outline version if sample " "profile loader decided not to inline a call site. It will " "only be enabled when top-down order of profile loading is " "enabled. "))
llvm::Optional
Definition: APInt.h:33
llvm::InlineParams
Thresholds to tune inline cost analysis.
Definition: InlineCost.h:190
llvm::DenseMapBase::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:147
T
#define T
Definition: Mips16ISelLowering.cpp:341
llvm::PseudoProbe::Factor
float Factor
Definition: PseudoProbe.h:80
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::ThinOrFullLTOPhase::ThinLTOPreLink
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:893
llvm::sampleprof::FunctionSamples::findInlinedFunctions
void findInlinedFunctions(DenseSet< GlobalValue::GUID > &S, const StringMap< Function * > &SymbolMap, uint64_t Threshold) const
Recursively traverses all children, if the total sample count of the corresponding function is no les...
Definition: SampleProf.h:981
llvm::StringSet::insert
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition: StringSet.h:34
llvm::CallSiteFormat::Format::LineDiscriminator
@ LineDiscriminator
llvm::dump
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Definition: SparseBitVector.h:877
llvm::sampleprof::FunctionSamples::getName
StringRef getName() const
Return the function name.
Definition: SampleProf.h:1011
llvm::sampleprof::FunctionSamplesMap
std::map< std::string, FunctionSamples, std::less<> > FunctionSamplesMap
Definition: SampleProf.h:712
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::initializeSampleProfileLoaderLegacyPassPass
void initializeSampleProfileLoaderLegacyPassPass(PassRegistry &)
llvm::InlineCost::isNever
bool isNever() const
Definition: InlineCost.h:130
llvm::sampleprof::SampleProfileReader::getRemapper
SampleProfileReaderItaniumRemapper * getRemapper()
Definition: SampleProfReader.h:497
llvm::scc_member_iterator
Sort the nodes of a directed SCC in the decreasing order of the edge weights.
Definition: SCCIterator.h:252
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::insert
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
llvm::Data
@ Data
Definition: SIMachineScheduler.h:55
ProfileInlineReplayFallback
static cl::opt< ReplayInlinerSettings::Fallback > ProfileInlineReplayFallback("sample-profile-inline-replay-fallback", cl::init(ReplayInlinerSettings::Fallback::Original), cl::values(clEnumValN(ReplayInlinerSettings::Fallback::Original, "Original", "All decisions not in replay send to original advisor (default)"), clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline, "AlwaysInline", "All decisions not in replay are inlined"), clEnumValN(ReplayInlinerSettings::Fallback::NeverInline, "NeverInline", "All decisions not in replay are not inlined")), cl::desc("How sample profile inline replay treats sites that don't come " "from the replay. Original: defers to original advisor, " "AlwaysInline: inline all sites not in replay, NeverInline: " "inline no sites not in replay"), cl::Hidden)
llvm::ReplayInlinerSettings::Fallback::Original
@ Original
ProfileSampleBlockAccurate
static cl::opt< bool > ProfileSampleBlockAccurate("profile-sample-block-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "branches and calls as having 0 samples. Otherwise, treat " "them conservatively as unknown. "))
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
UsePreInlinerDecision
static cl::opt< bool > UsePreInlinerDecision("sample-profile-use-preinliner", cl::Hidden, cl::desc("Use the preinliner decisions stored in profile context."))
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:240
llvm::Instruction::setMetadata
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1366
llvm::InlineParams::ComputeFullInlineCost
Optional< bool > ComputeFullInlineCost
Compute inline cost even when the cost has exceeded the threshold.
Definition: InlineCost.h:217
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::PseudoProbe::Id
uint32_t Id
Definition: PseudoProbe.h:74
llvm::sampleprof::FunctionSamples::SetContextSynthetic
void SetContextSynthetic()
Definition: SampleProf.h:817
Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:66
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Instruction.h
llvm::ThinOrFullLTOPhase
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition: Pass.h:73
CommandLine.h
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
llvm::Instruction::getNumSuccessors
unsigned getNumSuccessors() const
Return the number of successors that this instruction has.
Definition: Instruction.cpp:777
llvm::sampleprof::FunctionSamples::getFuncName
StringRef getFuncName() const
Return the original function name.
Definition: SampleProf.h:1014
BlockFrequencyInfoImpl.h
llvm::Instruction::extractProfTotalWeight
bool extractProfTotalWeight(uint64_t &TotalVal) const
Retrieve total raw weight values of a branch.
Definition: Metadata.cpp:1460
GlobalValue.h
DisableSampleLoaderInlining
static cl::opt< bool > DisableSampleLoaderInlining("disable-sample-loader-inlining", cl::Hidden, cl::init(false), cl::desc("If true, artifically skip inline transformation in sample-loader " "pass, and merge (or scale) profiles (as configured by " "--sample-profile-merge-inlinee)."))
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::GlobalValue::isDeclaration
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:264
llvm::sampleprof::SampleProfileReader::profileIsCS
bool profileIsCS() const
Whether input profile is fully context-sensitive.
Definition: SampleProfReader.h:474
SortProfiledSCC
cl::opt< bool > SortProfiledSCC("sort-profiled-scc-member", cl::init(true), cl::Hidden, cl::desc("Sort profiled recursion by edge weights."))
llvm::msgpack::Type::Map
@ Map
llvm::getInlineCost
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
Definition: InlineCost.cpp:2803
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::DS_Warning
@ DS_Warning
Definition: DiagnosticInfo.h:51
llvm::sampleprof::SampleProfileReader::read
std::error_code read()
The interface to read sample profiles from the associated file.
Definition: SampleProfReader.h:370
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::ProfileSummary::getMD
Metadata * getMD(LLVMContext &Context, bool AddPartialField=true, bool AddPartialProfileRatioField=true)
Return summary information as metadata.
Definition: ProfileSummary.cpp:80
Twine.h
InstrTypes.h
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1396
llvm::sampleprof::ProfiledCallGraph
Definition: ProfiledCallGraph.h:62
llvm::sampleprof::SyntheticContext
@ SyntheticContext
Definition: SampleProf.h:433
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::InlineCost
Represents the cost of inlining a function.
Definition: InlineCost.h:87
llvm::updateProfileCallee
void updateProfileCallee(Function *Callee, int64_t EntryDelta, const ValueMap< const Value *, WeakTrackingVH > *VMap=nullptr)
Updates profile information by adjusting the entry count by adding EntryDelta then scaling callsite i...
Definition: InlineFunction.cpp:1607
TargetLibraryInfo.h
DenseSet.h
false
Definition: StackSlotColoring.cpp:141
llvm::orc::SymbolMap
DenseMap< SymbolStringPtr, JITEvaluatedSymbol > SymbolMap
A map from symbol names (as SymbolStringPtrs) to JITSymbols (address/flags pairs).
Definition: Core.h:113
llvm::sampleprof::FunctionSamples::getGUID
static uint64_t getGUID(StringRef Name)
Definition: SampleProf.h:1138
SampleProf.h
InlineAdvisor.h
ProfileInlineReplayFormat
static cl::opt< CallSiteFormat::Format > ProfileInlineReplayFormat("sample-profile-inline-replay-format", cl::init(CallSiteFormat::Format::LineColumnDiscriminator), cl::values(clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"), clEnumValN(CallSiteFormat::Format::LineColumn, "LineColumn", "<Line Number>:<Column Number>"), clEnumValN(CallSiteFormat::Format::LineDiscriminator, "LineDiscriminator", "<Line Number>.<Discriminator>"), clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator, "LineColumnDiscriminator", "<Line Number>:<Column Number>.<Discriminator> (default)")), cl::desc("How sample profile inline replay file is formatted"), cl::Hidden)
ProfileCount
Function::ProfileCount ProfileCount
Definition: SampleProfile.cpp:95
llvm::CallSiteFormat::Format::LineColumnDiscriminator
@ LineColumnDiscriminator
llvm::pdb::PDB_SymType::Caller
@ Caller
llvm::Instruction
Definition: Instruction.h:42
InstrProf.h
MDBuilder.h
llvm::StringMap< NoneType, MallocAllocator >::clear
void clear()
Definition: StringMap.h:348
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::ReplayInlinerSettings::Fallback::NeverInline
@ NeverInline
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:395
llvm::setProbeDistributionFactor
void setProbeDistributionFactor(Instruction &Inst, float Factor)
Definition: PseudoProbe.cpp:65
DebugLoc.h
llvm::Function::PCT_Real
@ PCT_Real
Definition: Function.h:248
llvm::CallGraphNode
A node in the call graph for a module.
Definition: CallGraph.h:166
llvm::Instruction::getSuccessor
BasicBlock * getSuccessor(unsigned Idx) const
Return the specified successor. This instruction must be a terminator.
Definition: Instruction.cpp:789
llvm::InlineCost::get
static InlineCost get(int Cost, int Threshold)
Definition: InlineCost.h:112
llvm::getInlineParams
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
Definition: InlineCost.cpp:3081
SampleProfileLoaderBaseUtil.h
StringMap.h
llvm::isLegalToPromote
bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
Definition: CallPromotionUtils.cpp:382
llvm::ProfileSummary::PSK_Sample
@ PSK_Sample
Definition: ProfileSummary.h:47
llvm::CallSiteFormat::Format::LineColumn
@ LineColumn
llvm::InlineContext
Provides context on when an inline advisor is constructed in the pipeline (e.g., link phase,...
Definition: InlineAdvisor.h:58
llvm::sampleprof::SampleProfileReader::getNameTable
virtual std::vector< StringRef > * getNameTable()
It includes all the names that have samples either in outline instance or inline instance.
Definition: SampleProfReader.h:485
llvm::sampleprof::SampleContext
Definition: SampleProf.h:501
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::StringMap
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
Definition: StringMap.h:110
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::PriorityQueue
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Definition: PriorityQueue.h:28
llvm::scc_begin
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
Definition: SCCIterator.h:232
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:39
llvm::sampleprof::FunctionSamples::empty
bool empty() const
Definition: SampleProf.h:882
ValueSymbolTable.h
SampleProfile.h
llvm::DenseSet
Implements a dense probed hash-table based set.
Definition: DenseSet.h:268
llvm::HighlightColor::Remark
@ Remark
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1392
ReplayInlineAdvisor.h
llvm::ProfileCount
Function::ProfileCount ProfileCount
Definition: SampleProfileLoaderBaseImpl.h:47
llvm::DiagnosticInfoOptimizationBase::Argument
Used in the streaming interface as the general argument type.
Definition: DiagnosticInfo.h:427
llvm::cl::values
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:685
llvm::StringRef::empty
constexpr LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:153
ProfiledCallGraph.h
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:468
uint64_t
ProfileSummaryInfo.h
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2535
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:620
llvm::sampleprof::FunctionSamples::getEntrySamples
uint64_t getEntrySamples() const
Return the sample count of the first instruction of the function.
Definition: SampleProf.h:896
llvm::sampleprof::SampleProfileReader::getSamplesFor
FunctionSamples * getSamplesFor(const Function &F)
Return the samples collected for function F.
Definition: SampleProfReader.h:395
SampleProfileFile
static cl::opt< std::string > SampleProfileFile("sample-profile-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile file loaded by -sample-profile"), cl::Hidden)
llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:173
llvm::scc_iterator
Enumerate the SCCs of a directed graph in reverse topological order of the SCC DAG.
Definition: SCCIterator.h:46
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
IPO.h
llvm::sampleprof::FunctionSamples
Representation of the samples collected for a function.
Definition: SampleProf.h:720
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::DenseMap
Definition: DenseMap.h:716
ErrorOr.h
I
#define I(x, y, z)
Definition: MD5.cpp:58
PriorityQueue.h
getCalledFunction
static const Function * getCalledFunction(const Value *V, bool &IsNoBuiltin)
Definition: MemoryBuiltins.cpp:160
llvm::SampleProfileUseProfi
cl::opt< bool > SampleProfileUseProfi
Cloning.h
SampleProfReader.h
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
llvm::ProfileSummaryInfoWrapperPass
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Definition: ProfileSummaryInfo.h:193
ArrayRef.h
llvm::codeview::FrameProcedureOptions::Inlined
@ Inlined
llvm::sampleprof::SampleRecord::adjustCallTargets
static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets, float DistributionFactor)
Prorate call targets by a distribution factor.
Definition: SampleProf.h:407
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:152
llvm::ReplayInlinerSettings::Scope::Module
@ Module
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1675
llvm::X86AS::FS
@ FS
Definition: X86.h:192
llvm::Optional::getValue
constexpr const T & getValue() const &
Definition: Optional.h:306
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::sampleprof::FunctionSamples::UseMD5
static bool UseMD5
Whether the profile uses MD5 to represent string.
Definition: SampleProf.h:1123
llvm::codeview::CompileSym2Flags::EC
@ EC
InlineCost.h
CSINLINE_DEBUG
#define CSINLINE_DEBUG
Definition: SampleProfile.cpp:97
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:82
llvm::sampleprof::SampleProfileReader::create
static ErrorOr< std::unique_ptr< SampleProfileReader > > create(const std::string Filename, LLVMContext &C, FSDiscriminatorPass P=FSDiscriminatorPass::Base, const std::string RemapFilename="")
Create a sample profile reader appropriate to the file format.
Definition: SampleProfReader.cpp:1787
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
SampleProfileProbe.h
llvm::InlineCost::getNever
static InlineCost getNever(const char *Reason, Optional< CostBenefitPair > CostBenefit=None)
Definition: InlineCost.h:121
llvm::sampleprof::SampleProfileReader::setSkipFlatProf
virtual void setSkipFlatProf(bool Skip)
Don't read profile without context if the flag is set.
Definition: SampleProfReader.h:493
SampleHotCallSiteThreshold
cl::opt< int > SampleHotCallSiteThreshold("sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000), cl::desc("Hot callsite threshold for proirity-based sample profile loader " "inlining."))
llvm::DiagnosticInfoSampleProfile
Diagnostic information for the sample profiler.
Definition: DiagnosticInfo.h:291
llvm::ProfileSummaryAnalysis
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Definition: ProfileSummaryInfo.h:211
llvm::StringSet
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:23
llvm::CallSiteFormat::Format::Line
@ Line
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:202
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::sampleprof::SampleProfileReader::getProfileSymbolList
virtual std::unique_ptr< ProfileSymbolList > getProfileSymbolList()
Definition: SampleProfReader.h:479
uint32_t
clEnumValN
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:660
CallPromotionUtils.h
Profile
Load MIR Sample Profile
Definition: MIRSampleProfile.cpp:70
llvm::ContextTrieNode
Definition: SampleContextTracker.h:33
SampleProfileLoaderBaseImpl.h
llvm::format
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
llvm::OptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition: DiagnosticInfo.h:781
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:305
llvm::DenseMapBase::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:209
llvm::isIndirectCall
static bool isIndirectCall(const MachineInstr &MI)
Definition: ARMBaseInstrInfo.h:655
SampleContextTracker.h
llvm::annotateValueSite
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:994
llvm::sampleprofutil
Definition: SampleProfileLoaderBaseUtil.h:33
llvm::sampleprof::SampleProfileReader::getSummary
ProfileSummary & getSummary() const
Return the profile summary.
Definition: SampleProfReader.h:463
llvm::AMDGPU::SendMsg::Msg
const CustomOperand< const MCSubtargetInfo & > Msg[]
Definition: AMDGPUAsmUtils.cpp:39
llvm::InlinePass::ReplaySampleProfileInliner
@ ReplaySampleProfileInliner
Callee
amdgpu Simplify well known AMD library false FunctionCallee Callee
Definition: AMDGPULibCalls.cpp:186
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
llvm::sampleprof::SampleProfileReader::getProfiles
SampleProfileMap & getProfiles()
Return all the profiles.
Definition: SampleProfReader.h:438
llvm::LLVMContext::diagnose
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Definition: LLVMContext.cpp:243
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
profile
sample profile
Definition: SampleProfile.cpp:1837
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:50
std
Definition: BitVector.h:851
llvm::sampleprof::SampleProfileReader::getOrCreateSamplesFor
FunctionSamples * getOrCreateSamplesFor(const Function &F)
Return the samples collected for function F, create empty FunctionSamples if it doesn't exist.
Definition: SampleProfReader.h:405
llvm::DenseMapBase::end
iterator end()
Definition: DenseMap.h:84
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
ProfileInlineReplayScope
static cl::opt< ReplayInlinerSettings::Scope > ProfileInlineReplayScope("sample-profile-inline-replay-scope", cl::init(ReplayInlinerSettings::Scope::Function), cl::values(clEnumValN(ReplayInlinerSettings::Scope::Function, "Function", "Replay on functions that have remarks associated " "with them (default)"), clEnumValN(ReplayInlinerSettings::Scope::Module, "Module", "Replay on the entire module")), cl::desc("Whether inline replay should be applied to the entire " "Module or just the Functions (default) that are present as " "callers in remarks during sample profile inlining."), cl::Hidden)
llvm::GlobalValue::getGUID
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e.
Definition: GlobalValue.h:559
Casting.h
llvm::sampleprofutil::callsiteIsHot
bool callsiteIsHot(const FunctionSamples *CallsiteFS, ProfileSummaryInfo *PSI, bool ProfAccForSymsInList)
Return true if the given callsite is hot wrt to hot cutoff threshold.
Definition: SampleProfileLoaderBaseUtil.cpp:68
DiagnosticInfo.h
Function.h
llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1562
PassManager.h
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:222
llvm::InlineFunctionInfo
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
Definition: Cloning.h:199
UseProfiledCallGraph
static cl::opt< bool > UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden, cl::desc("Process functions in a top-down order " "defined by the profiled call graph when " "-sample-profile-top-down-load is on."))
llvm::pdb::PDB_SymType::CallSite
@ CallSite
llvm::sampleprof::SampleProfileReader
Sample-based profile reader.
Definition: SampleProfReader.h:342
llvm::ThinOrFullLTOPhase::None
@ None
No LTO/ThinLTO behavior needed.
PseudoProbe.h
llvm::sampleprof::FunctionSamples::merge
sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight=1)
Merge the samples in Other into this one.
Definition: SampleProf.h:941
llvm::cl::value_desc
Definition: CommandLine.h:414
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:591
llvm::NOMORE_ICP_MAGICNUM
const uint64_t NOMORE_ICP_MAGICNUM
Magic number in the value profile metadata showing a target has been promoted for the instruction and...
Definition: Metadata.h:57
llvm::StringMap< NoneType, MallocAllocator >::count
size_type count(StringRef Key) const
count - Return 1 if the element is in the map, 0 otherwise.
Definition: StringMap.h:246
llvm::sampleprof::SampleProfileReader::setModule
void setModule(const Module *Mod)
Definition: SampleProfReader.h:499
SampleColdCallSiteThreshold
cl::opt< int > SampleColdCallSiteThreshold("sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining cold callsites"))
llvm::CallGraph::getModule
Module & getModule() const
Returns the module the call graph corresponds to.
Definition: CallGraph.h:101
llvm::sampleprof::SampleProfileReader::profileIsPreInlined
bool profileIsPreInlined() const
Whether input profile contains ShouldBeInlined contexts.
Definition: SampleProfReader.h:477
llvm::extractProbe
Optional< PseudoProbe > extractProbe(const Instruction &Inst)
Definition: PseudoProbe.cpp:48
llvm::InlinePass::SampleProfileInliner
@ SampleProfileInliner
llvm::sampleprof::FunctionSamples::getContext
SampleContext & getContext() const
Definition: SampleProf.h:1118
ProfileSampleAccurate
static cl::opt< bool > ProfileSampleAccurate("profile-sample-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "callsite and function as having 0 samples. Otherwise, treat " "un-sampled callsites and functions conservatively as unknown. "))
llvm::pgo::promoteIndirectCall
CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
Definition: IndirectCallPromotion.cpp:244
llvm::MDBuilder
Definition: MDBuilder.h:35
llvm::scc_iterator::isAtEnd
bool isAtEnd() const
Direct loop termination test which is more efficient than comparison with end().
Definition: SCCIterator.h:112
CallGraph.h
llvm::AnnotateInlinePassName
std::string AnnotateInlinePassName(InlineContext IC)
Definition: InlineAdvisor.cpp:577
llvm::DebugLoc::getLine
unsigned getLine() const
Definition: DebugLoc.cpp:24
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:690
llvm::sampleprof::FunctionSamples::getCanonicalFnName
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
Definition: SampleProf.h:1022
Instructions.h
loader
sample Sample Profile loader
Definition: SampleProfile.cpp:1838
SmallVector.h
llvm::sampleprof::SampleRecord::SortCallTargets
static const SortedCallTargetSet SortCallTargets(const CallTargetMap &Targets)
Sort call targets in descending order of call frequency.
Definition: SampleProf.h:398
llvm::Instruction::getDebugLoc
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:367
GetSortedValueDataFromCallTargets
static SmallVector< InstrProfValueData, 2 > GetSortedValueDataFromCallTargets(const SampleRecord::CallTargetMap &M)
Returns the sorted CallTargetMap M by count in descending order.
Definition: SampleProfile.cpp:1610
OverwriteExistingWeights
static cl::opt< bool > OverwriteExistingWeights("overwrite-existing-weights", cl::Hidden, cl::init(false), cl::desc("Ignore existing branch weights on IR and always overwrite."))
ProfileTopDownLoad
static cl::opt< bool > ProfileTopDownLoad("sample-profile-top-down-load", cl::Hidden, cl::init(true), cl::desc("Do profile annotation and inlining for functions in top-down " "order of call graph during sample profile loading. It only " "works for new pass manager. "))
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::try_emplace
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
Definition: DenseMap.h:224
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::ErrorOr
Represents either an error or a value T.
Definition: ErrorOr.h:56
ProfileInlineReplayFile
static cl::opt< std::string > ProfileInlineReplayFile("sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"), cl::desc("Optimization remarks file containing inline remarks to be replayed " "by inlining from sample profile loader."), cl::Hidden)
llvm::ReplayInlinerSettings::Scope::Function
@ Function
TargetTransformInfo.h
llvm::UseIterativeBFIInference
llvm::cl::opt< bool > UseIterativeBFIInference
ProfileSizeInline
static cl::opt< bool > ProfileSizeInline("sample-profile-inline-size", cl::Hidden, cl::init(false), cl::desc("Inline cold call sites in profile loader if it's beneficial " "for code size."))
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
llvm::InlineFunction
InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, Function *ForwardVarArgsTo=nullptr)
This function inlines the called function into the basic block of the caller.
Definition: InlineFunction.cpp:1748
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1174
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::InnerAnalysisManagerProxy
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:937
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1461
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::getValueProfDataFromInst
bool getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, InstrProfValueData ValueData[], uint32_t &ActualNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst which is annotated with value profile meta data.
Definition: InstrProf.cpp:1039
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
LLVMContext.h
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::AttributeFuncs::mergeAttributesForInlining
void mergeAttributesForInlining(Function &Caller, const Function &Callee)
Merge caller's and callee's attributes.
Definition: Attributes.cpp:2050
llvm::ReplayInlinerSettings::Fallback::AlwaysInline
@ AlwaysInline
llvm::Function::ProfileCount
Class to represent profile counts.
Definition: Function.h:253
llvm::cl::desc
Definition: CommandLine.h:405
raw_ostream.h
llvm::InlineParams::AllowRecursiveCall
Optional< bool > AllowRecursiveCall
Indicate whether we allow inlining for recursive call.
Definition: InlineCost.h:223
InitializePasses.h
llvm::OptimizationRemarkEmitterAnalysis
Definition: OptimizationRemarkEmitter.h:164
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
CallsitePrioritizedInline
static cl::opt< bool > CallsitePrioritizedInline("sample-profile-prioritized-inline", cl::Hidden, cl::desc("Use call site prioritized inlining for sample profile loader." "Currently only CSSPGO is supported."))
Debug.h
llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:443
llvm::ReplayInlinerSettings
Replay Inliner Setup.
Definition: ReplayInlineAdvisor.h:43
ProfileICPRelativeHotness
static cl::opt< unsigned > ProfileICPRelativeHotness("sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25), cl::desc("Relative hotness percentage threshold for indirect " "call promotion in proirity-based sample profile loader inlining."))
MaxNumPromotions
static cl::opt< unsigned > MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden, cl::desc("Max number of promotions for a single indirect " "call callsite in sample profile loader"))
ProfileAccurateForSymsInList
static cl::opt< bool > ProfileAccurateForSymsInList("profile-accurate-for-symsinlist", cl::Hidden, cl::init(true), cl::desc("For symbols in profile symbol list, regard their profiles to " "be accurate. It may be overriden by profile-sample-accurate. "))
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
llvm::getReplayInlineAdvisor
std::unique_ptr< InlineAdvisor > getReplayInlineAdvisor(Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context, std::unique_ptr< InlineAdvisor > OriginalAdvisor, const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks, InlineContext IC)
Definition: ReplayInlineAdvisor.cpp:80
SpecialSubKind::string
@ string
doesHistoryAllowICP
static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate)
Check whether the indirect call promotion history of Inst allows the promotion for Candidate.
Definition: SampleProfile.cpp:812
llvm::sampleprof::FunctionSamples::getCallSiteIdentifier
static LineLocation getCallSiteIdentifier(const DILocation *DIL, bool ProfileIsFS=false)
Returns a unique call site identifier for a given debug location of a call instruction.
Definition: SampleProf.cpp:223
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:927
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
llvm::SmallVectorImpl::insert
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:792