LLVM  14.0.0git
SampleProfile.cpp
Go to the documentation of this file.
1 //===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the SampleProfileLoader transformation. This pass
10 // reads a profile file generated by a sampling profiler (e.g. Linux Perf -
11 // http://perf.wiki.kernel.org/) and generates IR metadata to reflect the
12 // profile information in the given profile.
13 //
14 // This pass generates branch weight annotations on the IR:
15 //
16 // - prof: Represents branch weights. This annotation is added to branches
17 // to indicate the weights of each edge coming out of the branch.
18 // The weight of each edge is the weight of the target block for
19 // that edge. The weight of a block B is computed as the maximum
20 // number of samples found in B.
21 //
22 //===----------------------------------------------------------------------===//
23 
25 #include "llvm/ADT/ArrayRef.h"
26 #include "llvm/ADT/DenseMap.h"
27 #include "llvm/ADT/DenseSet.h"
28 #include "llvm/ADT/None.h"
29 #include "llvm/ADT/PriorityQueue.h"
30 #include "llvm/ADT/SCCIterator.h"
31 #include "llvm/ADT/SmallPtrSet.h"
32 #include "llvm/ADT/SmallSet.h"
33 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/ADT/Statistic.h"
35 #include "llvm/ADT/StringMap.h"
36 #include "llvm/ADT/StringRef.h"
37 #include "llvm/ADT/Twine.h"
44 #include "llvm/Analysis/LoopInfo.h"
51 #include "llvm/IR/BasicBlock.h"
52 #include "llvm/IR/CFG.h"
54 #include "llvm/IR/DebugLoc.h"
55 #include "llvm/IR/DiagnosticInfo.h"
56 #include "llvm/IR/Dominators.h"
57 #include "llvm/IR/Function.h"
58 #include "llvm/IR/GlobalValue.h"
59 #include "llvm/IR/InstrTypes.h"
60 #include "llvm/IR/Instruction.h"
61 #include "llvm/IR/Instructions.h"
62 #include "llvm/IR/IntrinsicInst.h"
63 #include "llvm/IR/LLVMContext.h"
64 #include "llvm/IR/MDBuilder.h"
65 #include "llvm/IR/Module.h"
66 #include "llvm/IR/PassManager.h"
68 #include "llvm/InitializePasses.h"
69 #include "llvm/Pass.h"
73 #include "llvm/Support/Casting.h"
75 #include "llvm/Support/Debug.h"
77 #include "llvm/Support/ErrorOr.h"
80 #include "llvm/Transforms/IPO.h"
90 #include <algorithm>
91 #include <cassert>
92 #include <cstdint>
93 #include <functional>
94 #include <limits>
95 #include <map>
96 #include <memory>
97 #include <queue>
98 #include <string>
99 #include <system_error>
100 #include <utility>
101 #include <vector>
102 
103 using namespace llvm;
104 using namespace sampleprof;
105 using namespace llvm::sampleprofutil;
107 #define DEBUG_TYPE "sample-profile"
108 #define CSINLINE_DEBUG DEBUG_TYPE "-inline"
109 
110 STATISTIC(NumCSInlined,
111  "Number of functions inlined with context sensitive profile");
112 STATISTIC(NumCSNotInlined,
113  "Number of functions not inlined with context sensitive profile");
114 STATISTIC(NumMismatchedProfile,
115  "Number of functions with CFG mismatched profile");
116 STATISTIC(NumMatchedProfile, "Number of functions with CFG matched profile");
117 STATISTIC(NumDuplicatedInlinesite,
118  "Number of inlined callsites with a partial distribution factor");
119 
120 STATISTIC(NumCSInlinedHitMinLimit,
121  "Number of functions with FDO inline stopped due to min size limit");
122 STATISTIC(NumCSInlinedHitMaxLimit,
123  "Number of functions with FDO inline stopped due to max size limit");
124 STATISTIC(
125  NumCSInlinedHitGrowthLimit,
126  "Number of functions with FDO inline stopped due to growth size limit");
127 
128 // Command line option to specify the file to read samples from. This is
129 // mainly used for debugging.
131  "sample-profile-file", cl::init(""), cl::value_desc("filename"),
132  cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);
133 
134 // The named file contains a set of transformations that may have been applied
135 // to the symbol names between the program from which the sample data was
136 // collected and the current program's symbols.
138  "sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"),
139  cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden);
140 
142  "profile-sample-accurate", cl::Hidden, cl::init(false),
143  cl::desc("If the sample profile is accurate, we will mark all un-sampled "
144  "callsite and function as having 0 samples. Otherwise, treat "
145  "un-sampled callsites and functions conservatively as unknown. "));
146 
148  "profile-sample-block-accurate", cl::Hidden, cl::init(false),
149  cl::desc("If the sample profile is accurate, we will mark all un-sampled "
150  "branches and calls as having 0 samples. Otherwise, treat "
151  "them conservatively as unknown. "));
152 
154  "profile-accurate-for-symsinlist", cl::Hidden, cl::ZeroOrMore,
155  cl::init(true),
156  cl::desc("For symbols in profile symbol list, regard their profiles to "
157  "be accurate. It may be overriden by profile-sample-accurate. "));
158 
160  "sample-profile-merge-inlinee", cl::Hidden, cl::init(true),
161  cl::desc("Merge past inlinee's profile to outline version if sample "
162  "profile loader decided not to inline a call site. It will "
163  "only be enabled when top-down order of profile loading is "
164  "enabled. "));
165 
167  "sample-profile-top-down-load", cl::Hidden, cl::init(true),
168  cl::desc("Do profile annotation and inlining for functions in top-down "
169  "order of call graph during sample profile loading. It only "
170  "works for new pass manager. "));
171 
172 static cl::opt<bool>
173  UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden,
174  cl::desc("Process functions in a top-down order "
175  "defined by the profiled call graph when "
176  "-sample-profile-top-down-load is on."));
178  SortProfiledSCC("sort-profiled-scc-member", cl::init(true), cl::Hidden,
179  cl::desc("Sort profiled recursion by edge weights."));
180 
182  "sample-profile-inline-size", cl::Hidden, cl::init(false),
183  cl::desc("Inline cold call sites in profile loader if it's beneficial "
184  "for code size."));
185 
187  "sample-profile-inline-growth-limit", cl::Hidden, cl::init(12),
188  cl::desc("The size growth ratio limit for proirity-based sample profile "
189  "loader inlining."));
190 
192  "sample-profile-inline-limit-min", cl::Hidden, cl::init(100),
193  cl::desc("The lower bound of size growth limit for "
194  "proirity-based sample profile loader inlining."));
195 
197  "sample-profile-inline-limit-max", cl::Hidden, cl::init(10000),
198  cl::desc("The upper bound of size growth limit for "
199  "proirity-based sample profile loader inlining."));
200 
202  "sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000),
203  cl::desc("Hot callsite threshold for proirity-based sample profile loader "
204  "inlining."));
205 
207  "sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45),
208  cl::desc("Threshold for inlining cold callsites"));
209 
211  "sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25),
212  cl::desc(
213  "Relative hotness percentage threshold for indirect "
214  "call promotion in proirity-based sample profile loader inlining."));
215 
217  "sample-profile-icp-relative-hotness-skip", cl::Hidden, cl::init(1),
218  cl::desc(
219  "Skip relative hotness check for ICP up to given number of targets."));
220 
222  "sample-profile-prioritized-inline", cl::Hidden, cl::ZeroOrMore,
223  cl::init(false),
224  cl::desc("Use call site prioritized inlining for sample profile loader."
225  "Currently only CSSPGO is supported."));
226 
228  "sample-profile-use-preinliner", cl::Hidden, cl::ZeroOrMore,
229  cl::init(false),
230  cl::desc("Use the preinliner decisions stored in profile context."));
231 
233  "sample-profile-recursive-inline", cl::Hidden, cl::ZeroOrMore,
234  cl::init(false),
235  cl::desc("Allow sample loader inliner to inline recursive calls."));
236 
238  "sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"),
239  cl::desc(
240  "Optimization remarks file containing inline remarks to be replayed "
241  "by inlining from sample profile loader."),
242  cl::Hidden);
243 
245  "sample-profile-inline-replay-scope",
248  "Replay on functions that have remarks associated "
249  "with them (default)"),
251  "Replay on the entire module")),
252  cl::desc("Whether inline replay should be applied to the entire "
253  "Module or just the Functions (default) that are present as "
254  "callers in remarks during sample profile inlining."),
255  cl::Hidden);
256 
258  "sample-profile-inline-replay-fallback",
260  cl::values(
261  clEnumValN(
263  "All decisions not in replay send to original advisor (default)"),
265  "AlwaysInline", "All decisions not in replay are inlined"),
267  "All decisions not in replay are not inlined")),
268  cl::desc("How sample profile inline replay treats sites that don't come "
269  "from the replay. Original: defers to original advisor, "
270  "AlwaysInline: inline all sites not in replay, NeverInline: "
271  "inline no sites not in replay"),
272  cl::Hidden);
273 
275  "sample-profile-inline-replay-format",
277  cl::values(
278  clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"),
280  "<Line Number>:<Column Number>"),
282  "LineDiscriminator", "<Line Number>.<Discriminator>"),
284  "LineColumnDiscriminator",
285  "<Line Number>:<Column Number>.<Discriminator> (default)")),
286  cl::desc("How sample profile inline replay file is formatted"), cl::Hidden);
287 
288 static cl::opt<unsigned>
289  MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden,
291  cl::desc("Max number of promotions for a single indirect "
292  "call callsite in sample profile loader"));
293 
295  "overwrite-existing-weights", cl::Hidden, cl::init(false),
296  cl::desc("Ignore existing branch weights on IR and always overwrite."));
297 
298 namespace {
299 
300 using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
301 using EquivalenceClassMap = DenseMap<const BasicBlock *, const BasicBlock *>;
302 using Edge = std::pair<const BasicBlock *, const BasicBlock *>;
303 using EdgeWeightMap = DenseMap<Edge, uint64_t>;
304 using BlockEdgeMap =
306 
307 class GUIDToFuncNameMapper {
308 public:
309  GUIDToFuncNameMapper(Module &M, SampleProfileReader &Reader,
310  DenseMap<uint64_t, StringRef> &GUIDToFuncNameMap)
311  : CurrentReader(Reader), CurrentModule(M),
312  CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) {
313  if (!CurrentReader.useMD5())
314  return;
315 
316  for (const auto &F : CurrentModule) {
317  StringRef OrigName = F.getName();
318  CurrentGUIDToFuncNameMap.insert(
319  {Function::getGUID(OrigName), OrigName});
320 
321  // Local to global var promotion used by optimization like thinlto
322  // will rename the var and add suffix like ".llvm.xxx" to the
323  // original local name. In sample profile, the suffixes of function
324  // names are all stripped. Since it is possible that the mapper is
325  // built in post-thin-link phase and var promotion has been done,
326  // we need to add the substring of function name without the suffix
327  // into the GUIDToFuncNameMap.
329  if (CanonName != OrigName)
330  CurrentGUIDToFuncNameMap.insert(
331  {Function::getGUID(CanonName), CanonName});
332  }
333 
334  // Update GUIDToFuncNameMap for each function including inlinees.
335  SetGUIDToFuncNameMapForAll(&CurrentGUIDToFuncNameMap);
336  }
337 
338  ~GUIDToFuncNameMapper() {
339  if (!CurrentReader.useMD5())
340  return;
341 
342  CurrentGUIDToFuncNameMap.clear();
343 
344  // Reset GUIDToFuncNameMap for of each function as they're no
345  // longer valid at this point.
346  SetGUIDToFuncNameMapForAll(nullptr);
347  }
348 
349 private:
350  void SetGUIDToFuncNameMapForAll(DenseMap<uint64_t, StringRef> *Map) {
351  std::queue<FunctionSamples *> FSToUpdate;
352  for (auto &IFS : CurrentReader.getProfiles()) {
353  FSToUpdate.push(&IFS.second);
354  }
355 
356  while (!FSToUpdate.empty()) {
357  FunctionSamples *FS = FSToUpdate.front();
358  FSToUpdate.pop();
359  FS->GUIDToFuncNameMap = Map;
360  for (const auto &ICS : FS->getCallsiteSamples()) {
361  const FunctionSamplesMap &FSMap = ICS.second;
362  for (auto &IFS : FSMap) {
363  FunctionSamples &FS = const_cast<FunctionSamples &>(IFS.second);
364  FSToUpdate.push(&FS);
365  }
366  }
367  }
368  }
369 
370  SampleProfileReader &CurrentReader;
371  Module &CurrentModule;
372  DenseMap<uint64_t, StringRef> &CurrentGUIDToFuncNameMap;
373 };
374 
375 // Inline candidate used by iterative callsite prioritized inliner
376 struct InlineCandidate {
377  CallBase *CallInstr;
378  const FunctionSamples *CalleeSamples;
379  // Prorated callsite count, which will be used to guide inlining. For example,
380  // if a callsite is duplicated in LTO prelink, then in LTO postlink the two
381  // copies will get their own distribution factors and their prorated counts
382  // will be used to decide if they should be inlined independently.
383  uint64_t CallsiteCount;
384  // Call site distribution factor to prorate the profile samples for a
385  // duplicated callsite. Default value is 1.0.
386  float CallsiteDistribution;
387 };
388 
389 // Inline candidate comparer using call site weight
390 struct CandidateComparer {
391  bool operator()(const InlineCandidate &LHS, const InlineCandidate &RHS) {
392  if (LHS.CallsiteCount != RHS.CallsiteCount)
393  return LHS.CallsiteCount < RHS.CallsiteCount;
394 
395  const FunctionSamples *LCS = LHS.CalleeSamples;
396  const FunctionSamples *RCS = RHS.CalleeSamples;
397  assert(LCS && RCS && "Expect non-null FunctionSamples");
398 
399  // Tie breaker using number of samples try to favor smaller functions first
400  if (LCS->getBodySamples().size() != RCS->getBodySamples().size())
401  return LCS->getBodySamples().size() > RCS->getBodySamples().size();
402 
403  // Tie breaker using GUID so we have stable/deterministic inlining order
404  return LCS->getGUID(LCS->getName()) < RCS->getGUID(RCS->getName());
405  }
406 };
407 
408 using CandidateQueue =
410  CandidateComparer>;
411 
412 /// Sample profile pass.
413 ///
414 /// This pass reads profile data from the file specified by
415 /// -sample-profile-file and annotates every affected function with the
416 /// profile information found in that file.
417 class SampleProfileLoader final
418  : public SampleProfileLoaderBaseImpl<BasicBlock> {
419 public:
420  SampleProfileLoader(
421  StringRef Name, StringRef RemapName, ThinOrFullLTOPhase LTOPhase,
422  std::function<AssumptionCache &(Function &)> GetAssumptionCache,
423  std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo,
424  std::function<const TargetLibraryInfo &(Function &)> GetTLI)
426  GetAC(std::move(GetAssumptionCache)),
427  GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)),
428  LTOPhase(LTOPhase) {}
429 
430  bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr);
431  bool runOnModule(Module &M, ModuleAnalysisManager *AM,
432  ProfileSummaryInfo *_PSI, CallGraph *CG);
433 
434 protected:
436  bool emitAnnotations(Function &F);
437  ErrorOr<uint64_t> getInstWeight(const Instruction &I) override;
438  ErrorOr<uint64_t> getProbeWeight(const Instruction &I);
439  const FunctionSamples *findCalleeFunctionSamples(const CallBase &I) const;
440  const FunctionSamples *
441  findFunctionSamples(const Instruction &I) const override;
442  std::vector<const FunctionSamples *>
443  findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
444  void findExternalInlineCandidate(CallBase *CB, const FunctionSamples *Samples,
445  DenseSet<GlobalValue::GUID> &InlinedGUIDs,
448  // Attempt to promote indirect call and also inline the promoted call
449  bool tryPromoteAndInlineCandidate(
450  Function &F, InlineCandidate &Candidate, uint64_t SumOrigin,
451  uint64_t &Sum, SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
452 
453  bool inlineHotFunctions(Function &F,
454  DenseSet<GlobalValue::GUID> &InlinedGUIDs);
455  Optional<InlineCost> getExternalInlineAdvisorCost(CallBase &CB);
456  bool getExternalInlineAdvisorShouldInline(CallBase &CB);
457  InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
458  bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB);
459  bool
460  tryInlineCandidate(InlineCandidate &Candidate,
461  SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
462  bool
463  inlineHotFunctionsWithPriority(Function &F,
464  DenseSet<GlobalValue::GUID> &InlinedGUIDs);
465  // Inline cold/small functions in addition to hot ones
466  bool shouldInlineColdCallee(CallBase &CallInst);
467  void emitOptimizationRemarksForInlineCandidates(
468  const SmallVectorImpl<CallBase *> &Candidates, const Function &F,
469  bool Hot);
470  void promoteMergeNotInlinedContextSamples(
472  const Function &F);
473  std::vector<Function *> buildFunctionOrder(Module &M, CallGraph *CG);
474  std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(CallGraph &CG);
475  void generateMDProfMetadata(Function &F);
476 
477  /// Map from function name to Function *. Used to find the function from
478  /// the function name. If the function name contains suffix, additional
479  /// entry is added to map from the stripped name to the function if there
480  /// is one-to-one mapping.
482 
485  std::function<const TargetLibraryInfo &(Function &)> GetTLI;
486 
487  /// Profile tracker for different context.
488  std::unique_ptr<SampleContextTracker> ContextTracker;
489 
490  /// Flag indicating whether input profile is context-sensitive
491  bool ProfileIsCSFlat = false;
492 
493  /// Flag indicating which LTO/ThinLTO phase the pass is invoked in.
494  ///
495  /// We need to know the LTO phase because for example in ThinLTOPrelink
496  /// phase, in annotation, we should not promote indirect calls. Instead,
497  /// we will mark GUIDs that needs to be annotated to the function.
498  ThinOrFullLTOPhase LTOPhase;
499 
500  /// Profile symbol list tells whether a function name appears in the binary
501  /// used to generate the current profile.
502  std::unique_ptr<ProfileSymbolList> PSL;
503 
504  /// Total number of samples collected in this profile.
505  ///
506  /// This is the sum of all the samples collected in all the functions executed
507  /// at runtime.
508  uint64_t TotalCollectedSamples = 0;
509 
510  // Information recorded when we declined to inline a call site
511  // because we have determined it is too cold is accumulated for
512  // each callee function. Initially this is just the entry count.
513  struct NotInlinedProfileInfo {
514  uint64_t entryCount;
515  };
517 
518  // GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for
519  // all the function symbols defined or declared in current module.
520  DenseMap<uint64_t, StringRef> GUIDToFuncNameMap;
521 
522  // All the Names used in FunctionSamples including outline function
523  // names, inline instance names and call target names.
524  StringSet<> NamesInProfile;
525 
526  // For symbols in the profile symbol list, whether to regard their profiles
527  // as accurate. It is mainly decided by the existence of a profile symbol
528  // list and the -profile-accurate-for-symsinlist flag, but it can be
529  // overridden by -profile-sample-accurate or the profile-sample-accurate
530  // attribute.
531  bool ProfAccForSymsInList;
532 
533  // External inline advisor used to replay inline decision from remarks.
534  std::unique_ptr<InlineAdvisor> ExternalInlineAdvisor;
535 
536  // A pseudo probe helper to correlate the imported sample counts.
537  std::unique_ptr<PseudoProbeManager> ProbeManager;
538 };
539 
540 class SampleProfileLoaderLegacyPass : public ModulePass {
541 public:
542  // Class identification, replacement for typeinfo
543  static char ID;
544 
545  SampleProfileLoaderLegacyPass(
548  : ModulePass(ID), SampleLoader(
549  Name, SampleProfileRemappingFile, LTOPhase,
550  [&](Function &F) -> AssumptionCache & {
551  return ACT->getAssumptionCache(F);
552  },
553  [&](Function &F) -> TargetTransformInfo & {
554  return TTIWP->getTTI(F);
555  },
556  [&](Function &F) -> TargetLibraryInfo & {
557  return TLIWP->getTLI(F);
558  }) {
561  }
562 
563  void dump() { SampleLoader.dump(); }
564 
565  bool doInitialization(Module &M) override {
566  return SampleLoader.doInitialization(M);
567  }
568 
569  StringRef getPassName() const override { return "Sample profile pass"; }
570  bool runOnModule(Module &M) override;
571 
572  void getAnalysisUsage(AnalysisUsage &AU) const override {
577  }
578 
579 private:
580  SampleProfileLoader SampleLoader;
581  AssumptionCacheTracker *ACT = nullptr;
582  TargetTransformInfoWrapperPass *TTIWP = nullptr;
583  TargetLibraryInfoWrapperPass *TLIWP = nullptr;
584 };
585 
586 } // end anonymous namespace
587 
588 ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
590  return getProbeWeight(Inst);
591 
592  const DebugLoc &DLoc = Inst.getDebugLoc();
593  if (!DLoc)
594  return std::error_code();
595 
596  // Ignore all intrinsics, phinodes and branch instructions.
597  // Branch and phinodes instruction usually contains debug info from sources
598  // outside of the residing basic block, thus we ignore them during annotation.
599  if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst) || isa<PHINode>(Inst))
600  return std::error_code();
601 
602  // For non-CS profile, if a direct call/invoke instruction is inlined in
603  // profile (findCalleeFunctionSamples returns non-empty result), but not
604  // inlined here, it means that the inlined callsite has no sample, thus the
605  // call instruction should have 0 count.
606  // For CS profile, the callsite count of previously inlined callees is
607  // populated with the entry count of the callees.
608  if (!ProfileIsCSFlat)
609  if (const auto *CB = dyn_cast<CallBase>(&Inst))
610  if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
611  return 0;
612 
613  return getInstWeightImpl(Inst);
614 }
615 
616 // Here use error_code to represent: 1) The dangling probe. 2) Ignore the weight
617 // of non-probe instruction. So if all instructions of the BB give error_code,
618 // tell the inference algorithm to infer the BB weight.
619 ErrorOr<uint64_t> SampleProfileLoader::getProbeWeight(const Instruction &Inst) {
621  "Profile is not pseudo probe based");
622  Optional<PseudoProbe> Probe = extractProbe(Inst);
623  // Ignore the non-probe instruction. If none of the instruction in the BB is
624  // probe, we choose to infer the BB's weight.
625  if (!Probe)
626  return std::error_code();
627 
628  const FunctionSamples *FS = findFunctionSamples(Inst);
629  // If none of the instruction has FunctionSample, we choose to return zero
630  // value sample to indicate the BB is cold. This could happen when the
631  // instruction is from inlinee and no profile data is found.
632  // FIXME: This should not be affected by the source drift issue as 1) if the
633  // newly added function is top-level inliner, it won't match the CFG checksum
634  // in the function profile or 2) if it's the inlinee, the inlinee should have
635  // a profile, otherwise it wouldn't be inlined. For non-probe based profile,
636  // we can improve it by adding a switch for profile-sample-block-accurate for
637  // block level counts in the future.
638  if (!FS)
639  return 0;
640 
641  // For non-CS profile, If a direct call/invoke instruction is inlined in
642  // profile (findCalleeFunctionSamples returns non-empty result), but not
643  // inlined here, it means that the inlined callsite has no sample, thus the
644  // call instruction should have 0 count.
645  // For CS profile, the callsite count of previously inlined callees is
646  // populated with the entry count of the callees.
647  if (!ProfileIsCSFlat)
648  if (const auto *CB = dyn_cast<CallBase>(&Inst))
649  if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
650  return 0;
651 
652  const ErrorOr<uint64_t> &R = FS->findSamplesAt(Probe->Id, 0);
653  if (R) {
654  uint64_t Samples = R.get() * Probe->Factor;
655  bool FirstMark = CoverageTracker.markSamplesUsed(FS, Probe->Id, 0, Samples);
656  if (FirstMark) {
657  ORE->emit([&]() {
658  OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "AppliedSamples", &Inst);
659  Remark << "Applied " << ore::NV("NumSamples", Samples);
660  Remark << " samples from profile (ProbeId=";
661  Remark << ore::NV("ProbeId", Probe->Id);
662  Remark << ", Factor=";
663  Remark << ore::NV("Factor", Probe->Factor);
664  Remark << ", OriginalSamples=";
665  Remark << ore::NV("OriginalSamples", R.get());
666  Remark << ")";
667  return Remark;
668  });
669  }
670  LLVM_DEBUG(dbgs() << " " << Probe->Id << ":" << Inst
671  << " - weight: " << R.get() << " - factor: "
672  << format("%0.2f", Probe->Factor) << ")\n");
673  return Samples;
674  }
675  return R;
676 }
677 
678 /// Get the FunctionSamples for a call instruction.
679 ///
680 /// The FunctionSamples of a call/invoke instruction \p Inst is the inlined
681 /// instance in which that call instruction is calling to. It contains
682 /// all samples that resides in the inlined instance. We first find the
683 /// inlined instance in which the call instruction is from, then we
684 /// traverse its children to find the callsite with the matching
685 /// location.
686 ///
687 /// \param Inst Call/Invoke instruction to query.
688 ///
689 /// \returns The FunctionSamples pointer to the inlined instance.
690 const FunctionSamples *
691 SampleProfileLoader::findCalleeFunctionSamples(const CallBase &Inst) const {
692  const DILocation *DIL = Inst.getDebugLoc();
693  if (!DIL) {
694  return nullptr;
695  }
696 
697  StringRef CalleeName;
698  if (Function *Callee = Inst.getCalledFunction())
699  CalleeName = Callee->getName();
700 
701  if (ProfileIsCSFlat)
702  return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName);
703 
704  const FunctionSamples *FS = findFunctionSamples(Inst);
705  if (FS == nullptr)
706  return nullptr;
707 
708  return FS->findFunctionSamplesAt(FunctionSamples::getCallSiteIdentifier(DIL),
709  CalleeName, Reader->getRemapper());
710 }
711 
712 /// Returns a vector of FunctionSamples that are the indirect call targets
713 /// of \p Inst. The vector is sorted by the total number of samples. Stores
714 /// the total call count of the indirect call in \p Sum.
715 std::vector<const FunctionSamples *>
716 SampleProfileLoader::findIndirectCallFunctionSamples(
717  const Instruction &Inst, uint64_t &Sum) const {
718  const DILocation *DIL = Inst.getDebugLoc();
719  std::vector<const FunctionSamples *> R;
720 
721  if (!DIL) {
722  return R;
723  }
724 
725  auto FSCompare = [](const FunctionSamples *L, const FunctionSamples *R) {
726  assert(L && R && "Expect non-null FunctionSamples");
727  if (L->getEntrySamples() != R->getEntrySamples())
728  return L->getEntrySamples() > R->getEntrySamples();
729  return FunctionSamples::getGUID(L->getName()) <
730  FunctionSamples::getGUID(R->getName());
731  };
732 
733  if (ProfileIsCSFlat) {
734  auto CalleeSamples =
735  ContextTracker->getIndirectCalleeContextSamplesFor(DIL);
736  if (CalleeSamples.empty())
737  return R;
738 
739  // For CSSPGO, we only use target context profile's entry count
740  // as that already includes both inlined callee and non-inlined ones..
741  Sum = 0;
742  for (const auto *const FS : CalleeSamples) {
743  Sum += FS->getEntrySamples();
744  R.push_back(FS);
745  }
746  llvm::sort(R, FSCompare);
747  return R;
748  }
749 
750  const FunctionSamples *FS = findFunctionSamples(Inst);
751  if (FS == nullptr)
752  return R;
753 
755  auto T = FS->findCallTargetMapAt(CallSite);
756  Sum = 0;
757  if (T)
758  for (const auto &T_C : T.get())
759  Sum += T_C.second;
760  if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(CallSite)) {
761  if (M->empty())
762  return R;
763  for (const auto &NameFS : *M) {
764  Sum += NameFS.second.getEntrySamples();
765  R.push_back(&NameFS.second);
766  }
767  llvm::sort(R, FSCompare);
768  }
769  return R;
770 }
771 
772 const FunctionSamples *
773 SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
775  Optional<PseudoProbe> Probe = extractProbe(Inst);
776  if (!Probe)
777  return nullptr;
778  }
779 
780  const DILocation *DIL = Inst.getDebugLoc();
781  if (!DIL)
782  return Samples;
783 
784  auto it = DILocation2SampleMap.try_emplace(DIL,nullptr);
785  if (it.second) {
786  if (ProfileIsCSFlat)
787  it.first->second = ContextTracker->getContextSamplesFor(DIL);
788  else
789  it.first->second =
790  Samples->findFunctionSamples(DIL, Reader->getRemapper());
791  }
792  return it.first->second;
793 }
794 
795 /// Check whether the indirect call promotion history of \p Inst allows
796 /// the promotion for \p Candidate.
797 /// If the profile count for the promotion candidate \p Candidate is
798 /// NOMORE_ICP_MAGICNUM, it means \p Candidate has already been promoted
799 /// for \p Inst. If we already have at least MaxNumPromotions
800 /// NOMORE_ICP_MAGICNUM count values in the value profile of \p Inst, we
801 /// cannot promote for \p Inst anymore.
802 static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate) {
803  uint32_t NumVals = 0;
804  uint64_t TotalCount = 0;
805  std::unique_ptr<InstrProfValueData[]> ValueData =
806  std::make_unique<InstrProfValueData[]>(MaxNumPromotions);
807  bool Valid =
808  getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget, MaxNumPromotions,
809  ValueData.get(), NumVals, TotalCount, true);
810  // No valid value profile so no promoted targets have been recorded
811  // before. Ok to do ICP.
812  if (!Valid)
813  return true;
814 
815  unsigned NumPromoted = 0;
816  for (uint32_t I = 0; I < NumVals; I++) {
817  if (ValueData[I].Count != NOMORE_ICP_MAGICNUM)
818  continue;
819 
820  // If the promotion candidate has NOMORE_ICP_MAGICNUM count in the
821  // metadata, it means the candidate has been promoted for this
822  // indirect call.
823  if (ValueData[I].Value == Function::getGUID(Candidate))
824  return false;
825  NumPromoted++;
826  // If already have MaxNumPromotions promotion, don't do it anymore.
827  if (NumPromoted == MaxNumPromotions)
828  return false;
829  }
830  return true;
831 }
832 
833 /// Update indirect call target profile metadata for \p Inst.
834 /// Usually \p Sum is the sum of counts of all the targets for \p Inst.
835 /// If it is 0, it means updateIDTMetaData is used to mark a
836 /// certain target to be promoted already. If it is not zero,
837 /// we expect to use it to update the total count in the value profile.
///
/// The function reads at most MaxNumPromotions existing
/// IPVK_IndirectCallTarget records from \p Inst, merges them with
/// \p CallTargets in a map keyed by target value, sorts the merged records
/// by count (descending) and re-annotates \p Inst with at most
/// MaxNumPromotions of them.
///
/// \param CallTargets target value/count pairs to merge in. When \p Sum is 0
/// this must hold exactly one entry whose count is NOMORE_ICP_MAGICNUM.
/// \param Sum total count to annotate; adjusted locally as described below.
// NOTE(review): listing line 839 is absent from this extract; it presumably
// carries the function name and the `Inst` parameter used below - confirm
// against the real file.
838 static void
840  const SmallVectorImpl<InstrProfValueData> &CallTargets,
841  uint64_t Sum) {
842  uint32_t NumVals = 0;
843  // OldSum is the existing total count in the value profile data.
844  uint64_t OldSum = 0;
// Scratch buffer sized for the maximum number of records requested below.
845  std::unique_ptr<InstrProfValueData[]> ValueData =
846  std::make_unique<InstrProfValueData[]>(MaxNumPromotions);
// Valid tells whether existing value profile data was read; NumVals and
// OldSum are only consulted below when it is true (or, for OldSum, keep
// their zero initializer otherwise).
847  bool Valid =
848  getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget, MaxNumPromotions,
849  ValueData.get(), NumVals, OldSum, true);
850 
// Merged view of the profile: target value -> count.
851  DenseMap<uint64_t, uint64_t> ValueCountMap;
852  if (Sum == 0) {
853  assert((CallTargets.size() == 1 &&
854  CallTargets[0].Count == NOMORE_ICP_MAGICNUM) &&
855  "If sum is 0, assume only one element in CallTargets "
856  "with count being NOMORE_ICP_MAGICNUM");
857  // Initialize ValueCountMap with existing value profile data.
858  if (Valid) {
859  for (uint32_t I = 0; I < NumVals; I++)
860  ValueCountMap[ValueData[I].Value] = ValueData[I].Count;
861  }
862  auto Pair =
863  ValueCountMap.try_emplace(CallTargets[0].Value, CallTargets[0].Count);
864  // If the target already exists in value profile, decrease the total
865  // count OldSum and reset the target's count to NOMORE_ICP_MAGICNUM.
// NOTE(review): if the existing count is already NOMORE_ICP_MAGICNUM, that
// sentinel value is what gets subtracted from OldSum; presumably callers
// rule this case out beforehand - confirm.
866  if (!Pair.second) {
867  OldSum -= Pair.first->second;
868  Pair.first->second = NOMORE_ICP_MAGICNUM;
869  }
// The annotated total is the (possibly reduced) existing total.
870  Sum = OldSum;
871  } else {
872  // Initialize ValueCountMap with existing NOMORE_ICP_MAGICNUM
873  // counts in the value profile.
874  if (Valid) {
875  for (uint32_t I = 0; I < NumVals; I++) {
876  if (ValueData[I].Count == NOMORE_ICP_MAGICNUM)
877  ValueCountMap[ValueData[I].Value] = ValueData[I].Count;
878  }
879  }
880 
// Add the new targets; try_emplace leaves an existing (already promoted)
// entry untouched and reports that through Pair.second == false.
881  for (const auto &Data : CallTargets) {
882  auto Pair = ValueCountMap.try_emplace(Data.Value, Data.Count);
883  if (Pair.second)
884  continue;
885  // The target represented by Data.Value has already been promoted.
886  // Keep the count as NOMORE_ICP_MAGICNUM in the profile and decrease
887  // Sum by Data.Count.
888  assert(Sum >= Data.Count && "Sum should never be less than Data.Count");
889  Sum -= Data.Count;
890  }
891  }
892 
// Flatten the map into a vector so it can be sorted.
893  SmallVector<InstrProfValueData, 8> NewCallTargets;
894  for (const auto &ValueCount : ValueCountMap) {
895  NewCallTargets.emplace_back(
896  InstrProfValueData{ValueCount.first, ValueCount.second});
897  }
898 
// Highest count first; equal counts are ordered by larger target value
// first. Map keys are unique, so no two elements compare equal.
899  llvm::sort(NewCallTargets,
900  [](const InstrProfValueData &L, const InstrProfValueData &R) {
901  if (L.Count != R.Count)
902  return L.Count > R.Count;
903  return L.Value > R.Value;
904  });
905 
// Never write more than MaxNumPromotions records.
906  uint32_t MaxMDCount =
907  std::min(NewCallTargets.size(), static_cast<size_t>(MaxNumPromotions));
908  annotateValueSite(*Inst.getParent()->getParent()->getParent(), Inst,
909  NewCallTargets, Sum, IPVK_IndirectCallTarget, MaxMDCount);
910 }
911 
912 /// Attempt to promote indirect call and also inline the promoted call.
913 ///
914 /// \param F Caller function.
915 /// \param Candidate ICP and inline candidate.
916 /// \param SumOrigin Original sum of target counts for indirect call before
917 /// promoting given candidate.
918 /// \param Sum Prorated sum of remaining target counts for indirect call
919 /// after promoting given candidate.
920 /// \param InlinedCallSite Output vector for new call sites exposed after
921 /// inlining.
///
/// \returns the result of tryInlineCandidate when the call was promoted and
/// the promoted instruction is a CallInst or InvokeInst; false in every
/// other case (callee not found in SymbolMap, promotion history disallows
/// it, or the callee fails the eligibility checks below).
922 bool SampleProfileLoader::tryPromoteAndInlineCandidate(
923  Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum,
924  SmallVector<CallBase *, 8> *InlinedCallSite) {
// Resolve the profiled callee name to a function through SymbolMap.
925  auto CalleeFunctionName = Candidate.CalleeSamples->getFuncName();
926  auto R = SymbolMap.find(CalleeFunctionName);
927  if (R == SymbolMap.end() || !R->getValue())
928  return false;
929 
// Consult the call site's recorded promotion history before promoting.
930  auto &CI = *Candidate.CallInstr;
931  if (!doesHistoryAllowICP(CI, R->getValue()->getName()))
932  return false;
933 
// Default text for the debug message below; isLegalToPromote receives a
// pointer to it (NOTE(review): presumably to overwrite it with a more
// specific reason - confirm).
934  const char *Reason = "Callee function not available";
935  // R->getValue() != &F is to prevent promoting a recursive call.
936  // If it is a recursive call, we do not inline it as it could bloat
937  // the code exponentially. There is a way to better handle this, e.g.
938  // clone the caller first, and inline the cloned caller if it is
939  // recursive. As llvm does not inline recursive calls, we will
940  // simply ignore it instead of handling it explicitly.
941  if (!R->getValue()->isDeclaration() && R->getValue()->getSubprogram() &&
942  R->getValue()->hasFnAttribute("use-sample-profile") &&
943  R->getValue() != &F && isLegalToPromote(CI, R->getValue(), &Reason)) {
944  // For promoted target, set its value with NOMORE_ICP_MAGICNUM count
945  // in the value profile metadata so the target won't be promoted again.
946  SmallVector<InstrProfValueData, 1> SortedCallTargets = {InstrProfValueData{
947  Function::getGUID(R->getValue()->getName()), NOMORE_ICP_MAGICNUM}};
948  updateIDTMetaData(CI, SortedCallTargets, 0);
949 
// NOTE(review): DI is the address of the object returned by
// promoteIndirectCall, so the `if (DI)` test below presumably always
// passes - confirm.
950  auto *DI = &pgo::promoteIndirectCall(
951  CI, R->getValue(), Candidate.CallsiteCount, Sum, false, ORE);
952  if (DI) {
// Sum is an in/out parameter: the promoted target's count is removed from
// the caller's running total.
953  Sum -= Candidate.CallsiteCount;
954  // Do not prorate the indirect callsite distribution since the original
955  // distribution will be used to scale down non-promoted profile target
956  // counts later. By doing this we lose track of the real callsite count
957  // for the leftover indirect callsite as a trade off for accurate call
958  // target counts.
959  // TODO: Ideally we would have two separate factors, one for call site
960  // counts and one is used to prorate call target counts.
961  // Do not update the promoted direct callsite distribution at this
962  // point since the original distribution combined with the callee profile
963  // will be used to prorate callsites from the callee if inlined. Once not
964  // inlined, the direct callsite distribution should be prorated so that
965  // it will reflect the real callsite counts.
// From here on the candidate refers to the promoted direct call.
966  Candidate.CallInstr = DI;
967  if (isa<CallInst>(DI) || isa<InvokeInst>(DI)) {
968  bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
969  if (!Inlined) {
970  // Prorate the direct callsite distribution so that it reflects real
971  // callsite counts.
// NOTE(review): listing line 972 is absent from this extract; it presumably
// opens the call whose arguments appear on the next line - confirm against
// the real file.
973  *DI, static_cast<float>(Candidate.CallsiteCount) / SumOrigin);
974  }
975  return Inlined;
976  }
977  }
978  } else {
979  LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to "
980  << Candidate.CalleeSamples->getFuncName() << " because "
981  << Reason << "\n");
982  }
983  return false;
984 }
985 
// Decide whether a call site that is not hot may still be inlined.
//
// Returns false when ProfileSizeInline is off, when there is no callee, or
// when the computed inline cost is "never"; returns true when the cost is
// "always"; otherwise returns whether the cost does not exceed
// SampleColdCallSiteThreshold.
986 bool SampleProfileLoader::shouldInlineColdCallee(CallBase &CallInst) {
987  if (!ProfileSizeInline)
988  return false;
989 
// NOTE(review): listing line 990 is absent from this extract; it presumably
// declares `Callee` as the function called by CallInst - confirm against the
// real file.
991  if (Callee == nullptr)
992  return false;
993 
// Cost is computed with the default inline parameters.
994  InlineCost Cost = getInlineCost(CallInst, getInlineParams(), GetTTI(*Callee),
995  GetAC, GetTLI);
996 
997  if (Cost.isNever())
998  return false;
999 
1000  if (Cost.isAlways())
1001  return true;
1002 
1003  return Cost.getCost() <= SampleColdCallSiteThreshold;
1004 }
1005 
1006 void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
1007  const SmallVectorImpl<CallBase *> &Candidates, const Function &F,
1008  bool Hot) {
1009  for (auto I : Candidates) {
1010  Function *CalledFunction = I->getCalledFunction();
1011  if (CalledFunction) {
1012  ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineAttempt",
1013  I->getDebugLoc(), I->getParent())
1014  << "previous inlining reattempted for "
1015  << (Hot ? "hotness: '" : "size: '")
1016  << ore::NV("Callee", CalledFunction) << "' into '"
1017  << ore::NV("Caller", &F) << "'");
1018  }
1019  }
1020 }
1021 
// Collect into InlinedGUIDs the GUIDs of functions that should be available
// for inlining at call site CB (or under profile Samples) but are not
// defined in this module.
//
// For a non-CS-flat profile the work is delegated to
// FunctionSamples::findInlinedFunctions. Otherwise the context trie is
// walked breadth-first starting from the node for Samples' context.
//
// NOTE(review): listing line 1025 is absent from this extract; it presumably
// carries the remaining parameters (`SymbolMap` and `Threshold` are used
// below and are passed by the callers) - confirm against the real file.
1022 void SampleProfileLoader::findExternalInlineCandidate(
1023  CallBase *CB, const FunctionSamples *Samples,
1024  DenseSet<GlobalValue::GUID> &InlinedGUIDs,
1026 
1027  // If ExternalInlineAdvisor wants to inline an external function
1028  // make sure it's imported
1029  if (CB && getExternalInlineAdvisorShouldInline(*CB)) {
1030  // Samples may not exist for replayed function, if so
1031  // just add the direct GUID and move on
1032  if (!Samples) {
// NOTE(review): listing line 1034 (the argument of this insert call) is
// absent from this extract.
1033  InlinedGUIDs.insert(
1035  return;
1036  }
1037  // Otherwise, drop the threshold to import everything that we can
1038  Threshold = 0;
1039  }
1040 
1041  assert(Samples && "expect non-null caller profile");
1042 
1043  // For AutoFDO profile, retrieve candidate profiles by walking over
1044  // the nested inlinee profiles.
1045  if (!ProfileIsCSFlat) {
1046  Samples->findInlinedFunctions(InlinedGUIDs, SymbolMap, Threshold);
1047  return;
1048  }
1049 
// NOTE(review): listing line 1050 is absent from this extract; it presumably
// declares `Caller`, initialized by the expression on the next line.
1051  ContextTracker->getContextFor(Samples->getContext());
// FIFO worklist of trie nodes still to visit.
1052  std::queue<ContextTrieNode *> CalleeList;
1053  CalleeList.push(Caller);
1054  while (!CalleeList.empty()) {
1055  ContextTrieNode *Node = CalleeList.front();
1056  CalleeList.pop();
1057  FunctionSamples *CalleeSample = Node->getFunctionSamples();
1058  // For CSSPGO profile, retrieve candidate profile by walking over the
1059  // trie built for context profile. Note that also take call targets
1060  // even if callee doesn't have a corresponding context profile.
// A node without samples is skipped, and its children are not enqueued.
1061  if (!CalleeSample)
1062  continue;
1063 
1064  // If pre-inliner decision is used, honor that for importing as well.
// NOTE(review): listing lines 1066-1067 (the initializer of PreInline) are
// absent from this extract.
1065  bool PreInline =
1068  if (!PreInline && CalleeSample->getEntrySamples() < Threshold)
1069  continue;
1070 
1071  StringRef Name = CalleeSample->getFuncName();
// NOTE(review): listing line 1072 is absent from this extract; it presumably
// declares `Func` from a lookup of Name - confirm.
1073  // Add to the import list only when it's defined out of module.
1074  if (!Func || Func->isDeclaration())
1075  InlinedGUIDs.insert(FunctionSamples::getGUID(CalleeSample->getName()));
1076 
1077  // Import hot CallTargets, which may not be available in IR because full
1078  // profile annotation cannot be done until backend compilation in ThinLTO.
1079  for (const auto &BS : CalleeSample->getBodySamples())
1080  for (const auto &TS : BS.second.getCallTargets())
1081  if (TS.getValue() > Threshold) {
1082  StringRef CalleeName = CalleeSample->getFuncName(TS.getKey());
1083  const Function *Callee = SymbolMap.lookup(CalleeName);
1084  if (!Callee || Callee->isDeclaration())
1085  InlinedGUIDs.insert(FunctionSamples::getGUID(TS.getKey()));
1086  }
1087 
1088  // Import hot child context profile associated with callees. Note that this
1089  // may have some overlap with the call target loop above, but doing this
1090  // based child context profile again effectively allow us to use the max of
1091  // entry count and call target count to determine importing.
1092  for (auto &Child : Node->getAllChildContext()) {
1093  ContextTrieNode *CalleeNode = &Child.second;
1094  CalleeList.push(CalleeNode);
1095  }
1096  }
1097 }
1098 
1099 /// Iteratively inline hot callsites of a function.
1100 ///
1101 /// Iteratively traverse all callsites of the function \p F, and find if
1102 /// the corresponding inlined instance exists and is hot in profile. If
1103 /// it is hot enough, inline the callsites and adds new callsites of the
1104 /// callee into the caller. If the call is an indirect call, first promote
1105 /// it to direct call. Each indirect call is limited with a single target.
1106 ///
1107 /// \param F function to perform iterative inlining.
1108 /// \param InlinedGUIDs a set to be updated to include all GUIDs that are
1109 /// inlined in the profiled binary.
1110 ///
1111 /// \returns True if there is any inline happened.
1112 bool SampleProfileLoader::inlineHotFunctions(
1113  Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
1114  // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
1115  // Profile symbol list is ignored when profile-sample-accurate is on.
// NOTE(review): listing line 1117 (part of this assert condition) is absent
// from this extract.
1116  assert((!ProfAccForSymsInList ||
1118  !F.hasFnAttribute("profile-sample-accurate"))) &&
1119  "ProfAccForSymsInList should be false when profile-sample-accurate "
1120  "is enabled");
1121 
// Call sites that had a callee profile but have not been inlined; entries
// are erased below once inlining succeeds.
1122  DenseMap<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
1123  bool Changed = false;
1124  bool LocalChanged = true;
// Repeat until a full sweep over F inlines nothing.
1125  while (LocalChanged) {
1126  LocalChanged = false;
// NOTE(review): listing line 1127 is absent from this extract; it presumably
// declares `CIS`, the per-sweep list of call sites filled in below.
1128  for (auto &BB : F) {
1129  bool Hot = false;
1130  SmallVector<CallBase *, 10> AllCandidates;
1131  SmallVector<CallBase *, 10> ColdCandidates;
1132  for (auto &I : BB.getInstList()) {
1133  const FunctionSamples *FS = nullptr;
1134  if (auto *CB = dyn_cast<CallBase>(&I)) {
1135  if (!isa<IntrinsicInst>(I)) {
1136  if ((FS = findCalleeFunctionSamples(*CB))) {
1137  assert((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) &&
1138  "GUIDToFuncNameMap has to be populated");
1139  AllCandidates.push_back(CB);
1140  if (FS->getEntrySamples() > 0 || ProfileIsCSFlat)
1141  LocalNotInlinedCallSites.try_emplace(CB, FS);
1142  if (callsiteIsHot(FS, PSI, ProfAccForSymsInList))
1143  Hot = true;
1144  else if (shouldInlineColdCallee(*CB))
1145  ColdCandidates.push_back(CB);
1146  } else if (getExternalInlineAdvisorShouldInline(*CB)) {
1147  AllCandidates.push_back(CB);
1148  }
1149  }
1150  }
1151  }
// One hot call site in the block (or the presence of an external advisor)
// makes every candidate of the block eligible; otherwise only the cold
// candidates accepted by shouldInlineColdCallee are kept.
1152  if (Hot || ExternalInlineAdvisor) {
1153  CIS.insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end());
1154  emitOptimizationRemarksForInlineCandidates(AllCandidates, F, true);
1155  } else {
1156  CIS.insert(CIS.begin(), ColdCandidates.begin(), ColdCandidates.end());
1157  emitOptimizationRemarksForInlineCandidates(ColdCandidates, F, false);
1158  }
1159  }
1160  for (CallBase *I : CIS) {
1161  Function *CalledFunction = I->getCalledFunction();
// lookup() yields a null profile for call sites that were never recorded in
// LocalNotInlinedCallSites.
1162  InlineCandidate Candidate = {I, LocalNotInlinedCallSites.lookup(I),
1163  0 /* dummy count */,
1164  1.0 /* dummy distribution factor */};
1165  // Do not inline recursive calls.
1166  if (CalledFunction == &F)
1167  continue;
1168  if (I->isIndirectCall()) {
// Sum is filled in by findIndirectCallFunctionSamples.
1169  uint64_t Sum;
1170  for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
1171  uint64_t SumOrigin = Sum;
// In the ThinLTO pre-link phase targets are only recorded for import; no
// promotion or inlining is attempted here.
1172  if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1173  findExternalInlineCandidate(I, FS, InlinedGUIDs, SymbolMap,
1174  PSI->getOrCompHotCountThreshold());
1175  continue;
1176  }
1177  if (!callsiteIsHot(FS, PSI, ProfAccForSymsInList))
1178  continue;
1179 
1180  Candidate = {I, FS, FS->getEntrySamples(), 1.0};
1181  if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum)) {
1182  LocalNotInlinedCallSites.erase(I);
1183  LocalChanged = true;
1184  }
1185  }
1186  } else if (CalledFunction && CalledFunction->getSubprogram() &&
1187  !CalledFunction->isDeclaration()) {
1188  if (tryInlineCandidate(Candidate)) {
1189  LocalNotInlinedCallSites.erase(I);
1190  LocalChanged = true;
1191  }
1192  } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1193  findExternalInlineCandidate(I, findCalleeFunctionSamples(*I),
1194  InlinedGUIDs, SymbolMap,
1195  PSI->getOrCompHotCountThreshold());
1196  }
1197  }
1198  Changed |= LocalChanged;
1199  }
1200 
1201  // For CS profile, profile for not inlined context will be merged when
1202  // base profile is being retrieved.
// NOTE(review): listing line 1203 is absent from this extract; given the
// comment above it presumably guards the following call - confirm against
// the real file.
1204  promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites, F);
1205  return Changed;
1206 }
1207 
// Try to inline the direct call described by Candidate.
//
// The decision comes from shouldInlineCandidate: a "never" cost emits an
// "InlineFail" remark and returns false; any other cost that converts to
// false also returns false without a remark. On successful inlining the
// function reports the inline, optionally fills InlinedCallSites with the
// call sites exposed by the inlined body, updates the context tracker and
// statistics, and returns true. Returns false if InlineFunction fails.
//
// InlinedCallSites may be null; when non-null it is cleared before being
// filled.
1208 bool SampleProfileLoader::tryInlineCandidate(
1209  InlineCandidate &Candidate, SmallVector<CallBase *, 8> *InlinedCallSites) {
1210 
1211  CallBase &CB = *Candidate.CallInstr;
1212  Function *CalledFunction = CB.getCalledFunction();
1213  assert(CalledFunction && "Expect a callee with definition");
// Capture location and parent block now; they are used for remarks after
// the call instruction has been inlined away.
1214  DebugLoc DLoc = CB.getDebugLoc();
1215  BasicBlock *BB = CB.getParent();
1216 
1217  InlineCost Cost = shouldInlineCandidate(Candidate);
1218  if (Cost.isNever()) {
1219  ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineFail", DLoc, BB)
1220  << "incompatible inlining");
1221  return false;
1222  }
1223 
1224  if (!Cost)
1225  return false;
1226 
1227  InlineFunctionInfo IFI(nullptr, GetAC);
1228  IFI.UpdateProfile = false;
1229  if (InlineFunction(CB, IFI).isSuccess()) {
1230  // Merge the attributes based on the inlining.
// NOTE(review): listing line 1231 is absent from this extract; it presumably
// opens the attribute-merging call whose last argument is on the next line.
1232  *CalledFunction);
1233 
1234  // The call to InlineFunction erases I, so we can't pass it here.
1235  emitInlinedIntoBasedOnCost(*ORE, DLoc, BB, *CalledFunction,
1236  *BB->getParent(), Cost, true, CSINLINE_DEBUG);
1237 
1238  // Now populate the list of newly exposed call sites.
1239  if (InlinedCallSites) {
1240  InlinedCallSites->clear();
1241  for (auto &I : IFI.InlinedCallSites)
1242  InlinedCallSites->push_back(I);
1243  }
1244 
1245  if (ProfileIsCSFlat)
1246  ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
// Counted for every successful inline; the `if` above governs only the
// single statement that follows it.
1247  ++NumCSInlined;
1248 
1249  // Prorate inlined probes for a duplicated inlining callsite which probably
1250  // has a distribution less than 100%. Samples for an inlinee should be
1251  // distributed among the copies of the original callsite based on each
1252  // callsite's distribution factor for counts accuracy. Note that an inlined
1253  // probe may come with its own distribution factor if it has been duplicated
1254  // in the inlinee body. The two factor are multiplied to reflect the
1255  // aggregation of duplication.
1256  if (Candidate.CallsiteDistribution < 1) {
1257  for (auto &I : IFI.InlinedCallSites) {
// NOTE(review): listing line 1259 is absent from this extract; it presumably
// opens the call that applies the combined factor, ending on line 1260.
1258  if (Optional<PseudoProbe> Probe = extractProbe(*I))
1260  Candidate.CallsiteDistribution);
1261  }
1262  NumDuplicatedInlinesite++;
1263  }
1264 
1265  return true;
1266  }
1267  return false;
1268 }
1269 
1270 bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
1271  CallBase *CB) {
1272  assert(CB && "Expect non-null call instruction");
1273 
1274  if (isa<IntrinsicInst>(CB))
1275  return false;
1276 
1277  // Find the callee's profile. For indirect call, find hottest target profile.
1278  const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB);
1279  // If ExternalInlineAdvisor wants to inline this site, do so even
1280  // if Samples are not present.
1281  if (!CalleeSamples && !getExternalInlineAdvisorShouldInline(*CB))
1282  return false;
1283 
1284  float Factor = 1.0;
1285  if (Optional<PseudoProbe> Probe = extractProbe(*CB))
1286  Factor = Probe->Factor;
1287 
1288  uint64_t CallsiteCount = 0;
1289  ErrorOr<uint64_t> Weight = getBlockWeight(CB->getParent());
1290  if (Weight)
1291  CallsiteCount = Weight.get();
1292  if (CalleeSamples)
1293  CallsiteCount = std::max(
1294  CallsiteCount, uint64_t(CalleeSamples->getEntrySamples() * Factor));
1295 
1296  *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
1297  return true;
1298 }
1299 
// Ask the external inline advisor, if one is installed, about call site CB.
//
// Returns a "never" cost when the advisor gives advice that does not
// recommend inlining, an "always" cost when it recommends inlining, and an
// empty value when there is no advisor or it returns no advice. The advice
// object is told which of the two outcomes was taken before returning.
//
// NOTE(review): listing line 1300 (the return type) is absent from this
// extract; the callers store the result in Optional<InlineCost>.
1301 SampleProfileLoader::getExternalInlineAdvisorCost(CallBase &CB) {
1302  std::unique_ptr<InlineAdvice> Advice = nullptr;
1303  if (ExternalInlineAdvisor) {
1304  Advice = ExternalInlineAdvisor->getAdvice(CB);
1305  if (Advice) {
1306  if (!Advice->isInliningRecommended()) {
1307  Advice->recordUnattemptedInlining();
1308  return InlineCost::getNever("not previously inlined");
1309  }
// NOTE(review): recordInlining() is called here, before any inlining has
// actually been attempted by the caller - confirm this is intended.
1310  Advice->recordInlining();
1311  return InlineCost::getAlways("previously inlined");
1312  }
1313  }
1314 
// No advisor or no advice: empty result.
1315  return {};
1316 }
1317 
1318 bool SampleProfileLoader::getExternalInlineAdvisorShouldInline(CallBase &CB) {
1319  Optional<InlineCost> Cost = getExternalInlineAdvisorCost(CB);
1320  return Cost ? !!Cost.getValue() : false;
1321 }
1322 
// Compute the inline decision for a direct-call candidate as an InlineCost.
//
// Order of precedence, as coded below:
//  1. a cost supplied by the external inline advisor, if any;
//  2. "never"/"always" results from getInlineCost;
//  3. an "always" cost when the pre-inliner decision is in use and the
//     callee context is marked ContextShouldBeInlined (and is not
//     synthetic);
//  4. otherwise the computed cost paired with a threshold.
1323 InlineCost
1324 SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
1325  if (Optional<InlineCost> ReplayCost =
1326  getExternalInlineAdvisorCost(*Candidate.CallInstr))
1327  return ReplayCost.getValue();
1328  // Adjust threshold based on call site hotness, only do this for callsite
1329  // prioritized inliner because otherwise cost-benefit check is done earlier.
1330  int SampleThreshold = SampleColdCallSiteThreshold;
// NOTE(review): listing line 1331 is absent from this extract; it presumably
// opens the conditional block closed on line 1336 - confirm against the real
// file.
1332  if (Candidate.CallsiteCount > PSI->getHotCountThreshold())
1333  SampleThreshold = SampleHotCallSiteThreshold;
1334  else if (!ProfileSizeInline)
1335  return InlineCost::getNever("cold callsite");
1336  }
1337 
1338  Function *Callee = Candidate.CallInstr->getCalledFunction();
1339  assert(Callee && "Expect a definition for inline candidate of direct call");
1340 
1341  InlineParams Params = getInlineParams();
1342  // We will ignore the threshold from inline cost, so always get full cost.
1343  Params.ComputeFullInlineCost = true;
// NOTE(review): listing line 1344 is absent from this extract.
1345  // Checks if there is anything in the reachable portion of the callee at
1346  // this callsite that makes this inlining potentially illegal. Need to
1347  // set ComputeFullInlineCost, otherwise getInlineCost may return early
1348  // when cost exceeds threshold without checking all IRs in the callee.
1349  // The actual cost does not matter because we only check isNever() to
1350  // see if it is legal to inline the callsite.
1351  InlineCost Cost = getInlineCost(*Candidate.CallInstr, Callee, Params,
1352  GetTTI(*Callee), GetAC, GetTLI);
1353 
1354  // Honor always inline and never inline from call analyzer
1355  if (Cost.isNever() || Cost.isAlways())
1356  return Cost;
1357 
1358  // With CSSPGO, the preinliner in llvm-profgen can estimate global inline
1359  // decisions based on hotness as well as accurate function byte sizes for
1360  // given context using function/inlinee sizes from previous build. It
1361  // stores the decision in profile, and also adjust/merge context profile
1362  // aiming at better context-sensitive post-inline profile quality, assuming
1363  // all inline decision estimates are going to be honored by compiler. Here
1364  // we replay that inline decision under `sample-profile-use-preinliner`.
1365  // Note that we don't need to handle negative decision from preinliner as
1366  // context profile for not inlined calls are merged by preinliner already.
1367  if (UsePreInlinerDecision && Candidate.CalleeSamples) {
1368  // Once two node are merged due to promotion, we're losing some context
1369  // so the original context-sensitive preinliner decision should be ignored
1370  // for SyntheticContext.
1371  SampleContext &Context = Candidate.CalleeSamples->getContext();
1372  if (!Context.hasState(SyntheticContext) &&
1373  Context.hasAttribute(ContextShouldBeInlined))
1374  return InlineCost::getAlways("preinliner");
1375  }
1376 
1377  // For old FDO inliner, we inline the call site as long as cost is not
1378  // "Never". The cost-benefit check is done earlier.
// NOTE(review): listing line 1379 is absent from this extract; it presumably
// opens the conditional block closed on line 1381 - confirm.
// Pairing the cost with INT_MAX as threshold accepts any finite cost.
1380  return InlineCost::get(Cost.getCost(), INT_MAX);
1381  }
1382 
1383  // Otherwise only use the cost from call analyzer, but overwrite threshold with
1384  // Sample PGO threshold.
1385  return InlineCost::get(Cost.getCost(), SampleThreshold);
1386 }
1387 
// Priority-driven variant of hot-call-site inlining for function F.
//
// All call sites of F that yield a candidate are put into a priority queue.
// Candidates are popped one at a time while the queue is non-empty and F's
// instruction count stays below SizeLimit; call sites exposed by a
// successful inline are added back to the queue. Indirect calls go through
// tryPromoteAndInlineCandidate, direct calls with a definition through
// tryInlineCandidate, and in the ThinLTO pre-link phase remaining call sites
// are only recorded in InlinedGUIDs for import.
//
// Returns true if at least one call site was inlined.
1388 bool SampleProfileLoader::inlineHotFunctionsWithPriority(
1389  Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
1390 
1391  // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
1392  // Profile symbol list is ignored when profile-sample-accurate is on.
// NOTE(review): listing line 1394 (part of this assert condition) is absent
// from this extract.
1393  assert((!ProfAccForSymsInList ||
1395  !F.hasFnAttribute("profile-sample-accurate"))) &&
1396  "ProfAccForSymsInList should be false when profile-sample-accurate "
1397  "is enabled");
1398 
1399  // Populating worklist with initial call sites from root inliner, along
1400  // with call site weights.
1401  CandidateQueue CQueue;
1402  InlineCandidate NewCandidate;
1403  for (auto &BB : F) {
1404  for (auto &I : BB.getInstList()) {
1405  auto *CB = dyn_cast<CallBase>(&I);
1406  if (!CB)
1407  continue;
1408  if (getInlineCandidate(&NewCandidate, CB))
1409  CQueue.push(NewCandidate);
1410  }
1411  }
1412 
1413  // Cap the size growth from profile guided inlining. This is needed even
1414  // though cost of each inline candidate already accounts for callee size,
1415  // because with top-down inlining, we can grow inliner size significantly
1416  // with large number of smaller inlinees each pass the cost check.
// NOTE(review): listing line 1417 is absent from this extract; it presumably
// opens the assert whose message follows.
1418  "Max inline size limit should not be smaller than min inline size "
1419  "limit.");
1420  unsigned SizeLimit = F.getInstructionCount() * ProfileInlineGrowthLimit;
// NOTE(review): listing lines 1421-1422 and 1424 are absent from this
// extract; they presumably adjust SizeLimit further (the statistics at the
// end compare it with ProfileInlineLimitMax / ProfileInlineLimitMin) and
// supply the body of the `if` below - confirm against the real file.
1423  if (ExternalInlineAdvisor)
1425 
// Call sites that were considered but not inlined, with their profiles.
1426  DenseMap<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
1427 
1428  // Perform iterative BFS call site prioritized inlining
1429  bool Changed = false;
1430  while (!CQueue.empty() && F.getInstructionCount() < SizeLimit) {
1431  InlineCandidate Candidate = CQueue.top();
1432  CQueue.pop();
1433  CallBase *I = Candidate.CallInstr;
1434  Function *CalledFunction = I->getCalledFunction();
1435 
// Recursive calls are never inlined.
1436  if (CalledFunction == &F)
1437  continue;
1438  if (I->isIndirectCall()) {
1439  uint64_t Sum = 0;
1440  auto CalleeSamples = findIndirectCallFunctionSamples(*I, Sum);
// SumOrigin keeps the unscaled total; Sum is scaled by the call site's
// distribution factor.
1441  uint64_t SumOrigin = Sum;
1442  Sum *= Candidate.CallsiteDistribution;
// Number of targets promoted and inlined so far for this call site.
1443  unsigned ICPCount = 0;
1444  for (const auto *FS : CalleeSamples) {
1445  // TODO: Consider disable pre-LTO ICP for MonoLTO as well
1446  if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1447  findExternalInlineCandidate(I, FS, InlinedGUIDs, SymbolMap,
1448  PSI->getOrCompHotCountThreshold());
1449  continue;
1450  }
1451  uint64_t EntryCountDistributed =
1452  FS->getEntrySamples() * Candidate.CallsiteDistribution;
1453  // In addition to regular inline cost check, we also need to make sure
1454  // ICP isn't introducing excessive speculative checks even if individual
1455  // target looks beneficial to promote and inline. That means we should
1456  // only do ICP when there's a small number dominant targets.
1457  if (ICPCount >= ProfileICPRelativeHotnessSkip &&
1458  EntryCountDistributed * 100 < SumOrigin * ProfileICPRelativeHotness)
1459  break;
1460  // TODO: Fix CallAnalyzer to handle all indirect calls.
1461  // For indirect call, we don't run CallAnalyzer to get InlineCost
1462  // before actual inlining. This is because we could see two different
1463  // types from the same definition, which makes CallAnalyzer choke as
1464  // it's expecting matching parameter type on both caller and callee
1465  // side. See example from PR18962 for the triggering cases (the bug was
1466  // fixed, but we generate different types).
// Stop at the first target that is not hot; the remaining targets are not
// examined.
1467  if (!PSI->isHotCount(EntryCountDistributed))
1468  break;
1469  SmallVector<CallBase *, 8> InlinedCallSites;
1470  // Attach function profile for promoted indirect callee, and update
1471  // call site count for the promoted inline candidate too.
1472  Candidate = {I, FS, EntryCountDistributed,
1473  Candidate.CallsiteDistribution};
1474  if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
1475  &InlinedCallSites)) {
// Newly exposed call sites become candidates themselves.
1476  for (auto *CB : InlinedCallSites) {
1477  if (getInlineCandidate(&NewCandidate, CB))
1478  CQueue.emplace(NewCandidate);
1479  }
1480  ICPCount++;
1481  Changed = true;
1482  } else if (!ContextTracker) {
1483  LocalNotInlinedCallSites.try_emplace(I, FS);
1484  }
1485  }
1486  } else if (CalledFunction && CalledFunction->getSubprogram() &&
1487  !CalledFunction->isDeclaration()) {
1488  SmallVector<CallBase *, 8> InlinedCallSites;
1489  if (tryInlineCandidate(Candidate, &InlinedCallSites)) {
1490  for (auto *CB : InlinedCallSites) {
1491  if (getInlineCandidate(&NewCandidate, CB))
1492  CQueue.emplace(NewCandidate);
1493  }
1494  Changed = true;
1495  } else if (!ContextTracker) {
// NOTE(review): Candidate.CalleeSamples may be null here, since
// getInlineCandidate accepts advisor-approved sites without a profile -
// confirm that consumers of this map tolerate a null profile.
1496  LocalNotInlinedCallSites.try_emplace(I, Candidate.CalleeSamples);
1497  }
1498  } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1499  findExternalInlineCandidate(I, findCalleeFunctionSamples(*I),
1500  InlinedGUIDs, SymbolMap,
1501  PSI->getOrCompHotCountThreshold());
1502  }
1503  }
1504 
// Candidates are left over only if the size limit stopped the loop; record
// which kind of limit was in effect.
1505  if (!CQueue.empty()) {
1506  if (SizeLimit == (unsigned)ProfileInlineLimitMax)
1507  ++NumCSInlinedHitMaxLimit;
1508  else if (SizeLimit == (unsigned)ProfileInlineLimitMin)
1509  ++NumCSInlinedHitMinLimit;
1510  else
1511  ++NumCSInlinedHitGrowthLimit;
1512  }
1513 
1514  // For CS profile, profile for not inlined context will be merged when
1515  // base profile is being retrieved.
// NOTE(review): listing line 1516 is absent from this extract; given the
// comment above it presumably guards the following call - confirm.
1517  promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites, F);
1518  return Changed;
1519 }
1520 
// Process call sites of F that had a callee profile but were not inlined.
//
// For each recorded call site whose callee is defined in the module, a
// "NotInline" remark is emitted and NumCSNotInlined is incremented. Then,
// unless the profile is empty, either the callee profile is merged into the
// callee's outlined profile (ProfileMergeInlinee) or its entry samples are
// accumulated in notInlinedCallInfo.
//
// NOTE(review): listing line 1522 is absent from this extract; it presumably
// declares the `NonInlinedCallSites` parameter iterated below (callers pass
// a DenseMap<CallBase *, const FunctionSamples *>) - confirm.
1521 void SampleProfileLoader::promoteMergeNotInlinedContextSamples(
1523  const Function &F) {
1524  // Accumulate not inlined callsite information into notInlinedSamples
1525  for (const auto &Pair : NonInlinedCallSites) {
1526  CallBase *I = Pair.getFirst();
1527  Function *Callee = I->getCalledFunction();
// Indirect calls and calls to declarations are ignored.
1528  if (!Callee || Callee->isDeclaration())
1529  continue;
1530 
1531  ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "NotInline",
1532  I->getDebugLoc(), I->getParent())
1533  << "previous inlining not repeated: '"
1534  << ore::NV("Callee", Callee) << "' into '"
1535  << ore::NV("Caller", &F) << "'");
1536 
1537  ++NumCSNotInlined;
// NOTE(review): FS is dereferenced without a null check; confirm that the
// map never holds a null profile for a call site that reaches this point.
1538  const FunctionSamples *FS = Pair.getSecond();
1539  if (FS->getTotalSamples() == 0 && FS->getEntrySamples() == 0) {
1540  continue;
1541  }
1542 
1543  if (ProfileMergeInlinee) {
1544  // A function call can be replicated by optimizations like callsite
1545  // splitting or jump threading and the replicates end up sharing the
1546  // sample nested callee profile instead of slicing the original
1547  // inlinee's profile. We want to do merge exactly once by filtering out
1548  // callee profiles with a non-zero head sample count.
1549  if (FS->getHeadSamples() == 0) {
1550  // Use entry samples as head samples during the merge, as inlinees
1551  // don't have head samples.
// The const_cast mutates the shared profile so that a later visit of the
// same profile sees non-zero head samples and skips the merge.
1552  const_cast<FunctionSamples *>(FS)->addHeadSamples(
1553  FS->getEntrySamples());
1554 
1555  // Note that we have to do the merge right after processing function.
1556  // This allows OutlineFS's profile to be used for annotation during
1557  // top-down processing of functions' annotation.
1558  FunctionSamples *OutlineFS = Reader->getOrCreateSamplesFor(*Callee);
1559  OutlineFS->merge(*FS, 1);
1560  // Set outlined profile to be synthetic to not bias the inliner.
1561  OutlineFS->SetContextSynthetic();
1562  }
1563  } else {
// Create a zero-initialized entry for the callee on first sight, then add
// this call site's entry samples to it.
1564  auto pair =
1565  notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
1566  pair.first->second.entryCount += FS->getEntrySamples();
1567  }
1568  }
1569 }
1570 
1571 /// Returns the sorted CallTargetMap \p M by count in descending order.
///
/// Each sorted entry is converted to an InstrProfValueData whose value is
/// the GUID computed from the target name and whose count is the target's
/// sample count.
// NOTE(review): listing lines 1572-1574 are absent from this extract; they
// presumably hold the function signature and the declaration of the result
// vector `R` - confirm against the real file.
1575  for (const auto &I : SampleRecord::SortCallTargets(M)) {
1576  R.emplace_back(
1577  InstrProfValueData{FunctionSamples::getGUID(I.first), I.second});
1578  }
1579  return R;
1580 }
1581 
1582 // Generate MD_prof metadata for every branch instruction using the
1583 // edge weights computed during propagation.
1584 void SampleProfileLoader::generateMDProfMetadata(Function &F) {
1585  // Generate MD_prof metadata for every branch instruction using the
1586  // edge weights computed during propagation.
1587  LLVM_DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n");
1588  LLVMContext &Ctx = F.getContext();
1589  MDBuilder MDB(Ctx);
1590  for (auto &BI : F) {
1591  BasicBlock *BB = &BI;
1592 
1593  if (BlockWeights[BB]) {
1594  for (auto &I : BB->getInstList()) {
1595  if (!isa<CallInst>(I) && !isa<InvokeInst>(I))
1596  continue;
1597  if (!cast<CallBase>(I).getCalledFunction()) {
1598  const DebugLoc &DLoc = I.getDebugLoc();
1599  if (!DLoc)
1600  continue;
1601  const DILocation *DIL = DLoc;
1602  const FunctionSamples *FS = findFunctionSamples(I);
1603  if (!FS)
1604  continue;
1606  auto T = FS->findCallTargetMapAt(CallSite);
1607  if (!T || T.get().empty())
1608  continue;
1610  // Prorate the callsite counts based on the pre-ICP distribution
1611  // factor to reflect what is already done to the callsite before
1612  // ICP, such as calliste cloning.
1613  if (Optional<PseudoProbe> Probe = extractProbe(I)) {
1614  if (Probe->Factor < 1)
1615  T = SampleRecord::adjustCallTargets(T.get(), Probe->Factor);
1616  }
1617  }
1618  SmallVector<InstrProfValueData, 2> SortedCallTargets =
1620  uint64_t Sum = 0;
1621  for (const auto &C : T.get())
1622  Sum += C.second;
1623  // With CSSPGO all indirect call targets are counted torwards the
1624  // original indirect call site in the profile, including both
1625  // inlined and non-inlined targets.
1627  if (const FunctionSamplesMap *M =
1628  FS->findFunctionSamplesMapAt(CallSite)) {
1629  for (const auto &NameFS : *M)
1630  Sum += NameFS.second.getEntrySamples();
1631  }
1632  }
1633  if (Sum)
1634  updateIDTMetaData(I, SortedCallTargets, Sum);
1635  else if (OverwriteExistingWeights)
1636  I.setMetadata(LLVMContext::MD_prof, nullptr);
1637  } else if (!isa<IntrinsicInst>(&I)) {
1638  I.setMetadata(LLVMContext::MD_prof,
1639  MDB.createBranchWeights(
1640  {static_cast<uint32_t>(BlockWeights[BB])}));
1641  }
1642  }
1644  // Set profile metadata (possibly annotated by LTO prelink) to zero or
1645  // clear it for cold code.
1646  for (auto &I : BB->getInstList()) {
1647  if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
1648  if (cast<CallBase>(I).isIndirectCall())
1649  I.setMetadata(LLVMContext::MD_prof, nullptr);
1650  else
1651  I.setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(0));
1652  }
1653  }
1654  }
1655 
1656  Instruction *TI = BB->getTerminator();
1657  if (TI->getNumSuccessors() == 1)
1658  continue;
1659  if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI) &&
1660  !isa<IndirectBrInst>(TI))
1661  continue;
1662 
1663  DebugLoc BranchLoc = TI->getDebugLoc();
1664  LLVM_DEBUG(dbgs() << "\nGetting weights for branch at line "
1665  << ((BranchLoc) ? Twine(BranchLoc.getLine())
1666  : Twine("<UNKNOWN LOCATION>"))
1667  << ".\n");
1668  SmallVector<uint32_t, 4> Weights;
1669  uint32_t MaxWeight = 0;
1670  Instruction *MaxDestInst;
1671  // Since profi treats multiple edges (multiway branches) as a single edge,
1672  // we need to distribute the computed weight among the branches. We do
1673  // this by evenly splitting the edge weight among destinations.
1674  DenseMap<const BasicBlock *, uint64_t> EdgeMultiplicity;
1675  std::vector<uint64_t> EdgeIndex;
1676  if (SampleProfileUseProfi) {
1677  EdgeIndex.resize(TI->getNumSuccessors());
1678  for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
1679  const BasicBlock *Succ = TI->getSuccessor(I);
1680  EdgeIndex[I] = EdgeMultiplicity[Succ];
1681  EdgeMultiplicity[Succ]++;
1682  }
1683  }
1684  for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
1685  BasicBlock *Succ = TI->getSuccessor(I);
1686  Edge E = std::make_pair(BB, Succ);
1687  uint64_t Weight = EdgeWeights[E];
1688  LLVM_DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), E));
1689  // Use uint32_t saturated arithmetic to adjust the incoming weights,
1690  // if needed. Sample counts in profiles are 64-bit unsigned values,
1691  // but internally branch weights are expressed as 32-bit values.
1692  if (Weight > std::numeric_limits<uint32_t>::max()) {
1693  LLVM_DEBUG(dbgs() << " (saturated due to uint32_t overflow)");
1695  }
1696  if (!SampleProfileUseProfi) {
1697  // Weight is added by one to avoid propagation errors introduced by
1698  // 0 weights.
1699  Weights.push_back(static_cast<uint32_t>(Weight + 1));
1700  } else {
1701  // Profi creates proper weights that do not require "+1" adjustments but
1702  // we evenly split the weight among branches with the same destination.
1703  uint64_t W = Weight / EdgeMultiplicity[Succ];
1704  // Rounding up, if needed, so that first branches are hotter.
1705  if (EdgeIndex[I] < Weight % EdgeMultiplicity[Succ])
1706  W++;
1707  Weights.push_back(static_cast<uint32_t>(W));
1708  }
1709  if (Weight != 0) {
1710  if (Weight > MaxWeight) {
1711  MaxWeight = Weight;
1712  MaxDestInst = Succ->getFirstNonPHIOrDbgOrLifetime();
1713  }
1714  }
1715  }
1716 
1717  uint64_t TempWeight;
1718  // Only set weights if there is at least one non-zero weight.
1719  // In any other case, let the analyzer set weights.
1720  // Do not set weights if the weights are present unless under
1721  // OverwriteExistingWeights. In ThinLTO, the profile annotation is done
1722  // twice. If the first annotation already set the weights, the second pass
1723  // does not need to set it. With OverwriteExistingWeights, Blocks with zero
1724  // weight should have their existing metadata (possibly annotated by LTO
1725  // prelink) cleared.
1726  if (MaxWeight > 0 &&
1727  (!TI->extractProfTotalWeight(TempWeight) || OverwriteExistingWeights)) {
1728  LLVM_DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
1729  TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
1730  ORE->emit([&]() {
1731  return OptimizationRemark(DEBUG_TYPE, "PopularDest", MaxDestInst)
1732  << "most popular destination for conditional branches at "
1733  << ore::NV("CondBranchesLoc", BranchLoc);
1734  });
1735  } else {
1737  TI->setMetadata(LLVMContext::MD_prof, nullptr);
1738  LLVM_DEBUG(dbgs() << "CLEARED. All branch weights are zero.\n");
1739  } else {
1740  LLVM_DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
1741  }
1742  }
1743  }
1744 }
1745 
1746 /// Drive profile annotation for one function: check that the loaded profile
1747 /// can be used for \p F, run the sample-profile inliner, compute and
1748 /// propagate weights, and emit MD_prof metadata if a step reported a change.
1749 /// \param F The function to annotate.
1750 ///
1751 /// \returns true if \p F was modified. Returns false, otherwise.
/// NOTE(review): listing lines 1755 and 1773 (the `if` headers that pair with
/// the two `else` branches below) are missing from this extract.
1752 bool SampleProfileLoader::emitAnnotations(Function &F) {
1753  bool Changed = false;
1754 
      // The probe manager rejected the profile for F (logged as a CFG
      // mismatch): count the mismatch and leave F untouched.
1756  if (!ProbeManager->profileIsValid(F, *Samples)) {
1757  LLVM_DEBUG(
1758  dbgs() << "Profile is invalid due to CFG mismatch for Function "
1759  << F.getName());
1760  ++NumMismatchedProfile;
1761  return false;
1762  }
1763  ++NumMatchedProfile;
1764  } else {
      // A function location of 0 is treated as "cannot annotate": bail out
      // without modifying F.
1765  if (getFunctionLoc(F) == 0)
1766  return false;
1767 
1768  LLVM_DEBUG(dbgs() << "Line number for the first instruction in "
1769  << F.getName() << ": " << getFunctionLoc(F) << "\n");
1770  }
1771 
      // InlinedGUIDs is filled by whichever inliner runs and is then handed to
      // the weight computation below.
1772  DenseSet<GlobalValue::GUID> InlinedGUIDs;
1774  Changed |= inlineHotFunctionsWithPriority(F, InlinedGUIDs);
1775  else
1776  Changed |= inlineHotFunctions(F, InlinedGUIDs);
1777 
1778  Changed |= computeAndPropagateWeights(F, InlinedGUIDs);
1779 
      // Metadata is only generated when inlining or weight propagation
      // reported a change.
1780  if (Changed)
1781  generateMDProfMetadata(F);
1782 
      // Coverage remarks are emitted regardless of whether anything changed.
1783  emitCoverageRemarks(F);
1784  return Changed;
1785 }
1786 
1788 
// Legacy pass-manager registration of SampleProfileLoaderLegacyPass under the
// pass argument "sample-profile".
// NOTE(review): the lines between BEGIN and END (listing lines 1791-1794) and
// the tail of the END invocation (line 1796) are missing from this extract.
1789 INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile",
1790  "Sample Profile loader", false, false)
1795 INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",
1797 
1798 std::unique_ptr<ProfiledCallGraph>
1799 SampleProfileLoader::buildProfiledCallGraph(CallGraph &CG) {
1800  std::unique_ptr<ProfiledCallGraph> ProfiledCG;
1801  if (ProfileIsCSFlat)
1802  ProfiledCG = std::make_unique<ProfiledCallGraph>(*ContextTracker);
1803  else
1804  ProfiledCG = std::make_unique<ProfiledCallGraph>(Reader->getProfiles());
1805 
1806  // Add all functions into the profiled call graph even if they are not in
1807  // the profile. This makes sure functions missing from the profile still
1808  // gets a chance to be processed.
1809  for (auto &Node : CG) {
1810  const auto *F = Node.first;
1811  if (!F || F->isDeclaration() || !F->hasFnAttribute("use-sample-profile"))
1812  continue;
1813  ProfiledCG->addProfiledFunction(FunctionSamples::getCanonicalFnName(*F));
1814  }
1815 
1816  return ProfiledCG;
1817 }
1818 
/// Compute the list of functions of \p M to be processed by the loader.
///
/// Only defined functions carrying the "use-sample-profile" attribute are
/// listed. When top-down loading is off or no call graph is available, the
/// functions are returned in module order and ProfileMergeInlinee is switched
/// off. Otherwise functions are collected SCC by SCC, from either the profiled
/// call graph or the static call graph, and the collected list is reversed
/// before it is returned.
///
/// \param M The module whose functions are ordered.
/// \param CG The static call graph of \p M; may be null.
/// \returns The functions to process, in processing order.
/// NOTE(review): listing lines 1824 (condition of the warning), 1902 and 1913
/// (iterator declarations) are missing from this extract.
1819 std::vector<Function *>
1820 SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
1821  std::vector<Function *> FunctionOrderList;
1822  FunctionOrderList.reserve(M.size());
1823 
1825  errs() << "WARNING: -use-profiled-call-graph ignored, should be used "
1826  "together with -sample-profile-top-down-load.\n";
1827 
1828  if (!ProfileTopDownLoad || CG == nullptr) {
1829  if (ProfileMergeInlinee) {
1830  // Disable ProfileMergeInlinee if profile is not loaded in top down order,
1831  // because the profile for a function may be used for the profile
1832  // annotation of its outline copy before the profile merging of its
1833  // non-inlined inline instances, and that is not how
1834  // ProfileMergeInlinee is supposed to work.
1835  ProfileMergeInlinee = false;
1836  }
1837 
      // No ordering requested or possible: return eligible functions in module
      // order (this list is not reversed).
1838  for (Function &F : M)
1839  if (!F.isDeclaration() && F.hasFnAttribute("use-sample-profile"))
1840  FunctionOrderList.push_back(&F);
1841  return FunctionOrderList;
1842  }
1843 
1844  assert(&CG->getModule() == &M);
1845 
      // The profiled call graph is used when requested explicitly, or for flat
      // CS profiles when the option was not given on the command line at all.
1846  if (UseProfiledCallGraph ||
1847  (ProfileIsCSFlat && !UseProfiledCallGraph.getNumOccurrences())) {
1848  // Use profiled call edges to augment the top-down order. There are cases
1849  // that the top-down order computed based on the static call graph doesn't
1850  // reflect real execution order. For example
1851  //
1852  // 1. Incomplete static call graph due to unknown indirect call targets.
1853  // Adjusting the order by considering indirect call edges from the
1854  // profile can enable the inlining of indirect call targets by allowing
1855  // the caller processed before them.
1856  // 2. Mutual call edges in an SCC. The static processing order computed for
1857  // an SCC may not reflect the call contexts in the context-sensitive
1858  // profile, thus may cause potential inlining to be overlooked. The
1859  // function order in one SCC is being adjusted to a top-down order based
1860  // on the profile to favor more inlining. This is only a problem with CS
1861  // profile.
1862  // 3. Transitive indirect call edges due to inlining. When a callee function
1863  // (say B) is inlined into a caller function (say A) in LTO prelink,
1864  // every call edge originated from the callee B will be transferred to
1865  // the caller A. If any transferred edge (say A->C) is indirect, the
1866  // original profiled indirect edge B->C, even if considered, would not
1867  // enforce a top-down order from the caller A to the potential indirect
1868  // call target C in LTO postlink since the inlined callee B is gone from
1869  // the static call graph.
1870  // 4. #3 can happen even for direct call targets, due to functions defined
1871  // in header files. A header function (say A), when included into source
1872  // files, is defined multiple times but only one definition survives due
1873  // to ODR. Therefore, the LTO prelink inlining done on those dropped
1874  // definitions can be useless based on a local file scope. More
1875  // importantly, the inlinee (say B), once fully inlined to a
1876  // to-be-dropped A, will have no profile to consume when its outlined
1877  // version is compiled. This can lead to a profile-less prelink
1878  // compilation for the outlined version of B which may be called from
1879  // external modules. While this isn't easy to fix, we rely on the
1880  // postlink AutoFDO pipeline to optimize B. Since the survived copy of
1881  // the A can be inlined in its local scope in prelink, it may not exist
1882  // in the merged IR in postlink, and we'll need the profiled call edges
1883  // to enforce a top-down order for the rest of the functions.
1884  //
1885  // Considering those cases, a profiled call graph completely independent of
1886  // the static call graph is constructed based on profile data, where
1887  // function objects are not even needed to handle case #3 and case #4.
1888  //
1889  // Note that static callgraph edges are completely ignored since they
1890  // can be conflicting with profiled edges for cyclic SCCs and may result in
1891  // an SCC order incompatible with profile-defined one. Using strictly
1892  // profile order ensures a maximum inlining experience. On the other hand,
1893  // static call edges are not so important when they don't correspond to a
1894  // context in the profile.
1895 
1896  std::unique_ptr<ProfiledCallGraph> ProfiledCG = buildProfiledCallGraph(*CG);
1897  scc_iterator<ProfiledCallGraph *> CGI = scc_begin(ProfiledCG.get());
1898  while (!CGI.isAtEnd()) {
1899  auto Range = *CGI;
1900  if (SortProfiledSCC) {
1901  // Sort nodes in one SCC based on callsite hotness.
1903  Range = *SI;
1904  }
      // Profiled nodes are identified by name; map them back to IR functions
      // through SymbolMap and drop names with no eligible function.
1905  for (auto *Node : Range) {
1906  Function *F = SymbolMap.lookup(Node->Name);
1907  if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile"))
1908  FunctionOrderList.push_back(F);
1909  }
1910  ++CGI;
1911  }
1912  } else {
      // Static call graph path: walk its SCCs and keep eligible functions.
1914  while (!CGI.isAtEnd()) {
1915  for (CallGraphNode *Node : *CGI) {
1916  auto *F = Node->getFunction();
1917  if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile"))
1918  FunctionOrderList.push_back(F);
1919  }
1920  ++CGI;
1921  }
1922  }
1923 
      // The debug dump walks the list in reverse, i.e. it prints the order that
      // is returned after the std::reverse below.
1924  LLVM_DEBUG({
1925  dbgs() << "Function processing order:\n";
1926  for (auto F : reverse(FunctionOrderList)) {
1927  dbgs() << F->getName() << "\n";
1928  }
1929  });
1930 
1931  std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
1932  return FunctionOrderList;
1933 }
1934 
/// Open and read the sample profile and configure the loader for module \p M.
///
/// A diagnostic is emitted and false is returned when the profile cannot be
/// opened or read, or when a probe-based profile is used with a module that
/// the probe manager reports as not probed.
///
/// \param M The module the profile is loaded for.
/// \returns true when the loader is ready to run, false otherwise.
/// NOTE(review): this extract is missing several listing lines (1936, 1947,
/// 1961, 1972-1975, 1983, 1985-1986, 1989, 1993, 1997, 2000, 2003), including
/// the second parameter and the guards around the CSSPGO defaults below.
1935 bool SampleProfileLoader::doInitialization(Module &M,
1937  auto &Ctx = M.getContext();
1938 
1939  auto ReaderOrErr = SampleProfileReader::create(
1940  Filename, Ctx, FSDiscriminatorPass::Base, RemappingFilename);
1941  if (std::error_code EC = ReaderOrErr.getError()) {
1942  std::string Msg = "Could not open profile: " + EC.message();
1943  Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1944  return false;
1945  }
1946  Reader = std::move(ReaderOrErr.get());
1948  // Set the module before reading the profile so the reader may be able to
1949  // read only the function profiles which are used by the current module.
1950  Reader->setModule(&M);
1951  if (std::error_code EC = Reader->read()) {
1952  std::string Msg = "profile reading failed: " + EC.message();
1953  Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1954  return false;
1955  }
1956 
1957  PSL = Reader->getProfileSymbolList();
1958 
1959  // While profile-sample-accurate is on, ignore symbol list.
1960  ProfAccForSymsInList =
1962  if (ProfAccForSymsInList) {
      // Rebuild the set of names seen in the profile from the reader's name
      // table, when the reader provides one.
1963  NamesInProfile.clear();
1964  if (auto NameTable = Reader->getNameTable())
1965  NamesInProfile.insert(NameTable->begin(), NameTable->end());
1966  CoverageTracker.setProfAccForSymsInList(true);
1967  }
1968 
      // An external replay inline advisor is only created when an analysis
      // manager is available and a replay file was given.
1969  if (FAM && !ProfileInlineReplayFile.empty()) {
1970  ExternalInlineAdvisor = getReplayInlineAdvisor(
1971  M, *FAM, Ctx, /*OriginalAdvisor=*/nullptr,
1976  /*EmitRemarks=*/false);
1977  }
1978 
1979  // Apply tweaks if context-sensitive profile is available.
1980  if (Reader->profileIsCSFlat() || Reader->profileIsCSNested()) {
1981  ProfileIsCSFlat = Reader->profileIsCSFlat();
1982  // Enable priority-base inliner and size inline by default for CSSPGO.
1984  ProfileSizeInline = true;
1987 
1988  // For CSSPGO, use preinliner decision by default when available.
1990  UsePreInlinerDecision = true;
1991 
1992  // For CSSPGO, we also allow recursive inline to best use context profile.
1994  AllowRecursiveInline = true;
1995 
1996  // Enable iterative-BFI by default for CSSPGO.
1998  UseIterativeBFIInference = true;
1999  // Enable Profi by default for CSSPGO.
2001  SampleProfileUseProfi = true;
2002 
2004  // Tracker for profiles under different context
2005  ContextTracker = std::make_unique<SampleContextTracker>(
2006  Reader->getProfiles(), &GUIDToFuncNameMap);
2007  }
2008  }
2009 
2010  // Load pseudo probe descriptors for probe-based function samples.
2011  if (Reader->profileIsProbeBased()) {
2012  ProbeManager = std::make_unique<PseudoProbeManager>(M);
      // A probe-based profile cannot be applied to a module the probe manager
      // reports as not probed: warn and refuse to run.
2013  if (!ProbeManager->moduleIsProbed(M)) {
2014  const char *Msg =
2015  "Pseudo-probe-based profile requires SampleProfileProbePass";
2016  Ctx.diagnose(DiagnosticInfoSampleProfile(M.getModuleIdentifier(), Msg,
2017  DS_Warning));
2018  return false;
2019  }
2020  }
2021 
2022  return true;
2023 }
2024 
// Factory body: returns a newly allocated SampleProfileLoaderLegacyPass
// constructed with no arguments.
// NOTE(review): the signature line (listing line 2025) is missing from this
// extract; presumably llvm::createSampleProfileLoaderPass() — confirm.
2026  return new SampleProfileLoaderLegacyPass();
2027 }
2028 
// Factory body: returns a newly allocated SampleProfileLoaderLegacyPass
// constructed from `Name`.
// NOTE(review): the signature line (listing line 2029) is missing from this
// extract; presumably an overload taking the profile file name — confirm.
2030  return new SampleProfileLoaderLegacyPass(Name);
2031 }
2032 
/// Apply the loaded profile to every eligible function of \p M.
///
/// Installs the profile summary on the module if it has none, builds the
/// name-to-function SymbolMap, processes the functions in the order given by
/// buildFunctionOrder, and finally accounts for calls that were not inlined.
///
/// \param M The module to annotate.
/// \param AM Module analysis manager forwarded to runOnFunction; may be null
///           (the legacy pass passes nullptr).
/// \param _PSI Profile summary info stored into PSI.
/// \param CG Static call graph forwarded to buildFunctionOrder; may be null.
/// \returns true if runOnFunction returned true for at least one function.
/// NOTE(review): listing lines 2040 and 2055 (end of the setProfileSummary
/// call and the definition of NewName) are missing from this extract.
2033 bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
2034  ProfileSummaryInfo *_PSI, CallGraph *CG) {
2035  GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
2036 
2037  PSI = _PSI;
      // Only install the summary from the profile when the module does not
      // already carry one, then refresh PSI.
2038  if (M.getProfileSummary(/* IsCS */ false) == nullptr) {
2039  M.setProfileSummary(Reader->getSummary().getMD(M.getContext()),
2041  PSI->refresh();
2042  }
2043  // Compute the total number of samples collected in this profile.
2044  for (const auto &I : Reader->getProfiles())
2045  TotalCollectedSamples += I.second.getTotalSamples();
2046 
2047  auto Remapper = Reader->getRemapper();
2048  // Populate the symbol map.
2049  for (const auto &N_F : M.getValueSymbolTable()) {
2050  StringRef OrigName = N_F.getKey();
2051  Function *F = dyn_cast<Function>(N_F.getValue());
      // Skip symbols that are not functions and functions with empty names.
2052  if (F == nullptr || OrigName.empty())
2053  continue;
2054  SymbolMap[OrigName] = F;
2056  if (OrigName != NewName && !NewName.empty()) {
2057  auto r = SymbolMap.insert(std::make_pair(NewName, F));
2058  // Failing to insert means there is already an entry in SymbolMap,
2059  // thus there are multiple functions that are mapped to the same
2060  // stripped name. In case of such a name conflict, set the value
2061  // to nullptr to avoid confusion.
2062  if (!r.second)
2063  r.first->second = nullptr;
2064  OrigName = NewName;
2065  }
2066  // Insert the remapped names into SymbolMap.
2067  if (Remapper) {
2068  if (auto MapName = Remapper->lookUpNameInProfile(OrigName)) {
2069  if (*MapName != OrigName && !MapName->empty())
2070  SymbolMap.insert(std::make_pair(*MapName, F));
2071  }
2072  }
2073  }
2074  assert(SymbolMap.count(StringRef()) == 0 &&
2075  "No empty StringRef should be added in SymbolMap");
2076 
2077  bool retval = false;
      // Per-function state is cleared before each function is processed.
2078  for (auto F : buildFunctionOrder(M, CG)) {
2079  assert(!F->isDeclaration());
2080  clearFunctionData();
2081  retval |= runOnFunction(*F, AM);
2082  }
2083 
2084  // Account for cold calls not inlined....
      // This step is skipped for flat context-sensitive profiles.
2085  if (!ProfileIsCSFlat)
2086  for (const std::pair<Function *, NotInlinedProfileInfo> &pair :
2087  notInlinedCallInfo)
2088  updateProfileCallee(pair.first, pair.second.entryCount);
2089 
2090  return retval;
2091 }
2092 
2093 bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
2094  ACT = &getAnalysis<AssumptionCacheTracker>();
2095  TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
2096  TLIWP = &getAnalysis<TargetLibraryInfoWrapperPass>();
2097  ProfileSummaryInfo *PSI =
2098  &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
2099  return SampleLoader.runOnModule(M, nullptr, PSI, nullptr);
2100 }
2101 
// Per-function driver: picks the initial entry count for F, sets up the
// remark emitter, looks up the samples for F, and annotates F when non-empty
// samples exist. Returns the result of emitAnnotations, or false when there
// are no samples.
// NOTE(review): the signature line (listing line 2102) and lines 2141, 2153
// and 2155 are missing from this extract; the body reads a function `F` and
// an analysis-manager pointer `AM`.
2103  LLVM_DEBUG(dbgs() << "\n\nProcessing Function " << F.getName() << "\n");
2104  DILocation2SampleMap.clear();
2105  // By default the entry count is initialized to -1, which will be treated
2106  // conservatively by getEntryCount as the same as unknown (None). This is
2107  // to avoid newly added code to be treated as cold. If we have samples
2108  // this will be overwritten in emitAnnotations.
2109  uint64_t initialEntryCount = -1;
2110 
2111  ProfAccForSymsInList = ProfileAccurateForSymsInList && PSL;
2112  if (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate")) {
2113  // Initialize all the function entry counts to 0. It means all the
2114  // functions without profile will be regarded as cold.
2115  initialEntryCount = 0;
2116  // profile-sample-accurate is a user assertion which has a higher precedence
2117  // than symbol list. When profile-sample-accurate is on, ignore symbol list.
2118  ProfAccForSymsInList = false;
2119  }
2120  CoverageTracker.setProfAccForSymsInList(ProfAccForSymsInList);
2121 
2122  // PSL -- profile symbol list includes all the symbols in sampled binary.
2123  // If ProfileAccurateForSymsInList is enabled, PSL is used to treat
2124  // old functions without samples being cold, without having to worry
2125  // about new and hot functions being mistakenly treated as cold.
2126  if (ProfAccForSymsInList) {
2127  // Initialize the entry count to 0 for functions in the list.
2128  if (PSL->contains(F.getName()))
2129  initialEntryCount = 0;
2130 
2131  // Function in the symbol list but without sample will be regarded as
2132  // cold. To minimize the potential negative performance impact it could
2133  // have, we want to be a little conservative here saying if a function
2134  // shows up in the profile, no matter as outline function, inline instance
2135  // or call targets, treat the function as not being cold. This will handle
2136  // the cases such as most callsites of a function are inlined in sampled
2137  // binary but not inlined in current build (because of source code drift,
2138  // imprecise debug information, or the callsites are all cold individually
2139  // but not cold accumulatively...), so the outline function showing up as
2140  // cold in sampled binary will actually not be cold after current build.
2142  if (NamesInProfile.count(CanonName))
2143  initialEntryCount = -1;
2144  }
2145 
2146  // Initialize entry count when the function has no existing entry
2147  // count value.
2148  if (!F.getEntryCount().hasValue())
2149  F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
      // Without an analysis manager, a locally owned remark emitter is created;
      // OwnedORE keeps it alive until this function returns.
2150  std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
2151  if (AM) {
2152  auto &FAM =
2154  .getManager();
2156  } else {
2157  OwnedORE = std::make_unique<OptimizationRemarkEmitter>(&F);
2158  ORE = OwnedORE.get();
2159  }
2160 
      // Flat context-sensitive profiles are looked up through the context
      // tracker; all other profiles through the reader.
2161  if (ProfileIsCSFlat)
2162  Samples = ContextTracker->getBaseSamplesFor(F);
2163  else
2164  Samples = Reader->getSamplesFor(F);
2165 
      // Only functions with non-empty samples are annotated.
2166  if (Samples && !Samples->empty())
2167  return emitAnnotations(F);
2168  return false;
2169 }
2170 
2172  ModuleAnalysisManager &AM) {
      // New pass-manager entry point. Builds a SampleProfileLoader, initializes
      // it for M and runs it. Reports all analyses preserved when initialization
      // fails or the loader reports no change, and none preserved otherwise.
      // NOTE(review): listing lines 2171 (start of the signature), 2173-2174,
      // 2183 and 2195-2196 are missing from this extract; they presumably
      // define FAM, the GetTLI body, PSI and CG — confirm against the full file.
2175 
      // Callbacks handing per-function analysis results from FAM to the loader.
2176  auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
2177  return FAM.getResult<AssumptionAnalysis>(F);
2178  };
2179  auto GetTTI = [&](Function &F) -> TargetTransformInfo & {
2180  return FAM.getResult<TargetIRAnalysis>(F);
2181  };
2182  auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
2184  };
2185 
      // The pass's own file names win; when one is empty, SampleProfileFile or
      // SampleProfileRemappingFile is used in its place.
2186  SampleProfileLoader SampleLoader(
2187  ProfileFileName.empty() ? SampleProfileFile : ProfileFileName,
2188  ProfileRemappingFileName.empty() ? SampleProfileRemappingFile
2189  : ProfileRemappingFileName,
2190  LTOPhase, GetAssumptionCache, GetTTI, GetTLI);
2191 
2192  if (!SampleLoader.doInitialization(M, &FAM))
2193  return PreservedAnalyses::all();
2194 
2197  if (!SampleLoader.runOnModule(M, &AM, PSI, &CG))
2198  return PreservedAnalyses::all();
2199 
2200  return PreservedAnalyses::none();
2201 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
Instrumentation.h
llvm::InlineCost::isAlways
bool isAlways() const
Definition: InlineCost.h:123
llvm::sampleprof::FunctionSamples::getBodySamples
const BodySampleMap & getBodySamples() const
Return all the samples collected in the body of the function.
Definition: SampleProf.h:855
llvm::getReplayInlineAdvisor
std::unique_ptr< InlineAdvisor > getReplayInlineAdvisor(Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context, std::unique_ptr< InlineAdvisor > OriginalAdvisor, const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks)
Definition: ReplayInlineAdvisor.cpp:79
llvm::InlineCost::getCost
int getCost() const
Get the inline cost estimate.
Definition: InlineCost.h:129
AssumptionCache.h
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2418
llvm::SampleProfileLoaderPass::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: SampleProfile.cpp:2171
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
it
into xmm2 addss xmm2 xmm1 xmm3 addss xmm3 movaps xmm0 unpcklps xmm0 ret seems silly when it could just be one addps Expand libm rounding functions main should enable SSE DAZ mode and other fast SSE modes Think about doing i64 math in SSE regs on x86 This testcase should have no SSE instructions in it
Definition: README-SSE.txt:81
ProfileInlineGrowthLimit
cl::opt< int > ProfileInlineGrowthLimit("sample-profile-inline-growth-limit", cl::Hidden, cl::init(12), cl::desc("The size growth ratio limit for proirity-based sample profile " "loader inlining."))
ProfileInlineLimitMax
cl::opt< int > ProfileInlineLimitMax("sample-profile-inline-limit-max", cl::Hidden, cl::init(10000), cl::desc("The upper bound of size growth limit for " "proirity-based sample profile loader inlining."))
llvm::sampleprof::FunctionSamples::ProfileIsProbeBased
static bool ProfileIsProbeBased
Definition: SampleProf.h:1047
llvm::CallGraphAnalysis
An analysis pass to compute the CallGraph for a Module.
Definition: CallGraph.h:305
llvm::ModulePass
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:238
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
IntrinsicInst.h
SCCIterator.h
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:783
DebugInfoMetadata.h
llvm::ThinOrFullLTOPhase::ThinLTOPostLink
@ ThinLTOPostLink
ThinLTO postlink (backend compile) phase.
T
llvm::sampleprof::SampleProfileReader::profileIsProbeBased
bool profileIsProbeBased() const
Whether input profile is based on pseudo probes.
Definition: SampleProfReader.h:474
llvm::sampleprof::SampleContext::hasAttribute
bool hasAttribute(ContextAttributeMask A)
Definition: SampleProf.h:561
llvm::Function
Definition: Function.h:62
llvm::DenseMapBase::lookup
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:197
SizeLimit
static cl::opt< unsigned > SizeLimit("eif-limit", cl::init(6), cl::Hidden, cl::desc("Size limit in Hexagon early if-conversion"))
StringRef.h
Pass.h
DEBUG_TYPE
#define DEBUG_TYPE
Definition: SampleProfile.cpp:107
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1175
Statistic.h
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:208
llvm::SampleProfileLoaderBaseImpl
Definition: SampleProfileLoaderBaseImpl.h:80
llvm::Function::getSubprogram
DISubprogram * getSubprogram() const
Get the attached subprogram.
Definition: Metadata.cpp:1541
ErrorHandling.h
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
SampleProfileRemappingFile
static cl::opt< std::string > SampleProfileRemappingFile("sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden)
OptimizationRemarkEmitter.h
llvm::CallGraph
The basic data container for the call graph of a Module of IR.
Definition: CallGraph.h:73
FAM
FunctionAnalysisManager FAM
Definition: PassBuilderBindings.cpp:59
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
ProfileICPRelativeHotnessSkip
static cl::opt< unsigned > ProfileICPRelativeHotnessSkip("sample-profile-icp-relative-hotness-skip", cl::Hidden, cl::init(1), cl::desc("Skip relative hotness check for ICP up to given number of targets."))
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::erase
bool erase(const KeyT &Val)
Definition: DenseMap.h:302
llvm::emitInlinedIntoBasedOnCost
void emitInlinedIntoBasedOnCost(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, const BasicBlock *Block, const Function &Callee, const Function &Caller, const InlineCost &IC, bool ForProfileContext=false, const char *PassName=nullptr)
Emit ORE message based in cost (default heuristic).
Definition: InlineAdvisor.cpp:488
llvm::createSampleProfileLoaderPass
ModulePass * createSampleProfileLoaderPass()
Definition: SampleProfile.cpp:2025
ProfileInlineLimitMin
cl::opt< int > ProfileInlineLimitMin("sample-profile-inline-limit-min", cl::Hidden, cl::init(100), cl::desc("The lower bound of size growth limit for " "proirity-based sample profile loader inlining."))
llvm::DILocation
Debug location.
Definition: DebugInfoMetadata.h:1580
llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:158
llvm::sampleprof::ContextShouldBeInlined
@ ContextShouldBeInlined
Definition: SampleProf.h:415
DenseMap.h
updateIDTMetaData
static void updateIDTMetaData(Instruction &Inst, const SmallVectorImpl< InstrProfValueData > &CallTargets, uint64_t Sum)
Update indirect call target profile metadata for Inst.
Definition: SampleProfile.cpp:839
Module.h
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:414
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile", "Sample Profile loader", false, false) INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass
llvm::InlineCost::getAlways
static InlineCost getAlways(const char *Reason, Optional< CostBenefitPair > CostBenefit=None)
Definition: InlineCost.h:111
ProfileMergeInlinee
static cl::opt< bool > ProfileMergeInlinee("sample-profile-merge-inlinee", cl::Hidden, cl::init(true), cl::desc("Merge past inlinee's profile to outline version if sample " "profile loader decided not to inline a call site. It will " "only be enabled when top-down order of profile loading is " "enabled. "))
llvm::Optional
Definition: APInt.h:33
llvm::InlineParams
Thresholds to tune inline cost analysis.
Definition: InlineCost.h:184
llvm::DenseMapBase::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
llvm::PseudoProbe::Factor
float Factor
Definition: PseudoProbe.h:80
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::ThinOrFullLTOPhase::ThinLTOPreLink
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:893
SampleProfileInference.h
llvm::sampleprof::FunctionSamples::findInlinedFunctions
void findInlinedFunctions(DenseSet< GlobalValue::GUID > &S, const StringMap< Function * > &SymbolMap, uint64_t Threshold) const
Recursively traverses all children, if the total sample count of the corresponding function is no les...
Definition: SampleProf.h:916
llvm::CallSiteFormat::Format::LineDiscriminator
@ LineDiscriminator
llvm::dump
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Definition: SparseBitVector.h:876
llvm::sampleprof::FunctionSamples::getName
StringRef getName() const
Return the function name.
Definition: SampleProf.h:946
llvm::sampleprof::FunctionSamplesMap
std::map< std::string, FunctionSamples, std::less<> > FunctionSamplesMap
Definition: SampleProf.h:683
RHS
Value * RHS
Definition: X86PartialReduction.cpp:74
llvm::initializeSampleProfileLoaderLegacyPassPass
void initializeSampleProfileLoaderLegacyPassPass(PassRegistry &)
llvm::InlineCost::isNever
bool isNever() const
Definition: InlineCost.h:124
llvm::sampleprof::SampleProfileReader::getRemapper
SampleProfileReaderItaniumRemapper * getRemapper()
Definition: SampleProfReader.h:500
llvm::scc_member_iterator
Sort the nodes of a directed SCC in the decreasing order of the edge weights.
Definition: SCCIterator.h:252
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::insert
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
llvm::Data
@ Data
Definition: SIMachineScheduler.h:55
ProfileInlineReplayFallback
static cl::opt< ReplayInlinerSettings::Fallback > ProfileInlineReplayFallback("sample-profile-inline-replay-fallback", cl::init(ReplayInlinerSettings::Fallback::Original), cl::values(clEnumValN(ReplayInlinerSettings::Fallback::Original, "Original", "All decisions not in replay send to original advisor (default)"), clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline, "AlwaysInline", "All decisions not in replay are inlined"), clEnumValN(ReplayInlinerSettings::Fallback::NeverInline, "NeverInline", "All decisions not in replay are not inlined")), cl::desc("How sample profile inline replay treats sites that don't come " "from the replay. Original: defers to original advisor, " "AlwaysInline: inline all sites not in replay, NeverInline: " "inline no sites not in replay"), cl::Hidden)
llvm::ReplayInlinerSettings::Fallback::Original
@ Original
ProfileSampleBlockAccurate
static cl::opt< bool > ProfileSampleBlockAccurate("profile-sample-block-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "branches and calls as having 0 samples. Otherwise, treat " "them conservatively as unknown. "))
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:207
llvm::Instruction::setMetadata
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1336
llvm::InlineParams::ComputeFullInlineCost
Optional< bool > ComputeFullInlineCost
Compute inline cost even when the cost has exceeded the threshold.
Definition: InlineCost.h:211
Context
ManagedStatic< detail::RecordContext > Context
Definition: Record.cpp:96
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::PseudoProbe::Id
uint32_t Id
Definition: PseudoProbe.h:74
llvm::sampleprof::FunctionSamples::SetContextSynthetic
void SetContextSynthetic()
Definition: SampleProf.h:752
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
AllowRecursiveInline
static cl::opt< bool > AllowRecursiveInline("sample-profile-recursive-inline", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Allow sample loader inliner to inline recursive calls."))
Instruction.h
llvm::ThinOrFullLTOPhase
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition: Pass.h:73
CommandLine.h
LHS
Value * LHS
Definition: X86PartialReduction.cpp:73
llvm::Instruction::getNumSuccessors
unsigned getNumSuccessors() const
Return the number of successors that this instruction has.
Definition: Instruction.cpp:775
llvm::SampleProfileUseProfi
cl::opt< bool > SampleProfileUseProfi
llvm::sampleprof::FunctionSamples::getFuncName
StringRef getFuncName() const
Return the original function name.
Definition: SampleProf.h:949
BlockFrequencyInfoImpl.h
llvm::Instruction::extractProfTotalWeight
bool extractProfTotalWeight(uint64_t &TotalVal) const
Retrieve total raw weight values of a branch.
Definition: Metadata.cpp:1430
GlobalValue.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::GlobalValue::isDeclaration
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:243
SortProfiledSCC
cl::opt< bool > SortProfiledSCC("sort-profiled-scc-member", cl::init(true), cl::Hidden, cl::desc("Sort profiled recursion by edge weights."))
llvm::msgpack::Type::Map
@ Map
llvm::getInlineCost
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
Definition: InlineCost.cpp:2788
PostDominators.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::DS_Warning
@ DS_Warning
Definition: DiagnosticInfo.h:47
llvm::sampleprof::FunctionSamples::ProfileIsCSFlat
static bool ProfileIsCSFlat
Definition: SampleProf.h:1049
llvm::sampleprof::SampleProfileReader::read
std::error_code read()
The interface to read sample profiles from the associated file.
Definition: SampleProfReader.h:373
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::ProfileSummary::getMD
Metadata * getMD(LLVMContext &Context, bool AddPartialField=true, bool AddPartialProfileRatioField=true)
Return summary information as metadata.
Definition: ProfileSummary.cpp:81
Twine.h
InstrTypes.h
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1398
UsePreInlinerDecision
static cl::opt< bool > UsePreInlinerDecision("sample-profile-use-preinliner", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Use the preinliner decisions stored in profile context."))
llvm::sampleprof::ProfiledCallGraph
Definition: ProfiledCallGraph.h:65
llvm::sampleprof::SyntheticContext
@ SyntheticContext
Definition: SampleProf.h:406
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::InlineCost
Represents the cost of inlining a function.
Definition: InlineCost.h:81
llvm::updateProfileCallee
void updateProfileCallee(Function *Callee, int64_t EntryDelta, const ValueMap< const Value *, WeakTrackingVH > *VMap=nullptr)
Updates profile information by adjusting the entry count by adding EntryDelta then scaling callsite i...
Definition: InlineFunction.cpp:1610
TargetLibraryInfo.h
DenseSet.h
false
Definition: StackSlotColoring.cpp:142
llvm::orc::SymbolMap
DenseMap< SymbolStringPtr, JITEvaluatedSymbol > SymbolMap
A map from symbol names (as SymbolStringPtrs) to JITSymbols (address/flags pairs).
Definition: Core.h:113
llvm::sampleprof::FunctionSamples::getGUID
static uint64_t getGUID(StringRef Name)
Definition: SampleProf.h:1075
SampleProf.h
InlineAdvisor.h
ProfileInlineReplayFormat
static cl::opt< CallSiteFormat::Format > ProfileInlineReplayFormat("sample-profile-inline-replay-format", cl::init(CallSiteFormat::Format::LineColumnDiscriminator), cl::values(clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"), clEnumValN(CallSiteFormat::Format::LineColumn, "LineColumn", "<Line Number>:<Column Number>"), clEnumValN(CallSiteFormat::Format::LineDiscriminator, "LineDiscriminator", "<Line Number>.<Discriminator>"), clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator, "LineColumnDiscriminator", "<Line Number>:<Column Number>.<Discriminator> (default)")), cl::desc("How sample profile inline replay file is formatted"), cl::Hidden)
ProfileCount
Function::ProfileCount ProfileCount
Definition: SampleProfile.cpp:106
llvm::CallSiteFormat::Format::LineColumnDiscriminator
@ LineColumnDiscriminator
llvm::pdb::PDB_SymType::Caller
@ Caller
llvm::Instruction
Definition: Instruction.h:45
InstrProf.h
MDBuilder.h
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::ReplayInlinerSettings::Fallback::NeverInline
@ NeverInline
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:402
llvm::setProbeDistributionFactor
void setProbeDistributionFactor(Instruction &Inst, float Factor)
Definition: PseudoProbe.cpp:65
DebugLoc.h
SmallPtrSet.h
llvm::Function::PCT_Real
@ PCT_Real
Definition: Function.h:250
llvm::CallGraphNode
A node in the call graph for a module.
Definition: CallGraph.h:167
llvm::Instruction::getSuccessor
BasicBlock * getSuccessor(unsigned Idx) const
Return the specified successor. This instruction must be a terminator.
Definition: Instruction.cpp:787
llvm::InlineCost::get
static InlineCost get(int Cost, int Threshold)
Definition: InlineCost.h:106
llvm::getInlineParams
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
Definition: InlineCost.cpp:3063
SampleProfileLoaderBaseUtil.h
StringMap.h
llvm::isLegalToPromote
bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
Definition: CallPromotionUtils.cpp:382
llvm::ProfileSummary::PSK_Sample
@ PSK_Sample
Definition: ProfileSummary.h:47
llvm::CallSiteFormat::Format::LineColumn
@ LineColumn
llvm::sampleprof::SampleProfileReader::profileIsCSNested
bool profileIsCSNested() const
Whether input profile is fully context-sensitive and nested.
Definition: SampleProfReader.h:480
llvm::sampleprof::SampleProfileReader::getNameTable
virtual std::vector< StringRef > * getNameTable()
It includes all the names that have samples either in outline instance or inline instance.
Definition: SampleProfReader.h:488
llvm::sampleprof::SampleContext
Definition: SampleProf.h:472
llvm::StringMap
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
Definition: StringMap.h:108
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
CFG.h
LoopInfo.h
llvm::PriorityQueue
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Definition: PriorityQueue.h:27
llvm::X86AS::FS
@ FS
Definition: X86.h:188
llvm::scc_begin
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
Definition: SCCIterator.h:232
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:39
llvm::sampleprof::FunctionSamples::empty
bool empty() const
Definition: SampleProf.h:817
ValueSymbolTable.h
llvm::cl::ZeroOrMore
@ ZeroOrMore
Definition: CommandLine.h:120
SampleProfile.h
llvm::DenseSet
Implements a dense probed hash-table based set.
Definition: DenseSet.h:268
llvm::HighlightColor::Remark
@ Remark
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1432
ReplayInlineAdvisor.h
llvm::ProfileCount
Function::ProfileCount ProfileCount
Definition: SampleProfileLoaderBaseImpl.h:47
llvm::DiagnosticInfoOptimizationBase::Argument
Used in the streaming interface as the general argument type.
Definition: DiagnosticInfo.h:422
llvm::cl::values
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:697
llvm::StringRef::empty
constexpr LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:152
ProfiledCallGraph.h
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:465
uint64_t
ProfileSummaryInfo.h
MaxNumPromotions
static cl::opt< unsigned > MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden, cl::ZeroOrMore, cl::desc("Max number of promotions for a single indirect " "call callsite in sample profile loader"))
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2474
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:578
llvm::sampleprof::FunctionSamples::getEntrySamples
uint64_t getEntrySamples() const
Return the sample count of the first instruction of the function.
Definition: SampleProf.h:831
llvm::sampleprof::SampleProfileReader::getSamplesFor
FunctionSamples * getSamplesFor(const Function &F)
Return the samples collected for function F.
Definition: SampleProfReader.h:398
SampleProfileFile
static cl::opt< std::string > SampleProfileFile("sample-profile-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile file loaded by -sample-profile"), cl::Hidden)
llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:173
llvm::scc_iterator
Enumerate the SCCs of a directed graph in reverse topological order of the SCC DAG.
Definition: SCCIterator.h:46
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
IPO.h
llvm::sampleprof::FunctionSamples
Representation of the samples collected for a function.
Definition: SampleProf.h:691
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
llvm::DenseMap
Definition: DenseMap.h:714
llvm::sampleprof::SampleProfileReader::profileIsCSFlat
bool profileIsCSFlat() const
Whether input profile is fully context-sensitive and flat.
Definition: SampleProfReader.h:477
ErrorOr.h
I
#define I(x, y, z)
Definition: MD5.cpp:58
PriorityQueue.h
getCalledFunction
static const Function * getCalledFunction(const Value *V, bool &IsNoBuiltin)
Definition: MemoryBuiltins.cpp:117
Cloning.h
SampleProfReader.h
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::ProfileSummaryInfoWrapperPass
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Definition: ProfileSummaryInfo.h:193
ArrayRef.h
llvm::codeview::FrameProcedureOptions::Inlined
@ Inlined
llvm::sampleprof::SampleRecord::adjustCallTargets
static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets, float DistributionFactor)
Prorate call targets by a distribution factor.
Definition: SampleProf.h:380
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::ReplayInlinerSettings::Scope::Module
@ Module
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1707
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::sampleprof::FunctionSamples::UseMD5
static bool UseMD5
Whether the profile uses MD5 to represent string.
Definition: SampleProf.h:1060
llvm::codeview::CompileSym2Flags::EC
@ EC
InlineCost.h
CSINLINE_DEBUG
#define CSINLINE_DEBUG
Definition: SampleProfile.cpp:108
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:83
llvm::sampleprof::SampleProfileReader::create
static ErrorOr< std::unique_ptr< SampleProfileReader > > create(const std::string Filename, LLVMContext &C, FSDiscriminatorPass P=FSDiscriminatorPass::Base, const std::string RemapFilename="")
Create a sample profile reader appropriate to the file format.
Definition: SampleProfReader.cpp:1787
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
SampleProfileProbe.h
llvm::InlineCost::getNever
static InlineCost getNever(const char *Reason, Optional< CostBenefitPair > CostBenefit=None)
Definition: InlineCost.h:115
llvm::sampleprof::SampleProfileReader::setSkipFlatProf
virtual void setSkipFlatProf(bool Skip)
Don't read profile without context if the flag is set.
Definition: SampleProfReader.h:496
SampleHotCallSiteThreshold
cl::opt< int > SampleHotCallSiteThreshold("sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000), cl::desc("Hot callsite threshold for proirity-based sample profile loader " "inlining."))
llvm::DiagnosticInfoSampleProfile
Diagnostic information for the sample profiler.
Definition: DiagnosticInfo.h:286
llvm::ProfileSummaryAnalysis
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Definition: ProfileSummaryInfo.h:211
llvm::StringSet
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:22
llvm::CallSiteFormat::Format::Line
@ Line
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:202
None.h
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::sampleprof::SampleProfileReader::getProfileSymbolList
virtual std::unique_ptr< ProfileSymbolList > getProfileSymbolList()
Definition: SampleProfReader.h:482
uint32_t
clEnumValN
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:672
CallPromotionUtils.h
Profile
Load MIR Sample Profile
Definition: MIRSampleProfile.cpp:62
llvm::ContextTrieNode
Definition: SampleContextTracker.h:36
SampleProfileLoaderBaseImpl.h
llvm::format
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
llvm::OptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition: DiagnosticInfo.h:776
CallGraphSCCPass.h
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
llvm::DenseMapBase::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
llvm::isIndirectCall
static bool isIndirectCall(const MachineInstr &MI)
Definition: ARMBaseInstrInfo.h:650
SampleContextTracker.h
llvm::annotateValueSite
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:1000
llvm::sampleprofutil
Definition: SampleProfileLoaderBaseUtil.h:39
llvm::sampleprof::SampleProfileReader::getSummary
ProfileSummary & getSummary() const
Return the profile summary.
Definition: SampleProfReader.h:466
Callee
amdgpu Simplify well known AMD library false FunctionCallee Callee
Definition: AMDGPULibCalls.cpp:185
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
llvm::sampleprof::SampleProfileReader::getProfiles
SampleProfileMap & getProfiles()
Return all the profiles.
Definition: SampleProfReader.h:441
llvm::LLVMContext::diagnose
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Definition: LLVMContext.cpp:228
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
profile
sample profile
Definition: SampleProfile.cpp:1795
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:50
std
Definition: BitVector.h:850
llvm::sampleprof::SampleProfileReader::getOrCreateSamplesFor
FunctionSamples * getOrCreateSamplesFor(const Function &F)
Return the samples collected for function F, create empty FunctionSamples if it doesn't exist.
Definition: SampleProfReader.h:408
llvm::DenseMapBase::end
iterator end()
Definition: DenseMap.h:83
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
GenericDomTree.h
ProfileInlineReplayScope
static cl::opt< ReplayInlinerSettings::Scope > ProfileInlineReplayScope("sample-profile-inline-replay-scope", cl::init(ReplayInlinerSettings::Scope::Function), cl::values(clEnumValN(ReplayInlinerSettings::Scope::Function, "Function", "Replay on functions that have remarks associated " "with them (default)"), clEnumValN(ReplayInlinerSettings::Scope::Module, "Module", "Replay on the entire module")), cl::desc("Whether inline replay should be applied to the entire " "Module or just the Functions (default) that are present as " "callers in remarks during sample profile inlining."), cl::Hidden)
llvm::GlobalValue::getGUID
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e.
Definition: GlobalValue.h:517
Casting.h
llvm::sampleprofutil::callsiteIsHot
bool callsiteIsHot(const FunctionSamples *CallsiteFS, ProfileSummaryInfo *PSI, bool ProfAccForSymsInList)
Return true if the given callsite is hot with respect to the hot cutoff threshold.
Definition: SampleProfileLoaderBaseUtil.cpp:60
DiagnosticInfo.h
Function.h
llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1590
PassManager.h
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:221
llvm::InlineFunctionInfo
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
Definition: Cloning.h:199
UseProfiledCallGraph
static cl::opt< bool > UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden, cl::desc("Process functions in a top-down order " "defined by the profiled call graph when " "-sample-profile-top-down-load is on."))
llvm::pdb::PDB_SymType::CallSite
@ CallSite
llvm::sampleprof::SampleProfileReader
Sample-based profile reader.
Definition: SampleProfReader.h:345
llvm::ThinOrFullLTOPhase::None
@ None
No LTO/ThinLTO behavior needed.
llvm::sampleprof::FunctionSamples::merge
sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight=1)
Merge the samples in Other into this one.
Definition: SampleProf.h:876
llvm::cl::value_desc
Definition: CommandLine.h:422
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:579
llvm::NOMORE_ICP_MAGICNUM
const uint64_t NOMORE_ICP_MAGICNUM
Magic number in the value profile metadata showing a target has been promoted for the instruction and...
Definition: Metadata.h:57
llvm::sampleprof::SampleProfileReader::setModule
void setModule(const Module *Mod)
Definition: SampleProfReader.h:502
SampleColdCallSiteThreshold
cl::opt< int > SampleColdCallSiteThreshold("sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining cold callsites"))
llvm::CallGraph::getModule
Module & getModule() const
Returns the module the call graph corresponds to.
Definition: CallGraph.h:102
llvm::extractProbe
Optional< PseudoProbe > extractProbe(const Instruction &Inst)
Definition: PseudoProbe.cpp:48
ProfileAccurateForSymsInList
static cl::opt< bool > ProfileAccurateForSymsInList("profile-accurate-for-symsinlist", cl::Hidden, cl::ZeroOrMore, cl::init(true), cl::desc("For symbols in profile symbol list, regard their profiles to " "be accurate. It may be overriden by profile-sample-accurate. "))
llvm::sampleprof::FunctionSamples::getContext
SampleContext & getContext() const
Definition: SampleProf.h:1053
ProfileSampleAccurate
static cl::opt< bool > ProfileSampleAccurate("profile-sample-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "callsite and function as having 0 samples. Otherwise, treat " "un-sampled callsites and functions conservatively as unknown. "))
llvm::pgo::promoteIndirectCall
CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
Definition: IndirectCallPromotion.cpp:304
llvm::MDBuilder
Definition: MDBuilder.h:35
llvm::scc_iterator::isAtEnd
bool isAtEnd() const
Direct loop termination test which is more efficient than comparison with end().
Definition: SCCIterator.h:112
CallGraph.h
llvm::DebugLoc::getLine
unsigned getLine() const
Definition: DebugLoc.cpp:25
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:685
llvm::sampleprof::FunctionSamples::getCanonicalFnName
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
Definition: SampleProf.h:957
Instructions.h
loader
sample Sample Profile loader
Definition: SampleProfile.cpp:1796
SmallVector.h
llvm::sampleprof::SampleRecord::SortCallTargets
static const SortedCallTargetSet SortCallTargets(const CallTargetMap &Targets)
Sort call targets in descending order of call frequency.
Definition: SampleProf.h:371
llvm::Instruction::getDebugLoc
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:370
llvm::ErrorOr::get
reference get()
Definition: ErrorOr.h:150
Dominators.h
GetSortedValueDataFromCallTargets
static SmallVector< InstrProfValueData, 2 > GetSortedValueDataFromCallTargets(const SampleRecord::CallTargetMap &M)
Returns the sorted CallTargetMap M by count in descending order.
Definition: SampleProfile.cpp:1573
OverwriteExistingWeights
static cl::opt< bool > OverwriteExistingWeights("overwrite-existing-weights", cl::Hidden, cl::init(false), cl::desc("Ignore existing branch weights on IR and always overwrite."))
ProfileTopDownLoad
static cl::opt< bool > ProfileTopDownLoad("sample-profile-top-down-load", cl::Hidden, cl::init(true), cl::desc("Do profile annotation and inlining for functions in top-down " "order of call graph during sample profile loading. It only " "works for new pass manager. "))
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::try_emplace
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
Definition: DenseMap.h:222
CallsitePrioritizedInline
static cl::opt< bool > CallsitePrioritizedInline("sample-profile-prioritized-inline", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Use call site prioritized inlining for sample profile loader." "Currently only CSSPGO is supported."))
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::ErrorOr
Represents either an error or a value T.
Definition: ErrorOr.h:56
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
ProfileInlineReplayFile
static cl::opt< std::string > ProfileInlineReplayFile("sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"), cl::desc("Optimization remarks file containing inline remarks to be replayed " "by inlining from sample profile loader."), cl::Hidden)
llvm::ReplayInlinerSettings::Scope::Function
@ Function
TargetTransformInfo.h
Threshold
static cl::opt< unsigned > Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), cl::init(100), cl::Hidden)
ProfileSizeInline
static cl::opt< bool > ProfileSizeInline("sample-profile-inline-size", cl::Hidden, cl::init(false), cl::desc("Inline cold call sites in profile loader if it's beneficial " "for code size."))
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:43
llvm::InlineFunction
InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, Function *ForwardVarArgsTo=nullptr)
This function inlines the called function into the basic block of the caller.
Definition: InlineFunction.cpp:1751
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to callin...
Definition: InstrTypes.h:1176
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::InnerAnalysisManagerProxy
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:940
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1478
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::getValueProfDataFromInst
bool getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, InstrProfValueData ValueData[], uint32_t &ActualNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst which is annotated with value profile meta data.
Definition: InstrProf.cpp:1045
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
LLVMContext.h
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::UseIterativeBFIInference
llvm::cl::opt< bool > UseIterativeBFIInference
llvm::AttributeFuncs::mergeAttributesForInlining
void mergeAttributesForInlining(Function &Caller, const Function &Callee)
Merge caller's and callee's attributes.
Definition: Attributes.cpp:1984
llvm::ReplayInlinerSettings::Fallback::AlwaysInline
@ AlwaysInline
llvm::Function::ProfileCount
Class to represent profile counts.
Definition: Function.h:255
llvm::cl::desc
Definition: CommandLine.h:412
raw_ostream.h
llvm::InlineParams::AllowRecursiveCall
Optional< bool > AllowRecursiveCall
Indicate whether we allow inlining for recursive call.
Definition: InlineCost.h:217
InitializePasses.h
llvm::OptimizationRemarkEmitterAnalysis
Definition: OptimizationRemarkEmitter.h:164
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:440
llvm::ReplayInlinerSettings
Replay Inliner Setup.
Definition: ReplayInlineAdvisor.h:43
ProfileICPRelativeHotness
static cl::opt< unsigned > ProfileICPRelativeHotness("sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25), cl::desc("Relative hotness percentage threshold for indirect " "call promotion in proirity-based sample profile loader inlining."))
llvm::Optional::getValue
constexpr const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:282
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
SpecialSubKind::string
@ string
doesHistoryAllowICP
static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate)
Check whether the indirect call promotion history of Inst allows the promotion for Candidate.
Definition: SampleProfile.cpp:802
llvm::sampleprof::FunctionSamples::getCallSiteIdentifier
static LineLocation getCallSiteIdentifier(const DILocation *DIL, bool ProfileIsFS=false)
Returns a unique call site identifier for a given debug location of a call instruction.
Definition: SampleProf.cpp:228
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:915
SmallSet.h
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
llvm::SmallVectorImpl::insert
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:780