LLVM  14.0.0git
SampleProfile.cpp
Go to the documentation of this file.
1 //===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the SampleProfileLoader transformation. This pass
10 // reads a profile file generated by a sampling profiler (e.g. Linux Perf -
11 // http://perf.wiki.kernel.org/) and generates IR metadata to reflect the
12 // profile information in the given profile.
13 //
14 // This pass generates branch weight annotations on the IR:
15 //
16 // - prof: Represents branch weights. This annotation is added to branches
17 // to indicate the weights of each edge coming out of the branch.
18 // The weight of each edge is the weight of the target block for
19 // that edge. The weight of a block B is computed as the maximum
20 // number of samples found in B.
21 //
22 //===----------------------------------------------------------------------===//
23 
25 #include "llvm/ADT/ArrayRef.h"
26 #include "llvm/ADT/DenseMap.h"
27 #include "llvm/ADT/DenseSet.h"
28 #include "llvm/ADT/None.h"
29 #include "llvm/ADT/PriorityQueue.h"
30 #include "llvm/ADT/SCCIterator.h"
31 #include "llvm/ADT/SmallPtrSet.h"
32 #include "llvm/ADT/SmallSet.h"
33 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/ADT/Statistic.h"
35 #include "llvm/ADT/StringMap.h"
36 #include "llvm/ADT/StringRef.h"
37 #include "llvm/ADT/Twine.h"
44 #include "llvm/Analysis/LoopInfo.h"
51 #include "llvm/IR/BasicBlock.h"
52 #include "llvm/IR/CFG.h"
54 #include "llvm/IR/DebugLoc.h"
55 #include "llvm/IR/DiagnosticInfo.h"
56 #include "llvm/IR/Dominators.h"
57 #include "llvm/IR/Function.h"
58 #include "llvm/IR/GlobalValue.h"
59 #include "llvm/IR/InstrTypes.h"
60 #include "llvm/IR/Instruction.h"
61 #include "llvm/IR/Instructions.h"
62 #include "llvm/IR/IntrinsicInst.h"
63 #include "llvm/IR/LLVMContext.h"
64 #include "llvm/IR/MDBuilder.h"
65 #include "llvm/IR/Module.h"
66 #include "llvm/IR/PassManager.h"
68 #include "llvm/InitializePasses.h"
69 #include "llvm/Pass.h"
73 #include "llvm/Support/Casting.h"
75 #include "llvm/Support/Debug.h"
77 #include "llvm/Support/ErrorOr.h"
80 #include "llvm/Transforms/IPO.h"
89 #include <algorithm>
90 #include <cassert>
91 #include <cstdint>
92 #include <functional>
93 #include <limits>
94 #include <map>
95 #include <memory>
96 #include <queue>
97 #include <string>
98 #include <system_error>
99 #include <utility>
100 #include <vector>
101 
102 using namespace llvm;
103 using namespace sampleprof;
104 using namespace llvm::sampleprofutil;
106 #define DEBUG_TYPE "sample-profile"
107 #define CSINLINE_DEBUG DEBUG_TYPE "-inline"
108 
109 STATISTIC(NumCSInlined,
110  "Number of functions inlined with context sensitive profile");
111 STATISTIC(NumCSNotInlined,
112  "Number of functions not inlined with context sensitive profile");
113 STATISTIC(NumMismatchedProfile,
114  "Number of functions with CFG mismatched profile");
115 STATISTIC(NumMatchedProfile, "Number of functions with CFG matched profile");
116 STATISTIC(NumDuplicatedInlinesite,
117  "Number of inlined callsites with a partial distribution factor");
118 
119 STATISTIC(NumCSInlinedHitMinLimit,
120  "Number of functions with FDO inline stopped due to min size limit");
121 STATISTIC(NumCSInlinedHitMaxLimit,
122  "Number of functions with FDO inline stopped due to max size limit");
123 STATISTIC(
124  NumCSInlinedHitGrowthLimit,
125  "Number of functions with FDO inline stopped due to growth size limit");
126 
127 // Command line option to specify the file to read samples from. This is
128 // mainly used for debugging.
130  "sample-profile-file", cl::init(""), cl::value_desc("filename"),
131  cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);
132 
133 // The named file contains a set of transformations that may have been applied
134 // to the symbol names between the program from which the sample data was
135 // collected and the current program's symbols.
137  "sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"),
138  cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden);
139 
141  "profile-sample-accurate", cl::Hidden, cl::init(false),
142  cl::desc("If the sample profile is accurate, we will mark all un-sampled "
143  "callsite and function as having 0 samples. Otherwise, treat "
144  "un-sampled callsites and functions conservatively as unknown. "));
145 
147  "profile-sample-block-accurate", cl::Hidden, cl::init(false),
148  cl::desc("If the sample profile is accurate, we will mark all un-sampled "
149  "branches and calls as having 0 samples. Otherwise, treat "
150  "them conservatively as unknown. "));
151 
153  "profile-accurate-for-symsinlist", cl::Hidden, cl::ZeroOrMore,
154  cl::init(true),
155  cl::desc("For symbols in profile symbol list, regard their profiles to "
156  "be accurate. It may be overriden by profile-sample-accurate. "));
157 
159  "sample-profile-merge-inlinee", cl::Hidden, cl::init(true),
160  cl::desc("Merge past inlinee's profile to outline version if sample "
161  "profile loader decided not to inline a call site. It will "
162  "only be enabled when top-down order of profile loading is "
163  "enabled. "));
164 
166  "sample-profile-top-down-load", cl::Hidden, cl::init(true),
167  cl::desc("Do profile annotation and inlining for functions in top-down "
168  "order of call graph during sample profile loading. It only "
169  "works for new pass manager. "));
170 
171 static cl::opt<bool>
172  UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden,
173  cl::desc("Process functions in a top-down order "
174  "defined by the profiled call graph when "
175  "-sample-profile-top-down-load is on."));
176 
178  "sample-profile-inline-size", cl::Hidden, cl::init(false),
179  cl::desc("Inline cold call sites in profile loader if it's beneficial "
180  "for code size."));
181 
183  "sample-profile-inline-growth-limit", cl::Hidden, cl::init(12),
184  cl::desc("The size growth ratio limit for proirity-based sample profile "
185  "loader inlining."));
186 
188  "sample-profile-inline-limit-min", cl::Hidden, cl::init(100),
189  cl::desc("The lower bound of size growth limit for "
190  "proirity-based sample profile loader inlining."));
191 
193  "sample-profile-inline-limit-max", cl::Hidden, cl::init(10000),
194  cl::desc("The upper bound of size growth limit for "
195  "proirity-based sample profile loader inlining."));
196 
198  "sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000),
199  cl::desc("Hot callsite threshold for proirity-based sample profile loader "
200  "inlining."));
201 
203  "sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45),
204  cl::desc("Threshold for inlining cold callsites"));
205 
207  "sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25),
208  cl::desc(
209  "Relative hotness percentage threshold for indirect "
210  "call promotion in proirity-based sample profile loader inlining."));
211 
213  "sample-profile-icp-relative-hotness-skip", cl::Hidden, cl::init(1),
214  cl::desc(
215  "Skip relative hotness check for ICP up to given number of targets."));
216 
218  "sample-profile-prioritized-inline", cl::Hidden, cl::ZeroOrMore,
219  cl::init(false),
220  cl::desc("Use call site prioritized inlining for sample profile loader."
221  "Currently only CSSPGO is supported."));
222 
224  "sample-profile-use-preinliner", cl::Hidden, cl::ZeroOrMore,
225  cl::init(false),
226  cl::desc("Use the preinliner decisions stored in profile context."));
227 
229  "sample-profile-recursive-inline", cl::Hidden, cl::ZeroOrMore,
230  cl::init(false),
231  cl::desc("Allow sample loader inliner to inline recursive calls."));
232 
234  "sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"),
235  cl::desc(
236  "Optimization remarks file containing inline remarks to be replayed "
237  "by inlining from sample profile loader."),
238  cl::Hidden);
239 
240 static cl::opt<unsigned>
241  MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden,
243  cl::desc("Max number of promotions for a single indirect "
244  "call callsite in sample profile loader"));
245 
247  "overwrite-existing-weights", cl::Hidden, cl::init(false),
248  cl::desc("Ignore existing branch weights on IR and always overwrite."));
249 
250 namespace {
251 
252 using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
253 using EquivalenceClassMap = DenseMap<const BasicBlock *, const BasicBlock *>;
254 using Edge = std::pair<const BasicBlock *, const BasicBlock *>;
255 using EdgeWeightMap = DenseMap<Edge, uint64_t>;
256 using BlockEdgeMap =
258 
// Scoped helper tying a GUID -> function-name map to the profiles held by a
// SampleProfileReader. The constructor fills the map from the module's
// functions and attaches it to every FunctionSamples; the destructor clears
// the map and detaches it again. Both do nothing when the reader does not
// use MD5.
259 class GUIDToFuncNameMapper {
260 public:
// Fills GUIDToFuncNameMap from the functions of M and points every
// FunctionSamples reachable from Reader (inlinees included) at that map.
261  GUIDToFuncNameMapper(Module &M, SampleProfileReader &Reader,
262  DenseMap<uint64_t, StringRef> &GUIDToFuncNameMap)
263  : CurrentReader(Reader), CurrentModule(M),
264  CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) {
265  if (!CurrentReader.useMD5())
266  return;
267 
268  for (const auto &F : CurrentModule) {
269  StringRef OrigName = F.getName();
270  CurrentGUIDToFuncNameMap.insert(
271  {Function::getGUID(OrigName), OrigName});
272 
273  // Local to global var promotion used by optimization like thinlto
274  // will rename the var and add suffix like ".llvm.xxx" to the
275  // original local name. In sample profile, the suffixes of function
276  // names are all stripped. Since it is possible that the mapper is
277  // built in post-thin-link phase and var promotion has been done,
278  // we need to add the substring of function name without the suffix
279  // into the GUIDToFuncNameMap.
// NOTE(review): CanonName is used below but its declaration is not among the
// lines visible here (the embedded numbering skips 280) -- restore it from
// the upstream file before building.
281  if (CanonName != OrigName)
282  CurrentGUIDToFuncNameMap.insert(
283  {Function::getGUID(CanonName), CanonName});
284  }
285 
286  // Update GUIDToFuncNameMap for each function including inlinees.
287  SetGUIDToFuncNameMapForAll(&CurrentGUIDToFuncNameMap);
288  }
289 
// Clears the map and resets the per-FunctionSamples pointer to nullptr so no
// profile keeps referring to the emptied map.
290  ~GUIDToFuncNameMapper() {
291  if (!CurrentReader.useMD5())
292  return;
293 
294  CurrentGUIDToFuncNameMap.clear();
295 
296  // Reset GUIDToFuncNameMap of each function as they're no
297  // longer valid at this point.
298  SetGUIDToFuncNameMapForAll(nullptr);
299  }
300 
301 private:
// Assigns Map to the GUIDToFuncNameMap member of every FunctionSamples:
// the reader's top-level profiles first, then, via a work queue, every
// FunctionSamples nested under their callsite samples.
302  void SetGUIDToFuncNameMapForAll(DenseMap<uint64_t, StringRef> *Map) {
303  std::queue<FunctionSamples *> FSToUpdate;
304  for (auto &IFS : CurrentReader.getProfiles()) {
305  FSToUpdate.push(&IFS.second);
306  }
307 
308  while (!FSToUpdate.empty()) {
309  FunctionSamples *FS = FSToUpdate.front();
310  FSToUpdate.pop();
311  FS->GUIDToFuncNameMap = Map;
312  for (const auto &ICS : FS->getCallsiteSamples()) {
313  const FunctionSamplesMap &FSMap = ICS.second;
314  for (auto &IFS : FSMap) {
// This inner FS (a reference) shadows the outer FS pointer; the const_cast
// is needed because FSMap is reached through a const reference.
315  FunctionSamples &FS = const_cast<FunctionSamples &>(IFS.second);
316  FSToUpdate.push(&FS);
317  }
318  }
319  }
320  }
321 
// Reader whose profiles get the map attached/detached.
322  SampleProfileReader &CurrentReader;
// Module whose functions seed the map.
323  Module &CurrentModule;
// Caller-owned map that this object fills and later clears.
324  DenseMap<uint64_t, StringRef> &CurrentGUIDToFuncNameMap;
325 };
326 
327 // Inline candidate used by iterative callsite prioritized inliner
328 struct InlineCandidate {
329  CallBase *CallInstr;
330  const FunctionSamples *CalleeSamples;
331  // Prorated callsite count, which will be used to guide inlining. For example,
332  // if a callsite is duplicated in LTO prelink, then in LTO postlink the two
333  // copies will get their own distribution factors and their prorated counts
334  // will be used to decide if they should be inlined independently.
335  uint64_t CallsiteCount;
336  // Call site distribution factor to prorate the profile samples for a
337  // duplicated callsite. Default value is 1.0.
338  float CallsiteDistribution;
339 };
340 
341 // Inline candidate comparer using call site weight
342 struct CandidateComparer {
343  bool operator()(const InlineCandidate &LHS, const InlineCandidate &RHS) {
344  if (LHS.CallsiteCount != RHS.CallsiteCount)
345  return LHS.CallsiteCount < RHS.CallsiteCount;
346 
347  const FunctionSamples *LCS = LHS.CalleeSamples;
348  const FunctionSamples *RCS = RHS.CalleeSamples;
349  assert(LCS && RCS && "Expect non-null FunctionSamples");
350 
351  // Tie breaker using number of samples try to favor smaller functions first
352  if (LCS->getBodySamples().size() != RCS->getBodySamples().size())
353  return LCS->getBodySamples().size() > RCS->getBodySamples().size();
354 
355  // Tie breaker using GUID so we have stable/deterministic inlining order
356  return LCS->getGUID(LCS->getName()) < RCS->getGUID(RCS->getName());
357  }
358 };
359 
360 using CandidateQueue =
362  CandidateComparer>;
363 
364 /// Sample profile pass.
365 ///
366 /// This pass reads profile data from the file specified by
367 /// -sample-profile-file and annotates every affected function with the
368 /// profile information found in that file.
369 class SampleProfileLoader final
370  : public SampleProfileLoaderBaseImpl<BasicBlock> {
371 public:
372  SampleProfileLoader(
373  StringRef Name, StringRef RemapName, ThinOrFullLTOPhase LTOPhase,
374  std::function<AssumptionCache &(Function &)> GetAssumptionCache,
375  std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo,
376  std::function<const TargetLibraryInfo &(Function &)> GetTLI)
378  GetAC(std::move(GetAssumptionCache)),
379  GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)),
380  LTOPhase(LTOPhase) {}
381 
382  bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr);
383  bool runOnModule(Module &M, ModuleAnalysisManager *AM,
384  ProfileSummaryInfo *_PSI, CallGraph *CG);
385 
386 protected:
388  bool emitAnnotations(Function &F);
389  ErrorOr<uint64_t> getInstWeight(const Instruction &I) override;
390  ErrorOr<uint64_t> getProbeWeight(const Instruction &I);
391  const FunctionSamples *findCalleeFunctionSamples(const CallBase &I) const;
392  const FunctionSamples *
393  findFunctionSamples(const Instruction &I) const override;
394  std::vector<const FunctionSamples *>
395  findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
396  void findExternalInlineCandidate(const FunctionSamples *Samples,
397  DenseSet<GlobalValue::GUID> &InlinedGUIDs,
400  // Attempt to promote indirect call and also inline the promoted call
401  bool tryPromoteAndInlineCandidate(
402  Function &F, InlineCandidate &Candidate, uint64_t SumOrigin,
403  uint64_t &Sum, SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
404  bool inlineHotFunctions(Function &F,
405  DenseSet<GlobalValue::GUID> &InlinedGUIDs);
406  InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
407  bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB);
408  bool
409  tryInlineCandidate(InlineCandidate &Candidate,
410  SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
411  bool
412  inlineHotFunctionsWithPriority(Function &F,
413  DenseSet<GlobalValue::GUID> &InlinedGUIDs);
414  // Inline cold/small functions in addition to hot ones
415  bool shouldInlineColdCallee(CallBase &CallInst);
416  void emitOptimizationRemarksForInlineCandidates(
417  const SmallVectorImpl<CallBase *> &Candidates, const Function &F,
418  bool Hot);
419  std::vector<Function *> buildFunctionOrder(Module &M, CallGraph *CG);
420  std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(CallGraph &CG);
421  void generateMDProfMetadata(Function &F);
422 
423  /// Map from function name to Function *. Used to find the function from
424  /// the function name. If the function name contains suffix, additional
425  /// entry is added to map from the stripped name to the function if there
426  /// is one-to-one mapping.
428 
431  std::function<const TargetLibraryInfo &(Function &)> GetTLI;
432 
433  /// Profile tracker for different context.
434  std::unique_ptr<SampleContextTracker> ContextTracker;
435 
436  /// Flag indicating whether input profile is context-sensitive
437  bool ProfileIsCS = false;
438 
439  /// Flag indicating which LTO/ThinLTO phase the pass is invoked in.
440  ///
441  /// We need to know the LTO phase because for example in ThinLTOPrelink
442  /// phase, in annotation, we should not promote indirect calls. Instead,
443  /// we will mark GUIDs that needs to be annotated to the function.
444  ThinOrFullLTOPhase LTOPhase;
445 
446  /// Profile symbol list tells whether a function name appears in the binary
447  /// used to generate the current profile.
448  std::unique_ptr<ProfileSymbolList> PSL;
449 
450  /// Total number of samples collected in this profile.
451  ///
452  /// This is the sum of all the samples collected in all the functions executed
453  /// at runtime.
454  uint64_t TotalCollectedSamples = 0;
455 
456  // Information recorded when we declined to inline a call site
457  // because we have determined it is too cold is accumulated for
458  // each callee function. Initially this is just the entry count.
459  struct NotInlinedProfileInfo {
460  uint64_t entryCount;
461  };
463 
464  // GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for
465  // all the function symbols defined or declared in current module.
466  DenseMap<uint64_t, StringRef> GUIDToFuncNameMap;
467 
468  // All the Names used in FunctionSamples including outline function
469  // names, inline instance names and call target names.
470  StringSet<> NamesInProfile;
471 
472  // For symbol in profile symbol list, whether to regard their profiles
473  // to be accurate. It is mainly decided by existence of profile symbol
474  // list and -profile-accurate-for-symsinlist flag, but it can be
475  // overridden by -profile-sample-accurate or profile-sample-accurate
476  // attribute.
477  bool ProfAccForSymsInList;
478 
479  // External inline advisor used to replay inline decision from remarks.
480  std::unique_ptr<ReplayInlineAdvisor> ExternalInlineAdvisor;
481 
482  // A pseudo probe helper to correlate the imported sample counts.
483  std::unique_ptr<PseudoProbeManager> ProbeManager;
484 };
485 
486 class SampleProfileLoaderLegacyPass : public ModulePass {
487 public:
488  // Class identification, replacement for typeinfo
489  static char ID;
490 
491  SampleProfileLoaderLegacyPass(
494  : ModulePass(ID), SampleLoader(
495  Name, SampleProfileRemappingFile, LTOPhase,
496  [&](Function &F) -> AssumptionCache & {
497  return ACT->getAssumptionCache(F);
498  },
499  [&](Function &F) -> TargetTransformInfo & {
500  return TTIWP->getTTI(F);
501  },
502  [&](Function &F) -> TargetLibraryInfo & {
503  return TLIWP->getTLI(F);
504  }) {
507  }
508 
509  void dump() { SampleLoader.dump(); }
510 
511  bool doInitialization(Module &M) override {
512  return SampleLoader.doInitialization(M);
513  }
514 
515  StringRef getPassName() const override { return "Sample profile pass"; }
516  bool runOnModule(Module &M) override;
517 
518  void getAnalysisUsage(AnalysisUsage &AU) const override {
523  }
524 
525 private:
526  SampleProfileLoader SampleLoader;
527  AssumptionCacheTracker *ACT = nullptr;
528  TargetTransformInfoWrapperPass *TTIWP = nullptr;
529  TargetLibraryInfoWrapperPass *TLIWP = nullptr;
530 };
531 
532 } // end anonymous namespace
533 
// Returns the sample weight to attribute to Inst, or a (default) error_code
// when the instruction must be ignored during annotation: it has no debug
// location, or it is a branch, intrinsic or PHI node.
534 ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
// NOTE(review): as listed, this return is unconditional and makes the rest of
// the function unreachable. The guarding condition is not among the visible
// lines (the embedded numbering skips 535) -- restore it from upstream.
536  return getProbeWeight(Inst);
537 
538  const DebugLoc &DLoc = Inst.getDebugLoc();
539  if (!DLoc)
540  return std::error_code();
541 
542  // Ignore all intrinsics, phinodes and branch instructions.
543  // Branch and phinodes instruction usually contains debug info from sources
544  // outside of the residing basic block, thus we ignore them during annotation.
545  if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst) || isa<PHINode>(Inst))
546  return std::error_code();
547 
548  // For non-CS profile, if a direct call/invoke instruction is inlined in
549  // profile (findCalleeFunctionSamples returns non-empty result), but not
550  // inlined here, it means that the inlined callsite has no sample, thus the
551  // call instruction should have 0 count.
552  // For CS profile, the callsite count of previously inlined callees is
553  // populated with the entry count of the callees.
554  if (!ProfileIsCS)
555  if (const auto *CB = dyn_cast<CallBase>(&Inst))
556  if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
557  return 0;
558 
// Everything else is delegated to the shared implementation.
559  return getInstWeightImpl(Inst);
560 }
561 
562 // Probe-based weight lookup. An error_code result stands for either 1) a
563 // dangling probe or 2) a non-probe instruction whose weight is ignored. If
564 // every instruction of a BB yields error_code, the BB weight gets inferred.
565 ErrorOr<uint64_t> SampleProfileLoader::getProbeWeight(const Instruction &Inst) {
// NOTE(review): the line below is the tail of a statement whose beginning is
// not among the visible lines (the embedded numbering skips 566); judging by
// the message it is an assertion that the profile is probe based -- confirm
// against upstream.
567  "Profile is not pseudo probe based");
568  Optional<PseudoProbe> Probe = extractProbe(Inst);
569  // Ignore the non-probe instruction. If none of the instruction in the BB is
570  // probe, we choose to infer the BB's weight.
571  if (!Probe)
572  return std::error_code();
573 
574  const FunctionSamples *FS = findFunctionSamples(Inst);
575  // If none of the instruction has FunctionSample, we choose to return zero
576  // value sample to indicate the BB is cold. This could happen when the
577  // instruction is from inlinee and no profile data is found.
578  // FIXME: This should not be affected by the source drift issue as 1) if the
579  // newly added function is top-level inliner, it won't match the CFG checksum
580  // in the function profile or 2) if it's the inlinee, the inlinee should have
581  // a profile, otherwise it wouldn't be inlined. For non-probe based profile,
582  // we can improve it by adding a switch for profile-sample-block-accurate for
583  // block level counts in the future.
584  if (!FS)
585  return 0;
586 
587  // For non-CS profile, If a direct call/invoke instruction is inlined in
588  // profile (findCalleeFunctionSamples returns non-empty result), but not
589  // inlined here, it means that the inlined callsite has no sample, thus the
590  // call instruction should have 0 count.
591  // For CS profile, the callsite count of previously inlined callees is
592  // populated with the entry count of the callees.
593  if (!ProfileIsCS)
594  if (const auto *CB = dyn_cast<CallBase>(&Inst))
595  if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
596  return 0;
597 
// Look the probe up in the profile by its id (second argument fixed at 0).
598  const ErrorOr<uint64_t> &R = FS->findSamplesAt(Probe->Id, 0);
599  if (R) {
// Scale the recorded count by the probe's factor before using it.
600  uint64_t Samples = R.get() * Probe->Factor;
// The remark is emitted only when markSamplesUsed reports a first-time mark.
601  bool FirstMark = CoverageTracker.markSamplesUsed(FS, Probe->Id, 0, Samples);
602  if (FirstMark) {
603  ORE->emit([&]() {
604  OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "AppliedSamples", &Inst);
605  Remark << "Applied " << ore::NV("NumSamples", Samples);
606  Remark << " samples from profile (ProbeId=";
607  Remark << ore::NV("ProbeId", Probe->Id);
608  Remark << ", Factor=";
609  Remark << ore::NV("Factor", Probe->Factor);
610  Remark << ", OriginalSamples=";
611  Remark << ore::NV("OriginalSamples", R.get());
612  Remark << ")";
613  return Remark;
614  });
615  }
616  LLVM_DEBUG(dbgs() << " " << Probe->Id << ":" << Inst
617  << " - weight: " << R.get() << " - factor: "
618  << format("%0.2f", Probe->Factor) << ")\n");
619  return Samples;
620  }
// No samples recorded for this probe: hand the lookup error to the caller.
621  return R;
622 }
623 
624 /// Get the FunctionSamples for a call instruction.
625 ///
626 /// The FunctionSamples of a call/invoke instruction \p Inst is the inlined
627 /// instance in which that call instruction is calling to. It contains
628 /// all samples that resides in the inlined instance. We first find the
629 /// inlined instance in which the call instruction is from, then we
630 /// traverse its children to find the callsite with the matching
631 /// location.
632 ///
633 /// \param Inst Call/Invoke instruction to query.
634 ///
635 /// \returns The FunctionSamples pointer to the inlined instance.
636 const FunctionSamples *
637 SampleProfileLoader::findCalleeFunctionSamples(const CallBase &Inst) const {
638  const DILocation *DIL = Inst.getDebugLoc();
639  if (!DIL) {
640  return nullptr;
641  }
642 
643  StringRef CalleeName;
644  if (Function *Callee = Inst.getCalledFunction())
645  CalleeName = Callee->getName();
646 
647  if (ProfileIsCS)
648  return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName);
649 
650  const FunctionSamples *FS = findFunctionSamples(Inst);
651  if (FS == nullptr)
652  return nullptr;
653 
654  return FS->findFunctionSamplesAt(FunctionSamples::getCallSiteIdentifier(DIL),
655  CalleeName, Reader->getRemapper());
656 }
657 
658 /// Returns a vector of FunctionSamples that are the indirect call targets
659 /// of \p Inst, sorted by entry sample count (highest first, ties broken by
660 /// ascending GUID). Stores the total call count of the call in \p Sum.
/// \p Sum is left untouched on the early exits taken before it is reset
/// (no debug location, no CS callee samples, no enclosing FunctionSamples).
661 std::vector<const FunctionSamples *>
662 SampleProfileLoader::findIndirectCallFunctionSamples(
663  const Instruction &Inst, uint64_t &Sum) const {
664  const DILocation *DIL = Inst.getDebugLoc();
665  std::vector<const FunctionSamples *> R;
666 
667  if (!DIL) {
668  return R;
669  }
670 
// Ordering used for the result. Note that the lambda parameter R shadows the
// result vector R inside the lambda body.
671  auto FSCompare = [](const FunctionSamples *L, const FunctionSamples *R) {
672  assert(L && R && "Expect non-null FunctionSamples");
673  if (L->getEntrySamples() != R->getEntrySamples())
674  return L->getEntrySamples() > R->getEntrySamples();
675  return FunctionSamples::getGUID(L->getName()) <
676  FunctionSamples::getGUID(R->getName());
677  };
678 
679  if (ProfileIsCS) {
680  auto CalleeSamples =
681  ContextTracker->getIndirectCalleeContextSamplesFor(DIL);
682  if (CalleeSamples.empty())
683  return R;
684 
685  // For CSSPGO, we only use target context profile's entry count
686  // as that already includes both inlined callee and non-inlined ones..
687  Sum = 0;
688  for (const auto *const FS : CalleeSamples) {
689  Sum += FS->getEntrySamples();
690  R.push_back(FS);
691  }
692  llvm::sort(R, FSCompare);
693  return R;
694  }
695 
696  const FunctionSamples *FS = findFunctionSamples(Inst);
697  if (FS == nullptr)
698  return R;
699 
// NOTE(review): CallSite is used below but its declaration is not among the
// visible lines (the embedded numbering skips 700) -- restore it from
// upstream.
701  auto T = FS->findCallTargetMapAt(CallSite);
702  Sum = 0;
// Non-CS profile: Sum is the call-target counts recorded at the call site
// plus the entry samples of every inlined instance at the same site.
703  if (T)
704  for (const auto &T_C : T.get())
705  Sum += T_C.second;
706  if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(CallSite)) {
707  if (M->empty())
708  return R;
709  for (const auto &NameFS : *M) {
710  Sum += NameFS.second.getEntrySamples();
711  R.push_back(&NameFS.second);
712  }
713  llvm::sort(R, FSCompare);
714  }
715  return R;
716 }
717 
// Returns the FunctionSamples that Inst belongs to. Results are memoized per
// DILocation in DILocation2SampleMap; an instruction without a debug location
// maps to Samples.
718 const FunctionSamples *
719 SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
// NOTE(review): the closing brace three lines below has no visible opener;
// the line that starts this guarded region is missing from the listing (the
// embedded numbering skips 720) -- restore it from upstream.
721  Optional<PseudoProbe> Probe = extractProbe(Inst);
722  if (!Probe)
723  return nullptr;
724  }
725 
726  const DILocation *DIL = Inst.getDebugLoc();
727  if (!DIL)
728  return Samples;
729 
// try_emplace reports whether DIL was newly inserted; the lookup is done only
// in that case, so later queries for the same DIL reuse the stored pointer.
730  auto it = DILocation2SampleMap.try_emplace(DIL,nullptr);
731  if (it.second) {
732  if (ProfileIsCS)
733  it.first->second = ContextTracker->getContextSamplesFor(DIL);
734  else
735  it.first->second =
736  Samples->findFunctionSamples(DIL, Reader->getRemapper());
737  }
738  return it.first->second;
739 }
740 
741 /// Check whether the indirect call promotion history of \p Inst allows
742 /// the promotion for \p Candidate.
743 /// If the profile count for the promotion candidate \p Candidate is
744 /// NOMORE_ICP_MAGICNUM, it means \p Candidate has already been promoted
745 /// for \p Inst. If we already have at least MaxNumPromotions
746 /// NOMORE_ICP_MAGICNUM count values in the value profile of \p Inst, we
747 /// cannot promote for \p Inst anymore.
748 static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate) {
749  uint32_t NumVals = 0;
750  uint64_t TotalCount = 0;
751  std::unique_ptr<InstrProfValueData[]> ValueData =
752  std::make_unique<InstrProfValueData[]>(MaxNumPromotions);
753  bool Valid =
754  getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget, MaxNumPromotions,
755  ValueData.get(), NumVals, TotalCount, true);
756  // No valid value profile so no promoted targets have been recorded
757  // before. Ok to do ICP.
758  if (!Valid)
759  return true;
760 
761  unsigned NumPromoted = 0;
762  for (uint32_t I = 0; I < NumVals; I++) {
763  if (ValueData[I].Count != NOMORE_ICP_MAGICNUM)
764  continue;
765 
766  // If the promotion candidate has NOMORE_ICP_MAGICNUM count in the
767  // metadata, it means the candidate has been promoted for this
768  // indirect call.
769  if (ValueData[I].Value == Function::getGUID(Candidate))
770  return false;
771  NumPromoted++;
772  // If already have MaxNumPromotions promotion, don't do it anymore.
773  if (NumPromoted == MaxNumPromotions)
774  return false;
775  }
776  return true;
777 }
778 
779 /// Update indirect call target profile metadata for \p Inst.
780 /// Usually \p Sum is the sum of counts of all the targets for \p Inst.
781 /// If it is 0, it means updateIDTMetaData is used to mark a
782 /// certain target to be promoted already. If it is not zero,
783 /// we expect to use it to update the total count in the value profile.
/// At most MaxNumPromotions targets are written back, ordered by count
/// (highest first) and then by value (highest first).
784 static void
// NOTE(review): the signature line carrying the function name and the first
// parameter (used below as Inst) is not among the visible lines (the embedded
// numbering skips 785) -- restore it from upstream.
786  const SmallVectorImpl<InstrProfValueData> &CallTargets,
787  uint64_t Sum) {
788  uint32_t NumVals = 0;
789  // OldSum is the existing total count in the value profile data.
790  uint64_t OldSum = 0;
791  std::unique_ptr<InstrProfValueData[]> ValueData =
792  std::make_unique<InstrProfValueData[]>(MaxNumPromotions);
793  bool Valid =
794  getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget, MaxNumPromotions,
795  ValueData.get(), NumVals, OldSum, true);
796 
// Target value -> count; this becomes the new metadata content.
797  DenseMap<uint64_t, uint64_t> ValueCountMap;
798  if (Sum == 0) {
// "Mark as promoted" mode: exactly one target with the magic count.
799  assert((CallTargets.size() == 1 &&
800  CallTargets[0].Count == NOMORE_ICP_MAGICNUM) &&
801  "If sum is 0, assume only one element in CallTargets "
802  "with count being NOMORE_ICP_MAGICNUM");
803  // Initialize ValueCountMap with existing value profile data.
804  if (Valid) {
805  for (uint32_t I = 0; I < NumVals; I++)
806  ValueCountMap[ValueData[I].Value] = ValueData[I].Count;
807  }
808  auto Pair =
809  ValueCountMap.try_emplace(CallTargets[0].Value, CallTargets[0].Count);
810  // If the target already exists in value profile, decrease the total
811  // count OldSum and reset the target's count to NOMORE_ICP_MAGICNUM.
812  if (!Pair.second) {
// NOTE(review): if the existing entry already held NOMORE_ICP_MAGICNUM this
// unsigned subtraction could wrap -- confirm callers never re-mark a target.
813  OldSum -= Pair.first->second;
814  Pair.first->second = NOMORE_ICP_MAGICNUM;
815  }
816  Sum = OldSum;
817  } else {
818  // Initialize ValueCountMap with existing NOMORE_ICP_MAGICNUM
819  // counts in the value profile.
820  if (Valid) {
821  for (uint32_t I = 0; I < NumVals; I++) {
822  if (ValueData[I].Count == NOMORE_ICP_MAGICNUM)
823  ValueCountMap[ValueData[I].Value] = ValueData[I].Count;
824  }
825  }
826 
827  for (const auto &Data : CallTargets) {
828  auto Pair = ValueCountMap.try_emplace(Data.Value, Data.Count);
829  if (Pair.second)
830  continue;
831  // The target represented by Data.Value has already been promoted.
832  // Keep the count as NOMORE_ICP_MAGICNUM in the profile and decrease
833  // Sum by Data.Count.
834  assert(Sum >= Data.Count && "Sum should never be less than Data.Count");
835  Sum -= Data.Count;
836  }
837  }
838 
// Flatten the map into a vector so it can be sorted and truncated.
839  SmallVector<InstrProfValueData, 8> NewCallTargets;
840  for (const auto &ValueCount : ValueCountMap) {
841  NewCallTargets.emplace_back(
842  InstrProfValueData{ValueCount.first, ValueCount.second});
843  }
844 
// Sorting makes the output independent of the map's iteration order.
845  llvm::sort(NewCallTargets,
846  [](const InstrProfValueData &L, const InstrProfValueData &R) {
847  if (L.Count != R.Count)
848  return L.Count > R.Count;
849  return L.Value > R.Value;
850  });
851 
// Never annotate more than MaxNumPromotions targets.
852  uint32_t MaxMDCount =
853  std::min(NewCallTargets.size(), static_cast<size_t>(MaxNumPromotions));
854  annotateValueSite(*Inst.getParent()->getParent()->getParent(), Inst,
855  NewCallTargets, Sum, IPVK_IndirectCallTarget, MaxMDCount);
856 }
857 
858 /// Attempt to promote indirect call and also inline the promoted call.
859 ///
860 /// \param F Caller function.
861 /// \param Candidate ICP and inline candidate.
862 /// \param SumOrigin Original sum of target counts for indirect call before
863 /// promoting given candidate.
864 /// \param Sum Prorated sum of remaining target counts for indirect call
865 /// after promoting given candidate.
866 /// \param InlinedCallSite Output vector for new call sites exposed after
867 /// inlining.
/// \returns the result of tryInlineCandidate when the promoted call is a
/// CallInst or InvokeInst; false on every other path (callee not found in
/// SymbolMap, promotion disallowed by history, or promotion not legal).
868 bool SampleProfileLoader::tryPromoteAndInlineCandidate(
869  Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum,
870  SmallVector<CallBase *, 8> *InlinedCallSite) {
// Resolve the profiled callee name to a function in the symbol map; give up
// when there is no entry or the entry is null.
871  auto CalleeFunctionName = Candidate.CalleeSamples->getFuncName();
872  auto R = SymbolMap.find(CalleeFunctionName);
873  if (R == SymbolMap.end() || !R->getValue())
874  return false;
875 
876  auto &CI = *Candidate.CallInstr;
877  if (!doesHistoryAllowICP(CI, R->getValue()->getName()))
878  return false;
879 
// Default reason used in the debug message below; isLegalToPromote receives
// a pointer to it and may replace it.
880  const char *Reason = "Callee function not available";
881  // R->getValue() != &F is to prevent promoting a recursive call.
882  // If it is a recursive call, we do not inline it as it could bloat
883  // the code exponentially. There is a way to better handle this, e.g.
884  // clone the caller first, and inline the cloned caller if it is
885  // recursive. As llvm does not inline recursive calls, we will
886  // simply ignore it instead of handling it explicitly.
887  if (!R->getValue()->isDeclaration() && R->getValue()->getSubprogram() &&
888  R->getValue()->hasFnAttribute("use-sample-profile") &&
889  R->getValue() != &F && isLegalToPromote(CI, R->getValue(), &Reason)) {
890  // For promoted target, set its value with NOMORE_ICP_MAGICNUM count
891  // in the value profile metadata so the target won't be promoted again.
892  SmallVector<InstrProfValueData, 1> SortedCallTargets = {InstrProfValueData{
893  Function::getGUID(R->getValue()->getName()), NOMORE_ICP_MAGICNUM}};
894  updateIDTMetaData(CI, SortedCallTargets, 0);
895 
896  auto *DI = &pgo::promoteIndirectCall(
897  CI, R->getValue(), Candidate.CallsiteCount, Sum, false, ORE);
// NOTE(review): DI is obtained by taking the address of the call result, so
// this null check presumably always passes -- confirm.
898  if (DI) {
899  Sum -= Candidate.CallsiteCount;
900  // Do not prorate the indirect callsite distribution since the original
901  // distribution will be used to scale down non-promoted profile target
902  // counts later. By doing this we lose track of the real callsite count
903  // for the leftover indirect callsite as a trade off for accurate call
904  // target counts.
905  // TODO: Ideally we would have two separate factors, one for call site
906  // counts and one is used to prorate call target counts.
907  // Do not update the promoted direct callsite distribution at this
908  // point since the original distribution combined with the callee profile
909  // will be used to prorate callsites from the callee if inlined. Once not
910  // inlined, the direct callsite distribution should be prorated so that
911  // it will reflect the real callsite counts.
912  Candidate.CallInstr = DI;
913  if (isa<CallInst>(DI) || isa<InvokeInst>(DI)) {
914  bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
915  if (!Inlined) {
916  // Prorate the direct callsite distribution so that it reflects real
917  // callsite counts.
// NOTE(review): listing line 918 is absent from this extract; the callee
// whose argument list continues on the next line is not visible here.
919  *DI, static_cast<float>(Candidate.CallsiteCount) / SumOrigin);
920  }
921  return Inlined;
922  }
923  }
924  } else {
925  LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to "
926  << Candidate.CalleeSamples->getFuncName() << " because "
927  << Reason << "\n");
928  }
929  return false;
930 }
931 
/// Decide whether a callsite that is not hot should still be inlined, using
/// the inline cost of the call.
///
/// \param CallInst The call being considered.
/// \returns false when ProfileSizeInline is off, when Callee is null, or when
/// the cost is "never"; true when the cost is "always"; otherwise whether the
/// cost is within SampleColdCallSiteThreshold.
932 bool SampleProfileLoader::shouldInlineColdCallee(CallBase &CallInst) {
933  if (!ProfileSizeInline)
934  return false;
935 
// NOTE(review): listing line 936 (the declaration of Callee) is absent from
// this extract.
937  if (Callee == nullptr)
938  return false;
939 
940  InlineCost Cost = getInlineCost(CallInst, getInlineParams(), GetTTI(*Callee),
941  GetAC, GetTLI);
942 
943  if (Cost.isNever())
944  return false;
945 
946  if (Cost.isAlways())
947  return true;
948 
949  return Cost.getCost() <= SampleColdCallSiteThreshold;
950 }
951 
/// Emit one "InlineAttempt" analysis remark for each candidate call that has
/// a known called function.
///
/// \param Candidates Call sites about to be considered for inlining.
/// \param F Function reported as the caller in the remark.
/// \param Hot Selects the wording of the remark ("hotness" vs. "size").
952 void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
953  const SmallVectorImpl<CallBase *> &Candidates, const Function &F,
954  bool Hot) {
955  for (auto I : Candidates) {
956  Function *CalledFunction = I->getCalledFunction();
// Calls without a resolvable called function produce no remark.
957  if (CalledFunction) {
958  ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineAttempt",
959  I->getDebugLoc(), I->getParent())
960  << "previous inlining reattempted for "
961  << (Hot ? "hotness: '" : "size: '")
962  << ore::NV("Callee", CalledFunction) << "' into '"
963  << ore::NV("Caller", &F) << "'");
964  }
965  }
966 }
967 
/// Collect into \p InlinedGUIDs the GUIDs of functions that the profile
/// suggests importing because they are not defined in this module.
///
/// \param Samples Caller profile; must be non-null (asserted).
/// \param InlinedGUIDs Set that receives the collected GUIDs.
/// NOTE(review): listing line 970 (the rest of the parameter list) is absent
/// from this extract; the body also uses SymbolMap and Threshold.
968 void SampleProfileLoader::findExternalInlineCandidate(
969  const FunctionSamples *Samples, DenseSet<GlobalValue::GUID> &InlinedGUIDs,
971  assert(Samples && "expect non-null caller profile");
972 
973  // For AutoFDO profile, retrieve candidate profiles by walking over
974  // the nested inlinee profiles.
975  if (!ProfileIsCS) {
976  Samples->findInlinedFunctions(InlinedGUIDs, SymbolMap, Threshold);
977  return;
978  }
979 
// NOTE(review): listing line 980 (the declaration receiving this call's
// result, used below as Caller) is absent from this extract.
981  ContextTracker->getContextFor(Samples->getContext());
// Breadth-first walk over the context trie, starting at the caller's node.
982  std::queue<ContextTrieNode *> CalleeList;
983  CalleeList.push(Caller);
984  while (!CalleeList.empty()) {
985  ContextTrieNode *Node = CalleeList.front();
986  CalleeList.pop();
987  FunctionSamples *CalleeSample = Node->getFunctionSamples();
988  // For CSSPGO profile, retrieve candidate profile by walking over the
989  // trie built for context profile. Note that also take call targets
990  // even if callee doesn't have a corresponding context profile.
// A node skipped by either `continue` below does not enqueue its children.
991  if (!CalleeSample)
992  continue;
993 
994  // If pre-inliner decision is used, honor that for importing as well.
// NOTE(review): listing lines 996-997 (the initializer of PreInline) are
// absent from this extract.
995  bool PreInline =
998  if (!PreInline && CalleeSample->getEntrySamples() < Threshold)
999  continue;
1000 
1001  StringRef Name = CalleeSample->getFuncName();
// NOTE(review): listing line 1002 (the declaration of Func) is absent from
// this extract.
1003  // Add to the import list only when it's defined out of module.
1004  if (!Func || Func->isDeclaration())
1005  InlinedGUIDs.insert(FunctionSamples::getGUID(CalleeSample->getName()));
1006 
1007  // Import hot CallTargets, which may not be available in IR because full
1008  // profile annotation cannot be done until backend compilation in ThinLTO.
1009  for (const auto &BS : CalleeSample->getBodySamples())
1010  for (const auto &TS : BS.second.getCallTargets())
1011  if (TS.getValue() > Threshold) {
1012  StringRef CalleeName = CalleeSample->getFuncName(TS.getKey());
1013  const Function *Callee = SymbolMap.lookup(CalleeName);
1014  if (!Callee || Callee->isDeclaration())
1015  InlinedGUIDs.insert(FunctionSamples::getGUID(TS.getKey()));
1016  }
1017 
1018  // Import hot child context profile associated with callees. Note that this
1019  // may have some overlap with the call target loop above, but doing this
1020  // based child context profile again effectively allow us to use the max of
1021  // entry count and call target count to determine importing.
1022  for (auto &Child : Node->getAllChildContext()) {
1023  ContextTrieNode *CalleeNode = &Child.second;
1024  CalleeList.push(CalleeNode);
1025  }
1026  }
1027 }
1028 
1029 /// Iteratively inline hot callsites of a function.
1030 ///
1031 /// Iteratively traverse all callsites of the function \p F, and find if
1032 /// the corresponding inlined instance exists and is hot in profile. If
1033 /// it is hot enough, inline the callsites and adds new callsites of the
1034 /// callee into the caller. If the call is an indirect call, first promote
1035 /// it to direct call. Each indirect call is limited with a single target.
1036 ///
1037 /// \param F function to perform iterative inlining.
1038 /// \param InlinedGUIDs a set to be updated to include all GUIDs that are
1039 /// inlined in the profiled binary.
1040 ///
1041 /// \returns True if there is any inline happened.
1042 bool SampleProfileLoader::inlineHotFunctions(
1043  Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
1044  // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
1045  // Profile symbol list is ignored when profile-sample-accurate is on.
// NOTE(review): listing line 1047 (part of the asserted condition) is absent
// from this extract.
1046  assert((!ProfAccForSymsInList ||
1048  !F.hasFnAttribute("profile-sample-accurate"))) &&
1049  "ProfAccForSymsInList should be false when profile-sample-accurate "
1050  "is enabled");
1051 
// Call sites that have a callee profile but have not been inlined so far;
// entries are erased below once the corresponding call is inlined.
1052  DenseMap<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
1053  bool Changed = false;
1054  bool LocalChanged = true;
// Repeat until one full pass over F inlines nothing.
1055  while (LocalChanged) {
1056  LocalChanged = false;
// NOTE(review): listing line 1057 (the declaration of CIS, the per-pass
// candidate list) is absent from this extract.
1058  for (auto &BB : F) {
1059  bool Hot = false;
1060  SmallVector<CallBase *, 10> AllCandidates;
1061  SmallVector<CallBase *, 10> ColdCandidates;
1062  for (auto &I : BB.getInstList()) {
1063  const FunctionSamples *FS = nullptr;
1064  if (auto *CB = dyn_cast<CallBase>(&I)) {
1065  if (!isa<IntrinsicInst>(I) && (FS = findCalleeFunctionSamples(*CB))) {
1066  assert((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) &&
1067  "GUIDToFuncNameMap has to be populated");
1068  AllCandidates.push_back(CB);
1069  if (FS->getEntrySamples() > 0 || ProfileIsCS)
1070  LocalNotInlinedCallSites.try_emplace(CB, FS);
1071  if (callsiteIsHot(FS, PSI, ProfAccForSymsInList))
1072  Hot = true;
1073  else if (shouldInlineColdCallee(*CB))
1074  ColdCandidates.push_back(CB);
1075  }
1076  }
1077  }
// One hot call site in the block (or an external advisor) makes every
// profiled call in the block a candidate; otherwise only the cold candidates
// accepted by shouldInlineColdCallee are used.
1078  if (Hot || ExternalInlineAdvisor) {
1079  CIS.insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end());
1080  emitOptimizationRemarksForInlineCandidates(AllCandidates, F, true);
1081  } else {
1082  CIS.insert(CIS.begin(), ColdCandidates.begin(), ColdCandidates.end());
1083  emitOptimizationRemarksForInlineCandidates(ColdCandidates, F, false);
1084  }
1085  }
1086  for (CallBase *I : CIS) {
1087  Function *CalledFunction = I->getCalledFunction();
1088  InlineCandidate Candidate = {
1089  I,
1090  LocalNotInlinedCallSites.count(I) ? LocalNotInlinedCallSites[I]
1091  : nullptr,
1092  0 /* dummy count */, 1.0 /* dummy distribution factor */};
1093  // Do not inline recursive calls.
1094  if (CalledFunction == &F)
1095  continue;
1096  if (I->isIndirectCall()) {
1097  uint64_t Sum;
1098  for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
1099  uint64_t SumOrigin = Sum;
// In the ThinLTO pre-link phase only record import candidates; no promotion
// or inlining is attempted for this target.
1100  if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1101  findExternalInlineCandidate(FS, InlinedGUIDs, SymbolMap,
1102  PSI->getOrCompHotCountThreshold());
1103  continue;
1104  }
1105  if (!callsiteIsHot(FS, PSI, ProfAccForSymsInList))
1106  continue;
1107 
1108  Candidate = {I, FS, FS->getEntrySamples(), 1.0};
1109  if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum)) {
1110  LocalNotInlinedCallSites.erase(I);
1111  LocalChanged = true;
1112  }
1113  }
1114  } else if (CalledFunction && CalledFunction->getSubprogram() &&
1115  !CalledFunction->isDeclaration()) {
1116  if (tryInlineCandidate(Candidate)) {
1117  LocalNotInlinedCallSites.erase(I);
1118  LocalChanged = true;
1119  }
1120  } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1121  findExternalInlineCandidate(findCalleeFunctionSamples(*I), InlinedGUIDs,
1122  SymbolMap,
1123  PSI->getOrCompHotCountThreshold());
1124  }
1125  }
1126  Changed |= LocalChanged;
1127  }
1128 
1129  // For CS profile, profile for not inlined context will be merged when
1130  // base profile is being retrieved
1131  if (ProfileIsCS)
1132  return Changed;
1133 
1134  // Accumulate not inlined callsite information into notInlinedSamples
1135  for (const auto &Pair : LocalNotInlinedCallSites) {
1136  CallBase *I = Pair.getFirst();
1137  Function *Callee = I->getCalledFunction();
1138  if (!Callee || Callee->isDeclaration())
1139  continue;
1140 
1141  ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "NotInline",
1142  I->getDebugLoc(), I->getParent())
1143  << "previous inlining not repeated: '"
1144  << ore::NV("Callee", Callee) << "' into '"
1145  << ore::NV("Caller", &F) << "'");
1146 
1147  ++NumCSNotInlined;
1148  const FunctionSamples *FS = Pair.getSecond();
// Profiles with neither total nor entry samples contribute nothing below.
1149  if (FS->getTotalSamples() == 0 && FS->getEntrySamples() == 0) {
1150  continue;
1151  }
1152 
1153  if (ProfileMergeInlinee) {
1154  // A function call can be replicated by optimizations like callsite
1155  // splitting or jump threading and the replicates end up sharing the
1156  // sample nested callee profile instead of slicing the original inlinee's
1157  // profile. We want to do merge exactly once by filtering out callee
1158  // profiles with a non-zero head sample count.
1159  if (FS->getHeadSamples() == 0) {
1160  // Use entry samples as head samples during the merge, as inlinees
1161  // don't have head samples.
1162  const_cast<FunctionSamples *>(FS)->addHeadSamples(
1163  FS->getEntrySamples());
1164 
1165  // Note that we have to do the merge right after processing function.
1166  // This allows OutlineFS's profile to be used for annotation during
1167  // top-down processing of functions' annotation.
1168  FunctionSamples *OutlineFS = Reader->getOrCreateSamplesFor(*Callee);
1169  OutlineFS->merge(*FS);
1170  }
1171  } else {
// Without merging, only accumulate the entry count per callee.
1172  auto pair =
1173  notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
1174  pair.first->second.entryCount += FS->getEntrySamples();
1175  }
1176  }
1177  return Changed;
1178 }
1179 
/// Try to inline the direct call described by \p Candidate.
///
/// \param Candidate Inline candidate; its call must have a called function
/// (asserted).
/// \param InlinedCallSites Optional output; when non-null and inlining
/// succeeds it is cleared and filled with the call sites reported by the
/// inliner.
/// \returns true if InlineFunction succeeded; false when the cost check
/// rejects the candidate or inlining fails.
1180 bool SampleProfileLoader::tryInlineCandidate(
1181  InlineCandidate &Candidate, SmallVector<CallBase *, 8> *InlinedCallSites) {
1182 
1183  CallBase &CB = *Candidate.CallInstr;
1184  Function *CalledFunction = CB.getCalledFunction();
1185  assert(CalledFunction && "Expect a callee with definition");
// Captured before inlining so the remarks below do not touch the call
// instruction afterwards.
1186  DebugLoc DLoc = CB.getDebugLoc();
1187  BasicBlock *BB = CB.getParent();
1188 
1189  InlineCost Cost = shouldInlineCandidate(Candidate);
1190  if (Cost.isNever()) {
1191  ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineFail", DLoc, BB)
1192  << "incompatible inlining");
1193  return false;
1194  }
1195 
1196  if (!Cost)
1197  return false;
1198 
1199  InlineFunctionInfo IFI(nullptr, GetAC);
1200  IFI.UpdateProfile = false;
1201  if (InlineFunction(CB, IFI).isSuccess()) {
1202  // Merge the attributes based on the inlining.
// NOTE(review): listing line 1203 (the start of the call whose last argument
// follows) is absent from this extract.
1204  *CalledFunction);
1205 
1206  // The call to InlineFunction erases I, so we can't pass it here.
1207  emitInlinedInto(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(), Cost,
1208  true, CSINLINE_DEBUG);
1209 
1210  // Now populate the list of newly exposed call sites.
1211  if (InlinedCallSites) {
1212  InlinedCallSites->clear();
1213  for (auto &I : IFI.InlinedCallSites)
1214  InlinedCallSites->push_back(I);
1215  }
1216 
1217  if (ProfileIsCS)
1218  ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
1219  ++NumCSInlined;
1220 
1221  // Prorate inlined probes for a duplicated inlining callsite which probably
1222  // has a distribution less than 100%. Samples for an inlinee should be
1223  // distributed among the copies of the original callsite based on each
1224  // callsite's distribution factor for counts accuracy. Note that an inlined
1225  // probe may come with its own distribution factor if it has been duplicated
1226  // in the inlinee body. The two factor are multiplied to reflect the
1227  // aggregation of duplication.
1228  if (Candidate.CallsiteDistribution < 1) {
1229  for (auto &I : IFI.InlinedCallSites) {
1230  if (Optional<PseudoProbe> Probe = extractProbe(*I))
// NOTE(review): listing line 1231 (the start of the statement whose last
// argument follows) is absent from this extract.
1232  Candidate.CallsiteDistribution);
1233  }
1234  NumDuplicatedInlinesite++;
1235  }
1236 
1237  return true;
1238  }
1239  return false;
1240 }
1241 
/// Build an inline candidate for the call \p CB.
///
/// \param NewCandidate Output; written only when true is returned.
/// \param CB Call instruction; must be non-null (asserted).
/// \returns false for intrinsic calls and for calls without a callee profile;
/// true otherwise.
1242 bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
1243  CallBase *CB) {
1244  assert(CB && "Expect non-null call instruction");
1245 
1246  if (isa<IntrinsicInst>(CB))
1247  return false;
1248 
1249  // Find the callee's profile. For indirect call, find hottest target profile.
1250  const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB);
1251  if (!CalleeSamples)
1252  return false;
1253 
// Distribution factor defaults to 1.0 and is taken from the pseudo probe on
// the call when one can be extracted.
1254  float Factor = 1.0;
1255  if (Optional<PseudoProbe> Probe = extractProbe(*CB))
1256  Factor = Probe->Factor;
1257 
// The callsite count is the larger of the enclosing block's weight (when
// available) and the callee's entry samples scaled by Factor.
1258  uint64_t CallsiteCount = 0;
1259  ErrorOr<uint64_t> Weight = getBlockWeight(CB->getParent());
1260  if (Weight)
1261  CallsiteCount = Weight.get();
// NOTE(review): CalleeSamples was already checked for null above, so this
// test is always true at this point.
1262  if (CalleeSamples)
1263  CallsiteCount = std::max(
1264  CallsiteCount, uint64_t(CalleeSamples->getEntrySamples() * Factor));
1265 
1266  *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
1267  return true;
1268 }
1269 
/// Compute the inline decision for \p Candidate as an InlineCost.
///
/// When an external inline advisor is set, its advice alone decides between
/// "always" and "never". Otherwise the call analyzer's cost is used, with
/// "never"/"always" results honored as-is, an optional preinliner override,
/// and the threshold replaced by a sample-profile threshold.
1270 InlineCost
1271 SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
1272  std::unique_ptr<InlineAdvice> Advice = nullptr;
1273  if (ExternalInlineAdvisor) {
1274  Advice = ExternalInlineAdvisor->getAdvice(*Candidate.CallInstr);
1275  if (!Advice->isInliningRecommended()) {
1276  Advice->recordUnattemptedInlining();
1277  return InlineCost::getNever("not previously inlined");
1278  }
1279  Advice->recordInlining();
1280  return InlineCost::getAlways("previously inlined");
1281  }
1282 
1283  // Adjust threshold based on call site hotness, only do this for callsite
1284  // prioritized inliner because otherwise cost-benefit check is done earlier.
1285  int SampleThreshold = SampleColdCallSiteThreshold;
// NOTE(review): listing line 1286 (the condition opening the block closed at
// listing line 1291) is absent from this extract.
1287  if (Candidate.CallsiteCount > PSI->getHotCountThreshold())
1288  SampleThreshold = SampleHotCallSiteThreshold;
1289  else if (!ProfileSizeInline)
1290  return InlineCost::getNever("cold callsite");
1291  }
1292 
1293  Function *Callee = Candidate.CallInstr->getCalledFunction();
1294  assert(Callee && "Expect a definition for inline candidate of direct call");
1295 
1296  InlineParams Params = getInlineParams();
1297  // We will ignore the threshold from inline cost, so always get full cost.
1298  Params.ComputeFullInlineCost = true;
// NOTE(review): listing line 1299 is absent from this extract.
1300  // Checks if there is anything in the reachable portion of the callee at
1301  // this callsite that makes this inlining potentially illegal. Need to
1302  // set ComputeFullInlineCost, otherwise getInlineCost may return early
1303  // when cost exceeds threshold without checking all IRs in the callee.
1304  // The actual cost does not matter because we only checks isNever() to
1305  // see if it is legal to inline the callsite.
1306  InlineCost Cost = getInlineCost(*Candidate.CallInstr, Callee, Params,
1307  GetTTI(*Callee), GetAC, GetTLI);
1308 
1309  // Honor always inline and never inline from call analyzer
1310  if (Cost.isNever() || Cost.isAlways())
1311  return Cost;
1312 
1313  // With CSSPGO, the preinliner in llvm-profgen can estimate global inline
1314  // decisions based on hotness as well as accurate function byte sizes for
1315  // given context using function/inlinee sizes from previous build. It
1316  // stores the decision in profile, and also adjust/merge context profile
1317  // aiming at better context-sensitive post-inline profile quality, assuming
1318  // all inline decision estimates are going to be honored by compiler. Here
1319  // we replay that inline decision under `sample-profile-use-preinliner`.
1320  // Note that we don't need to handle negative decision from preinliner as
1321  // context profile for not inlined calls are merged by preinliner already.
1322  if (UsePreInlinerDecision && Candidate.CalleeSamples) {
1323  // Once two node are merged due to promotion, we're losing some context
1324  // so the original context-sensitive preinliner decision should be ignored
1325  // for SyntheticContext.
1326  SampleContext &Context = Candidate.CalleeSamples->getContext();
1327  if (!Context.hasState(SyntheticContext) &&
1328  Context.hasAttribute(ContextShouldBeInlined))
1329  return InlineCost::getAlways("preinliner");
1330  }
1331 
1332  // For old FDO inliner, we inline the call site as long as cost is not
1333  // "Never". The cost-benefit check is done earlier.
// NOTE(review): listing line 1334 (the condition opening the block closed at
// listing line 1336) is absent from this extract.
1335  return InlineCost::get(Cost.getCost(), INT_MAX);
1336  }
1337 
1338  // Otherwise only use the cost from call analyzer, but overwrite threshold with
1339  // Sample PGO threshold.
1340  return InlineCost::get(Cost.getCost(), SampleThreshold);
1341 }
1342 
/// Inline call sites of \p F in priority order taken from a candidate queue,
/// bounded by an instruction-count limit. Requires a CS profile (asserted).
///
/// \param F Function whose call sites are considered.
/// \param InlinedGUIDs Set passed on to findExternalInlineCandidate during
/// the ThinLTO pre-link phase.
/// \returns true if any call site was inlined.
1343 bool SampleProfileLoader::inlineHotFunctionsWithPriority(
1344  Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
1345  assert(ProfileIsCS && "Prioritiy based inliner only works with CSSPGO now");
1346 
1347  // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
1348  // Profile symbol list is ignored when profile-sample-accurate is on.
// NOTE(review): listing line 1350 (part of the asserted condition) is absent
// from this extract.
1349  assert((!ProfAccForSymsInList ||
1351  !F.hasFnAttribute("profile-sample-accurate"))) &&
1352  "ProfAccForSymsInList should be false when profile-sample-accurate "
1353  "is enabled");
1354 
1355  // Populating worklist with initial call sites from root inliner, along
1356  // with call site weights.
1357  CandidateQueue CQueue;
1358  InlineCandidate NewCandidate;
1359  for (auto &BB : F) {
1360  for (auto &I : BB.getInstList()) {
1361  auto *CB = dyn_cast<CallBase>(&I);
1362  if (!CB)
1363  continue;
1364  if (getInlineCandidate(&NewCandidate, CB))
1365  CQueue.push(NewCandidate);
1366  }
1367  }
1368 
1369  // Cap the size growth from profile guided inlining. This is needed even
1370  // though cost of each inline candidate already accounts for callee size,
1371  // because with top-down inlining, we can grow inliner size significantly
1372  // with large number of smaller inlinees each pass the cost check.
// NOTE(review): listing line 1373 (the start of the assertion whose message
// follows) is absent from this extract.
1374  "Max inline size limit should not be smaller than min inline size "
1375  "limit.");
1376  unsigned SizeLimit = F.getInstructionCount() * ProfileInlineGrowthLimit;
// NOTE(review): listing lines 1377-1378 and 1380 are absent from this
// extract; the statement guarded by the `if` below is not visible.
1379  if (ExternalInlineAdvisor)
1381 
1382  // Perform iterative BFS call site prioritized inlining
1383  bool Changed = false;
1384  while (!CQueue.empty() && F.getInstructionCount() < SizeLimit) {
1385  InlineCandidate Candidate = CQueue.top();
1386  CQueue.pop();
1387  CallBase *I = Candidate.CallInstr;
1388  Function *CalledFunction = I->getCalledFunction();
1389 
// Recursive calls are never inlined.
1390  if (CalledFunction == &F)
1391  continue;
1392  if (I->isIndirectCall()) {
1393  uint64_t Sum = 0;
1394  auto CalleeSamples = findIndirectCallFunctionSamples(*I, Sum);
// SumOrigin keeps the unscaled sum; Sum is scaled by the candidate's
// distribution factor.
1395  uint64_t SumOrigin = Sum;
1396  Sum *= Candidate.CallsiteDistribution;
1397  unsigned ICPCount = 0;
1398  for (const auto *FS : CalleeSamples) {
1399  // TODO: Consider disable pre-lTO ICP for MonoLTO as well
1400  if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1401  findExternalInlineCandidate(FS, InlinedGUIDs, SymbolMap,
1402  PSI->getOrCompHotCountThreshold());
1403  continue;
1404  }
1405  uint64_t EntryCountDistributed =
1406  FS->getEntrySamples() * Candidate.CallsiteDistribution;
1407  // In addition to regular inline cost check, we also need to make sure
1408  // ICP isn't introducing excessive speculative checks even if individual
1409  // target looks beneficial to promote and inline. That means we should
1410  // only do ICP when there's a small number dominant targets.
1411  if (ICPCount >= ProfileICPRelativeHotnessSkip &&
1412  EntryCountDistributed * 100 < SumOrigin * ProfileICPRelativeHotness)
1413  break;
1414  // TODO: Fix CallAnalyzer to handle all indirect calls.
1415  // For indirect call, we don't run CallAnalyzer to get InlineCost
1416  // before actual inlining. This is because we could see two different
1417  // types from the same definition, which makes CallAnalyzer choke as
1418  // it's expecting matching parameter type on both caller and callee
1419  // side. See example from PR18962 for the triggering cases (the bug was
1420  // fixed, but we generate different types).
1421  if (!PSI->isHotCount(EntryCountDistributed))
1422  break;
1423  SmallVector<CallBase *, 8> InlinedCallSites;
1424  // Attach function profile for promoted indirect callee, and update
1425  // call site count for the promoted inline candidate too.
1426  Candidate = {I, FS, EntryCountDistributed,
1427  Candidate.CallsiteDistribution};
1428  if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
1429  &InlinedCallSites)) {
// Newly exposed call sites become candidates themselves.
1430  for (auto *CB : InlinedCallSites) {
1431  if (getInlineCandidate(&NewCandidate, CB))
1432  CQueue.emplace(NewCandidate);
1433  }
1434  ICPCount++;
1435  Changed = true;
1436  }
1437  }
1438  } else if (CalledFunction && CalledFunction->getSubprogram() &&
1439  !CalledFunction->isDeclaration()) {
1440  SmallVector<CallBase *, 8> InlinedCallSites;
1441  if (tryInlineCandidate(Candidate, &InlinedCallSites)) {
1442  for (auto *CB : InlinedCallSites) {
1443  if (getInlineCandidate(&NewCandidate, CB))
1444  CQueue.emplace(NewCandidate);
1445  }
1446  Changed = true;
1447  }
1448  } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1449  findExternalInlineCandidate(Candidate.CalleeSamples, InlinedGUIDs,
1450  SymbolMap, PSI->getOrCompHotCountThreshold());
1451  }
1452  }
1453 
// Candidates left in the queue mean the loop stopped on the size limit;
// record which kind of limit SizeLimit corresponds to.
1454  if (!CQueue.empty()) {
1455  if (SizeLimit == (unsigned)ProfileInlineLimitMax)
1456  ++NumCSInlinedHitMaxLimit;
1457  else if (SizeLimit == (unsigned)ProfileInlineLimitMin)
1458  ++NumCSInlinedHitMinLimit;
1459  else
1460  ++NumCSInlinedHitGrowthLimit;
1461  }
1462 
1463  return Changed;
1464 }
1465 
1466 /// Returns the sorted CallTargetMap \p M by count in descending order.
// NOTE(review): listing lines 1467-1469 (the function signature and the
// declaration of R) are absent from this extract.
// Each sorted call target is converted to an InstrProfValueData whose value
// is the GUID of the target name and whose count is the target's count.
1470  for (const auto &I : SampleRecord::SortCallTargets(M)) {
1471  R.emplace_back(
1472  InstrProfValueData{FunctionSamples::getGUID(I.first), I.second});
1473  }
1474  return R;
1475 }
1476 
1477 // Generate MD_prof metadata for every branch instruction using the
1478 // edge weights computed during propagation.
// For each basic block this also annotates call instructions: indirect calls
// get value-profile metadata via updateIDTMetaData, other non-intrinsic calls
// get a single branch weight equal to the block weight.
1479 void SampleProfileLoader::generateMDProfMetadata(Function &F) {
1480  // Generate MD_prof metadata for every branch instruction using the
1481  // edge weights computed during propagation.
1482  LLVM_DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n");
1483  LLVMContext &Ctx = F.getContext();
1484  MDBuilder MDB(Ctx);
1485  for (auto &BI : F) {
1486  BasicBlock *BB = &BI;
1487 
1488  if (BlockWeights[BB]) {
1489  for (auto &I : BB->getInstList()) {
1490  if (!isa<CallInst>(I) && !isa<InvokeInst>(I))
1491  continue;
// Calls without a statically known callee are handled as indirect calls.
1492  if (!cast<CallBase>(I).getCalledFunction()) {
1493  const DebugLoc &DLoc = I.getDebugLoc();
1494  if (!DLoc)
1495  continue;
1496  const DILocation *DIL = DLoc;
1497  const FunctionSamples *FS = findFunctionSamples(I);
1498  if (!FS)
1499  continue;
// NOTE(review): listing line 1500 (the declaration of CallSite) is absent
// from this extract.
1501  auto T = FS->findCallTargetMapAt(CallSite);
1502  if (!T || T.get().empty())
1503  continue;
// NOTE(review): listing line 1504 (the condition opening the block closed at
// listing line 1512) is absent from this extract.
1505  // Prorate the callsite counts based on the pre-ICP distribution
1506  // factor to reflect what is already done to the callsite before
1507  // ICP, such as callsite cloning.
1508  if (Optional<PseudoProbe> Probe = extractProbe(I)) {
1509  if (Probe->Factor < 1)
1510  T = SampleRecord::adjustCallTargets(T.get(), Probe->Factor);
1511  }
1512  }
// NOTE(review): listing line 1514 (the initializer of SortedCallTargets) is
// absent from this extract.
1513  SmallVector<InstrProfValueData, 2> SortedCallTargets =
1515  uint64_t Sum = 0;
1516  for (const auto &C : T.get())
1517  Sum += C.second;
1518  // With CSSPGO all indirect call targets are counted towards the
1519  // original indirect call site in the profile, including both
1520  // inlined and non-inlined targets.
// NOTE(review): listing line 1521 (the condition opening the block closed at
// listing line 1527) is absent from this extract.
1522  if (const FunctionSamplesMap *M =
1523  FS->findFunctionSamplesMapAt(CallSite)) {
1524  for (const auto &NameFS : *M)
1525  Sum += NameFS.second.getEntrySamples();
1526  }
1527  }
1528  if (Sum)
1529  updateIDTMetaData(I, SortedCallTargets, Sum);
1530  else if (OverwriteExistingWeights)
1531  I.setMetadata(LLVMContext::MD_prof, nullptr);
1532  } else if (!isa<IntrinsicInst>(&I)) {
// The 64-bit block weight is narrowed to 32 bits by this cast.
1533  I.setMetadata(LLVMContext::MD_prof,
1534  MDB.createBranchWeights(
1535  {static_cast<uint32_t>(BlockWeights[BB])}));
1536  }
1537  }
// NOTE(review): listing line 1538 (the `else` branch header for blocks with
// zero weight) is absent from this extract.
1539  // Set profile metadata (possibly annotated by LTO prelink) to zero or
1540  // clear it for cold code.
1541  for (auto &I : BB->getInstList()) {
1542  if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
1543  if (cast<CallBase>(I).isIndirectCall())
1544  I.setMetadata(LLVMContext::MD_prof, nullptr);
1545  else
1546  I.setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(0));
1547  }
1548  }
1549  }
1550 
// Only multi-successor branch, switch and indirectbr terminators receive
// branch weights.
1551  Instruction *TI = BB->getTerminator();
1552  if (TI->getNumSuccessors() == 1)
1553  continue;
1554  if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI) &&
1555  !isa<IndirectBrInst>(TI))
1556  continue;
1557 
1558  DebugLoc BranchLoc = TI->getDebugLoc();
1559  LLVM_DEBUG(dbgs() << "\nGetting weights for branch at line "
1560  << ((BranchLoc) ? Twine(BranchLoc.getLine())
1561  : Twine("<UNKNOWN LOCATION>"))
1562  << ".\n");
1563  SmallVector<uint32_t, 4> Weights;
1564  uint32_t MaxWeight = 0;
// MaxDestInst has no initializer; it is assigned together with MaxWeight and
// read only in the MaxWeight > 0 branch below.
1565  Instruction *MaxDestInst;
1566  for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
1567  BasicBlock *Succ = TI->getSuccessor(I);
1568  Edge E = std::make_pair(BB, Succ);
1569  uint64_t Weight = EdgeWeights[E];
1570  LLVM_DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), E));
1571  // Use uint32_t saturated arithmetic to adjust the incoming weights,
1572  // if needed. Sample counts in profiles are 64-bit unsigned values,
1573  // but internally branch weights are expressed as 32-bit values.
1574  if (Weight > std::numeric_limits<uint32_t>::max()) {
1575  LLVM_DEBUG(dbgs() << " (saturated due to uint32_t overflow)");
// NOTE(review): listing line 1576 is absent from this extract; presumably it
// clamps Weight -- confirm against the full file.
1577  }
1578  // Weight is added by one to avoid propagation errors introduced by
1579  // 0 weights.
1580  Weights.push_back(static_cast<uint32_t>(Weight + 1));
1581  if (Weight != 0) {
1582  if (Weight > MaxWeight) {
1583  MaxWeight = Weight;
1584  MaxDestInst = Succ->getFirstNonPHIOrDbgOrLifetime();
1585  }
1586  }
1587  }
1588 
1589  uint64_t TempWeight;
1590  // Only set weights if there is at least one non-zero weight.
1591  // In any other case, let the analyzer set weights.
1592  // Do not set weights if the weights are present unless under
1593  // OverwriteExistingWeights. In ThinLTO, the profile annotation is done
1594  // twice. If the first annotation already set the weights, the second pass
1595  // does not need to set it. With OverwriteExistingWeights, Blocks with zero
1596  // weight should have their existing metadata (possibly annotated by LTO
1597  // prelink) cleared.
1598  if (MaxWeight > 0 &&
1599  (!TI->extractProfTotalWeight(TempWeight) || OverwriteExistingWeights)) {
1600  LLVM_DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
1601  TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
1602  ORE->emit([&]() {
1603  return OptimizationRemark(DEBUG_TYPE, "PopularDest", MaxDestInst)
1604  << "most popular destination for conditional branches at "
1605  << ore::NV("CondBranchesLoc", BranchLoc);
1606  });
1607  } else {
// NOTE(review): listing line 1608 (the condition selecting between clearing
// and skipping) is absent from this extract.
1609  TI->setMetadata(LLVMContext::MD_prof, nullptr);
1610  LLVM_DEBUG(dbgs() << "CLEARED. All branch weights are zero.\n");
1611  } else {
1612  LLVM_DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
1613  }
1614  }
1615  }
1616 }
1617 
1618 /// Once all the branch weights are computed, we emit the MD_prof
1619 /// metadata on BB using the computed values for each of its branches.
1620 ///
/// Before annotating, this runs profile-guided inlining on \p F and then
/// computes and propagates weights; metadata is generated only when one of
/// those steps reported a change.
///
1621 /// \param F The function to query.
1622 ///
1623 /// \returns true if \p F was modified. Returns false, otherwise.
1624 bool SampleProfileLoader::emitAnnotations(Function &F) {
1625  bool Changed = false;
1626 
// NOTE(review): listing line 1627 (the condition opening the block closed at
// listing line 1636) is absent from this extract.
1628  if (!ProbeManager->profileIsValid(F, *Samples)) {
1629  LLVM_DEBUG(
1630  dbgs() << "Profile is invalid due to CFG mismatch for Function "
1631  << F.getName());
1632  ++NumMismatchedProfile;
1633  return false;
1634  }
1635  ++NumMatchedProfile;
1636  } else {
// getFunctionLoc(F) returning 0 makes this path bail out without annotating.
1637  if (getFunctionLoc(F) == 0)
1638  return false;
1639 
1640  LLVM_DEBUG(dbgs() << "Line number for the first instruction in "
1641  << F.getName() << ": " << getFunctionLoc(F) << "\n");
1642  }
1643 
1644  DenseSet<GlobalValue::GUID> InlinedGUIDs;
// The priority-based inliner is used only for CS profiles when
// CallsitePrioritizedInline is set.
1645  if (ProfileIsCS && CallsitePrioritizedInline)
1646  Changed |= inlineHotFunctionsWithPriority(F, InlinedGUIDs);
1647  else
1648  Changed |= inlineHotFunctions(F, InlinedGUIDs);
1649 
1650  Changed |= computeAndPropagateWeights(F, InlinedGUIDs);
1651 
1652  if (Changed)
1653  generateMDProfMetadata(F);
1654 
1655  emitCoverageRemarks(F);
1656  return Changed;
1657 }
1658 
1660 
// Legacy pass registration for SampleProfileLoaderLegacyPass under the name
// "sample-profile".
// NOTE(review): listing lines 1663-1666 and 1668 are absent from this
// extract, so the entries between BEGIN and END and the tail of the END
// macro's argument list are not visible here.
1661 INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile",
1662  "Sample Profile loader", false, false)
1667 INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",
1669 
1670 std::unique_ptr<ProfiledCallGraph>
1671 SampleProfileLoader::buildProfiledCallGraph(CallGraph &CG) {
1672  std::unique_ptr<ProfiledCallGraph> ProfiledCG;
1673  if (ProfileIsCS)
1674  ProfiledCG = std::make_unique<ProfiledCallGraph>(*ContextTracker);
1675  else
1676  ProfiledCG = std::make_unique<ProfiledCallGraph>(Reader->getProfiles());
1677 
1678  // Add all functions into the profiled call graph even if they are not in
1679  // the profile. This makes sure functions missing from the profile still
1680  // gets a chance to be processed.
1681  for (auto &Node : CG) {
1682  const auto *F = Node.first;
1683  if (!F || F->isDeclaration() || !F->hasFnAttribute("use-sample-profile"))
1684  continue;
1685  ProfiledCG->addProfiledFunction(FunctionSamples::getCanonicalFnName(*F));
1686  }
1687 
1688  return ProfiledCG;
1689 }
1690 
1691 std::vector<Function *>
1692 SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
1693  std::vector<Function *> FunctionOrderList;
1694  FunctionOrderList.reserve(M.size());
1695 
1697  errs() << "WARNING: -use-profiled-call-graph ignored, should be used "
1698  "together with -sample-profile-top-down-load.\n";
1699 
1700  if (!ProfileTopDownLoad || CG == nullptr) {
1701  if (ProfileMergeInlinee) {
1702  // Disable ProfileMergeInlinee if profile is not loaded in top down order,
1703  // because the profile for a function may be used for the profile
1704  // annotation of its outline copy before the profile merging of its
1705  // non-inlined inline instances, and that is not the way how
1706  // ProfileMergeInlinee is supposed to work.
1707  ProfileMergeInlinee = false;
1708  }
1709 
1710  for (Function &F : M)
1711  if (!F.isDeclaration() && F.hasFnAttribute("use-sample-profile"))
1712  FunctionOrderList.push_back(&F);
1713  return FunctionOrderList;
1714  }
1715 
1716  assert(&CG->getModule() == &M);
1717 
1718  if (UseProfiledCallGraph ||
1719  (ProfileIsCS && !UseProfiledCallGraph.getNumOccurrences())) {
1720  // Use profiled call edges to augment the top-down order. There are cases
1721  // that the top-down order computed based on the static call graph doesn't
1722  // reflect real execution order. For example
1723  //
1724  // 1. Incomplete static call graph due to unknown indirect call targets.
1725  // Adjusting the order by considering indirect call edges from the
1726  // profile can enable the inlining of indirect call targets by allowing
1727  // the caller processed before them.
1728  // 2. Mutual call edges in an SCC. The static processing order computed for
1729  // an SCC may not reflect the call contexts in the context-sensitive
1730  // profile, thus may cause potential inlining to be overlooked. The
1731  // function order in one SCC is being adjusted to a top-down order based
1732  // on the profile to favor more inlining. This is only a problem with CS
1733  // profile.
1734  // 3. Transitive indirect call edges due to inlining. When a callee function
1735  // (say B) is inlined into into a caller function (say A) in LTO prelink,
1736  // every call edge originated from the callee B will be transferred to
1737  // the caller A. If any transferred edge (say A->C) is indirect, the
1738  // original profiled indirect edge B->C, even if considered, would not
1739  // enforce a top-down order from the caller A to the potential indirect
1740  // call target C in LTO postlink since the inlined callee B is gone from
1741  // the static call graph.
1742  // 4. #3 can happen even for direct call targets, due to functions defined
1743  // in header files. A header function (say A), when included into source
1744  // files, is defined multiple times but only one definition survives due
1745  // to ODR. Therefore, the LTO prelink inlining done on those dropped
1746  // definitions can be useless based on a local file scope. More
1747  // importantly, the inlinee (say B), once fully inlined to a
1748  // to-be-dropped A, will have no profile to consume when its outlined
1749  // version is compiled. This can lead to a profile-less prelink
1750  // compilation for the outlined version of B which may be called from
1751  // external modules. while this isn't easy to fix, we rely on the
1752  // postlink AutoFDO pipeline to optimize B. Since the survived copy of
1753  // the A can be inlined in its local scope in prelink, it may not exist
1754  // in the merged IR in postlink, and we'll need the profiled call edges
1755  // to enforce a top-down order for the rest of the functions.
1756  //
1757  // Considering those cases, a profiled call graph completely independent of
1758  // the static call graph is constructed based on profile data, where
1759  // function objects are not even needed to handle case #3 and case 4.
1760  //
1761  // Note that static callgraph edges are completely ignored since they
1762  // can be conflicting with profiled edges for cyclic SCCs and may result in
1763  // an SCC order incompatible with profile-defined one. Using strictly
1764  // profile order ensures a maximum inlining experience. On the other hand,
1765  // static call edges are not so important when they don't correspond to a
1766  // context in the profile.
1767 
1768  std::unique_ptr<ProfiledCallGraph> ProfiledCG = buildProfiledCallGraph(*CG);
1769  scc_iterator<ProfiledCallGraph *> CGI = scc_begin(ProfiledCG.get());
1770  while (!CGI.isAtEnd()) {
1771  for (ProfiledCallGraphNode *Node : *CGI) {
1772  Function *F = SymbolMap.lookup(Node->Name);
1773  if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile"))
1774  FunctionOrderList.push_back(F);
1775  }
1776  ++CGI;
1777  }
1778  } else {
1780  while (!CGI.isAtEnd()) {
1781  for (CallGraphNode *Node : *CGI) {
1782  auto *F = Node->getFunction();
1783  if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile"))
1784  FunctionOrderList.push_back(F);
1785  }
1786  ++CGI;
1787  }
1788  }
1789 
1790  LLVM_DEBUG({
1791  dbgs() << "Function processing order:\n";
1792  for (auto F : reverse(FunctionOrderList)) {
1793  dbgs() << F->getName() << "\n";
1794  }
1795  });
1796 
1797  std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
1798  return FunctionOrderList;
1799 }
1800 
/// Open and read the sample profile named by Filename for module \p M and
/// set up the loader state that depends on it: the profile symbol list, the
/// set of names present in the profile, the optional replay inline advisor,
/// the context tracker for context-sensitive profiles, and the pseudo-probe
/// manager for probe-based profiles.
///
/// \returns false, after reporting a DiagnosticInfoSampleProfile, when the
/// profile cannot be opened or read, or when a probe-based profile is used
/// on a module that is not probed. Returns true otherwise.
// NOTE(review): this listing skips numbered lines 1802, 1813, 1827, 1846,
// 1849, 1851-1852, 1855, 1859 and 1863, so the end of the signature and
// several statements/conditions are not visible here.
1801 bool SampleProfileLoader::doInitialization(Module &M,
1803  auto &Ctx = M.getContext();
1804 
1805  auto ReaderOrErr = SampleProfileReader::create(
1806  Filename, Ctx, FSDiscriminatorPass::Base, RemappingFilename);
1807  if (std::error_code EC = ReaderOrErr.getError()) {
1808  std::string Msg = "Could not open profile: " + EC.message();
1809  Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1810  return false;
1811  }
1812  Reader = std::move(ReaderOrErr.get());
1814  // Set the module before reading the profile so the reader may be able to
1815  // read only the function profiles which are used by the current module.
1816  Reader->setModule(&M);
1817  if (std::error_code EC = Reader->read()) {
1818  std::string Msg = "profile reading failed: " + EC.message();
1819  Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1820  return false;
1821  }
1822 
1823  PSL = Reader->getProfileSymbolList();
1824 
1825  // While profile-sample-accurate is on, ignore symbol list.
1826  ProfAccForSymsInList =
  // NOTE(review): the right-hand side of the assignment above (numbered line
  // 1827) is missing from this listing.
1828  if (ProfAccForSymsInList) {
  // Rebuild the set of names that appear in the profile from the reader's
  // name table, when the reader provides one.
1829  NamesInProfile.clear();
1830  if (auto NameTable = Reader->getNameTable())
1831  NamesInProfile.insert(NameTable->begin(), NameTable->end());
1832  CoverageTracker.setProfAccForSymsInList(true);
1833  }
1834 
  // An inline replay file only takes effect when a function analysis manager
  // is available; the advisor is dropped again if no replay remarks loaded.
1835  if (FAM && !ProfileInlineReplayFile.empty()) {
1836  ExternalInlineAdvisor = std::make_unique<ReplayInlineAdvisor>(
1837  M, *FAM, Ctx, /*OriginalAdvisor=*/nullptr, ProfileInlineReplayFile,
1838  /*EmitRemarks=*/false);
1839  if (!ExternalInlineAdvisor->areReplayRemarksLoaded())
1840  ExternalInlineAdvisor.reset();
1841  }
1842 
1843  // Apply tweaks if context-sensitive profile is available.
1844  if (Reader->profileIsCS()) {
1845  ProfileIsCS = true;
1847 
  // NOTE(review): each of the option assignments below is presumably guarded
  // by a condition on the numbered line missing just before it -- confirm
  // against the full file.
1848  // Enable priority-based inliner and size inline by default for CSSPGO.
1850  ProfileSizeInline = true;
1853 
1854  // For CSSPGO, use preinliner decision by default when available.
1856  UsePreInlinerDecision = true;
1857 
1858  // For CSSPGO, we also allow recursive inline to best use context profile.
1860  AllowRecursiveInline = true;
1861 
1862  // Enable iterative-BFI by default for CSSPGO.
1864  UseIterativeBFIInference = true;
1865 
1866  // Tracker for profiles under different contexts.
1867  ContextTracker = std::make_unique<SampleContextTracker>(
1868  Reader->getProfiles(), &GUIDToFuncNameMap);
1869  }
1870 
1871  // Load pseudo probe descriptors for probe-based function samples.
1872  if (Reader->profileIsProbeBased()) {
1873  ProbeManager = std::make_unique<PseudoProbeManager>(M);
1874  if (!ProbeManager->moduleIsProbed(M)) {
1875  const char *Msg =
1876  "Pseudo-probe-based profile requires SampleProfileProbePass";
1877  Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1878  return false;
1879  }
1880  }
1881 
1882  return true;
1883 }
1884 
1886  return new SampleProfileLoaderLegacyPass();
1887 }
1888 
1890  return new SampleProfileLoaderLegacyPass(Name);
1891 }
1892 
1893 bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
1894  ProfileSummaryInfo *_PSI, CallGraph *CG) {
1895  GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
1896 
1897  PSI = _PSI;
1898  if (M.getProfileSummary(/* IsCS */ false) == nullptr) {
1899  M.setProfileSummary(Reader->getSummary().getMD(M.getContext()),
1901  PSI->refresh();
1902  }
1903  // Compute the total number of samples collected in this profile.
1904  for (const auto &I : Reader->getProfiles())
1905  TotalCollectedSamples += I.second.getTotalSamples();
1906 
1907  auto Remapper = Reader->getRemapper();
1908  // Populate the symbol map.
1909  for (const auto &N_F : M.getValueSymbolTable()) {
1910  StringRef OrigName = N_F.getKey();
1911  Function *F = dyn_cast<Function>(N_F.getValue());
1912  if (F == nullptr || OrigName.empty())
1913  continue;
1914  SymbolMap[OrigName] = F;
1916  if (OrigName != NewName && !NewName.empty()) {
1917  auto r = SymbolMap.insert(std::make_pair(NewName, F));
1918  // Failiing to insert means there is already an entry in SymbolMap,
1919  // thus there are multiple functions that are mapped to the same
1920  // stripped name. In this case of name conflicting, set the value
1921  // to nullptr to avoid confusion.
1922  if (!r.second)
1923  r.first->second = nullptr;
1924  OrigName = NewName;
1925  }
1926  // Insert the remapped names into SymbolMap.
1927  if (Remapper) {
1928  if (auto MapName = Remapper->lookUpNameInProfile(OrigName)) {
1929  if (*MapName != OrigName && !MapName->empty())
1930  SymbolMap.insert(std::make_pair(*MapName, F));
1931  }
1932  }
1933  }
1934  assert(SymbolMap.count(StringRef()) == 0 &&
1935  "No empty StringRef should be added in SymbolMap");
1936 
1937  bool retval = false;
1938  for (auto F : buildFunctionOrder(M, CG)) {
1939  assert(!F->isDeclaration());
1940  clearFunctionData();
1941  retval |= runOnFunction(*F, AM);
1942  }
1943 
1944  // Account for cold calls not inlined....
1945  if (!ProfileIsCS)
1946  for (const std::pair<Function *, NotInlinedProfileInfo> &pair :
1947  notInlinedCallInfo)
1948  updateProfileCallee(pair.first, pair.second.entryCount);
1949 
1950  return retval;
1951 }
1952 
1953 bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
1954  ACT = &getAnalysis<AssumptionCacheTracker>();
1955  TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
1956  TLIWP = &getAnalysis<TargetLibraryInfoWrapperPass>();
1957  ProfileSummaryInfo *PSI =
1958  &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
1959  return SampleLoader.runOnModule(M, nullptr, PSI, nullptr);
1960 }
1961 
// Body of the per-function driver (invoked as runOnFunction(*F, AM) from the
// module-level loop). It picks the initial entry count for F, selects the
// remark emitter, looks up the samples for F and, when non-empty samples
// exist, returns the result of emitAnnotations(F); otherwise it returns
// false.
// NOTE(review): the signature (numbered line 1962) and numbered lines 2001,
// 2013 and 2015 are missing from this listing.
1963  LLVM_DEBUG(dbgs() << "\n\nProcessing Function " << F.getName() << "\n");
1964  DILocation2SampleMap.clear();
1965  // By default the entry count is initialized to -1, which will be treated
1966  // conservatively by getEntryCount as the same as unknown (None). This is
1967  // to avoid newly added code to be treated as cold. If we have samples
1968  // this will be overwritten in emitAnnotations.
1969  uint64_t initialEntryCount = -1;
1970 
1971  ProfAccForSymsInList = ProfileAccurateForSymsInList && PSL;
1972  if (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate")) {
1973  // Initialize all the function entry counts to 0. It means all the
1974  // functions without profile will be regarded as cold.
1975  initialEntryCount = 0;
1976  // profile-sample-accurate is a user assertion which has a higher precedence
1977  // than symbol list. When profile-sample-accurate is on, ignore symbol list.
1978  ProfAccForSymsInList = false;
1979  }
1980  CoverageTracker.setProfAccForSymsInList(ProfAccForSymsInList);
1981 
1982  // PSL -- profile symbol list include all the symbols in sampled binary.
1983  // If ProfileAccurateForSymsInList is enabled, PSL is used to treat
1984  // old functions without samples being cold, without having to worry
1985  // about new and hot functions being mistakenly treated as cold.
1986  if (ProfAccForSymsInList) {
1987  // Initialize the entry count to 0 for functions in the list.
1988  if (PSL->contains(F.getName()))
1989  initialEntryCount = 0;
1990 
1991  // Function in the symbol list but without sample will be regarded as
1992  // cold. To minimize the potential negative performance impact it could
1993  // have, we want to be a little conservative here saying if a function
1994  // shows up in the profile, no matter as outline function, inline instance
1995  // or call targets, treat the function as not being cold. This will handle
1996  // the cases such as most callsites of a function are inlined in sampled
1997  // binary but not inlined in current build (because of source code drift,
1998  // imprecise debug information, or the callsites are all cold individually
1999  // but not cold accumulatively...), so the outline function showing up as
2000  // cold in sampled binary will actually not be cold after current build.
  // NOTE(review): CanonName is defined on numbered line 2001, which is
  // missing from this listing.
2002  if (NamesInProfile.count(CanonName))
2003  initialEntryCount = -1;
2004  }
2005 
2006  // Initialize entry count when the function has no existing entry
2007  // count value.
2008  if (!F.getEntryCount().hasValue())
2009  F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
  // Without an analysis manager a function-local remark emitter is created;
  // OwnedORE keeps it alive until this function returns.
2010  std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
2011  if (AM) {
2012  auto &FAM =
2014  .getManager();
2016  } else {
2017  OwnedORE = std::make_unique<OptimizationRemarkEmitter>(&F);
2018  ORE = OwnedORE.get();
2019  }
2020 
  // Context-sensitive profiles are looked up through the context tracker,
  // all other profiles directly through the reader.
2021  if (ProfileIsCS)
2022  Samples = ContextTracker->getBaseSamplesFor(F);
2023  else
2024  Samples = Reader->getSamplesFor(F);
2025 
  // Only functions with non-empty samples are annotated.
2026  if (Samples && !Samples->empty())
2027  return emitAnnotations(F);
2028  return false;
2029 }
2030 
// SampleProfileLoaderPass::run(Module &M, ModuleAnalysisManager &AM): builds
// a SampleProfileLoader from the configured file names and runs it on M.
// Returns PreservedAnalyses::all() when initialization fails or the loader
// reports no change, and PreservedAnalyses::none() otherwise.
// NOTE(review): numbered lines 2031, 2033-2034, 2043 and 2055-2056 are
// missing from this listing, so the first half of the signature, the
// definitions of FAM, PSI and CG, and the body of GetTLI are not visible.
2032  ModuleAnalysisManager &AM) {
2035 
  // Per-function analysis getters handed to the loader; each one queries FAM
  // for the requested function.
2036  auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
2037  return FAM.getResult<AssumptionAnalysis>(F);
2038  };
2039  auto GetTTI = [&](Function &F) -> TargetTransformInfo & {
2040  return FAM.getResult<TargetIRAnalysis>(F);
2041  };
2042  auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
2044  };
2045 
  // An empty ProfileFileName / ProfileRemappingFileName falls back to
  // SampleProfileFile / SampleProfileRemappingFile respectively.
2046  SampleProfileLoader SampleLoader(
2047  ProfileFileName.empty() ? SampleProfileFile : ProfileFileName,
2048  ProfileRemappingFileName.empty() ? SampleProfileRemappingFile
2049  : ProfileRemappingFileName,
2050  LTOPhase, GetAssumptionCache, GetTTI, GetTLI);
2051 
  // Nothing was changed if the profile could not be set up.
2052  if (!SampleLoader.doInitialization(M, &FAM))
2053  return PreservedAnalyses::all();
2054 
2057  if (!SampleLoader.runOnModule(M, &AM, PSI, &CG))
2058  return PreservedAnalyses::all();
2059 
2060  return PreservedAnalyses::none();
2061 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
Instrumentation.h
llvm::InlineCost::isAlways
bool isAlways() const
Definition: InlineCost.h:124
llvm::sampleprof::FunctionSamples::getBodySamples
const BodySampleMap & getBodySamples() const
Return all the samples collected in the body of the function.
Definition: SampleProf.h:824
llvm::InlineCost::getCost
int getCost() const
Get the inline cost estimate.
Definition: InlineCost.h:130
AssumptionCache.h
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2331
llvm::SampleProfileLoaderPass::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: SampleProfile.cpp:2031
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::StringRef::empty
LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:153
it
into xmm2 addss xmm2 xmm1 xmm3 addss xmm3 movaps xmm0 unpcklps xmm0 ret seems silly when it could just be one addps Expand libm rounding functions main should enable SSE DAZ mode and other fast SSE modes Think about doing i64 math in SSE regs on x86 This testcase should have no SSE instructions in it
Definition: README-SSE.txt:81
ProfileInlineGrowthLimit
cl::opt< int > ProfileInlineGrowthLimit("sample-profile-inline-growth-limit", cl::Hidden, cl::init(12), cl::desc("The size growth ratio limit for proirity-based sample profile " "loader inlining."))
ProfileInlineLimitMax
cl::opt< int > ProfileInlineLimitMax("sample-profile-inline-limit-max", cl::Hidden, cl::init(10000), cl::desc("The upper bound of size growth limit for " "proirity-based sample profile loader inlining."))
llvm::sampleprof::FunctionSamples::ProfileIsProbeBased
static bool ProfileIsProbeBased
Definition: SampleProf.h:1010
llvm::CallGraphAnalysis
An analysis pass to compute the CallGraph for a Module.
Definition: CallGraph.h:305
llvm::sampleprof::FunctionSamples::ProfileIsCS
static bool ProfileIsCS
Definition: SampleProf.h:1012
llvm::ModulePass
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:238
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
IntrinsicInst.h
SCCIterator.h
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:779
DebugInfoMetadata.h
llvm::ThinOrFullLTOPhase::ThinLTOPostLink
@ ThinLTOPostLink
ThinLTO postlink (backend compile) phase.
T
llvm::sampleprof::SampleProfileReader::profileIsProbeBased
bool profileIsProbeBased() const
Whether input profile is based on pseudo probes.
Definition: SampleProfReader.h:474
llvm::sampleprof::SampleContext::hasAttribute
bool hasAttribute(ContextAttributeMask A)
Definition: SampleProf.h:554
llvm::Function
Definition: Function.h:61
llvm::DenseMapBase::lookup
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:197
SizeLimit
static cl::opt< unsigned > SizeLimit("eif-limit", cl::init(6), cl::Hidden, cl::desc("Size limit in Hexagon early if-conversion"))
StringRef.h
Pass.h
DEBUG_TYPE
#define DEBUG_TYPE
Definition: SampleProfile.cpp:106
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
llvm::SampleProfileLoaderBaseImpl
Definition: SampleProfileLoaderBaseImpl.h:77
llvm::Function::getSubprogram
DISubprogram * getSubprogram() const
Get the attached subprogram.
Definition: Metadata.cpp:1541
ErrorHandling.h
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
SampleProfileRemappingFile
static cl::opt< std::string > SampleProfileRemappingFile("sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden)
OptimizationRemarkEmitter.h
llvm::CallGraph
The basic data container for the call graph of a Module of IR.
Definition: CallGraph.h:73
FAM
FunctionAnalysisManager FAM
Definition: PassBuilderBindings.cpp:59
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
ProfileICPRelativeHotnessSkip
static cl::opt< unsigned > ProfileICPRelativeHotnessSkip("sample-profile-icp-relative-hotness-skip", cl::Hidden, cl::init(1), cl::desc("Skip relative hotness check for ICP up to given number of targets."))
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::erase
bool erase(const KeyT &Val)
Definition: DenseMap.h:302
llvm::createSampleProfileLoaderPass
ModulePass * createSampleProfileLoaderPass()
Definition: SampleProfile.cpp:1885
ProfileInlineLimitMin
cl::opt< int > ProfileInlineLimitMin("sample-profile-inline-limit-min", cl::Hidden, cl::init(100), cl::desc("The lower bound of size growth limit for " "proirity-based sample profile loader inlining."))
llvm::DILocation
Debug location.
Definition: DebugInfoMetadata.h:1580
llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:158
llvm::sampleprof::ContextShouldBeInlined
@ ContextShouldBeInlined
Definition: SampleProf.h:412
DenseMap.h
updateIDTMetaData
static void updateIDTMetaData(Instruction &Inst, const SmallVectorImpl< InstrProfValueData > &CallTargets, uint64_t Sum)
Update indirect call target profile metadata for Inst.
Definition: SampleProfile.cpp:785
Module.h
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:333
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile", "Sample Profile loader", false, false) INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass
llvm::InlineCost::getAlways
static InlineCost getAlways(const char *Reason, Optional< CostBenefitPair > CostBenefit=None)
Definition: InlineCost.h:112
llvm::updateProfileCallee
void updateProfileCallee(Function *Callee, int64_t entryDelta, const ValueMap< const Value *, WeakTrackingVH > *VMap=nullptr)
Updates profile information by adjusting the entry count by adding entryDelta then scaling callsite i...
Definition: InlineFunction.cpp:1616
ProfileMergeInlinee
static cl::opt< bool > ProfileMergeInlinee("sample-profile-merge-inlinee", cl::Hidden, cl::init(true), cl::desc("Merge past inlinee's profile to outline version if sample " "profile loader decided not to inline a call site. It will " "only be enabled when top-down order of profile loading is " "enabled. "))
llvm::Optional
Definition: APInt.h:33
llvm::InlineParams
Thresholds to tune inline cost analysis.
Definition: InlineCost.h:185
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
llvm::PseudoProbe::Factor
float Factor
Definition: PseudoProbe.h:81
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::ThinOrFullLTOPhase::ThinLTOPreLink
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:892
llvm::sampleprof::FunctionSamples::findInlinedFunctions
void findInlinedFunctions(DenseSet< GlobalValue::GUID > &S, const StringMap< Function * > &SymbolMap, uint64_t Threshold) const
Recursively traverses all children, if the total sample count of the corresponding function is no les...
Definition: SampleProf.h:885
llvm::dump
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Definition: SparseBitVector.h:876
llvm::sampleprof::FunctionSamples::getName
StringRef getName() const
Return the function name.
Definition: SampleProf.h:915
llvm::sampleprof::FunctionSamplesMap
std::map< std::string, FunctionSamples, std::less<> > FunctionSamplesMap
Definition: SampleProf.h:676
llvm::initializeSampleProfileLoaderLegacyPassPass
void initializeSampleProfileLoaderLegacyPassPass(PassRegistry &)
llvm::InlineCost::isNever
bool isNever() const
Definition: InlineCost.h:125
llvm::sampleprof::SampleProfileReader::getRemapper
SampleProfileReaderItaniumRemapper * getRemapper()
Definition: SampleProfReader.h:497
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::insert
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
llvm::Data
@ Data
Definition: SIMachineScheduler.h:55
ProfileSampleBlockAccurate
static cl::opt< bool > ProfileSampleBlockAccurate("profile-sample-block-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "branches and calls as having 0 samples. Otherwise, treat " "them conservatively as unknown. "))
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:198
llvm::Instruction::setMetadata
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1336
llvm::InlineParams::ComputeFullInlineCost
Optional< bool > ComputeFullInlineCost
Compute inline cost even when the cost has exceeded the threshold.
Definition: InlineCost.h:212
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::PseudoProbe::Id
uint32_t Id
Definition: PseudoProbe.h:75
Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:66
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
AllowRecursiveInline
static cl::opt< bool > AllowRecursiveInline("sample-profile-recursive-inline", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Allow sample loader inliner to inline recursive calls."))
Instruction.h
llvm::ThinOrFullLTOPhase
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition: Pass.h:73
CommandLine.h
llvm::Instruction::getNumSuccessors
unsigned getNumSuccessors() const
Return the number of successors that this instruction has.
Definition: Instruction.cpp:765
llvm::sampleprof::FunctionSamples::getFuncName
StringRef getFuncName() const
Return the original function name.
Definition: SampleProf.h:918
BlockFrequencyInfoImpl.h
llvm::Instruction::extractProfTotalWeight
bool extractProfTotalWeight(uint64_t &TotalVal) const
Retrieve total raw weight values of a branch.
Definition: Metadata.cpp:1430
GlobalValue.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::GlobalValue::isDeclaration
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:228
llvm::sampleprof::SampleProfileReader::profileIsCS
bool profileIsCS() const
Whether input profile is fully context-sensitive.
Definition: SampleProfReader.h:477
llvm::msgpack::Type::Map
@ Map
llvm::getInlineCost
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
Definition: InlineCost.cpp:2753
PostDominators.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::sampleprof::SampleProfileReader::read
std::error_code read()
The interface to read sample profiles from the associated file.
Definition: SampleProfReader.h:373
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::ProfileSummary::getMD
Metadata * getMD(LLVMContext &Context, bool AddPartialField=true, bool AddPartialProfileRatioField=true)
Return summary information as metadata.
Definition: ProfileSummary.cpp:81
Twine.h
InstrTypes.h
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1393
UsePreInlinerDecision
static cl::opt< bool > UsePreInlinerDecision("sample-profile-use-preinliner", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Use the preinliner decisions stored in profile context."))
llvm::sampleprof::ProfiledCallGraph
Definition: ProfiledCallGraph.h:40
llvm::sampleprof::SyntheticContext
@ SyntheticContext
Definition: SampleProf.h:403
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::InlineCost
Represents the cost of inlining a function.
Definition: InlineCost.h:82
TargetLibraryInfo.h
DenseSet.h
false
Definition: StackSlotColoring.cpp:142
llvm::orc::SymbolMap
DenseMap< SymbolStringPtr, JITEvaluatedSymbol > SymbolMap
A map from symbol names (as SymbolStringPtrs) to JITSymbols (address/flags pairs).
Definition: Core.h:112
llvm::sampleprof::FunctionSamples::getGUID
static uint64_t getGUID(StringRef Name)
Definition: SampleProf.h:1036
SampleProf.h
InlineAdvisor.h
ProfileCount
Function::ProfileCount ProfileCount
Definition: SampleProfile.cpp:105
llvm::pdb::PDB_SymType::Caller
@ Caller
llvm::Instruction
Definition: Instruction.h:45
InstrProf.h
MDBuilder.h
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:404
llvm::setProbeDistributionFactor
void setProbeDistributionFactor(Instruction &Inst, float Factor)
Definition: PseudoProbe.cpp:65
DebugLoc.h
SmallPtrSet.h
llvm::Function::PCT_Real
@ PCT_Real
Definition: Function.h:249
llvm::CallGraphNode
A node in the call graph for a module.
Definition: CallGraph.h:167
llvm::Instruction::getSuccessor
BasicBlock * getSuccessor(unsigned Idx) const
Return the specified successor. This instruction must be a terminator.
Definition: Instruction.cpp:777
llvm::InlineCost::get
static InlineCost get(int Cost, int Threshold)
Definition: InlineCost.h:107
llvm::getInlineParams
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
Definition: InlineCost.cpp:3037
SampleProfileLoaderBaseUtil.h
StringMap.h
llvm::isLegalToPromote
bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
Definition: CallPromotionUtils.cpp:382
llvm::ProfileSummary::PSK_Sample
@ PSK_Sample
Definition: ProfileSummary.h:47
llvm::sampleprof::SampleProfileReader::getNameTable
virtual std::vector< StringRef > * getNameTable()
It includes all the names that have samples either in outline instance or inline instance.
Definition: SampleProfReader.h:485
llvm::sampleprof::SampleContext
Definition: SampleProf.h:469
llvm::StringMap
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
Definition: StringMap.h:108
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
CFG.h
LoopInfo.h
llvm::PriorityQueue
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Definition: PriorityQueue.h:27
llvm::scc_begin
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
Definition: SCCIterator.h:228
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:39
llvm::sampleprof::FunctionSamples::empty
bool empty() const
Definition: SampleProf.h:786
ValueSymbolTable.h
getCalledFunction
static const Function * getCalledFunction(const Value *V, bool LookThroughBitCast, bool &IsNoBuiltin)
Definition: MemoryBuiltins.cpp:118
llvm::cl::ZeroOrMore
@ ZeroOrMore
Definition: CommandLine.h:120
llvm::emitInlinedInto
void emitInlinedInto(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, const BasicBlock *Block, const Function &Callee, const Function &Caller, const InlineCost &IC, bool ForProfileContext=false, const char *PassName=nullptr)
Emit ORE message.
Definition: InlineAdvisor.cpp:438
SampleProfile.h
llvm::DenseSet
Implements a dense probed hash-table based set.
Definition: DenseSet.h:268
llvm::HighlightColor::Remark
@ Remark
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1434
ReplayInlineAdvisor.h
llvm::ProfileCount
Function::ProfileCount ProfileCount
Definition: SampleProfileLoaderBaseImpl.h:46
llvm::DiagnosticInfoOptimizationBase::Argument
Used in the streaming interface as the general argument type.
Definition: DiagnosticInfo.h:421
ProfiledCallGraph.h
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:463
uint64_t
ProfileSummaryInfo.h
MaxNumPromotions
static cl::opt< unsigned > MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden, cl::ZeroOrMore, cl::desc("Max number of promotions for a single indirect " "call callsite in sample profile loader"))
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2387
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:572
llvm::sampleprof::FunctionSamples::getEntrySamples
uint64_t getEntrySamples() const
Return the sample count of the first instruction of the function.
Definition: SampleProf.h:800
llvm::sampleprof::SampleProfileReader::getSamplesFor
FunctionSamples * getSamplesFor(const Function &F)
Return the samples collected for function F.
Definition: SampleProfReader.h:398
SampleProfileFile
static cl::opt< std::string > SampleProfileFile("sample-profile-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile file loaded by -sample-profile"), cl::Hidden)
llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:169
llvm::scc_iterator
Enumerate the SCCs of a directed graph in reverse topological order of the SCC DAG.
Definition: SCCIterator.h:42
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
IPO.h
llvm::sampleprof::FunctionSamples
Representation of the samples collected for a function.
Definition: SampleProf.h:684
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::DenseMap
Definition: DenseMap.h:714
ErrorOr.h
I
#define I(x, y, z)
Definition: MD5.cpp:59
PriorityQueue.h
Cloning.h
SampleProfReader.h
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
llvm::ProfileSummaryInfoWrapperPass
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Definition: ProfileSummaryInfo.h:185
ArrayRef.h
llvm::codeview::FrameProcedureOptions::Inlined
@ Inlined
llvm::sampleprof::SampleRecord::adjustCallTargets
static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets, float DistributionFactor)
Prorate call targets by a distribution factor.
Definition: SampleProf.h:377
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::sampleprof::FunctionSamples::getCallSiteIdentifier
static LineLocation getCallSiteIdentifier(const DILocation *DIL)
Returns a unique call site identifier for a given debug location of a call instruction.
Definition: SampleProf.cpp:221
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1609
llvm::sampleprof::FunctionSamples::UseMD5
static bool UseMD5
Whether the profile uses MD5 to represent string.
Definition: SampleProf.h:1021
llvm::codeview::CompileSym2Flags::EC
@ EC
InlineCost.h
CSINLINE_DEBUG
#define CSINLINE_DEBUG
Definition: SampleProfile.cpp:107
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:83
llvm::sampleprof::SampleProfileReader::create
static ErrorOr< std::unique_ptr< SampleProfileReader > > create(const std::string Filename, LLVMContext &C, FSDiscriminatorPass P=FSDiscriminatorPass::Base, const std::string RemapFilename="")
Create a sample profile reader appropriate to the file format.
Definition: SampleProfReader.cpp:1725
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::sampleprof::ProfiledCallGraphNode
Definition: ProfiledCallGraph.h:27
SampleProfileProbe.h
llvm::InlineCost::getNever
static InlineCost getNever(const char *Reason, Optional< CostBenefitPair > CostBenefit=None)
Definition: InlineCost.h:116
llvm::sampleprof::SampleProfileReader::setSkipFlatProf
virtual void setSkipFlatProf(bool Skip)
Don't read profile without context if the flag is set.
Definition: SampleProfReader.h:493
llvm::BasicBlock::getFirstNonPHIOrDbgOrLifetime
const Instruction * getFirstNonPHIOrDbgOrLifetime(bool SkipPseudoOp=false) const
Returns a pointer to the first instruction in this block that is not a PHINode, a debug intrinsic,...
Definition: BasicBlock.cpp:233
SampleHotCallSiteThreshold
cl::opt< int > SampleHotCallSiteThreshold("sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000), cl::desc("Hot callsite threshold for proirity-based sample profile loader " "inlining."))
llvm::DiagnosticInfoSampleProfile
Diagnostic information for the sample profiler.
Definition: DiagnosticInfo.h:285
llvm::ProfileSummaryAnalysis
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Definition: ProfileSummaryInfo.h:203
llvm::StringSet
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:22
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:200
None.h
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:41
llvm::sampleprof::SampleProfileReader::getProfileSymbolList
virtual std::unique_ptr< ProfileSymbolList > getProfileSymbolList()
Definition: SampleProfReader.h:479
uint32_t
CallPromotionUtils.h
Profile
Load MIR Sample Profile
Definition: MIRSampleProfile.cpp:62
llvm::ContextTrieNode
Definition: SampleContextTracker.h:36
SampleProfileLoaderBaseImpl.h
llvm::format
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
llvm::OptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition: DiagnosticInfo.h:775
CallGraphSCCPass.h
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::DenseMapBase::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
llvm::isIndirectCall
static bool isIndirectCall(const MachineInstr &MI)
Definition: ARMBaseInstrInfo.h:653
SampleContextTracker.h
llvm::annotateValueSite
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:955
llvm::sampleprofutil
Definition: SampleProfileLoaderBaseUtil.h:39
llvm::sampleprof::SampleProfileReader::getSummary
ProfileSummary & getSummary() const
Return the profile summary.
Definition: SampleProfReader.h:466
Callee
amdgpu Simplify well known AMD library false FunctionCallee Callee
Definition: AMDGPULibCalls.cpp:206
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
llvm::sampleprof::SampleProfileReader::getProfiles
SampleProfileMap & getProfiles()
Return all the profiles.
Definition: SampleProfReader.h:441
llvm::LLVMContext::diagnose
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Definition: LLVMContext.cpp:228
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
profile
sample profile
Definition: SampleProfile.cpp:1667
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:52
std
Definition: BitVector.h:838
llvm::sampleprof::SampleProfileReader::getOrCreateSamplesFor
FunctionSamples * getOrCreateSamplesFor(const Function &F)
Return the samples collected for function F, creating an empty FunctionSamples if none exists.
Definition: SampleProfReader.h:408
llvm::DenseMapBase::end
iterator end()
Definition: DenseMap.h:83
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
GenericDomTree.h
llvm::X86AS::FS
@ FS
Definition: X86.h:188
llvm::GlobalValue::getGUID
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e.
Definition: GlobalValue.h:511
Casting.h
llvm::sampleprofutil::callsiteIsHot
bool callsiteIsHot(const FunctionSamples *CallsiteFS, ProfileSummaryInfo *PSI, bool ProfAccForSymsInList)
Return true if the given callsite is hot with respect to the hot cutoff threshold.
Definition: SampleProfileLoaderBaseUtil.cpp:56
DiagnosticInfo.h
Function.h
llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1492
PassManager.h
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:219
llvm::InlineFunctionInfo
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
Definition: Cloning.h:201
UseProfiledCallGraph
static cl::opt< bool > UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden, cl::desc("Process functions in a top-down order " "defined by the profiled call graph when " "-sample-profile-top-down-load is on."))
llvm::pdb::PDB_SymType::CallSite
@ CallSite
llvm::sampleprof::SampleProfileReader
Sample-based profile reader.
Definition: SampleProfReader.h:345
llvm::ThinOrFullLTOPhase::None
@ None
No LTO/ThinLTO behavior needed.
llvm::sampleprof::FunctionSamples::merge
sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight=1)
Merge the samples in Other into this one.
Definition: SampleProf.h:845
llvm::cl::value_desc
Definition: CommandLine.h:424
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
llvm::NOMORE_ICP_MAGICNUM
const uint64_t NOMORE_ICP_MAGICNUM
Magic number in the value profile metadata showing a target has been promoted for the instruction and...
Definition: Metadata.h:57
llvm::sampleprof::SampleProfileReader::setModule
void setModule(const Module *Mod)
Definition: SampleProfReader.h:499
SampleColdCallSiteThreshold
cl::opt< int > SampleColdCallSiteThreshold("sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining cold callsites"))
llvm::CallGraph::getModule
Module & getModule() const
Returns the module the call graph corresponds to.
Definition: CallGraph.h:102
llvm::extractProbe
Optional< PseudoProbe > extractProbe(const Instruction &Inst)
Definition: PseudoProbe.cpp:48
ProfileAccurateForSymsInList
static cl::opt< bool > ProfileAccurateForSymsInList("profile-accurate-for-symsinlist", cl::Hidden, cl::ZeroOrMore, cl::init(true), cl::desc("For symbols in profile symbol list, regard their profiles to " "be accurate. It may be overriden by profile-sample-accurate. "))
llvm::sampleprof::FunctionSamples::getContext
SampleContext & getContext() const
Definition: SampleProf.h:1014
ProfileSampleAccurate
static cl::opt< bool > ProfileSampleAccurate("profile-sample-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "callsite and function as having 0 samples. Otherwise, treat " "un-sampled callsites and functions conservatively as unknown. "))
llvm::pgo::promoteIndirectCall
CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
Definition: IndirectCallPromotion.cpp:304
llvm::MDBuilder
Definition: MDBuilder.h:35
llvm::scc_iterator::isAtEnd
bool isAtEnd() const
Direct loop termination test which is more efficient than comparison with end().
Definition: SCCIterator.h:108
CallGraph.h
llvm::DebugLoc::getLine
unsigned getLine() const
Definition: DebugLoc.cpp:25
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:684
llvm::sampleprof::FunctionSamples::getCanonicalFnName
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
Definition: SampleProf.h:926
Instructions.h
loader
sample Sample Profile loader
Definition: SampleProfile.cpp:1668
SmallVector.h
llvm::sampleprof::SampleRecord::SortCallTargets
static const SortedCallTargetSet SortCallTargets(const CallTargetMap &Targets)
Sort call targets in descending order of call frequency.
Definition: SampleProf.h:368
llvm::Instruction::getDebugLoc
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:370
llvm::ErrorOr::get
reference get()
Definition: ErrorOr.h:150
Dominators.h
GetSortedValueDataFromCallTargets
static SmallVector< InstrProfValueData, 2 > GetSortedValueDataFromCallTargets(const SampleRecord::CallTargetMap &M)
Returns the sorted CallTargetMap M by count in descending order.
Definition: SampleProfile.cpp:1468
OverwriteExistingWeights
static cl::opt< bool > OverwriteExistingWeights("overwrite-existing-weights", cl::Hidden, cl::init(false), cl::desc("Ignore existing branch weights on IR and always overwrite."))
ProfileTopDownLoad
static cl::opt< bool > ProfileTopDownLoad("sample-profile-top-down-load", cl::Hidden, cl::init(true), cl::desc("Do profile annotation and inlining for functions in top-down " "order of call graph during sample profile loading. It only " "works for new pass manager. "))
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::try_emplace
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
Definition: DenseMap.h:222
CallsitePrioritizedInline
static cl::opt< bool > CallsitePrioritizedInline("sample-profile-prioritized-inline", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Use call site prioritized inlining for sample profile loader." "Currently only CSSPGO is supported."))
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::ErrorOr
Represents either an error or a value T.
Definition: ErrorOr.h:56
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
ProfileInlineReplayFile
static cl::opt< std::string > ProfileInlineReplayFile("sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"), cl::desc("Optimization remarks file containing inline remarks to be replayed " "by inlining from sample profile loader."), cl::Hidden)
TargetTransformInfo.h
Threshold
static cl::opt< unsigned > Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), cl::init(100), cl::Hidden)
ProfileSizeInline
static cl::opt< bool > ProfileSizeInline("sample-profile-inline-size", cl::Hidden, cl::init(false), cl::desc("Inline cold call sites in profile loader if it's beneficial " "for code size."))
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:43
llvm::InlineFunction
InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, Function *ForwardVarArgsTo=nullptr)
This function inlines the called function into the basic block of the caller.
Definition: InlineFunction.cpp:1760
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to callin...
Definition: InstrTypes.h:1161
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::InnerAnalysisManagerProxy
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:936
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1475
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::getValueProfDataFromInst
bool getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, InstrProfValueData ValueData[], uint32_t &ActualNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst which is annotated with value profile meta data.
Definition: InstrProf.cpp:1000
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
LLVMContext.h
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::UseIterativeBFIInference
llvm::cl::opt< bool > UseIterativeBFIInference
llvm::AttributeFuncs::mergeAttributesForInlining
void mergeAttributesForInlining(Function &Caller, const Function &Callee)
Merge caller's and callee's attributes.
Definition: Attributes.cpp:2110
llvm::Function::ProfileCount
Class to represent profile counts.
Definition: Function.h:254
llvm::cl::desc
Definition: CommandLine.h:414
raw_ostream.h
llvm::InlineParams::AllowRecursiveCall
Optional< bool > AllowRecursiveCall
Indicate whether we allow inlining for recursive call.
Definition: InlineCost.h:218
InitializePasses.h
llvm::OptimizationRemarkEmitterAnalysis
Definition: OptimizationRemarkEmitter.h:164
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
Debug.h
llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:438
ProfileICPRelativeHotness
static cl::opt< unsigned > ProfileICPRelativeHotness("sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25), cl::desc("Relative hotness percentage threshold for indirect " "call promotion in proirity-based sample profile loader inlining."))
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
SpecialSubKind::string
@ string
doesHistoryAllowICP
static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate)
Check whether the indirect call promotion history of Inst allows the promotion for Candidate.
Definition: SampleProfile.cpp:748
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:908
SmallSet.h
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37
llvm::SmallVectorImpl::insert
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:773