LLVM  13.0.0git
SampleProfile.cpp
Go to the documentation of this file.
1 //===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the SampleProfileLoader transformation. This pass
10 // reads a profile file generated by a sampling profiler (e.g. Linux Perf -
11 // http://perf.wiki.kernel.org/) and generates IR metadata to reflect the
12 // profile information in the given profile.
13 //
14 // This pass generates branch weight annotations on the IR:
15 //
16 // - prof: Represents branch weights. This annotation is added to branches
17 // to indicate the weights of each edge coming out of the branch.
18 // The weight of each edge is the weight of the target block for
19 // that edge. The weight of a block B is computed as the maximum
20 // number of samples found in B.
21 //
22 //===----------------------------------------------------------------------===//
23 
25 #include "llvm/ADT/ArrayRef.h"
26 #include "llvm/ADT/DenseMap.h"
27 #include "llvm/ADT/DenseSet.h"
28 #include "llvm/ADT/None.h"
29 #include "llvm/ADT/PriorityQueue.h"
30 #include "llvm/ADT/SCCIterator.h"
31 #include "llvm/ADT/SmallPtrSet.h"
32 #include "llvm/ADT/SmallSet.h"
33 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/ADT/Statistic.h"
35 #include "llvm/ADT/StringMap.h"
36 #include "llvm/ADT/StringRef.h"
37 #include "llvm/ADT/Twine.h"
43 #include "llvm/Analysis/LoopInfo.h"
50 #include "llvm/IR/BasicBlock.h"
51 #include "llvm/IR/CFG.h"
53 #include "llvm/IR/DebugLoc.h"
54 #include "llvm/IR/DiagnosticInfo.h"
55 #include "llvm/IR/Dominators.h"
56 #include "llvm/IR/Function.h"
57 #include "llvm/IR/GlobalValue.h"
58 #include "llvm/IR/InstrTypes.h"
59 #include "llvm/IR/Instruction.h"
60 #include "llvm/IR/Instructions.h"
61 #include "llvm/IR/IntrinsicInst.h"
62 #include "llvm/IR/LLVMContext.h"
63 #include "llvm/IR/MDBuilder.h"
64 #include "llvm/IR/Module.h"
65 #include "llvm/IR/PassManager.h"
67 #include "llvm/InitializePasses.h"
68 #include "llvm/Pass.h"
72 #include "llvm/Support/Casting.h"
74 #include "llvm/Support/Debug.h"
76 #include "llvm/Support/ErrorOr.h"
79 #include "llvm/Transforms/IPO.h"
88 #include <algorithm>
89 #include <cassert>
90 #include <cstdint>
91 #include <functional>
92 #include <limits>
93 #include <map>
94 #include <memory>
95 #include <queue>
96 #include <string>
97 #include <system_error>
98 #include <utility>
99 #include <vector>
100 
101 using namespace llvm;
102 using namespace sampleprof;
103 using namespace llvm::sampleprofutil;
105 #define DEBUG_TYPE "sample-profile"
106 #define CSINLINE_DEBUG DEBUG_TYPE "-inline"
107 
108 STATISTIC(NumCSInlined,
109  "Number of functions inlined with context sensitive profile");
110 STATISTIC(NumCSNotInlined,
111  "Number of functions not inlined with context sensitive profile");
112 STATISTIC(NumMismatchedProfile,
113  "Number of functions with CFG mismatched profile");
114 STATISTIC(NumMatchedProfile, "Number of functions with CFG matched profile");
115 STATISTIC(NumDuplicatedInlinesite,
116  "Number of inlined callsites with a partial distribution factor");
117 
118 STATISTIC(NumCSInlinedHitMinLimit,
119  "Number of functions with FDO inline stopped due to min size limit");
120 STATISTIC(NumCSInlinedHitMaxLimit,
121  "Number of functions with FDO inline stopped due to max size limit");
122 STATISTIC(
123  NumCSInlinedHitGrowthLimit,
124  "Number of functions with FDO inline stopped due to growth size limit");
125 
126 // Command line option to specify the file to read samples from. This is
127 // mainly used for debugging.
129  "sample-profile-file", cl::init(""), cl::value_desc("filename"),
130  cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);
131 
132 // The named file contains a set of transformations that may have been applied
133 // to the symbol names between the program from which the sample data was
134 // collected and the current program's symbols.
136  "sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"),
137  cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden);
138 
140  "profile-sample-accurate", cl::Hidden, cl::init(false),
141  cl::desc("If the sample profile is accurate, we will mark all un-sampled "
142  "callsite and function as having 0 samples. Otherwise, treat "
143  "un-sampled callsites and functions conservatively as unknown. "));
144 
146  "profile-accurate-for-symsinlist", cl::Hidden, cl::ZeroOrMore,
147  cl::init(true),
148  cl::desc("For symbols in profile symbol list, regard their profiles to "
149  "be accurate. It may be overriden by profile-sample-accurate. "));
150 
152  "sample-profile-merge-inlinee", cl::Hidden, cl::init(true),
153  cl::desc("Merge past inlinee's profile to outline version if sample "
154  "profile loader decided not to inline a call site. It will "
155  "only be enabled when top-down order of profile loading is "
156  "enabled. "));
157 
159  "sample-profile-top-down-load", cl::Hidden, cl::init(true),
160  cl::desc("Do profile annotation and inlining for functions in top-down "
161  "order of call graph during sample profile loading. It only "
162  "works for new pass manager. "));
163 
164 static cl::opt<bool>
165  UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden,
166  cl::desc("Process functions in a top-down order "
167  "defined by the profiled call graph when "
168  "-sample-profile-top-down-load is on."));
169 
171  "sample-profile-inline-size", cl::Hidden, cl::init(false),
172  cl::desc("Inline cold call sites in profile loader if it's beneficial "
173  "for code size."));
174 
176  "sample-profile-inline-growth-limit", cl::Hidden, cl::init(12),
177  cl::desc("The size growth ratio limit for proirity-based sample profile "
178  "loader inlining."));
179 
181  "sample-profile-inline-limit-min", cl::Hidden, cl::init(100),
182  cl::desc("The lower bound of size growth limit for "
183  "proirity-based sample profile loader inlining."));
184 
186  "sample-profile-inline-limit-max", cl::Hidden, cl::init(10000),
187  cl::desc("The upper bound of size growth limit for "
188  "proirity-based sample profile loader inlining."));
189 
191  "sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000),
192  cl::desc("Hot callsite threshold for proirity-based sample profile loader "
193  "inlining."));
194 
196  "sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45),
197  cl::desc("Threshold for inlining cold callsites"));
198 
200  "sample-profile-icp-threshold", cl::Hidden, cl::init(5),
201  cl::desc(
202  "Relative hotness threshold for indirect "
203  "call promotion in proirity-based sample profile loader inlining."));
204 
206  "sample-profile-prioritized-inline", cl::Hidden, cl::ZeroOrMore,
207  cl::init(false),
208  cl::desc("Use call site prioritized inlining for sample profile loader."
209  "Currently only CSSPGO is supported."));
210 
212  "sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"),
213  cl::desc(
214  "Optimization remarks file containing inline remarks to be replayed "
215  "by inlining from sample profile loader."),
216  cl::Hidden);
217 
218 static cl::opt<unsigned>
219  MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden,
221  cl::desc("Max number of promotions for a single indirect "
222  "call callsite in sample profile loader"));
223 
225  "overwrite-existing-weights", cl::Hidden, cl::init(false),
226  cl::desc("Ignore existing branch weights on IR and always overwrite."));
227 
228 namespace {
229 
230 using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
231 using EquivalenceClassMap = DenseMap<const BasicBlock *, const BasicBlock *>;
232 using Edge = std::pair<const BasicBlock *, const BasicBlock *>;
233 using EdgeWeightMap = DenseMap<Edge, uint64_t>;
234 using BlockEdgeMap =
236 
237 class GUIDToFuncNameMapper {
238 public:
239  GUIDToFuncNameMapper(Module &M, SampleProfileReader &Reader,
240  DenseMap<uint64_t, StringRef> &GUIDToFuncNameMap)
241  : CurrentReader(Reader), CurrentModule(M),
242  CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) {
243  if (!CurrentReader.useMD5())
244  return;
245 
246  for (const auto &F : CurrentModule) {
247  StringRef OrigName = F.getName();
248  CurrentGUIDToFuncNameMap.insert(
249  {Function::getGUID(OrigName), OrigName});
250 
251  // Local to global var promotion used by optimization like thinlto
252  // will rename the var and add suffix like ".llvm.xxx" to the
253  // original local name. In sample profile, the suffixes of function
254  // names are all stripped. Since it is possible that the mapper is
255  // built in post-thin-link phase and var promotion has been done,
256  // we need to add the substring of function name without the suffix
257  // into the GUIDToFuncNameMap.
259  if (CanonName != OrigName)
260  CurrentGUIDToFuncNameMap.insert(
261  {Function::getGUID(CanonName), CanonName});
262  }
263 
264  // Update GUIDToFuncNameMap for each function including inlinees.
265  SetGUIDToFuncNameMapForAll(&CurrentGUIDToFuncNameMap);
266  }
267 
268  ~GUIDToFuncNameMapper() {
269  if (!CurrentReader.useMD5())
270  return;
271 
272  CurrentGUIDToFuncNameMap.clear();
273 
274  // Reset GUIDToFuncNameMap for of each function as they're no
275  // longer valid at this point.
276  SetGUIDToFuncNameMapForAll(nullptr);
277  }
278 
279 private:
280  void SetGUIDToFuncNameMapForAll(DenseMap<uint64_t, StringRef> *Map) {
281  std::queue<FunctionSamples *> FSToUpdate;
282  for (auto &IFS : CurrentReader.getProfiles()) {
283  FSToUpdate.push(&IFS.second);
284  }
285 
286  while (!FSToUpdate.empty()) {
287  FunctionSamples *FS = FSToUpdate.front();
288  FSToUpdate.pop();
289  FS->GUIDToFuncNameMap = Map;
290  for (const auto &ICS : FS->getCallsiteSamples()) {
291  const FunctionSamplesMap &FSMap = ICS.second;
292  for (auto &IFS : FSMap) {
293  FunctionSamples &FS = const_cast<FunctionSamples &>(IFS.second);
294  FSToUpdate.push(&FS);
295  }
296  }
297  }
298  }
299 
300  SampleProfileReader &CurrentReader;
301  Module &CurrentModule;
302  DenseMap<uint64_t, StringRef> &CurrentGUIDToFuncNameMap;
303 };
304 
305 // Inline candidate used by iterative callsite prioritized inliner
306 struct InlineCandidate {
307  CallBase *CallInstr;
308  const FunctionSamples *CalleeSamples;
309  // Prorated callsite count, which will be used to guide inlining. For example,
310  // if a callsite is duplicated in LTO prelink, then in LTO postlink the two
311  // copies will get their own distribution factors and their prorated counts
312  // will be used to decide if they should be inlined independently.
313  uint64_t CallsiteCount;
314  // Call site distribution factor to prorate the profile samples for a
315  // duplicated callsite. Default value is 1.0.
316  float CallsiteDistribution;
317 };
318 
319 // Inline candidate comparer using call site weight
320 struct CandidateComparer {
321  bool operator()(const InlineCandidate &LHS, const InlineCandidate &RHS) {
322  if (LHS.CallsiteCount != RHS.CallsiteCount)
323  return LHS.CallsiteCount < RHS.CallsiteCount;
324 
325  const FunctionSamples *LCS = LHS.CalleeSamples;
326  const FunctionSamples *RCS = RHS.CalleeSamples;
327  assert(LCS && RCS && "Expect non-null FunctionSamples");
328 
329  // Tie breaker using number of samples try to favor smaller functions first
330  if (LCS->getBodySamples().size() != RCS->getBodySamples().size())
331  return LCS->getBodySamples().size() > RCS->getBodySamples().size();
332 
333  // Tie breaker using GUID so we have stable/deterministic inlining order
334  return LCS->getGUID(LCS->getName()) < RCS->getGUID(RCS->getName());
335  }
336 };
337 
338 using CandidateQueue =
340  CandidateComparer>;
341 
342 /// Sample profile pass.
343 ///
344 /// This pass reads profile data from the file specified by
345 /// -sample-profile-file and annotates every affected function with the
346 /// profile information found in that file.
347 class SampleProfileLoader final
348  : public SampleProfileLoaderBaseImpl<BasicBlock> {
349 public:
350  SampleProfileLoader(
351  StringRef Name, StringRef RemapName, ThinOrFullLTOPhase LTOPhase,
352  std::function<AssumptionCache &(Function &)> GetAssumptionCache,
353  std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo,
354  std::function<const TargetLibraryInfo &(Function &)> GetTLI)
356  GetAC(std::move(GetAssumptionCache)),
357  GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)),
358  RemappingFilename(std::string(RemapName)), LTOPhase(LTOPhase) {}
359 
360  bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr);
361  bool runOnModule(Module &M, ModuleAnalysisManager *AM,
362  ProfileSummaryInfo *_PSI, CallGraph *CG);
363 
364 protected:
366  bool emitAnnotations(Function &F);
367  ErrorOr<uint64_t> getInstWeight(const Instruction &I) override;
368  ErrorOr<uint64_t> getProbeWeight(const Instruction &I);
369  const FunctionSamples *findCalleeFunctionSamples(const CallBase &I) const;
370  const FunctionSamples *
371  findFunctionSamples(const Instruction &I) const override;
372  std::vector<const FunctionSamples *>
373  findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
374  void findExternalInlineCandidate(const FunctionSamples *Samples,
375  DenseSet<GlobalValue::GUID> &InlinedGUIDs,
377  uint64_t Threshold);
378  // Attempt to promote indirect call and also inline the promoted call
379  bool tryPromoteAndInlineCandidate(
380  Function &F, InlineCandidate &Candidate, uint64_t SumOrigin,
381  uint64_t &Sum, SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
382  bool inlineHotFunctions(Function &F,
383  DenseSet<GlobalValue::GUID> &InlinedGUIDs);
384  InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
385  bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB);
386  bool
387  tryInlineCandidate(InlineCandidate &Candidate,
388  SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
389  bool
390  inlineHotFunctionsWithPriority(Function &F,
391  DenseSet<GlobalValue::GUID> &InlinedGUIDs);
392  // Inline cold/small functions in addition to hot ones
393  bool shouldInlineColdCallee(CallBase &CallInst);
394  void emitOptimizationRemarksForInlineCandidates(
395  const SmallVectorImpl<CallBase *> &Candidates, const Function &F,
396  bool Hot);
397  std::vector<Function *> buildFunctionOrder(Module &M, CallGraph *CG);
398  std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(CallGraph &CG);
399  void generateMDProfMetadata(Function &F);
400 
401  /// Map from function name to Function *. Used to find the function from
402  /// the function name. If the function name contains suffix, additional
403  /// entry is added to map from the stripped name to the function if there
404  /// is one-to-one mapping.
406 
409  std::function<const TargetLibraryInfo &(Function &)> GetTLI;
410 
411  /// Profile tracker for different context.
412  std::unique_ptr<SampleContextTracker> ContextTracker;
413 
414  /// Name of the profile remapping file to load.
415  std::string RemappingFilename;
416 
417  /// Flag indicating whether the profile input loaded successfully.
418  bool ProfileIsValid = false;
419 
420  /// Flag indicating whether input profile is context-sensitive
421  bool ProfileIsCS = false;
422 
423  /// Flag indicating which LTO/ThinLTO phase the pass is invoked in.
424  ///
425  /// We need to know the LTO phase because for example in ThinLTOPrelink
426  /// phase, in annotation, we should not promote indirect calls. Instead,
427  /// we will mark GUIDs that needs to be annotated to the function.
428  ThinOrFullLTOPhase LTOPhase;
429 
430  /// Profle Symbol list tells whether a function name appears in the binary
431  /// used to generate the current profile.
432  std::unique_ptr<ProfileSymbolList> PSL;
433 
434  /// Total number of samples collected in this profile.
435  ///
436  /// This is the sum of all the samples collected in all the functions executed
437  /// at runtime.
438  uint64_t TotalCollectedSamples = 0;
439 
440  // Information recorded when we declined to inline a call site
441  // because we have determined it is too cold is accumulated for
442  // each callee function. Initially this is just the entry count.
443  struct NotInlinedProfileInfo {
444  uint64_t entryCount;
445  };
447 
448  // GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for
449  // all the function symbols defined or declared in current module.
450  DenseMap<uint64_t, StringRef> GUIDToFuncNameMap;
451 
452  // All the Names used in FunctionSamples including outline function
453  // names, inline instance names and call target names.
454  StringSet<> NamesInProfile;
455 
456  // For symbol in profile symbol list, whether to regard their profiles
457  // to be accurate. It is mainly decided by existance of profile symbol
458  // list and -profile-accurate-for-symsinlist flag, but it can be
459  // overriden by -profile-sample-accurate or profile-sample-accurate
460  // attribute.
461  bool ProfAccForSymsInList;
462 
463  // External inline advisor used to replay inline decision from remarks.
464  std::unique_ptr<ReplayInlineAdvisor> ExternalInlineAdvisor;
465 
466  // A pseudo probe helper to correlate the imported sample counts.
467  std::unique_ptr<PseudoProbeManager> ProbeManager;
468 };
469 
470 class SampleProfileLoaderLegacyPass : public ModulePass {
471 public:
472  // Class identification, replacement for typeinfo
473  static char ID;
474 
475  SampleProfileLoaderLegacyPass(
478  : ModulePass(ID), SampleLoader(
479  Name, SampleProfileRemappingFile, LTOPhase,
480  [&](Function &F) -> AssumptionCache & {
481  return ACT->getAssumptionCache(F);
482  },
483  [&](Function &F) -> TargetTransformInfo & {
484  return TTIWP->getTTI(F);
485  },
486  [&](Function &F) -> TargetLibraryInfo & {
487  return TLIWP->getTLI(F);
488  }) {
491  }
492 
493  void dump() { SampleLoader.dump(); }
494 
495  bool doInitialization(Module &M) override {
496  return SampleLoader.doInitialization(M);
497  }
498 
499  StringRef getPassName() const override { return "Sample profile pass"; }
500  bool runOnModule(Module &M) override;
501 
502  void getAnalysisUsage(AnalysisUsage &AU) const override {
507  }
508 
509 private:
510  SampleProfileLoader SampleLoader;
511  AssumptionCacheTracker *ACT = nullptr;
512  TargetTransformInfoWrapperPass *TTIWP = nullptr;
513  TargetLibraryInfoWrapperPass *TLIWP = nullptr;
514 };
515 
516 } // end anonymous namespace
517 
518 ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
520  return getProbeWeight(Inst);
521 
522  const DebugLoc &DLoc = Inst.getDebugLoc();
523  if (!DLoc)
524  return std::error_code();
525 
526  // Ignore all intrinsics, phinodes and branch instructions.
527  // Branch and phinodes instruction usually contains debug info from sources
528  // outside of the residing basic block, thus we ignore them during annotation.
529  if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst) || isa<PHINode>(Inst))
530  return std::error_code();
531 
532  // For non-CS profile, if a direct call/invoke instruction is inlined in
533  // profile (findCalleeFunctionSamples returns non-empty result), but not
534  // inlined here, it means that the inlined callsite has no sample, thus the
535  // call instruction should have 0 count.
536  // For CS profile, the callsite count of previously inlined callees is
537  // populated with the entry count of the callees.
538  if (!ProfileIsCS)
539  if (const auto *CB = dyn_cast<CallBase>(&Inst))
540  if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
541  return 0;
542 
543  return getInstWeightImpl(Inst);
544 }
545 
546 // Here use error_code to represent: 1) The dangling probe. 2) Ignore the weight
547 // of non-probe instruction. So if all instructions of the BB give error_code,
548 // tell the inference algorithm to infer the BB weight.
549 ErrorOr<uint64_t> SampleProfileLoader::getProbeWeight(const Instruction &Inst) {
551  "Profile is not pseudo probe based");
552  Optional<PseudoProbe> Probe = extractProbe(Inst);
553  // Ignore the non-probe instruction. If none of the instruction in the BB is
554  // probe, we choose to infer the BB's weight.
555  if (!Probe)
556  return std::error_code();
557 
558  // This is not the dangling probe from the training pass but generated by the
559  // current compilation. Ignore this since they are logically deleted and
560  // should not consume any profile samples.
561  if (Probe->isDangling())
562  return std::error_code();
563 
564  const FunctionSamples *FS = findFunctionSamples(Inst);
565  // If none of the instruction has FunctionSample, we choose to return zero
566  // value sample to indicate the BB is cold. This could happen when the
567  // instruction is from inlinee and no profile data is found.
568  // FIXME: This should not be affected by the source drift issue as 1) if the
569  // newly added function is top-level inliner, it won't match the CFG checksum
570  // in the function profile or 2) if it's the inlinee, the inlinee should have
571  // a profile, otherwise it wouldn't be inlined. For non-probe based profile,
572  // we can improve it by adding a switch for profile-sample-block-accurate for
573  // block level counts in the future.
574  if (!FS)
575  return 0;
576 
577  // For non-CS profile, If a direct call/invoke instruction is inlined in
578  // profile (findCalleeFunctionSamples returns non-empty result), but not
579  // inlined here, it means that the inlined callsite has no sample, thus the
580  // call instruction should have 0 count.
581  // For CS profile, the callsite count of previously inlined callees is
582  // populated with the entry count of the callees.
583  if (!ProfileIsCS)
584  if (const auto *CB = dyn_cast<CallBase>(&Inst))
585  if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
586  return 0;
587 
588  const ErrorOr<uint64_t> &R = FS->findSamplesAt(Probe->Id, 0);
589  if (R) {
590  uint64_t Samples = R.get() * Probe->Factor;
591  bool FirstMark = CoverageTracker.markSamplesUsed(FS, Probe->Id, 0, Samples);
592  if (FirstMark) {
593  ORE->emit([&]() {
594  OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "AppliedSamples", &Inst);
595  Remark << "Applied " << ore::NV("NumSamples", Samples);
596  Remark << " samples from profile (ProbeId=";
597  Remark << ore::NV("ProbeId", Probe->Id);
598  Remark << ", Factor=";
599  Remark << ore::NV("Factor", Probe->Factor);
600  Remark << ", OriginalSamples=";
601  Remark << ore::NV("OriginalSamples", R.get());
602  Remark << ")";
603  return Remark;
604  });
605  }
606  LLVM_DEBUG(dbgs() << " " << Probe->Id << ":" << Inst
607  << " - weight: " << R.get() << " - factor: "
608  << format("%0.2f", Probe->Factor) << ")\n");
609  return Samples;
610  }
611  return R;
612 }
613 
614 /// Get the FunctionSamples for a call instruction.
615 ///
616 /// The FunctionSamples of a call/invoke instruction \p Inst is the inlined
617 /// instance in which that call instruction is calling to. It contains
618 /// all samples that resides in the inlined instance. We first find the
619 /// inlined instance in which the call instruction is from, then we
620 /// traverse its children to find the callsite with the matching
621 /// location.
622 ///
623 /// \param Inst Call/Invoke instruction to query.
624 ///
625 /// \returns The FunctionSamples pointer to the inlined instance.
626 const FunctionSamples *
627 SampleProfileLoader::findCalleeFunctionSamples(const CallBase &Inst) const {
628  const DILocation *DIL = Inst.getDebugLoc();
629  if (!DIL) {
630  return nullptr;
631  }
632 
633  StringRef CalleeName;
634  if (Function *Callee = Inst.getCalledFunction())
635  CalleeName = Callee->getName();
636 
637  if (ProfileIsCS)
638  return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName);
639 
640  const FunctionSamples *FS = findFunctionSamples(Inst);
641  if (FS == nullptr)
642  return nullptr;
643 
644  return FS->findFunctionSamplesAt(FunctionSamples::getCallSiteIdentifier(DIL),
645  CalleeName, Reader->getRemapper());
646 }
647 
648 /// Returns a vector of FunctionSamples that are the indirect call targets
649 /// of \p Inst. The vector is sorted by the total number of samples. Stores
650 /// the total call count of the indirect call in \p Sum.
651 std::vector<const FunctionSamples *>
652 SampleProfileLoader::findIndirectCallFunctionSamples(
653  const Instruction &Inst, uint64_t &Sum) const {
654  const DILocation *DIL = Inst.getDebugLoc();
655  std::vector<const FunctionSamples *> R;
656 
657  if (!DIL) {
658  return R;
659  }
660 
661  auto FSCompare = [](const FunctionSamples *L, const FunctionSamples *R) {
662  assert(L && R && "Expect non-null FunctionSamples");
663  if (L->getEntrySamples() != R->getEntrySamples())
664  return L->getEntrySamples() > R->getEntrySamples();
665  return FunctionSamples::getGUID(L->getName()) <
666  FunctionSamples::getGUID(R->getName());
667  };
668 
669  if (ProfileIsCS) {
670  auto CalleeSamples =
671  ContextTracker->getIndirectCalleeContextSamplesFor(DIL);
672  if (CalleeSamples.empty())
673  return R;
674 
675  // For CSSPGO, we only use target context profile's entry count
676  // as that already includes both inlined callee and non-inlined ones..
677  Sum = 0;
678  for (const auto *const FS : CalleeSamples) {
679  Sum += FS->getEntrySamples();
680  R.push_back(FS);
681  }
682  llvm::sort(R, FSCompare);
683  return R;
684  }
685 
686  const FunctionSamples *FS = findFunctionSamples(Inst);
687  if (FS == nullptr)
688  return R;
689 
691  auto T = FS->findCallTargetMapAt(CallSite);
692  Sum = 0;
693  if (T)
694  for (const auto &T_C : T.get())
695  Sum += T_C.second;
696  if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(CallSite)) {
697  if (M->empty())
698  return R;
699  for (const auto &NameFS : *M) {
700  Sum += NameFS.second.getEntrySamples();
701  R.push_back(&NameFS.second);
702  }
703  llvm::sort(R, FSCompare);
704  }
705  return R;
706 }
707 
708 const FunctionSamples *
709 SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
711  Optional<PseudoProbe> Probe = extractProbe(Inst);
712  if (!Probe)
713  return nullptr;
714  }
715 
716  const DILocation *DIL = Inst.getDebugLoc();
717  if (!DIL)
718  return Samples;
719 
720  auto it = DILocation2SampleMap.try_emplace(DIL,nullptr);
721  if (it.second) {
722  if (ProfileIsCS)
723  it.first->second = ContextTracker->getContextSamplesFor(DIL);
724  else
725  it.first->second =
726  Samples->findFunctionSamples(DIL, Reader->getRemapper());
727  }
728  return it.first->second;
729 }
730 
731 /// Check whether the indirect call promotion history of \p Inst allows
732 /// the promotion for \p Candidate.
733 /// If the profile count for the promotion candidate \p Candidate is
734 /// NOMORE_ICP_MAGICNUM, it means \p Candidate has already been promoted
735 /// for \p Inst. If we already have at least MaxNumPromotions
736 /// NOMORE_ICP_MAGICNUM count values in the value profile of \p Inst, we
737 /// cannot promote for \p Inst anymore.
738 static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate) {
739  uint32_t NumVals = 0;
740  uint64_t TotalCount = 0;
741  std::unique_ptr<InstrProfValueData[]> ValueData =
742  std::make_unique<InstrProfValueData[]>(MaxNumPromotions);
743  bool Valid =
744  getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget, MaxNumPromotions,
745  ValueData.get(), NumVals, TotalCount, true);
746  // No valid value profile so no promoted targets have been recorded
747  // before. Ok to do ICP.
748  if (!Valid)
749  return true;
750 
751  unsigned NumPromoted = 0;
752  for (uint32_t I = 0; I < NumVals; I++) {
753  if (ValueData[I].Count != NOMORE_ICP_MAGICNUM)
754  continue;
755 
756  // If the promotion candidate has NOMORE_ICP_MAGICNUM count in the
757  // metadata, it means the candidate has been promoted for this
758  // indirect call.
759  if (ValueData[I].Value == Function::getGUID(Candidate))
760  return false;
761  NumPromoted++;
762  // If already have MaxNumPromotions promotion, don't do it anymore.
763  if (NumPromoted == MaxNumPromotions)
764  return false;
765  }
766  return true;
767 }
768 
769 /// Update indirect call target profile metadata for \p Inst.
770 /// Usually \p Sum is the sum of counts of all the targets for \p Inst.
771 /// If it is 0, it means updateIDTMetaData is used to mark a
772 /// certain target to be promoted already. If it is not zero,
773 /// we expect to use it to update the total count in the value profile.
774 static void
776  const SmallVectorImpl<InstrProfValueData> &CallTargets,
777  uint64_t Sum) {
778  uint32_t NumVals = 0;
779  // OldSum is the existing total count in the value profile data.
780  uint64_t OldSum = 0;
781  std::unique_ptr<InstrProfValueData[]> ValueData =
782  std::make_unique<InstrProfValueData[]>(MaxNumPromotions);
783  bool Valid =
784  getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget, MaxNumPromotions,
785  ValueData.get(), NumVals, OldSum, true);
786 
787  DenseMap<uint64_t, uint64_t> ValueCountMap;
788  if (Sum == 0) {
789  assert((CallTargets.size() == 1 &&
790  CallTargets[0].Count == NOMORE_ICP_MAGICNUM) &&
791  "If sum is 0, assume only one element in CallTargets "
792  "with count being NOMORE_ICP_MAGICNUM");
793  // Initialize ValueCountMap with existing value profile data.
794  if (Valid) {
795  for (uint32_t I = 0; I < NumVals; I++)
796  ValueCountMap[ValueData[I].Value] = ValueData[I].Count;
797  }
798  auto Pair =
799  ValueCountMap.try_emplace(CallTargets[0].Value, CallTargets[0].Count);
800  // If the target already exists in value profile, decrease the total
801  // count OldSum and reset the target's count to NOMORE_ICP_MAGICNUM.
802  if (!Pair.second) {
803  OldSum -= Pair.first->second;
804  Pair.first->second = NOMORE_ICP_MAGICNUM;
805  }
806  Sum = OldSum;
807  } else {
808  // Initialize ValueCountMap with existing NOMORE_ICP_MAGICNUM
809  // counts in the value profile.
810  if (Valid) {
811  for (uint32_t I = 0; I < NumVals; I++) {
812  if (ValueData[I].Count == NOMORE_ICP_MAGICNUM)
813  ValueCountMap[ValueData[I].Value] = ValueData[I].Count;
814  }
815  }
816 
817  for (const auto &Data : CallTargets) {
818  auto Pair = ValueCountMap.try_emplace(Data.Value, Data.Count);
819  if (Pair.second)
820  continue;
821  // The target represented by Data.Value has already been promoted.
822  // Keep the count as NOMORE_ICP_MAGICNUM in the profile and decrease
823  // Sum by Data.Count.
824  assert(Sum >= Data.Count && "Sum should never be less than Data.Count");
825  Sum -= Data.Count;
826  }
827  }
828 
829  SmallVector<InstrProfValueData, 8> NewCallTargets;
830  for (const auto &ValueCount : ValueCountMap) {
831  NewCallTargets.emplace_back(
832  InstrProfValueData{ValueCount.first, ValueCount.second});
833  }
834 
835  llvm::sort(NewCallTargets,
836  [](const InstrProfValueData &L, const InstrProfValueData &R) {
837  if (L.Count != R.Count)
838  return L.Count > R.Count;
839  return L.Value > R.Value;
840  });
841 
842  uint32_t MaxMDCount =
843  std::min(NewCallTargets.size(), static_cast<size_t>(MaxNumPromotions));
844  annotateValueSite(*Inst.getParent()->getParent()->getParent(), Inst,
845  NewCallTargets, Sum, IPVK_IndirectCallTarget, MaxMDCount);
846 }
847 
848 /// Attempt to promote indirect call and also inline the promoted call.
849 ///
850 /// \param F Caller function.
851 /// \param Candidate ICP and inline candidate.
852 /// \param SumOrigin Original sum of target counts for indirect call before
853 /// promoting given candidate.
854 /// \param Sum Prorated sum of remaining target counts for indirect call
855 /// after promoting given candidate.
856 /// \param InlinedCallSite Output vector for new call sites exposed after
857 /// inlining.
858 bool SampleProfileLoader::tryPromoteAndInlineCandidate(
859  Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum,
860  SmallVector<CallBase *, 8> *InlinedCallSite) {
861  auto CalleeFunctionName = Candidate.CalleeSamples->getFuncName();
862  auto R = SymbolMap.find(CalleeFunctionName);
863  if (R == SymbolMap.end() || !R->getValue())
864  return false;
865 
866  auto &CI = *Candidate.CallInstr;
867  if (!doesHistoryAllowICP(CI, R->getValue()->getName()))
868  return false;
869 
870  const char *Reason = "Callee function not available";
871  // R->getValue() != &F is to prevent promoting a recursive call.
872  // If it is a recursive call, we do not inline it as it could bloat
873  // the code exponentially. There is way to better handle this, e.g.
874  // clone the caller first, and inline the cloned caller if it is
875  // recursive. As llvm does not inline recursive calls, we will
876  // simply ignore it instead of handling it explicitly.
877  if (!R->getValue()->isDeclaration() && R->getValue()->getSubprogram() &&
878  R->getValue()->hasFnAttribute("use-sample-profile") &&
879  R->getValue() != &F && isLegalToPromote(CI, R->getValue(), &Reason)) {
880  // For promoted target, set its value with NOMORE_ICP_MAGICNUM count
881  // in the value profile metadata so the target won't be promoted again.
882  SmallVector<InstrProfValueData, 1> SortedCallTargets = {InstrProfValueData{
883  Function::getGUID(R->getValue()->getName()), NOMORE_ICP_MAGICNUM}};
884  updateIDTMetaData(CI, SortedCallTargets, 0);
885 
886  auto *DI = &pgo::promoteIndirectCall(
887  CI, R->getValue(), Candidate.CallsiteCount, Sum, false, ORE);
888  if (DI) {
889  Sum -= Candidate.CallsiteCount;
890  // Do not prorate the indirect callsite distribution since the original
891  // distribution will be used to scale down non-promoted profile target
892  // counts later. By doing this we lose track of the real callsite count
893  // for the leftover indirect callsite as a trade off for accurate call
894  // target counts.
895  // TODO: Ideally we would have two separate factors, one for call site
896  // counts and one is used to prorate call target counts.
897  // Do not update the promoted direct callsite distribution at this
898  // point since the original distribution combined with the callee profile
899  // will be used to prorate callsites from the callee if inlined. Once not
900  // inlined, the direct callsite distribution should be prorated so that
901  // the it will reflect the real callsite counts.
902  Candidate.CallInstr = DI;
903  if (isa<CallInst>(DI) || isa<InvokeInst>(DI)) {
904  bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
905  if (!Inlined) {
906  // Prorate the direct callsite distribution so that it reflects real
907  // callsite counts.
909  *DI, static_cast<float>(Candidate.CallsiteCount) / SumOrigin);
910  }
911  return Inlined;
912  }
913  }
914  } else {
915  LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to "
916  << Candidate.CalleeSamples->getFuncName() << " because "
917  << Reason << "\n");
918  }
919  return false;
920 }
921 
922 bool SampleProfileLoader::shouldInlineColdCallee(CallBase &CallInst) {
923  if (!ProfileSizeInline)
924  return false;
925 
927  if (Callee == nullptr)
928  return false;
929 
930  InlineCost Cost = getInlineCost(CallInst, getInlineParams(), GetTTI(*Callee),
931  GetAC, GetTLI);
932 
933  if (Cost.isNever())
934  return false;
935 
936  if (Cost.isAlways())
937  return true;
938 
939  return Cost.getCost() <= SampleColdCallSiteThreshold;
940 }
941 
942 void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
943  const SmallVectorImpl<CallBase *> &Candidates, const Function &F,
944  bool Hot) {
945  for (auto I : Candidates) {
946  Function *CalledFunction = I->getCalledFunction();
947  if (CalledFunction) {
948  ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineAttempt",
949  I->getDebugLoc(), I->getParent())
950  << "previous inlining reattempted for "
951  << (Hot ? "hotness: '" : "size: '")
952  << ore::NV("Callee", CalledFunction) << "' into '"
953  << ore::NV("Caller", &F) << "'");
954  }
955  }
956 }
957 
958 void SampleProfileLoader::findExternalInlineCandidate(
959  const FunctionSamples *Samples, DenseSet<GlobalValue::GUID> &InlinedGUIDs,
960  const StringMap<Function *> &SymbolMap, uint64_t Threshold) {
961  assert(Samples && "expect non-null caller profile");
962 
963  // For AutoFDO profile, retrieve candidate profiles by walking over
964  // the nested inlinee profiles.
965  if (!ProfileIsCS) {
966  Samples->findInlinedFunctions(InlinedGUIDs, SymbolMap, Threshold);
967  return;
968  }
969 
971  ContextTracker->getContextFor(Samples->getContext());
972  std::queue<ContextTrieNode *> CalleeList;
973  CalleeList.push(Caller);
974  while (!CalleeList.empty()) {
975  ContextTrieNode *Node = CalleeList.front();
976  CalleeList.pop();
977  FunctionSamples *CalleeSample = Node->getFunctionSamples();
978  // For CSSPGO profile, retrieve candidate profile by walking over the
979  // trie built for context profile. Note that also take call targets
980  // even if callee doesn't have a corresponding context profile.
981  if (!CalleeSample || CalleeSample->getEntrySamples() < Threshold)
982  continue;
983 
984  StringRef Name = CalleeSample->getFuncName();
986  // Add to the import list only when it's defined out of module.
987  if (!Func || Func->isDeclaration())
988  InlinedGUIDs.insert(FunctionSamples::getGUID(Name));
989 
990  // Import hot CallTargets, which may not be available in IR because full
991  // profile annotation cannot be done until backend compilation in ThinLTO.
992  for (const auto &BS : CalleeSample->getBodySamples())
993  for (const auto &TS : BS.second.getCallTargets())
994  if (TS.getValue() > Threshold) {
995  StringRef CalleeName = CalleeSample->getFuncName(TS.getKey());
996  const Function *Callee = SymbolMap.lookup(CalleeName);
997  if (!Callee || Callee->isDeclaration())
998  InlinedGUIDs.insert(FunctionSamples::getGUID(CalleeName));
999  }
1000 
1001  // Import hot child context profile associted with callees. Note that this
1002  // may have some overlap with the call target loop above, but doing this
1003  // based child context profile again effectively allow us to use the max of
1004  // entry count and call target count to determine importing.
1005  for (auto &Child : Node->getAllChildContext()) {
1006  ContextTrieNode *CalleeNode = &Child.second;
1007  CalleeList.push(CalleeNode);
1008  }
1009  }
1010 }
1011 
1012 /// Iteratively inline hot callsites of a function.
1013 ///
1014 /// Iteratively traverse all callsites of the function \p F, and find if
1015 /// the corresponding inlined instance exists and is hot in profile. If
1016 /// it is hot enough, inline the callsites and adds new callsites of the
1017 /// callee into the caller. If the call is an indirect call, first promote
1018 /// it to direct call. Each indirect call is limited with a single target.
1019 ///
1020 /// \param F function to perform iterative inlining.
1021 /// \param InlinedGUIDs a set to be updated to include all GUIDs that are
1022 /// inlined in the profiled binary.
1023 ///
1024 /// \returns True if there is any inline happened.
1025 bool SampleProfileLoader::inlineHotFunctions(
1026  Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
1027  // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
1028  // Profile symbol list is ignored when profile-sample-accurate is on.
1029  assert((!ProfAccForSymsInList ||
1031  !F.hasFnAttribute("profile-sample-accurate"))) &&
1032  "ProfAccForSymsInList should be false when profile-sample-accurate "
1033  "is enabled");
1034 
1035  DenseMap<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
1036  bool Changed = false;
1037  bool LocalChanged = true;
1038  while (LocalChanged) {
1039  LocalChanged = false;
1041  for (auto &BB : F) {
1042  bool Hot = false;
1043  SmallVector<CallBase *, 10> AllCandidates;
1044  SmallVector<CallBase *, 10> ColdCandidates;
1045  for (auto &I : BB.getInstList()) {
1046  const FunctionSamples *FS = nullptr;
1047  if (auto *CB = dyn_cast<CallBase>(&I)) {
1048  if (!isa<IntrinsicInst>(I) && (FS = findCalleeFunctionSamples(*CB))) {
1049  assert((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) &&
1050  "GUIDToFuncNameMap has to be populated");
1051  AllCandidates.push_back(CB);
1052  if (FS->getEntrySamples() > 0 || ProfileIsCS)
1053  LocalNotInlinedCallSites.try_emplace(CB, FS);
1054  if (callsiteIsHot(FS, PSI, ProfAccForSymsInList))
1055  Hot = true;
1056  else if (shouldInlineColdCallee(*CB))
1057  ColdCandidates.push_back(CB);
1058  }
1059  }
1060  }
1061  if (Hot || ExternalInlineAdvisor) {
1062  CIS.insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end());
1063  emitOptimizationRemarksForInlineCandidates(AllCandidates, F, true);
1064  } else {
1065  CIS.insert(CIS.begin(), ColdCandidates.begin(), ColdCandidates.end());
1066  emitOptimizationRemarksForInlineCandidates(ColdCandidates, F, false);
1067  }
1068  }
1069  for (CallBase *I : CIS) {
1070  Function *CalledFunction = I->getCalledFunction();
1071  InlineCandidate Candidate = {
1072  I,
1073  LocalNotInlinedCallSites.count(I) ? LocalNotInlinedCallSites[I]
1074  : nullptr,
1075  0 /* dummy count */, 1.0 /* dummy distribution factor */};
1076  // Do not inline recursive calls.
1077  if (CalledFunction == &F)
1078  continue;
1079  if (I->isIndirectCall()) {
1080  uint64_t Sum;
1081  for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
1082  uint64_t SumOrigin = Sum;
1083  if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1084  findExternalInlineCandidate(FS, InlinedGUIDs, SymbolMap,
1085  PSI->getOrCompHotCountThreshold());
1086  continue;
1087  }
1088  if (!callsiteIsHot(FS, PSI, ProfAccForSymsInList))
1089  continue;
1090 
1091  Candidate = {I, FS, FS->getEntrySamples(), 1.0};
1092  if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum)) {
1093  LocalNotInlinedCallSites.erase(I);
1094  LocalChanged = true;
1095  }
1096  }
1097  } else if (CalledFunction && CalledFunction->getSubprogram() &&
1098  !CalledFunction->isDeclaration()) {
1099  if (tryInlineCandidate(Candidate)) {
1100  LocalNotInlinedCallSites.erase(I);
1101  LocalChanged = true;
1102  }
1103  } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1104  findExternalInlineCandidate(findCalleeFunctionSamples(*I), InlinedGUIDs,
1105  SymbolMap,
1106  PSI->getOrCompHotCountThreshold());
1107  }
1108  }
1109  Changed |= LocalChanged;
1110  }
1111 
1112  // For CS profile, profile for not inlined context will be merged when
1113  // base profile is being trieved
1114  if (ProfileIsCS)
1115  return Changed;
1116 
1117  // Accumulate not inlined callsite information into notInlinedSamples
1118  for (const auto &Pair : LocalNotInlinedCallSites) {
1119  CallBase *I = Pair.getFirst();
1120  Function *Callee = I->getCalledFunction();
1121  if (!Callee || Callee->isDeclaration())
1122  continue;
1123 
1124  ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "NotInline",
1125  I->getDebugLoc(), I->getParent())
1126  << "previous inlining not repeated: '"
1127  << ore::NV("Callee", Callee) << "' into '"
1128  << ore::NV("Caller", &F) << "'");
1129 
1130  ++NumCSNotInlined;
1131  const FunctionSamples *FS = Pair.getSecond();
1132  if (FS->getTotalSamples() == 0 && FS->getEntrySamples() == 0) {
1133  continue;
1134  }
1135 
1136  if (ProfileMergeInlinee) {
1137  // A function call can be replicated by optimizations like callsite
1138  // splitting or jump threading and the replicates end up sharing the
1139  // sample nested callee profile instead of slicing the original inlinee's
1140  // profile. We want to do merge exactly once by filtering out callee
1141  // profiles with a non-zero head sample count.
1142  if (FS->getHeadSamples() == 0) {
1143  // Use entry samples as head samples during the merge, as inlinees
1144  // don't have head samples.
1145  const_cast<FunctionSamples *>(FS)->addHeadSamples(
1146  FS->getEntrySamples());
1147 
1148  // Note that we have to do the merge right after processing function.
1149  // This allows OutlineFS's profile to be used for annotation during
1150  // top-down processing of functions' annotation.
1151  FunctionSamples *OutlineFS = Reader->getOrCreateSamplesFor(*Callee);
1152  OutlineFS->merge(*FS);
1153  }
1154  } else {
1155  auto pair =
1156  notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
1157  pair.first->second.entryCount += FS->getEntrySamples();
1158  }
1159  }
1160  return Changed;
1161 }
1162 
1163 bool SampleProfileLoader::tryInlineCandidate(
1164  InlineCandidate &Candidate, SmallVector<CallBase *, 8> *InlinedCallSites) {
1165 
1166  CallBase &CB = *Candidate.CallInstr;
1167  Function *CalledFunction = CB.getCalledFunction();
1168  assert(CalledFunction && "Expect a callee with definition");
1169  DebugLoc DLoc = CB.getDebugLoc();
1170  BasicBlock *BB = CB.getParent();
1171 
1172  InlineCost Cost = shouldInlineCandidate(Candidate);
1173  if (Cost.isNever()) {
1174  ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineFail", DLoc, BB)
1175  << "incompatible inlining");
1176  return false;
1177  }
1178 
1179  if (!Cost)
1180  return false;
1181 
1182  InlineFunctionInfo IFI(nullptr, GetAC);
1183  IFI.UpdateProfile = false;
1184  if (InlineFunction(CB, IFI).isSuccess()) {
1185  // The call to InlineFunction erases I, so we can't pass it here.
1186  emitInlinedInto(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(), Cost,
1187  true, CSINLINE_DEBUG);
1188 
1189  // Now populate the list of newly exposed call sites.
1190  if (InlinedCallSites) {
1191  InlinedCallSites->clear();
1192  for (auto &I : IFI.InlinedCallSites)
1193  InlinedCallSites->push_back(I);
1194  }
1195 
1196  if (ProfileIsCS)
1197  ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
1198  ++NumCSInlined;
1199 
1200  // Prorate inlined probes for a duplicated inlining callsite which probably
1201  // has a distribution less than 100%. Samples for an inlinee should be
1202  // distributed among the copies of the original callsite based on each
1203  // callsite's distribution factor for counts accuracy. Note that an inlined
1204  // probe may come with its own distribution factor if it has been duplicated
1205  // in the inlinee body. The two factor are multiplied to reflect the
1206  // aggregation of duplication.
1207  if (Candidate.CallsiteDistribution < 1) {
1208  for (auto &I : IFI.InlinedCallSites) {
1209  if (Optional<PseudoProbe> Probe = extractProbe(*I))
1211  Candidate.CallsiteDistribution);
1212  }
1213  NumDuplicatedInlinesite++;
1214  }
1215 
1216  return true;
1217  }
1218  return false;
1219 }
1220 
1221 bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
1222  CallBase *CB) {
1223  assert(CB && "Expect non-null call instruction");
1224 
1225  if (isa<IntrinsicInst>(CB))
1226  return false;
1227 
1228  // Find the callee's profile. For indirect call, find hottest target profile.
1229  const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB);
1230  if (!CalleeSamples)
1231  return false;
1232 
1233  float Factor = 1.0;
1234  if (Optional<PseudoProbe> Probe = extractProbe(*CB))
1235  Factor = Probe->Factor;
1236 
1237  uint64_t CallsiteCount = 0;
1238  ErrorOr<uint64_t> Weight = getBlockWeight(CB->getParent());
1239  if (Weight)
1240  CallsiteCount = Weight.get();
1241  if (CalleeSamples)
1242  CallsiteCount = std::max(
1243  CallsiteCount, uint64_t(CalleeSamples->getEntrySamples() * Factor));
1244 
1245  *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
1246  return true;
1247 }
1248 
1249 InlineCost
1250 SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
1251  std::unique_ptr<InlineAdvice> Advice = nullptr;
1252  if (ExternalInlineAdvisor) {
1253  Advice = ExternalInlineAdvisor->getAdvice(*Candidate.CallInstr);
1254  if (!Advice->isInliningRecommended()) {
1255  Advice->recordUnattemptedInlining();
1256  return InlineCost::getNever("not previously inlined");
1257  }
1258  Advice->recordInlining();
1259  return InlineCost::getAlways("previously inlined");
1260  }
1261 
1262  // Adjust threshold based on call site hotness, only do this for callsite
1263  // prioritized inliner because otherwise cost-benefit check is done earlier.
1264  int SampleThreshold = SampleColdCallSiteThreshold;
1266  if (Candidate.CallsiteCount > PSI->getHotCountThreshold())
1267  SampleThreshold = SampleHotCallSiteThreshold;
1268  else if (!ProfileSizeInline)
1269  return InlineCost::getNever("cold callsite");
1270  }
1271 
1272  Function *Callee = Candidate.CallInstr->getCalledFunction();
1273  assert(Callee && "Expect a definition for inline candidate of direct call");
1274 
1275  InlineParams Params = getInlineParams();
1276  Params.ComputeFullInlineCost = true;
1277  // Checks if there is anything in the reachable portion of the callee at
1278  // this callsite that makes this inlining potentially illegal. Need to
1279  // set ComputeFullInlineCost, otherwise getInlineCost may return early
1280  // when cost exceeds threshold without checking all IRs in the callee.
1281  // The acutal cost does not matter because we only checks isNever() to
1282  // see if it is legal to inline the callsite.
1283  InlineCost Cost = getInlineCost(*Candidate.CallInstr, Callee, Params,
1284  GetTTI(*Callee), GetAC, GetTLI);
1285 
1286  // Honor always inline and never inline from call analyzer
1287  if (Cost.isNever() || Cost.isAlways())
1288  return Cost;
1289 
1290  // For old FDO inliner, we inline the call site as long as cost is not
1291  // "Never". The cost-benefit check is done earlier.
1293  return InlineCost::get(Cost.getCost(), INT_MAX);
1294  }
1295 
1296  // Otherwise only use the cost from call analyzer, but overwite threshold with
1297  // Sample PGO threshold.
1298  return InlineCost::get(Cost.getCost(), SampleThreshold);
1299 }
1300 
1301 bool SampleProfileLoader::inlineHotFunctionsWithPriority(
1302  Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
1303  assert(ProfileIsCS && "Prioritiy based inliner only works with CSSPGO now");
1304 
1305  // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
1306  // Profile symbol list is ignored when profile-sample-accurate is on.
1307  assert((!ProfAccForSymsInList ||
1309  !F.hasFnAttribute("profile-sample-accurate"))) &&
1310  "ProfAccForSymsInList should be false when profile-sample-accurate "
1311  "is enabled");
1312 
1313  // Populating worklist with initial call sites from root inliner, along
1314  // with call site weights.
1315  CandidateQueue CQueue;
1316  InlineCandidate NewCandidate;
1317  for (auto &BB : F) {
1318  for (auto &I : BB.getInstList()) {
1319  auto *CB = dyn_cast<CallBase>(&I);
1320  if (!CB)
1321  continue;
1322  if (getInlineCandidate(&NewCandidate, CB))
1323  CQueue.push(NewCandidate);
1324  }
1325  }
1326 
1327  // Cap the size growth from profile guided inlining. This is needed even
1328  // though cost of each inline candidate already accounts for callee size,
1329  // because with top-down inlining, we can grow inliner size significantly
1330  // with large number of smaller inlinees each pass the cost check.
1332  "Max inline size limit should not be smaller than min inline size "
1333  "limit.");
1334  unsigned SizeLimit = F.getInstructionCount() * ProfileInlineGrowthLimit;
1337  if (ExternalInlineAdvisor)
1339 
1340  // Perform iterative BFS call site prioritized inlining
1341  bool Changed = false;
1342  while (!CQueue.empty() && F.getInstructionCount() < SizeLimit) {
1343  InlineCandidate Candidate = CQueue.top();
1344  CQueue.pop();
1345  CallBase *I = Candidate.CallInstr;
1346  Function *CalledFunction = I->getCalledFunction();
1347 
1348  if (CalledFunction == &F)
1349  continue;
1350  if (I->isIndirectCall()) {
1351  uint64_t Sum = 0;
1352  auto CalleeSamples = findIndirectCallFunctionSamples(*I, Sum);
1353  uint64_t SumOrigin = Sum;
1354  Sum *= Candidate.CallsiteDistribution;
1355  for (const auto *FS : CalleeSamples) {
1356  // TODO: Consider disable pre-lTO ICP for MonoLTO as well
1357  if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1358  findExternalInlineCandidate(FS, InlinedGUIDs, SymbolMap,
1359  PSI->getOrCompHotCountThreshold());
1360  continue;
1361  }
1362  uint64_t EntryCountDistributed =
1363  FS->getEntrySamples() * Candidate.CallsiteDistribution;
1364  // In addition to regular inline cost check, we also need to make sure
1365  // ICP isn't introducing excessive speculative checks even if individual
1366  // target looks beneficial to promote and inline. That means we should
1367  // only do ICP when there's a small number dominant targets.
1368  if (EntryCountDistributed < SumOrigin / ProfileICPThreshold)
1369  break;
1370  // TODO: Fix CallAnalyzer to handle all indirect calls.
1371  // For indirect call, we don't run CallAnalyzer to get InlineCost
1372  // before actual inlining. This is because we could see two different
1373  // types from the same definition, which makes CallAnalyzer choke as
1374  // it's expecting matching parameter type on both caller and callee
1375  // side. See example from PR18962 for the triggering cases (the bug was
1376  // fixed, but we generate different types).
1377  if (!PSI->isHotCount(EntryCountDistributed))
1378  break;
1379  SmallVector<CallBase *, 8> InlinedCallSites;
1380  // Attach function profile for promoted indirect callee, and update
1381  // call site count for the promoted inline candidate too.
1382  Candidate = {I, FS, EntryCountDistributed,
1383  Candidate.CallsiteDistribution};
1384  if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
1385  &InlinedCallSites)) {
1386  for (auto *CB : InlinedCallSites) {
1387  if (getInlineCandidate(&NewCandidate, CB))
1388  CQueue.emplace(NewCandidate);
1389  }
1390  Changed = true;
1391  }
1392  }
1393  } else if (CalledFunction && CalledFunction->getSubprogram() &&
1394  !CalledFunction->isDeclaration()) {
1395  SmallVector<CallBase *, 8> InlinedCallSites;
1396  if (tryInlineCandidate(Candidate, &InlinedCallSites)) {
1397  for (auto *CB : InlinedCallSites) {
1398  if (getInlineCandidate(&NewCandidate, CB))
1399  CQueue.emplace(NewCandidate);
1400  }
1401  Changed = true;
1402  }
1403  } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1404  findExternalInlineCandidate(Candidate.CalleeSamples, InlinedGUIDs,
1405  SymbolMap, PSI->getOrCompHotCountThreshold());
1406  }
1407  }
1408 
1409  if (!CQueue.empty()) {
1410  if (SizeLimit == (unsigned)ProfileInlineLimitMax)
1411  ++NumCSInlinedHitMaxLimit;
1412  else if (SizeLimit == (unsigned)ProfileInlineLimitMin)
1413  ++NumCSInlinedHitMinLimit;
1414  else
1415  ++NumCSInlinedHitGrowthLimit;
1416  }
1417 
1418  return Changed;
1419 }
1420 
1421 /// Returns the sorted CallTargetMap \p M by count in descending order.
1425  for (const auto &I : SampleRecord::SortCallTargets(M)) {
1426  R.emplace_back(
1427  InstrProfValueData{FunctionSamples::getGUID(I.first), I.second});
1428  }
1429  return R;
1430 }
1431 
1432 // Generate MD_prof metadata for every branch instruction using the
1433 // edge weights computed during propagation.
1434 void SampleProfileLoader::generateMDProfMetadata(Function &F) {
1435  // Generate MD_prof metadata for every branch instruction using the
1436  // edge weights computed during propagation.
1437  LLVM_DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n");
1438  LLVMContext &Ctx = F.getContext();
1439  MDBuilder MDB(Ctx);
1440  for (auto &BI : F) {
1441  BasicBlock *BB = &BI;
1442 
1443  if (BlockWeights[BB]) {
1444  for (auto &I : BB->getInstList()) {
1445  if (!isa<CallInst>(I) && !isa<InvokeInst>(I))
1446  continue;
1447  if (!cast<CallBase>(I).getCalledFunction()) {
1448  const DebugLoc &DLoc = I.getDebugLoc();
1449  if (!DLoc)
1450  continue;
1451  const DILocation *DIL = DLoc;
1452  const FunctionSamples *FS = findFunctionSamples(I);
1453  if (!FS)
1454  continue;
1456  auto T = FS->findCallTargetMapAt(CallSite);
1457  if (!T || T.get().empty())
1458  continue;
1460  // Prorate the callsite counts based on the pre-ICP distribution
1461  // factor to reflect what is already done to the callsite before
1462  // ICP, such as calliste cloning.
1463  if (Optional<PseudoProbe> Probe = extractProbe(I)) {
1464  if (Probe->Factor < 1)
1465  T = SampleRecord::adjustCallTargets(T.get(), Probe->Factor);
1466  }
1467  }
1468  SmallVector<InstrProfValueData, 2> SortedCallTargets =
1470  uint64_t Sum = 0;
1471  for (const auto &C : T.get())
1472  Sum += C.second;
1473  // With CSSPGO all indirect call targets are counted torwards the
1474  // original indirect call site in the profile, including both
1475  // inlined and non-inlined targets.
1477  if (const FunctionSamplesMap *M =
1478  FS->findFunctionSamplesMapAt(CallSite)) {
1479  for (const auto &NameFS : *M)
1480  Sum += NameFS.second.getEntrySamples();
1481  }
1482  }
1483  if (Sum)
1484  updateIDTMetaData(I, SortedCallTargets, Sum);
1485  else if (OverwriteExistingWeights)
1486  I.setMetadata(LLVMContext::MD_prof, nullptr);
1487  } else if (!isa<IntrinsicInst>(&I)) {
1488  I.setMetadata(LLVMContext::MD_prof,
1489  MDB.createBranchWeights(
1490  {static_cast<uint32_t>(BlockWeights[BB])}));
1491  }
1492  }
1493  } else if (OverwriteExistingWeights) {
1494  // Set profile metadata (possibly annotated by LTO prelink) to zero or
1495  // clear it for cold code.
1496  for (auto &I : BB->getInstList()) {
1497  if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
1498  if (cast<CallBase>(I).isIndirectCall())
1499  I.setMetadata(LLVMContext::MD_prof, nullptr);
1500  else
1501  I.setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(0));
1502  }
1503  }
1504  }
1505 
1506  Instruction *TI = BB->getTerminator();
1507  if (TI->getNumSuccessors() == 1)
1508  continue;
1509  if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI) &&
1510  !isa<IndirectBrInst>(TI))
1511  continue;
1512 
1513  DebugLoc BranchLoc = TI->getDebugLoc();
1514  LLVM_DEBUG(dbgs() << "\nGetting weights for branch at line "
1515  << ((BranchLoc) ? Twine(BranchLoc.getLine())
1516  : Twine("<UNKNOWN LOCATION>"))
1517  << ".\n");
1518  SmallVector<uint32_t, 4> Weights;
1519  uint32_t MaxWeight = 0;
1520  Instruction *MaxDestInst;
1521  for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
1522  BasicBlock *Succ = TI->getSuccessor(I);
1523  Edge E = std::make_pair(BB, Succ);
1524  uint64_t Weight = EdgeWeights[E];
1525  LLVM_DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), E));
1526  // Use uint32_t saturated arithmetic to adjust the incoming weights,
1527  // if needed. Sample counts in profiles are 64-bit unsigned values,
1528  // but internally branch weights are expressed as 32-bit values.
1529  if (Weight > std::numeric_limits<uint32_t>::max()) {
1530  LLVM_DEBUG(dbgs() << " (saturated due to uint32_t overflow)");
1532  }
1533  // Weight is added by one to avoid propagation errors introduced by
1534  // 0 weights.
1535  Weights.push_back(static_cast<uint32_t>(Weight + 1));
1536  if (Weight != 0) {
1537  if (Weight > MaxWeight) {
1538  MaxWeight = Weight;
1539  MaxDestInst = Succ->getFirstNonPHIOrDbgOrLifetime();
1540  }
1541  }
1542  }
1543 
1544  uint64_t TempWeight;
1545  // Only set weights if there is at least one non-zero weight.
1546  // In any other case, let the analyzer set weights.
1547  // Do not set weights if the weights are present unless under
1548  // OverwriteExistingWeights. In ThinLTO, the profile annotation is done
1549  // twice. If the first annotation already set the weights, the second pass
1550  // does not need to set it. With OverwriteExistingWeights, Blocks with zero
1551  // weight should have their existing metadata (possibly annotated by LTO
1552  // prelink) cleared.
1553  if (MaxWeight > 0 &&
1554  (!TI->extractProfTotalWeight(TempWeight) || OverwriteExistingWeights)) {
1555  LLVM_DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
1556  TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
1557  ORE->emit([&]() {
1558  return OptimizationRemark(DEBUG_TYPE, "PopularDest", MaxDestInst)
1559  << "most popular destination for conditional branches at "
1560  << ore::NV("CondBranchesLoc", BranchLoc);
1561  });
1562  } else {
1564  TI->setMetadata(LLVMContext::MD_prof, nullptr);
1565  LLVM_DEBUG(dbgs() << "CLEARED. All branch weights are zero.\n");
1566  } else {
1567  LLVM_DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
1568  }
1569  }
1570  }
1571 }
1572 
1573 /// Once all the branch weights are computed, we emit the MD_prof
1574 /// metadata on BB using the computed values for each of its branches.
1575 ///
1576 /// \param F The function to query.
1577 ///
1578 /// \returns true if \p F was modified. Returns false, otherwise.
1579 bool SampleProfileLoader::emitAnnotations(Function &F) {
1580  bool Changed = false;
1581 
1583  if (!ProbeManager->profileIsValid(F, *Samples)) {
1584  LLVM_DEBUG(
1585  dbgs() << "Profile is invalid due to CFG mismatch for Function "
1586  << F.getName());
1587  ++NumMismatchedProfile;
1588  return false;
1589  }
1590  ++NumMatchedProfile;
1591  } else {
1592  if (getFunctionLoc(F) == 0)
1593  return false;
1594 
1595  LLVM_DEBUG(dbgs() << "Line number for the first instruction in "
1596  << F.getName() << ": " << getFunctionLoc(F) << "\n");
1597  }
1598 
1599  DenseSet<GlobalValue::GUID> InlinedGUIDs;
1600  if (ProfileIsCS && CallsitePrioritizedInline)
1601  Changed |= inlineHotFunctionsWithPriority(F, InlinedGUIDs);
1602  else
1603  Changed |= inlineHotFunctions(F, InlinedGUIDs);
1604 
1605  Changed |= computeAndPropagateWeights(F, InlinedGUIDs);
1606 
1607  if (Changed)
1608  generateMDProfMetadata(F);
1609 
1610  emitCoverageRemarks(F);
1611  return Changed;
1612 }
1613 
1615 
1616 INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile",
1617  "Sample Profile loader", false, false)
1622 INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",
1623  "Sample Profile loader", false, false)
1624 
1625 std::unique_ptr<ProfiledCallGraph>
1626 SampleProfileLoader::buildProfiledCallGraph(CallGraph &CG) {
1627  std::unique_ptr<ProfiledCallGraph> ProfiledCG;
1628  if (ProfileIsCS)
1629  ProfiledCG = std::make_unique<ProfiledCallGraph>(*ContextTracker);
1630  else
1631  ProfiledCG = std::make_unique<ProfiledCallGraph>(Reader->getProfiles());
1632 
1633  // Add all functions into the profiled call graph even if they are not in
1634  // the profile. This makes sure functions missing from the profile still
1635  // gets a chance to be processed.
1636  for (auto &Node : CG) {
1637  const auto *F = Node.first;
1638  if (!F || F->isDeclaration() || !F->hasFnAttribute("use-sample-profile"))
1639  continue;
1640  ProfiledCG->addProfiledFunction(FunctionSamples::getCanonicalFnName(*F));
1641  }
1642 
1643  return ProfiledCG;
1644 }
1645 
1646 std::vector<Function *>
1647 SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
1648  std::vector<Function *> FunctionOrderList;
1649  FunctionOrderList.reserve(M.size());
1650 
1652  errs() << "WARNING: -use-profiled-call-graph ignored, should be used "
1653  "together with -sample-profile-top-down-load.\n";
1654 
1655  if (!ProfileTopDownLoad || CG == nullptr) {
1656  if (ProfileMergeInlinee) {
1657  // Disable ProfileMergeInlinee if profile is not loaded in top down order,
1658  // because the profile for a function may be used for the profile
1659  // annotation of its outline copy before the profile merging of its
1660  // non-inlined inline instances, and that is not the way how
1661  // ProfileMergeInlinee is supposed to work.
1662  ProfileMergeInlinee = false;
1663  }
1664 
1665  for (Function &F : M)
1666  if (!F.isDeclaration() && F.hasFnAttribute("use-sample-profile"))
1667  FunctionOrderList.push_back(&F);
1668  return FunctionOrderList;
1669  }
1670 
1671  assert(&CG->getModule() == &M);
1672 
1673  if (UseProfiledCallGraph ||
1674  (ProfileIsCS && !UseProfiledCallGraph.getNumOccurrences())) {
1675  // Use profiled call edges to augment the top-down order. There are cases
1676  // that the top-down order computed based on the static call graph doesn't
1677  // reflect real execution order. For example
1678  //
1679  // 1. Incomplete static call graph due to unknown indirect call targets.
1680  // Adjusting the order by considering indirect call edges from the
1681  // profile can enable the inlining of indirect call targets by allowing
1682  // the caller processed before them.
1683  // 2. Mutual call edges in an SCC. The static processing order computed for
1684  // an SCC may not reflect the call contexts in the context-sensitive
1685  // profile, thus may cause potential inlining to be overlooked. The
1686  // function order in one SCC is being adjusted to a top-down order based
1687  // on the profile to favor more inlining. This is only a problem with CS
1688  // profile.
1689  // 3. Transitive indirect call edges due to inlining. When a callee function
1690  // (say B) is inlined into into a caller function (say A) in LTO prelink,
1691  // every call edge originated from the callee B will be transferred to
1692  // the caller A. If any transferred edge (say A->C) is indirect, the
1693  // original profiled indirect edge B->C, even if considered, would not
1694  // enforce a top-down order from the caller A to the potential indirect
1695  // call target C in LTO postlink since the inlined callee B is gone from
1696  // the static call graph.
1697  // 4. #3 can happen even for direct call targets, due to functions defined
1698  // in header files. A header function (say A), when included into source
1699  // files, is defined multiple times but only one definition survives due
1700  // to ODR. Therefore, the LTO prelink inlining done on those dropped
1701  // definitions can be useless based on a local file scope. More
1702  // importantly, the inlinee (say B), once fully inlined to a
1703  // to-be-dropped A, will have no profile to consume when its outlined
1704  // version is compiled. This can lead to a profile-less prelink
1705  // compilation for the outlined version of B which may be called from
1706  // external modules. while this isn't easy to fix, we rely on the
1707  // postlink AutoFDO pipeline to optimize B. Since the survived copy of
1708  // the A can be inlined in its local scope in prelink, it may not exist
1709  // in the merged IR in postlink, and we'll need the profiled call edges
1710  // to enforce a top-down order for the rest of the functions.
1711  //
1712  // Considering those cases, a profiled call graph completely independent of
1713  // the static call graph is constructed based on profile data, where
1714  // function objects are not even needed to handle case #3 and case 4.
1715  //
1716  // Note that static callgraph edges are completely ignored since they
1717  // can be conflicting with profiled edges for cyclic SCCs and may result in
1718  // an SCC order incompatible with profile-defined one. Using strictly
1719  // profile order ensures a maximum inlining experience. On the other hand,
1720  // static call edges are not so important when they don't correspond to a
1721  // context in the profile.
1722 
1723  std::unique_ptr<ProfiledCallGraph> ProfiledCG = buildProfiledCallGraph(*CG);
1724  scc_iterator<ProfiledCallGraph *> CGI = scc_begin(ProfiledCG.get());
1725  while (!CGI.isAtEnd()) {
1726  for (ProfiledCallGraphNode *Node : *CGI) {
1727  Function *F = SymbolMap.lookup(Node->Name);
1728  if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile"))
1729  FunctionOrderList.push_back(F);
1730  }
1731  ++CGI;
1732  }
1733  } else {
1735  while (!CGI.isAtEnd()) {
1736  for (CallGraphNode *Node : *CGI) {
1737  auto *F = Node->getFunction();
1738  if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile"))
1739  FunctionOrderList.push_back(F);
1740  }
1741  ++CGI;
1742  }
1743  }
1744 
1745  LLVM_DEBUG({
1746  dbgs() << "Function processing order:\n";
1747  for (auto F : reverse(FunctionOrderList)) {
1748  dbgs() << F->getName() << "\n";
1749  }
1750  });
1751 
1752  std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
1753  return FunctionOrderList;
1754 }
1755 
1756 bool SampleProfileLoader::doInitialization(Module &M,
1758  auto &Ctx = M.getContext();
1759 
1760  auto ReaderOrErr = SampleProfileReader::create(
1761  Filename, Ctx, FSDiscriminatorPass::Base, RemappingFilename);
1762  if (std::error_code EC = ReaderOrErr.getError()) {
1763  std::string Msg = "Could not open profile: " + EC.message();
1764  Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1765  return false;
1766  }
1767  Reader = std::move(ReaderOrErr.get());
1769  // set module before reading the profile so reader may be able to only
1770  // read the function profiles which are used by the current module.
1771  Reader->setModule(&M);
1772  if (std::error_code EC = Reader->read()) {
1773  std::string Msg = "profile reading failed: " + EC.message();
1774  Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1775  return false;
1776  }
1777 
1778  PSL = Reader->getProfileSymbolList();
1779 
1780  // While profile-sample-accurate is on, ignore symbol list.
1781  ProfAccForSymsInList =
1783  if (ProfAccForSymsInList) {
1784  NamesInProfile.clear();
1785  if (auto NameTable = Reader->getNameTable())
1786  NamesInProfile.insert(NameTable->begin(), NameTable->end());
1787  CoverageTracker.setProfAccForSymsInList(true);
1788  }
1789 
1790  if (FAM && !ProfileInlineReplayFile.empty()) {
1791  ExternalInlineAdvisor = std::make_unique<ReplayInlineAdvisor>(
1792  M, *FAM, Ctx, /*OriginalAdvisor=*/nullptr, ProfileInlineReplayFile,
1793  /*EmitRemarks=*/false);
1794  if (!ExternalInlineAdvisor->areReplayRemarksLoaded())
1795  ExternalInlineAdvisor.reset();
1796  }
1797 
1798  // Apply tweaks if context-sensitive profile is available.
1799  if (Reader->profileIsCS()) {
1800  ProfileIsCS = true;
1802 
1803  // Enable priority-base inliner and size inline by default for CSSPGO.
1805  ProfileSizeInline = true;
1808 
1809  // Tracker for profiles under different context
1810  ContextTracker =
1811  std::make_unique<SampleContextTracker>(Reader->getProfiles());
1812  }
1813 
1814  // Load pseudo probe descriptors for probe-based function samples.
1815  if (Reader->profileIsProbeBased()) {
1816  ProbeManager = std::make_unique<PseudoProbeManager>(M);
1817  if (!ProbeManager->moduleIsProbed(M)) {
1818  const char *Msg =
1819  "Pseudo-probe-based profile requires SampleProfileProbePass";
1820  Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1821  return false;
1822  }
1823  }
1824 
1825  return true;
1826 }
1827 
1829  return new SampleProfileLoaderLegacyPass();
1830 }
1831 
1833  return new SampleProfileLoaderLegacyPass(Name);
1834 }
1835 
1836 bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
1837  ProfileSummaryInfo *_PSI, CallGraph *CG) {
1838  GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
1839 
1840  PSI = _PSI;
1841  if (M.getProfileSummary(/* IsCS */ false) == nullptr) {
1842  M.setProfileSummary(Reader->getSummary().getMD(M.getContext()),
1844  PSI->refresh();
1845  }
1846  // Compute the total number of samples collected in this profile.
1847  for (const auto &I : Reader->getProfiles())
1848  TotalCollectedSamples += I.second.getTotalSamples();
1849 
1850  auto Remapper = Reader->getRemapper();
1851  // Populate the symbol map.
1852  for (const auto &N_F : M.getValueSymbolTable()) {
1853  StringRef OrigName = N_F.getKey();
1854  Function *F = dyn_cast<Function>(N_F.getValue());
1855  if (F == nullptr || OrigName.empty())
1856  continue;
1857  SymbolMap[OrigName] = F;
1859  if (OrigName != NewName && !NewName.empty()) {
1860  auto r = SymbolMap.insert(std::make_pair(NewName, F));
1861  // Failiing to insert means there is already an entry in SymbolMap,
1862  // thus there are multiple functions that are mapped to the same
1863  // stripped name. In this case of name conflicting, set the value
1864  // to nullptr to avoid confusion.
1865  if (!r.second)
1866  r.first->second = nullptr;
1867  OrigName = NewName;
1868  }
1869  // Insert the remapped names into SymbolMap.
1870  if (Remapper) {
1871  if (auto MapName = Remapper->lookUpNameInProfile(OrigName)) {
1872  if (*MapName != OrigName && !MapName->empty())
1873  SymbolMap.insert(std::make_pair(*MapName, F));
1874  }
1875  }
1876  }
1877  assert(SymbolMap.count(StringRef()) == 0 &&
1878  "No empty StringRef should be added in SymbolMap");
1879 
1880  bool retval = false;
1881  for (auto F : buildFunctionOrder(M, CG)) {
1882  assert(!F->isDeclaration());
1883  clearFunctionData();
1884  retval |= runOnFunction(*F, AM);
1885  }
1886 
1887  // Account for cold calls not inlined....
1888  if (!ProfileIsCS)
1889  for (const std::pair<Function *, NotInlinedProfileInfo> &pair :
1890  notInlinedCallInfo)
1891  updateProfileCallee(pair.first, pair.second.entryCount);
1892 
1893  return retval;
1894 }
1895 
1896 bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
1897  ACT = &getAnalysis<AssumptionCacheTracker>();
1898  TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
1899  TLIWP = &getAnalysis<TargetLibraryInfoWrapperPass>();
1900  ProfileSummaryInfo *PSI =
1901  &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
1902  return SampleLoader.runOnModule(M, nullptr, PSI, nullptr);
1903 }
1904 
1906  LLVM_DEBUG(dbgs() << "\n\nProcessing Function " << F.getName() << "\n");
1907  DILocation2SampleMap.clear();
1908  // By default the entry count is initialized to -1, which will be treated
1909  // conservatively by getEntryCount as the same as unknown (None). This is
1910  // to avoid newly added code to be treated as cold. If we have samples
1911  // this will be overwritten in emitAnnotations.
1912  uint64_t initialEntryCount = -1;
1913 
1914  ProfAccForSymsInList = ProfileAccurateForSymsInList && PSL;
1915  if (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate")) {
1916  // initialize all the function entry counts to 0. It means all the
1917  // functions without profile will be regarded as cold.
1918  initialEntryCount = 0;
1919  // profile-sample-accurate is a user assertion which has a higher precedence
1920  // than symbol list. When profile-sample-accurate is on, ignore symbol list.
1921  ProfAccForSymsInList = false;
1922  }
1923  CoverageTracker.setProfAccForSymsInList(ProfAccForSymsInList);
1924 
1925  // PSL -- profile symbol list include all the symbols in sampled binary.
1926  // If ProfileAccurateForSymsInList is enabled, PSL is used to treat
1927  // old functions without samples being cold, without having to worry
1928  // about new and hot functions being mistakenly treated as cold.
1929  if (ProfAccForSymsInList) {
1930  // Initialize the entry count to 0 for functions in the list.
1931  if (PSL->contains(F.getName()))
1932  initialEntryCount = 0;
1933 
1934  // Function in the symbol list but without sample will be regarded as
1935  // cold. To minimize the potential negative performance impact it could
1936  // have, we want to be a little conservative here saying if a function
1937  // shows up in the profile, no matter as outline function, inline instance
1938  // or call targets, treat the function as not being cold. This will handle
1939  // the cases such as most callsites of a function are inlined in sampled
1940  // binary but not inlined in current build (because of source code drift,
1941  // imprecise debug information, or the callsites are all cold individually
1942  // but not cold accumulatively...), so the outline function showing up as
1943  // cold in sampled binary will actually not be cold after current build.
1945  if (NamesInProfile.count(CanonName))
1946  initialEntryCount = -1;
1947  }
1948 
1949  // Initialize entry count when the function has no existing entry
1950  // count value.
1951  if (!F.getEntryCount().hasValue())
1952  F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
1953  std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
1954  if (AM) {
1955  auto &FAM =
1957  .getManager();
1959  } else {
1960  OwnedORE = std::make_unique<OptimizationRemarkEmitter>(&F);
1961  ORE = OwnedORE.get();
1962  }
1963 
1964  if (ProfileIsCS)
1965  Samples = ContextTracker->getBaseSamplesFor(F);
1966  else
1967  Samples = Reader->getSamplesFor(F);
1968 
1969  if (Samples && !Samples->empty())
1970  return emitAnnotations(F);
1971  return false;
1972 }
1973 
1975  ModuleAnalysisManager &AM) {
1978 
1979  auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
1980  return FAM.getResult<AssumptionAnalysis>(F);
1981  };
1982  auto GetTTI = [&](Function &F) -> TargetTransformInfo & {
1983  return FAM.getResult<TargetIRAnalysis>(F);
1984  };
1985  auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
1987  };
1988 
1989  SampleProfileLoader SampleLoader(
1990  ProfileFileName.empty() ? SampleProfileFile : ProfileFileName,
1991  ProfileRemappingFileName.empty() ? SampleProfileRemappingFile
1992  : ProfileRemappingFileName,
1993  LTOPhase, GetAssumptionCache, GetTTI, GetTLI);
1994 
1995  if (!SampleLoader.doInitialization(M, &FAM))
1996  return PreservedAnalyses::all();
1997 
2000  if (!SampleLoader.runOnModule(M, &AM, PSI, &CG))
2001  return PreservedAnalyses::all();
2002 
2003  return PreservedAnalyses::none();
2004 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
Instrumentation.h
llvm::InlineCost::isAlways
bool isAlways() const
Definition: InlineCost.h:102
llvm::sampleprof::FunctionSamples::getBodySamples
const BodySampleMap & getBodySamples() const
Return all the samples collected in the body of the function.
Definition: SampleProf.h:685
llvm::InlineCost::getCost
int getCost() const
Get the inline cost estimate.
Definition: InlineCost.h:108
AssumptionCache.h
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2320
llvm::SampleProfileLoaderPass::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: SampleProfile.cpp:1974
llvm
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::StringRef::empty
LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:153
ProfileInlineGrowthLimit
cl::opt< int > ProfileInlineGrowthLimit("sample-profile-inline-growth-limit", cl::Hidden, cl::init(12), cl::desc("The size growth ratio limit for proirity-based sample profile " "loader inlining."))
ProfileInlineLimitMax
cl::opt< int > ProfileInlineLimitMax("sample-profile-inline-limit-max", cl::Hidden, cl::init(10000), cl::desc("The upper bound of size growth limit for " "proirity-based sample profile loader inlining."))
llvm::sampleprof::FunctionSamples::ProfileIsProbeBased
static bool ProfileIsProbeBased
Definition: SampleProf.h:881
llvm::CallGraphAnalysis
An analysis pass to compute the CallGraph for a Module.
Definition: CallGraph.h:305
llvm::sampleprof::FunctionSamples::ProfileIsCS
static bool ProfileIsCS
Definition: SampleProf.h:883
llvm::ModulePass
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:238
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
IntrinsicInst.h
SCCIterator.h
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:769
DebugInfoMetadata.h
llvm::ThinOrFullLTOPhase::ThinLTOPostLink
@ ThinLTOPostLink
ThinLTO postlink (backend compile) phase.
llvm::sampleprof::SampleProfileReader::profileIsProbeBased
bool profileIsProbeBased() const
Whether input profile is based on pseudo probes.
Definition: SampleProfReader.h:465
llvm::Function
Definition: Function.h:61
llvm::DenseMapBase::lookup
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:197
SizeLimit
static cl::opt< unsigned > SizeLimit("eif-limit", cl::init(6), cl::Hidden, cl::desc("Size limit in Hexagon early if-conversion"))
StringRef.h
Pass.h
DEBUG_TYPE
#define DEBUG_TYPE
Definition: SampleProfile.cpp:105
it
Reference model for inliner Oz decision policy Note this model is also referenced by test Transforms Inline ML tests if replacing it
Definition: README.txt:3
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1167
Statistic.h
llvm::SampleProfileLoaderBaseImpl
Definition: SampleProfileLoaderBaseImpl.h:77
llvm::Function::getSubprogram
DISubprogram * getSubprogram() const
Get the attached subprogram.
Definition: Metadata.cpp:1526
ErrorHandling.h
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:167
SampleProfileRemappingFile
static cl::opt< std::string > SampleProfileRemappingFile("sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden)
OptimizationRemarkEmitter.h
llvm::CallGraph
The basic data container for the call graph of a Module of IR.
Definition: CallGraph.h:73
FAM
FunctionAnalysisManager FAM
Definition: PassBuilderBindings.cpp:59
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::erase
bool erase(const KeyT &Val)
Definition: DenseMap.h:302
llvm::createSampleProfileLoaderPass
ModulePass * createSampleProfileLoaderPass()
Definition: SampleProfile.cpp:1828
ProfileInlineLimitMin
cl::opt< int > ProfileInlineLimitMin("sample-profile-inline-limit-min", cl::Hidden, cl::init(100), cl::desc("The lower bound of size growth limit for " "proirity-based sample profile loader inlining."))
llvm::DILocation
Debug location.
Definition: DebugInfoMetadata.h:1562
llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:158
DenseMap.h
updateIDTMetaData
static void updateIDTMetaData(Instruction &Inst, const SmallVectorImpl< InstrProfValueData > &CallTargets, uint64_t Sum)
Update indirect call target profile metadata for Inst.
Definition: SampleProfile.cpp:775
Module.h
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:329
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile", "Sample Profile loader", false, false) INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass
llvm::updateProfileCallee
void updateProfileCallee(Function *Callee, int64_t entryDelta, const ValueMap< const Value *, WeakTrackingVH > *VMap=nullptr)
Updates profile information by adjusting the entry count by adding entryDelta then scaling callsite i...
Definition: InlineFunction.cpp:1618
ProfileMergeInlinee
static cl::opt< bool > ProfileMergeInlinee("sample-profile-merge-inlinee", cl::Hidden, cl::init(true), cl::desc("Merge past inlinee's profile to outline version if sample " "profile loader decided not to inline a call site. It will " "only be enabled when top-down order of profile loading is " "enabled. "))
llvm::InlineCost::getNever
static InlineCost getNever(const char *Reason)
Definition: InlineCost.h:95
llvm::Optional
Definition: APInt.h:33
llvm::InlineParams
Thresholds to tune inline cost analysis.
Definition: InlineCost.h:160
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
T
#define T
Definition: Mips16ISelLowering.cpp:341
llvm::PseudoProbe::Factor
float Factor
Definition: PseudoProbe.h:86
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::ThinOrFullLTOPhase::ThinLTOPreLink
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:892
llvm::sampleprof::FunctionSamples::findInlinedFunctions
void findInlinedFunctions(DenseSet< GlobalValue::GUID > &S, const StringMap< Function * > &SymbolMap, uint64_t Threshold) const
Recursively traverses all children, if the total sample count of the corresponding function is no les...
Definition: SampleProf.h:747
llvm::dump
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Definition: SparseBitVector.h:876
llvm::sampleprof::FunctionSamples::getName
StringRef getName() const
Return the function name.
Definition: SampleProf.h:777
llvm::sampleprof::FunctionSamplesMap
std::map< std::string, FunctionSamples, std::less<> > FunctionSamplesMap
Definition: SampleProf.h:525
llvm::initializeSampleProfileLoaderLegacyPassPass
void initializeSampleProfileLoaderLegacyPassPass(PassRegistry &)
llvm::InlineCost::isNever
bool isNever() const
Definition: InlineCost.h:103
llvm::sampleprof::SampleProfileReader::getRemapper
SampleProfileReaderItaniumRemapper * getRemapper()
Definition: SampleProfReader.h:488
llvm::detail::DenseSetImpl::insert
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
llvm::Data
@ Data
Definition: SIMachineScheduler.h:56
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:180
llvm::Instruction::setMetadata
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1330
llvm::InlineParams::ComputeFullInlineCost
Optional< bool > ComputeFullInlineCost
Compute inline cost even when the cost has exceeded the threshold.
Definition: InlineCost.h:187
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::PseudoProbe::Id
uint32_t Id
Definition: PseudoProbe.h:80
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
Instruction.h
llvm::ThinOrFullLTOPhase
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition: Pass.h:73
CommandLine.h
llvm::Instruction::getNumSuccessors
unsigned getNumSuccessors() const
Return the number of successors that this instruction has.
Definition: Instruction.cpp:740
llvm::sampleprof::FunctionSamples::getFuncName
StringRef getFuncName() const
Return the original function name.
Definition: SampleProf.h:785
llvm::Instruction::extractProfTotalWeight
bool extractProfTotalWeight(uint64_t &TotalVal) const
Retrieve total raw weight values of a branch.
Definition: Metadata.cpp:1415
GlobalValue.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::GlobalValue::isDeclaration
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:228
llvm::sampleprof::SampleProfileReader::profileIsCS
bool profileIsCS() const
Whether input profile is fully context-sensitive.
Definition: SampleProfReader.h:468
llvm::msgpack::Type::Map
@ Map
llvm::getInlineCost
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
Definition: InlineCost.cpp:2473
PostDominators.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::sampleprof::SampleProfileReader::read
std::error_code read()
The interface to read sample profiles from the associated file.
Definition: SampleProfReader.h:371
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::ProfileSummary::getMD
Metadata * getMD(LLVMContext &Context, bool AddPartialField=true, bool AddPartialProfileRatioField=true)
Return summary information as metadata.
Definition: ProfileSummary.cpp:81
Twine.h
InstrTypes.h
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1396
llvm::sampleprof::ProfiledCallGraph
Definition: ProfiledCallGraph.h:41
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::InlineCost
Represents the cost of inlining a function.
Definition: InlineCost.h:67
TargetLibraryInfo.h
DenseSet.h
false
Definition: StackSlotColoring.cpp:142
llvm::orc::SymbolMap
DenseMap< SymbolStringPtr, JITEvaluatedSymbol > SymbolMap
A map from symbol names (as SymbolStringPtrs) to JITSymbols (address/flags pairs).
Definition: Core.h:111
llvm::sampleprof::FunctionSamples::getGUID
static uint64_t getGUID(StringRef Name)
Definition: SampleProf.h:907
SampleProf.h
InlineAdvisor.h
ProfileCount
Function::ProfileCount ProfileCount
Definition: SampleProfile.cpp:104
llvm::pdb::PDB_SymType::Caller
@ Caller
llvm::Instruction
Definition: Instruction.h:45
InstrProf.h
MDBuilder.h
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:404
llvm::setProbeDistributionFactor
void setProbeDistributionFactor(Instruction &Inst, float Factor)
Definition: PseudoProbe.cpp:65
DebugLoc.h
SmallPtrSet.h
llvm::Function::PCT_Real
@ PCT_Real
Definition: Function.h:287
llvm::CallGraphNode
A node in the call graph for a module.
Definition: CallGraph.h:167
llvm::Instruction::getSuccessor
BasicBlock * getSuccessor(unsigned Idx) const
Return the specified successor. This instruction must be a terminator.
Definition: Instruction.cpp:752
llvm::InlineCost::get
static InlineCost get(int Cost, int Threshold)
Definition: InlineCost.h:87
llvm::getInlineParams
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
Definition: InlineCost.cpp:2743
SampleProfileLoaderBaseUtil.h
StringMap.h
llvm::isLegalToPromote
bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
Definition: CallPromotionUtils.cpp:382
llvm::ProfileSummary::PSK_Sample
@ PSK_Sample
Definition: ProfileSummary.h:47
llvm::sampleprof::SampleProfileReader::getNameTable
virtual std::vector< StringRef > * getNameTable()
It includes all the names that have samples either in outline instance or inline instance.
Definition: SampleProfReader.h:476
llvm::StringMap
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
Definition: StringMap.h:108
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
CFG.h
LoopInfo.h
llvm::PriorityQueue
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Definition: PriorityQueue.h:27
llvm::scc_begin
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
Definition: SCCIterator.h:228
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:39
llvm::sampleprof::FunctionSamples::empty
bool empty() const
Definition: SampleProf.h:647
ValueSymbolTable.h
getCalledFunction
static const Function * getCalledFunction(const Value *V, bool LookThroughBitCast, bool &IsNoBuiltin)
Definition: MemoryBuiltins.cpp:117
llvm::cl::ZeroOrMore
@ ZeroOrMore
Definition: CommandLine.h:120
llvm::emitInlinedInto
void emitInlinedInto(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, const BasicBlock *Block, const Function &Callee, const Function &Caller, const InlineCost &IC, bool ForProfileContext=false, const char *PassName=nullptr)
Emit ORE message.
Definition: InlineAdvisor.cpp:435
SampleProfile.h
llvm::DenseSet< GlobalValue::GUID >
llvm::HighlightColor::Remark
@ Remark
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1422
ReplayInlineAdvisor.h
llvm::ProfileCount
Function::ProfileCount ProfileCount
Definition: SampleProfileLoaderBaseImpl.h:46
llvm::DiagnosticInfoOptimizationBase::Argument
Used in the streaming interface as the general argument type.
Definition: DiagnosticInfo.h:421
ProfiledCallGraph.h
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:463
ProfileSummaryInfo.h
MaxNumPromotions
static cl::opt< unsigned > MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden, cl::ZeroOrMore, cl::desc("Max number of promotions for a single indirect " "call callsite in sample profile loader"))
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2376
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:572
llvm::sampleprof::FunctionSamples::getEntrySamples
uint64_t getEntrySamples() const
Return the sample count of the first instruction of the function.
Definition: SampleProf.h:661
ProfileICPThreshold
static cl::opt< int > ProfileICPThreshold("sample-profile-icp-threshold", cl::Hidden, cl::init(5), cl::desc("Relative hotness threshold for indirect " "call promotion in proirity-based sample profile loader inlining."))
llvm::sampleprof::SampleProfileReader::getSamplesFor
FunctionSamples * getSamplesFor(const Function &F)
Return the samples collected for function F.
Definition: SampleProfReader.h:396
SampleProfileFile
static cl::opt< std::string > SampleProfileFile("sample-profile-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile file loaded by -sample-profile"), cl::Hidden)
llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:169
llvm::scc_iterator
Enumerate the SCCs of a directed graph in reverse topological order of the SCC DAG.
Definition: SCCIterator.h:42
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
IPO.h
llvm::sampleprof::FunctionSamples
Representation of the samples collected for a function.
Definition: SampleProf.h:533
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::DenseMap
Definition: DenseMap.h:714
ErrorOr.h
I
#define I(x, y, z)
Definition: MD5.cpp:59
PriorityQueue.h
Cloning.h
SampleProfReader.h
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
llvm::ProfileSummaryInfoWrapperPass
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Definition: ProfileSummaryInfo.h:185
ArrayRef.h
llvm::codeview::FrameProcedureOptions::Inlined
@ Inlined
llvm::sampleprof::SampleRecord::adjustCallTargets
static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets, float DistributionFactor)
Prorate call targets by a distribution factor.
Definition: SampleProf.h:361
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::sampleprof::FunctionSamples::getCallSiteIdentifier
static LineLocation getCallSiteIdentifier(const DILocation *DIL)
Returns a unique call site identifier for a given debug location of a call instruction.
Definition: SampleProf.cpp:215
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1540
llvm::elfabi::ELFSymbolType::Func
@ Func
llvm::sampleprof::FunctionSamples::UseMD5
static bool UseMD5
Whether the profile uses MD5 to represent string.
Definition: SampleProf.h:892
llvm::codeview::CompileSym2Flags::EC
@ EC
InlineCost.h
CSINLINE_DEBUG
#define CSINLINE_DEBUG
Definition: SampleProfile.cpp:106
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:83
llvm::sampleprof::SampleProfileReader::create
static ErrorOr< std::unique_ptr< SampleProfileReader > > create(const std::string Filename, LLVMContext &C, FSDiscriminatorPass P=FSDiscriminatorPass::Base, const std::string RemapFilename="")
Create a sample profile reader appropriate to the file format.
Definition: SampleProfReader.cpp:1629
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::sampleprof::ProfiledCallGraphNode
Definition: ProfiledCallGraph.h:28
SampleProfileProbe.h
llvm::sampleprof::SampleProfileReader::setSkipFlatProf
virtual void setSkipFlatProf(bool Skip)
Don't read profile without context if the flag is set.
Definition: SampleProfReader.h:484
llvm::BasicBlock::getFirstNonPHIOrDbgOrLifetime
const Instruction * getFirstNonPHIOrDbgOrLifetime(bool SkipPseudoOp=false) const
Returns a pointer to the first instruction in this block that is not a PHINode, a debug intrinsic,...
Definition: BasicBlock.cpp:233
SampleHotCallSiteThreshold
cl::opt< int > SampleHotCallSiteThreshold("sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000), cl::desc("Hot callsite threshold for proirity-based sample profile loader " "inlining."))
llvm::DiagnosticInfoSampleProfile
Diagnostic information for the sample profiler.
Definition: DiagnosticInfo.h:285
llvm::ProfileSummaryAnalysis
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Definition: ProfileSummaryInfo.h:203
llvm::StringSet
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:22
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:200
None.h
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:41
llvm::sampleprof::SampleProfileReader::getProfileSymbolList
virtual std::unique_ptr< ProfileSymbolList > getProfileSymbolList()
Definition: SampleProfReader.h:470
llvm::PseudoProbe::isDangling
bool isDangling() const
Definition: PseudoProbe.h:88
uint32_t
CallPromotionUtils.h
llvm::ContextTrieNode
Definition: SampleContextTracker.h:36
SampleProfileLoaderBaseImpl.h
llvm::format
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
llvm::OptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition: DiagnosticInfo.h:775
CallGraphSCCPass.h
llvm::DenseMapBase::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
llvm::isIndirectCall
static bool isIndirectCall(const MachineInstr &MI)
Definition: ARMBaseInstrInfo.h:659
SampleContextTracker.h
llvm::annotateValueSite
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:954
llvm::sampleprofutil
Definition: SampleProfileLoaderBaseUtil.h:37
llvm::sampleprof::SampleProfileReader::getSummary
ProfileSummary & getSummary() const
Return the profile summary.
Definition: SampleProfReader.h:457
Callee
amdgpu Simplify well known AMD library false FunctionCallee Callee
Definition: AMDGPULibCalls.cpp:206
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
llvm::LLVMContext::diagnose
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Definition: LLVMContext.cpp:228
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
profile
sample profile
Definition: SampleProfile.cpp:1622
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:52
std
Definition: BitVector.h:838
llvm::sampleprof::SampleProfileReader::getOrCreateSamplesFor
FunctionSamples * getOrCreateSamplesFor(const Function &F)
Return the samples collected for function F, create empty FunctionSamples if it doesn't exist.
Definition: SampleProfReader.h:406
llvm::DenseMapBase::end
iterator end()
Definition: DenseMap.h:83
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
GenericDomTree.h
llvm::InlineCost::getAlways
static InlineCost getAlways(const char *Reason)
Definition: InlineCost.h:92
llvm::sampleprof::SampleProfileReader::getProfiles
StringMap< FunctionSamples > & getProfiles()
Return all the profiles.
Definition: SampleProfReader.h:432
llvm::GlobalValue::getGUID
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e.
Definition: GlobalValue.h:511
Casting.h
llvm::sampleprofutil::callsiteIsHot
bool callsiteIsHot(const FunctionSamples *CallsiteFS, ProfileSummaryInfo *PSI, bool ProfAccForSymsInList)
Return true if the given callsite is hot wrt to hot cutoff threshold.
Definition: SampleProfileLoaderBaseUtil.cpp:56
DiagnosticInfo.h
Function.h
llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1423
PassManager.h
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:219
llvm::InlineFunctionInfo
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
Definition: Cloning.h:193
UseProfiledCallGraph
static cl::opt< bool > UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden, cl::desc("Process functions in a top-down order " "defined by the profiled call graph when " "-sample-profile-top-down-load is on."))
llvm::pdb::PDB_SymType::CallSite
@ CallSite
llvm::sampleprof::SampleProfileReader
Sample-based profile reader.
Definition: SampleProfReader.h:343
llvm::ThinOrFullLTOPhase::None
@ None
No LTO/ThinLTO behavior needed.
llvm::sampleprof::FunctionSamples::merge
sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight=1)
Merge the samples in Other into this one.
Definition: SampleProf.h:706
llvm::cl::value_desc
Definition: CommandLine.h:424
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:584
llvm::NOMORE_ICP_MAGICNUM
const uint64_t NOMORE_ICP_MAGICNUM
Magic number in the value profile metadata showing a target has been promoted for the instruction and...
Definition: Metadata.h:57
llvm::sampleprof::SampleProfileReader::setModule
void setModule(const Module *Mod)
Definition: SampleProfReader.h:490
SampleColdCallSiteThreshold
cl::opt< int > SampleColdCallSiteThreshold("sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining cold callsites"))
llvm::CallGraph::getModule
Module & getModule() const
Returns the module the call graph corresponds to.
Definition: CallGraph.h:102
llvm::extractProbe
Optional< PseudoProbe > extractProbe(const Instruction &Inst)
Definition: PseudoProbe.cpp:48
ProfileAccurateForSymsInList
static cl::opt< bool > ProfileAccurateForSymsInList("profile-accurate-for-symsinlist", cl::Hidden, cl::ZeroOrMore, cl::init(true), cl::desc("For symbols in profile symbol list, regard their profiles to " "be accurate. It may be overriden by profile-sample-accurate. "))
llvm::sampleprof::FunctionSamples::getContext
SampleContext & getContext() const
Definition: SampleProf.h:885
ProfileSampleAccurate
static cl::opt< bool > ProfileSampleAccurate("profile-sample-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "callsite and function as having 0 samples. Otherwise, treat " "un-sampled callsites and functions conservatively as unknown. "))
llvm::pgo::promoteIndirectCall
CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
Definition: IndirectCallPromotion.cpp:304
llvm::MDBuilder
Definition: MDBuilder.h:35
llvm::scc_iterator::isAtEnd
bool isAtEnd() const
Direct loop termination test which is more efficient than comparison with end().
Definition: SCCIterator.h:108
CallGraph.h
llvm::DebugLoc::getLine
unsigned getLine() const
Definition: DebugLoc.cpp:25
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:684
llvm::sampleprof::FunctionSamples::getCanonicalFnName
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
Definition: SampleProf.h:793
Instructions.h
loader
sample Sample Profile loader
Definition: SampleProfile.cpp:1623
SmallVector.h
llvm::sampleprof::SampleRecord::SortCallTargets
static const SortedCallTargetSet SortCallTargets(const CallTargetMap &Targets)
Sort call targets in descending order of call frequency.
Definition: SampleProf.h:352
llvm::Instruction::getDebugLoc
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:370
llvm::ErrorOr::get
reference get()
Definition: ErrorOr.h:150
Dominators.h
GetSortedValueDataFromCallTargets
static SmallVector< InstrProfValueData, 2 > GetSortedValueDataFromCallTargets(const SampleRecord::CallTargetMap &M)
Returns the sorted CallTargetMap M by count in descending order.
Definition: SampleProfile.cpp:1423
OverwriteExistingWeights
static cl::opt< bool > OverwriteExistingWeights("overwrite-existing-weights", cl::Hidden, cl::init(false), cl::desc("Ignore existing branch weights on IR and always overwrite."))
ProfileTopDownLoad
static cl::opt< bool > ProfileTopDownLoad("sample-profile-top-down-load", cl::Hidden, cl::init(true), cl::desc("Do profile annotation and inlining for functions in top-down " "order of call graph during sample profile loading. It only " "works for new pass manager. "))
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::try_emplace
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
Definition: DenseMap.h:222
CallsitePrioritizedInline
static cl::opt< bool > CallsitePrioritizedInline("sample-profile-prioritized-inline", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Use call site prioritized inlining for sample profile loader." "Currently only CSSPGO is supported."))
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::ErrorOr
Represents either an error or a value T.
Definition: ErrorOr.h:56
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
ProfileInlineReplayFile
static cl::opt< std::string > ProfileInlineReplayFile("sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"), cl::desc("Optimization remarks file containing inline remarks to be replayed " "by inlining from sample profile loader."), cl::Hidden)
TargetTransformInfo.h
Threshold
static cl::opt< unsigned > Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), cl::init(100), cl::Hidden)
llvm::X86AS::FS
@ FS
Definition: X86.h:188
ProfileSizeInline
static cl::opt< bool > ProfileSizeInline("sample-profile-inline-size", cl::Hidden, cl::init(false), cl::desc("Inline cold call sites in profile loader if it's beneficial " "for code size."))
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:43
llvm::InlineFunction
InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, Function *ForwardVarArgsTo=nullptr)
This function inlines the called function into the basic block of the caller.
Definition: InlineFunction.cpp:1757
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1164
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::InnerAnalysisManagerProxy
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:926
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1478
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::getValueProfDataFromInst
bool getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, InstrProfValueData ValueData[], uint32_t &ActualNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst which is annotated with value profile meta data.
Definition: InstrProf.cpp:999
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
LLVMContext.h
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::Function::ProfileCount
Class to represent profile counts.
Definition: Function.h:292
llvm::cl::desc
Definition: CommandLine.h:414
raw_ostream.h
InitializePasses.h
llvm::OptimizationRemarkEmitterAnalysis
Definition: OptimizationRemarkEmitter.h:164
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
Debug.h
llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:438
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
SpecialSubKind::string
@ string
doesHistoryAllowICP
static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate)
Check whether the indirect call promotion history of Inst allows the promotion for Candidate.
Definition: SampleProfile.cpp:738
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:907
SmallSet.h
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
llvm::SmallVectorImpl::insert
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:772