LLVM  10.0.0svn
SampleProfile.cpp
Go to the documentation of this file.
1 //===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the SampleProfileLoader transformation. This pass
10 // reads a profile file generated by a sampling profiler (e.g. Linux Perf -
11 // http://perf.wiki.kernel.org/) and generates IR metadata to reflect the
12 // profile information in the given profile.
13 //
14 // This pass generates branch weight annotations on the IR:
15 //
16 // - prof: Represents branch weights. This annotation is added to branches
17 // to indicate the weights of each edge coming out of the branch.
18 // The weight of each edge is the weight of the target block for
19 // that edge. The weight of a block B is computed as the maximum
20 // number of samples found in B.
21 //
22 //===----------------------------------------------------------------------===//
23 
25 #include "llvm/ADT/ArrayRef.h"
26 #include "llvm/ADT/DenseMap.h"
27 #include "llvm/ADT/DenseSet.h"
28 #include "llvm/ADT/None.h"
29 #include "llvm/ADT/SmallPtrSet.h"
30 #include "llvm/ADT/SmallSet.h"
31 #include "llvm/ADT/SmallVector.h"
32 #include "llvm/ADT/StringMap.h"
33 #include "llvm/ADT/StringRef.h"
34 #include "llvm/ADT/Twine.h"
37 #include "llvm/Analysis/LoopInfo.h"
42 #include "llvm/IR/BasicBlock.h"
43 #include "llvm/IR/CFG.h"
44 #include "llvm/IR/CallSite.h"
46 #include "llvm/IR/DebugLoc.h"
47 #include "llvm/IR/DiagnosticInfo.h"
48 #include "llvm/IR/Dominators.h"
49 #include "llvm/IR/Function.h"
50 #include "llvm/IR/GlobalValue.h"
51 #include "llvm/IR/InstrTypes.h"
52 #include "llvm/IR/Instruction.h"
53 #include "llvm/IR/Instructions.h"
54 #include "llvm/IR/IntrinsicInst.h"
55 #include "llvm/IR/LLVMContext.h"
56 #include "llvm/IR/MDBuilder.h"
57 #include "llvm/IR/Module.h"
58 #include "llvm/IR/PassManager.h"
60 #include "llvm/Pass.h"
64 #include "llvm/Support/Casting.h"
66 #include "llvm/Support/Debug.h"
68 #include "llvm/Support/ErrorOr.h"
71 #include "llvm/Transforms/IPO.h"
75 #include <algorithm>
76 #include <cassert>
77 #include <cstdint>
78 #include <functional>
79 #include <limits>
80 #include <map>
81 #include <memory>
82 #include <queue>
83 #include <string>
84 #include <system_error>
85 #include <utility>
86 #include <vector>
87 
88 using namespace llvm;
89 using namespace sampleprof;
91 #define DEBUG_TYPE "sample-profile"
92 
93 // Command line option to specify the file to read samples from. This is
94 // mainly used for debugging.
96  "sample-profile-file", cl::init(""), cl::value_desc("filename"),
97  cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);
98 
99 // The named file contains a set of transformations that may have been applied
100 // to the symbol names between the program from which the sample data was
101 // collected and the current program's symbols.
103  "sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"),
104  cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden);
105 
107  "sample-profile-max-propagate-iterations", cl::init(100),
108  cl::desc("Maximum number of iterations to go through when propagating "
109  "sample block/edge weights through the CFG."));
110 
112  "sample-profile-check-record-coverage", cl::init(0), cl::value_desc("N"),
113  cl::desc("Emit a warning if less than N% of records in the input profile "
114  "are matched to the IR."));
115 
117  "sample-profile-check-sample-coverage", cl::init(0), cl::value_desc("N"),
118  cl::desc("Emit a warning if less than N% of samples in the input profile "
119  "are matched to the IR."));
120 
122  "no-warn-sample-unused", cl::init(false), cl::Hidden,
123  cl::desc("Use this option to turn off/on warnings about function with "
124  "samples but without debug information to use those samples. "));
125 
127  "profile-sample-accurate", cl::Hidden, cl::init(false),
128  cl::desc("If the sample profile is accurate, we will mark all un-sampled "
129  "callsite and function as having 0 samples. Otherwise, treat "
130  "un-sampled callsites and functions conservatively as unknown. "));
131 
132 namespace {
133 
134 using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
135 using EquivalenceClassMap = DenseMap<const BasicBlock *, const BasicBlock *>;
136 using Edge = std::pair<const BasicBlock *, const BasicBlock *>;
137 using EdgeWeightMap = DenseMap<Edge, uint64_t>;
138 using BlockEdgeMap =
140 
141 class SampleCoverageTracker {
142 public:
143  SampleCoverageTracker() = default;
144 
145  bool markSamplesUsed(const FunctionSamples *FS, uint32_t LineOffset,
146  uint32_t Discriminator, uint64_t Samples);
147  unsigned computeCoverage(unsigned Used, unsigned Total) const;
148  unsigned countUsedRecords(const FunctionSamples *FS,
149  ProfileSummaryInfo *PSI) const;
150  unsigned countBodyRecords(const FunctionSamples *FS,
151  ProfileSummaryInfo *PSI) const;
152  uint64_t getTotalUsedSamples() const { return TotalUsedSamples; }
153  uint64_t countBodySamples(const FunctionSamples *FS,
154  ProfileSummaryInfo *PSI) const;
155 
156  void clear() {
157  SampleCoverage.clear();
158  TotalUsedSamples = 0;
159  }
160 
161 private:
162  using BodySampleCoverageMap = std::map<LineLocation, unsigned>;
163  using FunctionSamplesCoverageMap =
165 
166  /// Coverage map for sampling records.
167  ///
168  /// This map keeps a record of sampling records that have been matched to
169  /// an IR instruction. This is used to detect some form of staleness in
170  /// profiles (see flag -sample-profile-check-coverage).
171  ///
172  /// Each entry in the map corresponds to a FunctionSamples instance. This is
173  /// another map that counts how many times the sample record at the
174  /// given location has been used.
175  FunctionSamplesCoverageMap SampleCoverage;
176 
177  /// Number of samples used from the profile.
178  ///
179  /// When a sampling record is used for the first time, the samples from
180  /// that record are added to this accumulator. Coverage is later computed
181  /// based on the total number of samples available in this function and
182  /// its callsites.
183  ///
184  /// Note that this accumulator tracks samples used from a single function
185  /// and all the inlined callsites. Strictly, we should have a map of counters
186  /// keyed by FunctionSamples pointers, but these stats are cleared after
187  /// every function, so we just need to keep a single counter.
188  uint64_t TotalUsedSamples = 0;
189 };
190 
191 class GUIDToFuncNameMapper {
192 public:
193  GUIDToFuncNameMapper(Module &M, SampleProfileReader &Reader,
194  DenseMap<uint64_t, StringRef> &GUIDToFuncNameMap)
195  : CurrentReader(Reader), CurrentModule(M),
196  CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) {
197  if (CurrentReader.getFormat() != SPF_Compact_Binary)
198  return;
199 
200  for (const auto &F : CurrentModule) {
201  StringRef OrigName = F.getName();
202  CurrentGUIDToFuncNameMap.insert(
203  {Function::getGUID(OrigName), OrigName});
204 
205  // Local to global var promotion used by optimization like thinlto
206  // will rename the var and add suffix like ".llvm.xxx" to the
207  // original local name. In sample profile, the suffixes of function
208  // names are all stripped. Since it is possible that the mapper is
209  // built in post-thin-link phase and var promotion has been done,
210  // we need to add the substring of function name without the suffix
211  // into the GUIDToFuncNameMap.
213  if (CanonName != OrigName)
214  CurrentGUIDToFuncNameMap.insert(
215  {Function::getGUID(CanonName), CanonName});
216  }
217 
218  // Update GUIDToFuncNameMap for each function including inlinees.
219  SetGUIDToFuncNameMapForAll(&CurrentGUIDToFuncNameMap);
220  }
221 
222  ~GUIDToFuncNameMapper() {
223  if (CurrentReader.getFormat() != SPF_Compact_Binary)
224  return;
225 
226  CurrentGUIDToFuncNameMap.clear();
227 
228  // Reset GUIDToFuncNameMap for of each function as they're no
229  // longer valid at this point.
230  SetGUIDToFuncNameMapForAll(nullptr);
231  }
232 
233 private:
234  void SetGUIDToFuncNameMapForAll(DenseMap<uint64_t, StringRef> *Map) {
235  std::queue<FunctionSamples *> FSToUpdate;
236  for (auto &IFS : CurrentReader.getProfiles()) {
237  FSToUpdate.push(&IFS.second);
238  }
239 
240  while (!FSToUpdate.empty()) {
241  FunctionSamples *FS = FSToUpdate.front();
242  FSToUpdate.pop();
243  FS->GUIDToFuncNameMap = Map;
244  for (const auto &ICS : FS->getCallsiteSamples()) {
245  const FunctionSamplesMap &FSMap = ICS.second;
246  for (auto &IFS : FSMap) {
247  FunctionSamples &FS = const_cast<FunctionSamples &>(IFS.second);
248  FSToUpdate.push(&FS);
249  }
250  }
251  }
252  }
253 
254  SampleProfileReader &CurrentReader;
255  Module &CurrentModule;
256  DenseMap<uint64_t, StringRef> &CurrentGUIDToFuncNameMap;
257 };
258 
259 /// Sample profile pass.
260 ///
261 /// This pass reads profile data from the file specified by
262 /// -sample-profile-file and annotates every affected function with the
263 /// profile information found in that file.
264 class SampleProfileLoader {
265 public:
266  SampleProfileLoader(
267  StringRef Name, StringRef RemapName, bool IsThinLTOPreLink,
268  std::function<AssumptionCache &(Function &)> GetAssumptionCache,
269  std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo)
270  : GetAC(std::move(GetAssumptionCache)),
271  GetTTI(std::move(GetTargetTransformInfo)), Filename(Name),
272  RemappingFilename(RemapName), IsThinLTOPreLink(IsThinLTOPreLink) {}
273 
274  bool doInitialization(Module &M);
275  bool runOnModule(Module &M, ModuleAnalysisManager *AM,
276  ProfileSummaryInfo *_PSI);
277 
278  void dump() { Reader->dump(); }
279 
280 protected:
282  unsigned getFunctionLoc(Function &F);
283  bool emitAnnotations(Function &F);
284  ErrorOr<uint64_t> getInstWeight(const Instruction &I);
285  ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB);
286  const FunctionSamples *findCalleeFunctionSamples(const Instruction &I) const;
287  std::vector<const FunctionSamples *>
288  findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
289  mutable DenseMap<const DILocation *, const FunctionSamples *> DILocation2SampleMap;
290  const FunctionSamples *findFunctionSamples(const Instruction &I) const;
291  bool inlineCallInstruction(Instruction *I);
292  bool inlineHotFunctions(Function &F,
293  DenseSet<GlobalValue::GUID> &InlinedGUIDs);
294  void printEdgeWeight(raw_ostream &OS, Edge E);
295  void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const;
296  void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB);
297  bool computeBlockWeights(Function &F);
298  void findEquivalenceClasses(Function &F);
299  template <bool IsPostDom>
300  void findEquivalencesFor(BasicBlock *BB1, ArrayRef<BasicBlock *> Descendants,
302 
303  void propagateWeights(Function &F);
304  uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
305  void buildEdges(Function &F);
306  bool propagateThroughEdges(Function &F, bool UpdateBlockCount);
307  void computeDominanceAndLoopInfo(Function &F);
308  void clearFunctionData();
309 
310  /// Map basic blocks to their computed weights.
311  ///
312  /// The weight of a basic block is defined to be the maximum
313  /// of all the instruction weights in that block.
314  BlockWeightMap BlockWeights;
315 
316  /// Map edges to their computed weights.
317  ///
318  /// Edge weights are computed by propagating basic block weights in
319  /// SampleProfile::propagateWeights.
320  EdgeWeightMap EdgeWeights;
321 
322  /// Set of visited blocks during propagation.
324 
325  /// Set of visited edges during propagation.
326  SmallSet<Edge, 32> VisitedEdges;
327 
328  /// Equivalence classes for block weights.
329  ///
330  /// Two blocks BB1 and BB2 are in the same equivalence class if they
331  /// dominate and post-dominate each other, and they are in the same loop
332  /// nest. When this happens, the two blocks are guaranteed to execute
333  /// the same number of times.
334  EquivalenceClassMap EquivalenceClass;
335 
336  /// Map from function name to Function *. Used to find the function from
337  /// the function name. If the function name contains suffix, additional
338  /// entry is added to map from the stripped name to the function if there
339  /// is one-to-one mapping.
341 
342  /// Dominance, post-dominance and loop information.
343  std::unique_ptr<DominatorTree> DT;
344  std::unique_ptr<PostDominatorTree> PDT;
345  std::unique_ptr<LoopInfo> LI;
346 
347  std::function<AssumptionCache &(Function &)> GetAC;
348  std::function<TargetTransformInfo &(Function &)> GetTTI;
349 
350  /// Predecessors for each basic block in the CFG.
351  BlockEdgeMap Predecessors;
352 
353  /// Successors for each basic block in the CFG.
354  BlockEdgeMap Successors;
355 
356  SampleCoverageTracker CoverageTracker;
357 
358  /// Profile reader object.
359  std::unique_ptr<SampleProfileReader> Reader;
360 
361  /// Samples collected for the body of this function.
362  FunctionSamples *Samples = nullptr;
363 
364  /// Name of the profile file to load.
365  std::string Filename;
366 
367  /// Name of the profile remapping file to load.
368  std::string RemappingFilename;
369 
370  /// Flag indicating whether the profile input loaded successfully.
371  bool ProfileIsValid = false;
372 
373  /// Flag indicating if the pass is invoked in ThinLTO compile phase.
374  ///
375  /// In this phase, in annotation, we should not promote indirect calls.
376  /// Instead, we will mark GUIDs that needs to be annotated to the function.
377  bool IsThinLTOPreLink;
378 
379  /// Profile Summary Info computed from sample profile.
380  ProfileSummaryInfo *PSI = nullptr;
381 
382  /// Total number of samples collected in this profile.
383  ///
384  /// This is the sum of all the samples collected in all the functions executed
385  /// at runtime.
386  uint64_t TotalCollectedSamples = 0;
387 
388  /// Optimization Remark Emitter used to emit diagnostic remarks.
389  OptimizationRemarkEmitter *ORE = nullptr;
390 
391  // Information recorded when we declined to inline a call site
392  // because we have determined it is too cold is accumulated for
393  // each callee function. Initially this is just the entry count.
394  struct NotInlinedProfileInfo {
395  uint64_t entryCount;
396  };
398 
399  // GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for
400  // all the function symbols defined or declared in current module.
401  DenseMap<uint64_t, StringRef> GUIDToFuncNameMap;
402 };
403 
404 class SampleProfileLoaderLegacyPass : public ModulePass {
405 public:
406  // Class identification, replacement for typeinfo
407  static char ID;
408 
409  SampleProfileLoaderLegacyPass(StringRef Name = SampleProfileFile,
410  bool IsThinLTOPreLink = false)
411  : ModulePass(ID),
412  SampleLoader(Name, SampleProfileRemappingFile, IsThinLTOPreLink,
413  [&](Function &F) -> AssumptionCache & {
414  return ACT->getAssumptionCache(F);
415  },
416  [&](Function &F) -> TargetTransformInfo & {
417  return TTIWP->getTTI(F);
418  }) {
421  }
422 
423  void dump() { SampleLoader.dump(); }
424 
425  bool doInitialization(Module &M) override {
426  return SampleLoader.doInitialization(M);
427  }
428 
429  StringRef getPassName() const override { return "Sample profile pass"; }
430  bool runOnModule(Module &M) override;
431 
432  void getAnalysisUsage(AnalysisUsage &AU) const override {
436  }
437 
438 private:
439  SampleProfileLoader SampleLoader;
440  AssumptionCacheTracker *ACT = nullptr;
441  TargetTransformInfoWrapperPass *TTIWP = nullptr;
442 };
443 
444 } // end anonymous namespace
445 
446 /// Return true if the given callsite is hot wrt to hot cutoff threshold.
447 ///
448 /// Functions that were inlined in the original binary will be represented
449 /// in the inline stack in the sample profile. If the profile shows that
450 /// the original inline decision was "good" (i.e., the callsite is executed
451 /// frequently), then we will recreate the inline decision and apply the
452 /// profile from the inlined callsite.
453 ///
454 /// To decide whether an inlined callsite is hot, we compare the callsite
455 /// sample count with the hot cutoff computed by ProfileSummaryInfo, it is
456 /// regarded as hot if the count is above the cutoff value.
457 static bool callsiteIsHot(const FunctionSamples *CallsiteFS,
458  ProfileSummaryInfo *PSI) {
459  if (!CallsiteFS)
460  return false; // The callsite was not inlined in the original binary.
461 
462  assert(PSI && "PSI is expected to be non null");
463  uint64_t CallsiteTotalSamples = CallsiteFS->getTotalSamples();
464  return PSI->isHotCount(CallsiteTotalSamples);
465 }
466 
467 /// Mark as used the sample record for the given function samples at
468 /// (LineOffset, Discriminator).
469 ///
470 /// \returns true if this is the first time we mark the given record.
471 bool SampleCoverageTracker::markSamplesUsed(const FunctionSamples *FS,
472  uint32_t LineOffset,
473  uint32_t Discriminator,
474  uint64_t Samples) {
475  LineLocation Loc(LineOffset, Discriminator);
476  unsigned &Count = SampleCoverage[FS][Loc];
477  bool FirstTime = (++Count == 1);
478  if (FirstTime)
479  TotalUsedSamples += Samples;
480  return FirstTime;
481 }
482 
483 /// Return the number of sample records that were applied from this profile.
484 ///
485 /// This count does not include records from cold inlined callsites.
486 unsigned
487 SampleCoverageTracker::countUsedRecords(const FunctionSamples *FS,
488  ProfileSummaryInfo *PSI) const {
489  auto I = SampleCoverage.find(FS);
490 
491  // The size of the coverage map for FS represents the number of records
492  // that were marked used at least once.
493  unsigned Count = (I != SampleCoverage.end()) ? I->second.size() : 0;
494 
495  // If there are inlined callsites in this function, count the samples found
496  // in the respective bodies. However, do not bother counting callees with 0
497  // total samples, these are callees that were never invoked at runtime.
498  for (const auto &I : FS->getCallsiteSamples())
499  for (const auto &J : I.second) {
500  const FunctionSamples *CalleeSamples = &J.second;
501  if (callsiteIsHot(CalleeSamples, PSI))
502  Count += countUsedRecords(CalleeSamples, PSI);
503  }
504 
505  return Count;
506 }
507 
508 /// Return the number of sample records in the body of this profile.
509 ///
510 /// This count does not include records from cold inlined callsites.
511 unsigned
512 SampleCoverageTracker::countBodyRecords(const FunctionSamples *FS,
513  ProfileSummaryInfo *PSI) const {
514  unsigned Count = FS->getBodySamples().size();
515 
516  // Only count records in hot callsites.
517  for (const auto &I : FS->getCallsiteSamples())
518  for (const auto &J : I.second) {
519  const FunctionSamples *CalleeSamples = &J.second;
520  if (callsiteIsHot(CalleeSamples, PSI))
521  Count += countBodyRecords(CalleeSamples, PSI);
522  }
523 
524  return Count;
525 }
526 
527 /// Return the number of samples collected in the body of this profile.
528 ///
529 /// This count does not include samples from cold inlined callsites.
530 uint64_t
531 SampleCoverageTracker::countBodySamples(const FunctionSamples *FS,
532  ProfileSummaryInfo *PSI) const {
533  uint64_t Total = 0;
534  for (const auto &I : FS->getBodySamples())
535  Total += I.second.getSamples();
536 
537  // Only count samples in hot callsites.
538  for (const auto &I : FS->getCallsiteSamples())
539  for (const auto &J : I.second) {
540  const FunctionSamples *CalleeSamples = &J.second;
541  if (callsiteIsHot(CalleeSamples, PSI))
542  Total += countBodySamples(CalleeSamples, PSI);
543  }
544 
545  return Total;
546 }
547 
548 /// Return the fraction of sample records used in this profile.
549 ///
550 /// The returned value is an unsigned integer in the range 0-100 indicating
551 /// the percentage of sample records that were used while applying this
552 /// profile to the associated function.
553 unsigned SampleCoverageTracker::computeCoverage(unsigned Used,
554  unsigned Total) const {
555  assert(Used <= Total &&
556  "number of used records cannot exceed the total number of records");
557  return Total > 0 ? Used * 100 / Total : 100;
558 }
559 
560 /// Clear all the per-function data used to load samples and propagate weights.
561 void SampleProfileLoader::clearFunctionData() {
562  BlockWeights.clear();
563  EdgeWeights.clear();
564  VisitedBlocks.clear();
565  VisitedEdges.clear();
566  EquivalenceClass.clear();
567  DT = nullptr;
568  PDT = nullptr;
569  LI = nullptr;
570  Predecessors.clear();
571  Successors.clear();
572  CoverageTracker.clear();
573 }
574 
575 #ifndef NDEBUG
576 /// Print the weight of edge \p E on stream \p OS.
577 ///
578 /// \param OS Stream to emit the output to.
579 /// \param E Edge to print.
580 void SampleProfileLoader::printEdgeWeight(raw_ostream &OS, Edge E) {
581  OS << "weight[" << E.first->getName() << "->" << E.second->getName()
582  << "]: " << EdgeWeights[E] << "\n";
583 }
584 
585 /// Print the equivalence class of block \p BB on stream \p OS.
586 ///
587 /// \param OS Stream to emit the output to.
588 /// \param BB Block to print.
589 void SampleProfileLoader::printBlockEquivalence(raw_ostream &OS,
590  const BasicBlock *BB) {
591  const BasicBlock *Equiv = EquivalenceClass[BB];
592  OS << "equivalence[" << BB->getName()
593  << "]: " << ((Equiv) ? EquivalenceClass[BB]->getName() : "NONE") << "\n";
594 }
595 
596 /// Print the weight of block \p BB on stream \p OS.
597 ///
598 /// \param OS Stream to emit the output to.
599 /// \param BB Block to print.
600 void SampleProfileLoader::printBlockWeight(raw_ostream &OS,
601  const BasicBlock *BB) const {
602  const auto &I = BlockWeights.find(BB);
603  uint64_t W = (I == BlockWeights.end() ? 0 : I->second);
604  OS << "weight[" << BB->getName() << "]: " << W << "\n";
605 }
606 #endif
607 
608 /// Get the weight for an instruction.
609 ///
610 /// The "weight" of an instruction \p Inst is the number of samples
611 /// collected on that instruction at runtime. To retrieve it, we
612 /// need to compute the line number of \p Inst relative to the start of its
613 /// function. We use HeaderLineno to compute the offset. We then
614 /// look up the samples collected for \p Inst using BodySamples.
615 ///
616 /// \param Inst Instruction to query.
617 ///
618 /// \returns the weight of \p Inst.
619 ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
620  const DebugLoc &DLoc = Inst.getDebugLoc();
621  if (!DLoc)
622  return std::error_code();
623 
624  const FunctionSamples *FS = findFunctionSamples(Inst);
625  if (!FS)
626  return std::error_code();
627 
628  // Ignore all intrinsics, phinodes and branch instructions.
629  // Branch and phinodes instruction usually contains debug info from sources outside of
630  // the residing basic block, thus we ignore them during annotation.
631  if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst) || isa<PHINode>(Inst))
632  return std::error_code();
633 
634  // If a direct call/invoke instruction is inlined in profile
635  // (findCalleeFunctionSamples returns non-empty result), but not inlined here,
636  // it means that the inlined callsite has no sample, thus the call
637  // instruction should have 0 count.
638  if ((isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) &&
639  !ImmutableCallSite(&Inst).isIndirectCall() &&
640  findCalleeFunctionSamples(Inst))
641  return 0;
642 
643  const DILocation *DIL = DLoc;
644  uint32_t LineOffset = FunctionSamples::getOffset(DIL);
645  uint32_t Discriminator = DIL->getBaseDiscriminator();
646  ErrorOr<uint64_t> R = FS->findSamplesAt(LineOffset, Discriminator);
647  if (R) {
648  bool FirstMark =
649  CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator, R.get());
650  if (FirstMark) {
651  ORE->emit([&]() {
652  OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "AppliedSamples", &Inst);
653  Remark << "Applied " << ore::NV("NumSamples", *R);
654  Remark << " samples from profile (offset: ";
655  Remark << ore::NV("LineOffset", LineOffset);
656  if (Discriminator) {
657  Remark << ".";
658  Remark << ore::NV("Discriminator", Discriminator);
659  }
660  Remark << ")";
661  return Remark;
662  });
663  }
664  LLVM_DEBUG(dbgs() << " " << DLoc.getLine() << "."
665  << DIL->getBaseDiscriminator() << ":" << Inst
666  << " (line offset: " << LineOffset << "."
667  << DIL->getBaseDiscriminator() << " - weight: " << R.get()
668  << ")\n");
669  }
670  return R;
671 }
672 
673 /// Compute the weight of a basic block.
674 ///
675 /// The weight of basic block \p BB is the maximum weight of all the
676 /// instructions in BB.
677 ///
678 /// \param BB The basic block to query.
679 ///
680 /// \returns the weight for \p BB.
681 ErrorOr<uint64_t> SampleProfileLoader::getBlockWeight(const BasicBlock *BB) {
682  uint64_t Max = 0;
683  bool HasWeight = false;
684  for (auto &I : BB->getInstList()) {
685  const ErrorOr<uint64_t> &R = getInstWeight(I);
686  if (R) {
687  Max = std::max(Max, R.get());
688  HasWeight = true;
689  }
690  }
691  return HasWeight ? ErrorOr<uint64_t>(Max) : std::error_code();
692 }
693 
694 /// Compute and store the weights of every basic block.
695 ///
696 /// This populates the BlockWeights map by computing
697 /// the weights of every basic block in the CFG.
698 ///
699 /// \param F The function to query.
700 bool SampleProfileLoader::computeBlockWeights(Function &F) {
701  bool Changed = false;
702  LLVM_DEBUG(dbgs() << "Block weights\n");
703  for (const auto &BB : F) {
704  ErrorOr<uint64_t> Weight = getBlockWeight(&BB);
705  if (Weight) {
706  BlockWeights[&BB] = Weight.get();
707  VisitedBlocks.insert(&BB);
708  Changed = true;
709  }
710  LLVM_DEBUG(printBlockWeight(dbgs(), &BB));
711  }
712 
713  return Changed;
714 }
715 
716 /// Get the FunctionSamples for a call instruction.
717 ///
718 /// The FunctionSamples of a call/invoke instruction \p Inst is the inlined
719 /// instance in which that call instruction is calling to. It contains
720 /// all samples that resides in the inlined instance. We first find the
721 /// inlined instance in which the call instruction is from, then we
722 /// traverse its children to find the callsite with the matching
723 /// location.
724 ///
725 /// \param Inst Call/Invoke instruction to query.
726 ///
727 /// \returns The FunctionSamples pointer to the inlined instance.
728 const FunctionSamples *
729 SampleProfileLoader::findCalleeFunctionSamples(const Instruction &Inst) const {
730  const DILocation *DIL = Inst.getDebugLoc();
731  if (!DIL) {
732  return nullptr;
733  }
734 
735  StringRef CalleeName;
736  if (const CallInst *CI = dyn_cast<CallInst>(&Inst))
737  if (Function *Callee = CI->getCalledFunction())
738  CalleeName = Callee->getName();
739 
740  const FunctionSamples *FS = findFunctionSamples(Inst);
741  if (FS == nullptr)
742  return nullptr;
743 
745  DIL->getBaseDiscriminator()),
746  CalleeName);
747 }
748 
749 /// Returns a vector of FunctionSamples that are the indirect call targets
750 /// of \p Inst. The vector is sorted by the total number of samples. Stores
751 /// the total call count of the indirect call in \p Sum.
752 std::vector<const FunctionSamples *>
753 SampleProfileLoader::findIndirectCallFunctionSamples(
754  const Instruction &Inst, uint64_t &Sum) const {
755  const DILocation *DIL = Inst.getDebugLoc();
756  std::vector<const FunctionSamples *> R;
757 
758  if (!DIL) {
759  return R;
760  }
761 
762  const FunctionSamples *FS = findFunctionSamples(Inst);
763  if (FS == nullptr)
764  return R;
765 
766  uint32_t LineOffset = FunctionSamples::getOffset(DIL);
767  uint32_t Discriminator = DIL->getBaseDiscriminator();
768 
769  auto T = FS->findCallTargetMapAt(LineOffset, Discriminator);
770  Sum = 0;
771  if (T)
772  for (const auto &T_C : T.get())
773  Sum += T_C.second;
776  if (M->empty())
777  return R;
778  for (const auto &NameFS : *M) {
779  Sum += NameFS.second.getEntrySamples();
780  R.push_back(&NameFS.second);
781  }
782  llvm::sort(R, [](const FunctionSamples *L, const FunctionSamples *R) {
783  if (L->getEntrySamples() != R->getEntrySamples())
784  return L->getEntrySamples() > R->getEntrySamples();
785  return FunctionSamples::getGUID(L->getName()) <
787  });
788  }
789  return R;
790 }
791 
792 /// Get the FunctionSamples for an instruction.
793 ///
794 /// The FunctionSamples of an instruction \p Inst is the inlined instance
795 /// in which that instruction is coming from. We traverse the inline stack
796 /// of that instruction, and match it with the tree nodes in the profile.
797 ///
798 /// \param Inst Instruction to query.
799 ///
800 /// \returns the FunctionSamples pointer to the inlined instance.
801 const FunctionSamples *
802 SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
803  const DILocation *DIL = Inst.getDebugLoc();
804  if (!DIL)
805  return Samples;
806 
807  auto it = DILocation2SampleMap.try_emplace(DIL,nullptr);
808  if (it.second)
809  it.first->second = Samples->findFunctionSamples(DIL);
810  return it.first->second;
811 }
812 
813 bool SampleProfileLoader::inlineCallInstruction(Instruction *I) {
814  assert(isa<CallInst>(I) || isa<InvokeInst>(I));
815  CallSite CS(I);
816  Function *CalledFunction = CS.getCalledFunction();
817  assert(CalledFunction);
818  DebugLoc DLoc = I->getDebugLoc();
819  BasicBlock *BB = I->getParent();
820  InlineParams Params = getInlineParams();
821  Params.ComputeFullInlineCost = true;
822  // Checks if there is anything in the reachable portion of the callee at
823  // this callsite that makes this inlining potentially illegal. Need to
824  // set ComputeFullInlineCost, otherwise getInlineCost may return early
825  // when cost exceeds threshold without checking all IRs in the callee.
826  // The acutal cost does not matter because we only checks isNever() to
827  // see if it is legal to inline the callsite.
828  InlineCost Cost =
829  getInlineCost(cast<CallBase>(*I), Params, GetTTI(*CalledFunction), GetAC,
830  None, nullptr, nullptr);
831  if (Cost.isNever()) {
832  ORE->emit(OptimizationRemark(DEBUG_TYPE, "Not inline", DLoc, BB)
833  << "incompatible inlining");
834  return false;
835  }
836  InlineFunctionInfo IFI(nullptr, &GetAC);
837  if (InlineFunction(CS, IFI)) {
838  // The call to InlineFunction erases I, so we can't pass it here.
839  ORE->emit(OptimizationRemark(DEBUG_TYPE, "HotInline", DLoc, BB)
840  << "inlined hot callee '" << ore::NV("Callee", CalledFunction)
841  << "' into '" << ore::NV("Caller", BB->getParent()) << "'");
842  return true;
843  }
844  return false;
845 }
846 
847 /// Iteratively inline hot callsites of a function.
848 ///
849 /// Iteratively traverse all callsites of the function \p F, and find if
850 /// the corresponding inlined instance exists and is hot in profile. If
851 /// it is hot enough, inline the callsites and adds new callsites of the
852 /// callee into the caller. If the call is an indirect call, first promote
853 /// it to direct call. Each indirect call is limited with a single target.
854 ///
855 /// \param F function to perform iterative inlining.
856 /// \param InlinedGUIDs a set to be updated to include all GUIDs that are
857 /// inlined in the profiled binary.
858 ///
859 /// \returns True if there is any inline happened.
860 bool SampleProfileLoader::inlineHotFunctions(
861  Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
862  DenseSet<Instruction *> PromotedInsns;
863 
864  DenseMap<Instruction *, const FunctionSamples *> localNotInlinedCallSites;
865  bool Changed = false;
866  while (true) {
867  bool LocalChanged = false;
869  for (auto &BB : F) {
870  bool Hot = false;
872  for (auto &I : BB.getInstList()) {
873  const FunctionSamples *FS = nullptr;
874  if ((isa<CallInst>(I) || isa<InvokeInst>(I)) &&
875  !isa<IntrinsicInst>(I) && (FS = findCalleeFunctionSamples(I))) {
876  Candidates.push_back(&I);
877  if (FS->getEntrySamples() > 0)
878  localNotInlinedCallSites.try_emplace(&I, FS);
879  if (callsiteIsHot(FS, PSI))
880  Hot = true;
881  }
882  }
883  if (Hot) {
884  CIS.insert(CIS.begin(), Candidates.begin(), Candidates.end());
885  }
886  }
887  for (auto I : CIS) {
888  Function *CalledFunction = CallSite(I).getCalledFunction();
889  // Do not inline recursive calls.
890  if (CalledFunction == &F)
891  continue;
892  if (CallSite(I).isIndirectCall()) {
893  if (PromotedInsns.count(I))
894  continue;
895  uint64_t Sum;
896  for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
897  if (IsThinLTOPreLink) {
898  FS->findInlinedFunctions(InlinedGUIDs, F.getParent(),
900  continue;
901  }
902  auto CalleeFunctionName = FS->getFuncNameInModule(F.getParent());
903  // If it is a recursive call, we do not inline it as it could bloat
904  // the code exponentially. There is way to better handle this, e.g.
905  // clone the caller first, and inline the cloned caller if it is
906  // recursive. As llvm does not inline recursive calls, we will
907  // simply ignore it instead of handling it explicitly.
908  if (CalleeFunctionName == F.getName())
909  continue;
910 
911  if (!callsiteIsHot(FS, PSI))
912  continue;
913 
914  const char *Reason = "Callee function not available";
915  auto R = SymbolMap.find(CalleeFunctionName);
916  if (R != SymbolMap.end() && R->getValue() &&
917  !R->getValue()->isDeclaration() &&
918  R->getValue()->getSubprogram() &&
919  isLegalToPromote(CallSite(I), R->getValue(), &Reason)) {
920  uint64_t C = FS->getEntrySamples();
921  Instruction *DI =
922  pgo::promoteIndirectCall(I, R->getValue(), C, Sum, false, ORE);
923  Sum -= C;
924  PromotedInsns.insert(I);
925  // If profile mismatches, we should not attempt to inline DI.
926  if ((isa<CallInst>(DI) || isa<InvokeInst>(DI)) &&
927  inlineCallInstruction(DI)) {
928  localNotInlinedCallSites.erase(I);
929  LocalChanged = true;
930  }
931  } else {
932  LLVM_DEBUG(dbgs()
933  << "\nFailed to promote indirect call to "
934  << CalleeFunctionName << " because " << Reason << "\n");
935  }
936  }
937  } else if (CalledFunction && CalledFunction->getSubprogram() &&
938  !CalledFunction->isDeclaration()) {
939  if (inlineCallInstruction(I)) {
940  localNotInlinedCallSites.erase(I);
941  LocalChanged = true;
942  }
943  } else if (IsThinLTOPreLink) {
944  findCalleeFunctionSamples(*I)->findInlinedFunctions(
945  InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold());
946  }
947  }
948  if (LocalChanged) {
949  Changed = true;
950  } else {
951  break;
952  }
953  }
954 
955  // Accumulate not inlined callsite information into notInlinedSamples
956  for (const auto &Pair : localNotInlinedCallSites) {
957  Instruction *I = Pair.getFirst();
959  if (!Callee || Callee->isDeclaration())
960  continue;
961  const FunctionSamples *FS = Pair.getSecond();
962  auto pair =
963  notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
964  pair.first->second.entryCount += FS->getEntrySamples();
965  }
966  return Changed;
967 }
968 
969 /// Find equivalence classes for the given block.
970 ///
971 /// This finds all the blocks that are guaranteed to execute the same
972 /// number of times as \p BB1. To do this, it traverses all the
973 /// descendants of \p BB1 in the dominator or post-dominator tree.
974 ///
975 /// A block BB2 will be in the same equivalence class as \p BB1 if
976 /// the following holds:
977 ///
978 /// 1- \p BB1 is a descendant of BB2 in the opposite tree. So, if BB2
979 /// is a descendant of \p BB1 in the dominator tree, then BB2 should
980 /// dominate BB1 in the post-dominator tree.
981 ///
982 /// 2- Both BB2 and \p BB1 must be in the same loop.
983 ///
984 /// For every block BB2 that meets those two requirements, we set BB2's
985 /// equivalence class to \p BB1.
986 ///
987 /// \param BB1 Block to check.
988 /// \param Descendants Descendants of \p BB1 in either the dom or pdom tree.
989 /// \param DomTree Opposite dominator tree. If \p Descendants is filled
990 /// with blocks from \p BB1's dominator tree, then
991 /// this is the post-dominator tree, and vice versa.
992 template <bool IsPostDom>
993 void SampleProfileLoader::findEquivalencesFor(
994  BasicBlock *BB1, ArrayRef<BasicBlock *> Descendants,
996  const BasicBlock *EC = EquivalenceClass[BB1];
997  uint64_t Weight = BlockWeights[EC];
998  for (const auto *BB2 : Descendants) {
999  bool IsDomParent = DomTree->dominates(BB2, BB1);
1000  bool IsInSameLoop = LI->getLoopFor(BB1) == LI->getLoopFor(BB2);
1001  if (BB1 != BB2 && IsDomParent && IsInSameLoop) {
1002  EquivalenceClass[BB2] = EC;
1003  // If BB2 is visited, then the entire EC should be marked as visited.
1004  if (VisitedBlocks.count(BB2)) {
1005  VisitedBlocks.insert(EC);
1006  }
1007 
1008  // If BB2 is heavier than BB1, make BB2 have the same weight
1009  // as BB1.
1010  //
1011  // Note that we don't worry about the opposite situation here
1012  // (when BB2 is lighter than BB1). We will deal with this
1013  // during the propagation phase. Right now, we just want to
1014  // make sure that BB1 has the largest weight of all the
1015  // members of its equivalence set.
1016  Weight = std::max(Weight, BlockWeights[BB2]);
1017  }
1018  }
1019  if (EC == &EC->getParent()->getEntryBlock()) {
1020  BlockWeights[EC] = Samples->getHeadSamples() + 1;
1021  } else {
1022  BlockWeights[EC] = Weight;
1023  }
1024 }
1025 
1026 /// Find equivalence classes.
1027 ///
1028 /// Since samples may be missing from blocks, we can fill in the gaps by setting
1029 /// the weights of all the blocks in the same equivalence class to the same
1030 /// weight. To compute the concept of equivalence, we use dominance and loop
1031 /// information. Two blocks B1 and B2 are in the same equivalence class if B1
1032 /// dominates B2, B2 post-dominates B1 and both are in the same loop.
1033 ///
1034 /// \param F The function to query.
1035 void SampleProfileLoader::findEquivalenceClasses(Function &F) {
1036  SmallVector<BasicBlock *, 8> DominatedBBs;
1037  LLVM_DEBUG(dbgs() << "\nBlock equivalence classes\n");
1038  // Find equivalence sets based on dominance and post-dominance information.
1039  for (auto &BB : F) {
1040  BasicBlock *BB1 = &BB;
1041 
1042  // Compute BB1's equivalence class once.
1043  if (EquivalenceClass.count(BB1)) {
1044  LLVM_DEBUG(printBlockEquivalence(dbgs(), BB1));
1045  continue;
1046  }
1047 
1048  // By default, blocks are in their own equivalence class.
1049  EquivalenceClass[BB1] = BB1;
1050 
1051  // Traverse all the blocks dominated by BB1. We are looking for
1052  // every basic block BB2 such that:
1053  //
1054  // 1- BB1 dominates BB2.
1055  // 2- BB2 post-dominates BB1.
1056  // 3- BB1 and BB2 are in the same loop nest.
1057  //
1058  // If all those conditions hold, it means that BB2 is executed
1059  // as many times as BB1, so they are placed in the same equivalence
1060  // class by making BB2's equivalence class be BB1.
1061  DominatedBBs.clear();
1062  DT->getDescendants(BB1, DominatedBBs);
1063  findEquivalencesFor(BB1, DominatedBBs, PDT.get());
1064 
1065  LLVM_DEBUG(printBlockEquivalence(dbgs(), BB1));
1066  }
1067 
1068  // Assign weights to equivalence classes.
1069  //
1070  // All the basic blocks in the same equivalence class will execute
1071  // the same number of times. Since we know that the head block in
1072  // each equivalence class has the largest weight, assign that weight
1073  // to all the blocks in that equivalence class.
1074  LLVM_DEBUG(
1075  dbgs() << "\nAssign the same weight to all blocks in the same class\n");
1076  for (auto &BI : F) {
1077  const BasicBlock *BB = &BI;
1078  const BasicBlock *EquivBB = EquivalenceClass[BB];
1079  if (BB != EquivBB)
1080  BlockWeights[BB] = BlockWeights[EquivBB];
1081  LLVM_DEBUG(printBlockWeight(dbgs(), BB));
1082  }
1083 }
1084 
1085 /// Visit the given edge to decide if it has a valid weight.
1086 ///
1087 /// If \p E has not been visited before, we copy to \p UnknownEdge
1088 /// and increment the count of unknown edges.
1089 ///
1090 /// \param E Edge to visit.
1091 /// \param NumUnknownEdges Current number of unknown edges.
1092 /// \param UnknownEdge Set if E has not been visited before.
1093 ///
1094 /// \returns E's weight, if known. Otherwise, return 0.
1095 uint64_t SampleProfileLoader::visitEdge(Edge E, unsigned *NumUnknownEdges,
1096  Edge *UnknownEdge) {
1097  if (!VisitedEdges.count(E)) {
1098  (*NumUnknownEdges)++;
1099  *UnknownEdge = E;
1100  return 0;
1101  }
1102 
1103  return EdgeWeights[E];
1104 }
1105 
1106 /// Propagate weights through incoming/outgoing edges.
1107 ///
1108 /// If the weight of a basic block is known, and there is only one edge
1109 /// with an unknown weight, we can calculate the weight of that edge.
1110 ///
1111 /// Similarly, if all the edges have a known count, we can calculate the
1112 /// count of the basic block, if needed.
1113 ///
1114 /// \param F Function to process.
1115 /// \param UpdateBlockCount Whether we should update basic block counts that
1116 /// has already been annotated.
1117 ///
1118 /// \returns True if new weights were assigned to edges or blocks.
1119 bool SampleProfileLoader::propagateThroughEdges(Function &F,
1120  bool UpdateBlockCount) {
1121  bool Changed = false;
1122  LLVM_DEBUG(dbgs() << "\nPropagation through edges\n");
1123  for (const auto &BI : F) {
1124  const BasicBlock *BB = &BI;
1125  const BasicBlock *EC = EquivalenceClass[BB];
1126 
1127  // Visit all the predecessor and successor edges to determine
1128  // which ones have a weight assigned already. Note that it doesn't
1129  // matter that we only keep track of a single unknown edge. The
1130  // only case we are interested in handling is when only a single
1131  // edge is unknown (see setEdgeOrBlockWeight).
1132  for (unsigned i = 0; i < 2; i++) {
1133  uint64_t TotalWeight = 0;
1134  unsigned NumUnknownEdges = 0, NumTotalEdges = 0;
1135  Edge UnknownEdge, SelfReferentialEdge, SingleEdge;
1136 
1137  if (i == 0) {
1138  // First, visit all predecessor edges.
1139  NumTotalEdges = Predecessors[BB].size();
1140  for (auto *Pred : Predecessors[BB]) {
1141  Edge E = std::make_pair(Pred, BB);
1142  TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
1143  if (E.first == E.second)
1144  SelfReferentialEdge = E;
1145  }
1146  if (NumTotalEdges == 1) {
1147  SingleEdge = std::make_pair(Predecessors[BB][0], BB);
1148  }
1149  } else {
1150  // On the second round, visit all successor edges.
1151  NumTotalEdges = Successors[BB].size();
1152  for (auto *Succ : Successors[BB]) {
1153  Edge E = std::make_pair(BB, Succ);
1154  TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
1155  }
1156  if (NumTotalEdges == 1) {
1157  SingleEdge = std::make_pair(BB, Successors[BB][0]);
1158  }
1159  }
1160 
1161  // After visiting all the edges, there are three cases that we
1162  // can handle immediately:
1163  //
1164  // - All the edge weights are known (i.e., NumUnknownEdges == 0).
1165  // In this case, we simply check that the sum of all the edges
1166  // is the same as BB's weight. If not, we change BB's weight
1167  // to match. Additionally, if BB had not been visited before,
1168  // we mark it visited.
1169  //
1170  // - Only one edge is unknown and BB has already been visited.
1171  // In this case, we can compute the weight of the edge by
1172  // subtracting the total block weight from all the known
1173  // edge weights. If the edges weight more than BB, then the
1174  // edge of the last remaining edge is set to zero.
1175  //
1176  // - There exists a self-referential edge and the weight of BB is
1177  // known. In this case, this edge can be based on BB's weight.
1178  // We add up all the other known edges and set the weight on
1179  // the self-referential edge as we did in the previous case.
1180  //
1181  // In any other case, we must continue iterating. Eventually,
1182  // all edges will get a weight, or iteration will stop when
1183  // it reaches SampleProfileMaxPropagateIterations.
1184  if (NumUnknownEdges <= 1) {
1185  uint64_t &BBWeight = BlockWeights[EC];
1186  if (NumUnknownEdges == 0) {
1187  if (!VisitedBlocks.count(EC)) {
1188  // If we already know the weight of all edges, the weight of the
1189  // basic block can be computed. It should be no larger than the sum
1190  // of all edge weights.
1191  if (TotalWeight > BBWeight) {
1192  BBWeight = TotalWeight;
1193  Changed = true;
1194  LLVM_DEBUG(dbgs() << "All edge weights for " << BB->getName()
1195  << " known. Set weight for block: ";
1196  printBlockWeight(dbgs(), BB););
1197  }
1198  } else if (NumTotalEdges == 1 &&
1199  EdgeWeights[SingleEdge] < BlockWeights[EC]) {
1200  // If there is only one edge for the visited basic block, use the
1201  // block weight to adjust edge weight if edge weight is smaller.
1202  EdgeWeights[SingleEdge] = BlockWeights[EC];
1203  Changed = true;
1204  }
1205  } else if (NumUnknownEdges == 1 && VisitedBlocks.count(EC)) {
1206  // If there is a single unknown edge and the block has been
1207  // visited, then we can compute E's weight.
1208  if (BBWeight >= TotalWeight)
1209  EdgeWeights[UnknownEdge] = BBWeight - TotalWeight;
1210  else
1211  EdgeWeights[UnknownEdge] = 0;
1212  const BasicBlock *OtherEC;
1213  if (i == 0)
1214  OtherEC = EquivalenceClass[UnknownEdge.first];
1215  else
1216  OtherEC = EquivalenceClass[UnknownEdge.second];
1217  // Edge weights should never exceed the BB weights it connects.
1218  if (VisitedBlocks.count(OtherEC) &&
1219  EdgeWeights[UnknownEdge] > BlockWeights[OtherEC])
1220  EdgeWeights[UnknownEdge] = BlockWeights[OtherEC];
1221  VisitedEdges.insert(UnknownEdge);
1222  Changed = true;
1223  LLVM_DEBUG(dbgs() << "Set weight for edge: ";
1224  printEdgeWeight(dbgs(), UnknownEdge));
1225  }
1226  } else if (VisitedBlocks.count(EC) && BlockWeights[EC] == 0) {
1227  // If a block Weights 0, all its in/out edges should weight 0.
1228  if (i == 0) {
1229  for (auto *Pred : Predecessors[BB]) {
1230  Edge E = std::make_pair(Pred, BB);
1231  EdgeWeights[E] = 0;
1232  VisitedEdges.insert(E);
1233  }
1234  } else {
1235  for (auto *Succ : Successors[BB]) {
1236  Edge E = std::make_pair(BB, Succ);
1237  EdgeWeights[E] = 0;
1238  VisitedEdges.insert(E);
1239  }
1240  }
1241  } else if (SelfReferentialEdge.first && VisitedBlocks.count(EC)) {
1242  uint64_t &BBWeight = BlockWeights[BB];
1243  // We have a self-referential edge and the weight of BB is known.
1244  if (BBWeight >= TotalWeight)
1245  EdgeWeights[SelfReferentialEdge] = BBWeight - TotalWeight;
1246  else
1247  EdgeWeights[SelfReferentialEdge] = 0;
1248  VisitedEdges.insert(SelfReferentialEdge);
1249  Changed = true;
1250  LLVM_DEBUG(dbgs() << "Set self-referential edge weight to: ";
1251  printEdgeWeight(dbgs(), SelfReferentialEdge));
1252  }
1253  if (UpdateBlockCount && !VisitedBlocks.count(EC) && TotalWeight > 0) {
1254  BlockWeights[EC] = TotalWeight;
1255  VisitedBlocks.insert(EC);
1256  Changed = true;
1257  }
1258  }
1259  }
1260 
1261  return Changed;
1262 }
1263 
1264 /// Build in/out edge lists for each basic block in the CFG.
1265 ///
1266 /// We are interested in unique edges. If a block B1 has multiple
1267 /// edges to another block B2, we only add a single B1->B2 edge.
1268 void SampleProfileLoader::buildEdges(Function &F) {
1269  for (auto &BI : F) {
1270  BasicBlock *B1 = &BI;
1271 
1272  // Add predecessors for B1.
1274  if (!Predecessors[B1].empty())
1275  llvm_unreachable("Found a stale predecessors list in a basic block.");
1276  for (pred_iterator PI = pred_begin(B1), PE = pred_end(B1); PI != PE; ++PI) {
1277  BasicBlock *B2 = *PI;
1278  if (Visited.insert(B2).second)
1279  Predecessors[B1].push_back(B2);
1280  }
1281 
1282  // Add successors for B1.
1283  Visited.clear();
1284  if (!Successors[B1].empty())
1285  llvm_unreachable("Found a stale successors list in a basic block.");
1286  for (succ_iterator SI = succ_begin(B1), SE = succ_end(B1); SI != SE; ++SI) {
1287  BasicBlock *B2 = *SI;
1288  if (Visited.insert(B2).second)
1289  Successors[B1].push_back(B2);
1290  }
1291  }
1292 }
1293 
1294 /// Returns the sorted CallTargetMap \p M by count in descending order.
1296  const SampleRecord::CallTargetMap &M) {
1298  for (auto I = M.begin(); I != M.end(); ++I)
1299  R.push_back({FunctionSamples::getGUID(I->getKey()), I->getValue()});
1300  llvm::sort(R, [](const InstrProfValueData &L, const InstrProfValueData &R) {
1301  if (L.Count == R.Count)
1302  return L.Value > R.Value;
1303  else
1304  return L.Count > R.Count;
1305  });
1306  return R;
1307 }
1308 
1309 /// Propagate weights into edges
1310 ///
1311 /// The following rules are applied to every block BB in the CFG:
1312 ///
1313 /// - If BB has a single predecessor/successor, then the weight
1314 /// of that edge is the weight of the block.
1315 ///
1316 /// - If all incoming or outgoing edges are known except one, and the
1317 /// weight of the block is already known, the weight of the unknown
1318 /// edge will be the weight of the block minus the sum of all the known
1319 /// edges. If the sum of all the known edges is larger than BB's weight,
1320 /// we set the unknown edge weight to zero.
1321 ///
1322 /// - If there is a self-referential edge, and the weight of the block is
1323 /// known, the weight for that edge is set to the weight of the block
1324 /// minus the weight of the other incoming edges to that block (if
1325 /// known).
1326 void SampleProfileLoader::propagateWeights(Function &F) {
1327  bool Changed = true;
1328  unsigned I = 0;
1329 
1330  // If BB weight is larger than its corresponding loop's header BB weight,
1331  // use the BB weight to replace the loop header BB weight.
1332  for (auto &BI : F) {
1333  BasicBlock *BB = &BI;
1334  Loop *L = LI->getLoopFor(BB);
1335  if (!L) {
1336  continue;
1337  }
1338  BasicBlock *Header = L->getHeader();
1339  if (Header && BlockWeights[BB] > BlockWeights[Header]) {
1340  BlockWeights[Header] = BlockWeights[BB];
1341  }
1342  }
1343 
1344  // Before propagation starts, build, for each block, a list of
1345  // unique predecessors and successors. This is necessary to handle
1346  // identical edges in multiway branches. Since we visit all blocks and all
1347  // edges of the CFG, it is cleaner to build these lists once at the start
1348  // of the pass.
1349  buildEdges(F);
1350 
1351  // Propagate until we converge or we go past the iteration limit.
1352  while (Changed && I++ < SampleProfileMaxPropagateIterations) {
1353  Changed = propagateThroughEdges(F, false);
1354  }
1355 
1356  // The first propagation propagates BB counts from annotated BBs to unknown
1357  // BBs. The 2nd propagation pass resets edges weights, and use all BB weights
1358  // to propagate edge weights.
1359  VisitedEdges.clear();
1360  Changed = true;
1361  while (Changed && I++ < SampleProfileMaxPropagateIterations) {
1362  Changed = propagateThroughEdges(F, false);
1363  }
1364 
1365  // The 3rd propagation pass allows adjust annotated BB weights that are
1366  // obviously wrong.
1367  Changed = true;
1368  while (Changed && I++ < SampleProfileMaxPropagateIterations) {
1369  Changed = propagateThroughEdges(F, true);
1370  }
1371 
1372  // Generate MD_prof metadata for every branch instruction using the
1373  // edge weights computed during propagation.
1374  LLVM_DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n");
1375  LLVMContext &Ctx = F.getContext();
1376  MDBuilder MDB(Ctx);
1377  for (auto &BI : F) {
1378  BasicBlock *BB = &BI;
1379 
1380  if (BlockWeights[BB]) {
1381  for (auto &I : BB->getInstList()) {
1382  if (!isa<CallInst>(I) && !isa<InvokeInst>(I))
1383  continue;
1384  CallSite CS(&I);
1385  if (!CS.getCalledFunction()) {
1386  const DebugLoc &DLoc = I.getDebugLoc();
1387  if (!DLoc)
1388  continue;
1389  const DILocation *DIL = DLoc;
1390  uint32_t LineOffset = FunctionSamples::getOffset(DIL);
1391  uint32_t Discriminator = DIL->getBaseDiscriminator();
1392 
1393  const FunctionSamples *FS = findFunctionSamples(I);
1394  if (!FS)
1395  continue;
1396  auto T = FS->findCallTargetMapAt(LineOffset, Discriminator);
1397  if (!T || T.get().empty())
1398  continue;
1399  SmallVector<InstrProfValueData, 2> SortedCallTargets =
1400  SortCallTargets(T.get());
1401  uint64_t Sum;
1402  findIndirectCallFunctionSamples(I, Sum);
1403  annotateValueSite(*I.getParent()->getParent()->getParent(), I,
1404  SortedCallTargets, Sum, IPVK_IndirectCallTarget,
1405  SortedCallTargets.size());
1406  } else if (!isa<IntrinsicInst>(&I)) {
1407  I.setMetadata(LLVMContext::MD_prof,
1408  MDB.createBranchWeights(
1409  {static_cast<uint32_t>(BlockWeights[BB])}));
1410  }
1411  }
1412  }
1413  Instruction *TI = BB->getTerminator();
1414  if (TI->getNumSuccessors() == 1)
1415  continue;
1416  if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
1417  continue;
1418 
1419  DebugLoc BranchLoc = TI->getDebugLoc();
1420  LLVM_DEBUG(dbgs() << "\nGetting weights for branch at line "
1421  << ((BranchLoc) ? Twine(BranchLoc.getLine())
1422  : Twine("<UNKNOWN LOCATION>"))
1423  << ".\n");
1424  SmallVector<uint32_t, 4> Weights;
1425  uint32_t MaxWeight = 0;
1426  Instruction *MaxDestInst;
1427  for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
1428  BasicBlock *Succ = TI->getSuccessor(I);
1429  Edge E = std::make_pair(BB, Succ);
1430  uint64_t Weight = EdgeWeights[E];
1431  LLVM_DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), E));
1432  // Use uint32_t saturated arithmetic to adjust the incoming weights,
1433  // if needed. Sample counts in profiles are 64-bit unsigned values,
1434  // but internally branch weights are expressed as 32-bit values.
1435  if (Weight > std::numeric_limits<uint32_t>::max()) {
1436  LLVM_DEBUG(dbgs() << " (saturated due to uint32_t overflow)");
1438  }
1439  // Weight is added by one to avoid propagation errors introduced by
1440  // 0 weights.
1441  Weights.push_back(static_cast<uint32_t>(Weight + 1));
1442  if (Weight != 0) {
1443  if (Weight > MaxWeight) {
1444  MaxWeight = Weight;
1445  MaxDestInst = Succ->getFirstNonPHIOrDbgOrLifetime();
1446  }
1447  }
1448  }
1449 
1450  uint64_t TempWeight;
1451  // Only set weights if there is at least one non-zero weight.
1452  // In any other case, let the analyzer set weights.
1453  // Do not set weights if the weights are present. In ThinLTO, the profile
1454  // annotation is done twice. If the first annotation already set the
1455  // weights, the second pass does not need to set it.
1456  if (MaxWeight > 0 && !TI->extractProfTotalWeight(TempWeight)) {
1457  LLVM_DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
1458  TI->setMetadata(LLVMContext::MD_prof,
1459  MDB.createBranchWeights(Weights));
1460  ORE->emit([&]() {
1461  return OptimizationRemark(DEBUG_TYPE, "PopularDest", MaxDestInst)
1462  << "most popular destination for conditional branches at "
1463  << ore::NV("CondBranchesLoc", BranchLoc);
1464  });
1465  } else {
1466  LLVM_DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
1467  }
1468  }
1469 }
1470 
1471 /// Get the line number for the function header.
1472 ///
1473 /// This looks up function \p F in the current compilation unit and
1474 /// retrieves the line number where the function is defined. This is
1475 /// line 0 for all the samples read from the profile file. Every line
1476 /// number is relative to this line.
1477 ///
1478 /// \param F Function object to query.
1479 ///
1480 /// \returns the line number where \p F is defined. If it returns 0,
1481 /// it means that there is no debug information available for \p F.
1482 unsigned SampleProfileLoader::getFunctionLoc(Function &F) {
1483  if (DISubprogram *S = F.getSubprogram())
1484  return S->getLine();
1485 
1486  if (NoWarnSampleUnused)
1487  return 0;
1488 
1489  // If the start of \p F is missing, emit a diagnostic to inform the user
1490  // about the missed opportunity.
1492  "No debug information found in function " + F.getName() +
1493  ": Function profile not used",
1494  DS_Warning));
1495  return 0;
1496 }
1497 
1498 void SampleProfileLoader::computeDominanceAndLoopInfo(Function &F) {
1499  DT.reset(new DominatorTree);
1500  DT->recalculate(F);
1501 
1502  PDT.reset(new PostDominatorTree(F));
1503 
1504  LI.reset(new LoopInfo);
1505  LI->analyze(*DT);
1506 }
1507 
1508 /// Generate branch weight metadata for all branches in \p F.
1509 ///
1510 /// Branch weights are computed out of instruction samples using a
1511 /// propagation heuristic. Propagation proceeds in 3 phases:
1512 ///
1513 /// 1- Assignment of block weights. All the basic blocks in the function
1514 /// are initial assigned the same weight as their most frequently
1515 /// executed instruction.
1516 ///
1517 /// 2- Creation of equivalence classes. Since samples may be missing from
1518 /// blocks, we can fill in the gaps by setting the weights of all the
1519 /// blocks in the same equivalence class to the same weight. To compute
1520 /// the concept of equivalence, we use dominance and loop information.
1521 /// Two blocks B1 and B2 are in the same equivalence class if B1
1522 /// dominates B2, B2 post-dominates B1 and both are in the same loop.
1523 ///
1524 /// 3- Propagation of block weights into edges. This uses a simple
1525 /// propagation heuristic. The following rules are applied to every
1526 /// block BB in the CFG:
1527 ///
1528 /// - If BB has a single predecessor/successor, then the weight
1529 /// of that edge is the weight of the block.
1530 ///
1531 /// - If all the edges are known except one, and the weight of the
1532 /// block is already known, the weight of the unknown edge will
1533 /// be the weight of the block minus the sum of all the known
1534 /// edges. If the sum of all the known edges is larger than BB's weight,
1535 /// we set the unknown edge weight to zero.
1536 ///
1537 /// - If there is a self-referential edge, and the weight of the block is
1538 /// known, the weight for that edge is set to the weight of the block
1539 /// minus the weight of the other incoming edges to that block (if
1540 /// known).
1541 ///
1542 /// Since this propagation is not guaranteed to finalize for every CFG, we
1543 /// only allow it to proceed for a limited number of iterations (controlled
1544 /// by -sample-profile-max-propagate-iterations).
1545 ///
1546 /// FIXME: Try to replace this propagation heuristic with a scheme
1547 /// that is guaranteed to finalize. A work-list approach similar to
1548 /// the standard value propagation algorithm used by SSA-CCP might
1549 /// work here.
1550 ///
1551 /// Once all the branch weights are computed, we emit the MD_prof
1552 /// metadata on BB using the computed values for each of its branches.
1553 ///
1554 /// \param F The function to query.
1555 ///
1556 /// \returns true if \p F was modified. Returns false, otherwise.
1557 bool SampleProfileLoader::emitAnnotations(Function &F) {
1558  bool Changed = false;
1559 
1560  if (getFunctionLoc(F) == 0)
1561  return false;
1562 
1563  LLVM_DEBUG(dbgs() << "Line number for the first instruction in "
1564  << F.getName() << ": " << getFunctionLoc(F) << "\n");
1565 
1566  DenseSet<GlobalValue::GUID> InlinedGUIDs;
1567  Changed |= inlineHotFunctions(F, InlinedGUIDs);
1568 
1569  // Compute basic block weights.
1570  Changed |= computeBlockWeights(F);
1571 
1572  if (Changed) {
1573  // Add an entry count to the function using the samples gathered at the
1574  // function entry.
1575  // Sets the GUIDs that are inlined in the profiled binary. This is used
1576  // for ThinLink to make correct liveness analysis, and also make the IR
1577  // match the profiled binary before annotation.
1578  F.setEntryCount(
1579  ProfileCount(Samples->getHeadSamples() + 1, Function::PCT_Real),
1580  &InlinedGUIDs);
1581 
1582  // Compute dominance and loop info needed for propagation.
1583  computeDominanceAndLoopInfo(F);
1584 
1585  // Find equivalence classes.
1586  findEquivalenceClasses(F);
1587 
1588  // Propagate weights to all edges.
1589  propagateWeights(F);
1590  }
1591 
1592  // If coverage checking was requested, compute it now.
1594  unsigned Used = CoverageTracker.countUsedRecords(Samples, PSI);
1595  unsigned Total = CoverageTracker.countBodyRecords(Samples, PSI);
1596  unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
1597  if (Coverage < SampleProfileRecordCoverage) {
1599  F.getSubprogram()->getFilename(), getFunctionLoc(F),
1600  Twine(Used) + " of " + Twine(Total) + " available profile records (" +
1601  Twine(Coverage) + "%) were applied",
1602  DS_Warning));
1603  }
1604  }
1605 
1607  uint64_t Used = CoverageTracker.getTotalUsedSamples();
1608  uint64_t Total = CoverageTracker.countBodySamples(Samples, PSI);
1609  unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
1610  if (Coverage < SampleProfileSampleCoverage) {
1612  F.getSubprogram()->getFilename(), getFunctionLoc(F),
1613  Twine(Used) + " of " + Twine(Total) + " available profile samples (" +
1614  Twine(Coverage) + "%) were applied",
1615  DS_Warning));
1616  }
1617  }
1618  return Changed;
1619 }
1620 
1622 
1623 INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile",
1624  "Sample Profile loader", false, false)
1628 INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",
1629  "Sample Profile loader", false, false)
1630 
1631 bool SampleProfileLoader::doInitialization(Module &M) {
1632  auto &Ctx = M.getContext();
1633  auto ReaderOrErr = SampleProfileReader::create(Filename, Ctx);
1634  if (std::error_code EC = ReaderOrErr.getError()) {
1635  std::string Msg = "Could not open profile: " + EC.message();
1636  Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1637  return false;
1638  }
1639  Reader = std::move(ReaderOrErr.get());
1640  Reader->collectFuncsToUse(M);
1641  ProfileIsValid = (Reader->read() == sampleprof_error::success);
1642 
1643  if (!RemappingFilename.empty()) {
1644  // Apply profile remappings to the loaded profile data if requested.
1645  // For now, we only support remapping symbols encoded using the Itanium
1646  // C++ ABI's name mangling scheme.
1648  RemappingFilename, Ctx, std::move(Reader));
1649  if (std::error_code EC = ReaderOrErr.getError()) {
1650  std::string Msg = "Could not open profile remapping file: " + EC.message();
1651  Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1652  return false;
1653  }
1654  Reader = std::move(ReaderOrErr.get());
1655  ProfileIsValid = (Reader->read() == sampleprof_error::success);
1656  }
1657  return true;
1658 }
1659 
1661  return new SampleProfileLoaderLegacyPass();
1662 }
1663 
1665  return new SampleProfileLoaderLegacyPass(Name);
1666 }
1667 
1668 bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
1669  ProfileSummaryInfo *_PSI) {
1670  GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
1671  if (!ProfileIsValid)
1672  return false;
1673 
1674  PSI = _PSI;
1675  if (M.getProfileSummary(/* IsCS */ false) == nullptr)
1676  M.setProfileSummary(Reader->getSummary().getMD(M.getContext()),
1678 
1679  // Compute the total number of samples collected in this profile.
1680  for (const auto &I : Reader->getProfiles())
1681  TotalCollectedSamples += I.second.getTotalSamples();
1682 
1683  // Populate the symbol map.
1684  for (const auto &N_F : M.getValueSymbolTable()) {
1685  StringRef OrigName = N_F.getKey();
1686  Function *F = dyn_cast<Function>(N_F.getValue());
1687  if (F == nullptr)
1688  continue;
1689  SymbolMap[OrigName] = F;
1690  auto pos = OrigName.find('.');
1691  if (pos != StringRef::npos) {
1692  StringRef NewName = OrigName.substr(0, pos);
1693  auto r = SymbolMap.insert(std::make_pair(NewName, F));
1694  // Failiing to insert means there is already an entry in SymbolMap,
1695  // thus there are multiple functions that are mapped to the same
1696  // stripped name. In this case of name conflicting, set the value
1697  // to nullptr to avoid confusion.
1698  if (!r.second)
1699  r.first->second = nullptr;
1700  }
1701  }
1702 
1703  bool retval = false;
1704  for (auto &F : M)
1705  if (!F.isDeclaration()) {
1706  clearFunctionData();
1707  retval |= runOnFunction(F, AM);
1708  }
1709 
1710  // Account for cold calls not inlined....
1711  for (const std::pair<Function *, NotInlinedProfileInfo> &pair :
1712  notInlinedCallInfo)
1713  updateProfileCallee(pair.first, pair.second.entryCount);
1714 
1715  return retval;
1716 }
1717 
1718 bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
1719  ACT = &getAnalysis<AssumptionCacheTracker>();
1720  TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
1721  ProfileSummaryInfo *PSI =
1722  &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
1723  return SampleLoader.runOnModule(M, nullptr, PSI);
1724 }
1725 
1727 
1728  DILocation2SampleMap.clear();
1729  // By default the entry count is initialized to -1, which will be treated
1730  // conservatively by getEntryCount as the same as unknown (None). This is
1731  // to avoid newly added code to be treated as cold. If we have samples
1732  // this will be overwritten in emitAnnotations.
1733  // If ProfileSampleAccurate is true or F has profile-sample-accurate
1734  // attribute, initialize the entry count to 0 so callsites or functions
1735  // unsampled will be treated as cold.
1736  uint64_t initialEntryCount =
1737  (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate"))
1738  ? 0
1739  : -1;
1740  F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
1741  std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
1742  if (AM) {
1743  auto &FAM =
1745  .getManager();
1746  ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
1747  } else {
1748  OwnedORE = std::make_unique<OptimizationRemarkEmitter>(&F);
1749  ORE = OwnedORE.get();
1750  }
1751  Samples = Reader->getSamplesFor(F);
1752  if (Samples && !Samples->empty())
1753  return emitAnnotations(F);
1754  return false;
1755 }
1756 
1758  ModuleAnalysisManager &AM) {
1760  AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1761 
1762  auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
1763  return FAM.getResult<AssumptionAnalysis>(F);
1764  };
1765  auto GetTTI = [&](Function &F) -> TargetTransformInfo & {
1766  return FAM.getResult<TargetIRAnalysis>(F);
1767  };
1768 
1769  SampleProfileLoader SampleLoader(
1770  ProfileFileName.empty() ? SampleProfileFile : ProfileFileName,
1771  ProfileRemappingFileName.empty() ? SampleProfileRemappingFile
1772  : ProfileRemappingFileName,
1773  IsThinLTOPreLink, GetAssumptionCache, GetTTI);
1774 
1775  SampleLoader.doInitialization(M);
1776 
1778  if (!SampleLoader.runOnModule(M, &AM, PSI))
1779  return PreservedAnalyses::all();
1780 
1781  return PreservedAnalyses::none();
1782 }
const NoneType None
Definition: None.h:23
uint64_t CallInst * C
const FunctionSamplesMap * findFunctionSamplesMapAt(const LineLocation &Loc) const
Returns the FunctionSamplesMap at the given Loc.
Definition: SampleProf.h:283
static uint64_t getGUID(StringRef Name)
Definition: SampleProf.h:482
Thresholds to tune inline cost analysis.
Definition: InlineCost.h:153
Represents either an error or a value T.
Definition: ErrorOr.h:56
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, std::function< AssumptionCache &(Function &)> &GetAssumptionCache, Optional< function_ref< BlockFrequencyInfo &(Function &)>> GetBFI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
DiagnosticInfoOptimizationBase::Argument NV
bool isNever() const
Definition: InlineCost.h:104
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:776
This class represents lattice values for constants.
Definition: AllocatorList.h:23
sample profile
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
BasicBlock * getSuccessor(unsigned Idx) const
Return the specified successor. This instruction must be a terminator.
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
Implements a dense probed hash-table based set.
Definition: DenseSet.h:249
const ValueSymbolTable & getValueSymbolTable() const
Get the symbol table of global variable and function identifiers.
Definition: Module.h:568
Analysis providing profile information.
This class represents a function call, abstracting a target machine&#39;s calling convention.
An immutable pass that tracks lazily created AssumptionCache objects.
Metadata * getProfileSummary(bool IsCS)
Returns profile summary metadata.
Definition: Module.cpp:541
A cache of @llvm.assume calls within a function.
Analysis pass providing the TargetTransformInfo.
bool isLegalToPromote(CallSite CS, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:323
static ErrorOr< std::unique_ptr< SampleProfileReader > > create(const Twine &Filename, LLVMContext &C)
Create a sample profile reader appropriate to the file format.
unsigned getLine() const
Definition: DebugLoc.cpp:25
DenseMap< uint64_t, StringRef > * GUIDToFuncNameMap
GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for all the function symbols define...
Definition: SampleProf.h:477
ErrorOr< uint64_t > findSamplesAt(uint32_t LineOffset, uint32_t Discriminator) const
Return the number of samples collected at the given location.
Definition: SampleProf.h:256
A debug info location.
Definition: DebugLoc.h:33
F(f)
uint64_t getOrCompHotCountThreshold()
Returns HotCountThreshold if set.
FunTy * getCalledFunction() const
Return the function being called if this is a direct call, otherwise return null (if it&#39;s an indirect...
Definition: CallSite.h:111
void findInlinedFunctions(DenseSet< GlobalValue::GUID > &S, const Module *M, uint64_t Threshold) const
Recursively traverses all children, if the total sample count of the corresponding function is no les...
Definition: SampleProf.h:382
static SmallVector< InstrProfValueData, 2 > SortCallTargets(const SampleRecord::CallTargetMap &M)
Returns the sorted CallTargetMap M by count in descending order.
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
Definition: Cloning.h:173
bool isHotCount(uint64_t C)
Returns true if count C is considered hot.
Represents the cost of inlining a function.
Definition: InlineCost.h:63
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
virtual void collectFuncsToUse(const Module &M)
Metadata * getMD(LLVMContext &Context)
Return summary information as metadata.
Representation of the samples collected for a function.
Definition: SampleProf.h:216
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:221
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
Definition: BitVector.h:937
virtual std::error_code read()=0
Read sample profiles from the associated file.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
void setEntryCount(ProfileCount Count, const DenseSet< GlobalValue::GUID > *Imports=nullptr)
Set the entry count for this function.
Definition: Function.cpp:1448
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
static cl::opt< std::string > SampleProfileRemappingFile("sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden)
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:244
Diagnostic information for optimization analysis remarks.
static StringRef getName(Value *V)
LLVM_NODISCARD StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:578
StringRef getFilename() const
BlockT * getHeader() const
Definition: LoopInfo.h:105
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:102
Subprogram description.
const Instruction * getFirstNonPHIOrDbgOrLifetime() const
Returns a pointer to the first instruction in this block that is not a PHINode, a debug intrinsic...
Definition: BasicBlock.cpp:203
bool empty() const
Definition: Module.h:607
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
bool extractProfTotalWeight(uint64_t &TotalVal) const
Retrieve total raw weight values of a branch.
Definition: Metadata.cpp:1336
static cl::opt< std::string > SampleProfileFile("sample-profile-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile file loaded by -sample-profile"), cl::Hidden)
Debug location.
ModulePass * createSampleProfileLoaderPass()
Instruction * promoteIndirectCall(Instruction *Inst, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
unsigned getNumSuccessors() const
Return the number of successors that this instruction has.
static cl::opt< unsigned > SampleProfileRecordCoverage("sample-profile-check-record-coverage", cl::init(0), cl::value_desc("N"), cl::desc("Emit a warning if less than N% of records in the input profile " "are matched to the IR."))
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:156
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:105
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:176
const BasicBlock & getEntryBlock() const
Definition: Function.h:664
static bool runOnFunction(Function &F, bool PostInlining)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
static cl::opt< unsigned > SampleProfileMaxPropagateIterations("sample-profile-max-propagate-iterations", cl::init(100), cl::desc("Maximum number of iterations to go through when propagating " "sample block/edge weights through the CFG."))
DISubprogram * getSubprogram() const
Get the attached subprogram.
Definition: Metadata.cpp:1504
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
const BodySampleMap & getBodySamples() const
Return all the samples collected in the body of the function.
Definition: SampleProf.h:350
FunctionSamples * getSamplesFor(const Function &F)
Return the samples collected for function F.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
Diagnostic information for applied optimization remarks.
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:112
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes...
Definition: SampleProf.h:415
LLVM_NODISCARD size_t find(char C, size_t From=0) const
Search for the first character C in the string.
Definition: StringRef.h:285
Function::ProfileCount ProfileCount
Represent the analysis usage information of a pass.
Interval::pred_iterator pred_end(Interval *I)
Definition: Interval.h:115
DenseMap< SymbolStringPtr, JITEvaluatedSymbol > SymbolMap
A map from symbol names (as SymbolStringPtrs) to JITSymbols (address/flags pairs).
Definition: Core.h:49
void initializeSampleProfileLoaderLegacyPassPass(PassRegistry &)
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:205
InlineResult InlineFunction(CallBase *CB, InlineFunctionInfo &IFI, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true)
This function inlines the called function into the basic block of the caller.
Used in the streaming interface as the general argument type.
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:931
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
Class to represent profile counts.
Definition: Function.h:260
size_t size() const
Definition: SmallVector.h:52
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1222
StringRef getFuncNameInModule(const Module *M) const
Return the original function name if it exists in Module M.
Definition: SampleProf.h:409
Optional< bool > ComputeFullInlineCost
Compute inline cost even when the cost has exceeded the threshold.
Definition: InlineCost.h:180
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1107
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:209
const FunctionSamples * findFunctionSamplesAt(const LineLocation &Loc, StringRef CalleeName) const
Returns a pointer to FunctionSamples at the given callsite location Loc with callee CalleeName...
Definition: SampleProf.h:294
A function analysis which provides an AssumptionCache.
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:333
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options...
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
Module.h This file contains the declarations for the Module class.
static unsigned getOffset(const DILocation *DIL)
Returns the line offset to the start line of the subprogram.
Definition: SampleProf.cpp:162
static cl::opt< bool > ProfileSampleAccurate("profile-sample-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "callsite and function as having 0 samples. Otherwise, treat " "un-sampled callsites and functions conservatively as unknown. "))
ErrorOr< SampleRecord::CallTargetMap > findCallTargetMapAt(uint32_t LineOffset, uint32_t Discriminator) const
Returns the call target map collected at a given location.
Definition: SampleProf.h:269
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
StringMap - This is an unconventional map that is specialized for handling keys that are "strings"...
Definition: StringMap.h:219
StringMap< FunctionSamples > & getProfiles()
Return all the profiles.
amdgpu Simplify well known AMD library false FunctionCallee Callee
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e.
Definition: GlobalValue.h:510
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
static void clear(coro::Shape &Shape)
Definition: Coroutines.cpp:225
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:467
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
unsigned getBaseDiscriminator() const
Returns the base discriminator stored in the discriminator.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:321
#define DEBUG_TYPE
static const size_t npos
Definition: StringRef.h:50
iterator begin()
Definition: StringMap.h:314
Represents the relative location of an instruction.
Definition: SampleProf.h:117
static cl::opt< unsigned > SampleProfileSampleCoverage("sample-profile-check-sample-coverage", cl::init(0), cl::value_desc("N"), cl::desc("Emit a warning if less than N% of samples in the input profile " "are matched to the IR."))
static bool callsiteIsHot(const FunctionSamples *CallsiteFS, ProfileSummaryInfo *PSI)
Return true if the given callsite is hot wrt to hot cutoff threshold.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:509
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
Establish a view to a call site for examination.
Definition: CallSite.h:897
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:224
iterator end()
Definition: DenseMap.h:108
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
std::map< std::string, FunctionSamples > FunctionSamplesMap
Definition: SampleProf.h:208
StringRef getName() const
Return the function name.
Definition: SampleProf.h:406
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:91
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
void updateProfileCallee(Function *Callee, int64_t entryDelta, const ValueMap< const Value *, WeakTrackingVH > *VMap=nullptr)
Updates profile information by adjusting the entry count by adding entryDelta then scaling callsite i...
INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile", "Sample Profile loader", false, false) INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:231
Provides ErrorOr<T> smart pointer.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:575
uint64_t getTotalSamples() const
Return the total number of samples collected inside the function.
Definition: SampleProf.h:320
sample Sample Profile loader
const CallsiteSampleMap & getCallsiteSamples() const
Return all the callsite samples collected in the body of the function.
Definition: SampleProf.h:353
Sample-based profile reader.
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:45
print Print MemDeps of function
This file defines a set of templates that efficiently compute a dominator tree over a generic graph...
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
A container for analyses that lazily runs them and caches their results.
This pass exposes codegen information to IR-level passes.
uint64_t getEntrySamples() const
Return the sample count of the first instruction of the function.
Definition: SampleProf.h:331
static cl::opt< bool > NoWarnSampleUnused("no-warn-sample-unused", cl::init(false), cl::Hidden, cl::desc("Use this option to turn off/on warnings about function with " "samples but without debug information to use those samples. "))
This header defines various interfaces for pass management in LLVM.
Diagnostic information for the sample profiler.
void setProfileSummary(Metadata *M, ProfileSummary::Kind Kind)
Attach profile summary metadata to this module.
Definition: Module.cpp:534
#define LLVM_DEBUG(X)
Definition: Debug.h:122
void dump(raw_ostream &OS=dbgs())
Print all the profiles on stream OS.
The optimization diagnostic interface.
iterator end()
Definition: StringMap.h:317
ProfileSummary & getSummary()
Return the profile summary.
This file provides the interface for the sampled PGO loader pass.
reference get()
Definition: ErrorOr.h:156
const BasicBlock * getParent() const
Definition: Instruction.h:66
static ErrorOr< std::unique_ptr< SampleProfileReader > > create(const Twine &Filename, LLVMContext &C, std::unique_ptr< SampleProfileReader > Underlying)
Create a remapped sample profile from the given remapping file and underlying samples.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:1044