LLVM  9.0.0svn
SampleProfile.cpp
Go to the documentation of this file.
1 //===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the SampleProfileLoader transformation. This pass
10 // reads a profile file generated by a sampling profiler (e.g. Linux Perf -
11 // http://perf.wiki.kernel.org/) and generates IR metadata to reflect the
12 // profile information in the given profile.
13 //
14 // This pass generates branch weight annotations on the IR:
15 //
16 // - prof: Represents branch weights. This annotation is added to branches
17 // to indicate the weights of each edge coming out of the branch.
18 // The weight of each edge is the weight of the target block for
19 // that edge. The weight of a block B is computed as the maximum
20 // number of samples found in B.
21 //
22 //===----------------------------------------------------------------------===//
23 
25 #include "llvm/ADT/ArrayRef.h"
26 #include "llvm/ADT/DenseMap.h"
27 #include "llvm/ADT/DenseSet.h"
28 #include "llvm/ADT/None.h"
29 #include "llvm/ADT/SmallPtrSet.h"
30 #include "llvm/ADT/SmallSet.h"
31 #include "llvm/ADT/SmallVector.h"
32 #include "llvm/ADT/StringMap.h"
33 #include "llvm/ADT/StringRef.h"
34 #include "llvm/ADT/Twine.h"
37 #include "llvm/Analysis/LoopInfo.h"
42 #include "llvm/IR/BasicBlock.h"
43 #include "llvm/IR/CFG.h"
44 #include "llvm/IR/CallSite.h"
46 #include "llvm/IR/DebugLoc.h"
47 #include "llvm/IR/DiagnosticInfo.h"
48 #include "llvm/IR/Dominators.h"
49 #include "llvm/IR/Function.h"
50 #include "llvm/IR/GlobalValue.h"
51 #include "llvm/IR/InstrTypes.h"
52 #include "llvm/IR/Instruction.h"
53 #include "llvm/IR/Instructions.h"
54 #include "llvm/IR/IntrinsicInst.h"
55 #include "llvm/IR/LLVMContext.h"
56 #include "llvm/IR/MDBuilder.h"
57 #include "llvm/IR/Module.h"
58 #include "llvm/IR/PassManager.h"
60 #include "llvm/Pass.h"
64 #include "llvm/Support/Casting.h"
66 #include "llvm/Support/Debug.h"
68 #include "llvm/Support/ErrorOr.h"
71 #include "llvm/Transforms/IPO.h"
75 #include <algorithm>
76 #include <cassert>
77 #include <cstdint>
78 #include <functional>
79 #include <limits>
80 #include <map>
81 #include <memory>
82 #include <string>
83 #include <system_error>
84 #include <utility>
85 #include <vector>
86 
87 using namespace llvm;
88 using namespace sampleprof;
90 #define DEBUG_TYPE "sample-profile"
91 
92 // Command line option to specify the file to read samples from. This is
93 // mainly used for debugging.
95  "sample-profile-file", cl::init(""), cl::value_desc("filename"),
96  cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);
97 
98 // The named file contains a set of transformations that may have been applied
99 // to the symbol names between the program from which the sample data was
100 // collected and the current program's symbols.
102  "sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"),
103  cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden);
104 
106  "sample-profile-max-propagate-iterations", cl::init(100),
107  cl::desc("Maximum number of iterations to go through when propagating "
108  "sample block/edge weights through the CFG."));
109 
111  "sample-profile-check-record-coverage", cl::init(0), cl::value_desc("N"),
112  cl::desc("Emit a warning if less than N% of records in the input profile "
113  "are matched to the IR."));
114 
116  "sample-profile-check-sample-coverage", cl::init(0), cl::value_desc("N"),
117  cl::desc("Emit a warning if less than N% of samples in the input profile "
118  "are matched to the IR."));
119 
121  "no-warn-sample-unused", cl::init(false), cl::Hidden,
122  cl::desc("Use this option to turn off/on warnings about function with "
123  "samples but without debug information to use those samples. "));
124 
126  "profile-sample-accurate", cl::Hidden, cl::init(false),
127  cl::desc("If the sample profile is accurate, we will mark all un-sampled "
128  "callsite and function as having 0 samples. Otherwise, treat "
129  "un-sampled callsites and functions conservatively as unknown. "));
130 
131 namespace {
132 
133 using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
134 using EquivalenceClassMap = DenseMap<const BasicBlock *, const BasicBlock *>;
135 using Edge = std::pair<const BasicBlock *, const BasicBlock *>;
136 using EdgeWeightMap = DenseMap<Edge, uint64_t>;
137 using BlockEdgeMap =
139 
140 class SampleCoverageTracker {
141 public:
142  SampleCoverageTracker() = default;
143 
144  bool markSamplesUsed(const FunctionSamples *FS, uint32_t LineOffset,
145  uint32_t Discriminator, uint64_t Samples);
146  unsigned computeCoverage(unsigned Used, unsigned Total) const;
147  unsigned countUsedRecords(const FunctionSamples *FS,
148  ProfileSummaryInfo *PSI) const;
149  unsigned countBodyRecords(const FunctionSamples *FS,
150  ProfileSummaryInfo *PSI) const;
151  uint64_t getTotalUsedSamples() const { return TotalUsedSamples; }
152  uint64_t countBodySamples(const FunctionSamples *FS,
153  ProfileSummaryInfo *PSI) const;
154 
155  void clear() {
156  SampleCoverage.clear();
157  TotalUsedSamples = 0;
158  }
159 
160 private:
161  using BodySampleCoverageMap = std::map<LineLocation, unsigned>;
162  using FunctionSamplesCoverageMap =
164 
165  /// Coverage map for sampling records.
166  ///
167  /// This map keeps a record of sampling records that have been matched to
168  /// an IR instruction. This is used to detect some form of staleness in
169  /// profiles (see flag -sample-profile-check-coverage).
170  ///
171  /// Each entry in the map corresponds to a FunctionSamples instance. This is
172  /// another map that counts how many times the sample record at the
173  /// given location has been used.
174  FunctionSamplesCoverageMap SampleCoverage;
175 
176  /// Number of samples used from the profile.
177  ///
178  /// When a sampling record is used for the first time, the samples from
179  /// that record are added to this accumulator. Coverage is later computed
180  /// based on the total number of samples available in this function and
181  /// its callsites.
182  ///
183  /// Note that this accumulator tracks samples used from a single function
184  /// and all the inlined callsites. Strictly, we should have a map of counters
185  /// keyed by FunctionSamples pointers, but these stats are cleared after
186  /// every function, so we just need to keep a single counter.
187  uint64_t TotalUsedSamples = 0;
188 };
189 
190 /// Sample profile pass.
191 ///
192 /// This pass reads profile data from the file specified by
193 /// -sample-profile-file and annotates every affected function with the
194 /// profile information found in that file.
195 class SampleProfileLoader {
196 public:
197  SampleProfileLoader(
198  StringRef Name, StringRef RemapName, bool IsThinLTOPreLink,
199  std::function<AssumptionCache &(Function &)> GetAssumptionCache,
200  std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo)
201  : GetAC(std::move(GetAssumptionCache)),
202  GetTTI(std::move(GetTargetTransformInfo)), Filename(Name),
203  RemappingFilename(RemapName), IsThinLTOPreLink(IsThinLTOPreLink) {}
204 
205  bool doInitialization(Module &M);
206  bool runOnModule(Module &M, ModuleAnalysisManager *AM,
207  ProfileSummaryInfo *_PSI);
208 
209  void dump() { Reader->dump(); }
210 
211 protected:
213  unsigned getFunctionLoc(Function &F);
214  bool emitAnnotations(Function &F);
215  ErrorOr<uint64_t> getInstWeight(const Instruction &I);
216  ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB);
217  const FunctionSamples *findCalleeFunctionSamples(const Instruction &I) const;
218  std::vector<const FunctionSamples *>
219  findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
220  mutable DenseMap<const DILocation *, const FunctionSamples *> DILocation2SampleMap;
221  const FunctionSamples *findFunctionSamples(const Instruction &I) const;
222  bool inlineCallInstruction(Instruction *I);
223  bool inlineHotFunctions(Function &F,
224  DenseSet<GlobalValue::GUID> &InlinedGUIDs);
225  void printEdgeWeight(raw_ostream &OS, Edge E);
226  void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const;
227  void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB);
228  bool computeBlockWeights(Function &F);
229  void findEquivalenceClasses(Function &F);
230  template <bool IsPostDom>
231  void findEquivalencesFor(BasicBlock *BB1, ArrayRef<BasicBlock *> Descendants,
233 
234  void propagateWeights(Function &F);
235  uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
236  void buildEdges(Function &F);
237  bool propagateThroughEdges(Function &F, bool UpdateBlockCount);
238  void computeDominanceAndLoopInfo(Function &F);
239  void clearFunctionData();
240 
241  /// Map basic blocks to their computed weights.
242  ///
243  /// The weight of a basic block is defined to be the maximum
244  /// of all the instruction weights in that block.
245  BlockWeightMap BlockWeights;
246 
247  /// Map edges to their computed weights.
248  ///
249  /// Edge weights are computed by propagating basic block weights in
250  /// SampleProfile::propagateWeights.
251  EdgeWeightMap EdgeWeights;
252 
253  /// Set of visited blocks during propagation.
255 
256  /// Set of visited edges during propagation.
257  SmallSet<Edge, 32> VisitedEdges;
258 
259  /// Equivalence classes for block weights.
260  ///
261  /// Two blocks BB1 and BB2 are in the same equivalence class if they
262  /// dominate and post-dominate each other, and they are in the same loop
263  /// nest. When this happens, the two blocks are guaranteed to execute
264  /// the same number of times.
265  EquivalenceClassMap EquivalenceClass;
266 
267  /// Map from function name to Function *. Used to find the function from
268  /// the function name. If the function name contains suffix, additional
269  /// entry is added to map from the stripped name to the function if there
270  /// is one-to-one mapping.
272 
273  /// Dominance, post-dominance and loop information.
274  std::unique_ptr<DominatorTree> DT;
275  std::unique_ptr<PostDominatorTree> PDT;
276  std::unique_ptr<LoopInfo> LI;
277 
278  std::function<AssumptionCache &(Function &)> GetAC;
279  std::function<TargetTransformInfo &(Function &)> GetTTI;
280 
281  /// Predecessors for each basic block in the CFG.
282  BlockEdgeMap Predecessors;
283 
284  /// Successors for each basic block in the CFG.
285  BlockEdgeMap Successors;
286 
287  SampleCoverageTracker CoverageTracker;
288 
289  /// Profile reader object.
290  std::unique_ptr<SampleProfileReader> Reader;
291 
292  /// Samples collected for the body of this function.
293  FunctionSamples *Samples = nullptr;
294 
295  /// Name of the profile file to load.
296  std::string Filename;
297 
298  /// Name of the profile remapping file to load.
299  std::string RemappingFilename;
300 
301  /// Flag indicating whether the profile input loaded successfully.
302  bool ProfileIsValid = false;
303 
304  /// Flag indicating if the pass is invoked in ThinLTO compile phase.
305  ///
306  /// In this phase, in annotation, we should not promote indirect calls.
307  /// Instead, we will mark GUIDs that needs to be annotated to the function.
308  bool IsThinLTOPreLink;
309 
310  /// Profile Summary Info computed from sample profile.
311  ProfileSummaryInfo *PSI = nullptr;
312 
313  /// Total number of samples collected in this profile.
314  ///
315  /// This is the sum of all the samples collected in all the functions executed
316  /// at runtime.
317  uint64_t TotalCollectedSamples = 0;
318 
319  /// Optimization Remark Emitter used to emit diagnostic remarks.
320  OptimizationRemarkEmitter *ORE = nullptr;
321 };
322 
323 class SampleProfileLoaderLegacyPass : public ModulePass {
324 public:
325  // Class identification, replacement for typeinfo
326  static char ID;
327 
328  SampleProfileLoaderLegacyPass(StringRef Name = SampleProfileFile,
329  bool IsThinLTOPreLink = false)
330  : ModulePass(ID),
331  SampleLoader(Name, SampleProfileRemappingFile, IsThinLTOPreLink,
332  [&](Function &F) -> AssumptionCache & {
333  return ACT->getAssumptionCache(F);
334  },
335  [&](Function &F) -> TargetTransformInfo & {
336  return TTIWP->getTTI(F);
337  }) {
340  }
341 
342  void dump() { SampleLoader.dump(); }
343 
344  bool doInitialization(Module &M) override {
345  return SampleLoader.doInitialization(M);
346  }
347 
348  StringRef getPassName() const override { return "Sample profile pass"; }
349  bool runOnModule(Module &M) override;
350 
351  void getAnalysisUsage(AnalysisUsage &AU) const override {
355  }
356 
357 private:
358  SampleProfileLoader SampleLoader;
359  AssumptionCacheTracker *ACT = nullptr;
360  TargetTransformInfoWrapperPass *TTIWP = nullptr;
361 };
362 
363 } // end anonymous namespace
364 
365 /// Return true if the given callsite is hot wrt to hot cutoff threshold.
366 ///
367 /// Functions that were inlined in the original binary will be represented
368 /// in the inline stack in the sample profile. If the profile shows that
369 /// the original inline decision was "good" (i.e., the callsite is executed
370 /// frequently), then we will recreate the inline decision and apply the
371 /// profile from the inlined callsite.
372 ///
373 /// To decide whether an inlined callsite is hot, we compare the callsite
374 /// sample count with the hot cutoff computed by ProfileSummaryInfo, it is
375 /// regarded as hot if the count is above the cutoff value.
376 static bool callsiteIsHot(const FunctionSamples *CallsiteFS,
377  ProfileSummaryInfo *PSI) {
378  if (!CallsiteFS)
379  return false; // The callsite was not inlined in the original binary.
380 
381  assert(PSI && "PSI is expected to be non null");
382  uint64_t CallsiteTotalSamples = CallsiteFS->getTotalSamples();
383  return PSI->isHotCount(CallsiteTotalSamples);
384 }
385 
386 /// Mark as used the sample record for the given function samples at
387 /// (LineOffset, Discriminator).
388 ///
389 /// \returns true if this is the first time we mark the given record.
390 bool SampleCoverageTracker::markSamplesUsed(const FunctionSamples *FS,
391  uint32_t LineOffset,
392  uint32_t Discriminator,
393  uint64_t Samples) {
394  LineLocation Loc(LineOffset, Discriminator);
395  unsigned &Count = SampleCoverage[FS][Loc];
396  bool FirstTime = (++Count == 1);
397  if (FirstTime)
398  TotalUsedSamples += Samples;
399  return FirstTime;
400 }
401 
402 /// Return the number of sample records that were applied from this profile.
403 ///
404 /// This count does not include records from cold inlined callsites.
405 unsigned
406 SampleCoverageTracker::countUsedRecords(const FunctionSamples *FS,
407  ProfileSummaryInfo *PSI) const {
408  auto I = SampleCoverage.find(FS);
409 
410  // The size of the coverage map for FS represents the number of records
411  // that were marked used at least once.
412  unsigned Count = (I != SampleCoverage.end()) ? I->second.size() : 0;
413 
414  // If there are inlined callsites in this function, count the samples found
415  // in the respective bodies. However, do not bother counting callees with 0
416  // total samples, these are callees that were never invoked at runtime.
417  for (const auto &I : FS->getCallsiteSamples())
418  for (const auto &J : I.second) {
419  const FunctionSamples *CalleeSamples = &J.second;
420  if (callsiteIsHot(CalleeSamples, PSI))
421  Count += countUsedRecords(CalleeSamples, PSI);
422  }
423 
424  return Count;
425 }
426 
427 /// Return the number of sample records in the body of this profile.
428 ///
429 /// This count does not include records from cold inlined callsites.
430 unsigned
431 SampleCoverageTracker::countBodyRecords(const FunctionSamples *FS,
432  ProfileSummaryInfo *PSI) const {
433  unsigned Count = FS->getBodySamples().size();
434 
435  // Only count records in hot callsites.
436  for (const auto &I : FS->getCallsiteSamples())
437  for (const auto &J : I.second) {
438  const FunctionSamples *CalleeSamples = &J.second;
439  if (callsiteIsHot(CalleeSamples, PSI))
440  Count += countBodyRecords(CalleeSamples, PSI);
441  }
442 
443  return Count;
444 }
445 
446 /// Return the number of samples collected in the body of this profile.
447 ///
448 /// This count does not include samples from cold inlined callsites.
449 uint64_t
450 SampleCoverageTracker::countBodySamples(const FunctionSamples *FS,
451  ProfileSummaryInfo *PSI) const {
452  uint64_t Total = 0;
453  for (const auto &I : FS->getBodySamples())
454  Total += I.second.getSamples();
455 
456  // Only count samples in hot callsites.
457  for (const auto &I : FS->getCallsiteSamples())
458  for (const auto &J : I.second) {
459  const FunctionSamples *CalleeSamples = &J.second;
460  if (callsiteIsHot(CalleeSamples, PSI))
461  Total += countBodySamples(CalleeSamples, PSI);
462  }
463 
464  return Total;
465 }
466 
467 /// Return the fraction of sample records used in this profile.
468 ///
469 /// The returned value is an unsigned integer in the range 0-100 indicating
470 /// the percentage of sample records that were used while applying this
471 /// profile to the associated function.
472 unsigned SampleCoverageTracker::computeCoverage(unsigned Used,
473  unsigned Total) const {
474  assert(Used <= Total &&
475  "number of used records cannot exceed the total number of records");
476  return Total > 0 ? Used * 100 / Total : 100;
477 }
478 
479 /// Clear all the per-function data used to load samples and propagate weights.
480 void SampleProfileLoader::clearFunctionData() {
481  BlockWeights.clear();
482  EdgeWeights.clear();
483  VisitedBlocks.clear();
484  VisitedEdges.clear();
485  EquivalenceClass.clear();
486  DT = nullptr;
487  PDT = nullptr;
488  LI = nullptr;
489  Predecessors.clear();
490  Successors.clear();
491  CoverageTracker.clear();
492 }
493 
494 #ifndef NDEBUG
495 /// Print the weight of edge \p E on stream \p OS.
496 ///
497 /// \param OS Stream to emit the output to.
498 /// \param E Edge to print.
499 void SampleProfileLoader::printEdgeWeight(raw_ostream &OS, Edge E) {
500  OS << "weight[" << E.first->getName() << "->" << E.second->getName()
501  << "]: " << EdgeWeights[E] << "\n";
502 }
503 
504 /// Print the equivalence class of block \p BB on stream \p OS.
505 ///
506 /// \param OS Stream to emit the output to.
507 /// \param BB Block to print.
508 void SampleProfileLoader::printBlockEquivalence(raw_ostream &OS,
509  const BasicBlock *BB) {
510  const BasicBlock *Equiv = EquivalenceClass[BB];
511  OS << "equivalence[" << BB->getName()
512  << "]: " << ((Equiv) ? EquivalenceClass[BB]->getName() : "NONE") << "\n";
513 }
514 
515 /// Print the weight of block \p BB on stream \p OS.
516 ///
517 /// \param OS Stream to emit the output to.
518 /// \param BB Block to print.
519 void SampleProfileLoader::printBlockWeight(raw_ostream &OS,
520  const BasicBlock *BB) const {
521  const auto &I = BlockWeights.find(BB);
522  uint64_t W = (I == BlockWeights.end() ? 0 : I->second);
523  OS << "weight[" << BB->getName() << "]: " << W << "\n";
524 }
525 #endif
526 
527 /// Get the weight for an instruction.
528 ///
529 /// The "weight" of an instruction \p Inst is the number of samples
530 /// collected on that instruction at runtime. To retrieve it, we
531 /// need to compute the line number of \p Inst relative to the start of its
532 /// function. We use HeaderLineno to compute the offset. We then
533 /// look up the samples collected for \p Inst using BodySamples.
534 ///
535 /// \param Inst Instruction to query.
536 ///
537 /// \returns the weight of \p Inst.
538 ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
539  const DebugLoc &DLoc = Inst.getDebugLoc();
540  if (!DLoc)
541  return std::error_code();
542 
543  const FunctionSamples *FS = findFunctionSamples(Inst);
544  if (!FS)
545  return std::error_code();
546 
547  // Ignore all intrinsics, phinodes and branch instructions.
548  // Branch and phinodes instruction usually contains debug info from sources outside of
549  // the residing basic block, thus we ignore them during annotation.
550  if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst) || isa<PHINode>(Inst))
551  return std::error_code();
552 
553  // If a direct call/invoke instruction is inlined in profile
554  // (findCalleeFunctionSamples returns non-empty result), but not inlined here,
555  // it means that the inlined callsite has no sample, thus the call
556  // instruction should have 0 count.
557  if ((isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) &&
558  !ImmutableCallSite(&Inst).isIndirectCall() &&
559  findCalleeFunctionSamples(Inst))
560  return 0;
561 
562  const DILocation *DIL = DLoc;
563  uint32_t LineOffset = FunctionSamples::getOffset(DIL);
564  uint32_t Discriminator = DIL->getBaseDiscriminator();
565  ErrorOr<uint64_t> R = FS->findSamplesAt(LineOffset, Discriminator);
566  if (R) {
567  bool FirstMark =
568  CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator, R.get());
569  if (FirstMark) {
570  ORE->emit([&]() {
571  OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "AppliedSamples", &Inst);
572  Remark << "Applied " << ore::NV("NumSamples", *R);
573  Remark << " samples from profile (offset: ";
574  Remark << ore::NV("LineOffset", LineOffset);
575  if (Discriminator) {
576  Remark << ".";
577  Remark << ore::NV("Discriminator", Discriminator);
578  }
579  Remark << ")";
580  return Remark;
581  });
582  }
583  LLVM_DEBUG(dbgs() << " " << DLoc.getLine() << "."
584  << DIL->getBaseDiscriminator() << ":" << Inst
585  << " (line offset: " << LineOffset << "."
586  << DIL->getBaseDiscriminator() << " - weight: " << R.get()
587  << ")\n");
588  }
589  return R;
590 }
591 
592 /// Compute the weight of a basic block.
593 ///
594 /// The weight of basic block \p BB is the maximum weight of all the
595 /// instructions in BB.
596 ///
597 /// \param BB The basic block to query.
598 ///
599 /// \returns the weight for \p BB.
600 ErrorOr<uint64_t> SampleProfileLoader::getBlockWeight(const BasicBlock *BB) {
601  uint64_t Max = 0;
602  bool HasWeight = false;
603  for (auto &I : BB->getInstList()) {
604  const ErrorOr<uint64_t> &R = getInstWeight(I);
605  if (R) {
606  Max = std::max(Max, R.get());
607  HasWeight = true;
608  }
609  }
610  return HasWeight ? ErrorOr<uint64_t>(Max) : std::error_code();
611 }
612 
613 /// Compute and store the weights of every basic block.
614 ///
615 /// This populates the BlockWeights map by computing
616 /// the weights of every basic block in the CFG.
617 ///
618 /// \param F The function to query.
619 bool SampleProfileLoader::computeBlockWeights(Function &F) {
620  bool Changed = false;
621  LLVM_DEBUG(dbgs() << "Block weights\n");
622  for (const auto &BB : F) {
623  ErrorOr<uint64_t> Weight = getBlockWeight(&BB);
624  if (Weight) {
625  BlockWeights[&BB] = Weight.get();
626  VisitedBlocks.insert(&BB);
627  Changed = true;
628  }
629  LLVM_DEBUG(printBlockWeight(dbgs(), &BB));
630  }
631 
632  return Changed;
633 }
634 
635 /// Get the FunctionSamples for a call instruction.
636 ///
637 /// The FunctionSamples of a call/invoke instruction \p Inst is the inlined
638 /// instance in which that call instruction is calling to. It contains
639 /// all samples that resides in the inlined instance. We first find the
640 /// inlined instance in which the call instruction is from, then we
641 /// traverse its children to find the callsite with the matching
642 /// location.
643 ///
644 /// \param Inst Call/Invoke instruction to query.
645 ///
646 /// \returns The FunctionSamples pointer to the inlined instance.
647 const FunctionSamples *
648 SampleProfileLoader::findCalleeFunctionSamples(const Instruction &Inst) const {
649  const DILocation *DIL = Inst.getDebugLoc();
650  if (!DIL) {
651  return nullptr;
652  }
653 
654  StringRef CalleeName;
655  if (const CallInst *CI = dyn_cast<CallInst>(&Inst))
656  if (Function *Callee = CI->getCalledFunction())
657  CalleeName = Callee->getName();
658 
659  const FunctionSamples *FS = findFunctionSamples(Inst);
660  if (FS == nullptr)
661  return nullptr;
662 
664  DIL->getBaseDiscriminator()),
665  CalleeName);
666 }
667 
668 /// Returns a vector of FunctionSamples that are the indirect call targets
669 /// of \p Inst. The vector is sorted by the total number of samples. Stores
670 /// the total call count of the indirect call in \p Sum.
671 std::vector<const FunctionSamples *>
672 SampleProfileLoader::findIndirectCallFunctionSamples(
673  const Instruction &Inst, uint64_t &Sum) const {
674  const DILocation *DIL = Inst.getDebugLoc();
675  std::vector<const FunctionSamples *> R;
676 
677  if (!DIL) {
678  return R;
679  }
680 
681  const FunctionSamples *FS = findFunctionSamples(Inst);
682  if (FS == nullptr)
683  return R;
684 
685  uint32_t LineOffset = FunctionSamples::getOffset(DIL);
686  uint32_t Discriminator = DIL->getBaseDiscriminator();
687 
688  auto T = FS->findCallTargetMapAt(LineOffset, Discriminator);
689  Sum = 0;
690  if (T)
691  for (const auto &T_C : T.get())
692  Sum += T_C.second;
695  if (M->empty())
696  return R;
697  for (const auto &NameFS : *M) {
698  Sum += NameFS.second.getEntrySamples();
699  R.push_back(&NameFS.second);
700  }
701  llvm::sort(R, [](const FunctionSamples *L, const FunctionSamples *R) {
702  if (L->getEntrySamples() != R->getEntrySamples())
703  return L->getEntrySamples() > R->getEntrySamples();
704  return FunctionSamples::getGUID(L->getName()) <
706  });
707  }
708  return R;
709 }
710 
711 /// Get the FunctionSamples for an instruction.
712 ///
713 /// The FunctionSamples of an instruction \p Inst is the inlined instance
714 /// in which that instruction is coming from. We traverse the inline stack
715 /// of that instruction, and match it with the tree nodes in the profile.
716 ///
717 /// \param Inst Instruction to query.
718 ///
719 /// \returns the FunctionSamples pointer to the inlined instance.
720 const FunctionSamples *
721 SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
722  const DILocation *DIL = Inst.getDebugLoc();
723  if (!DIL)
724  return Samples;
725 
726  auto it = DILocation2SampleMap.try_emplace(DIL,nullptr);
727  if (it.second)
728  it.first->second = Samples->findFunctionSamples(DIL);
729  return it.first->second;
730 }
731 
732 bool SampleProfileLoader::inlineCallInstruction(Instruction *I) {
733  assert(isa<CallInst>(I) || isa<InvokeInst>(I));
734  CallSite CS(I);
735  Function *CalledFunction = CS.getCalledFunction();
736  assert(CalledFunction);
737  DebugLoc DLoc = I->getDebugLoc();
738  BasicBlock *BB = I->getParent();
739  InlineParams Params = getInlineParams();
740  Params.ComputeFullInlineCost = true;
741  // Checks if there is anything in the reachable portion of the callee at
742  // this callsite that makes this inlining potentially illegal. Need to
743  // set ComputeFullInlineCost, otherwise getInlineCost may return early
744  // when cost exceeds threshold without checking all IRs in the callee.
745  // The acutal cost does not matter because we only checks isNever() to
746  // see if it is legal to inline the callsite.
747  InlineCost Cost = getInlineCost(CS, Params, GetTTI(*CalledFunction), GetAC,
748  None, nullptr, nullptr);
749  if (Cost.isNever()) {
750  ORE->emit(OptimizationRemark(DEBUG_TYPE, "Not inline", DLoc, BB)
751  << "incompatible inlining");
752  return false;
753  }
754  InlineFunctionInfo IFI(nullptr, &GetAC);
755  if (InlineFunction(CS, IFI)) {
756  // The call to InlineFunction erases I, so we can't pass it here.
757  ORE->emit(OptimizationRemark(DEBUG_TYPE, "HotInline", DLoc, BB)
758  << "inlined hot callee '" << ore::NV("Callee", CalledFunction)
759  << "' into '" << ore::NV("Caller", BB->getParent()) << "'");
760  return true;
761  }
762  return false;
763 }
764 
765 /// Iteratively inline hot callsites of a function.
766 ///
767 /// Iteratively traverse all callsites of the function \p F, and find if
768 /// the corresponding inlined instance exists and is hot in profile. If
769 /// it is hot enough, inline the callsites and adds new callsites of the
770 /// callee into the caller. If the call is an indirect call, first promote
771 /// it to direct call. Each indirect call is limited with a single target.
772 ///
773 /// \param F function to perform iterative inlining.
774 /// \param InlinedGUIDs a set to be updated to include all GUIDs that are
775 /// inlined in the profiled binary.
776 ///
777 /// \returns True if there is any inline happened.
778 bool SampleProfileLoader::inlineHotFunctions(
779  Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
780  DenseSet<Instruction *> PromotedInsns;
781  bool Changed = false;
782  while (true) {
783  bool LocalChanged = false;
785  for (auto &BB : F) {
786  bool Hot = false;
788  for (auto &I : BB.getInstList()) {
789  const FunctionSamples *FS = nullptr;
790  if ((isa<CallInst>(I) || isa<InvokeInst>(I)) &&
791  !isa<IntrinsicInst>(I) && (FS = findCalleeFunctionSamples(I))) {
792  Candidates.push_back(&I);
793  if (callsiteIsHot(FS, PSI))
794  Hot = true;
795  }
796  }
797  if (Hot) {
798  CIS.insert(CIS.begin(), Candidates.begin(), Candidates.end());
799  }
800  }
801  for (auto I : CIS) {
802  Function *CalledFunction = CallSite(I).getCalledFunction();
803  // Do not inline recursive calls.
804  if (CalledFunction == &F)
805  continue;
806  if (CallSite(I).isIndirectCall()) {
807  if (PromotedInsns.count(I))
808  continue;
809  uint64_t Sum;
810  for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
811  if (IsThinLTOPreLink) {
812  FS->findInlinedFunctions(InlinedGUIDs, F.getParent(),
814  continue;
815  }
816  auto CalleeFunctionName = FS->getFuncNameInModule(F.getParent());
817  // If it is a recursive call, we do not inline it as it could bloat
818  // the code exponentially. There is way to better handle this, e.g.
819  // clone the caller first, and inline the cloned caller if it is
820  // recursive. As llvm does not inline recursive calls, we will
821  // simply ignore it instead of handling it explicitly.
822  if (CalleeFunctionName == F.getName())
823  continue;
824 
825  const char *Reason = "Callee function not available";
826  auto R = SymbolMap.find(CalleeFunctionName);
827  if (R != SymbolMap.end() && R->getValue() &&
828  !R->getValue()->isDeclaration() &&
829  R->getValue()->getSubprogram() &&
830  isLegalToPromote(CallSite(I), R->getValue(), &Reason)) {
831  uint64_t C = FS->getEntrySamples();
832  Instruction *DI =
833  pgo::promoteIndirectCall(I, R->getValue(), C, Sum, false, ORE);
834  Sum -= C;
835  PromotedInsns.insert(I);
836  // If profile mismatches, we should not attempt to inline DI.
837  if ((isa<CallInst>(DI) || isa<InvokeInst>(DI)) &&
838  inlineCallInstruction(DI))
839  LocalChanged = true;
840  } else {
841  LLVM_DEBUG(dbgs()
842  << "\nFailed to promote indirect call to "
843  << CalleeFunctionName << " because " << Reason << "\n");
844  }
845  }
846  } else if (CalledFunction && CalledFunction->getSubprogram() &&
847  !CalledFunction->isDeclaration()) {
848  if (inlineCallInstruction(I))
849  LocalChanged = true;
850  } else if (IsThinLTOPreLink) {
851  findCalleeFunctionSamples(*I)->findInlinedFunctions(
852  InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold());
853  }
854  }
855  if (LocalChanged) {
856  Changed = true;
857  } else {
858  break;
859  }
860  }
861  return Changed;
862 }
863 
864 /// Find equivalence classes for the given block.
865 ///
866 /// This finds all the blocks that are guaranteed to execute the same
867 /// number of times as \p BB1. To do this, it traverses all the
868 /// descendants of \p BB1 in the dominator or post-dominator tree.
869 ///
870 /// A block BB2 will be in the same equivalence class as \p BB1 if
871 /// the following holds:
872 ///
873 /// 1- \p BB1 is a descendant of BB2 in the opposite tree. So, if BB2
874 /// is a descendant of \p BB1 in the dominator tree, then BB2 should
875 /// dominate BB1 in the post-dominator tree.
876 ///
877 /// 2- Both BB2 and \p BB1 must be in the same loop.
878 ///
879 /// For every block BB2 that meets those two requirements, we set BB2's
880 /// equivalence class to \p BB1.
881 ///
882 /// \param BB1 Block to check.
883 /// \param Descendants Descendants of \p BB1 in either the dom or pdom tree.
884 /// \param DomTree Opposite dominator tree. If \p Descendants is filled
885 /// with blocks from \p BB1's dominator tree, then
886 /// this is the post-dominator tree, and vice versa.
887 template <bool IsPostDom>
888 void SampleProfileLoader::findEquivalencesFor(
889  BasicBlock *BB1, ArrayRef<BasicBlock *> Descendants,
891  const BasicBlock *EC = EquivalenceClass[BB1];
892  uint64_t Weight = BlockWeights[EC];
893  for (const auto *BB2 : Descendants) {
894  bool IsDomParent = DomTree->dominates(BB2, BB1);
895  bool IsInSameLoop = LI->getLoopFor(BB1) == LI->getLoopFor(BB2);
896  if (BB1 != BB2 && IsDomParent && IsInSameLoop) {
897  EquivalenceClass[BB2] = EC;
898  // If BB2 is visited, then the entire EC should be marked as visited.
899  if (VisitedBlocks.count(BB2)) {
900  VisitedBlocks.insert(EC);
901  }
902 
903  // If BB2 is heavier than BB1, make BB2 have the same weight
904  // as BB1.
905  //
906  // Note that we don't worry about the opposite situation here
907  // (when BB2 is lighter than BB1). We will deal with this
908  // during the propagation phase. Right now, we just want to
909  // make sure that BB1 has the largest weight of all the
910  // members of its equivalence set.
911  Weight = std::max(Weight, BlockWeights[BB2]);
912  }
913  }
914  if (EC == &EC->getParent()->getEntryBlock()) {
915  BlockWeights[EC] = Samples->getHeadSamples() + 1;
916  } else {
917  BlockWeights[EC] = Weight;
918  }
919 }
920 
921 /// Find equivalence classes.
922 ///
923 /// Since samples may be missing from blocks, we can fill in the gaps by setting
924 /// the weights of all the blocks in the same equivalence class to the same
925 /// weight. To compute the concept of equivalence, we use dominance and loop
926 /// information. Two blocks B1 and B2 are in the same equivalence class if B1
927 /// dominates B2, B2 post-dominates B1 and both are in the same loop.
928 ///
929 /// \param F The function to query.
930 void SampleProfileLoader::findEquivalenceClasses(Function &F) {
931  SmallVector<BasicBlock *, 8> DominatedBBs;
932  LLVM_DEBUG(dbgs() << "\nBlock equivalence classes\n");
933  // Find equivalence sets based on dominance and post-dominance information.
934  for (auto &BB : F) {
935  BasicBlock *BB1 = &BB;
936 
937  // Compute BB1's equivalence class once.
938  if (EquivalenceClass.count(BB1)) {
939  LLVM_DEBUG(printBlockEquivalence(dbgs(), BB1));
940  continue;
941  }
942 
943  // By default, blocks are in their own equivalence class.
944  EquivalenceClass[BB1] = BB1;
945 
946  // Traverse all the blocks dominated by BB1. We are looking for
947  // every basic block BB2 such that:
948  //
949  // 1- BB1 dominates BB2.
950  // 2- BB2 post-dominates BB1.
951  // 3- BB1 and BB2 are in the same loop nest.
952  //
953  // If all those conditions hold, it means that BB2 is executed
954  // as many times as BB1, so they are placed in the same equivalence
955  // class by making BB2's equivalence class be BB1.
956  DominatedBBs.clear();
957  DT->getDescendants(BB1, DominatedBBs);
958  findEquivalencesFor(BB1, DominatedBBs, PDT.get());
959 
960  LLVM_DEBUG(printBlockEquivalence(dbgs(), BB1));
961  }
962 
963  // Assign weights to equivalence classes.
964  //
965  // All the basic blocks in the same equivalence class will execute
966  // the same number of times. Since we know that the head block in
967  // each equivalence class has the largest weight, assign that weight
968  // to all the blocks in that equivalence class.
969  LLVM_DEBUG(
970  dbgs() << "\nAssign the same weight to all blocks in the same class\n");
971  for (auto &BI : F) {
972  const BasicBlock *BB = &BI;
973  const BasicBlock *EquivBB = EquivalenceClass[BB];
974  if (BB != EquivBB)
975  BlockWeights[BB] = BlockWeights[EquivBB];
976  LLVM_DEBUG(printBlockWeight(dbgs(), BB));
977  }
978 }
979 
980 /// Visit the given edge to decide if it has a valid weight.
981 ///
982 /// If \p E has not been visited before, we copy to \p UnknownEdge
983 /// and increment the count of unknown edges.
984 ///
985 /// \param E Edge to visit.
986 /// \param NumUnknownEdges Current number of unknown edges.
987 /// \param UnknownEdge Set if E has not been visited before.
988 ///
989 /// \returns E's weight, if known. Otherwise, return 0.
990 uint64_t SampleProfileLoader::visitEdge(Edge E, unsigned *NumUnknownEdges,
991  Edge *UnknownEdge) {
992  if (!VisitedEdges.count(E)) {
993  (*NumUnknownEdges)++;
994  *UnknownEdge = E;
995  return 0;
996  }
997 
998  return EdgeWeights[E];
999 }
1000 
1001 /// Propagate weights through incoming/outgoing edges.
1002 ///
1003 /// If the weight of a basic block is known, and there is only one edge
1004 /// with an unknown weight, we can calculate the weight of that edge.
1005 ///
1006 /// Similarly, if all the edges have a known count, we can calculate the
1007 /// count of the basic block, if needed.
1008 ///
1009 /// \param F Function to process.
1010 /// \param UpdateBlockCount Whether we should update basic block counts that
1011 /// has already been annotated.
1012 ///
1013 /// \returns True if new weights were assigned to edges or blocks.
1014 bool SampleProfileLoader::propagateThroughEdges(Function &F,
1015  bool UpdateBlockCount) {
1016  bool Changed = false;
1017  LLVM_DEBUG(dbgs() << "\nPropagation through edges\n");
1018  for (const auto &BI : F) {
1019  const BasicBlock *BB = &BI;
1020  const BasicBlock *EC = EquivalenceClass[BB];
1021 
1022  // Visit all the predecessor and successor edges to determine
1023  // which ones have a weight assigned already. Note that it doesn't
1024  // matter that we only keep track of a single unknown edge. The
1025  // only case we are interested in handling is when only a single
1026  // edge is unknown (see setEdgeOrBlockWeight).
1027  for (unsigned i = 0; i < 2; i++) {
1028  uint64_t TotalWeight = 0;
1029  unsigned NumUnknownEdges = 0, NumTotalEdges = 0;
1030  Edge UnknownEdge, SelfReferentialEdge, SingleEdge;
1031 
1032  if (i == 0) {
1033  // First, visit all predecessor edges.
1034  NumTotalEdges = Predecessors[BB].size();
1035  for (auto *Pred : Predecessors[BB]) {
1036  Edge E = std::make_pair(Pred, BB);
1037  TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
1038  if (E.first == E.second)
1039  SelfReferentialEdge = E;
1040  }
1041  if (NumTotalEdges == 1) {
1042  SingleEdge = std::make_pair(Predecessors[BB][0], BB);
1043  }
1044  } else {
1045  // On the second round, visit all successor edges.
1046  NumTotalEdges = Successors[BB].size();
1047  for (auto *Succ : Successors[BB]) {
1048  Edge E = std::make_pair(BB, Succ);
1049  TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
1050  }
1051  if (NumTotalEdges == 1) {
1052  SingleEdge = std::make_pair(BB, Successors[BB][0]);
1053  }
1054  }
1055 
1056  // After visiting all the edges, there are three cases that we
1057  // can handle immediately:
1058  //
1059  // - All the edge weights are known (i.e., NumUnknownEdges == 0).
1060  // In this case, we simply check that the sum of all the edges
1061  // is the same as BB's weight. If not, we change BB's weight
1062  // to match. Additionally, if BB had not been visited before,
1063  // we mark it visited.
1064  //
1065  // - Only one edge is unknown and BB has already been visited.
1066  // In this case, we can compute the weight of the edge by
1067  // subtracting the total block weight from all the known
1068  // edge weights. If the edges weight more than BB, then the
1069  // edge of the last remaining edge is set to zero.
1070  //
1071  // - There exists a self-referential edge and the weight of BB is
1072  // known. In this case, this edge can be based on BB's weight.
1073  // We add up all the other known edges and set the weight on
1074  // the self-referential edge as we did in the previous case.
1075  //
1076  // In any other case, we must continue iterating. Eventually,
1077  // all edges will get a weight, or iteration will stop when
1078  // it reaches SampleProfileMaxPropagateIterations.
1079  if (NumUnknownEdges <= 1) {
1080  uint64_t &BBWeight = BlockWeights[EC];
1081  if (NumUnknownEdges == 0) {
1082  if (!VisitedBlocks.count(EC)) {
1083  // If we already know the weight of all edges, the weight of the
1084  // basic block can be computed. It should be no larger than the sum
1085  // of all edge weights.
1086  if (TotalWeight > BBWeight) {
1087  BBWeight = TotalWeight;
1088  Changed = true;
1089  LLVM_DEBUG(dbgs() << "All edge weights for " << BB->getName()
1090  << " known. Set weight for block: ";
1091  printBlockWeight(dbgs(), BB););
1092  }
1093  } else if (NumTotalEdges == 1 &&
1094  EdgeWeights[SingleEdge] < BlockWeights[EC]) {
1095  // If there is only one edge for the visited basic block, use the
1096  // block weight to adjust edge weight if edge weight is smaller.
1097  EdgeWeights[SingleEdge] = BlockWeights[EC];
1098  Changed = true;
1099  }
1100  } else if (NumUnknownEdges == 1 && VisitedBlocks.count(EC)) {
1101  // If there is a single unknown edge and the block has been
1102  // visited, then we can compute E's weight.
1103  if (BBWeight >= TotalWeight)
1104  EdgeWeights[UnknownEdge] = BBWeight - TotalWeight;
1105  else
1106  EdgeWeights[UnknownEdge] = 0;
1107  const BasicBlock *OtherEC;
1108  if (i == 0)
1109  OtherEC = EquivalenceClass[UnknownEdge.first];
1110  else
1111  OtherEC = EquivalenceClass[UnknownEdge.second];
1112  // Edge weights should never exceed the BB weights it connects.
1113  if (VisitedBlocks.count(OtherEC) &&
1114  EdgeWeights[UnknownEdge] > BlockWeights[OtherEC])
1115  EdgeWeights[UnknownEdge] = BlockWeights[OtherEC];
1116  VisitedEdges.insert(UnknownEdge);
1117  Changed = true;
1118  LLVM_DEBUG(dbgs() << "Set weight for edge: ";
1119  printEdgeWeight(dbgs(), UnknownEdge));
1120  }
1121  } else if (VisitedBlocks.count(EC) && BlockWeights[EC] == 0) {
1122  // If a block Weights 0, all its in/out edges should weight 0.
1123  if (i == 0) {
1124  for (auto *Pred : Predecessors[BB]) {
1125  Edge E = std::make_pair(Pred, BB);
1126  EdgeWeights[E] = 0;
1127  VisitedEdges.insert(E);
1128  }
1129  } else {
1130  for (auto *Succ : Successors[BB]) {
1131  Edge E = std::make_pair(BB, Succ);
1132  EdgeWeights[E] = 0;
1133  VisitedEdges.insert(E);
1134  }
1135  }
1136  } else if (SelfReferentialEdge.first && VisitedBlocks.count(EC)) {
1137  uint64_t &BBWeight = BlockWeights[BB];
1138  // We have a self-referential edge and the weight of BB is known.
1139  if (BBWeight >= TotalWeight)
1140  EdgeWeights[SelfReferentialEdge] = BBWeight - TotalWeight;
1141  else
1142  EdgeWeights[SelfReferentialEdge] = 0;
1143  VisitedEdges.insert(SelfReferentialEdge);
1144  Changed = true;
1145  LLVM_DEBUG(dbgs() << "Set self-referential edge weight to: ";
1146  printEdgeWeight(dbgs(), SelfReferentialEdge));
1147  }
1148  if (UpdateBlockCount && !VisitedBlocks.count(EC) && TotalWeight > 0) {
1149  BlockWeights[EC] = TotalWeight;
1150  VisitedBlocks.insert(EC);
1151  Changed = true;
1152  }
1153  }
1154  }
1155 
1156  return Changed;
1157 }
1158 
1159 /// Build in/out edge lists for each basic block in the CFG.
1160 ///
1161 /// We are interested in unique edges. If a block B1 has multiple
1162 /// edges to another block B2, we only add a single B1->B2 edge.
1163 void SampleProfileLoader::buildEdges(Function &F) {
1164  for (auto &BI : F) {
1165  BasicBlock *B1 = &BI;
1166 
1167  // Add predecessors for B1.
1169  if (!Predecessors[B1].empty())
1170  llvm_unreachable("Found a stale predecessors list in a basic block.");
1171  for (pred_iterator PI = pred_begin(B1), PE = pred_end(B1); PI != PE; ++PI) {
1172  BasicBlock *B2 = *PI;
1173  if (Visited.insert(B2).second)
1174  Predecessors[B1].push_back(B2);
1175  }
1176 
1177  // Add successors for B1.
1178  Visited.clear();
1179  if (!Successors[B1].empty())
1180  llvm_unreachable("Found a stale successors list in a basic block.");
1181  for (succ_iterator SI = succ_begin(B1), SE = succ_end(B1); SI != SE; ++SI) {
1182  BasicBlock *B2 = *SI;
1183  if (Visited.insert(B2).second)
1184  Successors[B1].push_back(B2);
1185  }
1186  }
1187 }
1188 
1189 /// Returns the sorted CallTargetMap \p M by count in descending order.
1191  const SampleRecord::CallTargetMap &M) {
1193  for (auto I = M.begin(); I != M.end(); ++I)
1194  R.push_back({FunctionSamples::getGUID(I->getKey()), I->getValue()});
1195  llvm::sort(R, [](const InstrProfValueData &L, const InstrProfValueData &R) {
1196  if (L.Count == R.Count)
1197  return L.Value > R.Value;
1198  else
1199  return L.Count > R.Count;
1200  });
1201  return R;
1202 }
1203 
1204 /// Propagate weights into edges
1205 ///
1206 /// The following rules are applied to every block BB in the CFG:
1207 ///
1208 /// - If BB has a single predecessor/successor, then the weight
1209 /// of that edge is the weight of the block.
1210 ///
1211 /// - If all incoming or outgoing edges are known except one, and the
1212 /// weight of the block is already known, the weight of the unknown
1213 /// edge will be the weight of the block minus the sum of all the known
1214 /// edges. If the sum of all the known edges is larger than BB's weight,
1215 /// we set the unknown edge weight to zero.
1216 ///
1217 /// - If there is a self-referential edge, and the weight of the block is
1218 /// known, the weight for that edge is set to the weight of the block
1219 /// minus the weight of the other incoming edges to that block (if
1220 /// known).
1221 void SampleProfileLoader::propagateWeights(Function &F) {
1222  bool Changed = true;
1223  unsigned I = 0;
1224 
1225  // If BB weight is larger than its corresponding loop's header BB weight,
1226  // use the BB weight to replace the loop header BB weight.
1227  for (auto &BI : F) {
1228  BasicBlock *BB = &BI;
1229  Loop *L = LI->getLoopFor(BB);
1230  if (!L) {
1231  continue;
1232  }
1233  BasicBlock *Header = L->getHeader();
1234  if (Header && BlockWeights[BB] > BlockWeights[Header]) {
1235  BlockWeights[Header] = BlockWeights[BB];
1236  }
1237  }
1238 
1239  // Before propagation starts, build, for each block, a list of
1240  // unique predecessors and successors. This is necessary to handle
1241  // identical edges in multiway branches. Since we visit all blocks and all
1242  // edges of the CFG, it is cleaner to build these lists once at the start
1243  // of the pass.
1244  buildEdges(F);
1245 
1246  // Propagate until we converge or we go past the iteration limit.
1247  while (Changed && I++ < SampleProfileMaxPropagateIterations) {
1248  Changed = propagateThroughEdges(F, false);
1249  }
1250 
1251  // The first propagation propagates BB counts from annotated BBs to unknown
1252  // BBs. The 2nd propagation pass resets edges weights, and use all BB weights
1253  // to propagate edge weights.
1254  VisitedEdges.clear();
1255  Changed = true;
1256  while (Changed && I++ < SampleProfileMaxPropagateIterations) {
1257  Changed = propagateThroughEdges(F, false);
1258  }
1259 
1260  // The 3rd propagation pass allows adjust annotated BB weights that are
1261  // obviously wrong.
1262  Changed = true;
1263  while (Changed && I++ < SampleProfileMaxPropagateIterations) {
1264  Changed = propagateThroughEdges(F, true);
1265  }
1266 
1267  // Generate MD_prof metadata for every branch instruction using the
1268  // edge weights computed during propagation.
1269  LLVM_DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n");
1270  LLVMContext &Ctx = F.getContext();
1271  MDBuilder MDB(Ctx);
1272  for (auto &BI : F) {
1273  BasicBlock *BB = &BI;
1274 
1275  if (BlockWeights[BB]) {
1276  for (auto &I : BB->getInstList()) {
1277  if (!isa<CallInst>(I) && !isa<InvokeInst>(I))
1278  continue;
1279  CallSite CS(&I);
1280  if (!CS.getCalledFunction()) {
1281  const DebugLoc &DLoc = I.getDebugLoc();
1282  if (!DLoc)
1283  continue;
1284  const DILocation *DIL = DLoc;
1285  uint32_t LineOffset = FunctionSamples::getOffset(DIL);
1286  uint32_t Discriminator = DIL->getBaseDiscriminator();
1287 
1288  const FunctionSamples *FS = findFunctionSamples(I);
1289  if (!FS)
1290  continue;
1291  auto T = FS->findCallTargetMapAt(LineOffset, Discriminator);
1292  if (!T || T.get().empty())
1293  continue;
1294  SmallVector<InstrProfValueData, 2> SortedCallTargets =
1295  SortCallTargets(T.get());
1296  uint64_t Sum;
1297  findIndirectCallFunctionSamples(I, Sum);
1298  annotateValueSite(*I.getParent()->getParent()->getParent(), I,
1299  SortedCallTargets, Sum, IPVK_IndirectCallTarget,
1300  SortedCallTargets.size());
1301  } else if (!dyn_cast<IntrinsicInst>(&I)) {
1302  SmallVector<uint32_t, 1> Weights;
1303  Weights.push_back(BlockWeights[BB]);
1304  I.setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
1305  }
1306  }
1307  }
1308  Instruction *TI = BB->getTerminator();
1309  if (TI->getNumSuccessors() == 1)
1310  continue;
1311  if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
1312  continue;
1313 
1314  DebugLoc BranchLoc = TI->getDebugLoc();
1315  LLVM_DEBUG(dbgs() << "\nGetting weights for branch at line "
1316  << ((BranchLoc) ? Twine(BranchLoc.getLine())
1317  : Twine("<UNKNOWN LOCATION>"))
1318  << ".\n");
1319  SmallVector<uint32_t, 4> Weights;
1320  uint32_t MaxWeight = 0;
1321  Instruction *MaxDestInst;
1322  for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
1323  BasicBlock *Succ = TI->getSuccessor(I);
1324  Edge E = std::make_pair(BB, Succ);
1325  uint64_t Weight = EdgeWeights[E];
1326  LLVM_DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), E));
1327  // Use uint32_t saturated arithmetic to adjust the incoming weights,
1328  // if needed. Sample counts in profiles are 64-bit unsigned values,
1329  // but internally branch weights are expressed as 32-bit values.
1330  if (Weight > std::numeric_limits<uint32_t>::max()) {
1331  LLVM_DEBUG(dbgs() << " (saturated due to uint32_t overflow)");
1333  }
1334  // Weight is added by one to avoid propagation errors introduced by
1335  // 0 weights.
1336  Weights.push_back(static_cast<uint32_t>(Weight + 1));
1337  if (Weight != 0) {
1338  if (Weight > MaxWeight) {
1339  MaxWeight = Weight;
1340  MaxDestInst = Succ->getFirstNonPHIOrDbgOrLifetime();
1341  }
1342  }
1343  }
1344 
1345  uint64_t TempWeight;
1346  // Only set weights if there is at least one non-zero weight.
1347  // In any other case, let the analyzer set weights.
1348  // Do not set weights if the weights are present. In ThinLTO, the profile
1349  // annotation is done twice. If the first annotation already set the
1350  // weights, the second pass does not need to set it.
1351  if (MaxWeight > 0 && !TI->extractProfTotalWeight(TempWeight)) {
1352  LLVM_DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
1354  MDB.createBranchWeights(Weights));
1355  ORE->emit([&]() {
1356  return OptimizationRemark(DEBUG_TYPE, "PopularDest", MaxDestInst)
1357  << "most popular destination for conditional branches at "
1358  << ore::NV("CondBranchesLoc", BranchLoc);
1359  });
1360  } else {
1361  LLVM_DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
1362  }
1363  }
1364 }
1365 
1366 /// Get the line number for the function header.
1367 ///
1368 /// This looks up function \p F in the current compilation unit and
1369 /// retrieves the line number where the function is defined. This is
1370 /// line 0 for all the samples read from the profile file. Every line
1371 /// number is relative to this line.
1372 ///
1373 /// \param F Function object to query.
1374 ///
1375 /// \returns the line number where \p F is defined. If it returns 0,
1376 /// it means that there is no debug information available for \p F.
1377 unsigned SampleProfileLoader::getFunctionLoc(Function &F) {
1378  if (DISubprogram *S = F.getSubprogram())
1379  return S->getLine();
1380 
1381  if (NoWarnSampleUnused)
1382  return 0;
1383 
1384  // If the start of \p F is missing, emit a diagnostic to inform the user
1385  // about the missed opportunity.
1387  "No debug information found in function " + F.getName() +
1388  ": Function profile not used",
1389  DS_Warning));
1390  return 0;
1391 }
1392 
1393 void SampleProfileLoader::computeDominanceAndLoopInfo(Function &F) {
1394  DT.reset(new DominatorTree);
1395  DT->recalculate(F);
1396 
1397  PDT.reset(new PostDominatorTree(F));
1398 
1399  LI.reset(new LoopInfo);
1400  LI->analyze(*DT);
1401 }
1402 
1403 /// Generate branch weight metadata for all branches in \p F.
1404 ///
1405 /// Branch weights are computed out of instruction samples using a
1406 /// propagation heuristic. Propagation proceeds in 3 phases:
1407 ///
1408 /// 1- Assignment of block weights. All the basic blocks in the function
1409 /// are initial assigned the same weight as their most frequently
1410 /// executed instruction.
1411 ///
1412 /// 2- Creation of equivalence classes. Since samples may be missing from
1413 /// blocks, we can fill in the gaps by setting the weights of all the
1414 /// blocks in the same equivalence class to the same weight. To compute
1415 /// the concept of equivalence, we use dominance and loop information.
1416 /// Two blocks B1 and B2 are in the same equivalence class if B1
1417 /// dominates B2, B2 post-dominates B1 and both are in the same loop.
1418 ///
1419 /// 3- Propagation of block weights into edges. This uses a simple
1420 /// propagation heuristic. The following rules are applied to every
1421 /// block BB in the CFG:
1422 ///
1423 /// - If BB has a single predecessor/successor, then the weight
1424 /// of that edge is the weight of the block.
1425 ///
1426 /// - If all the edges are known except one, and the weight of the
1427 /// block is already known, the weight of the unknown edge will
1428 /// be the weight of the block minus the sum of all the known
1429 /// edges. If the sum of all the known edges is larger than BB's weight,
1430 /// we set the unknown edge weight to zero.
1431 ///
1432 /// - If there is a self-referential edge, and the weight of the block is
1433 /// known, the weight for that edge is set to the weight of the block
1434 /// minus the weight of the other incoming edges to that block (if
1435 /// known).
1436 ///
1437 /// Since this propagation is not guaranteed to finalize for every CFG, we
1438 /// only allow it to proceed for a limited number of iterations (controlled
1439 /// by -sample-profile-max-propagate-iterations).
1440 ///
1441 /// FIXME: Try to replace this propagation heuristic with a scheme
1442 /// that is guaranteed to finalize. A work-list approach similar to
1443 /// the standard value propagation algorithm used by SSA-CCP might
1444 /// work here.
1445 ///
1446 /// Once all the branch weights are computed, we emit the MD_prof
1447 /// metadata on BB using the computed values for each of its branches.
1448 ///
1449 /// \param F The function to query.
1450 ///
1451 /// \returns true if \p F was modified. Returns false, otherwise.
1452 bool SampleProfileLoader::emitAnnotations(Function &F) {
1453  bool Changed = false;
1454 
1455  if (getFunctionLoc(F) == 0)
1456  return false;
1457 
1458  LLVM_DEBUG(dbgs() << "Line number for the first instruction in "
1459  << F.getName() << ": " << getFunctionLoc(F) << "\n");
1460 
1461  DenseSet<GlobalValue::GUID> InlinedGUIDs;
1462  Changed |= inlineHotFunctions(F, InlinedGUIDs);
1463 
1464  // Compute basic block weights.
1465  Changed |= computeBlockWeights(F);
1466 
1467  if (Changed) {
1468  // Add an entry count to the function using the samples gathered at the
1469  // function entry.
1470  // Sets the GUIDs that are inlined in the profiled binary. This is used
1471  // for ThinLink to make correct liveness analysis, and also make the IR
1472  // match the profiled binary before annotation.
1473  F.setEntryCount(
1474  ProfileCount(Samples->getHeadSamples() + 1, Function::PCT_Real),
1475  &InlinedGUIDs);
1476 
1477  // Compute dominance and loop info needed for propagation.
1478  computeDominanceAndLoopInfo(F);
1479 
1480  // Find equivalence classes.
1481  findEquivalenceClasses(F);
1482 
1483  // Propagate weights to all edges.
1484  propagateWeights(F);
1485  }
1486 
1487  // If coverage checking was requested, compute it now.
1489  unsigned Used = CoverageTracker.countUsedRecords(Samples, PSI);
1490  unsigned Total = CoverageTracker.countBodyRecords(Samples, PSI);
1491  unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
1492  if (Coverage < SampleProfileRecordCoverage) {
1494  F.getSubprogram()->getFilename(), getFunctionLoc(F),
1495  Twine(Used) + " of " + Twine(Total) + " available profile records (" +
1496  Twine(Coverage) + "%) were applied",
1497  DS_Warning));
1498  }
1499  }
1500 
1502  uint64_t Used = CoverageTracker.getTotalUsedSamples();
1503  uint64_t Total = CoverageTracker.countBodySamples(Samples, PSI);
1504  unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
1505  if (Coverage < SampleProfileSampleCoverage) {
1507  F.getSubprogram()->getFilename(), getFunctionLoc(F),
1508  Twine(Used) + " of " + Twine(Total) + " available profile samples (" +
1509  Twine(Coverage) + "%) were applied",
1510  DS_Warning));
1511  }
1512  }
1513  return Changed;
1514 }
1515 
1517 
1518 INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile",
1519  "Sample Profile loader", false, false)
1523 INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",
1524  "Sample Profile loader", false, false)
1525 
1526 bool SampleProfileLoader::doInitialization(Module &M) {
1527  auto &Ctx = M.getContext();
1528  auto ReaderOrErr = SampleProfileReader::create(Filename, Ctx);
1529  if (std::error_code EC = ReaderOrErr.getError()) {
1530  std::string Msg = "Could not open profile: " + EC.message();
1531  Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1532  return false;
1533  }
1534  Reader = std::move(ReaderOrErr.get());
1535  Reader->collectFuncsToUse(M);
1536  ProfileIsValid = (Reader->read() == sampleprof_error::success);
1537 
1538  if (!RemappingFilename.empty()) {
1539  // Apply profile remappings to the loaded profile data if requested.
1540  // For now, we only support remapping symbols encoded using the Itanium
1541  // C++ ABI's name mangling scheme.
1543  RemappingFilename, Ctx, std::move(Reader));
1544  if (std::error_code EC = ReaderOrErr.getError()) {
1545  std::string Msg = "Could not open profile remapping file: " + EC.message();
1546  Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1547  return false;
1548  }
1549  Reader = std::move(ReaderOrErr.get());
1550  ProfileIsValid = (Reader->read() == sampleprof_error::success);
1551  }
1552  return true;
1553 }
1554 
1556  return new SampleProfileLoaderLegacyPass();
1557 }
1558 
1560  return new SampleProfileLoaderLegacyPass(Name);
1561 }
1562 
1563 bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
1564  ProfileSummaryInfo *_PSI) {
1566  if (!ProfileIsValid)
1567  return false;
1568 
1569  PSI = _PSI;
1570  if (M.getProfileSummary() == nullptr)
1571  M.setProfileSummary(Reader->getSummary().getMD(M.getContext()));
1572 
1573  // Compute the total number of samples collected in this profile.
1574  for (const auto &I : Reader->getProfiles())
1575  TotalCollectedSamples += I.second.getTotalSamples();
1576 
1577  // Populate the symbol map.
1578  for (const auto &N_F : M.getValueSymbolTable()) {
1579  StringRef OrigName = N_F.getKey();
1580  Function *F = dyn_cast<Function>(N_F.getValue());
1581  if (F == nullptr)
1582  continue;
1583  SymbolMap[OrigName] = F;
1584  auto pos = OrigName.find('.');
1585  if (pos != StringRef::npos) {
1586  StringRef NewName = OrigName.substr(0, pos);
1587  auto r = SymbolMap.insert(std::make_pair(NewName, F));
1588  // Failiing to insert means there is already an entry in SymbolMap,
1589  // thus there are multiple functions that are mapped to the same
1590  // stripped name. In this case of name conflicting, set the value
1591  // to nullptr to avoid confusion.
1592  if (!r.second)
1593  r.first->second = nullptr;
1594  }
1595  }
1596 
1597  bool retval = false;
1598  for (auto &F : M)
1599  if (!F.isDeclaration()) {
1600  clearFunctionData();
1601  retval |= runOnFunction(F, AM);
1602  }
1603  return retval;
1604 }
1605 
1606 bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
1607  ACT = &getAnalysis<AssumptionCacheTracker>();
1608  TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
1609  ProfileSummaryInfo *PSI =
1610  &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
1611  return SampleLoader.runOnModule(M, nullptr, PSI);
1612 }
1613 
1615 
1616  DILocation2SampleMap.clear();
1617  // By default the entry count is initialized to -1, which will be treated
1618  // conservatively by getEntryCount as the same as unknown (None). This is
1619  // to avoid newly added code to be treated as cold. If we have samples
1620  // this will be overwritten in emitAnnotations.
1621  // If ProfileSampleAccurate is true or F has profile-sample-accurate
1622  // attribute, initialize the entry count to 0 so callsites or functions
1623  // unsampled will be treated as cold.
1624  uint64_t initialEntryCount =
1625  (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate"))
1626  ? 0
1627  : -1;
1628  F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
1629  std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
1630  if (AM) {
1631  auto &FAM =
1633  .getManager();
1634  ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
1635  } else {
1636  OwnedORE = make_unique<OptimizationRemarkEmitter>(&F);
1637  ORE = OwnedORE.get();
1638  }
1639  Samples = Reader->getSamplesFor(F);
1640  if (Samples && !Samples->empty())
1641  return emitAnnotations(F);
1642  return false;
1643 }
1644 
1646  ModuleAnalysisManager &AM) {
1648  AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1649 
1650  auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
1651  return FAM.getResult<AssumptionAnalysis>(F);
1652  };
1653  auto GetTTI = [&](Function &F) -> TargetTransformInfo & {
1654  return FAM.getResult<TargetIRAnalysis>(F);
1655  };
1656 
1657  SampleProfileLoader SampleLoader(
1658  ProfileFileName.empty() ? SampleProfileFile : ProfileFileName,
1659  ProfileRemappingFileName.empty() ? SampleProfileRemappingFile
1660  : ProfileRemappingFileName,
1661  IsThinLTOPreLink, GetAssumptionCache, GetTTI);
1662 
1663  SampleLoader.doInitialization(M);
1664 
1666  if (!SampleLoader.runOnModule(M, &AM, PSI))
1667  return PreservedAnalyses::all();
1668 
1669  return PreservedAnalyses::none();
1670 }
const NoneType None
Definition: None.h:23
uint64_t CallInst * C
const FunctionSamplesMap * findFunctionSamplesMapAt(const LineLocation &Loc) const
Returns the FunctionSamplesMap at the given Loc.
Definition: SampleProf.h:283
static uint64_t getGUID(StringRef Name)
Definition: SampleProf.h:489
Thresholds to tune inline cost analysis.
Definition: InlineCost.h:153
Represents either an error or a value T.
Definition: ErrorOr.h:56
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
DiagnosticInfoOptimizationBase::Argument NV
bool isNever() const
Definition: InlineCost.h:104
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:769
This class represents lattice values for constants.
Definition: AllocatorList.h:23
sample profile
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:64
BasicBlock * getSuccessor(unsigned Idx) const
Return the specified successor. This instruction must be a terminator.
Implements a dense probed hash-table based set.
Definition: DenseSet.h:249
const ValueSymbolTable & getValueSymbolTable() const
Get the symbol table of global variable and function identifiers.
Definition: Module.h:564
Analysis providing profile information.
This class represents a function call, abstracting a target machine&#39;s calling convention.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
Analysis pass providing the TargetTransformInfo.
bool isLegalToPromote(CallSite CS, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:320
static ErrorOr< std::unique_ptr< SampleProfileReader > > create(const Twine &Filename, LLVMContext &C)
Create a sample profile reader appropriate to the file format.
unsigned getLine() const
Definition: DebugLoc.cpp:25
ErrorOr< uint64_t > findSamplesAt(uint32_t LineOffset, uint32_t Discriminator) const
Return the number of samples collected at the given location.
Definition: SampleProf.h:256
A debug info location.
Definition: DebugLoc.h:33
F(f)
uint64_t getOrCompHotCountThreshold()
Returns HotCountThreshold if set.
InlineResult InlineFunction(CallInst *C, InlineFunctionInfo &IFI, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true)
This function inlines the called function into the basic block of the caller.
void findInlinedFunctions(DenseSet< GlobalValue::GUID > &S, const Module *M, uint64_t Threshold) const
Recursively traverses all children, if the total sample count of the corresponding function is no les...
Definition: SampleProf.h:382
static SmallVector< InstrProfValueData, 2 > SortCallTargets(const SampleRecord::CallTargetMap &M)
Returns the sorted CallTargetMap M by count in descending order.
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
Definition: Cloning.h:173
bool isHotCount(uint64_t C)
Returns true if count C is considered hot.
Represents the cost of inlining a function.
Definition: InlineCost.h:63
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Representation of the samples collected for a function.
Definition: SampleProf.h:216
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:221
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
amdgpu Simplify well known AMD library false Value Value const Twine & Name
Definition: BitVector.h:937
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
void setEntryCount(ProfileCount Count, const DenseSet< GlobalValue::GUID > *Imports=nullptr)
Set the entry count for this function.
Definition: Function.cpp:1362
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
static cl::opt< std::string > SampleProfileRemappingFile("sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden)
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:243
Diagnostic information for optimization analysis remarks.
static StringRef getName(Value *V)
Metadata * getProfileSummary()
Returns profile summary metadata.
Definition: Module.cpp:539
StringRef getFilename() const
BlockT * getHeader() const
Definition: LoopInfo.h:99
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:102
Subprogram description.
const Instruction * getFirstNonPHIOrDbgOrLifetime() const
Returns a pointer to the first instruction in this block that is not a PHINode, a debug intrinsic...
Definition: BasicBlock.cpp:203
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
bool extractProfTotalWeight(uint64_t &TotalVal) const
Retrieve total raw weight values of a branch.
Definition: Metadata.cpp:1339
static cl::opt< std::string > SampleProfileFile("sample-profile-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile file loaded by -sample-profile"), cl::Hidden)
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:597
Debug location.
ModulePass * createSampleProfileLoaderPass()
Instruction * promoteIndirectCall(Instruction *Inst, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
amdgpu Simplify well known AMD library false Value * Callee
unsigned getNumSuccessors() const
Return the number of successors that this instruction has.
static cl::opt< unsigned > SampleProfileRecordCoverage("sample-profile-check-record-coverage", cl::init(0), cl::value_desc("N"), cl::desc("Emit a warning if less than N% of records in the input profile " "are matched to the IR."))
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:156
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:105
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:176
Core dominator tree base class.
Definition: LoopInfo.h:60
const BasicBlock & getEntryBlock() const
Definition: Function.h:639
static bool runOnFunction(Function &F, bool PostInlining)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:422
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
static cl::opt< unsigned > SampleProfileMaxPropagateIterations("sample-profile-max-propagate-iterations", cl::init(100), cl::desc("Maximum number of iterations to go through when propagating " "sample block/edge weights through the CFG."))
DISubprogram * getSubprogram() const
Get the attached subprogram.
Definition: Metadata.cpp:1507
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
const BodySampleMap & getBodySamples() const
Return all the samples collected in the body of the function.
Definition: SampleProf.h:350
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:128
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
Diagnostic information for applied optimization remarks.
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:112
Function::ProfileCount ProfileCount
Represent the analysis usage information of a pass.
Interval::pred_iterator pred_end(Interval *I)
Definition: Interval.h:115
DenseMap< SymbolStringPtr, JITEvaluatedSymbol > SymbolMap
A map from symbol names (as SymbolStringPtrs) to JITSymbols (address/flags pairs).
Definition: Core.h:47
void initializeSampleProfileLoaderLegacyPassPass(PassRegistry &)
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:192
Used in the streaming interface as the general argument type.
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:812
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
Class to represent profile counts.
Definition: Function.h:260
size_t size() const
Definition: SmallVector.h:52
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1225
StringRef getFuncNameInModule(const Module *M) const
Return the original function name if it exists in Module M.
Definition: SampleProf.h:409
InlineCost getInlineCost(CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI, std::function< AssumptionCache &(Function &)> &GetAssumptionCache, Optional< function_ref< BlockFrequencyInfo &(Function &)>> GetBFI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
Optional< bool > ComputeFullInlineCost
Compute inline cost even when the cost has exceeded the threshold.
Definition: InlineCost.h:180
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1115
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:209
const FunctionSamples * findFunctionSamplesAt(const LineLocation &Loc, StringRef CalleeName) const
Returns a pointer to FunctionSamples at the given callsite location Loc with callee CalleeName...
Definition: SampleProf.h:294
A function analysis which provides an AssumptionCache.
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:333
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options...
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:845
Module.h This file contains the declarations for the Module class.
static unsigned getOffset(const DILocation *DIL)
Returns the line offset to the start line of the subprogram.
Definition: SampleProf.cpp:164
static cl::opt< bool > ProfileSampleAccurate("profile-sample-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "callsite and function as having 0 samples. Otherwise, treat " "un-sampled callsites and functions conservatively as unknown. "))
ErrorOr< SampleRecord::CallTargetMap > findCallTargetMapAt(uint32_t LineOffset, uint32_t Discriminator) const
Returns the call target map collected at a given location.
Definition: SampleProf.h:269
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
StringMap - This is an unconventional map that is specialized for handling keys that are "strings"...
Definition: StringMap.h:219
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
static void clear(coro::Shape &Shape)
Definition: Coroutines.cpp:211
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:476
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
void setProfileSummary(Metadata *M)
Attach profile summary metadata to this module.
Definition: Module.cpp:535
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:132
unsigned getBaseDiscriminator() const
Returns the base discriminator stored in the discriminator.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:310
#define DEBUG_TYPE
static const size_t npos
Definition: StringRef.h:50
iterator begin()
Definition: StringMap.h:314
Represents the relative location of an instruction.
Definition: SampleProf.h:117
static cl::opt< unsigned > SampleProfileSampleCoverage("sample-profile-check-sample-coverage", cl::init(0), cl::value_desc("N"), cl::desc("Emit a warning if less than N% of samples in the input profile " "are matched to the IR."))
static bool callsiteIsHot(const FunctionSamples *CallsiteFS, ProfileSummaryInfo *PSI)
Return true if the given callsite is hot wrt to hot cutoff threshold.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:464
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:213
Establish a view to a call site for examination.
Definition: CallSite.h:886
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:224
iterator end()
Definition: DenseMap.h:108
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:322
std::map< std::string, FunctionSamples > FunctionSamplesMap
Definition: SampleProf.h:208
StringRef getName() const
Return the function name.
Definition: SampleProf.h:406
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:91
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile", "Sample Profile loader", false, false) INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:205
Provides ErrorOr<T> smart pointer.
FunTy * getCalledFunction() const
Return the function being called if this is a direct call, otherwise return null (if it&#39;s an indirect...
Definition: CallSite.h:106
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:565
uint64_t getTotalSamples() const
Return the total number of samples collected inside the function.
Definition: SampleProf.h:320
sample Sample Profile loader
const CallsiteSampleMap & getCallsiteSamples() const
Return all the callsite samples collected in the body of the function.
Definition: SampleProf.h:353
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:45
print Print MemDeps of function
This file defines a set of templates that efficiently compute a dominator tree over a generic graph...
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
A container for analyses that lazily runs them and caches their results.
This pass exposes codegen information to IR-level passes.
uint64_t getEntrySamples() const
Return the sample count of the first instruction of the function.
Definition: SampleProf.h:331
static cl::opt< bool > NoWarnSampleUnused("no-warn-sample-unused", cl::init(false), cl::Hidden, cl::desc("Use this option to turn off/on warnings about function with " "samples but without debug information to use those samples. "))
This header defines various interfaces for pass management in LLVM.
Diagnostic information for the sample profiler.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t find(char C, size_t From=0) const
Search for the first character C in the string.
Definition: StringRef.h:297
#define LLVM_DEBUG(X)
Definition: Debug.h:122
The optimization diagnostic interface.
iterator end()
Definition: StringMap.h:317
This file provides the interface for the sampled PGO loader pass.
reference get()
Definition: ErrorOr.h:156
const BasicBlock * getParent() const
Definition: Instruction.h:66
static ErrorOr< std::unique_ptr< SampleProfileReader > > create(const Twine &Filename, LLVMContext &C, std::unique_ptr< SampleProfileReader > Underlying)
Create a remapped sample profile from the given remapping file and underlying samples.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:1037