LLVM  9.0.0svn
SampleProfile.cpp
Go to the documentation of this file.
1 //===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the SampleProfileLoader transformation. This pass
10 // reads a profile file generated by a sampling profiler (e.g. Linux Perf -
11 // http://perf.wiki.kernel.org/) and generates IR metadata to reflect the
12 // profile information in the given profile.
13 //
14 // This pass generates branch weight annotations on the IR:
15 //
16 // - prof: Represents branch weights. This annotation is added to branches
17 // to indicate the weights of each edge coming out of the branch.
18 // The weight of each edge is the weight of the target block for
19 // that edge. The weight of a block B is computed as the maximum
20 // number of samples found in B.
21 //
22 //===----------------------------------------------------------------------===//
23 
25 #include "llvm/ADT/ArrayRef.h"
26 #include "llvm/ADT/DenseMap.h"
27 #include "llvm/ADT/DenseSet.h"
28 #include "llvm/ADT/None.h"
29 #include "llvm/ADT/SmallPtrSet.h"
30 #include "llvm/ADT/SmallSet.h"
31 #include "llvm/ADT/SmallVector.h"
32 #include "llvm/ADT/StringMap.h"
33 #include "llvm/ADT/StringRef.h"
34 #include "llvm/ADT/Twine.h"
37 #include "llvm/Analysis/LoopInfo.h"
42 #include "llvm/IR/BasicBlock.h"
43 #include "llvm/IR/CFG.h"
44 #include "llvm/IR/CallSite.h"
46 #include "llvm/IR/DebugLoc.h"
47 #include "llvm/IR/DiagnosticInfo.h"
48 #include "llvm/IR/Dominators.h"
49 #include "llvm/IR/Function.h"
50 #include "llvm/IR/GlobalValue.h"
51 #include "llvm/IR/InstrTypes.h"
52 #include "llvm/IR/Instruction.h"
53 #include "llvm/IR/Instructions.h"
54 #include "llvm/IR/IntrinsicInst.h"
55 #include "llvm/IR/LLVMContext.h"
56 #include "llvm/IR/MDBuilder.h"
57 #include "llvm/IR/Module.h"
58 #include "llvm/IR/PassManager.h"
60 #include "llvm/Pass.h"
64 #include "llvm/Support/Casting.h"
66 #include "llvm/Support/Debug.h"
68 #include "llvm/Support/ErrorOr.h"
71 #include "llvm/Transforms/IPO.h"
75 #include <algorithm>
76 #include <cassert>
77 #include <cstdint>
78 #include <functional>
79 #include <limits>
80 #include <map>
81 #include <memory>
82 #include <string>
83 #include <system_error>
84 #include <utility>
85 #include <vector>
86 
87 using namespace llvm;
88 using namespace sampleprof;
90 #define DEBUG_TYPE "sample-profile"
91 
92 // Command line option to specify the file to read samples from. This is
93 // mainly used for debugging.
95  "sample-profile-file", cl::init(""), cl::value_desc("filename"),
96  cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);
97 
98 // The named file contains a set of transformations that may have been applied
99 // to the symbol names between the program from which the sample data was
100 // collected and the current program's symbols.
102  "sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"),
103  cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden);
104 
106  "sample-profile-max-propagate-iterations", cl::init(100),
107  cl::desc("Maximum number of iterations to go through when propagating "
108  "sample block/edge weights through the CFG."));
109 
111  "sample-profile-check-record-coverage", cl::init(0), cl::value_desc("N"),
112  cl::desc("Emit a warning if less than N% of records in the input profile "
113  "are matched to the IR."));
114 
116  "sample-profile-check-sample-coverage", cl::init(0), cl::value_desc("N"),
117  cl::desc("Emit a warning if less than N% of samples in the input profile "
118  "are matched to the IR."));
119 
121  "no-warn-sample-unused", cl::init(false), cl::Hidden,
122  cl::desc("Use this option to turn off/on warnings about function with "
123  "samples but without debug information to use those samples. "));
124 
126  "profile-sample-accurate", cl::Hidden, cl::init(false),
127  cl::desc("If the sample profile is accurate, we will mark all un-sampled "
128  "callsite and function as having 0 samples. Otherwise, treat "
129  "un-sampled callsites and functions conservatively as unknown. "));
130 
131 namespace {
132 
133 using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
134 using EquivalenceClassMap = DenseMap<const BasicBlock *, const BasicBlock *>;
135 using Edge = std::pair<const BasicBlock *, const BasicBlock *>;
136 using EdgeWeightMap = DenseMap<Edge, uint64_t>;
137 using BlockEdgeMap =
139 
140 class SampleCoverageTracker {
141 public:
142  SampleCoverageTracker() = default;
143 
144  bool markSamplesUsed(const FunctionSamples *FS, uint32_t LineOffset,
145  uint32_t Discriminator, uint64_t Samples);
146  unsigned computeCoverage(unsigned Used, unsigned Total) const;
147  unsigned countUsedRecords(const FunctionSamples *FS,
148  ProfileSummaryInfo *PSI) const;
149  unsigned countBodyRecords(const FunctionSamples *FS,
150  ProfileSummaryInfo *PSI) const;
151  uint64_t getTotalUsedSamples() const { return TotalUsedSamples; }
152  uint64_t countBodySamples(const FunctionSamples *FS,
153  ProfileSummaryInfo *PSI) const;
154 
155  void clear() {
156  SampleCoverage.clear();
157  TotalUsedSamples = 0;
158  }
159 
160 private:
161  using BodySampleCoverageMap = std::map<LineLocation, unsigned>;
162  using FunctionSamplesCoverageMap =
164 
165  /// Coverage map for sampling records.
166  ///
167  /// This map keeps a record of sampling records that have been matched to
168  /// an IR instruction. This is used to detect some form of staleness in
169  /// profiles (see flag -sample-profile-check-coverage).
170  ///
171  /// Each entry in the map corresponds to a FunctionSamples instance. This is
172  /// another map that counts how many times the sample record at the
173  /// given location has been used.
174  FunctionSamplesCoverageMap SampleCoverage;
175 
176  /// Number of samples used from the profile.
177  ///
178  /// When a sampling record is used for the first time, the samples from
179  /// that record are added to this accumulator. Coverage is later computed
180  /// based on the total number of samples available in this function and
181  /// its callsites.
182  ///
183  /// Note that this accumulator tracks samples used from a single function
184  /// and all the inlined callsites. Strictly, we should have a map of counters
185  /// keyed by FunctionSamples pointers, but these stats are cleared after
186  /// every function, so we just need to keep a single counter.
187  uint64_t TotalUsedSamples = 0;
188 };
189 
190 /// Sample profile pass.
191 ///
192 /// This pass reads profile data from the file specified by
193 /// -sample-profile-file and annotates every affected function with the
194 /// profile information found in that file.
195 class SampleProfileLoader {
196 public:
197  SampleProfileLoader(
198  StringRef Name, StringRef RemapName, bool IsThinLTOPreLink,
199  std::function<AssumptionCache &(Function &)> GetAssumptionCache,
200  std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo)
201  : GetAC(std::move(GetAssumptionCache)),
202  GetTTI(std::move(GetTargetTransformInfo)), Filename(Name),
203  RemappingFilename(RemapName), IsThinLTOPreLink(IsThinLTOPreLink) {}
204 
205  bool doInitialization(Module &M);
206  bool runOnModule(Module &M, ModuleAnalysisManager *AM,
207  ProfileSummaryInfo *_PSI);
208 
209  void dump() { Reader->dump(); }
210 
211 protected:
213  unsigned getFunctionLoc(Function &F);
214  bool emitAnnotations(Function &F);
215  ErrorOr<uint64_t> getInstWeight(const Instruction &I);
216  ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB);
217  const FunctionSamples *findCalleeFunctionSamples(const Instruction &I) const;
218  std::vector<const FunctionSamples *>
219  findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
220  mutable DenseMap<const DILocation *, const FunctionSamples *> DILocation2SampleMap;
221  const FunctionSamples *findFunctionSamples(const Instruction &I) const;
222  bool inlineCallInstruction(Instruction *I);
223  bool inlineHotFunctions(Function &F,
224  DenseSet<GlobalValue::GUID> &InlinedGUIDs);
225  void printEdgeWeight(raw_ostream &OS, Edge E);
226  void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const;
227  void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB);
228  bool computeBlockWeights(Function &F);
229  void findEquivalenceClasses(Function &F);
230  template <bool IsPostDom>
231  void findEquivalencesFor(BasicBlock *BB1, ArrayRef<BasicBlock *> Descendants,
233 
234  void propagateWeights(Function &F);
235  uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
236  void buildEdges(Function &F);
237  bool propagateThroughEdges(Function &F, bool UpdateBlockCount);
238  void computeDominanceAndLoopInfo(Function &F);
239  void clearFunctionData();
240 
241  /// Map basic blocks to their computed weights.
242  ///
243  /// The weight of a basic block is defined to be the maximum
244  /// of all the instruction weights in that block.
245  BlockWeightMap BlockWeights;
246 
247  /// Map edges to their computed weights.
248  ///
249  /// Edge weights are computed by propagating basic block weights in
250  /// SampleProfile::propagateWeights.
251  EdgeWeightMap EdgeWeights;
252 
253  /// Set of visited blocks during propagation.
255 
256  /// Set of visited edges during propagation.
257  SmallSet<Edge, 32> VisitedEdges;
258 
259  /// Equivalence classes for block weights.
260  ///
261  /// Two blocks BB1 and BB2 are in the same equivalence class if they
262  /// dominate and post-dominate each other, and they are in the same loop
263  /// nest. When this happens, the two blocks are guaranteed to execute
264  /// the same number of times.
265  EquivalenceClassMap EquivalenceClass;
266 
267  /// Map from function name to Function *. Used to find the function from
268  /// the function name. If the function name contains suffix, additional
269  /// entry is added to map from the stripped name to the function if there
270  /// is one-to-one mapping.
272 
273  /// Dominance, post-dominance and loop information.
274  std::unique_ptr<DominatorTree> DT;
275  std::unique_ptr<PostDominatorTree> PDT;
276  std::unique_ptr<LoopInfo> LI;
277 
278  std::function<AssumptionCache &(Function &)> GetAC;
279  std::function<TargetTransformInfo &(Function &)> GetTTI;
280 
281  /// Predecessors for each basic block in the CFG.
282  BlockEdgeMap Predecessors;
283 
284  /// Successors for each basic block in the CFG.
285  BlockEdgeMap Successors;
286 
287  SampleCoverageTracker CoverageTracker;
288 
289  /// Profile reader object.
290  std::unique_ptr<SampleProfileReader> Reader;
291 
292  /// Samples collected for the body of this function.
293  FunctionSamples *Samples = nullptr;
294 
295  /// Name of the profile file to load.
296  std::string Filename;
297 
298  /// Name of the profile remapping file to load.
299  std::string RemappingFilename;
300 
301  /// Flag indicating whether the profile input loaded successfully.
302  bool ProfileIsValid = false;
303 
304  /// Flag indicating if the pass is invoked in ThinLTO compile phase.
305  ///
306  /// In this phase, in annotation, we should not promote indirect calls.
307  /// Instead, we will mark GUIDs that needs to be annotated to the function.
308  bool IsThinLTOPreLink;
309 
310  /// Profile Summary Info computed from sample profile.
311  ProfileSummaryInfo *PSI = nullptr;
312 
313  /// Total number of samples collected in this profile.
314  ///
315  /// This is the sum of all the samples collected in all the functions executed
316  /// at runtime.
317  uint64_t TotalCollectedSamples = 0;
318 
319  /// Optimization Remark Emitter used to emit diagnostic remarks.
320  OptimizationRemarkEmitter *ORE = nullptr;
321 
322  // Information recorded when we declined to inline a call site
323  // because we have determined it is too cold is accumulated for
324  // each callee function. Initially this is just the entry count.
325  struct NotInlinedProfileInfo {
326  uint64_t entryCount;
327  };
329 };
330 
331 class SampleProfileLoaderLegacyPass : public ModulePass {
332 public:
333  // Class identification, replacement for typeinfo
334  static char ID;
335 
336  SampleProfileLoaderLegacyPass(StringRef Name = SampleProfileFile,
337  bool IsThinLTOPreLink = false)
338  : ModulePass(ID),
339  SampleLoader(Name, SampleProfileRemappingFile, IsThinLTOPreLink,
340  [&](Function &F) -> AssumptionCache & {
341  return ACT->getAssumptionCache(F);
342  },
343  [&](Function &F) -> TargetTransformInfo & {
344  return TTIWP->getTTI(F);
345  }) {
348  }
349 
350  void dump() { SampleLoader.dump(); }
351 
352  bool doInitialization(Module &M) override {
353  return SampleLoader.doInitialization(M);
354  }
355 
356  StringRef getPassName() const override { return "Sample profile pass"; }
357  bool runOnModule(Module &M) override;
358 
359  void getAnalysisUsage(AnalysisUsage &AU) const override {
363  }
364 
365 private:
366  SampleProfileLoader SampleLoader;
367  AssumptionCacheTracker *ACT = nullptr;
368  TargetTransformInfoWrapperPass *TTIWP = nullptr;
369 };
370 
371 } // end anonymous namespace
372 
373 /// Return true if the given callsite is hot wrt to hot cutoff threshold.
374 ///
375 /// Functions that were inlined in the original binary will be represented
376 /// in the inline stack in the sample profile. If the profile shows that
377 /// the original inline decision was "good" (i.e., the callsite is executed
378 /// frequently), then we will recreate the inline decision and apply the
379 /// profile from the inlined callsite.
380 ///
381 /// To decide whether an inlined callsite is hot, we compare the callsite
382 /// sample count with the hot cutoff computed by ProfileSummaryInfo, it is
383 /// regarded as hot if the count is above the cutoff value.
384 static bool callsiteIsHot(const FunctionSamples *CallsiteFS,
385  ProfileSummaryInfo *PSI) {
386  if (!CallsiteFS)
387  return false; // The callsite was not inlined in the original binary.
388 
389  assert(PSI && "PSI is expected to be non null");
390  uint64_t CallsiteTotalSamples = CallsiteFS->getTotalSamples();
391  return PSI->isHotCount(CallsiteTotalSamples);
392 }
393 
394 /// Mark as used the sample record for the given function samples at
395 /// (LineOffset, Discriminator).
396 ///
397 /// \returns true if this is the first time we mark the given record.
398 bool SampleCoverageTracker::markSamplesUsed(const FunctionSamples *FS,
399  uint32_t LineOffset,
400  uint32_t Discriminator,
401  uint64_t Samples) {
402  LineLocation Loc(LineOffset, Discriminator);
403  unsigned &Count = SampleCoverage[FS][Loc];
404  bool FirstTime = (++Count == 1);
405  if (FirstTime)
406  TotalUsedSamples += Samples;
407  return FirstTime;
408 }
409 
410 /// Return the number of sample records that were applied from this profile.
411 ///
412 /// This count does not include records from cold inlined callsites.
413 unsigned
414 SampleCoverageTracker::countUsedRecords(const FunctionSamples *FS,
415  ProfileSummaryInfo *PSI) const {
416  auto I = SampleCoverage.find(FS);
417 
418  // The size of the coverage map for FS represents the number of records
419  // that were marked used at least once.
420  unsigned Count = (I != SampleCoverage.end()) ? I->second.size() : 0;
421 
422  // If there are inlined callsites in this function, count the samples found
423  // in the respective bodies. However, do not bother counting callees with 0
424  // total samples, these are callees that were never invoked at runtime.
425  for (const auto &I : FS->getCallsiteSamples())
426  for (const auto &J : I.second) {
427  const FunctionSamples *CalleeSamples = &J.second;
428  if (callsiteIsHot(CalleeSamples, PSI))
429  Count += countUsedRecords(CalleeSamples, PSI);
430  }
431 
432  return Count;
433 }
434 
435 /// Return the number of sample records in the body of this profile.
436 ///
437 /// This count does not include records from cold inlined callsites.
438 unsigned
439 SampleCoverageTracker::countBodyRecords(const FunctionSamples *FS,
440  ProfileSummaryInfo *PSI) const {
441  unsigned Count = FS->getBodySamples().size();
442 
443  // Only count records in hot callsites.
444  for (const auto &I : FS->getCallsiteSamples())
445  for (const auto &J : I.second) {
446  const FunctionSamples *CalleeSamples = &J.second;
447  if (callsiteIsHot(CalleeSamples, PSI))
448  Count += countBodyRecords(CalleeSamples, PSI);
449  }
450 
451  return Count;
452 }
453 
454 /// Return the number of samples collected in the body of this profile.
455 ///
456 /// This count does not include samples from cold inlined callsites.
457 uint64_t
458 SampleCoverageTracker::countBodySamples(const FunctionSamples *FS,
459  ProfileSummaryInfo *PSI) const {
460  uint64_t Total = 0;
461  for (const auto &I : FS->getBodySamples())
462  Total += I.second.getSamples();
463 
464  // Only count samples in hot callsites.
465  for (const auto &I : FS->getCallsiteSamples())
466  for (const auto &J : I.second) {
467  const FunctionSamples *CalleeSamples = &J.second;
468  if (callsiteIsHot(CalleeSamples, PSI))
469  Total += countBodySamples(CalleeSamples, PSI);
470  }
471 
472  return Total;
473 }
474 
475 /// Return the fraction of sample records used in this profile.
476 ///
477 /// The returned value is an unsigned integer in the range 0-100 indicating
478 /// the percentage of sample records that were used while applying this
479 /// profile to the associated function.
480 unsigned SampleCoverageTracker::computeCoverage(unsigned Used,
481  unsigned Total) const {
482  assert(Used <= Total &&
483  "number of used records cannot exceed the total number of records");
484  return Total > 0 ? Used * 100 / Total : 100;
485 }
486 
487 /// Clear all the per-function data used to load samples and propagate weights.
488 void SampleProfileLoader::clearFunctionData() {
489  BlockWeights.clear();
490  EdgeWeights.clear();
491  VisitedBlocks.clear();
492  VisitedEdges.clear();
493  EquivalenceClass.clear();
494  DT = nullptr;
495  PDT = nullptr;
496  LI = nullptr;
497  Predecessors.clear();
498  Successors.clear();
499  CoverageTracker.clear();
500 }
501 
502 #ifndef NDEBUG
503 /// Print the weight of edge \p E on stream \p OS.
504 ///
505 /// \param OS Stream to emit the output to.
506 /// \param E Edge to print.
507 void SampleProfileLoader::printEdgeWeight(raw_ostream &OS, Edge E) {
508  OS << "weight[" << E.first->getName() << "->" << E.second->getName()
509  << "]: " << EdgeWeights[E] << "\n";
510 }
511 
512 /// Print the equivalence class of block \p BB on stream \p OS.
513 ///
514 /// \param OS Stream to emit the output to.
515 /// \param BB Block to print.
516 void SampleProfileLoader::printBlockEquivalence(raw_ostream &OS,
517  const BasicBlock *BB) {
518  const BasicBlock *Equiv = EquivalenceClass[BB];
519  OS << "equivalence[" << BB->getName()
520  << "]: " << ((Equiv) ? EquivalenceClass[BB]->getName() : "NONE") << "\n";
521 }
522 
523 /// Print the weight of block \p BB on stream \p OS.
524 ///
525 /// \param OS Stream to emit the output to.
526 /// \param BB Block to print.
527 void SampleProfileLoader::printBlockWeight(raw_ostream &OS,
528  const BasicBlock *BB) const {
529  const auto &I = BlockWeights.find(BB);
530  uint64_t W = (I == BlockWeights.end() ? 0 : I->second);
531  OS << "weight[" << BB->getName() << "]: " << W << "\n";
532 }
533 #endif
534 
535 /// Get the weight for an instruction.
536 ///
537 /// The "weight" of an instruction \p Inst is the number of samples
538 /// collected on that instruction at runtime. To retrieve it, we
539 /// need to compute the line number of \p Inst relative to the start of its
540 /// function. We use HeaderLineno to compute the offset. We then
541 /// look up the samples collected for \p Inst using BodySamples.
542 ///
543 /// \param Inst Instruction to query.
544 ///
545 /// \returns the weight of \p Inst.
546 ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
547  const DebugLoc &DLoc = Inst.getDebugLoc();
548  if (!DLoc)
549  return std::error_code();
550 
551  const FunctionSamples *FS = findFunctionSamples(Inst);
552  if (!FS)
553  return std::error_code();
554 
555  // Ignore all intrinsics, phinodes and branch instructions.
556  // Branch and phinodes instruction usually contains debug info from sources outside of
557  // the residing basic block, thus we ignore them during annotation.
558  if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst) || isa<PHINode>(Inst))
559  return std::error_code();
560 
561  // If a direct call/invoke instruction is inlined in profile
562  // (findCalleeFunctionSamples returns non-empty result), but not inlined here,
563  // it means that the inlined callsite has no sample, thus the call
564  // instruction should have 0 count.
565  if ((isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) &&
566  !ImmutableCallSite(&Inst).isIndirectCall() &&
567  findCalleeFunctionSamples(Inst))
568  return 0;
569 
570  const DILocation *DIL = DLoc;
571  uint32_t LineOffset = FunctionSamples::getOffset(DIL);
572  uint32_t Discriminator = DIL->getBaseDiscriminator();
573  ErrorOr<uint64_t> R = FS->findSamplesAt(LineOffset, Discriminator);
574  if (R) {
575  bool FirstMark =
576  CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator, R.get());
577  if (FirstMark) {
578  ORE->emit([&]() {
579  OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "AppliedSamples", &Inst);
580  Remark << "Applied " << ore::NV("NumSamples", *R);
581  Remark << " samples from profile (offset: ";
582  Remark << ore::NV("LineOffset", LineOffset);
583  if (Discriminator) {
584  Remark << ".";
585  Remark << ore::NV("Discriminator", Discriminator);
586  }
587  Remark << ")";
588  return Remark;
589  });
590  }
591  LLVM_DEBUG(dbgs() << " " << DLoc.getLine() << "."
592  << DIL->getBaseDiscriminator() << ":" << Inst
593  << " (line offset: " << LineOffset << "."
594  << DIL->getBaseDiscriminator() << " - weight: " << R.get()
595  << ")\n");
596  }
597  return R;
598 }
599 
600 /// Compute the weight of a basic block.
601 ///
602 /// The weight of basic block \p BB is the maximum weight of all the
603 /// instructions in BB.
604 ///
605 /// \param BB The basic block to query.
606 ///
607 /// \returns the weight for \p BB.
608 ErrorOr<uint64_t> SampleProfileLoader::getBlockWeight(const BasicBlock *BB) {
609  uint64_t Max = 0;
610  bool HasWeight = false;
611  for (auto &I : BB->getInstList()) {
612  const ErrorOr<uint64_t> &R = getInstWeight(I);
613  if (R) {
614  Max = std::max(Max, R.get());
615  HasWeight = true;
616  }
617  }
618  return HasWeight ? ErrorOr<uint64_t>(Max) : std::error_code();
619 }
620 
621 /// Compute and store the weights of every basic block.
622 ///
623 /// This populates the BlockWeights map by computing
624 /// the weights of every basic block in the CFG.
625 ///
626 /// \param F The function to query.
627 bool SampleProfileLoader::computeBlockWeights(Function &F) {
628  bool Changed = false;
629  LLVM_DEBUG(dbgs() << "Block weights\n");
630  for (const auto &BB : F) {
631  ErrorOr<uint64_t> Weight = getBlockWeight(&BB);
632  if (Weight) {
633  BlockWeights[&BB] = Weight.get();
634  VisitedBlocks.insert(&BB);
635  Changed = true;
636  }
637  LLVM_DEBUG(printBlockWeight(dbgs(), &BB));
638  }
639 
640  return Changed;
641 }
642 
643 /// Get the FunctionSamples for a call instruction.
644 ///
645 /// The FunctionSamples of a call/invoke instruction \p Inst is the inlined
646 /// instance in which that call instruction is calling to. It contains
647 /// all samples that resides in the inlined instance. We first find the
648 /// inlined instance in which the call instruction is from, then we
649 /// traverse its children to find the callsite with the matching
650 /// location.
651 ///
652 /// \param Inst Call/Invoke instruction to query.
653 ///
654 /// \returns The FunctionSamples pointer to the inlined instance.
655 const FunctionSamples *
656 SampleProfileLoader::findCalleeFunctionSamples(const Instruction &Inst) const {
657  const DILocation *DIL = Inst.getDebugLoc();
658  if (!DIL) {
659  return nullptr;
660  }
661 
662  StringRef CalleeName;
663  if (const CallInst *CI = dyn_cast<CallInst>(&Inst))
664  if (Function *Callee = CI->getCalledFunction())
665  CalleeName = Callee->getName();
666 
667  const FunctionSamples *FS = findFunctionSamples(Inst);
668  if (FS == nullptr)
669  return nullptr;
670 
672  DIL->getBaseDiscriminator()),
673  CalleeName);
674 }
675 
676 /// Returns a vector of FunctionSamples that are the indirect call targets
677 /// of \p Inst. The vector is sorted by the total number of samples. Stores
678 /// the total call count of the indirect call in \p Sum.
679 std::vector<const FunctionSamples *>
680 SampleProfileLoader::findIndirectCallFunctionSamples(
681  const Instruction &Inst, uint64_t &Sum) const {
682  const DILocation *DIL = Inst.getDebugLoc();
683  std::vector<const FunctionSamples *> R;
684 
685  if (!DIL) {
686  return R;
687  }
688 
689  const FunctionSamples *FS = findFunctionSamples(Inst);
690  if (FS == nullptr)
691  return R;
692 
693  uint32_t LineOffset = FunctionSamples::getOffset(DIL);
694  uint32_t Discriminator = DIL->getBaseDiscriminator();
695 
696  auto T = FS->findCallTargetMapAt(LineOffset, Discriminator);
697  Sum = 0;
698  if (T)
699  for (const auto &T_C : T.get())
700  Sum += T_C.second;
703  if (M->empty())
704  return R;
705  for (const auto &NameFS : *M) {
706  Sum += NameFS.second.getEntrySamples();
707  R.push_back(&NameFS.second);
708  }
709  llvm::sort(R, [](const FunctionSamples *L, const FunctionSamples *R) {
710  if (L->getEntrySamples() != R->getEntrySamples())
711  return L->getEntrySamples() > R->getEntrySamples();
712  return FunctionSamples::getGUID(L->getName()) <
714  });
715  }
716  return R;
717 }
718 
719 /// Get the FunctionSamples for an instruction.
720 ///
721 /// The FunctionSamples of an instruction \p Inst is the inlined instance
722 /// in which that instruction is coming from. We traverse the inline stack
723 /// of that instruction, and match it with the tree nodes in the profile.
724 ///
725 /// \param Inst Instruction to query.
726 ///
727 /// \returns the FunctionSamples pointer to the inlined instance.
728 const FunctionSamples *
729 SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
730  const DILocation *DIL = Inst.getDebugLoc();
731  if (!DIL)
732  return Samples;
733 
734  auto it = DILocation2SampleMap.try_emplace(DIL,nullptr);
735  if (it.second)
736  it.first->second = Samples->findFunctionSamples(DIL);
737  return it.first->second;
738 }
739 
740 bool SampleProfileLoader::inlineCallInstruction(Instruction *I) {
741  assert(isa<CallInst>(I) || isa<InvokeInst>(I));
742  CallSite CS(I);
743  Function *CalledFunction = CS.getCalledFunction();
744  assert(CalledFunction);
745  DebugLoc DLoc = I->getDebugLoc();
746  BasicBlock *BB = I->getParent();
747  InlineParams Params = getInlineParams();
748  Params.ComputeFullInlineCost = true;
749  // Checks if there is anything in the reachable portion of the callee at
750  // this callsite that makes this inlining potentially illegal. Need to
751  // set ComputeFullInlineCost, otherwise getInlineCost may return early
752  // when cost exceeds threshold without checking all IRs in the callee.
753  // The acutal cost does not matter because we only checks isNever() to
754  // see if it is legal to inline the callsite.
755  InlineCost Cost = getInlineCost(CS, Params, GetTTI(*CalledFunction), GetAC,
756  None, nullptr, nullptr);
757  if (Cost.isNever()) {
758  ORE->emit(OptimizationRemark(DEBUG_TYPE, "Not inline", DLoc, BB)
759  << "incompatible inlining");
760  return false;
761  }
762  InlineFunctionInfo IFI(nullptr, &GetAC);
763  if (InlineFunction(CS, IFI)) {
764  // The call to InlineFunction erases I, so we can't pass it here.
765  ORE->emit(OptimizationRemark(DEBUG_TYPE, "HotInline", DLoc, BB)
766  << "inlined hot callee '" << ore::NV("Callee", CalledFunction)
767  << "' into '" << ore::NV("Caller", BB->getParent()) << "'");
768  return true;
769  }
770  return false;
771 }
772 
773 /// Iteratively inline hot callsites of a function.
774 ///
775 /// Iteratively traverse all callsites of the function \p F, and find if
776 /// the corresponding inlined instance exists and is hot in profile. If
777 /// it is hot enough, inline the callsites and adds new callsites of the
778 /// callee into the caller. If the call is an indirect call, first promote
779 /// it to direct call. Each indirect call is limited with a single target.
780 ///
781 /// \param F function to perform iterative inlining.
782 /// \param InlinedGUIDs a set to be updated to include all GUIDs that are
783 /// inlined in the profiled binary.
784 ///
785 /// \returns True if there is any inline happened.
786 bool SampleProfileLoader::inlineHotFunctions(
787  Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
788  DenseSet<Instruction *> PromotedInsns;
789 
790  DenseMap<Instruction *, const FunctionSamples *> localNotInlinedCallSites;
791  bool Changed = false;
792  while (true) {
793  bool LocalChanged = false;
795  for (auto &BB : F) {
796  bool Hot = false;
798  for (auto &I : BB.getInstList()) {
799  const FunctionSamples *FS = nullptr;
800  if ((isa<CallInst>(I) || isa<InvokeInst>(I)) &&
801  !isa<IntrinsicInst>(I) && (FS = findCalleeFunctionSamples(I))) {
802  Candidates.push_back(&I);
803  if (FS->getEntrySamples() > 0)
804  localNotInlinedCallSites.try_emplace(&I, FS);
805  if (callsiteIsHot(FS, PSI))
806  Hot = true;
807  }
808  }
809  if (Hot) {
810  CIS.insert(CIS.begin(), Candidates.begin(), Candidates.end());
811  }
812  }
813  for (auto I : CIS) {
814  Function *CalledFunction = CallSite(I).getCalledFunction();
815  // Do not inline recursive calls.
816  if (CalledFunction == &F)
817  continue;
818  if (CallSite(I).isIndirectCall()) {
819  if (PromotedInsns.count(I))
820  continue;
821  uint64_t Sum;
822  for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
823  if (IsThinLTOPreLink) {
824  FS->findInlinedFunctions(InlinedGUIDs, F.getParent(),
826  continue;
827  }
828  auto CalleeFunctionName = FS->getFuncNameInModule(F.getParent());
829  // If it is a recursive call, we do not inline it as it could bloat
830  // the code exponentially. There is way to better handle this, e.g.
831  // clone the caller first, and inline the cloned caller if it is
832  // recursive. As llvm does not inline recursive calls, we will
833  // simply ignore it instead of handling it explicitly.
834  if (CalleeFunctionName == F.getName())
835  continue;
836 
837  const char *Reason = "Callee function not available";
838  auto R = SymbolMap.find(CalleeFunctionName);
839  if (R != SymbolMap.end() && R->getValue() &&
840  !R->getValue()->isDeclaration() &&
841  R->getValue()->getSubprogram() &&
842  isLegalToPromote(CallSite(I), R->getValue(), &Reason)) {
843  uint64_t C = FS->getEntrySamples();
844  Instruction *DI =
845  pgo::promoteIndirectCall(I, R->getValue(), C, Sum, false, ORE);
846  Sum -= C;
847  PromotedInsns.insert(I);
848  // If profile mismatches, we should not attempt to inline DI.
849  if ((isa<CallInst>(DI) || isa<InvokeInst>(DI)) &&
850  inlineCallInstruction(DI)) {
851  localNotInlinedCallSites.erase(I);
852  LocalChanged = true;
853  }
854  } else {
855  LLVM_DEBUG(dbgs()
856  << "\nFailed to promote indirect call to "
857  << CalleeFunctionName << " because " << Reason << "\n");
858  }
859  }
860  } else if (CalledFunction && CalledFunction->getSubprogram() &&
861  !CalledFunction->isDeclaration()) {
862  if (inlineCallInstruction(I)) {
863  localNotInlinedCallSites.erase(I);
864  LocalChanged = true;
865  }
866  } else if (IsThinLTOPreLink) {
867  findCalleeFunctionSamples(*I)->findInlinedFunctions(
868  InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold());
869  }
870  }
871  if (LocalChanged) {
872  Changed = true;
873  } else {
874  break;
875  }
876  }
877 
878  // Accumulate not inlined callsite information into notInlinedSamples
879  for (const auto &Pair : localNotInlinedCallSites) {
880  Instruction *I = Pair.getFirst();
882  if (!Callee || Callee->isDeclaration())
883  continue;
884  const FunctionSamples *FS = Pair.getSecond();
885  auto pair =
886  notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
887  pair.first->second.entryCount += FS->getEntrySamples();
888  }
889  return Changed;
890 }
891 
892 /// Find equivalence classes for the given block.
893 ///
894 /// This finds all the blocks that are guaranteed to execute the same
895 /// number of times as \p BB1. To do this, it traverses all the
896 /// descendants of \p BB1 in the dominator or post-dominator tree.
897 ///
898 /// A block BB2 will be in the same equivalence class as \p BB1 if
899 /// the following holds:
900 ///
901 /// 1- \p BB1 is a descendant of BB2 in the opposite tree. So, if BB2
902 /// is a descendant of \p BB1 in the dominator tree, then BB2 should
903 /// dominate BB1 in the post-dominator tree.
904 ///
905 /// 2- Both BB2 and \p BB1 must be in the same loop.
906 ///
907 /// For every block BB2 that meets those two requirements, we set BB2's
908 /// equivalence class to \p BB1.
909 ///
910 /// \param BB1 Block to check.
911 /// \param Descendants Descendants of \p BB1 in either the dom or pdom tree.
912 /// \param DomTree Opposite dominator tree. If \p Descendants is filled
913 /// with blocks from \p BB1's dominator tree, then
914 /// this is the post-dominator tree, and vice versa.
915 template <bool IsPostDom>
916 void SampleProfileLoader::findEquivalencesFor(
917  BasicBlock *BB1, ArrayRef<BasicBlock *> Descendants,
919  const BasicBlock *EC = EquivalenceClass[BB1];
920  uint64_t Weight = BlockWeights[EC];
921  for (const auto *BB2 : Descendants) {
922  bool IsDomParent = DomTree->dominates(BB2, BB1);
923  bool IsInSameLoop = LI->getLoopFor(BB1) == LI->getLoopFor(BB2);
924  if (BB1 != BB2 && IsDomParent && IsInSameLoop) {
925  EquivalenceClass[BB2] = EC;
926  // If BB2 is visited, then the entire EC should be marked as visited.
927  if (VisitedBlocks.count(BB2)) {
928  VisitedBlocks.insert(EC);
929  }
930 
931  // If BB2 is heavier than BB1, make BB2 have the same weight
932  // as BB1.
933  //
934  // Note that we don't worry about the opposite situation here
935  // (when BB2 is lighter than BB1). We will deal with this
936  // during the propagation phase. Right now, we just want to
937  // make sure that BB1 has the largest weight of all the
938  // members of its equivalence set.
939  Weight = std::max(Weight, BlockWeights[BB2]);
940  }
941  }
942  if (EC == &EC->getParent()->getEntryBlock()) {
943  BlockWeights[EC] = Samples->getHeadSamples() + 1;
944  } else {
945  BlockWeights[EC] = Weight;
946  }
947 }
948 
949 /// Find equivalence classes.
950 ///
951 /// Since samples may be missing from blocks, we can fill in the gaps by setting
952 /// the weights of all the blocks in the same equivalence class to the same
953 /// weight. To compute the concept of equivalence, we use dominance and loop
954 /// information. Two blocks B1 and B2 are in the same equivalence class if B1
955 /// dominates B2, B2 post-dominates B1 and both are in the same loop.
956 ///
957 /// \param F The function to query.
958 void SampleProfileLoader::findEquivalenceClasses(Function &F) {
959  SmallVector<BasicBlock *, 8> DominatedBBs;
960  LLVM_DEBUG(dbgs() << "\nBlock equivalence classes\n");
961  // Find equivalence sets based on dominance and post-dominance information.
962  for (auto &BB : F) {
963  BasicBlock *BB1 = &BB;
964 
965  // Compute BB1's equivalence class once.
966  if (EquivalenceClass.count(BB1)) {
967  LLVM_DEBUG(printBlockEquivalence(dbgs(), BB1));
968  continue;
969  }
970 
971  // By default, blocks are in their own equivalence class.
972  EquivalenceClass[BB1] = BB1;
973 
974  // Traverse all the blocks dominated by BB1. We are looking for
975  // every basic block BB2 such that:
976  //
977  // 1- BB1 dominates BB2.
978  // 2- BB2 post-dominates BB1.
979  // 3- BB1 and BB2 are in the same loop nest.
980  //
981  // If all those conditions hold, it means that BB2 is executed
982  // as many times as BB1, so they are placed in the same equivalence
983  // class by making BB2's equivalence class be BB1.
984  DominatedBBs.clear();
985  DT->getDescendants(BB1, DominatedBBs);
986  findEquivalencesFor(BB1, DominatedBBs, PDT.get());
987 
988  LLVM_DEBUG(printBlockEquivalence(dbgs(), BB1));
989  }
990 
991  // Assign weights to equivalence classes.
992  //
993  // All the basic blocks in the same equivalence class will execute
994  // the same number of times. Since we know that the head block in
995  // each equivalence class has the largest weight, assign that weight
996  // to all the blocks in that equivalence class.
997  LLVM_DEBUG(
998  dbgs() << "\nAssign the same weight to all blocks in the same class\n");
999  for (auto &BI : F) {
1000  const BasicBlock *BB = &BI;
1001  const BasicBlock *EquivBB = EquivalenceClass[BB];
1002  if (BB != EquivBB)
1003  BlockWeights[BB] = BlockWeights[EquivBB];
1004  LLVM_DEBUG(printBlockWeight(dbgs(), BB));
1005  }
1006 }
1007 
1008 /// Visit the given edge to decide if it has a valid weight.
1009 ///
1010 /// If \p E has not been visited before, we copy to \p UnknownEdge
1011 /// and increment the count of unknown edges.
1012 ///
1013 /// \param E Edge to visit.
1014 /// \param NumUnknownEdges Current number of unknown edges.
1015 /// \param UnknownEdge Set if E has not been visited before.
1016 ///
1017 /// \returns E's weight, if known. Otherwise, return 0.
1018 uint64_t SampleProfileLoader::visitEdge(Edge E, unsigned *NumUnknownEdges,
1019  Edge *UnknownEdge) {
1020  if (!VisitedEdges.count(E)) {
1021  (*NumUnknownEdges)++;
1022  *UnknownEdge = E;
1023  return 0;
1024  }
1025 
1026  return EdgeWeights[E];
1027 }
1028 
1029 /// Propagate weights through incoming/outgoing edges.
1030 ///
1031 /// If the weight of a basic block is known, and there is only one edge
1032 /// with an unknown weight, we can calculate the weight of that edge.
1033 ///
1034 /// Similarly, if all the edges have a known count, we can calculate the
1035 /// count of the basic block, if needed.
1036 ///
1037 /// \param F Function to process.
1038 /// \param UpdateBlockCount Whether we should update basic block counts that
1039 /// has already been annotated.
1040 ///
1041 /// \returns True if new weights were assigned to edges or blocks.
1042 bool SampleProfileLoader::propagateThroughEdges(Function &F,
1043  bool UpdateBlockCount) {
1044  bool Changed = false;
1045  LLVM_DEBUG(dbgs() << "\nPropagation through edges\n");
1046  for (const auto &BI : F) {
1047  const BasicBlock *BB = &BI;
1048  const BasicBlock *EC = EquivalenceClass[BB];
1049 
1050  // Visit all the predecessor and successor edges to determine
1051  // which ones have a weight assigned already. Note that it doesn't
1052  // matter that we only keep track of a single unknown edge. The
1053  // only case we are interested in handling is when only a single
1054  // edge is unknown (see setEdgeOrBlockWeight).
1055  for (unsigned i = 0; i < 2; i++) {
1056  uint64_t TotalWeight = 0;
1057  unsigned NumUnknownEdges = 0, NumTotalEdges = 0;
1058  Edge UnknownEdge, SelfReferentialEdge, SingleEdge;
1059 
1060  if (i == 0) {
1061  // First, visit all predecessor edges.
1062  NumTotalEdges = Predecessors[BB].size();
1063  for (auto *Pred : Predecessors[BB]) {
1064  Edge E = std::make_pair(Pred, BB);
1065  TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
1066  if (E.first == E.second)
1067  SelfReferentialEdge = E;
1068  }
1069  if (NumTotalEdges == 1) {
1070  SingleEdge = std::make_pair(Predecessors[BB][0], BB);
1071  }
1072  } else {
1073  // On the second round, visit all successor edges.
1074  NumTotalEdges = Successors[BB].size();
1075  for (auto *Succ : Successors[BB]) {
1076  Edge E = std::make_pair(BB, Succ);
1077  TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
1078  }
1079  if (NumTotalEdges == 1) {
1080  SingleEdge = std::make_pair(BB, Successors[BB][0]);
1081  }
1082  }
1083 
1084  // After visiting all the edges, there are three cases that we
1085  // can handle immediately:
1086  //
1087  // - All the edge weights are known (i.e., NumUnknownEdges == 0).
1088  // In this case, we simply check that the sum of all the edges
1089  // is the same as BB's weight. If not, we change BB's weight
1090  // to match. Additionally, if BB had not been visited before,
1091  // we mark it visited.
1092  //
1093  // - Only one edge is unknown and BB has already been visited.
1094  // In this case, we can compute the weight of the edge by
1095  // subtracting the total block weight from all the known
1096  // edge weights. If the edges weight more than BB, then the
1097  // edge of the last remaining edge is set to zero.
1098  //
1099  // - There exists a self-referential edge and the weight of BB is
1100  // known. In this case, this edge can be based on BB's weight.
1101  // We add up all the other known edges and set the weight on
1102  // the self-referential edge as we did in the previous case.
1103  //
1104  // In any other case, we must continue iterating. Eventually,
1105  // all edges will get a weight, or iteration will stop when
1106  // it reaches SampleProfileMaxPropagateIterations.
1107  if (NumUnknownEdges <= 1) {
1108  uint64_t &BBWeight = BlockWeights[EC];
1109  if (NumUnknownEdges == 0) {
1110  if (!VisitedBlocks.count(EC)) {
1111  // If we already know the weight of all edges, the weight of the
1112  // basic block can be computed. It should be no larger than the sum
1113  // of all edge weights.
1114  if (TotalWeight > BBWeight) {
1115  BBWeight = TotalWeight;
1116  Changed = true;
1117  LLVM_DEBUG(dbgs() << "All edge weights for " << BB->getName()
1118  << " known. Set weight for block: ";
1119  printBlockWeight(dbgs(), BB););
1120  }
1121  } else if (NumTotalEdges == 1 &&
1122  EdgeWeights[SingleEdge] < BlockWeights[EC]) {
1123  // If there is only one edge for the visited basic block, use the
1124  // block weight to adjust edge weight if edge weight is smaller.
1125  EdgeWeights[SingleEdge] = BlockWeights[EC];
1126  Changed = true;
1127  }
1128  } else if (NumUnknownEdges == 1 && VisitedBlocks.count(EC)) {
1129  // If there is a single unknown edge and the block has been
1130  // visited, then we can compute E's weight.
1131  if (BBWeight >= TotalWeight)
1132  EdgeWeights[UnknownEdge] = BBWeight - TotalWeight;
1133  else
1134  EdgeWeights[UnknownEdge] = 0;
1135  const BasicBlock *OtherEC;
1136  if (i == 0)
1137  OtherEC = EquivalenceClass[UnknownEdge.first];
1138  else
1139  OtherEC = EquivalenceClass[UnknownEdge.second];
1140  // Edge weights should never exceed the BB weights it connects.
1141  if (VisitedBlocks.count(OtherEC) &&
1142  EdgeWeights[UnknownEdge] > BlockWeights[OtherEC])
1143  EdgeWeights[UnknownEdge] = BlockWeights[OtherEC];
1144  VisitedEdges.insert(UnknownEdge);
1145  Changed = true;
1146  LLVM_DEBUG(dbgs() << "Set weight for edge: ";
1147  printEdgeWeight(dbgs(), UnknownEdge));
1148  }
1149  } else if (VisitedBlocks.count(EC) && BlockWeights[EC] == 0) {
1150  // If a block Weights 0, all its in/out edges should weight 0.
1151  if (i == 0) {
1152  for (auto *Pred : Predecessors[BB]) {
1153  Edge E = std::make_pair(Pred, BB);
1154  EdgeWeights[E] = 0;
1155  VisitedEdges.insert(E);
1156  }
1157  } else {
1158  for (auto *Succ : Successors[BB]) {
1159  Edge E = std::make_pair(BB, Succ);
1160  EdgeWeights[E] = 0;
1161  VisitedEdges.insert(E);
1162  }
1163  }
1164  } else if (SelfReferentialEdge.first && VisitedBlocks.count(EC)) {
1165  uint64_t &BBWeight = BlockWeights[BB];
1166  // We have a self-referential edge and the weight of BB is known.
1167  if (BBWeight >= TotalWeight)
1168  EdgeWeights[SelfReferentialEdge] = BBWeight - TotalWeight;
1169  else
1170  EdgeWeights[SelfReferentialEdge] = 0;
1171  VisitedEdges.insert(SelfReferentialEdge);
1172  Changed = true;
1173  LLVM_DEBUG(dbgs() << "Set self-referential edge weight to: ";
1174  printEdgeWeight(dbgs(), SelfReferentialEdge));
1175  }
1176  if (UpdateBlockCount && !VisitedBlocks.count(EC) && TotalWeight > 0) {
1177  BlockWeights[EC] = TotalWeight;
1178  VisitedBlocks.insert(EC);
1179  Changed = true;
1180  }
1181  }
1182  }
1183 
1184  return Changed;
1185 }
1186 
1187 /// Build in/out edge lists for each basic block in the CFG.
1188 ///
1189 /// We are interested in unique edges. If a block B1 has multiple
1190 /// edges to another block B2, we only add a single B1->B2 edge.
1191 void SampleProfileLoader::buildEdges(Function &F) {
1192  for (auto &BI : F) {
1193  BasicBlock *B1 = &BI;
1194 
1195  // Add predecessors for B1.
1197  if (!Predecessors[B1].empty())
1198  llvm_unreachable("Found a stale predecessors list in a basic block.");
1199  for (pred_iterator PI = pred_begin(B1), PE = pred_end(B1); PI != PE; ++PI) {
1200  BasicBlock *B2 = *PI;
1201  if (Visited.insert(B2).second)
1202  Predecessors[B1].push_back(B2);
1203  }
1204 
1205  // Add successors for B1.
1206  Visited.clear();
1207  if (!Successors[B1].empty())
1208  llvm_unreachable("Found a stale successors list in a basic block.");
1209  for (succ_iterator SI = succ_begin(B1), SE = succ_end(B1); SI != SE; ++SI) {
1210  BasicBlock *B2 = *SI;
1211  if (Visited.insert(B2).second)
1212  Successors[B1].push_back(B2);
1213  }
1214  }
1215 }
1216 
1217 /// Returns the sorted CallTargetMap \p M by count in descending order.
1219  const SampleRecord::CallTargetMap &M) {
1221  for (auto I = M.begin(); I != M.end(); ++I)
1222  R.push_back({FunctionSamples::getGUID(I->getKey()), I->getValue()});
1223  llvm::sort(R, [](const InstrProfValueData &L, const InstrProfValueData &R) {
1224  if (L.Count == R.Count)
1225  return L.Value > R.Value;
1226  else
1227  return L.Count > R.Count;
1228  });
1229  return R;
1230 }
1231 
1232 /// Propagate weights into edges
1233 ///
1234 /// The following rules are applied to every block BB in the CFG:
1235 ///
1236 /// - If BB has a single predecessor/successor, then the weight
1237 /// of that edge is the weight of the block.
1238 ///
1239 /// - If all incoming or outgoing edges are known except one, and the
1240 /// weight of the block is already known, the weight of the unknown
1241 /// edge will be the weight of the block minus the sum of all the known
1242 /// edges. If the sum of all the known edges is larger than BB's weight,
1243 /// we set the unknown edge weight to zero.
1244 ///
1245 /// - If there is a self-referential edge, and the weight of the block is
1246 /// known, the weight for that edge is set to the weight of the block
1247 /// minus the weight of the other incoming edges to that block (if
1248 /// known).
1249 void SampleProfileLoader::propagateWeights(Function &F) {
1250  bool Changed = true;
1251  unsigned I = 0;
1252 
1253  // If BB weight is larger than its corresponding loop's header BB weight,
1254  // use the BB weight to replace the loop header BB weight.
1255  for (auto &BI : F) {
1256  BasicBlock *BB = &BI;
1257  Loop *L = LI->getLoopFor(BB);
1258  if (!L) {
1259  continue;
1260  }
1261  BasicBlock *Header = L->getHeader();
1262  if (Header && BlockWeights[BB] > BlockWeights[Header]) {
1263  BlockWeights[Header] = BlockWeights[BB];
1264  }
1265  }
1266 
1267  // Before propagation starts, build, for each block, a list of
1268  // unique predecessors and successors. This is necessary to handle
1269  // identical edges in multiway branches. Since we visit all blocks and all
1270  // edges of the CFG, it is cleaner to build these lists once at the start
1271  // of the pass.
1272  buildEdges(F);
1273 
1274  // Propagate until we converge or we go past the iteration limit.
1275  while (Changed && I++ < SampleProfileMaxPropagateIterations) {
1276  Changed = propagateThroughEdges(F, false);
1277  }
1278 
1279  // The first propagation propagates BB counts from annotated BBs to unknown
1280  // BBs. The 2nd propagation pass resets edges weights, and use all BB weights
1281  // to propagate edge weights.
1282  VisitedEdges.clear();
1283  Changed = true;
1284  while (Changed && I++ < SampleProfileMaxPropagateIterations) {
1285  Changed = propagateThroughEdges(F, false);
1286  }
1287 
1288  // The 3rd propagation pass allows adjust annotated BB weights that are
1289  // obviously wrong.
1290  Changed = true;
1291  while (Changed && I++ < SampleProfileMaxPropagateIterations) {
1292  Changed = propagateThroughEdges(F, true);
1293  }
1294 
1295  // Generate MD_prof metadata for every branch instruction using the
1296  // edge weights computed during propagation.
1297  LLVM_DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n");
1298  LLVMContext &Ctx = F.getContext();
1299  MDBuilder MDB(Ctx);
1300  for (auto &BI : F) {
1301  BasicBlock *BB = &BI;
1302 
1303  if (BlockWeights[BB]) {
1304  for (auto &I : BB->getInstList()) {
1305  if (!isa<CallInst>(I) && !isa<InvokeInst>(I))
1306  continue;
1307  CallSite CS(&I);
1308  if (!CS.getCalledFunction()) {
1309  const DebugLoc &DLoc = I.getDebugLoc();
1310  if (!DLoc)
1311  continue;
1312  const DILocation *DIL = DLoc;
1313  uint32_t LineOffset = FunctionSamples::getOffset(DIL);
1314  uint32_t Discriminator = DIL->getBaseDiscriminator();
1315 
1316  const FunctionSamples *FS = findFunctionSamples(I);
1317  if (!FS)
1318  continue;
1319  auto T = FS->findCallTargetMapAt(LineOffset, Discriminator);
1320  if (!T || T.get().empty())
1321  continue;
1322  SmallVector<InstrProfValueData, 2> SortedCallTargets =
1323  SortCallTargets(T.get());
1324  uint64_t Sum;
1325  findIndirectCallFunctionSamples(I, Sum);
1326  annotateValueSite(*I.getParent()->getParent()->getParent(), I,
1327  SortedCallTargets, Sum, IPVK_IndirectCallTarget,
1328  SortedCallTargets.size());
1329  } else if (!dyn_cast<IntrinsicInst>(&I)) {
1330  I.setMetadata(LLVMContext::MD_prof,
1331  MDB.createBranchWeights(
1332  {static_cast<uint32_t>(BlockWeights[BB])}));
1333  }
1334  }
1335  }
1336  Instruction *TI = BB->getTerminator();
1337  if (TI->getNumSuccessors() == 1)
1338  continue;
1339  if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
1340  continue;
1341 
1342  DebugLoc BranchLoc = TI->getDebugLoc();
1343  LLVM_DEBUG(dbgs() << "\nGetting weights for branch at line "
1344  << ((BranchLoc) ? Twine(BranchLoc.getLine())
1345  : Twine("<UNKNOWN LOCATION>"))
1346  << ".\n");
1347  SmallVector<uint32_t, 4> Weights;
1348  uint32_t MaxWeight = 0;
1349  Instruction *MaxDestInst;
1350  for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
1351  BasicBlock *Succ = TI->getSuccessor(I);
1352  Edge E = std::make_pair(BB, Succ);
1353  uint64_t Weight = EdgeWeights[E];
1354  LLVM_DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), E));
1355  // Use uint32_t saturated arithmetic to adjust the incoming weights,
1356  // if needed. Sample counts in profiles are 64-bit unsigned values,
1357  // but internally branch weights are expressed as 32-bit values.
1358  if (Weight > std::numeric_limits<uint32_t>::max()) {
1359  LLVM_DEBUG(dbgs() << " (saturated due to uint32_t overflow)");
1361  }
1362  // Weight is added by one to avoid propagation errors introduced by
1363  // 0 weights.
1364  Weights.push_back(static_cast<uint32_t>(Weight + 1));
1365  if (Weight != 0) {
1366  if (Weight > MaxWeight) {
1367  MaxWeight = Weight;
1368  MaxDestInst = Succ->getFirstNonPHIOrDbgOrLifetime();
1369  }
1370  }
1371  }
1372 
1373  uint64_t TempWeight;
1374  // Only set weights if there is at least one non-zero weight.
1375  // In any other case, let the analyzer set weights.
1376  // Do not set weights if the weights are present. In ThinLTO, the profile
1377  // annotation is done twice. If the first annotation already set the
1378  // weights, the second pass does not need to set it.
1379  if (MaxWeight > 0 && !TI->extractProfTotalWeight(TempWeight)) {
1380  LLVM_DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
1382  MDB.createBranchWeights(Weights));
1383  ORE->emit([&]() {
1384  return OptimizationRemark(DEBUG_TYPE, "PopularDest", MaxDestInst)
1385  << "most popular destination for conditional branches at "
1386  << ore::NV("CondBranchesLoc", BranchLoc);
1387  });
1388  } else {
1389  LLVM_DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
1390  }
1391  }
1392 }
1393 
1394 /// Get the line number for the function header.
1395 ///
1396 /// This looks up function \p F in the current compilation unit and
1397 /// retrieves the line number where the function is defined. This is
1398 /// line 0 for all the samples read from the profile file. Every line
1399 /// number is relative to this line.
1400 ///
1401 /// \param F Function object to query.
1402 ///
1403 /// \returns the line number where \p F is defined. If it returns 0,
1404 /// it means that there is no debug information available for \p F.
1405 unsigned SampleProfileLoader::getFunctionLoc(Function &F) {
1406  if (DISubprogram *S = F.getSubprogram())
1407  return S->getLine();
1408 
1409  if (NoWarnSampleUnused)
1410  return 0;
1411 
1412  // If the start of \p F is missing, emit a diagnostic to inform the user
1413  // about the missed opportunity.
1415  "No debug information found in function " + F.getName() +
1416  ": Function profile not used",
1417  DS_Warning));
1418  return 0;
1419 }
1420 
1421 void SampleProfileLoader::computeDominanceAndLoopInfo(Function &F) {
1422  DT.reset(new DominatorTree);
1423  DT->recalculate(F);
1424 
1425  PDT.reset(new PostDominatorTree(F));
1426 
1427  LI.reset(new LoopInfo);
1428  LI->analyze(*DT);
1429 }
1430 
1431 /// Generate branch weight metadata for all branches in \p F.
1432 ///
1433 /// Branch weights are computed out of instruction samples using a
1434 /// propagation heuristic. Propagation proceeds in 3 phases:
1435 ///
1436 /// 1- Assignment of block weights. All the basic blocks in the function
1437 /// are initial assigned the same weight as their most frequently
1438 /// executed instruction.
1439 ///
1440 /// 2- Creation of equivalence classes. Since samples may be missing from
1441 /// blocks, we can fill in the gaps by setting the weights of all the
1442 /// blocks in the same equivalence class to the same weight. To compute
1443 /// the concept of equivalence, we use dominance and loop information.
1444 /// Two blocks B1 and B2 are in the same equivalence class if B1
1445 /// dominates B2, B2 post-dominates B1 and both are in the same loop.
1446 ///
1447 /// 3- Propagation of block weights into edges. This uses a simple
1448 /// propagation heuristic. The following rules are applied to every
1449 /// block BB in the CFG:
1450 ///
1451 /// - If BB has a single predecessor/successor, then the weight
1452 /// of that edge is the weight of the block.
1453 ///
1454 /// - If all the edges are known except one, and the weight of the
1455 /// block is already known, the weight of the unknown edge will
1456 /// be the weight of the block minus the sum of all the known
1457 /// edges. If the sum of all the known edges is larger than BB's weight,
1458 /// we set the unknown edge weight to zero.
1459 ///
1460 /// - If there is a self-referential edge, and the weight of the block is
1461 /// known, the weight for that edge is set to the weight of the block
1462 /// minus the weight of the other incoming edges to that block (if
1463 /// known).
1464 ///
1465 /// Since this propagation is not guaranteed to finalize for every CFG, we
1466 /// only allow it to proceed for a limited number of iterations (controlled
1467 /// by -sample-profile-max-propagate-iterations).
1468 ///
1469 /// FIXME: Try to replace this propagation heuristic with a scheme
1470 /// that is guaranteed to finalize. A work-list approach similar to
1471 /// the standard value propagation algorithm used by SSA-CCP might
1472 /// work here.
1473 ///
1474 /// Once all the branch weights are computed, we emit the MD_prof
1475 /// metadata on BB using the computed values for each of its branches.
1476 ///
1477 /// \param F The function to query.
1478 ///
1479 /// \returns true if \p F was modified. Returns false, otherwise.
1480 bool SampleProfileLoader::emitAnnotations(Function &F) {
1481  bool Changed = false;
1482 
1483  if (getFunctionLoc(F) == 0)
1484  return false;
1485 
1486  LLVM_DEBUG(dbgs() << "Line number for the first instruction in "
1487  << F.getName() << ": " << getFunctionLoc(F) << "\n");
1488 
1489  DenseSet<GlobalValue::GUID> InlinedGUIDs;
1490  Changed |= inlineHotFunctions(F, InlinedGUIDs);
1491 
1492  // Compute basic block weights.
1493  Changed |= computeBlockWeights(F);
1494 
1495  if (Changed) {
1496  // Add an entry count to the function using the samples gathered at the
1497  // function entry.
1498  // Sets the GUIDs that are inlined in the profiled binary. This is used
1499  // for ThinLink to make correct liveness analysis, and also make the IR
1500  // match the profiled binary before annotation.
1501  F.setEntryCount(
1502  ProfileCount(Samples->getHeadSamples() + 1, Function::PCT_Real),
1503  &InlinedGUIDs);
1504 
1505  // Compute dominance and loop info needed for propagation.
1506  computeDominanceAndLoopInfo(F);
1507 
1508  // Find equivalence classes.
1509  findEquivalenceClasses(F);
1510 
1511  // Propagate weights to all edges.
1512  propagateWeights(F);
1513  }
1514 
1515  // If coverage checking was requested, compute it now.
1517  unsigned Used = CoverageTracker.countUsedRecords(Samples, PSI);
1518  unsigned Total = CoverageTracker.countBodyRecords(Samples, PSI);
1519  unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
1520  if (Coverage < SampleProfileRecordCoverage) {
1522  F.getSubprogram()->getFilename(), getFunctionLoc(F),
1523  Twine(Used) + " of " + Twine(Total) + " available profile records (" +
1524  Twine(Coverage) + "%) were applied",
1525  DS_Warning));
1526  }
1527  }
1528 
1530  uint64_t Used = CoverageTracker.getTotalUsedSamples();
1531  uint64_t Total = CoverageTracker.countBodySamples(Samples, PSI);
1532  unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
1533  if (Coverage < SampleProfileSampleCoverage) {
1535  F.getSubprogram()->getFilename(), getFunctionLoc(F),
1536  Twine(Used) + " of " + Twine(Total) + " available profile samples (" +
1537  Twine(Coverage) + "%) were applied",
1538  DS_Warning));
1539  }
1540  }
1541  return Changed;
1542 }
1543 
1545 
1546 INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile",
1547  "Sample Profile loader", false, false)
1551 INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",
1552  "Sample Profile loader", false, false)
1553 
1554 bool SampleProfileLoader::doInitialization(Module &M) {
1555  auto &Ctx = M.getContext();
1556  auto ReaderOrErr = SampleProfileReader::create(Filename, Ctx);
1557  if (std::error_code EC = ReaderOrErr.getError()) {
1558  std::string Msg = "Could not open profile: " + EC.message();
1559  Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1560  return false;
1561  }
1562  Reader = std::move(ReaderOrErr.get());
1563  Reader->collectFuncsToUse(M);
1564  ProfileIsValid = (Reader->read() == sampleprof_error::success);
1565 
1566  if (!RemappingFilename.empty()) {
1567  // Apply profile remappings to the loaded profile data if requested.
1568  // For now, we only support remapping symbols encoded using the Itanium
1569  // C++ ABI's name mangling scheme.
1571  RemappingFilename, Ctx, std::move(Reader));
1572  if (std::error_code EC = ReaderOrErr.getError()) {
1573  std::string Msg = "Could not open profile remapping file: " + EC.message();
1574  Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1575  return false;
1576  }
1577  Reader = std::move(ReaderOrErr.get());
1578  ProfileIsValid = (Reader->read() == sampleprof_error::success);
1579  }
1580  return true;
1581 }
1582 
1584  return new SampleProfileLoaderLegacyPass();
1585 }
1586 
1588  return new SampleProfileLoaderLegacyPass(Name);
1589 }
1590 
1591 bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
1592  ProfileSummaryInfo *_PSI) {
1594  if (!ProfileIsValid)
1595  return false;
1596 
1597  PSI = _PSI;
1598  if (M.getProfileSummary(/* IsCS */ false) == nullptr)
1599  M.setProfileSummary(Reader->getSummary().getMD(M.getContext()),
1601 
1602  // Compute the total number of samples collected in this profile.
1603  for (const auto &I : Reader->getProfiles())
1604  TotalCollectedSamples += I.second.getTotalSamples();
1605 
1606  // Populate the symbol map.
1607  for (const auto &N_F : M.getValueSymbolTable()) {
1608  StringRef OrigName = N_F.getKey();
1609  Function *F = dyn_cast<Function>(N_F.getValue());
1610  if (F == nullptr)
1611  continue;
1612  SymbolMap[OrigName] = F;
1613  auto pos = OrigName.find('.');
1614  if (pos != StringRef::npos) {
1615  StringRef NewName = OrigName.substr(0, pos);
1616  auto r = SymbolMap.insert(std::make_pair(NewName, F));
1617  // Failiing to insert means there is already an entry in SymbolMap,
1618  // thus there are multiple functions that are mapped to the same
1619  // stripped name. In this case of name conflicting, set the value
1620  // to nullptr to avoid confusion.
1621  if (!r.second)
1622  r.first->second = nullptr;
1623  }
1624  }
1625 
1626  bool retval = false;
1627  for (auto &F : M)
1628  if (!F.isDeclaration()) {
1629  clearFunctionData();
1630  retval |= runOnFunction(F, AM);
1631  }
1632 
1633  // Account for cold calls not inlined....
1634  for (const std::pair<Function *, NotInlinedProfileInfo> &pair :
1635  notInlinedCallInfo)
1636  updateProfileCallee(pair.first, pair.second.entryCount);
1637 
1638  return retval;
1639 }
1640 
1641 bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
1642  ACT = &getAnalysis<AssumptionCacheTracker>();
1643  TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
1644  ProfileSummaryInfo *PSI =
1645  &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
1646  return SampleLoader.runOnModule(M, nullptr, PSI);
1647 }
1648 
1650 
1651  DILocation2SampleMap.clear();
1652  // By default the entry count is initialized to -1, which will be treated
1653  // conservatively by getEntryCount as the same as unknown (None). This is
1654  // to avoid newly added code to be treated as cold. If we have samples
1655  // this will be overwritten in emitAnnotations.
1656  // If ProfileSampleAccurate is true or F has profile-sample-accurate
1657  // attribute, initialize the entry count to 0 so callsites or functions
1658  // unsampled will be treated as cold.
1659  uint64_t initialEntryCount =
1660  (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate"))
1661  ? 0
1662  : -1;
1663  F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
1664  std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
1665  if (AM) {
1666  auto &FAM =
1668  .getManager();
1669  ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
1670  } else {
1671  OwnedORE = make_unique<OptimizationRemarkEmitter>(&F);
1672  ORE = OwnedORE.get();
1673  }
1674  Samples = Reader->getSamplesFor(F);
1675  if (Samples && !Samples->empty())
1676  return emitAnnotations(F);
1677  return false;
1678 }
1679 
1681  ModuleAnalysisManager &AM) {
1683  AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1684 
1685  auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
1686  return FAM.getResult<AssumptionAnalysis>(F);
1687  };
1688  auto GetTTI = [&](Function &F) -> TargetTransformInfo & {
1689  return FAM.getResult<TargetIRAnalysis>(F);
1690  };
1691 
1692  SampleProfileLoader SampleLoader(
1693  ProfileFileName.empty() ? SampleProfileFile : ProfileFileName,
1694  ProfileRemappingFileName.empty() ? SampleProfileRemappingFile
1695  : ProfileRemappingFileName,
1696  IsThinLTOPreLink, GetAssumptionCache, GetTTI);
1697 
1698  SampleLoader.doInitialization(M);
1699 
1701  if (!SampleLoader.runOnModule(M, &AM, PSI))
1702  return PreservedAnalyses::all();
1703 
1704  return PreservedAnalyses::none();
1705 }
const NoneType None
Definition: None.h:23
uint64_t CallInst * C
const FunctionSamplesMap * findFunctionSamplesMapAt(const LineLocation &Loc) const
Returns the FunctionSamplesMap at the given Loc.
Definition: SampleProf.h:283
static uint64_t getGUID(StringRef Name)
Definition: SampleProf.h:515
Thresholds to tune inline cost analysis.
Definition: InlineCost.h:153
Represents either an error or a value T.
Definition: ErrorOr.h:56
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
DiagnosticInfoOptimizationBase::Argument NV
bool isNever() const
Definition: InlineCost.h:104
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:769
This class represents lattice values for constants.
Definition: AllocatorList.h:23
sample profile
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
BasicBlock * getSuccessor(unsigned Idx) const
Return the specified successor. This instruction must be a terminator.
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
Implements a dense probed hash-table based set.
Definition: DenseSet.h:249
const ValueSymbolTable & getValueSymbolTable() const
Get the symbol table of global variable and function identifiers.
Definition: Module.h:568
Analysis providing profile information.
This class represents a function call, abstracting a target machine&#39;s calling convention.
An immutable pass that tracks lazily created AssumptionCache objects.
Metadata * getProfileSummary(bool IsCS)
Returns profile summary metadata.
Definition: Module.cpp:541
A cache of @llvm.assume calls within a function.
Analysis pass providing the TargetTransformInfo.
bool isLegalToPromote(CallSite CS, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:320
static ErrorOr< std::unique_ptr< SampleProfileReader > > create(const Twine &Filename, LLVMContext &C)
Create a sample profile reader appropriate to the file format.
unsigned getLine() const
Definition: DebugLoc.cpp:25
ErrorOr< uint64_t > findSamplesAt(uint32_t LineOffset, uint32_t Discriminator) const
Return the number of samples collected at the given location.
Definition: SampleProf.h:256
A debug info location.
Definition: DebugLoc.h:33
F(f)
uint64_t getOrCompHotCountThreshold()
Returns HotCountThreshold if set.
FunTy * getCalledFunction() const
Return the function being called if this is a direct call, otherwise return null (if it&#39;s an indirect...
Definition: CallSite.h:111
InlineResult InlineFunction(CallInst *C, InlineFunctionInfo &IFI, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true)
This function inlines the called function into the basic block of the caller.
void findInlinedFunctions(DenseSet< GlobalValue::GUID > &S, const Module *M, uint64_t Threshold) const
Recursively traverses all children, if the total sample count of the corresponding function is no les...
Definition: SampleProf.h:382
static SmallVector< InstrProfValueData, 2 > SortCallTargets(const SampleRecord::CallTargetMap &M)
Returns the sorted CallTargetMap M by count in descending order.
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
Definition: Cloning.h:173
bool isHotCount(uint64_t C)
Returns true if count C is considered hot.
Represents the cost of inlining a function.
Definition: InlineCost.h:63
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Representation of the samples collected for a function.
Definition: SampleProf.h:216
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:221
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
Definition: BitVector.h:937
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
void setEntryCount(ProfileCount Count, const DenseSet< GlobalValue::GUID > *Imports=nullptr)
Set the entry count for this function.
Definition: Function.cpp:1366
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
static cl::opt< std::string > SampleProfileRemappingFile("sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden)
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:244
Diagnostic information for optimization analysis remarks.
static StringRef getName(Value *V)
LLVM_NODISCARD StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:578
StringRef getFilename() const
BlockT * getHeader() const
Definition: LoopInfo.h:99
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:102
Subprogram description.
const Instruction * getFirstNonPHIOrDbgOrLifetime() const
Returns a pointer to the first instruction in this block that is not a PHINode, a debug intrinsic...
Definition: BasicBlock.cpp:203
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
bool extractProfTotalWeight(uint64_t &TotalVal) const
Retrieve total raw weight values of a branch.
Definition: Metadata.cpp:1339
static cl::opt< std::string > SampleProfileFile("sample-profile-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile file loaded by -sample-profile"), cl::Hidden)
Debug location.
ModulePass * createSampleProfileLoaderPass()
Instruction * promoteIndirectCall(Instruction *Inst, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
unsigned getNumSuccessors() const
Return the number of successors that this instruction has.
static cl::opt< unsigned > SampleProfileRecordCoverage("sample-profile-check-record-coverage", cl::init(0), cl::value_desc("N"), cl::desc("Emit a warning if less than N% of records in the input profile " "are matched to the IR."))
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:156
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:105
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:176
Core dominator tree base class.
Definition: LoopInfo.h:60
const BasicBlock & getEntryBlock() const
Definition: Function.h:639
static bool runOnFunction(Function &F, bool PostInlining)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
static cl::opt< unsigned > SampleProfileMaxPropagateIterations("sample-profile-max-propagate-iterations", cl::init(100), cl::desc("Maximum number of iterations to go through when propagating " "sample block/edge weights through the CFG."))
DISubprogram * getSubprogram() const
Get the attached subprogram.
Definition: Metadata.cpp:1507
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
const BodySampleMap & getBodySamples() const
Return all the samples collected in the body of the function.
Definition: SampleProf.h:350
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
Diagnostic information for applied optimization remarks.
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:112
LLVM_NODISCARD size_t find(char C, size_t From=0) const
Search for the first character C in the string.
Definition: StringRef.h:285
Function::ProfileCount ProfileCount
Represent the analysis usage information of a pass.
Interval::pred_iterator pred_end(Interval *I)
Definition: Interval.h:115
DenseMap< SymbolStringPtr, JITEvaluatedSymbol > SymbolMap
A map from symbol names (as SymbolStringPtrs) to JITSymbols (address/flags pairs).
Definition: Core.h:47
void initializeSampleProfileLoaderLegacyPassPass(PassRegistry &)
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:192
Used in the streaming interface as the general argument type.
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:812
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
Class to represent profile counts.
Definition: Function.h:260
size_t size() const
Definition: SmallVector.h:52
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1225
StringRef getFuncNameInModule(const Module *M) const
Return the original function name if it exists in Module M.
Definition: SampleProf.h:409
InlineCost getInlineCost(CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI, std::function< AssumptionCache &(Function &)> &GetAssumptionCache, Optional< function_ref< BlockFrequencyInfo &(Function &)>> GetBFI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
Optional< bool > ComputeFullInlineCost
Compute inline cost even when the cost has exceeded the threshold.
Definition: InlineCost.h:180
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1115
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:209
const FunctionSamples * findFunctionSamplesAt(const LineLocation &Loc, StringRef CalleeName) const
Returns a pointer to FunctionSamples at the given callsite location Loc with callee CalleeName...
Definition: SampleProf.h:294
A function analysis which provides an AssumptionCache.
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:333
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options...
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:841
Module.h This file contains the declarations for the Module class.
static unsigned getOffset(const DILocation *DIL)
Returns the line offset to the start line of the subprogram.
Definition: SampleProf.cpp:164
static cl::opt< bool > ProfileSampleAccurate("profile-sample-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "callsite and function as having 0 samples. Otherwise, treat " "un-sampled callsites and functions conservatively as unknown. "))
ErrorOr< SampleRecord::CallTargetMap > findCallTargetMapAt(uint32_t LineOffset, uint32_t Discriminator) const
Returns the call target map collected at a given location.
Definition: SampleProf.h:269
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
StringMap - This is an unconventional map that is specialized for handling keys that are "strings"...
Definition: StringMap.h:219
amdgpu Simplify well known AMD library false FunctionCallee Callee
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
static void clear(coro::Shape &Shape)
Definition: Coroutines.cpp:211
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:471
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
unsigned getBaseDiscriminator() const
Returns the base discriminator stored in the discriminator.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:324
#define DEBUG_TYPE
static const size_t npos
Definition: StringRef.h:50
iterator begin()
Definition: StringMap.h:314
Represents the relative location of an instruction.
Definition: SampleProf.h:117
static cl::opt< unsigned > SampleProfileSampleCoverage("sample-profile-check-sample-coverage", cl::init(0), cl::value_desc("N"), cl::desc("Emit a warning if less than N% of samples in the input profile " "are matched to the IR."))
static bool callsiteIsHot(const FunctionSamples *CallsiteFS, ProfileSummaryInfo *PSI)
Return true if the given callsite is hot wrt to hot cutoff threshold.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:464
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
Establish a view to a call site for examination.
Definition: CallSite.h:892
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:224
iterator end()
Definition: DenseMap.h:108
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:322
std::map< std::string, FunctionSamples > FunctionSamplesMap
Definition: SampleProf.h:208
StringRef getName() const
Return the function name.
Definition: SampleProf.h:406
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:91
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
void updateProfileCallee(Function *Callee, int64_t entryDelta, const ValueMap< const Value *, WeakTrackingVH > *VMap=nullptr)
Updates profile information by adjusting the entry count by adding entryDelta then scaling callsite i...
INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile", "Sample Profile loader", false, false) INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:205
Provides ErrorOr<T> smart pointer.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:565
uint64_t getTotalSamples() const
Return the total number of samples collected inside the function.
Definition: SampleProf.h:320
sample Sample Profile loader
const CallsiteSampleMap & getCallsiteSamples() const
Return all the callsite samples collected in the body of the function.
Definition: SampleProf.h:353
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:45
print Print MemDeps of function
This file defines a set of templates that efficiently compute a dominator tree over a generic graph...
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
A container for analyses that lazily runs them and caches their results.
This pass exposes codegen information to IR-level passes.
uint64_t getEntrySamples() const
Return the sample count of the first instruction of the function.
Definition: SampleProf.h:331
static cl::opt< bool > NoWarnSampleUnused("no-warn-sample-unused", cl::init(false), cl::Hidden, cl::desc("Use this option to turn off/on warnings about function with " "samples but without debug information to use those samples. "))
This header defines various interfaces for pass management in LLVM.
Diagnostic information for the sample profiler.
void setProfileSummary(Metadata *M, ProfileSummary::Kind Kind)
Attach profile summary metadata to this module.
Definition: Module.cpp:534
#define LLVM_DEBUG(X)
Definition: Debug.h:122
The optimization diagnostic interface.
iterator end()
Definition: StringMap.h:317
This file provides the interface for the sampled PGO loader pass.
reference get()
Definition: ErrorOr.h:156
const BasicBlock * getParent() const
Definition: Instruction.h:66
static ErrorOr< std::unique_ptr< SampleProfileReader > > create(const Twine &Filename, LLVMContext &C, std::unique_ptr< SampleProfileReader > Underlying)
Create a remapped sample profile from the given remapping file and underlying samples.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:1037