LLVM  15.0.0git
SampleProfileLoaderBaseUtil.cpp
Go to the documentation of this file.
1 //===- SampleProfileLoaderBaseUtil.cpp - Profile loader Util func ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the SampleProfileLoader base utility functions.
10 //
11 //===----------------------------------------------------------------------===//
12 
15 #include "llvm/IR/Constants.h"
16 #include "llvm/IR/Module.h"
18 
19 namespace llvm {
20 
21 cl::opt<unsigned> SampleProfileMaxPropagateIterations(
22  "sample-profile-max-propagate-iterations", cl::init(100),
23  cl::desc("Maximum number of iterations to go through when propagating "
24  "sample block/edge weights through the CFG."));
25 
26 cl::opt<unsigned> SampleProfileRecordCoverage(
27  "sample-profile-check-record-coverage", cl::init(0), cl::value_desc("N"),
28  cl::desc("Emit a warning if less than N% of records in the input profile "
29  "are matched to the IR."));
30 
31 cl::opt<unsigned> SampleProfileSampleCoverage(
32  "sample-profile-check-sample-coverage", cl::init(0), cl::value_desc("N"),
33  cl::desc("Emit a warning if less than N% of samples in the input profile "
34  "are matched to the IR."));
35 
36 cl::opt<bool> NoWarnSampleUnused(
37  "no-warn-sample-unused", cl::init(false), cl::Hidden,
38  cl::desc("Use this option to turn off/on warnings about function with "
39  "samples but without debug information to use those samples. "));
40 
41 cl::opt<bool> SampleProfileUseProfi(
42  "sample-profile-use-profi", cl::init(false), cl::Hidden, cl::ZeroOrMore,
43  cl::desc("Use profi to infer block and edge counts."));
44 
45 cl::opt<bool> SampleProfileInferEntryCount(
46  "sample-profile-infer-entry-count", cl::init(true), cl::Hidden,
47  cl::ZeroOrMore, cl::desc("Use profi to infer function entry count."));
48 
49 namespace sampleprofutil {
50 
51 /// Return true if the given callsite is hot wrt to hot cutoff threshold.
52 ///
53 /// Functions that were inlined in the original binary will be represented
54 /// in the inline stack in the sample profile. If the profile shows that
55 /// the original inline decision was "good" (i.e., the callsite is executed
56 /// frequently), then we will recreate the inline decision and apply the
57 /// profile from the inlined callsite.
58 ///
59 /// To decide whether an inlined callsite is hot, we compare the callsite
60 /// sample count with the hot cutoff computed by ProfileSummaryInfo, it is
61 /// regarded as hot if the count is above the cutoff value.
62 ///
63 /// When ProfileAccurateForSymsInList is enabled and profile symbol list
64 /// is present, functions in the profile symbol list but without profile will
65 /// be regarded as cold and much less inlining will happen in CGSCC inlining
66 /// pass, so we tend to lower the hot criteria here to allow more early
67 /// inlining to happen for warm callsites and it is helpful for performance.
68 bool callsiteIsHot(const FunctionSamples *CallsiteFS, ProfileSummaryInfo *PSI,
69  bool ProfAccForSymsInList) {
70  if (!CallsiteFS)
71  return false; // The callsite was not inlined in the original binary.
72 
73  assert(PSI && "PSI is expected to be non null");
74  uint64_t CallsiteTotalSamples = CallsiteFS->getTotalSamples();
75  if (ProfAccForSymsInList)
76  return !PSI->isColdCount(CallsiteTotalSamples);
77  else
78  return PSI->isHotCount(CallsiteTotalSamples);
79 }
80 
81 /// Mark as used the sample record for the given function samples at
82 /// (LineOffset, Discriminator).
83 ///
84 /// \returns true if this is the first time we mark the given record.
86  uint32_t LineOffset,
87  uint32_t Discriminator,
88  uint64_t Samples) {
89  LineLocation Loc(LineOffset, Discriminator);
90  unsigned &Count = SampleCoverage[FS][Loc];
91  bool FirstTime = (++Count == 1);
92  if (FirstTime)
93  TotalUsedSamples += Samples;
94  return FirstTime;
95 }
96 
97 /// Return the number of sample records that were applied from this profile.
98 ///
99 /// This count does not include records from cold inlined callsites.
100 unsigned
102  ProfileSummaryInfo *PSI) const {
103  auto I = SampleCoverage.find(FS);
104 
105  // The size of the coverage map for FS represents the number of records
106  // that were marked used at least once.
107  unsigned Count = (I != SampleCoverage.end()) ? I->second.size() : 0;
108 
109  // If there are inlined callsites in this function, count the samples found
110  // in the respective bodies. However, do not bother counting callees with 0
111  // total samples, these are callees that were never invoked at runtime.
112  for (const auto &I : FS->getCallsiteSamples())
113  for (const auto &J : I.second) {
114  const FunctionSamples *CalleeSamples = &J.second;
115  if (callsiteIsHot(CalleeSamples, PSI, ProfAccForSymsInList))
116  Count += countUsedRecords(CalleeSamples, PSI);
117  }
118 
119  return Count;
120 }
121 
122 /// Return the number of sample records in the body of this profile.
123 ///
124 /// This count does not include records from cold inlined callsites.
125 unsigned
127  ProfileSummaryInfo *PSI) const {
128  unsigned Count = FS->getBodySamples().size();
129 
130  // Only count records in hot callsites.
131  for (const auto &I : FS->getCallsiteSamples())
132  for (const auto &J : I.second) {
133  const FunctionSamples *CalleeSamples = &J.second;
134  if (callsiteIsHot(CalleeSamples, PSI, ProfAccForSymsInList))
135  Count += countBodyRecords(CalleeSamples, PSI);
136  }
137 
138  return Count;
139 }
140 
141 /// Return the number of samples collected in the body of this profile.
142 ///
143 /// This count does not include samples from cold inlined callsites.
144 uint64_t
146  ProfileSummaryInfo *PSI) const {
147  uint64_t Total = 0;
148  for (const auto &I : FS->getBodySamples())
149  Total += I.second.getSamples();
150 
151  // Only count samples in hot callsites.
152  for (const auto &I : FS->getCallsiteSamples())
153  for (const auto &J : I.second) {
154  const FunctionSamples *CalleeSamples = &J.second;
155  if (callsiteIsHot(CalleeSamples, PSI, ProfAccForSymsInList))
156  Total += countBodySamples(CalleeSamples, PSI);
157  }
158 
159  return Total;
160 }
161 
162 /// Return the fraction of sample records used in this profile.
163 ///
164 /// The returned value is an unsigned integer in the range 0-100 indicating
165 /// the percentage of sample records that were used while applying this
166 /// profile to the associated function.
168  unsigned Total) const {
169  assert(Used <= Total &&
170  "number of used records cannot exceed the total number of records");
171  return Total > 0 ? Used * 100 / Total : 100;
172 }
173 
174 /// Create a global variable to flag FSDiscriminators are used.
176  const char *FSDiscriminatorVar = "__llvm_fs_discriminator__";
177  if (M->getGlobalVariable(FSDiscriminatorVar))
178  return;
179 
180  auto &Context = M->getContext();
181  // Place this variable to llvm.used so it won't be GC'ed.
185  FSDiscriminatorVar)});
186 }
187 
188 } // end of namespace sampleprofutil
189 } // end of namespace llvm
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::Type::getInt1Ty
static IntegerType * getInt1Ty(LLVMContext &C)
Definition: Type.cpp:236
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::GlobalVariable
Definition: GlobalVariable.h:39
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
Module.h
Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:66
llvm::SampleProfileUseProfi
cl::opt< bool > SampleProfileUseProfi
llvm::ProfileSummaryInfo::isColdCount
bool isColdCount(uint64_t C) const
Returns true if count C is considered cold.
Definition: ProfileSummaryInfo.cpp:293
Constants.h
llvm::sampleprof::FunctionSamples::getTotalSamples
uint64_t getTotalSamples() const
Return the total number of samples collected inside the function.
Definition: SampleProf.h:885
llvm::sampleprofutil::createFSDiscriminatorVariable
void createFSDiscriminatorVariable(Module *M)
Create a global variable to flag FSDiscriminators are used.
Definition: SampleProfileLoaderBaseUtil.cpp:175
SampleProfileLoaderBaseUtil.h
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:39
llvm::cl::ZeroOrMore
@ ZeroOrMore
Definition: CommandLine.h:116
llvm::ProfileSummaryInfo::isHotCount
bool isHotCount(uint64_t C) const
Returns true if count C is considered hot.
Definition: ProfileSummaryInfo.cpp:289
uint64_t
ProfileSummaryInfo.h
llvm::sampleprof::FunctionSamples
Representation of the samples collected for a function.
Definition: SampleProf.h:720
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
llvm::appendToUsed
void appendToUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.used list.
Definition: ModuleUtils.cpp:105
llvm::sampleprofutil::SampleCoverageTracker::countBodySamples
uint64_t countBodySamples(const FunctionSamples *FS, ProfileSummaryInfo *PSI) const
Return the number of samples collected in the body of this profile.
Definition: SampleProfileLoaderBaseUtil.cpp:145
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:152
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::SampleProfileInferEntryCount
cl::opt< bool > SampleProfileInferEntryCount
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::sampleprof::LineLocation
Represents the relative location of an instruction.
Definition: SampleProf.h:282
llvm::sampleprofutil::SampleCoverageTracker::computeCoverage
unsigned computeCoverage(unsigned Used, unsigned Total) const
Return the fraction of sample records used in this profile.
Definition: SampleProfileLoaderBaseUtil.cpp:167
llvm::SampleProfileSampleCoverage
cl::opt< unsigned > SampleProfileSampleCoverage
llvm::GlobalValue::WeakODRLinkage
@ WeakODRLinkage
Same, but only replaced by something equivalent.
Definition: GlobalValue.h:53
llvm::sampleprofutil::SampleCoverageTracker::countBodyRecords
unsigned countBodyRecords(const FunctionSamples *FS, ProfileSummaryInfo *PSI) const
Return the number of sample records in the body of this profile.
Definition: SampleProfileLoaderBaseUtil.cpp:126
uint32_t
llvm::sampleprofutil::SampleCoverageTracker::countUsedRecords
unsigned countUsedRecords(const FunctionSamples *FS, ProfileSummaryInfo *PSI) const
Return the number of sample records that were applied from this profile.
Definition: SampleProfileLoaderBaseUtil.cpp:101
llvm::ConstantInt::getTrue
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:867
llvm::DenseMapBase::end
iterator end()
Definition: DenseMap.h:84
llvm::sampleprofutil::callsiteIsHot
bool callsiteIsHot(const FunctionSamples *CallsiteFS, ProfileSummaryInfo *PSI, bool ProfAccForSymsInList)
Return true if the given callsite is hot wrt to hot cutoff threshold.
Definition: SampleProfileLoaderBaseUtil.cpp:68
llvm::sampleprofutil::SampleCoverageTracker::markSamplesUsed
bool markSamplesUsed(const FunctionSamples *FS, uint32_t LineOffset, uint32_t Discriminator, uint64_t Samples)
Mark as used the sample record for the given function samples at (LineOffset, Discriminator).
Definition: SampleProfileLoaderBaseUtil.cpp:85
ModuleUtils.h
llvm::SampleProfileRecordCoverage
cl::opt< unsigned > SampleProfileRecordCoverage
llvm::SampleProfileMaxPropagateIterations
cl::opt< unsigned > SampleProfileMaxPropagateIterations
llvm::X86AS::FS
@ FS
Definition: X86.h:188
llvm::NoWarnSampleUnused
cl::opt< bool > NoWarnSampleUnused