Go to the documentation of this file.
35 cl::desc(
"Specify the current profile is used as a partial profile."));
40 "If true, scale the working set size of the partial sample profile "
41 "by the partial profile ratio to reflect the size of the program "
45 "partial-sample-profile-working-set-size-scale-factor",
cl::Hidden,
47 cl::desc(
"The scale factor used to scale the working set size of the "
48 "partial sample profile along with the partial profile ratio. "
49 "This includes the factor of the profile counter per block "
50 "and the factor to scale the working set size to use the same "
51 "shared thresholds as PGO."));
78 assert((isa<CallInst>(Call) || isa<InvokeInst>(Call)) &&
79 "We can only get profile count for call/invoke instruction.");
86 if (Call.extractProfTotalWeight(TotalCount))
91 return BFI->getBlockProfileCount(Call.getParent(), AllowSynthetic);
101 auto FunctionCount =
F->getEntryCount();
105 return FunctionCount &&
isHotCount(FunctionCount->getCount());
117 if (
auto FunctionCount =
F->getEntryCount())
123 for (
const auto &
BB : *
F)
124 for (
const auto &
I :
BB)
125 if (isa<CallInst>(
I) || isa<InvokeInst>(
I))
127 TotalCallCount += CallCount.getValue();
131 for (
const auto &
BB : *
F)
146 if (
auto FunctionCount =
F->getEntryCount())
152 for (
const auto &
BB : *
F)
153 for (
const auto &
I :
BB)
154 if (isa<CallInst>(
I) || isa<InvokeInst>(
I))
156 TotalCallCount += CallCount.getValue();
160 for (
const auto &
BB : *
F)
168 return !
F.getEntryCount().hasValue();
171 template <
bool isHot>
172 bool ProfileSummaryInfo::isFunctionHotOrColdInCallGraphNthPercentile(
176 if (
auto FunctionCount =
F->getEntryCount()) {
186 for (
const auto &
BB : *
F)
187 for (
const auto &
I :
BB)
188 if (isa<CallInst>(
I) || isa<InvokeInst>(
I))
190 TotalCallCount += CallCount.getValue();
196 for (
const auto &
BB : *
F) {
208 return isFunctionHotOrColdInCallGraphNthPercentile<true>(
214 return isFunctionHotOrColdInCallGraphNthPercentile<false>(
228 auto FunctionCount =
F->getEntryCount();
232 return FunctionCount &&
isColdCount(FunctionCount->getCount());
236 void ProfileSummaryInfo::computeThresholds() {
237 auto &DetailedSummary = Summary->getDetailedSummary();
244 assert(ColdCountThreshold <= HotCountThreshold &&
245 "Cold count threshold cannot exceed hot count threshold!");
247 HasHugeWorkingSetSize =
249 HasLargeWorkingSetSize =
254 double PartialProfileRatio = Summary->getPartialProfileRatio();
256 static_cast<uint64_t>(HotEntry.NumCounts * PartialProfileRatio *
258 HasHugeWorkingSetSize =
260 HasLargeWorkingSetSize =
270 if (iter != ThresholdCache.
end()) {
273 auto &DetailedSummary = Summary->getDetailedSummary();
282 return HasHugeWorkingSetSize && HasHugeWorkingSetSize.
getValue();
286 return HasLargeWorkingSetSize && HasLargeWorkingSetSize.
getValue();
290 return HotCountThreshold &&
C >= HotCountThreshold.
getValue();
294 return ColdCountThreshold &&
C <= ColdCountThreshold.
getValue();
297 template <
bool isHot>
298 bool ProfileSummaryInfo::isHotOrColdCountNthPercentile(
int PercentileCutoff,
327 auto Count =
BFI->getBlockProfileCount(
BB);
333 auto Count =
BFI->getBlockProfileCount(
BB);
337 template <
bool isHot>
338 bool ProfileSummaryInfo::isHotOrColdBlockNthPercentile(
340 auto Count =
BFI->getBlockProfileCount(
BB);
381 "Profile summary info",
false,
true)
408 OS <<
"Functions in " <<
M.getName() <<
" with hot/cold annotations: \n";
412 OS <<
" :hot entry ";
414 OS <<
" :cold entry ";
A set of analyses that are preserved following a run of a transformation pass.
static uint64_t getHotCountThreshold(const SummaryEntryVector &DS)
This is an optimization pass for GlobalISel generic memory operations.
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
bool isColdCallSite(const CallBase &CB, BlockFrequencyInfo *BFI) const
Returns true if call site CB is considered cold.
bool isColdBlockNthPercentile(int PercentileCutoff, const BasicBlock *BB, BlockFrequencyInfo *BFI) const
Returns true if BasicBlock BB is considered cold with regard to a given cold percentile cutoff value.
bool hasPartialSampleProfile() const
Returns true if module M has partial-profile sample profile.
ImmutablePass class - This class is used to provide information that does not need to be run.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
static ProfileSummary * getFromMD(Metadata *MD)
Construct profile summary from metadata.
bool hasProfileSummary() const
Returns true if profile summary is available.
bool isFunctionEntryHot(const Function *F) const
Returns true if F has hot function entry.
uint64_t getOrCompHotCountThreshold() const
Returns HotCountThreshold if set.
bool isHotBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI) const
Returns true if BasicBlock BB is considered hot.
bool isFunctionEntryCold(const Function *F) const
Returns true if F has cold function entry.
Optional< uint64_t > getProfileCount(const CallBase &CallInst, BlockFrequencyInfo *BFI, bool AllowSynthetic=false) const
Returns the profile count for CallInst.
cl::opt< int > ProfileSummaryCutoffCold
Metadata * getProfileSummary(bool IsCS) const
Returns profile summary metadata.
bool isHotCallSite(const CallBase &CB, BlockFrequencyInfo *BFI) const
Returns true if the call site CB is considered hot.
LLVM Basic Block Representation.
static cl::opt< double > PartialSampleProfileWorkingSetSizeScaleFactor("partial-sample-profile-working-set-size-scale-factor", cl::Hidden, cl::init(0.008), cl::desc("The scale factor used to scale the working set size of the " "partial sample profile along with the partial profile ratio. " "This includes the factor of the profile counter per block " "and the factor to scale the working set size to use the same " "shared thresholds as PGO."))
bool isFunctionHotInCallGraph(const Function *F, BlockFrequencyInfo &BFI) const
Returns true if F contains hot code.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
bool isColdCount(uint64_t C) const
Returns true if count C is considered cold.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
bool isHotCountNthPercentile(int PercentileCutoff, uint64_t C) const
Returns true if count C is considered hot with regard to a given hot percentile cutoff value.
bool doInitialization(Module &M) override
doInitialization - Virtual method overridden by subclasses to do any necessary initialization before ...
bool isHotBlockNthPercentile(int PercentileCutoff, const BasicBlock *BB, BlockFrequencyInfo *BFI) const
Returns true if BasicBlock BB is considered hot with regard to a given hot percentile cutoff value.
void initializeProfileSummaryInfoWrapperPassPass(PassRegistry &)
(vector float) vec_cmpeq(*A, *B) C
constexpr T getValueOr(U &&value) const &
cl::opt< unsigned > ProfileSummaryHugeWorkingSetSizeThreshold
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Function * getCaller()
Helper to get the caller (the parent function).
Analysis providing profile information.
static cl::opt< unsigned > CountThreshold("hexagon-cext-threshold", cl::init(3), cl::Hidden, cl::ZeroOrMore, cl::desc("Minimum number of extenders to trigger replacement"))
cl::opt< bool > ScalePartialSampleProfileWorkingSetSize("scale-partial-sample-profile-working-set-size", cl::Hidden, cl::init(true), cl::desc("If true, scale the working set size of the partial sample profile " "by the partial profile ratio to reflect the size of the program " "being compiled."))
bool isFunctionHotInCallGraphNthPercentile(int PercentileCutoff, const Function *F, BlockFrequencyInfo &BFI) const
Returns true if F contains hot code with regard to a given hot percentile cutoff value.
bool isHotCount(uint64_t C) const
Returns true if count C is considered hot.
static cl::opt< unsigned > PercentileCutoff("mfs-psi-cutoff", cl::desc("Percentile profile summary cutoff used to " "determine cold blocks. Unused if set to zero."), cl::init(999950), cl::Hidden)
INITIALIZE_PASS(ProfileSummaryInfoWrapperPass, "profile-summary-info", "Profile summary info", false, true) ProfileSummaryInfoWrapperPass
A special type used by analysis passes to provide an address that identifies that particular analysis...
static uint64_t getColdCountThreshold(const SummaryEntryVector &DS)
initializer< Ty > init(const Ty &Val)
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
iterator find(const_arg_type_t< KeyT > Val)
bool hasSampleProfile() const
Returns true if module M has sample profile.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool doFinalization(Module &M) override
doFinalization - Virtual method overridden by subclasses to do any necessary clean up after all passes...
constexpr const T & getValue() const &
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
A Module instance is used to store all the information related to an LLVM module.
bool isFunctionColdInCallGraphNthPercentile(int PercentileCutoff, const Function *F, BlockFrequencyInfo &BFI) const
Returns true if F contains cold code with regard to a given cold percentile cutoff value.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
void refresh()
If no summary is present, attempt to refresh.
cl::opt< int > ProfileSummaryHotCount
bool isFunctionHotnessUnknown(const Function &F) const
Returns true if the hotness of F is unknown.
Result run(Module &M, ModuleAnalysisManager &)
bool hasLargeWorkingSetSize() const
Returns true if the working set size of the code is considered large.
bool hasHugeWorkingSetSize() const
Returns true if the working set size of the code is considered huge.
bool isFunctionColdInCallGraph(const Function *F, BlockFrequencyInfo &BFI) const
Returns true if F contains only cold code.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
uint64_t getOrCompColdCountThreshold() const
Returns ColdCountThreshold if set.
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
cl::opt< int > ProfileSummaryColdCount
bool isColdBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI) const
Returns true if BasicBlock BB is considered cold.
cl::opt< unsigned > ProfileSummaryLargeWorkingSetSizeThreshold
cl::opt< int > ProfileSummaryCutoffHot
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
A container for analyses that lazily runs them and caches their results.
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
static cl::opt< bool > PartialProfile("partial-profile", cl::Hidden, cl::init(false), cl::desc("Specify the current profile is used as a partial profile."))
bool isColdCountNthPercentile(int PercentileCutoff, uint64_t C) const
Returns true if count C is considered cold with regard to a given cold percentile cutoff value.
static const ProfileSummaryEntry & getEntryForPercentile(const SummaryEntryVector &DS, uint64_t Percentile)
Find the summary entry for a desired percentile of counts.