LLVM  15.0.0git
SyntheticCountsPropagation.cpp
Go to the documentation of this file.
1 //=- SyntheticCountsPropagation.cpp - Propagate function counts --*- C++ -*-=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a transformation that synthesizes entry counts for
10 // functions and attaches !prof metadata to functions with the synthesized
11 // counts. The presence of !prof metadata with counter name set to
12 // 'synthesized_function_entry_count' indicate that the value of the counter is
13 // an estimation of the likely execution count of the function. This transform
14 // is applied only in non PGO mode as functions get 'real' profile-based
15 // function entry counts in the PGO mode.
16 //
17 // The transformation works by first assigning some initial values to the entry
18 // counts of all functions and then doing a top-down traversal of the
19 // callgraph-scc to propagate the counts. For each function the set of callsites
20 // and their relative block frequency is gathered. The relative block frequency
21 // multiplied by the entry count of the caller and added to the callee's entry
22 // count. For non-trivial SCCs, the new counts are computed from the previous
23 // counts and updated in one shot.
24 //
25 //===----------------------------------------------------------------------===//
26 
31 #include "llvm/IR/Function.h"
32 #include "llvm/IR/Instructions.h"
33 #include "llvm/IR/Module.h"
35 
36 using namespace llvm;
39 
40 #define DEBUG_TYPE "synthetic-counts-propagation"
41 
42 namespace llvm {
44  InitialSyntheticCount("initial-synthetic-count", cl::Hidden, cl::init(10),
46  cl::desc("Initial value of synthetic entry count"));
47 } // namespace llvm
48 
49 /// Initial synthetic count assigned to inline functions.
51  "inline-synthetic-count", cl::Hidden, cl::init(15), cl::ZeroOrMore,
52  cl::desc("Initial synthetic entry count for inline functions."));
53 
54 /// Initial synthetic count assigned to cold functions.
56  "cold-synthetic-count", cl::Hidden, cl::init(5), cl::ZeroOrMore,
57  cl::desc("Initial synthetic entry count for cold functions."));
58 
59 // Assign initial synthetic entry counts to functions.
60 static void
62  auto MayHaveIndirectCalls = [](Function &F) {
63  for (auto *U : F.users()) {
64  if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
65  return true;
66  }
67  return false;
68  };
69 
70  for (Function &F : M) {
71  uint64_t InitialCount = InitialSyntheticCount;
72  if (F.isDeclaration())
73  continue;
74  if (F.hasFnAttribute(Attribute::AlwaysInline) ||
75  F.hasFnAttribute(Attribute::InlineHint)) {
76  // Use a higher value for inline functions to account for the fact that
77  // these are usually beneficial to inline.
78  InitialCount = InlineSyntheticCount;
79  } else if (F.hasLocalLinkage() && !MayHaveIndirectCalls(F)) {
80  // Local functions without inline hints get counts only through
81  // propagation.
82  InitialCount = 0;
83  } else if (F.hasFnAttribute(Attribute::Cold) ||
84  F.hasFnAttribute(Attribute::NoInline)) {
85  // Use a lower value for noinline and cold functions.
86  InitialCount = ColdSyntheticCount;
87  }
88  SetCount(&F, InitialCount);
89  }
90 }
91 
97  // Set initial entry counts.
99  M, [&](Function *F, uint64_t Count) { Counts[F] = Scaled64(Count, 0); });
100 
101  // Edge includes information about the source. Hence ignore the first
102  // parameter.
103  auto GetCallSiteProfCount = [&](const CallGraphNode *,
104  const CallGraphNode::CallRecord &Edge) {
105  Optional<Scaled64> Res = None;
106  if (!Edge.first)
107  return Res;
108  CallBase &CB = *cast<CallBase>(*Edge.first);
109  Function *Caller = CB.getCaller();
110  auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*Caller);
111 
112  // Now compute the callsite count from relative frequency and
113  // entry count:
114  BasicBlock *CSBB = CB.getParent();
115  Scaled64 EntryFreq(BFI.getEntryFreq(), 0);
116  Scaled64 BBCount(BFI.getBlockFreq(CSBB).getFrequency(), 0);
117  BBCount /= EntryFreq;
118  BBCount *= Counts[Caller];
119  return Optional<Scaled64>(BBCount);
120  };
121 
122  CallGraph CG(M);
123  // Propgate the entry counts on the callgraph.
125  &CG, GetCallSiteProfCount, [&](const CallGraphNode *N, Scaled64 New) {
126  auto F = N->getFunction();
127  if (!F || F->isDeclaration())
128  return;
129 
130  Counts[F] += New;
131  });
132 
133  // Set the counts as metadata.
134  for (auto Entry : Counts) {
135  Entry.first->setEntryCount(ProfileCount(
136  Entry.second.template toInt<uint64_t>(), Function::PCT_Synthetic));
137  }
138 
139  return PreservedAnalyses::all();
140 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:780
llvm::Function
Definition: Function.h:60
llvm::CallGraphNode::CallRecord
std::pair< Optional< WeakTrackingVH >, CallGraphNode * > CallRecord
A pair of the calling instruction (a call or invoke) and the call graph node being called.
Definition: CallGraph.h:178
llvm::CallGraph
The basic data container for the call graph of a Module of IR.
Definition: CallGraph.h:72
FAM
FunctionAnalysisManager FAM
Definition: PassBuilderBindings.cpp:59
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
llvm::SyntheticCountsPropagation::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
Definition: SyntheticCountsPropagation.cpp:92
Module.h
llvm::Optional
Definition: APInt.h:33
llvm::SyntheticCountsUtils::propagate
static void propagate(const CallGraphType &CG, GetProfCountTy GetProfCount, AddCountTy AddCount)
Propgate synthetic entry counts on a callgraph CG.
Definition: SyntheticCountsUtils.cpp:83
llvm::CallingConv::Cold
@ Cold
Definition: CallingConv.h:48
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
CommandLine.h
llvm::Function::PCT_Synthetic
@ PCT_Synthetic
Definition: Function.h:248
ColdSyntheticCount
static cl::opt< int > ColdSyntheticCount("cold-synthetic-count", cl::Hidden, cl::init(5), cl::ZeroOrMore, cl::desc("Initial synthetic entry count for cold functions."))
Initial synthetic count assigned to cold functions.
MAM
ModuleAnalysisManager MAM
Definition: PassBuilderBindings.cpp:61
llvm::InitialSyntheticCount
cl::opt< int > InitialSyntheticCount
llvm::CallGraphNode
A node in the call graph for a module.
Definition: CallGraph.h:166
llvm::BlockFrequencyAnalysis
Analysis pass which computes BlockFrequencyInfo.
Definition: BlockFrequencyInfo.h:112
llvm::None
const NoneType None
Definition: None.h:24
llvm::CallBase::getCaller
Function * getCaller()
Helper to get the caller (the parent function).
Definition: Instructions.cpp:282
llvm::cl::ZeroOrMore
@ ZeroOrMore
Definition: CommandLine.h:116
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLFunctionalExtras.h:36
llvm::cl::opt
Definition: CommandLine.h:1392
llvm::ProfileCount
Function::ProfileCount ProfileCount
Definition: SampleProfileLoaderBaseImpl.h:47
ProfileCount
Function::ProfileCount ProfileCount
Definition: SyntheticCountsPropagation.cpp:38
uint64_t
InlineSyntheticCount
static cl::opt< int > InlineSyntheticCount("inline-synthetic-count", cl::Hidden, cl::init(15), cl::ZeroOrMore, cl::desc("Initial synthetic entry count for inline functions."))
Initial synthetic count assigned to inline functions.
llvm::DenseMap
Definition: DenseMap.h:716
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
SyntheticCountsPropagation.h
SyntheticCountsUtils.h
BlockFrequencyInfo.h
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:429
llvm::ScaledNumber< uint64_t >
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
Function.h
Scaled64
ScaledNumber< uint64_t > Scaled64
Definition: SyntheticCountsPropagation.cpp:37
CallGraph.h
Instructions.h
initializeCounts
static void initializeCounts(Module &M, function_ref< void(Function *, uint64_t)> SetCount)
Definition: SyntheticCountsPropagation.cpp:61
N
#define N
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1174
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::InnerAnalysisManagerProxy
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:937
llvm::Function::ProfileCount
Class to represent profile counts.
Definition: Function.h:253
llvm::cl::desc
Definition: CommandLine.h:405