LLVM  16.0.0git
SyntheticCountsPropagation.cpp
Go to the documentation of this file.
1 //=- SyntheticCountsPropagation.cpp - Propagate function counts --*- C++ -*-=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a transformation that synthesizes entry counts for
10 // functions and attaches !prof metadata to functions with the synthesized
11 // counts. The presence of !prof metadata with counter name set to
12 // 'synthesized_function_entry_count' indicate that the value of the counter is
13 // an estimation of the likely execution count of the function. This transform
14 // is applied only in non PGO mode as functions get 'real' profile-based
15 // function entry counts in the PGO mode.
16 //
17 // The transformation works by first assigning some initial values to the entry
18 // counts of all functions and then doing a top-down traversal of the
19 // callgraph-scc to propagate the counts. For each function the set of callsites
20 // and their relative block frequency is gathered. The relative block frequency
21 // multiplied by the entry count of the caller and added to the callee's entry
22 // count. For non-trivial SCCs, the new counts are computed from the previous
23 // counts and updated in one shot.
24 //
25 //===----------------------------------------------------------------------===//
26 
31 #include "llvm/IR/Function.h"
32 #include "llvm/IR/Instructions.h"
33 #include "llvm/IR/Module.h"
35 
36 using namespace llvm;
39 
40 #define DEBUG_TYPE "synthetic-counts-propagation"
41 
42 namespace llvm {
44  InitialSyntheticCount("initial-synthetic-count", cl::Hidden, cl::init(10),
45  cl::desc("Initial value of synthetic entry count"));
46 } // namespace llvm
47 
48 /// Initial synthetic count assigned to inline functions.
50  "inline-synthetic-count", cl::Hidden, cl::init(15),
51  cl::desc("Initial synthetic entry count for inline functions."));
52 
53 /// Initial synthetic count assigned to cold functions.
55  "cold-synthetic-count", cl::Hidden, cl::init(5),
56  cl::desc("Initial synthetic entry count for cold functions."));
57 
58 // Assign initial synthetic entry counts to functions.
59 static void
61  auto MayHaveIndirectCalls = [](Function &F) {
62  for (auto *U : F.users()) {
63  if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
64  return true;
65  }
66  return false;
67  };
68 
69  for (Function &F : M) {
70  uint64_t InitialCount = InitialSyntheticCount;
71  if (F.isDeclaration())
72  continue;
73  if (F.hasFnAttribute(Attribute::AlwaysInline) ||
74  F.hasFnAttribute(Attribute::InlineHint)) {
75  // Use a higher value for inline functions to account for the fact that
76  // these are usually beneficial to inline.
77  InitialCount = InlineSyntheticCount;
78  } else if (F.hasLocalLinkage() && !MayHaveIndirectCalls(F)) {
79  // Local functions without inline hints get counts only through
80  // propagation.
81  InitialCount = 0;
82  } else if (F.hasFnAttribute(Attribute::Cold) ||
83  F.hasFnAttribute(Attribute::NoInline)) {
84  // Use a lower value for noinline and cold functions.
85  InitialCount = ColdSyntheticCount;
86  }
87  SetCount(&F, InitialCount);
88  }
89 }
90 
96  // Set initial entry counts.
98  M, [&](Function *F, uint64_t Count) { Counts[F] = Scaled64(Count, 0); });
99 
100  // Edge includes information about the source. Hence ignore the first
101  // parameter.
102  auto GetCallSiteProfCount = [&](const CallGraphNode *,
103  const CallGraphNode::CallRecord &Edge) {
104  Optional<Scaled64> Res;
105  if (!Edge.first)
106  return Res;
107  CallBase &CB = *cast<CallBase>(*Edge.first);
108  Function *Caller = CB.getCaller();
109  auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*Caller);
110 
111  // Now compute the callsite count from relative frequency and
112  // entry count:
113  BasicBlock *CSBB = CB.getParent();
114  Scaled64 EntryFreq(BFI.getEntryFreq(), 0);
115  Scaled64 BBCount(BFI.getBlockFreq(CSBB).getFrequency(), 0);
116  BBCount /= EntryFreq;
117  BBCount *= Counts[Caller];
118  return Optional<Scaled64>(BBCount);
119  };
120 
121  CallGraph CG(M);
122  // Propgate the entry counts on the callgraph.
124  &CG, GetCallSiteProfCount, [&](const CallGraphNode *N, Scaled64 New) {
125  auto F = N->getFunction();
126  if (!F || F->isDeclaration())
127  return;
128 
129  Counts[F] += New;
130  });
131 
132  // Set the counts as metadata.
133  for (auto Entry : Counts) {
134  Entry.first->setEntryCount(ProfileCount(
135  Entry.second.template toInt<uint64_t>(), Function::PCT_Synthetic));
136  }
137 
138  return PreservedAnalyses::all();
139 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:774
llvm::Function
Definition: Function.h:60
llvm::CallGraphNode::CallRecord
std::pair< Optional< WeakTrackingVH >, CallGraphNode * > CallRecord
A pair of the calling instruction (a call or invoke) and the call graph node being called.
Definition: CallGraph.h:178
llvm::CallingConv::Cold
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition: CallingConv.h:47
llvm::CallGraph
The basic data container for the call graph of a Module of IR.
Definition: CallGraph.h:72
FAM
FunctionAnalysisManager FAM
Definition: PassBuilderBindings.cpp:59
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
llvm::SyntheticCountsPropagation::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
Definition: SyntheticCountsPropagation.cpp:91
Module.h
llvm::Optional
Definition: APInt.h:33
llvm::SyntheticCountsUtils::propagate
static void propagate(const CallGraphType &CG, GetProfCountTy GetProfCount, AddCountTy AddCount)
Propgate synthetic entry counts on a callgraph CG.
Definition: SyntheticCountsUtils.cpp:83
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
CommandLine.h
llvm::Function::PCT_Synthetic
@ PCT_Synthetic
Definition: Function.h:248
MAM
ModuleAnalysisManager MAM
Definition: PassBuilderBindings.cpp:61
llvm::CallGraphNode
A node in the call graph for a module.
Definition: CallGraph.h:166
llvm::BlockFrequencyAnalysis
Analysis pass which computes BlockFrequencyInfo.
Definition: BlockFrequencyInfo.h:112
llvm::CallBase::getCaller
Function * getCaller()
Helper to get the caller (the parent function).
Definition: Instructions.cpp:284
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLFunctionalExtras.h:36
llvm::cl::opt
Definition: CommandLine.h:1412
InlineSyntheticCount
static cl::opt< int > InlineSyntheticCount("inline-synthetic-count", cl::Hidden, cl::init(15), cl::desc("Initial synthetic entry count for inline functions."))
Initial synthetic count assigned to inline functions.
llvm::ProfileCount
Function::ProfileCount ProfileCount
Definition: SampleProfileLoaderBaseImpl.h:47
ProfileCount
Function::ProfileCount ProfileCount
Definition: SyntheticCountsPropagation.cpp:38
uint64_t
llvm::DenseMap
Definition: DenseMap.h:714
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:447
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
SyntheticCountsPropagation.h
SyntheticCountsUtils.h
BlockFrequencyInfo.h
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:432
llvm::ScaledNumber< uint64_t >
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
Function.h
llvm::InitialSyntheticCount
cl::opt< int > InitialSyntheticCount
Scaled64
ScaledNumber< uint64_t > Scaled64
Definition: SyntheticCountsPropagation.cpp:37
CallGraph.h
Instructions.h
initializeCounts
static void initializeCounts(Module &M, function_ref< void(Function *, uint64_t)> SetCount)
Definition: SyntheticCountsPropagation.cpp:60
N
#define N
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
ColdSyntheticCount
static cl::opt< int > ColdSyntheticCount("cold-synthetic-count", cl::Hidden, cl::init(5), cl::desc("Initial synthetic entry count for cold functions."))
Initial synthetic count assigned to cold functions.
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1174
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::InnerAnalysisManagerProxy
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:931
llvm::Function::ProfileCount
Class to represent profile counts.
Definition: Function.h:253
llvm::cl::desc
Definition: CommandLine.h:413