LLVM  10.0.0svn
SyntheticCountsPropagation.cpp
Go to the documentation of this file.
1 //=- SyntheticCountsPropagation.cpp - Propagate function counts --*- C++ -*-=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a transformation that synthesizes entry counts for
10 // functions and attaches !prof metadata to functions with the synthesized
11 // counts. The presence of !prof metadata with counter name set to
12 // 'synthesized_function_entry_count' indicate that the value of the counter is
13 // an estimation of the likely execution count of the function. This transform
14 // is applied only in non PGO mode as functions get 'real' profile-based
15 // function entry counts in the PGO mode.
16 //
17 // The transformation works by first assigning some initial values to the entry
18 // counts of all functions and then doing a top-down traversal of the
19 // callgraph-scc to propagate the counts. For each function the set of callsites
20 // and their relative block frequency is gathered. The relative block frequency
21 // multiplied by the entry count of the caller and added to the callee's entry
22 // count. For non-trivial SCCs, the new counts are computed from the previous
23 // counts and updated in one shot.
24 //
25 //===----------------------------------------------------------------------===//
26 
28 #include "llvm/ADT/DenseSet.h"
29 #include "llvm/ADT/STLExtras.h"
34 #include "llvm/IR/CallSite.h"
35 #include "llvm/IR/Function.h"
36 #include "llvm/IR/Instructions.h"
37 #include "llvm/IR/Module.h"
39 #include "llvm/Support/Debug.h"
41 
42 using namespace llvm;
45 
46 #define DEBUG_TYPE "synthetic-counts-propagation"
47 
48 /// Initial synthetic count assigned to functions.
50  InitialSyntheticCount("initial-synthetic-count", cl::Hidden, cl::init(10),
52  cl::desc("Initial value of synthetic entry count."));
53 
54 /// Initial synthetic count assigned to inline functions.
56  "inline-synthetic-count", cl::Hidden, cl::init(15), cl::ZeroOrMore,
57  cl::desc("Initial synthetic entry count for inline functions."));
58 
59 /// Initial synthetic count assigned to cold functions.
61  "cold-synthetic-count", cl::Hidden, cl::init(5), cl::ZeroOrMore,
62  cl::desc("Initial synthetic entry count for cold functions."));
63 
64 // Assign initial synthetic entry counts to functions.
65 static void
66 initializeCounts(Module &M, function_ref<void(Function *, uint64_t)> SetCount) {
67  auto MayHaveIndirectCalls = [](Function &F) {
68  for (auto *U : F.users()) {
69  if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
70  return true;
71  }
72  return false;
73  };
74 
75  for (Function &F : M) {
76  uint64_t InitialCount = InitialSyntheticCount;
77  if (F.isDeclaration())
78  continue;
79  if (F.hasFnAttribute(Attribute::AlwaysInline) ||
80  F.hasFnAttribute(Attribute::InlineHint)) {
81  // Use a higher value for inline functions to account for the fact that
82  // these are usually beneficial to inline.
83  InitialCount = InlineSyntheticCount;
84  } else if (F.hasLocalLinkage() && !MayHaveIndirectCalls(F)) {
85  // Local functions without inline hints get counts only through
86  // propagation.
87  InitialCount = 0;
88  } else if (F.hasFnAttribute(Attribute::Cold) ||
89  F.hasFnAttribute(Attribute::NoInline)) {
90  // Use a lower value for noinline and cold functions.
91  InitialCount = ColdSyntheticCount;
92  }
93  SetCount(&F, InitialCount);
94  }
95 }
96 
98  ModuleAnalysisManager &MAM) {
100  MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
102  // Set initial entry counts.
104  M, [&](Function *F, uint64_t Count) { Counts[F] = Scaled64(Count, 0); });
105 
106  // Edge includes information about the source. Hence ignore the first
107  // parameter.
108  auto GetCallSiteProfCount = [&](const CallGraphNode *,
109  const CallGraphNode::CallRecord &Edge) {
110  Optional<Scaled64> Res = None;
111  if (!Edge.first)
112  return Res;
113  assert(isa<Instruction>(Edge.first));
114  CallSite CS(cast<Instruction>(Edge.first));
115  Function *Caller = CS.getCaller();
116  auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*Caller);
117 
118  // Now compute the callsite count from relative frequency and
119  // entry count:
120  BasicBlock *CSBB = CS.getInstruction()->getParent();
121  Scaled64 EntryFreq(BFI.getEntryFreq(), 0);
122  Scaled64 BBCount(BFI.getBlockFreq(CSBB).getFrequency(), 0);
123  BBCount /= EntryFreq;
124  BBCount *= Counts[Caller];
125  return Optional<Scaled64>(BBCount);
126  };
127 
128  CallGraph CG(M);
129  // Propgate the entry counts on the callgraph.
131  &CG, GetCallSiteProfCount, [&](const CallGraphNode *N, Scaled64 New) {
132  auto F = N->getFunction();
133  if (!F || F->isDeclaration())
134  return;
135 
136  Counts[F] += New;
137  });
138 
139  // Set the counts as metadata.
140  for (auto Entry : Counts) {
141  Entry.first->setEntryCount(ProfileCount(
142  Entry.second.template toInt<uint64_t>(), Function::PCT_Synthetic));
143  }
144 
145  return PreservedAnalyses::all();
146 }
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:777
This class represents lattice values for constants.
Definition: AllocatorList.h:23
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:66
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:104
cl::opt< int > InitialSyntheticCount("initial-synthetic-count", cl::Hidden, cl::init(10), cl::ZeroOrMore, cl::desc("Initial value of synthetic entry count."))
Initial synthetic count assigned to functions.
F(f)
A node in the call graph for a module.
Definition: CallGraph.h:164
static cl::opt< int > ColdSyntheticCount("cold-synthetic-count", cl::Hidden, cl::init(5), cl::ZeroOrMore, cl::desc("Initial synthetic entry count for cold functions."))
Initial synthetic count assigned to cold functions.
InstrTy * getInstruction() const
Definition: CallSite.h:96
This file provides interfaces used to build and manipulate a call graph, which is a very useful tool ...
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:154
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
static void propagate(const CallGraphType &CG, GetProfCountTy GetProfCount, AddCountTy AddCount)
Propgate synthetic entry counts on a callgraph CG.
Function::ProfileCount ProfileCount
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:160
Class to represent profile counts.
Definition: Function.h:260
Function * getFunction() const
Returns the function that this call graph node represents.
Definition: CallGraph.h:187
Analysis pass which computes BlockFrequencyInfo.
Module.h This file contains the declarations for the Module class.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
The basic data container for the call graph of a Module of IR.
Definition: CallGraph.h:73
std::pair< WeakTrackingVH, CallGraphNode * > CallRecord
A pair of the calling instruction (a call or invoke) and the call graph node being called...
Definition: CallGraph.h:168
#define N
static void initializeCounts(Module &M, function_ref< void(Function *, uint64_t)> SetCount)
static cl::opt< int > InlineSyntheticCount("inline-synthetic-count", cl::Hidden, cl::init(15), cl::ZeroOrMore, cl::desc("Initial synthetic entry count for inline functions."))
Initial synthetic count assigned to inline functions.
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:231
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
ScaledNumber< uint64_t > Scaled64
A container for analyses that lazily runs them and caches their results.
const BasicBlock * getParent() const
Definition: Instruction.h:66
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:1045
FunTy * getCaller() const
Return the caller function for this call site.
Definition: CallSite.h:275