LLVM 18.0.0git
IndirectCallPromotion.cpp
Go to the documentation of this file.
1//===- IndirectCallPromotion.cpp - Optimizations based on value profiling -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the transformation that promotes indirect calls to
10// conditional direct calls when the indirect-call value profile metadata is
11// available.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/ADT/ArrayRef.h"
16#include "llvm/ADT/Statistic.h"
17#include "llvm/ADT/StringRef.h"
23#include "llvm/IR/Function.h"
24#include "llvm/IR/InstrTypes.h"
26#include "llvm/IR/LLVMContext.h"
27#include "llvm/IR/MDBuilder.h"
28#include "llvm/IR/PassManager.h"
29#include "llvm/IR/Value.h"
33#include "llvm/Support/Debug.h"
34#include "llvm/Support/Error.h"
39#include <cassert>
40#include <cstdint>
41#include <memory>
42#include <string>
43#include <utility>
44#include <vector>
45
46using namespace llvm;
47
48#define DEBUG_TYPE "pgo-icall-prom"
49
50STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions.");
51STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites.");
52
53// Command line option to disable indirect-call promotion with the default as
54// false. This is for debug purpose.
55static cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden,
56 cl::desc("Disable indirect call promotion"));
57
58// Set the cutoff value for the promotion. If the value is other than 0, we
59// stop the transformation once the total number of promotions equals the cutoff
60// value.
61// For debug use only.
63 ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden,
64 cl::desc("Max number of promotions for this compilation"));
65
66// If ICPCSSkip is non zero, the first ICPCSSkip callsites will be skipped.
67// For debug use only.
69 ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden,
70 cl::desc("Skip Callsite up to this number for this compilation"));
71
72// Set if the pass is called in LTO optimization. The difference for LTO mode
73// is the pass won't prefix the source module name to the internal linkage
74// symbols.
75static cl::opt<bool> ICPLTOMode("icp-lto", cl::init(false), cl::Hidden,
76 cl::desc("Run indirect-call promotion in LTO "
77 "mode"));
78
79// Set if the pass is called in SamplePGO mode. The difference for SamplePGO
80// mode is it will add prof metadatato the created direct call.
81static cl::opt<bool>
82 ICPSamplePGOMode("icp-samplepgo", cl::init(false), cl::Hidden,
83 cl::desc("Run indirect-call promotion in SamplePGO mode"));
84
85// If the option is set to true, only call instructions will be considered for
86// transformation -- invoke instructions will be ignored.
87static cl::opt<bool>
88 ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden,
89 cl::desc("Run indirect-call promotion for call instructions "
90 "only"));
91
92// If the option is set to true, only invoke instructions will be considered for
93// transformation -- call instructions will be ignored.
94static cl::opt<bool> ICPInvokeOnly("icp-invoke-only", cl::init(false),
96 cl::desc("Run indirect-call promotion for "
97 "invoke instruction only"));
98
99// Dump the function level IR if the transformation happened in this
100// function. For debug use only.
101static cl::opt<bool>
102 ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden,
103 cl::desc("Dump IR after transformation happens"));
104
105namespace {
106
107// Promote indirect calls to conditional direct calls, keeping track of
108// thresholds.
109class IndirectCallPromoter {
110private:
111 Function &F;
112
113 // Symtab that maps indirect call profile values to function names and
114 // defines.
115 InstrProfSymtab *const Symtab;
116
117 const bool SamplePGO;
118
120
121 // A struct that records the direct target and it's call count.
122 struct PromotionCandidate {
123 Function *const TargetFunction;
124 const uint64_t Count;
125
126 PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {}
127 };
128
129 // Check if the indirect-call call site should be promoted. Return the number
130 // of promotions. Inst is the candidate indirect call, ValueDataRef
131 // contains the array of value profile data for profiled targets,
132 // TotalCount is the total profiled count of call executions, and
133 // NumCandidates is the number of candidate entries in ValueDataRef.
134 std::vector<PromotionCandidate> getPromotionCandidatesForCallSite(
135 const CallBase &CB, const ArrayRef<InstrProfValueData> &ValueDataRef,
136 uint64_t TotalCount, uint32_t NumCandidates);
137
138 // Promote a list of targets for one indirect-call callsite. Return
139 // the number of promotions.
140 uint32_t tryToPromote(CallBase &CB,
141 const std::vector<PromotionCandidate> &Candidates,
142 uint64_t &TotalCount);
143
144public:
145 IndirectCallPromoter(Function &Func, InstrProfSymtab *Symtab, bool SamplePGO,
147 : F(Func), Symtab(Symtab), SamplePGO(SamplePGO), ORE(ORE) {}
148 IndirectCallPromoter(const IndirectCallPromoter &) = delete;
149 IndirectCallPromoter &operator=(const IndirectCallPromoter &) = delete;
150
151 bool processFunction(ProfileSummaryInfo *PSI);
152};
153
154} // end anonymous namespace
155
156// Indirect-call promotion heuristic. The direct targets are sorted based on
157// the count. Stop at the first target that is not promoted.
158std::vector<IndirectCallPromoter::PromotionCandidate>
159IndirectCallPromoter::getPromotionCandidatesForCallSite(
160 const CallBase &CB, const ArrayRef<InstrProfValueData> &ValueDataRef,
161 uint64_t TotalCount, uint32_t NumCandidates) {
162 std::vector<PromotionCandidate> Ret;
163
164 LLVM_DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << CB
165 << " Num_targets: " << ValueDataRef.size()
166 << " Num_candidates: " << NumCandidates << "\n");
167 NumOfPGOICallsites++;
168 if (ICPCSSkip != 0 && NumOfPGOICallsites <= ICPCSSkip) {
169 LLVM_DEBUG(dbgs() << " Skip: User options.\n");
170 return Ret;
171 }
172
173 for (uint32_t I = 0; I < NumCandidates; I++) {
174 uint64_t Count = ValueDataRef[I].Count;
175 assert(Count <= TotalCount);
176 (void)TotalCount;
177 uint64_t Target = ValueDataRef[I].Value;
178 LLVM_DEBUG(dbgs() << " Candidate " << I << " Count=" << Count
179 << " Target_func: " << Target << "\n");
180
181 if (ICPInvokeOnly && isa<CallInst>(CB)) {
182 LLVM_DEBUG(dbgs() << " Not promote: User options.\n");
183 ORE.emit([&]() {
184 return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", &CB)
185 << " Not promote: User options";
186 });
187 break;
188 }
189 if (ICPCallOnly && isa<InvokeInst>(CB)) {
190 LLVM_DEBUG(dbgs() << " Not promote: User option.\n");
191 ORE.emit([&]() {
192 return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", &CB)
193 << " Not promote: User options";
194 });
195 break;
196 }
197 if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
198 LLVM_DEBUG(dbgs() << " Not promote: Cutoff reached.\n");
199 ORE.emit([&]() {
200 return OptimizationRemarkMissed(DEBUG_TYPE, "CutOffReached", &CB)
201 << " Not promote: Cutoff reached";
202 });
203 break;
204 }
205
206 // Don't promote if the symbol is not defined in the module. This avoids
207 // creating a reference to a symbol that doesn't exist in the module
208 // This can happen when we compile with a sample profile collected from
209 // one binary but used for another, which may have profiled targets that
210 // aren't used in the new binary. We might have a declaration initially in
211 // the case where the symbol is globally dead in the binary and removed by
212 // ThinLTO.
213 Function *TargetFunction = Symtab->getFunction(Target);
214 if (TargetFunction == nullptr || TargetFunction->isDeclaration()) {
215 LLVM_DEBUG(dbgs() << " Not promote: Cannot find the target\n");
216 ORE.emit([&]() {
217 return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", &CB)
218 << "Cannot promote indirect call: target with md5sum "
219 << ore::NV("target md5sum", Target) << " not found";
220 });
221 break;
222 }
223
224 const char *Reason = nullptr;
225 if (!isLegalToPromote(CB, TargetFunction, &Reason)) {
226 using namespace ore;
227
228 ORE.emit([&]() {
229 return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", &CB)
230 << "Cannot promote indirect call to "
231 << NV("TargetFunction", TargetFunction) << " with count of "
232 << NV("Count", Count) << ": " << Reason;
233 });
234 break;
235 }
236
237 Ret.push_back(PromotionCandidate(TargetFunction, Count));
238 TotalCount -= Count;
239 }
240 return Ret;
241}
242
244 uint64_t Count, uint64_t TotalCount,
245 bool AttachProfToDirectCall,
247
248 uint64_t ElseCount = TotalCount - Count;
249 uint64_t MaxCount = (Count >= ElseCount ? Count : ElseCount);
250 uint64_t Scale = calculateCountScale(MaxCount);
251 MDBuilder MDB(CB.getContext());
252 MDNode *BranchWeights = MDB.createBranchWeights(
253 scaleBranchCount(Count, Scale), scaleBranchCount(ElseCount, Scale));
254
255 CallBase &NewInst =
256 promoteCallWithIfThenElse(CB, DirectCallee, BranchWeights);
257
258 if (AttachProfToDirectCall) {
259 MDBuilder MDB(NewInst.getContext());
260 NewInst.setMetadata(
261 LLVMContext::MD_prof,
262 MDB.createBranchWeights({static_cast<uint32_t>(Count)}));
263 }
264
265 using namespace ore;
266
267 if (ORE)
268 ORE->emit([&]() {
269 return OptimizationRemark(DEBUG_TYPE, "Promoted", &CB)
270 << "Promote indirect call to " << NV("DirectCallee", DirectCallee)
271 << " with count " << NV("Count", Count) << " out of "
272 << NV("TotalCount", TotalCount);
273 });
274 return NewInst;
275}
276
277// Promote indirect-call to conditional direct-call for one callsite.
278uint32_t IndirectCallPromoter::tryToPromote(
279 CallBase &CB, const std::vector<PromotionCandidate> &Candidates,
280 uint64_t &TotalCount) {
281 uint32_t NumPromoted = 0;
282
283 for (const auto &C : Candidates) {
284 uint64_t Count = C.Count;
285 pgo::promoteIndirectCall(CB, C.TargetFunction, Count, TotalCount, SamplePGO,
286 &ORE);
287 assert(TotalCount >= Count);
288 TotalCount -= Count;
289 NumOfPGOICallPromotion++;
290 NumPromoted++;
291 }
292 return NumPromoted;
293}
294
295// Traverse all the indirect-call callsite and get the value profile
296// annotation to perform indirect-call promotion.
297bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
298 bool Changed = false;
299 ICallPromotionAnalysis ICallAnalysis;
300 for (auto *CB : findIndirectCalls(F)) {
301 uint32_t NumVals, NumCandidates;
302 uint64_t TotalCount;
303 auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction(
304 CB, NumVals, TotalCount, NumCandidates);
305 if (!NumCandidates ||
306 (PSI && PSI->hasProfileSummary() && !PSI->isHotCount(TotalCount)))
307 continue;
308 auto PromotionCandidates = getPromotionCandidatesForCallSite(
309 *CB, ICallProfDataRef, TotalCount, NumCandidates);
310 uint32_t NumPromoted = tryToPromote(*CB, PromotionCandidates, TotalCount);
311 if (NumPromoted == 0)
312 continue;
313
314 Changed = true;
315 // Adjust the MD.prof metadata. First delete the old one.
316 CB->setMetadata(LLVMContext::MD_prof, nullptr);
317 // If all promoted, we don't need the MD.prof metadata.
318 if (TotalCount == 0 || NumPromoted == NumVals)
319 continue;
320 // Otherwise we need update with the un-promoted records back.
321 annotateValueSite(*F.getParent(), *CB, ICallProfDataRef.slice(NumPromoted),
322 TotalCount, IPVK_IndirectCallTarget, NumCandidates);
323 }
324 return Changed;
325}
326
327// A wrapper function that does the actual work.
328static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
329 bool SamplePGO, ModuleAnalysisManager &MAM) {
330 if (DisableICP)
331 return false;
332 InstrProfSymtab Symtab;
333 if (Error E = Symtab.create(M, InLTO)) {
334 std::string SymtabFailure = toString(std::move(E));
335 M.getContext().emitError("Failed to create symtab: " + SymtabFailure);
336 return false;
337 }
338 bool Changed = false;
339 for (auto &F : M) {
340 if (F.isDeclaration() || F.hasOptNone())
341 continue;
342
343 auto &FAM =
346
347 IndirectCallPromoter CallPromoter(F, &Symtab, SamplePGO, ORE);
348 bool FuncChanged = CallPromoter.processFunction(PSI);
349 if (ICPDUMPAFTER && FuncChanged) {
350 LLVM_DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));
351 LLVM_DEBUG(dbgs() << "\n");
352 }
353 Changed |= FuncChanged;
354 if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
355 LLVM_DEBUG(dbgs() << " Stop: Cutoff reached.\n");
356 break;
357 }
358 }
359 return Changed;
360}
361
365
366 if (!promoteIndirectCalls(M, PSI, InLTO | ICPLTOMode,
367 SamplePGO | ICPSamplePGOMode, MAM))
368 return PreservedAnalyses::all();
369
371}
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DEBUG(X)
Definition: Debug.h:101
Interface to identify indirect call promotion candidates.
static cl::opt< bool > ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion for call instructions " "only"))
static cl::opt< bool > ICPInvokeOnly("icp-invoke-only", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion for " "invoke instruction only"))
static cl::opt< unsigned > ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden, cl::desc("Skip Callsite up to this number for this compilation"))
static cl::opt< bool > ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden, cl::desc("Dump IR after transformation happens"))
static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO, bool SamplePGO, ModuleAnalysisManager &MAM)
static cl::opt< bool > ICPLTOMode("icp-lto", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion in LTO " "mode"))
#define DEBUG_TYPE
static cl::opt< bool > DisableICP("disable-icp", cl::init(false), cl::Hidden, cl::desc("Disable indirect call promotion"))
static cl::opt< unsigned > ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden, cl::desc("Max number of promotions for this compilation"))
static cl::opt< bool > ICPSamplePGOMode("icp-samplepgo", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion in SamplePGO mode"))
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
FunctionAnalysisManager FAM
ModuleAnalysisManager MAM
This header defines various interfaces for pass management in LLVM.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:620
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:774
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1190
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
const Function & getFunction() const
Definition: Function.h:136
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:273
ArrayRef< InstrProfValueData > getPromotionCandidatesForInstruction(const Instruction *I, uint32_t &NumVals, uint64_t &TotalCount, uint32_t &NumCandidates)
Returns reference to array of InstrProfValueData for the given instruction I.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:933
A symbol table used for function PGO name look-up with keys (such as pointers, md5hash values) to the...
Definition: InstrProf.h:423
Error create(object::SectionRef &Section)
Create InstrProfSymtab from an object file section which contains function PGO names.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1521
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
Metadata node.
Definition: Metadata.h:950
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:155
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
bool isHotCount(uint64_t C) const
Returns true if count C is considered hot.
Target - Wrapper for Target specific information.
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1069
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
std::optional< const char * > toString(const std::optional< DWARFFormValue > &V)
Take an optional DWARFFormValue and try to extract a string value from it.
DiagnosticInfoOptimizationBase::Argument NV
CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
std::vector< CallBase * > findIndirectCalls(Function &F)
CallBase & promoteCallWithIfThenElse(CallBase &CB, Function *Callee, MDNode *BranchWeights=nullptr)
Promote the given indirect call site to conditionally call Callee.
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:1118
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
static uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale)
Scale an individual branch count.
static uint64_t calculateCountScale(uint64_t MaxCount)
Calculate what to divide by to scale counts.