LLVM 19.0.0git
IndirectCallPromotion.cpp
Go to the documentation of this file.
1//===- IndirectCallPromotion.cpp - Optimizations based on value profiling -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the transformation that promotes indirect calls to
10// conditional direct calls when the indirect-call value profile metadata is
11// available.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/ADT/ArrayRef.h"
16#include "llvm/ADT/Statistic.h"
17#include "llvm/ADT/StringRef.h"
23#include "llvm/IR/Function.h"
24#include "llvm/IR/InstrTypes.h"
26#include "llvm/IR/LLVMContext.h"
27#include "llvm/IR/MDBuilder.h"
28#include "llvm/IR/PassManager.h"
30#include "llvm/IR/Value.h"
34#include "llvm/Support/Debug.h"
35#include "llvm/Support/Error.h"
40#include <cassert>
41#include <cstdint>
42#include <memory>
43#include <string>
44#include <utility>
45#include <vector>
46
47using namespace llvm;
48
49#define DEBUG_TYPE "pgo-icall-prom"
50
51STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions.");
52STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites.");
53
54// Command line option to disable indirect-call promotion with the default as
55// false. This is for debug purpose.
56static cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden,
57 cl::desc("Disable indirect call promotion"));
58
59// Set the cutoff value for the promotion. If the value is other than 0, we
60// stop the transformation once the total number of promotions equals the cutoff
61// value.
62// For debug use only.
64 ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden,
65 cl::desc("Max number of promotions for this compilation"));
66
67// If ICPCSSkip is non zero, the first ICPCSSkip callsites will be skipped.
68// For debug use only.
70 ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden,
71 cl::desc("Skip Callsite up to this number for this compilation"));
72
73// Set if the pass is called in LTO optimization. The difference for LTO mode
74// is the pass won't prefix the source module name to the internal linkage
75// symbols.
76static cl::opt<bool> ICPLTOMode("icp-lto", cl::init(false), cl::Hidden,
77 cl::desc("Run indirect-call promotion in LTO "
78 "mode"));
79
80// Set if the pass is called in SamplePGO mode. The difference for SamplePGO
81// mode is it will add prof metadatato the created direct call.
82static cl::opt<bool>
83 ICPSamplePGOMode("icp-samplepgo", cl::init(false), cl::Hidden,
84 cl::desc("Run indirect-call promotion in SamplePGO mode"));
85
86// If the option is set to true, only call instructions will be considered for
87// transformation -- invoke instructions will be ignored.
88static cl::opt<bool>
89 ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden,
90 cl::desc("Run indirect-call promotion for call instructions "
91 "only"));
92
93// If the option is set to true, only invoke instructions will be considered for
94// transformation -- call instructions will be ignored.
95static cl::opt<bool> ICPInvokeOnly("icp-invoke-only", cl::init(false),
97 cl::desc("Run indirect-call promotion for "
98 "invoke instruction only"));
99
100// Dump the function level IR if the transformation happened in this
101// function. For debug use only.
102static cl::opt<bool>
103 ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden,
104 cl::desc("Dump IR after transformation happens"));
105
106namespace {
107
108// Promote indirect calls to conditional direct calls, keeping track of
109// thresholds.
110class IndirectCallPromoter {
111private:
112 Function &F;
113
114 // Symtab that maps indirect call profile values to function names and
115 // defines.
116 InstrProfSymtab *const Symtab;
117
118 const bool SamplePGO;
119
121
122 // A struct that records the direct target and it's call count.
123 struct PromotionCandidate {
124 Function *const TargetFunction;
125 const uint64_t Count;
126
127 PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {}
128 };
129
130 // Check if the indirect-call call site should be promoted. Return the number
131 // of promotions. Inst is the candidate indirect call, ValueDataRef
132 // contains the array of value profile data for profiled targets,
133 // TotalCount is the total profiled count of call executions, and
134 // NumCandidates is the number of candidate entries in ValueDataRef.
135 std::vector<PromotionCandidate> getPromotionCandidatesForCallSite(
136 const CallBase &CB, const ArrayRef<InstrProfValueData> &ValueDataRef,
137 uint64_t TotalCount, uint32_t NumCandidates);
138
139 // Promote a list of targets for one indirect-call callsite by comparing
140 // indirect callee with functions. Returns true if there are IR
141 // transformations and false otherwise.
142 bool tryToPromoteWithFuncCmp(
143 CallBase &CB, const std::vector<PromotionCandidate> &Candidates,
144 uint64_t TotalCount, ArrayRef<InstrProfValueData> ICallProfDataRef,
145 uint32_t NumCandidates);
146
147public:
148 IndirectCallPromoter(Function &Func, InstrProfSymtab *Symtab, bool SamplePGO,
150 : F(Func), Symtab(Symtab), SamplePGO(SamplePGO), ORE(ORE) {}
151 IndirectCallPromoter(const IndirectCallPromoter &) = delete;
152 IndirectCallPromoter &operator=(const IndirectCallPromoter &) = delete;
153
154 bool processFunction(ProfileSummaryInfo *PSI);
155};
156
157} // end anonymous namespace
158
159// Indirect-call promotion heuristic. The direct targets are sorted based on
160// the count. Stop at the first target that is not promoted.
161std::vector<IndirectCallPromoter::PromotionCandidate>
162IndirectCallPromoter::getPromotionCandidatesForCallSite(
163 const CallBase &CB, const ArrayRef<InstrProfValueData> &ValueDataRef,
164 uint64_t TotalCount, uint32_t NumCandidates) {
165 std::vector<PromotionCandidate> Ret;
166
167 LLVM_DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << CB
168 << " Num_targets: " << ValueDataRef.size()
169 << " Num_candidates: " << NumCandidates << "\n");
170 NumOfPGOICallsites++;
171 if (ICPCSSkip != 0 && NumOfPGOICallsites <= ICPCSSkip) {
172 LLVM_DEBUG(dbgs() << " Skip: User options.\n");
173 return Ret;
174 }
175
176 for (uint32_t I = 0; I < NumCandidates; I++) {
177 uint64_t Count = ValueDataRef[I].Count;
178 assert(Count <= TotalCount);
179 (void)TotalCount;
180 uint64_t Target = ValueDataRef[I].Value;
181 LLVM_DEBUG(dbgs() << " Candidate " << I << " Count=" << Count
182 << " Target_func: " << Target << "\n");
183
184 if (ICPInvokeOnly && isa<CallInst>(CB)) {
185 LLVM_DEBUG(dbgs() << " Not promote: User options.\n");
186 ORE.emit([&]() {
187 return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", &CB)
188 << " Not promote: User options";
189 });
190 break;
191 }
192 if (ICPCallOnly && isa<InvokeInst>(CB)) {
193 LLVM_DEBUG(dbgs() << " Not promote: User option.\n");
194 ORE.emit([&]() {
195 return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", &CB)
196 << " Not promote: User options";
197 });
198 break;
199 }
200 if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
201 LLVM_DEBUG(dbgs() << " Not promote: Cutoff reached.\n");
202 ORE.emit([&]() {
203 return OptimizationRemarkMissed(DEBUG_TYPE, "CutOffReached", &CB)
204 << " Not promote: Cutoff reached";
205 });
206 break;
207 }
208
209 // Don't promote if the symbol is not defined in the module. This avoids
210 // creating a reference to a symbol that doesn't exist in the module
211 // This can happen when we compile with a sample profile collected from
212 // one binary but used for another, which may have profiled targets that
213 // aren't used in the new binary. We might have a declaration initially in
214 // the case where the symbol is globally dead in the binary and removed by
215 // ThinLTO.
216 Function *TargetFunction = Symtab->getFunction(Target);
217 if (TargetFunction == nullptr || TargetFunction->isDeclaration()) {
218 LLVM_DEBUG(dbgs() << " Not promote: Cannot find the target\n");
219 ORE.emit([&]() {
220 return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", &CB)
221 << "Cannot promote indirect call: target with md5sum "
222 << ore::NV("target md5sum", Target) << " not found";
223 });
224 break;
225 }
226
227 const char *Reason = nullptr;
228 if (!isLegalToPromote(CB, TargetFunction, &Reason)) {
229 using namespace ore;
230
231 ORE.emit([&]() {
232 return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", &CB)
233 << "Cannot promote indirect call to "
234 << NV("TargetFunction", TargetFunction) << " with count of "
235 << NV("Count", Count) << ": " << Reason;
236 });
237 break;
238 }
239
240 Ret.push_back(PromotionCandidate(TargetFunction, Count));
241 TotalCount -= Count;
242 }
243 return Ret;
244}
245
247 uint64_t Count, uint64_t TotalCount,
248 bool AttachProfToDirectCall,
250
251 uint64_t ElseCount = TotalCount - Count;
252 uint64_t MaxCount = (Count >= ElseCount ? Count : ElseCount);
253 uint64_t Scale = calculateCountScale(MaxCount);
254 MDBuilder MDB(CB.getContext());
255 MDNode *BranchWeights = MDB.createBranchWeights(
256 scaleBranchCount(Count, Scale), scaleBranchCount(ElseCount, Scale));
257
258 CallBase &NewInst =
259 promoteCallWithIfThenElse(CB, DirectCallee, BranchWeights);
260
261 if (AttachProfToDirectCall) {
262 setBranchWeights(NewInst, {static_cast<uint32_t>(Count)});
263 }
264
265 using namespace ore;
266
267 if (ORE)
268 ORE->emit([&]() {
269 return OptimizationRemark(DEBUG_TYPE, "Promoted", &CB)
270 << "Promote indirect call to " << NV("DirectCallee", DirectCallee)
271 << " with count " << NV("Count", Count) << " out of "
272 << NV("TotalCount", TotalCount);
273 });
274 return NewInst;
275}
276
277// Promote indirect-call to conditional direct-call for one callsite.
278bool IndirectCallPromoter::tryToPromoteWithFuncCmp(
279 CallBase &CB, const std::vector<PromotionCandidate> &Candidates,
280 uint64_t TotalCount, ArrayRef<InstrProfValueData> ICallProfDataRef,
281 uint32_t NumCandidates) {
282 uint32_t NumPromoted = 0;
283
284 for (const auto &C : Candidates) {
285 uint64_t Count = C.Count;
286 pgo::promoteIndirectCall(CB, C.TargetFunction, Count, TotalCount, SamplePGO,
287 &ORE);
288 assert(TotalCount >= Count);
289 TotalCount -= Count;
290 NumOfPGOICallPromotion++;
291 NumPromoted++;
292 }
293
294 if (NumPromoted == 0)
295 return false;
296
297 // Adjust the MD.prof metadata. First delete the old one.
298 CB.setMetadata(LLVMContext::MD_prof, nullptr);
299
300 assert(NumPromoted <= ICallProfDataRef.size() &&
301 "Number of promoted functions should not be greater than the number "
302 "of values in profile metadata");
303 // Annotate the remaining value profiles if counter is not zero.
304 if (TotalCount != 0)
305 annotateValueSite(*F.getParent(), CB, ICallProfDataRef.slice(NumPromoted),
306 TotalCount, IPVK_IndirectCallTarget, NumCandidates);
307
308 return true;
309}
310
311// Traverse all the indirect-call callsite and get the value profile
312// annotation to perform indirect-call promotion.
313bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
314 bool Changed = false;
315 ICallPromotionAnalysis ICallAnalysis;
316 for (auto *CB : findIndirectCalls(F)) {
317 uint32_t NumVals, NumCandidates;
318 uint64_t TotalCount;
319 auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction(
320 CB, NumVals, TotalCount, NumCandidates);
321 if (!NumCandidates ||
322 (PSI && PSI->hasProfileSummary() && !PSI->isHotCount(TotalCount)))
323 continue;
324 auto PromotionCandidates = getPromotionCandidatesForCallSite(
325 *CB, ICallProfDataRef, TotalCount, NumCandidates);
326 Changed |= tryToPromoteWithFuncCmp(*CB, PromotionCandidates, TotalCount,
327 ICallProfDataRef, NumCandidates);
328 }
329 return Changed;
330}
331
332// A wrapper function that does the actual work.
333static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
334 bool SamplePGO, ModuleAnalysisManager &MAM) {
335 if (DisableICP)
336 return false;
337 InstrProfSymtab Symtab;
338 if (Error E = Symtab.create(M, InLTO)) {
339 std::string SymtabFailure = toString(std::move(E));
340 M.getContext().emitError("Failed to create symtab: " + SymtabFailure);
341 return false;
342 }
343 bool Changed = false;
344 for (auto &F : M) {
345 if (F.isDeclaration() || F.hasOptNone())
346 continue;
347
348 auto &FAM =
351
352 IndirectCallPromoter CallPromoter(F, &Symtab, SamplePGO, ORE);
353 bool FuncChanged = CallPromoter.processFunction(PSI);
354 if (ICPDUMPAFTER && FuncChanged) {
355 LLVM_DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));
356 LLVM_DEBUG(dbgs() << "\n");
357 }
358 Changed |= FuncChanged;
359 if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
360 LLVM_DEBUG(dbgs() << " Stop: Cutoff reached.\n");
361 break;
362 }
363 }
364 return Changed;
365}
366
370
371 if (!promoteIndirectCalls(M, PSI, InLTO | ICPLTOMode,
372 SamplePGO | ICPSamplePGOMode, MAM))
373 return PreservedAnalyses::all();
374
376}
#define LLVM_DEBUG(X)
Definition: Debug.h:101
Interface to identify indirect call promotion candidates.
static cl::opt< bool > ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion for call instructions " "only"))
static cl::opt< bool > ICPInvokeOnly("icp-invoke-only", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion for " "invoke instruction only"))
static cl::opt< unsigned > ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden, cl::desc("Skip Callsite up to this number for this compilation"))
static cl::opt< bool > ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden, cl::desc("Dump IR after transformation happens"))
static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO, bool SamplePGO, ModuleAnalysisManager &MAM)
static cl::opt< bool > ICPLTOMode("icp-lto", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion in LTO " "mode"))
#define DEBUG_TYPE
static cl::opt< bool > DisableICP("disable-icp", cl::init(false), cl::Hidden, cl::desc("Disable indirect call promotion"))
static cl::opt< unsigned > ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden, cl::desc("Max number of promotions for this compilation"))
static cl::opt< bool > ICPSamplePGOMode("icp-samplepgo", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion in SamplePGO mode"))
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
FunctionAnalysisManager FAM
ModuleAnalysisManager MAM
This header defines various interfaces for pass management in LLVM.
This file contains the declarations for profiling metadata utility functions.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:321
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:473
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:195
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1467
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
const Function & getFunction() const
Definition: Function.h:161
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:281
ArrayRef< InstrProfValueData > getPromotionCandidatesForInstruction(const Instruction *I, uint32_t &NumVals, uint64_t &TotalCount, uint32_t &NumCandidates)
Returns reference to array of InstrProfValueData for the given instruction I.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:631
A symbol table used for function [IR]PGO name look-up with keys (such as pointers,...
Definition: InstrProf.h:451
Error create(object::SectionRef &Section)
Create InstrProfSymtab from an object file section which contains function PGO names.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1636
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
Metadata node.
Definition: Metadata.h:1067
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:115
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
bool isHotCount(uint64_t C) const
Returns true if count C is considered hot.
Target - Wrapper for Target specific information.
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
std::optional< const char * > toString(const std::optional< DWARFFormValue > &V)
Take an optional DWARFFormValue and try to extract a string value from it.
DiagnosticInfoOptimizationBase::Argument NV
CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
std::vector< CallBase * > findIndirectCalls(Function &F)
CallBase & promoteCallWithIfThenElse(CallBase &CB, Function *Callee, MDNode *BranchWeights=nullptr)
Promote the given indirect call site to conditionally call Callee.
void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:1229
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
static uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale)
Scale an individual branch count.
static uint64_t calculateCountScale(uint64_t MaxCount)
Calculate what to divide by to scale counts.