LLVM 17.0.0git
IndirectCallPromotion.cpp
Go to the documentation of this file.
1//===- IndirectCallPromotion.cpp - Optimizations based on value profiling -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the transformation that promotes indirect calls to
10// conditional direct calls when the indirect-call value profile metadata is
11// available.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/ADT/ArrayRef.h"
16#include "llvm/ADT/Statistic.h"
17#include "llvm/ADT/StringRef.h"
23#include "llvm/IR/Function.h"
24#include "llvm/IR/InstrTypes.h"
26#include "llvm/IR/LLVMContext.h"
27#include "llvm/IR/MDBuilder.h"
28#include "llvm/IR/PassManager.h"
29#include "llvm/IR/Value.h"
33#include "llvm/Support/Debug.h"
34#include "llvm/Support/Error.h"
39#include <cassert>
40#include <cstdint>
41#include <memory>
42#include <string>
43#include <utility>
44#include <vector>
45
46using namespace llvm;
47
48#define DEBUG_TYPE "pgo-icall-prom"
49
50STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions.");
51STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites.");
52
53// Command line option to disable indirect-call promotion with the default as
54// false. This is for debug purpose.
55static cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden,
56 cl::desc("Disable indirect call promotion"));
57
58// Set the cutoff value for the promotion. If the value is other than 0, we
59// stop the transformation once the total number of promotions equals the cutoff
60// value.
61// For debug use only.
63 ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden,
64 cl::desc("Max number of promotions for this compilation"));
65
66// If ICPCSSkip is non zero, the first ICPCSSkip callsites will be skipped.
67// For debug use only.
69 ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden,
70 cl::desc("Skip Callsite up to this number for this compilation"));
71
72// Set if the pass is called in LTO optimization. The difference for LTO mode
73// is the pass won't prefix the source module name to the internal linkage
74// symbols.
75static cl::opt<bool> ICPLTOMode("icp-lto", cl::init(false), cl::Hidden,
76 cl::desc("Run indirect-call promotion in LTO "
77 "mode"));
78
79// Set if the pass is called in SamplePGO mode. The difference for SamplePGO
80// mode is it will add prof metadatato the created direct call.
81static cl::opt<bool>
82 ICPSamplePGOMode("icp-samplepgo", cl::init(false), cl::Hidden,
83 cl::desc("Run indirect-call promotion in SamplePGO mode"));
84
85// If the option is set to true, only call instructions will be considered for
86// transformation -- invoke instructions will be ignored.
87static cl::opt<bool>
88 ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden,
89 cl::desc("Run indirect-call promotion for call instructions "
90 "only"));
91
92// If the option is set to true, only invoke instructions will be considered for
93// transformation -- call instructions will be ignored.
94static cl::opt<bool> ICPInvokeOnly("icp-invoke-only", cl::init(false),
96 cl::desc("Run indirect-call promotion for "
97 "invoke instruction only"));
98
99// Dump the function level IR if the transformation happened in this
100// function. For debug use only.
101static cl::opt<bool>
102 ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden,
103 cl::desc("Dump IR after transformation happens"));
104
105namespace {
106
107// The class for main data structure to promote indirect calls to conditional
108// direct calls.
109class ICallPromotionFunc {
110private:
111 Function &F;
112 Module *M;
113
114 // Symtab that maps indirect call profile values to function names and
115 // defines.
116 InstrProfSymtab *Symtab;
117
118 bool SamplePGO;
119
121
122 // A struct that records the direct target and it's call count.
123 struct PromotionCandidate {
124 Function *TargetFunction;
125 uint64_t Count;
126
127 PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {}
128 };
129
130 // Check if the indirect-call call site should be promoted. Return the number
131 // of promotions. Inst is the candidate indirect call, ValueDataRef
132 // contains the array of value profile data for profiled targets,
133 // TotalCount is the total profiled count of call executions, and
134 // NumCandidates is the number of candidate entries in ValueDataRef.
135 std::vector<PromotionCandidate> getPromotionCandidatesForCallSite(
136 const CallBase &CB, const ArrayRef<InstrProfValueData> &ValueDataRef,
137 uint64_t TotalCount, uint32_t NumCandidates);
138
139 // Promote a list of targets for one indirect-call callsite. Return
140 // the number of promotions.
141 uint32_t tryToPromote(CallBase &CB,
142 const std::vector<PromotionCandidate> &Candidates,
143 uint64_t &TotalCount);
144
145public:
146 ICallPromotionFunc(Function &Func, Module *Modu, InstrProfSymtab *Symtab,
147 bool SamplePGO, OptimizationRemarkEmitter &ORE)
148 : F(Func), M(Modu), Symtab(Symtab), SamplePGO(SamplePGO), ORE(ORE) {}
149 ICallPromotionFunc(const ICallPromotionFunc &) = delete;
150 ICallPromotionFunc &operator=(const ICallPromotionFunc &) = delete;
151
152 bool processFunction(ProfileSummaryInfo *PSI);
153};
154
155} // end anonymous namespace
156
157// Indirect-call promotion heuristic. The direct targets are sorted based on
158// the count. Stop at the first target that is not promoted.
159std::vector<ICallPromotionFunc::PromotionCandidate>
160ICallPromotionFunc::getPromotionCandidatesForCallSite(
161 const CallBase &CB, const ArrayRef<InstrProfValueData> &ValueDataRef,
162 uint64_t TotalCount, uint32_t NumCandidates) {
163 std::vector<PromotionCandidate> Ret;
164
165 LLVM_DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << CB
166 << " Num_targets: " << ValueDataRef.size()
167 << " Num_candidates: " << NumCandidates << "\n");
168 NumOfPGOICallsites++;
169 if (ICPCSSkip != 0 && NumOfPGOICallsites <= ICPCSSkip) {
170 LLVM_DEBUG(dbgs() << " Skip: User options.\n");
171 return Ret;
172 }
173
174 for (uint32_t I = 0; I < NumCandidates; I++) {
175 uint64_t Count = ValueDataRef[I].Count;
176 assert(Count <= TotalCount);
177 (void)TotalCount;
178 uint64_t Target = ValueDataRef[I].Value;
179 LLVM_DEBUG(dbgs() << " Candidate " << I << " Count=" << Count
180 << " Target_func: " << Target << "\n");
181
182 if (ICPInvokeOnly && isa<CallInst>(CB)) {
183 LLVM_DEBUG(dbgs() << " Not promote: User options.\n");
184 ORE.emit([&]() {
185 return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", &CB)
186 << " Not promote: User options";
187 });
188 break;
189 }
190 if (ICPCallOnly && isa<InvokeInst>(CB)) {
191 LLVM_DEBUG(dbgs() << " Not promote: User option.\n");
192 ORE.emit([&]() {
193 return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", &CB)
194 << " Not promote: User options";
195 });
196 break;
197 }
198 if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
199 LLVM_DEBUG(dbgs() << " Not promote: Cutoff reached.\n");
200 ORE.emit([&]() {
201 return OptimizationRemarkMissed(DEBUG_TYPE, "CutOffReached", &CB)
202 << " Not promote: Cutoff reached";
203 });
204 break;
205 }
206
207 // Don't promote if the symbol is not defined in the module. This avoids
208 // creating a reference to a symbol that doesn't exist in the module
209 // This can happen when we compile with a sample profile collected from
210 // one binary but used for another, which may have profiled targets that
211 // aren't used in the new binary. We might have a declaration initially in
212 // the case where the symbol is globally dead in the binary and removed by
213 // ThinLTO.
214 Function *TargetFunction = Symtab->getFunction(Target);
215 if (TargetFunction == nullptr || TargetFunction->isDeclaration()) {
216 LLVM_DEBUG(dbgs() << " Not promote: Cannot find the target\n");
217 ORE.emit([&]() {
218 return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", &CB)
219 << "Cannot promote indirect call: target with md5sum "
220 << ore::NV("target md5sum", Target) << " not found";
221 });
222 break;
223 }
224
225 const char *Reason = nullptr;
226 if (!isLegalToPromote(CB, TargetFunction, &Reason)) {
227 using namespace ore;
228
229 ORE.emit([&]() {
230 return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", &CB)
231 << "Cannot promote indirect call to "
232 << NV("TargetFunction", TargetFunction) << " with count of "
233 << NV("Count", Count) << ": " << Reason;
234 });
235 break;
236 }
237
238 Ret.push_back(PromotionCandidate(TargetFunction, Count));
239 TotalCount -= Count;
240 }
241 return Ret;
242}
243
245 uint64_t Count, uint64_t TotalCount,
246 bool AttachProfToDirectCall,
248
249 uint64_t ElseCount = TotalCount - Count;
250 uint64_t MaxCount = (Count >= ElseCount ? Count : ElseCount);
251 uint64_t Scale = calculateCountScale(MaxCount);
252 MDBuilder MDB(CB.getContext());
253 MDNode *BranchWeights = MDB.createBranchWeights(
254 scaleBranchCount(Count, Scale), scaleBranchCount(ElseCount, Scale));
255
256 CallBase &NewInst =
257 promoteCallWithIfThenElse(CB, DirectCallee, BranchWeights);
258
259 if (AttachProfToDirectCall) {
260 MDBuilder MDB(NewInst.getContext());
261 NewInst.setMetadata(
262 LLVMContext::MD_prof,
263 MDB.createBranchWeights({static_cast<uint32_t>(Count)}));
264 }
265
266 using namespace ore;
267
268 if (ORE)
269 ORE->emit([&]() {
270 return OptimizationRemark(DEBUG_TYPE, "Promoted", &CB)
271 << "Promote indirect call to " << NV("DirectCallee", DirectCallee)
272 << " with count " << NV("Count", Count) << " out of "
273 << NV("TotalCount", TotalCount);
274 });
275 return NewInst;
276}
277
278// Promote indirect-call to conditional direct-call for one callsite.
279uint32_t ICallPromotionFunc::tryToPromote(
280 CallBase &CB, const std::vector<PromotionCandidate> &Candidates,
281 uint64_t &TotalCount) {
282 uint32_t NumPromoted = 0;
283
284 for (const auto &C : Candidates) {
285 uint64_t Count = C.Count;
286 pgo::promoteIndirectCall(CB, C.TargetFunction, Count, TotalCount, SamplePGO,
287 &ORE);
288 assert(TotalCount >= Count);
289 TotalCount -= Count;
290 NumOfPGOICallPromotion++;
291 NumPromoted++;
292 }
293 return NumPromoted;
294}
295
296// Traverse all the indirect-call callsite and get the value profile
297// annotation to perform indirect-call promotion.
298bool ICallPromotionFunc::processFunction(ProfileSummaryInfo *PSI) {
299 bool Changed = false;
300 ICallPromotionAnalysis ICallAnalysis;
301 for (auto *CB : findIndirectCalls(F)) {
302 uint32_t NumVals, NumCandidates;
303 uint64_t TotalCount;
304 auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction(
305 CB, NumVals, TotalCount, NumCandidates);
306 if (!NumCandidates ||
307 (PSI && PSI->hasProfileSummary() && !PSI->isHotCount(TotalCount)))
308 continue;
309 auto PromotionCandidates = getPromotionCandidatesForCallSite(
310 *CB, ICallProfDataRef, TotalCount, NumCandidates);
311 uint32_t NumPromoted = tryToPromote(*CB, PromotionCandidates, TotalCount);
312 if (NumPromoted == 0)
313 continue;
314
315 Changed = true;
316 // Adjust the MD.prof metadata. First delete the old one.
317 CB->setMetadata(LLVMContext::MD_prof, nullptr);
318 // If all promoted, we don't need the MD.prof metadata.
319 if (TotalCount == 0 || NumPromoted == NumVals)
320 continue;
321 // Otherwise we need update with the un-promoted records back.
322 annotateValueSite(*M, *CB, ICallProfDataRef.slice(NumPromoted), TotalCount,
323 IPVK_IndirectCallTarget, NumCandidates);
324 }
325 return Changed;
326}
327
328// A wrapper function that does the actual work.
330 bool InLTO, bool SamplePGO,
331 ModuleAnalysisManager *AM = nullptr) {
332 if (DisableICP)
333 return false;
334 InstrProfSymtab Symtab;
335 if (Error E = Symtab.create(M, InLTO)) {
336 std::string SymtabFailure = toString(std::move(E));
337 M.getContext().emitError("Failed to create symtab: " + SymtabFailure);
338 return false;
339 }
340 bool Changed = false;
341 for (auto &F : M) {
342 if (F.isDeclaration() || F.hasOptNone())
343 continue;
344
345 std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
347 if (AM) {
348 auto &FAM =
351 } else {
352 OwnedORE = std::make_unique<OptimizationRemarkEmitter>(&F);
353 ORE = OwnedORE.get();
354 }
355
356 ICallPromotionFunc ICallPromotion(F, &M, &Symtab, SamplePGO, *ORE);
357 bool FuncChanged = ICallPromotion.processFunction(PSI);
358 if (ICPDUMPAFTER && FuncChanged) {
359 LLVM_DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));
360 LLVM_DEBUG(dbgs() << "\n");
361 }
362 Changed |= FuncChanged;
363 if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
364 LLVM_DEBUG(dbgs() << " Stop: Cutoff reached.\n");
365 break;
366 }
367 }
368 return Changed;
369}
370
374
375 if (!promoteIndirectCalls(M, PSI, InLTO | ICPLTOMode,
376 SamplePGO | ICPSamplePGOMode, &AM))
377 return PreservedAnalyses::all();
378
380}
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DEBUG(X)
Definition: Debug.h:101
Interface to identify indirect call promotion candidates.
static cl::opt< bool > ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion for call instructions " "only"))
static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO, bool SamplePGO, ModuleAnalysisManager *AM=nullptr)
static cl::opt< bool > ICPInvokeOnly("icp-invoke-only", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion for " "invoke instruction only"))
static cl::opt< unsigned > ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden, cl::desc("Skip Callsite up to this number for this compilation"))
static cl::opt< bool > ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden, cl::desc("Dump IR after transformation happens"))
static cl::opt< bool > ICPLTOMode("icp-lto", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion in LTO " "mode"))
#define DEBUG_TYPE
static cl::opt< bool > DisableICP("disable-icp", cl::init(false), cl::Hidden, cl::desc("Disable indirect call promotion"))
static cl::opt< unsigned > ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden, cl::desc("Max number of promotions for this compilation"))
static cl::opt< bool > ICPSamplePGOMode("icp-samplepgo", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion in SamplePGO mode"))
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
FunctionAnalysisManager FAM
This header defines various interfaces for pass management in LLVM.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:620
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:774
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1186
Lightweight error class with error context and mandatory checking.
Definition: Error.h:156
const Function & getFunction() const
Definition: Function.h:134
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:275
ArrayRef< InstrProfValueData > getPromotionCandidatesForInstruction(const Instruction *I, uint32_t &NumVals, uint64_t &TotalCount, uint32_t &NumCandidates)
Returns reference to array of InstrProfValueData for the given instruction I.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:933
A symbol table used for function PGO name look-up with keys (such as pointers, md5hash values) to the...
Definition: InstrProf.h:446
Error create(object::SectionRef &Section)
Create InstrProfSymtab from an object file section which contains function PGO names.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1455
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
Metadata node.
Definition: Metadata.h:943
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:155
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
bool isHotCount(uint64_t C) const
Returns true if count C is considered hot.
Target - Wrapper for Target specific information.
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:994
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
std::optional< const char * > toString(const std::optional< DWARFFormValue > &V)
Take an optional DWARFFormValue and try to extract a string value from it.
DiagnosticInfoOptimizationBase::Argument NV
CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
std::vector< CallBase * > findIndirectCalls(Function &F)
CallBase & promoteCallWithIfThenElse(CallBase &CB, Function *Callee, MDNode *BranchWeights=nullptr)
Promote the given indirect call site to conditionally call Callee.
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:1018
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
static uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale)
Scale an individual branch count.
static uint64_t calculateCountScale(uint64_t MaxCount)
Calculate what to divide by to scale counts.