LLVM 23.0.0git
IndirectCallPromotion.cpp
Go to the documentation of this file.
1//===- IndirectCallPromotion.cpp - Optimizations based on value profiling -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the transformation that promotes indirect calls to
10// conditional direct calls when the indirect-call value profile metadata is
11// available.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/ADT/ArrayRef.h"
16#include "llvm/ADT/DenseMap.h"
17#include "llvm/ADT/Statistic.h"
18#include "llvm/ADT/StringRef.h"
25#include "llvm/IR/Dominators.h"
26#include "llvm/IR/Function.h"
27#include "llvm/IR/InstrTypes.h"
29#include "llvm/IR/LLVMContext.h"
30#include "llvm/IR/MDBuilder.h"
31#include "llvm/IR/PassManager.h"
33#include "llvm/IR/Value.h"
37#include "llvm/Support/Debug.h"
38#include "llvm/Support/Error.h"
43#include <cassert>
44#include <cstdint>
45#include <set>
46#include <string>
47#include <unordered_map>
48#include <utility>
49#include <vector>
50
51using namespace llvm;
52
53#define DEBUG_TYPE "pgo-icall-prom"
54
55STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions.");
56STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites.");
57
58namespace llvm {
60
62} // namespace llvm
63
64// Command line option to disable indirect-call promotion with the default as
65// false. This is for debug purpose.
66static cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden,
67 cl::desc("Disable indirect call promotion"));
68
69// Set the cutoff value for the promotion. If the value is other than 0, we
70// stop the transformation once the total number of promotions equals the cutoff
71// value.
72// For debug use only.
74 ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden,
75 cl::desc("Max number of promotions for this compilation"));
76
77// If ICPCSSkip is non zero, the first ICPCSSkip callsites will be skipped.
78// For debug use only.
80 ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden,
81 cl::desc("Skip Callsite up to this number for this compilation"));
82
83// ICP the candidate function even when only a declaration is present.
85 "icp-allow-decls", cl::init(false), cl::Hidden,
86 cl::desc("Promote the target candidate even when the definition "
87 " is not available"));
88
89// ICP hot candidate functions only. When setting to false, non-cold functions
90// (warm functions) can also be promoted.
91static cl::opt<bool>
92 ICPAllowHotOnly("icp-allow-hot-only", cl::init(true), cl::Hidden,
93 cl::desc("Promote the target candidate only if it is a "
94 "hot function. Otherwise, warm functions can "
95 "also be promoted"));
96
97// If one target cannot be ICP'd, proceed with the remaining targets instead
98// of exiting the callsite.
100 "icp-allow-candidate-skip", cl::init(false), cl::Hidden,
101 cl::desc("Continue with the remaining targets instead of exiting "
102 "when failing in a candidate"));
103
104// Set if the pass is called in LTO optimization. The difference for LTO mode
105// is the pass won't prefix the source module name to the internal linkage
106// symbols.
107static cl::opt<bool> ICPLTOMode("icp-lto", cl::init(false), cl::Hidden,
108 cl::desc("Run indirect-call promotion in LTO "
109 "mode"));
110
111// Set if the pass is called in SamplePGO mode. The difference for SamplePGO
112// mode is it will add prof metadatato the created direct call.
113static cl::opt<bool>
114 ICPSamplePGOMode("icp-samplepgo", cl::init(false), cl::Hidden,
115 cl::desc("Run indirect-call promotion in SamplePGO mode"));
116
117// If the option is set to true, only call instructions will be considered for
118// transformation -- invoke instructions will be ignored.
119static cl::opt<bool>
120 ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden,
121 cl::desc("Run indirect-call promotion for call instructions "
122 "only"));
123
124// If the option is set to true, only invoke instructions will be considered for
125// transformation -- call instructions will be ignored.
126static cl::opt<bool> ICPInvokeOnly("icp-invoke-only", cl::init(false),
128 cl::desc("Run indirect-call promotion for "
129 "invoke instruction only"));
130
131// Dump the function level IR if the transformation happened in this
132// function. For debug use only.
133static cl::opt<bool>
134 ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden,
135 cl::desc("Dump IR after transformation happens"));
136
137// Indirect call promotion pass will fall back to function-based comparison if
138// vtable-count / function-count is smaller than this threshold.
140 "icp-vtable-percentage-threshold", cl::init(0.995), cl::Hidden,
141 cl::desc("The percentage threshold of vtable-count / function-count for "
142 "cost-benefit analysis."));
143
144// Although comparing vtables can save a vtable load, we may need to compare
145// vtable pointer with multiple vtable address points due to class inheritance.
146// Comparing with multiple vtables inserts additional instructions on hot code
147// path, and doing so for an earlier candidate delays the comparisons for later
148// candidates. For the last candidate, only the fallback path is affected.
149// We allow multiple vtable comparison for the last function candidate and use
150// the option below to cap the number of vtables.
152 "icp-max-num-vtable-last-candidate", cl::init(1), cl::Hidden,
153 cl::desc("The maximum number of vtable for the last candidate."));
154
156 "icp-ignored-base-types", cl::Hidden,
157 cl::desc(
158 "A list of mangled vtable type info names. Classes specified by the "
159 "type info names and their derived ones will not be vtable-ICP'ed. "
160 "Useful when the profiled types and actual types in the optimized "
161 "binary could be different due to profiling limitations. Type info "
162 "names are those string literals used in LLVM type metadata"));
163
164namespace {
165
166// The key is a vtable global variable, and the value is a map.
167// In the inner map, the key represents address point offsets and the value is a
168// constant for this address point.
169using VTableAddressPointOffsetValMap =
171
172// A struct to collect type information for a virtual call site.
173struct VirtualCallSiteInfo {
174 // The offset from the address point to virtual function in the vtable.
175 uint64_t FunctionOffset;
176 // The instruction that computes the address point of vtable.
177 Instruction *VPtr;
178 // The compatible type used in LLVM type intrinsics.
179 StringRef CompatibleTypeStr;
180};
181
182// The key is a virtual call, and value is its type information.
183using VirtualCallSiteTypeInfoMap =
185
186// The key is vtable GUID, and value is its value profile count.
187using VTableGUIDCountsMap = SmallDenseMap<uint64_t, uint64_t, 16>;
188
189// Return the address point offset of the given compatible type.
190//
191// Type metadata of a vtable specifies the types that can contain a pointer to
192// this vtable, for example, `Base*` can be a pointer to a derived type
193// but not vice versa. See also https://llvm.org/docs/TypeMetadata.html
194static std::optional<uint64_t>
195getAddressPointOffset(const GlobalVariable &VTableVar,
196 StringRef CompatibleType) {
198 VTableVar.getMetadata(LLVMContext::MD_type, Types);
199
200 for (MDNode *Type : Types)
201 if (auto *TypeId = dyn_cast<MDString>(Type->getOperand(1).get());
202 TypeId && TypeId->getString() == CompatibleType)
203 return cast<ConstantInt>(
204 cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
205 ->getZExtValue();
206
207 return std::nullopt;
208}
209
210// Return a constant representing the vtable's address point specified by the
211// offset.
212static Constant *getVTableAddressPointOffset(GlobalVariable *VTable,
213 uint32_t AddressPointOffset) {
214 Module &M = *VTable->getParent();
215 LLVMContext &Context = M.getContext();
216 assert(AddressPointOffset < VTable->getGlobalSize(M.getDataLayout()) &&
217 "Out-of-bound access");
218
220 Type::getInt8Ty(Context), VTable,
221 llvm::ConstantInt::get(Type::getInt32Ty(Context), AddressPointOffset));
222}
223
224// Return the basic block in which Use `U` is used via its `UserInst`.
225static BasicBlock *getUserBasicBlock(Use &U, Instruction *UserInst) {
226 if (PHINode *PN = dyn_cast<PHINode>(UserInst))
227 return PN->getIncomingBlock(U);
228
229 return UserInst->getParent();
230}
231
232// `DestBB` is a suitable basic block to sink `Inst` into when `Inst` has users
233// and all users are in `DestBB`. The caller guarantees that `Inst->getParent()`
234// is the sole predecessor of `DestBB` and `DestBB` is dominated by
235// `Inst->getParent()`.
236static bool isDestBBSuitableForSink(Instruction *Inst, BasicBlock *DestBB) {
237 // 'BB' is used only by assert.
238 [[maybe_unused]] BasicBlock *BB = Inst->getParent();
239
240 assert(BB != DestBB && BB->getTerminator()->getNumSuccessors() == 2 &&
241 DestBB->getUniquePredecessor() == BB &&
242 "Guaranteed by ICP transformation");
243
244 BasicBlock *UserBB = nullptr;
245 for (Use &Use : Inst->uses()) {
246 User *User = Use.getUser();
247 // Do checked cast since IR verifier guarantees that the user of an
248 // instruction must be an instruction. See `Verifier::visitInstruction`.
250 // We can sink debug or pseudo instructions together with Inst.
251 if (UserInst->isDebugOrPseudoInst())
252 continue;
253 UserBB = getUserBasicBlock(Use, UserInst);
254 // Do not sink if Inst is used in a basic block that is not DestBB.
255 // TODO: Sink to the common dominator of all user blocks.
256 if (UserBB != DestBB)
257 return false;
258 }
259 return UserBB != nullptr;
260}
261
262// For the virtual call dispatch sequence, try to sink vtable load instructions
263// to the cold indirect call fallback.
264// FIXME: Move the sink eligibility check below to a utility function in
265// Transforms/Utils/ directory.
266static bool tryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
267 if (!isDestBBSuitableForSink(I, DestBlock))
268 return false;
269
270 // Do not move control-flow-involving, volatile loads, vaarg, alloca
271 // instructions, etc.
272 if (isa<PHINode>(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
274 return false;
275
276 // Do not sink convergent call instructions.
277 if (const auto *C = dyn_cast<CallBase>(I))
278 if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
279 return false;
280
281 // Do not move an instruction that may write to memory.
282 if (I->mayWriteToMemory())
283 return false;
284
285 // We can only sink load instructions if there is nothing between the load and
286 // the end of block that could change the value.
287 if (I->mayReadFromMemory()) {
288 // We already know that SrcBlock is the unique predecessor of DestBlock.
289 for (BasicBlock::iterator Scan = std::next(I->getIterator()),
290 E = I->getParent()->end();
291 Scan != E; ++Scan) {
292 // Note alias analysis can tell whether two pointers can point to the
293 // same object in memory or not, and thereby find further opportunities to
294 // sink.
295 if (Scan->mayWriteToMemory())
296 return false;
297 }
298 }
299
300 BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
301 I->moveBefore(*DestBlock, InsertPos);
302
303 // TODO: Sink debug intrinsic users of I to 'DestBlock'.
304 // 'InstCombinerImpl::tryToSinkInstructionDbgValues' and
305 // 'InstCombinerImpl::tryToSinkInstructionDbgVariableRecords' already have
306 // the core logic to do this.
307 return true;
308}
309
310// Try to sink instructions after VPtr to the indirect call fallback.
311// Return the number of sunk IR instructions.
312static int tryToSinkInstructions(BasicBlock *OriginalBB,
313 BasicBlock *IndirectCallBB) {
314 int SinkCount = 0;
315 // Do not sink across a critical edge for simplicity.
316 if (IndirectCallBB->getUniquePredecessor() != OriginalBB)
317 return SinkCount;
318 // Sink all eligible instructions in OriginalBB in reverse order.
319 for (Instruction &I :
321 if (tryToSinkInstruction(&I, IndirectCallBB))
322 SinkCount++;
323
324 return SinkCount;
325}
326
327// Promote indirect calls to conditional direct calls, keeping track of
328// thresholds.
329class IndirectCallPromoter {
330private:
331 Function &F;
332 Module &M;
333
334 // Symtab that maps indirect call profile values to function names and
335 // defines.
336 InstrProfSymtab *const Symtab;
337
338 const bool SamplePGO;
339
340 // A map from a virtual call to its type information.
341 const VirtualCallSiteTypeInfoMap &VirtualCSInfo;
342
343 VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal;
344
345 OptimizationRemarkEmitter &ORE;
346
347 const DenseSet<StringRef> &IgnoredBaseTypes;
348
349 // A struct that records the direct target and its call count.
350 struct PromotionCandidate {
351 Function *const TargetFunction;
352 const uint64_t Count;
353 const uint32_t Index;
354
355 // The following fields only exist for promotion candidates with vtable
356 // information.
357 //
358 // Due to class inheritance, one virtual call candidate can come from
359 // multiple vtables. `VTableGUIDAndCounts` tracks the vtable GUIDs and
360 // counts for 'TargetFunction'. `AddressPoints` stores the vtable address
361 // points for comparison.
362 VTableGUIDCountsMap VTableGUIDAndCounts;
363 SmallVector<Constant *> AddressPoints;
364
365 PromotionCandidate(Function *F, uint64_t C, uint32_t I)
366 : TargetFunction(F), Count(C), Index(I) {}
367 };
368
369 // Check if the indirect-call call site should be promoted. Return the list
370 // of promotion candidates. CB is the candidate indirect call, ValueDataRef
371 // contains the array of value profile data for profiled targets,
372 // TotalCount is the total profiled count of call executions, and
373 // NumCandidates is the number of candidate entries in ValueDataRef.
374 std::vector<PromotionCandidate> getPromotionCandidatesForCallSite(
375 const CallBase &CB, ArrayRef<InstrProfValueData> ValueDataRef,
376 uint64_t TotalCount, uint32_t NumCandidates);
377
378 // Promote a list of targets for one indirect-call callsite by comparing
379 // indirect callee with functions. Return true if there are IR
380 // transformations and false otherwise.
381 bool tryToPromoteWithFuncCmp(
382 CallBase &CB, Instruction *VPtr, ArrayRef<PromotionCandidate> Candidates,
383 uint64_t TotalCount, MutableArrayRef<InstrProfValueData> ICallProfDataRef,
384 uint32_t NumCandidates, VTableGUIDCountsMap &VTableGUIDCounts);
385
386 // Promote a list of targets for one indirect call by comparing vtables with
387 // functions. Return true if there are IR transformations and false
388 // otherwise.
389 bool tryToPromoteWithVTableCmp(
390 CallBase &CB, Instruction *VPtr, ArrayRef<PromotionCandidate> Candidates,
391 uint64_t TotalFuncCount, uint32_t NumCandidates,
393 VTableGUIDCountsMap &VTableGUIDCounts);
394
395 // Return true if it's profitable to compare vtables for the callsite.
396 bool isProfitableToCompareVTables(const CallBase &CB,
398
399 // Return true if the vtable corresponding to VTableGUID should be skipped
400 // for vtable-based comparison.
401 bool shouldSkipVTable(uint64_t VTableGUID);
402
403 // Given an indirect callsite and the list of function candidates, compute
404 // the following vtable information in output parameters and return vtable
405 // pointer if type profiles exist.
406 // - Populate `VTableGUIDCounts` with <vtable-guid, count> using !prof
407 // metadata attached on the vtable pointer.
408 // - For each function candidate, finds out the vtables from which it gets
409 // called and stores the <vtable-guid, count> in promotion candidate.
410 Instruction *computeVTableInfos(const CallBase *CB,
411 VTableGUIDCountsMap &VTableGUIDCounts,
412 std::vector<PromotionCandidate> &Candidates);
413
414 Constant *getOrCreateVTableAddressPointVar(GlobalVariable *GV,
415 uint64_t AddressPointOffset);
416
417 void updateFuncValueProfiles(CallBase &CB,
419 uint64_t Sum, uint32_t MaxMDCount);
420
421 void updateVPtrValueProfiles(Instruction *VPtr,
422 VTableGUIDCountsMap &VTableGUIDCounts);
423
424 bool isValidTarget(uint64_t, Function *, const CallBase &, uint64_t);
425
426public:
427 IndirectCallPromoter(
428 Function &Func, Module &M, InstrProfSymtab *Symtab, bool SamplePGO,
429 const VirtualCallSiteTypeInfoMap &VirtualCSInfo,
430 VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal,
431 const DenseSet<StringRef> &IgnoredBaseTypes,
432 OptimizationRemarkEmitter &ORE)
433 : F(Func), M(M), Symtab(Symtab), SamplePGO(SamplePGO),
434 VirtualCSInfo(VirtualCSInfo),
435 VTableAddressPointOffsetVal(VTableAddressPointOffsetVal), ORE(ORE),
436 IgnoredBaseTypes(IgnoredBaseTypes) {}
437 IndirectCallPromoter(const IndirectCallPromoter &) = delete;
438 IndirectCallPromoter &operator=(const IndirectCallPromoter &) = delete;
439
440 bool processFunction(ProfileSummaryInfo *PSI);
441};
442
443} // end anonymous namespace
444
445bool IndirectCallPromoter::isValidTarget(uint64_t Target,
446 Function *TargetFunction,
447 const CallBase &CB, uint64_t Count) {
448 // Don't promote if the symbol is not defined in the module. This avoids
449 // creating a reference to a symbol that doesn't exist in the module
450 // This can happen when we compile with a sample profile collected from
451 // one binary but used for another, which may have profiled targets that
452 // aren't used in the new binary. We might have a declaration initially in
453 // the case where the symbol is globally dead in the binary and removed by
454 // ThinLTO.
455 using namespace ore;
456 if (TargetFunction == nullptr) {
457 LLVM_DEBUG(dbgs() << " Not promote: Cannot find the target\n");
458 ORE.emit([&]() {
459 return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", &CB)
460 << "Cannot promote indirect call: target with md5sum "
461 << NV("target md5sum", Target)
462 << " not found (count=" << NV("Count", Count) << ")";
463 });
464 return false;
465 }
466 if (!ICPAllowDecls && TargetFunction->isDeclaration()) {
467 LLVM_DEBUG(dbgs() << " Not promote: target definition is not available\n");
468 ORE.emit([&]() {
469 return OptimizationRemarkMissed(DEBUG_TYPE, "NoTargetDef", &CB)
470 << "Do not promote indirect call: target with md5sum "
471 << NV("target md5sum", Target)
472 << " definition not available (count=" << ore::NV("Count", Count)
473 << ")";
474 });
475 return false;
476 }
477
478 const char *Reason = nullptr;
479 if (!isLegalToPromote(CB, TargetFunction, &Reason)) {
480
481 ORE.emit([&]() {
482 return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", &CB)
483 << "Cannot promote indirect call to "
484 << NV("TargetFunction", TargetFunction)
485 << " (count=" << NV("Count", Count) << "): " << Reason;
486 });
487 return false;
488 }
489 return true;
490}
491
492// Indirect-call promotion heuristic. The direct targets are sorted based on
493// the count. Stop at the first target that is not promoted.
494std::vector<IndirectCallPromoter::PromotionCandidate>
495IndirectCallPromoter::getPromotionCandidatesForCallSite(
496 const CallBase &CB, ArrayRef<InstrProfValueData> ValueDataRef,
497 uint64_t TotalCount, uint32_t NumCandidates) {
498 std::vector<PromotionCandidate> Ret;
499
500 LLVM_DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << CB
501 << " Num_targets: " << ValueDataRef.size()
502 << " Num_candidates: " << NumCandidates << "\n");
503 NumOfPGOICallsites++;
504 if (ICPCSSkip != 0 && NumOfPGOICallsites <= ICPCSSkip) {
505 LLVM_DEBUG(dbgs() << " Skip: User options.\n");
506 return Ret;
507 }
508
509 for (uint32_t I = 0; I < NumCandidates; I++) {
510 uint64_t Count = ValueDataRef[I].Count;
511 assert(Count <= TotalCount);
512 (void)TotalCount;
513 uint64_t Target = ValueDataRef[I].Value;
514 LLVM_DEBUG(dbgs() << " Candidate " << I << " Count=" << Count
515 << " Target_func: " << Target << "\n");
516
517 if (ICPInvokeOnly && isa<CallInst>(CB)) {
518 LLVM_DEBUG(dbgs() << " Not promote: User options.\n");
519 ORE.emit([&]() {
520 return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", &CB)
521 << " Not promote: User options";
522 });
523 break;
524 }
525 if (ICPCallOnly && isa<InvokeInst>(CB)) {
526 LLVM_DEBUG(dbgs() << " Not promote: User option.\n");
527 ORE.emit([&]() {
528 return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", &CB)
529 << " Not promote: User options";
530 });
531 break;
532 }
533 if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
534 LLVM_DEBUG(dbgs() << " Not promote: Cutoff reached.\n");
535 ORE.emit([&]() {
536 return OptimizationRemarkMissed(DEBUG_TYPE, "CutOffReached", &CB)
537 << " Not promote: Cutoff reached";
538 });
539 break;
540 }
541
542 Function *TargetFunction = Symtab->getFunction(Target);
543 if (!isValidTarget(Target, TargetFunction, CB, Count)) {
545 continue;
546 else
547 break;
548 }
549
550 Ret.push_back(PromotionCandidate(TargetFunction, Count, I));
551 TotalCount -= Count;
552 }
553 return Ret;
554}
555
556Constant *IndirectCallPromoter::getOrCreateVTableAddressPointVar(
557 GlobalVariable *GV, uint64_t AddressPointOffset) {
558 auto [Iter, Inserted] =
559 VTableAddressPointOffsetVal[GV].try_emplace(AddressPointOffset, nullptr);
560 if (Inserted)
561 Iter->second = getVTableAddressPointOffset(GV, AddressPointOffset);
562 return Iter->second;
563}
564
565Instruction *IndirectCallPromoter::computeVTableInfos(
566 const CallBase *CB, VTableGUIDCountsMap &GUIDCountsMap,
567 std::vector<PromotionCandidate> &Candidates) {
569 return nullptr;
570
571 // Take the following code sequence as an example, here is how the code works
572 // @vtable1 = {[n x ptr] [... ptr @func1]}
573 // @vtable2 = {[m x ptr] [... ptr @func2]}
574 //
575 // %vptr = load ptr, ptr %d, !prof !0
576 // %0 = tail call i1 @llvm.type.test(ptr %vptr, metadata !"vtable1")
577 // tail call void @llvm.assume(i1 %0)
578 // %vfn = getelementptr inbounds ptr, ptr %vptr, i64 1
579 // %1 = load ptr, ptr %vfn
580 // call void %1(ptr %d), !prof !1
581 //
582 // !0 = !{!"VP", i32 2, i64 100, i64 123, i64 50, i64 456, i64 50}
583 // !1 = !{!"VP", i32 0, i64 100, i64 789, i64 50, i64 579, i64 50}
584 //
585 // Step 1. Find out the %vptr instruction for indirect call and use its !prof
586 // to populate `GUIDCountsMap`.
587 // Step 2. For each vtable-guid, look up its definition from symtab. LTO can
588 // make vtable definitions visible across modules.
589 // Step 3. Compute the byte offset of the virtual call, by adding vtable
590 // address point offset and function's offset relative to vtable address
591 // point. For each function candidate, this step tells us the vtable from
592 // which it comes from, and the vtable address point to compare %vptr with.
593
594 // Only virtual calls have virtual call site info.
595 auto Iter = VirtualCSInfo.find(CB);
596 if (Iter == VirtualCSInfo.end())
597 return nullptr;
598
599 LLVM_DEBUG(dbgs() << "\nComputing vtable infos for callsite #"
600 << NumOfPGOICallsites << "\n");
601
602 const auto &VirtualCallInfo = Iter->second;
603 Instruction *VPtr = VirtualCallInfo.VPtr;
604
605 SmallDenseMap<Function *, int, 4> CalleeIndexMap;
606 for (size_t I = 0; I < Candidates.size(); I++)
607 CalleeIndexMap[Candidates[I].TargetFunction] = I;
608
609 uint64_t TotalVTableCount = 0;
610 auto VTableValueDataArray =
611 getValueProfDataFromInst(*VirtualCallInfo.VPtr, IPVK_VTableTarget,
612 MaxNumVTableAnnotations, TotalVTableCount);
613 if (VTableValueDataArray.empty())
614 return VPtr;
615
616 // Compute the functions and counts from by each vtable.
617 for (const auto &V : VTableValueDataArray) {
618 uint64_t VTableVal = V.Value;
619 GUIDCountsMap[VTableVal] = V.Count;
620 GlobalVariable *VTableVar = Symtab->getGlobalVariable(VTableVal);
621 if (!VTableVar) {
622 LLVM_DEBUG(dbgs() << " Cannot find vtable definition for " << VTableVal
623 << "; maybe the vtable isn't imported\n");
624 continue;
625 }
626
627 std::optional<uint64_t> MaybeAddressPointOffset =
628 getAddressPointOffset(*VTableVar, VirtualCallInfo.CompatibleTypeStr);
629 if (!MaybeAddressPointOffset)
630 continue;
631
632 const uint64_t AddressPointOffset = *MaybeAddressPointOffset;
633
634 Function *Callee = nullptr;
635 std::tie(Callee, std::ignore) = getFunctionAtVTableOffset(
636 VTableVar, AddressPointOffset + VirtualCallInfo.FunctionOffset, M);
637 if (!Callee)
638 continue;
639 auto CalleeIndexIter = CalleeIndexMap.find(Callee);
640 if (CalleeIndexIter == CalleeIndexMap.end())
641 continue;
642
643 auto &Candidate = Candidates[CalleeIndexIter->second];
644 // There shouldn't be duplicate GUIDs in one !prof metadata (except
645 // duplicated zeros), so assign counters directly won't cause overwrite or
646 // counter loss.
647 Candidate.VTableGUIDAndCounts[VTableVal] = V.Count;
648 Candidate.AddressPoints.push_back(
649 getOrCreateVTableAddressPointVar(VTableVar, AddressPointOffset));
650 }
651
652 return VPtr;
653}
654
655// Creates 'branch_weights' prof metadata using TrueWeight and FalseWeight.
656// Scales uint64_t counters down to uint32_t if necessary to prevent overflow.
657static MDNode *createBranchWeights(LLVMContext &Context, uint64_t TrueWeight,
658 uint64_t FalseWeight) {
659 MDBuilder MDB(Context);
660 uint64_t Scale = calculateCountScale(std::max(TrueWeight, FalseWeight));
661 return MDB.createBranchWeights(scaleBranchCount(TrueWeight, Scale),
662 scaleBranchCount(FalseWeight, Scale));
663}
664
666 uint64_t Count, uint64_t TotalCount,
667 bool AttachProfToDirectCall,
670 CB, DirectCallee,
671 createBranchWeights(CB.getContext(), Count, TotalCount - Count));
672
673 if (AttachProfToDirectCall)
674 setFittedBranchWeights(NewInst, {Count},
675 /*IsExpected=*/false);
676
677 using namespace ore;
678
679 if (ORE)
680 ORE->emit([&]() {
681 return OptimizationRemark(DEBUG_TYPE, "Promoted", &CB)
682 << "Promote indirect call to " << NV("DirectCallee", DirectCallee)
683 << " with count " << NV("Count", Count) << " out of "
684 << NV("TotalCount", TotalCount);
685 });
686 return NewInst;
687}
688
689// Promote indirect-call to conditional direct-call for one callsite.
690bool IndirectCallPromoter::tryToPromoteWithFuncCmp(
692 uint64_t TotalCount, MutableArrayRef<InstrProfValueData> ICallProfDataRef,
693 uint32_t NumCandidates, VTableGUIDCountsMap &VTableGUIDCounts) {
694 uint32_t NumPromoted = 0;
695
696 for (const auto &C : Candidates) {
697 uint64_t FuncCount = C.Count;
698 pgo::promoteIndirectCall(CB, C.TargetFunction, FuncCount, TotalCount,
699 SamplePGO, &ORE);
700 assert(TotalCount >= FuncCount);
701 TotalCount -= FuncCount;
702 NumOfPGOICallPromotion++;
703 NumPromoted++;
704
705 // Update the count and this entry will be erased later.
706 ICallProfDataRef[C.Index].Count = 0;
707 if (!EnableVTableProfileUse || C.VTableGUIDAndCounts.empty())
708 continue;
709
710 // After a virtual call candidate gets promoted, update the vtable's counts
711 // proportionally. Each vtable-guid in `C.VTableGUIDAndCounts` represents
712 // a vtable from which the virtual call is loaded. Compute the sum and use
713 // 128-bit APInt to improve accuracy.
714 uint64_t SumVTableCount = 0;
715 for (const auto &[GUID, VTableCount] : C.VTableGUIDAndCounts)
716 SumVTableCount += VTableCount;
717
718 for (const auto &[GUID, VTableCount] : C.VTableGUIDAndCounts) {
719 APInt APFuncCount((unsigned)128, FuncCount, false /*signed*/);
720 APFuncCount *= VTableCount;
721 VTableGUIDCounts[GUID] -= APFuncCount.udiv(SumVTableCount).getZExtValue();
722 }
723 }
724 if (NumPromoted == 0)
725 return false;
726
727 assert(NumPromoted <= ICallProfDataRef.size() &&
728 "Number of promoted functions should not be greater than the number "
729 "of values in profile metadata");
730
731 updateFuncValueProfiles(CB, ICallProfDataRef, TotalCount, NumCandidates);
732 updateVPtrValueProfiles(VPtr, VTableGUIDCounts);
733 return true;
734}
735
736void IndirectCallPromoter::updateFuncValueProfiles(
737 CallBase &CB, MutableArrayRef<InstrProfValueData> CallVDs,
738 uint64_t TotalCount, uint32_t MaxMDCount) {
739 // First clear the existing !prof.
740 CB.setMetadata(LLVMContext::MD_prof, nullptr);
741
742 // Sort value profiles by count in descending order.
743 llvm::stable_sort(CallVDs, [](const InstrProfValueData &LHS,
744 const InstrProfValueData &RHS) {
745 return LHS.Count > RHS.Count;
746 });
747 // Drop the <target-value, count> pair if count is zero.
749 CallVDs.begin(),
750 llvm::upper_bound(CallVDs, 0U,
751 [](uint64_t Count, const InstrProfValueData &ProfData) {
752 return ProfData.Count <= Count;
753 }));
754
755 // Annotate the remaining value profiles if counter is not zero.
756 if (TotalCount != 0)
757 annotateValueSite(M, CB, VDs, TotalCount, IPVK_IndirectCallTarget,
758 MaxMDCount);
759}
760
761void IndirectCallPromoter::updateVPtrValueProfiles(
762 Instruction *VPtr, VTableGUIDCountsMap &VTableGUIDCounts) {
763 if (!EnableVTableProfileUse || VPtr == nullptr ||
764 !VPtr->getMetadata(LLVMContext::MD_prof))
765 return;
766 VPtr->setMetadata(LLVMContext::MD_prof, nullptr);
767 std::vector<InstrProfValueData> VTableValueProfiles;
768 uint64_t TotalVTableCount = 0;
769 for (auto [GUID, Count] : VTableGUIDCounts) {
770 if (Count == 0)
771 continue;
772
773 VTableValueProfiles.push_back({GUID, Count});
774 TotalVTableCount += Count;
775 }
776 llvm::sort(VTableValueProfiles,
777 [](const InstrProfValueData &LHS, const InstrProfValueData &RHS) {
778 return LHS.Count > RHS.Count;
779 });
780
781 annotateValueSite(M, *VPtr, VTableValueProfiles, TotalVTableCount,
782 IPVK_VTableTarget, VTableValueProfiles.size());
783}
784
785bool IndirectCallPromoter::tryToPromoteWithVTableCmp(
786 CallBase &CB, Instruction *VPtr, ArrayRef<PromotionCandidate> Candidates,
787 uint64_t TotalFuncCount, uint32_t NumCandidates,
789 VTableGUIDCountsMap &VTableGUIDCounts) {
790 SmallVector<std::pair<uint32_t, uint64_t>, 4> PromotedFuncCount;
791
792 for (const auto &Candidate : Candidates) {
793 for (auto &[GUID, Count] : Candidate.VTableGUIDAndCounts)
794 VTableGUIDCounts[GUID] -= Count;
795
796 // 'OriginalBB' is the basic block of indirect call. After each candidate
797 // is promoted, a new basic block is created for the indirect fallback basic
798 // block and indirect call `CB` is moved into this new BB.
799 BasicBlock *OriginalBB = CB.getParent();
801 CB, VPtr, Candidate.TargetFunction, Candidate.AddressPoints,
802 createBranchWeights(CB.getContext(), Candidate.Count,
803 TotalFuncCount - Candidate.Count));
804
805 int SinkCount = tryToSinkInstructions(OriginalBB, CB.getParent());
806
807 ORE.emit([&]() {
808 OptimizationRemark Remark(DEBUG_TYPE, "Promoted", &CB);
809
810 const auto &VTableGUIDAndCounts = Candidate.VTableGUIDAndCounts;
811 Remark << "Promote indirect call to "
812 << ore::NV("DirectCallee", Candidate.TargetFunction)
813 << " with count " << ore::NV("Count", Candidate.Count)
814 << " out of " << ore::NV("TotalCount", TotalFuncCount) << ", sink "
815 << ore::NV("SinkCount", SinkCount)
816 << " instruction(s) and compare "
817 << ore::NV("VTable", VTableGUIDAndCounts.size())
818 << " vtable(s): {";
819
820 // Sort GUIDs so remark message is deterministic.
821 std::set<uint64_t> GUIDSet;
822 for (auto [GUID, Count] : VTableGUIDAndCounts)
823 GUIDSet.insert(GUID);
824 for (auto Iter = GUIDSet.begin(); Iter != GUIDSet.end(); Iter++) {
825 if (Iter != GUIDSet.begin())
826 Remark << ", ";
827 Remark << ore::NV("VTable", Symtab->getGlobalVariable(*Iter));
828 }
829
830 Remark << "}";
831
832 return Remark;
833 });
834
835 PromotedFuncCount.push_back({Candidate.Index, Candidate.Count});
836
837 assert(TotalFuncCount >= Candidate.Count &&
838 "Within one prof metadata, total count is the sum of counts from "
839 "individual <target, count> pairs");
840 // Use std::min since 'TotalFuncCount' is the saturated sum of individual
841 // counts, see
842 // https://github.com/llvm/llvm-project/blob/abedb3b8356d5d56f1c575c4f7682fba2cb19787/llvm/lib/ProfileData/InstrProf.cpp#L1281-L1288
843 TotalFuncCount -= std::min(TotalFuncCount, Candidate.Count);
844 NumOfPGOICallPromotion++;
845 }
846
847 if (PromotedFuncCount.empty())
848 return false;
849
850 // Update value profiles for 'CB' and 'VPtr', assuming that each 'CB' has a
851 // a distinct 'VPtr'.
852 // FIXME: When Clang `-fstrict-vtable-pointers` is enabled, a vtable might be
853 // used to load multiple virtual functions. The vtable profiles needs to be
854 // updated properly in that case (e.g, for each indirect call annotate both
855 // type profiles and function profiles in one !prof).
856 for (size_t I = 0; I < PromotedFuncCount.size(); I++) {
857 uint32_t Index = PromotedFuncCount[I].first;
858 ICallProfDataRef[Index].Count -=
859 std::max(PromotedFuncCount[I].second, ICallProfDataRef[Index].Count);
860 }
861 updateFuncValueProfiles(CB, ICallProfDataRef, TotalFuncCount, NumCandidates);
862 updateVPtrValueProfiles(VPtr, VTableGUIDCounts);
863 return true;
864}
865
866// Traverse all the indirect-call callsite and get the value profile
867// annotation to perform indirect-call promotion.
868bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
869 bool Changed = false;
870 ICallPromotionAnalysis ICallAnalysis;
871 for (auto *CB : findIndirectCalls(F)) {
872 uint32_t NumCandidates;
873 uint64_t TotalCount;
874 auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction(
875 CB, TotalCount, NumCandidates);
876 if (!NumCandidates)
877 continue;
878 if (PSI && PSI->hasProfileSummary()) {
879 // Don't promote cold candidates.
880 if (PSI->isColdCount(TotalCount)) {
881 LLVM_DEBUG(dbgs() << "Don't promote the cold candidate: TotalCount="
882 << TotalCount << "\n");
883 continue;
884 }
885 // Only pormote hot if ICPAllowHotOnly is true.
886 if (ICPAllowHotOnly && !PSI->isHotCount(TotalCount)) {
887 LLVM_DEBUG(dbgs() << "Don't promote the non-hot candidate: TotalCount="
888 << TotalCount << "\n");
889 continue;
890 }
891 }
892
893 auto PromotionCandidates = getPromotionCandidatesForCallSite(
894 *CB, ICallProfDataRef, TotalCount, NumCandidates);
895
896 VTableGUIDCountsMap VTableGUIDCounts;
897 Instruction *VPtr =
898 computeVTableInfos(CB, VTableGUIDCounts, PromotionCandidates);
899
900 if (isProfitableToCompareVTables(*CB, PromotionCandidates))
901 Changed |= tryToPromoteWithVTableCmp(*CB, VPtr, PromotionCandidates,
902 TotalCount, NumCandidates,
903 ICallProfDataRef, VTableGUIDCounts);
904 else
905 Changed |= tryToPromoteWithFuncCmp(*CB, VPtr, PromotionCandidates,
906 TotalCount, ICallProfDataRef,
907 NumCandidates, VTableGUIDCounts);
908 }
909 return Changed;
910}
911
// Decides whether this call site should be promoted by comparing vtables
// instead of comparing function pointers. Returns false when
// EnableVTableProfileUse is off or there are no candidates, and also as soon as
// any candidate fails one of the per-candidate checks in the loop below;
// returns true only if every candidate passes.
912// TODO: Return false if the function addressing and vtable load instructions
913// cannot sink to indirect fallback.
914bool IndirectCallPromoter::isProfitableToCompareVTables(
915 const CallBase &CB, ArrayRef<PromotionCandidate> Candidates) {
916 if (!EnableVTableProfileUse || Candidates.empty())
917 return false;
918 LLVM_DEBUG(dbgs() << "\nEvaluating vtable profitability for callsite #"
919 << NumOfPGOICallsites << CB << "\n");
920 const size_t CandidateSize = Candidates.size();
921 for (size_t I = 0; I < CandidateSize; I++) {
922 auto &Candidate = Candidates[I];
923 auto &VTableGUIDAndCounts = Candidate.VTableGUIDAndCounts;
924
925 LLVM_DEBUG({
926 dbgs() << " Candidate " << I << " FunctionCount: " << Candidate.Count
927 << ", VTableCounts:";
928 for (const auto &[GUID, Count] : VTableGUIDAndCounts)
929 dbgs() << " {" << Symtab->getGlobalVariable(GUID)->getName() << ", "
930 << Count << "}";
931 dbgs() << "\n";
932 });
933
// Sum of the counts of all vtables recorded for this candidate.
934 uint64_t CandidateVTableCount = 0;
935
936 for (auto &[GUID, Count] : VTableGUIDAndCounts) {
937 CandidateVTableCount += Count;
938
// A single vtable that must be skipped rejects vtable comparison for the
// whole call site, not just for this candidate.
939 if (shouldSkipVTable(GUID))
940 return false;
941 }
942
// Require the summed vtable count to reach ICPVTablePercentageThreshold times
// the candidate's function count.
943 if (CandidateVTableCount < Candidate.Count * ICPVTablePercentageThreshold) {
// NOTE(review): listing line 944 is absent from this excerpt; the stream
// expression below presumably sits inside an LLVM_DEBUG( call — confirm
// against the original file.
945 dbgs() << " function count " << Candidate.Count
946 << " and its vtable sum count " << CandidateVTableCount
947 << " have discrepancies. Bail out vtable comparison.\n");
948 return false;
949 }
950
951 // 'MaxNumVTable' limits the number of vtables to make vtable comparison
952 // profitable. Comparing multiple vtables for one function candidate will
953 // insert additional instructions on the hot path, and allowing more than
954 // one vtable for non last candidates may or may not elongate the dependency
955 // chain for the subsequent candidates. Set its value to 1 for non-last
956 // candidate and allow option to override it for the last candidate.
957 int MaxNumVTable = 1;
958 if (I == CandidateSize - 1)
959 MaxNumVTable = ICPMaxNumVTableLastCandidate;
960
// The limit is applied to the number of address points recorded for the
// candidate.
961 if ((int)Candidate.AddressPoints.size() > MaxNumVTable) {
962 LLVM_DEBUG(dbgs() << " allow at most " << MaxNumVTable << " and got "
963 << Candidate.AddressPoints.size()
964 << " vtables. Bail out for vtable comparison.\n");
965 return false;
966 }
967 }
968
969 return true;
970}
971
// Returns true if the vtable variable looked up by VTableGUID carries an
// MD_type metadata node whose operand 1 is a string contained in
// IgnoredBaseTypes. Returns false otherwise, including immediately when
// IgnoredBaseTypes is empty.
972bool IndirectCallPromoter::shouldSkipVTable(uint64_t VTableGUID) {
973 if (IgnoredBaseTypes.empty())
974 return false;
975
976 auto *VTableVar = Symtab->getGlobalVariable(VTableGUID);
977
978 assert(VTableVar && "VTableVar must exist for GUID in VTableGUIDAndCounts");
979
// NOTE(review): `Types` is not declared in the visible lines (listing line 980
// is absent from this excerpt) — confirm its declaration against the original
// file.
981 VTableVar->getMetadata(LLVMContext::MD_type, Types);
982
// Nodes whose operand 1 is not an MDString are ignored by the dyn_cast below.
983 for (auto *Type : Types)
984 if (auto *TypeId = dyn_cast<MDString>(Type->getOperand(1).get()))
985 if (IgnoredBaseTypes.contains(TypeId->getString())) {
986 LLVM_DEBUG(dbgs() << " vtable profiles should be ignored. Bail "
987 "out of vtable comparison.");
988 return true;
989 }
990 return false;
991}
992
993// For virtual calls in the module, collect per-callsite information which will
994// be used to associate an ICP candidate with a vtable and a specific function
995// in the vtable. With type intrinsics (llvm.type.test), we can find virtual
996// calls in a compile-time efficient manner (by iterating its users) and more
997// importantly use the compatible type later to figure out the function byte
998// offset relative to the start of vtables.
//
// Results are written into VirtualCSInfo, keyed by the address of the call
// site; each entry holds the call's offset, the instruction that loads the
// vtable pointer, and the compatible type id string. Nothing is recorded when
// the module has no llvm.type.test declaration or that declaration has no uses.
//
// NOTE(review): listing line 1000 (the first line of the parameter list) is
// absent from this excerpt; the body uses `M` and `MAM`, which are presumably
// declared there — confirm against the original file.
999static void
1001 VirtualCallSiteTypeInfoMap &VirtualCSInfo) {
1002 // Right now only llvm.type.test is used to find out virtual call sites.
1003 // With ThinLTO and whole-program-devirtualization, llvm.type.test and
1004 // llvm.public.type.test are emitted, and llvm.public.type.test is either
1005 // refined to llvm.type.test or dropped before indirect-call-promotion pass.
1006 //
1007 // FIXME: For fullLTO with VFE, `llvm.type.checked.load intrinsic` is emitted.
1008 // Find out virtual calls by looking at users of llvm.type.checked.load in
1009 // that case.
1010 Function *TypeTestFunc =
1011 Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_test);
1012 if (!TypeTestFunc || TypeTestFunc->use_empty())
1013 return;
1014
// Dominator trees are fetched per function through the function analysis
// manager obtained from the module-level proxy.
1015 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1016 auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & {
1017 return FAM.getResult<DominatorTreeAnalysis>(F);
1018 };
1019 // Iterate all type.test calls to find all indirect calls.
1020 for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) {
// Users that are not call instructions are ignored.
1021 auto *CI = dyn_cast<CallInst>(U.getUser());
1022 if (!CI)
1023 continue;
// NOTE(review): cast<> (not dyn_cast<>) is used here; if cast<> asserts on a
// type mismatch instead of returning null, as is usual in LLVM, the null
// check below looks unreachable — confirm whether dyn_cast<> was intended.
1024 auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(1));
1025 if (!TypeMDVal)
1026 continue;
// Only type ids expressed as metadata strings are handled.
1027 auto *CompatibleTypeId = dyn_cast<MDString>(TypeMDVal->getMetadata());
1028 if (!CompatibleTypeId)
1029 continue;
1030
1031 // Find out all devirtualizable call sites given a llvm.type.test
1032 // intrinsic call.
// NOTE(review): `DevirtCalls` and `Assumes` are not declared in the visible
// lines (listing lines 1033-1034 are absent from this excerpt) — confirm
// against the original file.
1035 auto &DT = LookupDomTree(*CI->getFunction());
1036 findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT);
1037
1038 for (auto &DevirtCall : DevirtCalls) {
1039 CallBase &CB = DevirtCall.CB;
1040 // Given an indirect call, try find the instruction which loads a
1041 // pointer to virtual table.
// NOTE(review): the initializer of `VTablePtr` (listing line 1043) is absent
// from this excerpt — confirm against the original file.
1042 Instruction *VTablePtr =
1044 if (!VTablePtr)
1045 continue;
1046 VirtualCSInfo[&CB] = {DevirtCall.Offset, VTablePtr,
1047 CompatibleTypeId->getString()};
1048 }
1049 }
1050}
1051
1052// A wrapper function that does the actual work.
//
// Runs an IndirectCallPromoter over each function of module M that is neither
// a declaration nor optnone, and returns true if any of them reported a change.
// Returns false without processing anything when DisableICP is set, or when the
// symbol table cannot be created (an error is emitted on the module's context
// in that case).
1053static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
1054 bool SamplePGO, ModuleAnalysisManager &MAM) {
1055 if (DisableICP)
1056 return false;
1057 InstrProfSymtab Symtab;
1058 if (Error E = Symtab.create(M, InLTO)) {
1059 std::string SymtabFailure = toString(std::move(E));
1060 M.getContext().emitError("Failed to create symtab: " + SymtabFailure);
1061 return false;
1062 }
1063 bool Changed = false;
1064 VirtualCallSiteTypeInfoMap VirtualCSInfo;
1065
1066 DenseSet<StringRef> IgnoredBaseTypes;
1067
// NOTE(review): listing line 1068 is absent from this excerpt; it presumably
// opens the conditional block that the closing brace after insert_range ends —
// confirm the condition against the original file.
1069 computeVirtualCallSiteTypeInfoMap(M, MAM, VirtualCSInfo);
1070
1071 IgnoredBaseTypes.insert_range(ICPIgnoredBaseTypes);
1072 }
1073
1074 // VTableAddressPointOffsetVal stores the vtable address points. The vtable
1075 // address point of a given <vtable, address point offset> is static (doesn't
1076 // change after being computed once).
1077 // IndirectCallPromoter::getOrCreateVTableAddressPointVar creates the map
1078 // entry the first time a <vtable, offset> pair is seen, as
1079 // promoteIndirectCalls processes an IR module and calls IndirectCallPromoter
1080 // repeatedly on each function.
1081 VTableAddressPointOffsetValMap VTableAddressPointOffsetVal;
1082
1083 for (auto &F : M) {
// Declarations and optnone functions are left untouched.
1084 if (F.isDeclaration() || F.hasOptNone())
1085 continue;
1086
1087 auto &FAM =
1088 MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1089 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
1090
// A fresh promoter is built per function; the symbol table and the maps/sets
// declared above are passed to each one.
1091 IndirectCallPromoter CallPromoter(F, M, &Symtab, SamplePGO, VirtualCSInfo,
1092 VTableAddressPointOffsetVal,
1093 IgnoredBaseTypes, ORE);
1094 bool FuncChanged = CallPromoter.processFunction(PSI);
1095 if (ICPDUMPAFTER && FuncChanged) {
1096 LLVM_DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));
1097 LLVM_DEBUG(dbgs() << "\n");
1098 }
1099 Changed |= FuncChanged;
// A cutoff of 0 means no limit; otherwise stop visiting further functions once
// the promotion counter reaches the cutoff.
1100 if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
1101 LLVM_DEBUG(dbgs() << " Stop: Cutoff reached.\n");
1102 break;
1103 }
1104 }
1105 return Changed;
1106}
1107
// NOTE(review): the function header (listing lines 1108-1109) is absent from
// this excerpt; the body uses `M`, `MAM`, `InLTO` and `SamplePGO`, so this is
// presumably the pass's run(Module &, ModuleAnalysisManager &) entry point —
// confirm against the original file.
1110 ProfileSummaryInfo *PSI = &MAM.getResult<ProfileSummaryAnalysis>(M);
1111
// The LTO and SamplePGO modes can each be enabled either by the pass's own
// flag or by the corresponding command-line option.
1112 if (!promoteIndirectCalls(M, PSI, InLTO | ICPLTOMode,
1113 SamplePGO | ICPSamplePGOMode, MAM))
// Nothing changed, so every analysis is reported as preserved.
1114 return PreservedAnalyses::all();
1115
// Something was promoted, so no analysis is reported as preserved.
1116 return PreservedAnalyses::none();
1117}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file defines the DenseMap class.
#define DEBUG_TYPE
This header defines various interfaces for pass management in LLVM.
Interface to identify indirect call promotion candidates.
static cl::opt< bool > ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion for call instructions " "only"))
static cl::opt< bool > ICPInvokeOnly("icp-invoke-only", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion for " "invoke instruction only"))
static cl::opt< unsigned > ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden, cl::desc("Skip Callsite up to this number for this compilation"))
static cl::opt< bool > ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden, cl::desc("Dump IR after transformation happens"))
static cl::opt< bool > ICPAllowHotOnly("icp-allow-hot-only", cl::init(true), cl::Hidden, cl::desc("Promote the target candidate only if it is a " "hot function. Otherwise, warm functions can " "also be promoted"))
static cl::opt< float > ICPVTablePercentageThreshold("icp-vtable-percentage-threshold", cl::init(0.995), cl::Hidden, cl::desc("The percentage threshold of vtable-count / function-count for " "cost-benefit analysis."))
static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO, bool SamplePGO, ModuleAnalysisManager &MAM)
static void computeVirtualCallSiteTypeInfoMap(Module &M, ModuleAnalysisManager &MAM, VirtualCallSiteTypeInfoMap &VirtualCSInfo)
static cl::opt< bool > ICPAllowDecls("icp-allow-decls", cl::init(false), cl::Hidden, cl::desc("Promote the target candidate even when the definition " " is not available"))
static MDNode * createBranchWeights(LLVMContext &Context, uint64_t TrueWeight, uint64_t FalseWeight)
static cl::opt< bool > ICPAllowCandidateSkip("icp-allow-candidate-skip", cl::init(false), cl::Hidden, cl::desc("Continue with the remaining targets instead of exiting " "when failing in a candidate"))
static cl::list< std::string > ICPIgnoredBaseTypes("icp-ignored-base-types", cl::Hidden, cl::desc("A list of mangled vtable type info names. Classes specified by the " "type info names and their derived ones will not be vtable-ICP'ed. " "Useful when the profiled types and actual types in the optimized " "binary could be different due to profiling limitations. Type info " "names are those string literals used in LLVM type metadata"))
static cl::opt< bool > ICPLTOMode("icp-lto", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion in LTO " "mode"))
static cl::opt< bool > DisableICP("disable-icp", cl::init(false), cl::Hidden, cl::desc("Disable indirect call promotion"))
static cl::opt< unsigned > ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden, cl::desc("Max number of promotions for this compilation"))
static cl::opt< int > ICPMaxNumVTableLastCandidate("icp-max-num-vtable-last-candidate", cl::init(1), cl::Hidden, cl::desc("The maximum number of vtable for the last candidate."))
static cl::opt< bool > ICPSamplePGOMode("icp-samplepgo", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion in SamplePGO mode"))
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
static bool processFunction(Function &F, NVPTXTargetMachine &TM)
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
FunctionAnalysisManager FAM
ModuleAnalysisManager MAM
This file contains the declarations for profiling metadata utility functions.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition APInt.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
static Constant * getInBoundsGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList)
Create an "inbounds" getelementptr.
Definition Constants.h:1311
This is an important base class in LLVM.
Definition Constant.h:43
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
iterator end()
Definition DenseMap.h:81
Implements a dense probed hash-table based set.
Definition DenseSet.h:279
Analysis pass which computes a DominatorTree.
Definition Dominators.h:283
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:164
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
MDNode * getMetadata(unsigned KindID) const
Get the current metadata attachments for the given kind, if any.
Definition Value.h:576
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:329
MutableArrayRef< InstrProfValueData > getPromotionCandidatesForInstruction(const Instruction *I, uint64_t &TotalCount, uint32_t &NumCandidates, unsigned MaxNumValueData=0)
Returns reference to array of InstrProfValueData for the given instruction I.
A symbol table used for function [IR]PGO name look-up with keys (such as pointers,...
Definition InstrProf.h:505
GlobalVariable * getGlobalVariable(uint64_t MD5Hash) const
Return the global variable corresponding to md5 hash.
Definition InstrProf.h:797
LLVM_ABI Error create(object::SectionRef &Section)
Create InstrProfSymtab from an object file section which contains function PGO names.
Function * getFunction(uint64_t FuncMD5Hash) const
Return function from the name's md5 hash. Return nullptr if not found.
Definition InstrProf.h:787
LLVM_ABI bool isDebugOrPseudoInst() const LLVM_READONLY
Return true if the instruction is a DbgInfoIntrinsic or PseudoProbeInst.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition MDBuilder.cpp:38
Metadata node.
Definition Metadata.h:1080
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
iterator begin() const
Definition ArrayRef.h:342
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for applied optimization remarks.
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
LLVM_ABI bool isColdCount(uint64_t C) const
Returns true if count C is considered cold.
LLVM_ABI bool isHotCount(uint64_t C) const
Returns true if count C is considered hot.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Target - Wrapper for Target specific information.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:296
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:294
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:259
bool use_empty() const
Definition Value.h:346
iterator_range< use_iterator > uses()
Definition Value.h:380
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
void insert_range(Range &&R)
Definition DenseSet.h:228
const ParentTy * getParent() const
Definition ilist_node.h:34
Changed
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getDeclarationIfExists(const Module *M, ID id)
Look up the Function declaration of the intrinsic id in the Module M and return it if it exists.
initializer< Ty > init(const Ty &Val)
Add a small namespace to avoid name clashes with the classes used in the streaming interface.
DiagnosticInfoOptimizationBase::Argument NV
LLVM_ABI CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
void stable_sort(R &&Range)
Definition STLExtras.h:2106
LLVM_ABI bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
std::vector< CallBase * > findIndirectCalls(Function &F)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI CallBase & promoteCallWithIfThenElse(CallBase &CB, Function *Callee, MDNode *BranchWeights=nullptr)
Promote the given indirect call site to conditionally call Callee.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
cl::opt< unsigned > MaxNumVTableAnnotations("icp-max-num-vtables", cl::init(6), cl::Hidden, cl::desc("Max number of vtables annotated for a vtable load instruction."))
auto upper_bound(R &&Range, T &&Value)
Provide wrappers to std::upper_bound which take ranges instead of having to pass begin/end explicitly...
Definition STLExtras.h:2055
cl::opt< bool > EnableVTableProfileUse("enable-vtable-profile-use", cl::init(false), cl::desc("If ThinLTO and WPD is enabled and this option is true, vtable " "profiles will be used by ICP pass for more efficient indirect " "call sequence. If false, type profiles won't be used."))
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1634
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
LLVM_ABI SmallVector< InstrProfValueData, 4 > getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst and returns them if Inst is annotated with value profile dat...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale)
Scale an individual branch count.
LLVM_ABI void setFittedBranchWeights(Instruction &I, ArrayRef< uint64_t > Weights, bool IsExpected, bool ElideAllZero=false)
Variant of setBranchWeights where the Weights will be fit first to uint32_t by shifting right.
uint64_t calculateCountScale(uint64_t MaxCount)
Calculate what to divide by to scale counts.
LLVM_ABI CallBase & promoteCallWithVTableCmp(CallBase &CB, Instruction *VPtr, Function *Callee, ArrayRef< Constant * > AddressPoints, MDNode *BranchWeights)
This is similar to promoteCallWithIfThenElse except that the condition to promote a virtual call is t...
void findDevirtualizableCallsForTypeTest(SmallVectorImpl< DevirtCallSite > &DevirtCalls, SmallVectorImpl< CallInst * > &Assumes, const CallInst *CI, DominatorTree &DT)
Given a call to the intrinsic @llvm.type.test, find all devirtualizable call sites based on the call ...
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
std::pair< Function *, Constant * > getFunctionAtVTableOffset(GlobalVariable *GV, uint64_t Offset, Module &M)
Given a vtable and a specified offset, returns the function and the trivial pointer at the specified ...
static Instruction * tryGetVTableInstruction(CallBase *CB)