LLVM 23.0.0git
WholeProgramDevirt.cpp
Go to the documentation of this file.
1//===- WholeProgramDevirt.cpp - Whole program virtual call optimization ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass implements whole program optimization of virtual calls in cases
10// where we know (via !type metadata) that the list of callees is fixed. This
11// includes the following:
12// - Single implementation devirtualization: if a virtual call has a single
13// possible callee, replace all calls with a direct call to that callee.
14// - Virtual constant propagation: if the virtual function's return type is an
15// integer <=64 bits and all possible callees are readnone, for each class and
16// each list of constant arguments: evaluate the function, store the return
17// value alongside the virtual table, and rewrite each virtual call as a load
18// from the virtual table.
19// - Uniform return value optimization: if the conditions for virtual constant
20// propagation hold and each function returns the same constant value, replace
21// each virtual call with that constant.
22// - Unique return value optimization for i1 return values: if the conditions
23// for virtual constant propagation hold and a single vtable's function
24// returns 0, or a single vtable's function returns 1, replace each virtual
25// call with a comparison of the vptr against that vtable's address.
26//
27// This pass is intended to be used during the regular/thin and non-LTO
28// pipelines:
29//
30// During regular LTO, the pass determines the best optimization for each
31// virtual call and applies the resolutions directly to virtual calls that are
32// eligible for virtual call optimization (i.e. calls that use either of the
33// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics).
34//
35// During hybrid Regular/ThinLTO, the pass operates in two phases:
36// - Export phase: this is run during the thin link over a single merged module
37// that contains all vtables with !type metadata that participate in the link.
38// The pass computes a resolution for each virtual call and stores it in the
39// type identifier summary.
40// - Import phase: this is run during the thin backends over the individual
41// modules. The pass applies the resolutions previously computed during the
42// import phase to each eligible virtual call.
43//
44// During ThinLTO, the pass operates in two phases:
45// - Export phase: this is run during the thin link over the index which
46// contains a summary of all vtables with !type metadata that participate in
47// the link. It computes a resolution for each virtual call and stores it in
48// the type identifier summary. Only single implementation devirtualization
49// is supported.
50// - Import phase: (same as with hybrid case above).
51//
52// During Speculative devirtualization mode -not restricted to LTO-:
53// - The pass applies speculative devirtualization without requiring any type of
54// visibility.
55// - Skips other features like virtual constant propagation, uniform return
56// value optimization, unique return value optimization and branch funnels as
57// they need LTO.
58// - This mode is enabled via 'devirtualize-speculatively' flag.
59//
60//===----------------------------------------------------------------------===//
61
63#include "llvm/ADT/ArrayRef.h"
64#include "llvm/ADT/DenseMap.h"
66#include "llvm/ADT/DenseSet.h"
67#include "llvm/ADT/MapVector.h"
69#include "llvm/ADT/Statistic.h"
79#include "llvm/IR/Constants.h"
80#include "llvm/IR/DataLayout.h"
81#include "llvm/IR/DebugLoc.h"
84#include "llvm/IR/Dominators.h"
85#include "llvm/IR/Function.h"
86#include "llvm/IR/GlobalAlias.h"
88#include "llvm/IR/IRBuilder.h"
89#include "llvm/IR/InstrTypes.h"
90#include "llvm/IR/Instruction.h"
92#include "llvm/IR/Intrinsics.h"
93#include "llvm/IR/LLVMContext.h"
94#include "llvm/IR/MDBuilder.h"
95#include "llvm/IR/Metadata.h"
96#include "llvm/IR/Module.h"
98#include "llvm/IR/PassManager.h"
100#include "llvm/Support/Casting.h"
103#include "llvm/Support/Errc.h"
104#include "llvm/Support/Error.h"
109#include "llvm/Transforms/IPO.h"
114#include <algorithm>
115#include <cmath>
116#include <cstddef>
117#include <map>
118#include <set>
119#include <string>
120
121using namespace llvm;
122using namespace wholeprogramdevirt;
123
124#define DEBUG_TYPE "wholeprogramdevirt"
125
126STATISTIC(NumDevirtTargets, "Number of whole program devirtualization targets");
127STATISTIC(NumSingleImpl, "Number of single implementation devirtualizations");
128STATISTIC(NumBranchFunnel, "Number of branch funnels");
129STATISTIC(NumUniformRetVal, "Number of uniform return value optimizations");
130STATISTIC(NumUniqueRetVal, "Number of unique return value optimizations");
131STATISTIC(NumVirtConstProp1Bit,
132 "Number of 1 bit virtual constant propagations");
133STATISTIC(NumVirtConstProp, "Number of virtual constant propagations");
134DEBUG_COUNTER(CallsToDevirt, "calls-to-devirt",
135 "Controls how many calls should be devirtualized.");
136
137namespace llvm {
138
140 "wholeprogramdevirt-summary-action",
141 cl::desc("What to do with the summary when running this pass"),
142 cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"),
144 "Import typeid resolutions from summary and globals"),
146 "Export typeid resolutions to summary and globals")),
147 cl::Hidden);
148
150 "wholeprogramdevirt-read-summary",
151 cl::desc(
152 "Read summary from given bitcode or YAML file before running pass"),
153 cl::Hidden);
154
156 "wholeprogramdevirt-write-summary",
157 cl::desc("Write summary to given bitcode or YAML file after running pass. "
158 "Output file format is deduced from extension: *.bc means writing "
159 "bitcode, otherwise YAML"),
160 cl::Hidden);
161
162// TODO: This option eventually should support any public visibility vtables
163// with/out LTO.
165 "devirtualize-speculatively",
166 cl::desc("Enable speculative devirtualization optimization"),
167 cl::init(false));
168
170 ClThreshold("wholeprogramdevirt-branch-funnel-threshold", cl::Hidden,
171 cl::init(10),
172 cl::desc("Maximum number of call targets per "
173 "call site to enable branch funnels"));
174
175static cl::opt<bool>
176 PrintSummaryDevirt("wholeprogramdevirt-print-index-based", cl::Hidden,
177 cl::desc("Print index-based devirtualization messages"));
178
179/// Provide a way to force enable whole program visibility in tests.
180/// This is needed to support legacy tests that don't contain
181/// !vcall_visibility metadata (the mere presense of type tests
182/// previously implied hidden visibility).
183static cl::opt<bool>
184 WholeProgramVisibility("whole-program-visibility", cl::Hidden,
185 cl::desc("Enable whole program visibility"));
186
187/// Provide a way to force disable whole program for debugging or workarounds,
188/// when enabled via the linker.
190 "disable-whole-program-visibility", cl::Hidden,
191 cl::desc("Disable whole program visibility (overrides enabling options)"));
192
193/// Provide way to prevent certain function from being devirtualized
195 SkipFunctionNames("wholeprogramdevirt-skip",
196 cl::desc("Prevent function(s) from being devirtualized"),
198
200
201} // end namespace llvm
202
203/// With Clang, a pure virtual class's deleting destructor is emitted as a
204/// `llvm.trap` intrinsic followed by an unreachable IR instruction. In the
205/// context of whole program devirtualization, the deleting destructor of a pure
206/// virtual class won't be invoked by the source code so safe to skip as a
207/// devirtualize target.
208///
209/// However, not all unreachable functions are safe to skip. In some cases, the
210/// program intends to run such functions and terminate, for instance, a unit
211/// test may run a death test. A non-test program might (or allowed to) invoke
212/// such functions to report failures (whether/when it's a good practice or not
213/// is a different topic).
214///
215/// This option is enabled to keep an unreachable function as a possible
216/// devirtualize target to conservatively keep the program behavior.
217///
218/// TODO: Make a pure virtual class's deleting destructor precisely identifiable
219/// in Clang's codegen for more devirtualization in LLVM.
221 "wholeprogramdevirt-keep-unreachable-function",
222 cl::desc("Regard unreachable functions as possible devirtualize targets."),
223 cl::Hidden, cl::init(true));
224
225/// Mechanism to add runtime checking of devirtualization decisions, optionally
226/// trapping or falling back to indirect call on any that are not correct.
227/// Trapping mode is useful for debugging undefined behavior leading to failures
228/// with WPD. Fallback mode is useful for ensuring safety when whole program
229/// visibility may be compromised.
232 "wholeprogramdevirt-check", cl::Hidden,
233 cl::desc("Type of checking for incorrect devirtualizations"),
234 cl::values(clEnumValN(WPDCheckMode::None, "none", "No checking"),
235 clEnumValN(WPDCheckMode::Trap, "trap", "Trap when incorrect"),
237 "Fallback to indirect when incorrect")));
238
239namespace {
240struct PatternList {
241 std::vector<GlobPattern> Patterns;
242 template <class T> void init(const T &StringList) {
243 for (const auto &S : StringList)
245 Patterns.push_back(std::move(*Pat));
246 }
247 bool match(StringRef S) {
248 for (const GlobPattern &P : Patterns)
249 if (P.match(S))
250 return true;
251 return false;
252 }
253};
254} // namespace
255
256// Find the minimum offset that we may store a value of size Size bits at. If
257// IsAfter is set, look for an offset before the object, otherwise look for an
258// offset after the object.
261 bool IsAfter, uint64_t Size) {
262 // Find a minimum offset taking into account only vtable sizes.
263 uint64_t MinByte = 0;
264 for (const VirtualCallTarget &Target : Targets) {
265 if (IsAfter)
266 MinByte = std::max(MinByte, Target.minAfterBytes());
267 else
268 MinByte = std::max(MinByte, Target.minBeforeBytes());
269 }
270
271 // Build a vector of arrays of bytes covering, for each target, a slice of the
272 // used region (see AccumBitVector::BytesUsed in
273 // llvm/Transforms/IPO/WholeProgramDevirt.h) starting at MinByte. Effectively,
274 // this aligns the used regions to start at MinByte.
275 //
276 // In this example, A, B and C are vtables, # is a byte already allocated for
277 // a virtual function pointer, AAAA... (etc.) are the used regions for the
278 // vtables and Offset(X) is the value computed for the Offset variable below
279 // for X.
280 //
281 // Offset(A)
282 // | |
283 // |MinByte
284 // A: ################AAAAAAAA|AAAAAAAA
285 // B: ########BBBBBBBBBBBBBBBB|BBBB
286 // C: ########################|CCCCCCCCCCCCCCCC
287 // | Offset(B) |
288 //
289 // This code produces the slices of A, B and C that appear after the divider
290 // at MinByte.
291 std::vector<ArrayRef<uint8_t>> Used;
292 for (const VirtualCallTarget &Target : Targets) {
293 ArrayRef<uint8_t> VTUsed = IsAfter ? Target.TM->Bits->After.BytesUsed
294 : Target.TM->Bits->Before.BytesUsed;
295 uint64_t Offset = IsAfter ? MinByte - Target.minAfterBytes()
296 : MinByte - Target.minBeforeBytes();
297
298 // Disregard used regions that are smaller than Offset. These are
299 // effectively all-free regions that do not need to be checked.
300 if (VTUsed.size() > Offset)
301 Used.push_back(VTUsed.slice(Offset));
302 }
303
304 if (Size == 1) {
305 // Find a free bit in each member of Used.
306 for (unsigned I = 0;; ++I) {
307 uint8_t BitsUsed = 0;
308 for (auto &&B : Used)
309 if (I < B.size())
310 BitsUsed |= B[I];
311 if (BitsUsed != 0xff)
312 return (MinByte + I) * 8 + llvm::countr_zero(uint8_t(~BitsUsed));
313 }
314 } else {
315 // Find a free (Size/8) byte region in each member of Used.
316 // FIXME: see if alignment helps.
317 for (unsigned I = 0;; ++I) {
318 for (auto &&B : Used) {
319 unsigned Byte = 0;
320 while ((I + Byte) < B.size() && Byte < (Size / 8)) {
321 if (B[I + Byte])
322 goto NextI;
323 ++Byte;
324 }
325 }
326 // Rounding up ensures the constant is always stored at address we
327 // can directly load from without misalignment.
328 return alignTo((MinByte + I) * 8, Size);
329 NextI:;
330 }
331 }
332}
333
335 MutableArrayRef<VirtualCallTarget> Targets, uint64_t AllocBefore,
336 unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit) {
337 if (BitWidth == 1)
338 OffsetByte = -(AllocBefore / 8 + 1);
339 else
340 OffsetByte = -((AllocBefore + 7) / 8 + (BitWidth + 7) / 8);
341 OffsetBit = AllocBefore % 8;
342
343 for (VirtualCallTarget &Target : Targets) {
344 if (BitWidth == 1)
345 Target.setBeforeBit(AllocBefore);
346 else
347 Target.setBeforeBytes(AllocBefore, (BitWidth + 7) / 8);
348 }
349}
350
353 unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit) {
354 if (BitWidth == 1)
355 OffsetByte = AllocAfter / 8;
356 else
357 OffsetByte = (AllocAfter + 7) / 8;
358 OffsetBit = AllocAfter % 8;
359
360 for (VirtualCallTarget &Target : Targets) {
361 if (BitWidth == 1)
362 Target.setAfterBit(AllocAfter);
363 else
364 Target.setAfterBytes(AllocAfter, (BitWidth + 7) / 8);
365 }
366}
367
372
373namespace {
374
375// A slot in a set of virtual tables. The TypeID identifies the set of virtual
376// tables, and the ByteOffset is the offset in bytes from the address point to
377// the virtual function pointer.
378struct VTableSlot {
379 Metadata *TypeID;
380 uint64_t ByteOffset;
381};
382
383} // end anonymous namespace
384
385template <> struct llvm::DenseMapInfo<VTableSlot> {
394 static unsigned getHashValue(const VTableSlot &I) {
397 }
398 static bool isEqual(const VTableSlot &LHS,
399 const VTableSlot &RHS) {
400 return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset;
401 }
402};
403
413 static unsigned getHashValue(const VTableSlotSummary &I) {
416 }
417 static bool isEqual(const VTableSlotSummary &LHS,
418 const VTableSlotSummary &RHS) {
419 return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset;
420 }
421};
422
423// Returns true if the function must be unreachable based on ValueInfo.
424//
425// In particular, identifies a function as unreachable in the following
426// conditions
427// 1) All summaries are live.
428// 2) All function summaries indicate it's unreachable
429// 3) There is no non-function with the same GUID (which is rare)
432 return false;
433
434 if ((!TheFnVI) || TheFnVI.getSummaryList().empty()) {
435 // Returns false if ValueInfo is absent, or the summary list is empty
436 // (e.g., function declarations).
437 return false;
438 }
439
440 for (const auto &Summary : TheFnVI.getSummaryList()) {
441 // Conservatively returns false if any non-live functions are seen.
442 // In general either all summaries should be live or all should be dead.
443 if (!Summary->isLive())
444 return false;
445 if (auto *FS = dyn_cast<FunctionSummary>(Summary->getBaseObject())) {
446 if (!FS->fflags().MustBeUnreachable)
447 return false;
448 }
449 // Be conservative if a non-function has the same GUID (which is rare).
450 else
451 return false;
452 }
453 // All function summaries are live and all of them agree that the function is
454 // unreachble.
455 return true;
456}
457
458namespace {
459// A virtual call site. VTable is the loaded virtual table pointer, and CS is
460// the indirect virtual call.
461struct VirtualCallSite {
462 Value *VTable = nullptr;
463 CallBase &CB;
464
465 // If non-null, this field points to the associated unsafe use count stored in
466 // the DevirtModule::NumUnsafeUsesForTypeTest map below. See the description
467 // of that field for details.
468 unsigned *NumUnsafeUses = nullptr;
469
470 void
471 emitRemark(const StringRef OptName, const StringRef TargetName,
472 function_ref<OptimizationRemarkEmitter &(Function &)> OREGetter) {
473 Function *F = CB.getCaller();
474 DebugLoc DLoc = CB.getDebugLoc();
475 BasicBlock *Block = CB.getParent();
476
477 using namespace ore;
478 OREGetter(*F).emit(OptimizationRemark(DEBUG_TYPE, OptName, DLoc, Block)
479 << NV("Optimization", OptName)
480 << ": devirtualized a call to "
481 << NV("FunctionName", TargetName));
482 }
483
484 void replaceAndErase(
485 const StringRef OptName, const StringRef TargetName, bool RemarksEnabled,
486 function_ref<OptimizationRemarkEmitter &(Function &)> OREGetter,
487 Value *New) {
488 if (RemarksEnabled)
489 emitRemark(OptName, TargetName, OREGetter);
490 CB.replaceAllUsesWith(New);
491 if (auto *II = dyn_cast<InvokeInst>(&CB)) {
492 BranchInst::Create(II->getNormalDest(), CB.getIterator());
493 II->getUnwindDest()->removePredecessor(II->getParent());
494 }
495 CB.eraseFromParent();
496 // This use is no longer unsafe.
497 if (NumUnsafeUses)
498 --*NumUnsafeUses;
499 }
500};
501
502// Call site information collected for a specific VTableSlot and possibly a list
503// of constant integer arguments. The grouping by arguments is handled by the
504// VTableSlotInfo class.
505struct CallSiteInfo {
506 /// The set of call sites for this slot. Used during regular LTO and the
507 /// import phase of ThinLTO (as well as the export phase of ThinLTO for any
508 /// call sites that appear in the merged module itself); in each of these
509 /// cases we are directly operating on the call sites at the IR level.
510 std::vector<VirtualCallSite> CallSites;
511
512 /// Whether all call sites represented by this CallSiteInfo, including those
513 /// in summaries, have been devirtualized. This starts off as true because a
514 /// default constructed CallSiteInfo represents no call sites.
515 ///
516 /// If at the end of the pass there are still undevirtualized calls, we will
517 /// need to add a use of llvm.type.test to each of the function summaries in
518 /// the vector.
519 bool AllCallSitesDevirted = true;
520
521 // These fields are used during the export phase of ThinLTO and reflect
522 // information collected from function summaries.
523
524 /// CFI-specific: a vector containing the list of function summaries that use
525 /// the llvm.type.checked.load intrinsic and therefore will require
526 /// resolutions for llvm.type.test in order to implement CFI checks if
527 /// devirtualization was unsuccessful.
528 std::vector<FunctionSummary *> SummaryTypeCheckedLoadUsers;
529
530 /// A vector containing the list of function summaries that use
531 /// assume(llvm.type.test).
532 std::vector<FunctionSummary *> SummaryTypeTestAssumeUsers;
533
534 bool isExported() const {
535 return !SummaryTypeCheckedLoadUsers.empty() ||
536 !SummaryTypeTestAssumeUsers.empty();
537 }
538
539 void addSummaryTypeCheckedLoadUser(FunctionSummary *FS) {
540 SummaryTypeCheckedLoadUsers.push_back(FS);
541 AllCallSitesDevirted = false;
542 }
543
544 void addSummaryTypeTestAssumeUser(FunctionSummary *FS) {
545 SummaryTypeTestAssumeUsers.push_back(FS);
546 AllCallSitesDevirted = false;
547 }
548
549 void markDevirt() { AllCallSitesDevirted = true; }
550};
551
552// Call site information collected for a specific VTableSlot.
553struct VTableSlotInfo {
554 // The set of call sites which do not have all constant integer arguments
555 // (excluding "this").
556 CallSiteInfo CSInfo;
557
558 // The set of call sites with all constant integer arguments (excluding
559 // "this"), grouped by argument list.
560 std::map<std::vector<uint64_t>, CallSiteInfo> ConstCSInfo;
561
562 void addCallSite(Value *VTable, CallBase &CB, unsigned *NumUnsafeUses);
563
564private:
565 CallSiteInfo &findCallSiteInfo(CallBase &CB);
566};
567
568CallSiteInfo &VTableSlotInfo::findCallSiteInfo(CallBase &CB) {
569 std::vector<uint64_t> Args;
570 auto *CBType = dyn_cast<IntegerType>(CB.getType());
571 if (!CBType || CBType->getBitWidth() > 64 || CB.arg_empty())
572 return CSInfo;
573 for (auto &&Arg : drop_begin(CB.args())) {
574 auto *CI = dyn_cast<ConstantInt>(Arg);
575 if (!CI || CI->getBitWidth() > 64)
576 return CSInfo;
577 Args.push_back(CI->getZExtValue());
578 }
579 return ConstCSInfo[Args];
580}
581
582void VTableSlotInfo::addCallSite(Value *VTable, CallBase &CB,
583 unsigned *NumUnsafeUses) {
584 auto &CSI = findCallSiteInfo(CB);
585 CSI.AllCallSitesDevirted = false;
586 CSI.CallSites.push_back({VTable, CB, NumUnsafeUses});
587}
588
589struct DevirtModule {
590 Module &M;
593
594 ModuleSummaryIndex *const ExportSummary;
595 const ModuleSummaryIndex *const ImportSummary;
596
597 IntegerType *const Int8Ty;
598 PointerType *const Int8PtrTy;
599 IntegerType *const Int32Ty;
600 IntegerType *const Int64Ty;
601 IntegerType *const IntPtrTy;
602 /// Sizeless array type, used for imported vtables. This provides a signal
603 /// to analyzers that these imports may alias, as they do for example
604 /// when multiple unique return values occur in the same vtable.
605 ArrayType *const Int8Arr0Ty;
606
607 const bool RemarksEnabled;
608 std::function<OptimizationRemarkEmitter &(Function &)> OREGetter;
609 MapVector<VTableSlot, VTableSlotInfo> CallSlots;
610
611 // Calls that have already been optimized. We may add a call to multiple
612 // VTableSlotInfos if vtable loads are coalesced and need to make sure not to
613 // optimize a call more than once.
614 SmallPtrSet<CallBase *, 8> OptimizedCalls;
615
616 // Store calls that had their ptrauth bundle removed. They are to be deleted
617 // at the end of the optimization.
618 SmallVector<CallBase *, 8> CallsWithPtrAuthBundleRemoved;
619
620 // This map keeps track of the number of "unsafe" uses of a loaded function
621 // pointer. The key is the associated llvm.type.test intrinsic call generated
622 // by this pass. An unsafe use is one that calls the loaded function pointer
623 // directly. Every time we eliminate an unsafe use (for example, by
624 // devirtualizing it or by applying virtual constant propagation), we
625 // decrement the value stored in this map. If a value reaches zero, we can
626 // eliminate the type check by RAUWing the associated llvm.type.test call with
627 // true.
628 std::map<CallInst *, unsigned> NumUnsafeUsesForTypeTest;
629 PatternList FunctionsToSkip;
630
631 const bool DevirtSpeculatively;
632 DevirtModule(Module &M, ModuleAnalysisManager &MAM,
633 ModuleSummaryIndex *ExportSummary,
634 const ModuleSummaryIndex *ImportSummary,
635 bool DevirtSpeculatively)
636 : M(M), MAM(MAM),
637 FAM(MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()),
638 ExportSummary(ExportSummary), ImportSummary(ImportSummary),
639 Int8Ty(Type::getInt8Ty(M.getContext())),
640 Int8PtrTy(PointerType::getUnqual(M.getContext())),
641 Int32Ty(Type::getInt32Ty(M.getContext())),
642 Int64Ty(Type::getInt64Ty(M.getContext())),
643 IntPtrTy(M.getDataLayout().getIntPtrType(M.getContext(), 0)),
644 Int8Arr0Ty(ArrayType::get(Type::getInt8Ty(M.getContext()), 0)),
645 RemarksEnabled(areRemarksEnabled()),
646 OREGetter([&](Function &F) -> OptimizationRemarkEmitter & {
647 return FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
648 }),
649 DevirtSpeculatively(DevirtSpeculatively) {
650 assert(!(ExportSummary && ImportSummary));
651 FunctionsToSkip.init(SkipFunctionNames);
652 }
653
654 bool areRemarksEnabled();
655
656 void
657 scanTypeTestUsers(Function *TypeTestFunc,
658 DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap);
659 void scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc);
660
661 void buildTypeIdentifierMap(
662 std::vector<VTableBits> &Bits,
663 DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap);
664
665 bool
666 tryFindVirtualCallTargets(std::vector<VirtualCallTarget> &TargetsForSlot,
667 const std::set<TypeMemberInfo> &TypeMemberInfos,
668 uint64_t ByteOffset,
669 ModuleSummaryIndex *ExportSummary);
670
671 void applySingleImplDevirt(VTableSlotInfo &SlotInfo, Constant *TheFn,
672 bool &IsExported);
673 bool trySingleImplDevirt(ModuleSummaryIndex *ExportSummary,
675 VTableSlotInfo &SlotInfo,
676 WholeProgramDevirtResolution *Res);
677
678 void applyICallBranchFunnel(VTableSlotInfo &SlotInfo, Function &JT,
679 bool &IsExported);
680 void tryICallBranchFunnel(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
681 VTableSlotInfo &SlotInfo,
682 WholeProgramDevirtResolution *Res, VTableSlot Slot);
683
684 bool tryEvaluateFunctionsWithArgs(
686 ArrayRef<uint64_t> Args);
687
688 void applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
689 uint64_t TheRetVal);
690 bool tryUniformRetValOpt(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
691 CallSiteInfo &CSInfo,
692 WholeProgramDevirtResolution::ByArg *Res);
693
694 // Returns the global symbol name that is used to export information about the
695 // given vtable slot and list of arguments.
696 std::string getGlobalName(VTableSlot Slot, ArrayRef<uint64_t> Args,
697 StringRef Name);
698
699 bool shouldExportConstantsAsAbsoluteSymbols();
700
701 // This function is called during the export phase to create a symbol
702 // definition containing information about the given vtable slot and list of
703 // arguments.
704 void exportGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args, StringRef Name,
705 Constant *C);
706 void exportConstant(VTableSlot Slot, ArrayRef<uint64_t> Args, StringRef Name,
707 uint32_t Const, uint32_t &Storage);
708
709 // This function is called during the import phase to create a reference to
710 // the symbol definition created during the export phase.
711 Constant *importGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
712 StringRef Name);
713 Constant *importConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
714 StringRef Name, IntegerType *IntTy,
715 uint32_t Storage);
716
717 Constant *getMemberAddr(const TypeMemberInfo *M);
718
719 void applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName, bool IsOne,
720 Constant *UniqueMemberAddr);
721 bool tryUniqueRetValOpt(unsigned BitWidth,
723 CallSiteInfo &CSInfo,
724 WholeProgramDevirtResolution::ByArg *Res,
725 VTableSlot Slot, ArrayRef<uint64_t> Args);
726
727 void applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
728 Constant *Byte, Constant *Bit);
729 bool tryVirtualConstProp(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
730 VTableSlotInfo &SlotInfo,
731 WholeProgramDevirtResolution *Res, VTableSlot Slot);
732
733 void rebuildGlobal(VTableBits &B);
734
735 // Apply the summary resolution for Slot to all virtual calls in SlotInfo.
736 void importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo);
737
738 // If we were able to eliminate all unsafe uses for a type checked load,
739 // eliminate the associated type tests by replacing them with true.
740 void removeRedundantTypeTests();
741
742 bool run();
743
744 // Look up the corresponding ValueInfo entry of `TheFn` in `ExportSummary`.
745 //
746 // Caller guarantees that `ExportSummary` is not nullptr.
747 static ValueInfo lookUpFunctionValueInfo(Function *TheFn,
748 ModuleSummaryIndex *ExportSummary);
749
750 // Returns true if the function definition must be unreachable.
751 //
752 // Note if this helper function returns true, `F` is guaranteed
753 // to be unreachable; if it returns false, `F` might still
754 // be unreachable but not covered by this helper function.
755 //
756 // Implementation-wise, if function definition is present, IR is analyzed; if
757 // not, look up function flags from ExportSummary as a fallback.
758 static bool mustBeUnreachableFunction(Function *const F,
759 ModuleSummaryIndex *ExportSummary);
760
761 // Lower the module using the action and summary passed as command line
762 // arguments. For testing purposes only.
763 static bool runForTesting(Module &M, ModuleAnalysisManager &MAM,
764 bool DevirtSpeculatively);
765};
766
767struct DevirtIndex {
768 ModuleSummaryIndex &ExportSummary;
769 // The set in which to record GUIDs exported from their module by
770 // devirtualization, used by client to ensure they are not internalized.
771 std::set<GlobalValue::GUID> &ExportedGUIDs;
772 // A map in which to record the information necessary to locate the WPD
773 // resolution for local targets in case they are exported by cross module
774 // importing.
775 std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap;
776
777 MapVector<VTableSlotSummary, VTableSlotInfo> CallSlots;
778
779 PatternList FunctionsToSkip;
780
781 DevirtIndex(
782 ModuleSummaryIndex &ExportSummary,
783 std::set<GlobalValue::GUID> &ExportedGUIDs,
784 std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap)
785 : ExportSummary(ExportSummary), ExportedGUIDs(ExportedGUIDs),
786 LocalWPDTargetsMap(LocalWPDTargetsMap) {
787 FunctionsToSkip.init(SkipFunctionNames);
788 }
789
790 bool tryFindVirtualCallTargets(std::vector<ValueInfo> &TargetsForSlot,
791 const TypeIdCompatibleVtableInfo TIdInfo,
792 uint64_t ByteOffset);
793
794 bool trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
795 VTableSlotSummary &SlotSummary,
796 VTableSlotInfo &SlotInfo,
797 WholeProgramDevirtResolution *Res,
798 std::set<ValueInfo> &DevirtTargets);
799
800 void run();
801};
802} // end anonymous namespace
803
806 if (UseCommandLine) {
807 if (!DevirtModule::runForTesting(M, MAM, ClDevirtualizeSpeculatively))
808 return PreservedAnalyses::all();
810 }
811
812 std::optional<ModuleSummaryIndex> Index;
814 // Build the ExportSummary from the module.
816 "ExportSummary is expected to be empty in non-LTO mode");
817 ProfileSummaryInfo PSI(M);
818 Index.emplace(buildModuleSummaryIndex(M, nullptr, &PSI));
819 ExportSummary = Index.has_value() ? &Index.value() : nullptr;
820 }
821 if (!DevirtModule(M, MAM, ExportSummary, ImportSummary, DevirtSpeculatively)
822 .run())
823 return PreservedAnalyses::all();
825}
826
827// Enable whole program visibility if enabled by client (e.g. linker) or
828// internal option, and not force disabled.
829bool llvm::hasWholeProgramVisibility(bool WholeProgramVisibilityEnabledInLTO) {
830 return (WholeProgramVisibilityEnabledInLTO || WholeProgramVisibility) &&
832}
833
834static bool
836 function_ref<bool(StringRef)> IsVisibleToRegularObj) {
837 // TypeID for member function pointer type is an internal construct
838 // and won't exist in IsVisibleToRegularObj. The full TypeID
839 // will be present and participate in invalidation.
840 if (TypeID.ends_with(".virtual"))
841 return false;
842
843 // TypeID that doesn't start with Itanium mangling (_ZTS) will be
844 // non-externally visible types which cannot interact with
845 // external native files. See CodeGenModule::CreateMetadataIdentifierImpl.
846 if (!TypeID.consume_front("_ZTS"))
847 return false;
848
849 // TypeID is keyed off the type name symbol (_ZTS). However, the native
850 // object may not contain this symbol if it does not contain a key
851 // function for the base type and thus only contains a reference to the
852 // type info (_ZTI). To catch this case we query using the type info
853 // symbol corresponding to the TypeID.
854 std::string TypeInfo = ("_ZTI" + TypeID).str();
855 return IsVisibleToRegularObj(TypeInfo);
856}
857
858static bool
860 function_ref<bool(StringRef)> IsVisibleToRegularObj) {
862 GV.getMetadata(LLVMContext::MD_type, Types);
863
864 for (auto *Type : Types)
865 if (auto *TypeID = dyn_cast<MDString>(Type->getOperand(1).get()))
866 return typeIDVisibleToRegularObj(TypeID->getString(),
867 IsVisibleToRegularObj);
868
869 return false;
870}
871
872/// If whole program visibility asserted, then upgrade all public vcall
873/// visibility metadata on vtable definitions to linkage unit visibility in
874/// Module IR (for regular or hybrid LTO).
876 Module &M, bool WholeProgramVisibilityEnabledInLTO,
877 const DenseSet<GlobalValue::GUID> &DynamicExportSymbols,
878 bool ValidateAllVtablesHaveTypeInfos,
879 function_ref<bool(StringRef)> IsVisibleToRegularObj) {
880 if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO))
881 return;
882 for (GlobalVariable &GV : M.globals()) {
883 // Add linkage unit visibility to any variable with type metadata, which are
884 // the vtable definitions. We won't have an existing vcall_visibility
885 // metadata on vtable definitions with public visibility.
886 if (GV.hasMetadata(LLVMContext::MD_type) &&
888 // Don't upgrade the visibility for symbols exported to the dynamic
889 // linker, as we have no information on their eventual use.
890 !DynamicExportSymbols.count(GV.getGUID()) &&
891 // With validation enabled, we want to exclude symbols visible to
892 // regular objects. Local symbols will be in this group due to the
893 // current implementation but those with VCallVisibilityTranslationUnit
894 // will have already been marked in clang so are unaffected.
895 !(ValidateAllVtablesHaveTypeInfos &&
896 skipUpdateDueToValidation(GV, IsVisibleToRegularObj)))
898 }
899}
900
902 bool WholeProgramVisibilityEnabledInLTO) {
903 llvm::TimeTraceScope timeScope("Update public type test calls");
904 Function *PublicTypeTestFunc =
905 Intrinsic::getDeclarationIfExists(&M, Intrinsic::public_type_test);
906 if (!PublicTypeTestFunc)
907 return;
908 if (hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO)) {
909 Function *TypeTestFunc =
910 Intrinsic::getOrInsertDeclaration(&M, Intrinsic::type_test);
911 for (Use &U : make_early_inc_range(PublicTypeTestFunc->uses())) {
912 auto *CI = cast<CallInst>(U.getUser());
913 auto *NewCI = CallInst::Create(
914 TypeTestFunc, {CI->getArgOperand(0), CI->getArgOperand(1)}, {}, "",
915 CI->getIterator());
916 CI->replaceAllUsesWith(NewCI);
917 CI->eraseFromParent();
918 }
919 } else {
920 // TODO: Don't replace public type tests when speculative devirtualization
921 // gets enabled in LTO mode.
922 auto *True = ConstantInt::getTrue(M.getContext());
923 for (Use &U : make_early_inc_range(PublicTypeTestFunc->uses())) {
924 auto *CI = cast<CallInst>(U.getUser());
925 CI->replaceAllUsesWith(True);
926 CI->eraseFromParent();
927 }
928 }
929}
930
931/// Based on typeID string, get all associated vtable GUIDS that are
932/// visible to regular objects.
934 ModuleSummaryIndex &Index,
935 DenseSet<GlobalValue::GUID> &VisibleToRegularObjSymbols,
936 function_ref<bool(StringRef)> IsVisibleToRegularObj) {
937 for (const auto &TypeID : Index.typeIdCompatibleVtableMap()) {
938 if (typeIDVisibleToRegularObj(TypeID.first, IsVisibleToRegularObj))
939 for (const TypeIdOffsetVtableInfo &P : TypeID.second)
940 VisibleToRegularObjSymbols.insert(P.VTableVI.getGUID());
941 }
942}
943
944/// If whole program visibility asserted, then upgrade all public vcall
945/// visibility metadata on vtable definition summaries to linkage unit
946/// visibility in Module summary index (for ThinLTO).
948 ModuleSummaryIndex &Index, bool WholeProgramVisibilityEnabledInLTO,
949 const DenseSet<GlobalValue::GUID> &DynamicExportSymbols,
950 const DenseSet<GlobalValue::GUID> &VisibleToRegularObjSymbols) {
951 if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO))
952 return;
953 for (auto &P : Index) {
954 // Don't upgrade the visibility for symbols exported to the dynamic
955 // linker, as we have no information on their eventual use.
956 if (DynamicExportSymbols.count(P.first))
957 continue;
958 // With validation enabled, we want to exclude symbols visible to regular
959 // objects. Local symbols will be in this group due to the current
960 // implementation but those with VCallVisibilityTranslationUnit will have
961 // already been marked in clang so are unaffected.
962 if (VisibleToRegularObjSymbols.count(P.first))
963 continue;
964 for (auto &S : P.second.getSummaryList()) {
965 auto *GVar = dyn_cast<GlobalVarSummary>(S.get());
966 if (!GVar ||
967 GVar->getVCallVisibility() != GlobalObject::VCallVisibilityPublic)
968 continue;
969 GVar->setVCallVisibility(GlobalObject::VCallVisibilityLinkageUnit);
970 }
971 }
972}
973
975 ModuleSummaryIndex &Summary, std::set<GlobalValue::GUID> &ExportedGUIDs,
976 std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
977 DevirtIndex(Summary, ExportedGUIDs, LocalWPDTargetsMap).run();
978}
979
981 ModuleSummaryIndex &Summary,
982 function_ref<bool(StringRef, ValueInfo)> IsExported,
983 std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
984 for (auto &T : LocalWPDTargetsMap) {
985 auto &VI = T.first;
986 // This was enforced earlier during trySingleImplDevirt.
987 assert(VI.getSummaryList().size() == 1 &&
988 "Devirt of local target has more than one copy");
989 auto &S = VI.getSummaryList()[0];
990 if (!IsExported(S->modulePath(), VI))
991 continue;
992
993 // It's been exported by a cross module import.
994 for (auto &SlotSummary : T.second) {
995 auto *TIdSum = Summary.getTypeIdSummary(SlotSummary.TypeID);
996 assert(TIdSum);
997 auto WPDRes = TIdSum->WPDRes.find(SlotSummary.ByteOffset);
998 assert(WPDRes != TIdSum->WPDRes.end());
999 WPDRes->second.SingleImplName = ModuleSummaryIndex::getGlobalNameForLocal(
1000 WPDRes->second.SingleImplName,
1001 Summary.getModuleHash(S->modulePath()));
1002 }
1003 }
1004}
1005
1007 // Check that summary index contains regular LTO module when performing
1008 // export to prevent occasional use of index from pure ThinLTO compilation
1009 // (-fno-split-lto-module). This kind of summary index is passed to
1010 // DevirtIndex::run, not to DevirtModule::run used by opt/runForTesting.
1011 const auto &ModPaths = Summary->modulePaths();
1013 !ModPaths.contains(ModuleSummaryIndex::getRegularLTOModuleName()))
1014 return createStringError(
1016 "combined summary should contain Regular LTO module");
1017 return ErrorSuccess();
1018}
1019
1020bool DevirtModule::runForTesting(Module &M, ModuleAnalysisManager &MAM,
1021 bool DevirtSpeculatively) {
1022 std::unique_ptr<ModuleSummaryIndex> Summary =
1023 std::make_unique<ModuleSummaryIndex>(/*HaveGVs=*/false);
1024
1025 // Handle the command-line summary arguments. This code is for testing
1026 // purposes only, so we handle errors directly.
1027 if (!ClReadSummary.empty()) {
1028 ExitOnError ExitOnErr("-wholeprogramdevirt-read-summary: " + ClReadSummary +
1029 ": ");
1030 auto ReadSummaryFile =
1032 if (Expected<std::unique_ptr<ModuleSummaryIndex>> SummaryOrErr =
1033 getModuleSummaryIndex(*ReadSummaryFile)) {
1034 Summary = std::move(*SummaryOrErr);
1035 ExitOnErr(checkCombinedSummaryForTesting(Summary.get()));
1036 } else {
1037 // Try YAML if we've failed with bitcode.
1038 consumeError(SummaryOrErr.takeError());
1039 yaml::Input In(ReadSummaryFile->getBuffer());
1040 In >> *Summary;
1041 ExitOnErr(errorCodeToError(In.error()));
1042 }
1043 }
1044
1045 bool Changed =
1046 DevirtModule(M, MAM,
1048 : nullptr,
1050 : nullptr,
1051 DevirtSpeculatively)
1052 .run();
1053
1054 if (!ClWriteSummary.empty()) {
1055 ExitOnError ExitOnErr(
1056 "-wholeprogramdevirt-write-summary: " + ClWriteSummary + ": ");
1057 std::error_code EC;
1058 if (StringRef(ClWriteSummary).ends_with(".bc")) {
1059 raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_None);
1060 ExitOnErr(errorCodeToError(EC));
1061 writeIndexToFile(*Summary, OS);
1062 } else {
1063 raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_TextWithCRLF);
1064 ExitOnErr(errorCodeToError(EC));
1065 yaml::Output Out(OS);
1066 Out << *Summary;
1067 }
1068 }
1069
1070 return Changed;
1071}
1072
1073void DevirtModule::buildTypeIdentifierMap(
1074 std::vector<VTableBits> &Bits,
1075 DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) {
1076 DenseMap<GlobalVariable *, VTableBits *> GVToBits;
1077 Bits.reserve(M.global_size());
1079 for (GlobalVariable &GV : M.globals()) {
1080 Types.clear();
1081 GV.getMetadata(LLVMContext::MD_type, Types);
1082 if (GV.isDeclaration() || Types.empty())
1083 continue;
1084
1085 VTableBits *&BitsPtr = GVToBits[&GV];
1086 if (!BitsPtr) {
1087 Bits.emplace_back();
1088 Bits.back().GV = &GV;
1089 Bits.back().ObjectSize =
1090 M.getDataLayout().getTypeAllocSize(GV.getInitializer()->getType());
1091 BitsPtr = &Bits.back();
1092 }
1093
1094 for (MDNode *Type : Types) {
1095 auto *TypeID = Type->getOperand(1).get();
1096
1097 uint64_t Offset =
1099 cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
1100 ->getZExtValue();
1101
1102 TypeIdMap[TypeID].insert({BitsPtr, Offset});
1103 }
1104 }
1105}
1106
1107bool DevirtModule::tryFindVirtualCallTargets(
1108 std::vector<VirtualCallTarget> &TargetsForSlot,
1109 const std::set<TypeMemberInfo> &TypeMemberInfos, uint64_t ByteOffset,
1110 ModuleSummaryIndex *ExportSummary) {
1111 for (const TypeMemberInfo &TM : TypeMemberInfos) {
1112 if (!TM.Bits->GV->isConstant())
1113 return false;
1114
1115 // Without DevirtSpeculatively, we cannot perform whole program
1116 // devirtualization analysis on a vtable with public LTO visibility.
1117 if (!DevirtSpeculatively && TM.Bits->GV->getVCallVisibility() ==
1119 return false;
1120
1121 Function *Fn = nullptr;
1122 Constant *C = nullptr;
1123 std::tie(Fn, C) =
1124 getFunctionAtVTableOffset(TM.Bits->GV, TM.Offset + ByteOffset, M);
1125
1126 if (!Fn)
1127 return false;
1128
1129 if (FunctionsToSkip.match(Fn->getName()))
1130 return false;
1131
1132 // We can disregard __cxa_pure_virtual as a possible call target, as
1133 // calls to pure virtuals are UB.
1134 if (Fn->getName() == "__cxa_pure_virtual")
1135 continue;
1136
1137 // In most cases empty functions will be overridden by the
1138 // implementation of the derived class, so we can skip them.
1139 if (DevirtSpeculatively && Fn->getReturnType()->isVoidTy() &&
1140 Fn->getInstructionCount() <= 1)
1141 continue;
1142
1143 // We can disregard unreachable functions as possible call targets, as
1144 // unreachable functions shouldn't be called.
1145 if (mustBeUnreachableFunction(Fn, ExportSummary))
1146 continue;
1147
1148 // Save the symbol used in the vtable to use as the devirtualization
1149 // target.
1150 auto *GV = dyn_cast<GlobalValue>(C);
1151 assert(GV);
1152 TargetsForSlot.push_back({GV, &TM});
1153 }
1154
1155 // Give up if we couldn't find any targets.
1156 return !TargetsForSlot.empty();
1157}
1158
1159bool DevirtIndex::tryFindVirtualCallTargets(
1160 std::vector<ValueInfo> &TargetsForSlot,
1161 const TypeIdCompatibleVtableInfo TIdInfo, uint64_t ByteOffset) {
1162 for (const TypeIdOffsetVtableInfo &P : TIdInfo) {
1163 // Find a representative copy of the vtable initializer.
1164 // We can have multiple available_externally, linkonce_odr and weak_odr
1165 // vtable initializers. We can also have multiple external vtable
1166 // initializers in the case of comdats, which we cannot check here.
1167 // The linker should give an error in this case.
1168 //
1169 // Also, handle the case of same-named local Vtables with the same path
1170 // and therefore the same GUID. This can happen if there isn't enough
1171 // distinguishing path when compiling the source file. In that case we
1172 // conservatively return false early.
1173 if (P.VTableVI.hasLocal() && P.VTableVI.getSummaryList().size() > 1)
1174 return false;
1175 const GlobalVarSummary *VS = nullptr;
1176 for (const auto &S : P.VTableVI.getSummaryList()) {
1177 auto *CurVS = cast<GlobalVarSummary>(S->getBaseObject());
1178 if (!CurVS->vTableFuncs().empty() ||
1179 // Previously clang did not attach the necessary type metadata to
1180 // available_externally vtables, in which case there would not
1181 // be any vtable functions listed in the summary and we need
1182 // to treat this case conservatively (in case the bitcode is old).
1183 // However, we will also not have any vtable functions in the
1184 // case of a pure virtual base class. In that case we do want
1185 // to set VS to avoid treating it conservatively.
1187 VS = CurVS;
1188 // We cannot perform whole program devirtualization analysis on a vtable
1189 // with public LTO visibility.
1190 if (VS->getVCallVisibility() == GlobalObject::VCallVisibilityPublic)
1191 return false;
1192 break;
1193 }
1194 }
1195 // There will be no VS if all copies are available_externally having no
1196 // type metadata. In that case we can't safely perform WPD.
1197 if (!VS)
1198 return false;
1199 if (!VS->isLive())
1200 continue;
1201 for (auto VTP : VS->vTableFuncs()) {
1202 if (VTP.VTableOffset != P.AddressPointOffset + ByteOffset)
1203 continue;
1204
1205 if (mustBeUnreachableFunction(VTP.FuncVI))
1206 continue;
1207
1208 TargetsForSlot.push_back(VTP.FuncVI);
1209 }
1210 }
1211
1212 // Give up if we couldn't find any targets.
1213 return !TargetsForSlot.empty();
1214}
1215
1216void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
1217 Constant *TheFn, bool &IsExported) {
1218 // Don't devirtualize function if we're told to skip it
1219 // in -wholeprogramdevirt-skip.
1220 if (FunctionsToSkip.match(TheFn->stripPointerCasts()->getName()))
1221 return;
1222 auto Apply = [&](CallSiteInfo &CSInfo) {
1223 for (auto &&VCallSite : CSInfo.CallSites) {
1224 if (!OptimizedCalls.insert(&VCallSite.CB).second)
1225 continue;
1226
1227 // Stop when the number of devirted calls reaches the cutoff.
1228 if (!DebugCounter::shouldExecute(CallsToDevirt))
1229 continue;
1230
1231 if (RemarksEnabled)
1232 VCallSite.emitRemark("single-impl",
1233 TheFn->stripPointerCasts()->getName(), OREGetter);
1234 NumSingleImpl++;
1235 auto &CB = VCallSite.CB;
1236 assert(!CB.getCalledFunction() && "devirtualizing direct call?");
1237 IRBuilder<> Builder(&CB);
1238 Value *Callee =
1239 Builder.CreateBitCast(TheFn, CB.getCalledOperand()->getType());
1240
1241 // If trap checking is enabled, add support to compare the virtual
1242 // function pointer to the devirtualized target. In case of a mismatch,
1243 // perform a debug trap.
1245 auto *Cond = Builder.CreateICmpNE(CB.getCalledOperand(), Callee);
1247 Cond, &CB, /*Unreachable=*/false,
1248 MDBuilder(M.getContext()).createUnlikelyBranchWeights());
1249 Builder.SetInsertPoint(ThenTerm);
1250 Function *TrapFn =
1251 Intrinsic::getOrInsertDeclaration(&M, Intrinsic::debugtrap);
1252 auto *CallTrap = Builder.CreateCall(TrapFn);
1253 CallTrap->setDebugLoc(CB.getDebugLoc());
1254 }
1255
1256 // If fallback checking or speculative devirtualization are enabled,
1257 // add support to compare the virtual function pointer to the
1258 // devirtualized target. In case of a mismatch, fall back to indirect
1259 // call.
1260 if (DevirtCheckMode == WPDCheckMode::Fallback || DevirtSpeculatively) {
1261 MDNode *Weights = MDBuilder(M.getContext()).createLikelyBranchWeights();
1262 // Version the indirect call site. If the called value is equal to the
1263 // given callee, 'NewInst' will be executed, otherwise the original call
1264 // site will be executed.
1265 CallBase &NewInst = versionCallSite(CB, Callee, Weights);
1266 NewInst.setCalledOperand(Callee);
1267 // Since the new call site is direct, we must clear metadata that
1268 // is only appropriate for indirect calls. This includes !prof and
1269 // !callees metadata.
1270 NewInst.setMetadata(LLVMContext::MD_prof, nullptr);
1271 NewInst.setMetadata(LLVMContext::MD_callees, nullptr);
1272 // Additionally, we should remove them from the fallback indirect call,
1273 // so that we don't attempt to perform indirect call promotion later.
1274 CB.setMetadata(LLVMContext::MD_prof, nullptr);
1275 CB.setMetadata(LLVMContext::MD_callees, nullptr);
1276 }
1277
1278 // In either trapping or non-checking mode, devirtualize original call.
1279 else {
1280 // Devirtualize unconditionally.
1281 CB.setCalledOperand(Callee);
1282 // Since the call site is now direct, we must clear metadata that
1283 // is only appropriate for indirect calls. This includes !prof and
1284 // !callees metadata.
1285 CB.setMetadata(LLVMContext::MD_prof, nullptr);
1286 CB.setMetadata(LLVMContext::MD_callees, nullptr);
1287 if (CB.getCalledOperand() &&
1289 auto *NewCS = CallBase::removeOperandBundle(
1291 CB.replaceAllUsesWith(NewCS);
1292 // Schedule for deletion at the end of pass run.
1293 CallsWithPtrAuthBundleRemoved.push_back(&CB);
1294 }
1295 }
1296
1297 // This use is no longer unsafe.
1298 if (VCallSite.NumUnsafeUses)
1299 --*VCallSite.NumUnsafeUses;
1300 }
1301 if (CSInfo.isExported())
1302 IsExported = true;
1303 CSInfo.markDevirt();
1304 };
1305 Apply(SlotInfo.CSInfo);
1306 for (auto &P : SlotInfo.ConstCSInfo)
1307 Apply(P.second);
1308}
1309
1310static bool addCalls(VTableSlotInfo &SlotInfo, const ValueInfo &Callee) {
1311 // We can't add calls if we haven't seen a definition
1312 if (Callee.getSummaryList().empty())
1313 return false;
1314
1315 // Insert calls into the summary index so that the devirtualized targets
1316 // are eligible for import.
1317 // FIXME: Annotate type tests with hotness. For now, mark these as hot
1318 // to better ensure we have the opportunity to inline them.
1319 bool IsExported = false;
1320 auto &S = Callee.getSummaryList()[0];
1321 CalleeInfo CI(CalleeInfo::HotnessType::Hot, /* HasTailCall = */ false);
1322 auto AddCalls = [&](CallSiteInfo &CSInfo) {
1323 for (auto *FS : CSInfo.SummaryTypeCheckedLoadUsers) {
1324 FS->addCall({Callee, CI});
1325 IsExported |= S->modulePath() != FS->modulePath();
1326 }
1327 for (auto *FS : CSInfo.SummaryTypeTestAssumeUsers) {
1328 FS->addCall({Callee, CI});
1329 IsExported |= S->modulePath() != FS->modulePath();
1330 }
1331 };
1332 AddCalls(SlotInfo.CSInfo);
1333 for (auto &P : SlotInfo.ConstCSInfo)
1334 AddCalls(P.second);
1335 return IsExported;
1336}
1337
1338bool DevirtModule::trySingleImplDevirt(
1339 ModuleSummaryIndex *ExportSummary,
1340 MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
1341 WholeProgramDevirtResolution *Res) {
1342 // See if the program contains a single implementation of this virtual
1343 // function.
1344 auto *TheFn = TargetsForSlot[0].Fn;
1345 for (auto &&Target : TargetsForSlot)
1346 if (TheFn != Target.Fn)
1347 return false;
1348
1349 // If so, update each call site to call that implementation directly.
1350 if (RemarksEnabled || AreStatisticsEnabled())
1351 TargetsForSlot[0].WasDevirt = true;
1352
1353 bool IsExported = false;
1354 applySingleImplDevirt(SlotInfo, TheFn, IsExported);
1355 if (!IsExported)
1356 return false;
1357
1358 // If the only implementation has local linkage, we must promote to external
1359 // to make it visible to thin LTO objects. We can only get here during the
1360 // ThinLTO export phase.
1361 if (TheFn->hasLocalLinkage()) {
1362 std::string NewName = (TheFn->getName() + ".llvm.merged").str();
1363
1364 // Since we are renaming the function, any comdats with the same name must
1365 // also be renamed. This is required when targeting COFF, as the comdat name
1366 // must match one of the names of the symbols in the comdat.
1367 if (Comdat *C = TheFn->getComdat()) {
1368 if (C->getName() == TheFn->getName()) {
1369 Comdat *NewC = M.getOrInsertComdat(NewName);
1370 NewC->setSelectionKind(C->getSelectionKind());
1371 for (GlobalObject &GO : M.global_objects())
1372 if (GO.getComdat() == C)
1373 GO.setComdat(NewC);
1374 }
1375 }
1376
1377 TheFn->setLinkage(GlobalValue::ExternalLinkage);
1378 TheFn->setVisibility(GlobalValue::HiddenVisibility);
1379 TheFn->setName(NewName);
1380 }
1381 if (ValueInfo TheFnVI = ExportSummary->getValueInfo(TheFn->getGUID()))
1382 // Any needed promotion of 'TheFn' has already been done during
1383 // LTO unit split, so we can ignore return value of AddCalls.
1384 addCalls(SlotInfo, TheFnVI);
1385
1387 Res->SingleImplName = std::string(TheFn->getName());
1388
1389 return true;
1390}
1391
1392bool DevirtIndex::trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
1393 VTableSlotSummary &SlotSummary,
1394 VTableSlotInfo &SlotInfo,
1395 WholeProgramDevirtResolution *Res,
1396 std::set<ValueInfo> &DevirtTargets) {
1397 // See if the program contains a single implementation of this virtual
1398 // function.
1399 auto TheFn = TargetsForSlot[0];
1400 for (auto &&Target : TargetsForSlot)
1401 if (TheFn != Target)
1402 return false;
1403
1404 // Don't devirtualize if we don't have target definition.
1405 auto Size = TheFn.getSummaryList().size();
1406 if (!Size)
1407 return false;
1408
1409 // Don't devirtualize function if we're told to skip it
1410 // in -wholeprogramdevirt-skip.
1411 if (FunctionsToSkip.match(TheFn.name()))
1412 return false;
1413
1414 // If the summary list contains multiple summaries where at least one is
1415 // a local, give up, as we won't know which (possibly promoted) name to use.
1416 if (TheFn.hasLocal() && Size > 1)
1417 return false;
1418
1419 // Collect functions devirtualized at least for one call site for stats.
1421 DevirtTargets.insert(TheFn);
1422
1423 auto &S = TheFn.getSummaryList()[0];
1424 bool IsExported = addCalls(SlotInfo, TheFn);
1425 if (IsExported)
1426 ExportedGUIDs.insert(TheFn.getGUID());
1427
1428 // Record in summary for use in devirtualization during the ThinLTO import
1429 // step.
1431 if (GlobalValue::isLocalLinkage(S->linkage())) {
1432 if (IsExported)
1433 // If target is a local function and we are exporting it by
1434 // devirtualizing a call in another module, we need to record the
1435 // promoted name.
1437 TheFn.name(), ExportSummary.getModuleHash(S->modulePath()));
1438 else {
1439 LocalWPDTargetsMap[TheFn].push_back(SlotSummary);
1440 Res->SingleImplName = std::string(TheFn.name());
1441 }
1442 } else
1443 Res->SingleImplName = std::string(TheFn.name());
1444
1445 // Name will be empty if this thin link driven off of serialized combined
1446 // index (e.g. llvm-lto). However, WPD is not supported/invoked for the
1447 // legacy LTO API anyway.
1448 assert(!Res->SingleImplName.empty());
1449
1450 return true;
1451}
1452
1453void DevirtModule::tryICallBranchFunnel(
1454 MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
1455 WholeProgramDevirtResolution *Res, VTableSlot Slot) {
1456 Triple T(M.getTargetTriple());
1457 if (T.getArch() != Triple::x86_64)
1458 return;
1459
1460 if (TargetsForSlot.size() > ClThreshold)
1461 return;
1462
1463 bool HasNonDevirt = !SlotInfo.CSInfo.AllCallSitesDevirted;
1464 if (!HasNonDevirt)
1465 for (auto &P : SlotInfo.ConstCSInfo)
1466 if (!P.second.AllCallSitesDevirted) {
1467 HasNonDevirt = true;
1468 break;
1469 }
1470
1471 if (!HasNonDevirt)
1472 return;
1473
1474 // If any GV is AvailableExternally, not to generate branch.funnel.
1475 // NOTE: It is to avoid crash in LowerTypeTest.
1476 // If the branch.funnel is generated, because GV.isDeclarationForLinker(),
1477 // in LowerTypeTestsModule::lower(), its GlobalTypeMember would NOT
1478 // be saved in GlobalTypeMembers[&GV]. Then crash happens in
1479 // buildBitSetsFromDisjointSet due to GlobalTypeMembers[&GV] is NULL.
1480 // Even doing experiment to save it in GlobalTypeMembers[&GV] and
1481 // making GlobalTypeMembers[&GV] be not NULL, crash could avoid from
1482 // buildBitSetsFromDisjointSet. But still report_fatal_error in Verifier
1483 // or SelectionDAGBuilder later, because operands linkage type consistency
1484 // check of icall.branch.funnel can not pass.
1485 for (auto &T : TargetsForSlot) {
1486 if (T.TM->Bits->GV->hasAvailableExternallyLinkage())
1487 return;
1488 }
1489
1490 FunctionType *FT =
1491 FunctionType::get(Type::getVoidTy(M.getContext()), {Int8PtrTy}, true);
1492 Function *JT;
1493 if (isa<MDString>(Slot.TypeID)) {
1495 M.getDataLayout().getProgramAddressSpace(),
1496 getGlobalName(Slot, {}, "branch_funnel"), &M);
1498 } else {
1500 M.getDataLayout().getProgramAddressSpace(),
1501 "branch_funnel", &M);
1502 }
1503 JT->addParamAttr(0, Attribute::Nest);
1504
1505 std::vector<Value *> JTArgs;
1506 JTArgs.push_back(JT->arg_begin());
1507 for (auto &T : TargetsForSlot) {
1508 JTArgs.push_back(getMemberAddr(T.TM));
1509 JTArgs.push_back(T.Fn);
1510 }
1511
1512 BasicBlock *BB = BasicBlock::Create(M.getContext(), "", JT, nullptr);
1514 &M, llvm::Intrinsic::icall_branch_funnel, {});
1515
1516 auto *CI = CallInst::Create(Intr, JTArgs, "", BB);
1517 CI->setTailCallKind(CallInst::TCK_MustTail);
1518 ReturnInst::Create(M.getContext(), nullptr, BB);
1519
1520 bool IsExported = false;
1521 applyICallBranchFunnel(SlotInfo, *JT, IsExported);
1522 if (IsExported)
1524
1525 if (!JT->getEntryCount().has_value()) {
1526 // FIXME: we could pass through thinlto the necessary information.
1528 }
1529}
1530
1531void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
1532 Function &JT, bool &IsExported) {
1533 DenseMap<Function *, double> FunctionEntryCounts;
1534 auto Apply = [&](CallSiteInfo &CSInfo) {
1535 if (CSInfo.isExported())
1536 IsExported = true;
1537 if (CSInfo.AllCallSitesDevirted)
1538 return;
1539
1540 std::map<CallBase *, CallBase *> CallBases;
1541 for (auto &&VCallSite : CSInfo.CallSites) {
1542 CallBase &CB = VCallSite.CB;
1543
1544 if (CallBases.find(&CB) != CallBases.end()) {
1545 // When finding devirtualizable calls, it's possible to find the same
1546 // vtable passed to multiple llvm.type.test or llvm.type.checked.load
1547 // calls, which can cause duplicate call sites to be recorded in
1548 // [Const]CallSites. If we've already found one of these
1549 // call instances, just ignore it. It will be replaced later.
1550 continue;
1551 }
1552
1553 // Jump tables are only profitable if the retpoline mitigation is enabled.
1554 Attribute FSAttr = CB.getCaller()->getFnAttribute("target-features");
1555 if (!FSAttr.isValid() ||
1556 !FSAttr.getValueAsString().contains("+retpoline"))
1557 continue;
1558
1559 NumBranchFunnel++;
1560 if (RemarksEnabled)
1561 VCallSite.emitRemark("branch-funnel", JT.getName(), OREGetter);
1562
1563 // Pass the address of the vtable in the nest register, which is r10 on
1564 // x86_64.
1565 std::vector<Type *> NewArgs;
1566 NewArgs.push_back(Int8PtrTy);
1567 append_range(NewArgs, CB.getFunctionType()->params());
1568 FunctionType *NewFT =
1570 CB.getFunctionType()->isVarArg());
1571 IRBuilder<> IRB(&CB);
1572 std::vector<Value *> Args;
1573 Args.push_back(VCallSite.VTable);
1574 llvm::append_range(Args, CB.args());
1575
1576 CallBase *NewCS = nullptr;
1578 // Accumulate the call frequencies of the original call site, and use
1579 // that as total entry count for the funnel function.
1580 auto &F = *CB.getCaller();
1581 auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
1582 auto EC = BFI.getBlockFreq(&F.getEntryBlock());
1583 auto CC = F.getEntryCount(/*AllowSynthetic=*/true);
1584 double CallCount = 0.0;
1585 if (EC.getFrequency() != 0 && CC && CC->getCount() != 0) {
1586 double CallFreq =
1587 static_cast<double>(
1588 BFI.getBlockFreq(CB.getParent()).getFrequency()) /
1589 EC.getFrequency();
1590 CallCount = CallFreq * CC->getCount();
1591 }
1592 FunctionEntryCounts[&JT] += CallCount;
1593 }
1594 if (isa<CallInst>(CB))
1595 NewCS = IRB.CreateCall(NewFT, &JT, Args);
1596 else
1597 NewCS =
1598 IRB.CreateInvoke(NewFT, &JT, cast<InvokeInst>(CB).getNormalDest(),
1599 cast<InvokeInst>(CB).getUnwindDest(), Args);
1600 NewCS->setCallingConv(CB.getCallingConv());
1601
1602 AttributeList Attrs = CB.getAttributes();
1603 std::vector<AttributeSet> NewArgAttrs;
1604 NewArgAttrs.push_back(AttributeSet::get(
1605 M.getContext(), ArrayRef<Attribute>{Attribute::get(
1606 M.getContext(), Attribute::Nest)}));
1607 for (unsigned I = 0; I + 2 < Attrs.getNumAttrSets(); ++I)
1608 NewArgAttrs.push_back(Attrs.getParamAttrs(I));
1609 NewCS->setAttributes(
1610 AttributeList::get(M.getContext(), Attrs.getFnAttrs(),
1611 Attrs.getRetAttrs(), NewArgAttrs));
1612
1613 CallBases[&CB] = NewCS;
1614
1615 // This use is no longer unsafe.
1616 if (VCallSite.NumUnsafeUses)
1617 --*VCallSite.NumUnsafeUses;
1618 }
1619 // Don't mark as devirtualized because there may be callers compiled without
1620 // retpoline mitigation, which would mean that they are lowered to
1621 // llvm.type.test and therefore require an llvm.type.test resolution for the
1622 // type identifier.
1623
1624 for (auto &[Old, New] : CallBases) {
1625 Old->replaceAllUsesWith(New);
1626 Old->eraseFromParent();
1627 }
1628 };
1629 Apply(SlotInfo.CSInfo);
1630 for (auto &P : SlotInfo.ConstCSInfo)
1631 Apply(P.second);
1632 for (auto &[F, C] : FunctionEntryCounts) {
1633 assert(!F->getEntryCount(/*AllowSynthetic=*/true) &&
1634 "Unexpected entry count for funnel that was freshly synthesized");
1635 F->setEntryCount(static_cast<uint64_t>(std::round(C)));
1636 }
1637}
1638
1639bool DevirtModule::tryEvaluateFunctionsWithArgs(
1641 ArrayRef<uint64_t> Args) {
1642 // Evaluate each function and store the result in each target's RetVal
1643 // field.
1644 for (VirtualCallTarget &Target : TargetsForSlot) {
1645 // TODO: Skip for now if the vtable symbol was an alias to a function,
1646 // need to evaluate whether it would be correct to analyze the aliasee
1647 // function for this optimization.
1648 auto *Fn = dyn_cast<Function>(Target.Fn);
1649 if (!Fn)
1650 return false;
1651
1652 if (Fn->arg_size() != Args.size() + 1)
1653 return false;
1654
1655 Evaluator Eval(M.getDataLayout(), nullptr);
1657 EvalArgs.push_back(
1659 for (unsigned I = 0; I != Args.size(); ++I) {
1660 auto *ArgTy =
1662 if (!ArgTy)
1663 return false;
1664 EvalArgs.push_back(ConstantInt::get(ArgTy, Args[I]));
1665 }
1666
1667 Constant *RetVal;
1668 if (!Eval.EvaluateFunction(Fn, RetVal, EvalArgs) ||
1669 !isa<ConstantInt>(RetVal))
1670 return false;
1671 Target.RetVal = cast<ConstantInt>(RetVal)->getZExtValue();
1672 }
1673 return true;
1674}
1675
1676void DevirtModule::applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
1677 uint64_t TheRetVal) {
1678 for (auto Call : CSInfo.CallSites) {
1679 if (!OptimizedCalls.insert(&Call.CB).second)
1680 continue;
1681 NumUniformRetVal++;
1682 Call.replaceAndErase(
1683 "uniform-ret-val", FnName, RemarksEnabled, OREGetter,
1684 ConstantInt::get(cast<IntegerType>(Call.CB.getType()), TheRetVal));
1685 }
1686 CSInfo.markDevirt();
1687}
1688
1689bool DevirtModule::tryUniformRetValOpt(
1690 MutableArrayRef<VirtualCallTarget> TargetsForSlot, CallSiteInfo &CSInfo,
1691 WholeProgramDevirtResolution::ByArg *Res) {
1692 // Uniform return value optimization. If all functions return the same
1693 // constant, replace all calls with that constant.
1694 uint64_t TheRetVal = TargetsForSlot[0].RetVal;
1695 for (const VirtualCallTarget &Target : TargetsForSlot)
1696 if (Target.RetVal != TheRetVal)
1697 return false;
1698
1699 if (CSInfo.isExported()) {
1701 Res->Info = TheRetVal;
1702 }
1703
1704 applyUniformRetValOpt(CSInfo, TargetsForSlot[0].Fn->getName(), TheRetVal);
1705 if (RemarksEnabled || AreStatisticsEnabled())
1706 for (auto &&Target : TargetsForSlot)
1707 Target.WasDevirt = true;
1708 return true;
1709}
1710
1711std::string DevirtModule::getGlobalName(VTableSlot Slot,
1712 ArrayRef<uint64_t> Args,
1713 StringRef Name) {
1714 std::string FullName = "__typeid_";
1715 raw_string_ostream OS(FullName);
1716 OS << cast<MDString>(Slot.TypeID)->getString() << '_' << Slot.ByteOffset;
1717 for (uint64_t Arg : Args)
1718 OS << '_' << Arg;
1719 OS << '_' << Name;
1720 return FullName;
1721}
1722
1723bool DevirtModule::shouldExportConstantsAsAbsoluteSymbols() {
1724 Triple T(M.getTargetTriple());
1725 return T.isX86() && T.getObjectFormat() == Triple::ELF;
1726}
1727
1728void DevirtModule::exportGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
1729 StringRef Name, Constant *C) {
1730 GlobalAlias *GA = GlobalAlias::create(Int8Ty, 0, GlobalValue::ExternalLinkage,
1731 getGlobalName(Slot, Args, Name), C, &M);
1733}
1734
1735void DevirtModule::exportConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
1736 StringRef Name, uint32_t Const,
1737 uint32_t &Storage) {
1738 if (shouldExportConstantsAsAbsoluteSymbols()) {
1739 exportGlobal(
1740 Slot, Args, Name,
1741 ConstantExpr::getIntToPtr(ConstantInt::get(Int32Ty, Const), Int8PtrTy));
1742 return;
1743 }
1744
1745 Storage = Const;
1746}
1747
1748Constant *DevirtModule::importGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
1749 StringRef Name) {
1750 GlobalVariable *GV =
1751 M.getOrInsertGlobal(getGlobalName(Slot, Args, Name), Int8Arr0Ty);
1753 return GV;
1754}
1755
1756Constant *DevirtModule::importConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
1757 StringRef Name, IntegerType *IntTy,
1758 uint32_t Storage) {
1759 if (!shouldExportConstantsAsAbsoluteSymbols())
1760 return ConstantInt::get(IntTy, Storage);
1761
1762 Constant *C = importGlobal(Slot, Args, Name);
1763 auto *GV = cast<GlobalVariable>(C->stripPointerCasts());
1764 C = ConstantExpr::getPtrToInt(C, IntTy);
1765
1766 // We only need to set metadata if the global is newly created, in which
1767 // case it would not have hidden visibility.
1768 if (GV->hasMetadata(LLVMContext::MD_absolute_symbol))
1769 return C;
1770
1771 auto SetAbsRange = [&](uint64_t Min, uint64_t Max) {
1772 auto *MinC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Min));
1773 auto *MaxC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Max));
1774 GV->setMetadata(LLVMContext::MD_absolute_symbol,
1775 MDNode::get(M.getContext(), {MinC, MaxC}));
1776 };
1777 unsigned AbsWidth = IntTy->getBitWidth();
1778 if (AbsWidth == IntPtrTy->getBitWidth()) {
1779 uint64_t AllOnes = IntTy->getBitMask();
1780 SetAbsRange(AllOnes, AllOnes); // Full set.
1781 } else {
1782 SetAbsRange(0, 1ull << AbsWidth);
1783 }
1784 return C;
1785}
1786
1787void DevirtModule::applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
1788 bool IsOne,
1789 Constant *UniqueMemberAddr) {
1790 for (auto &&Call : CSInfo.CallSites) {
1791 if (!OptimizedCalls.insert(&Call.CB).second)
1792 continue;
1793 IRBuilder<> B(&Call.CB);
1794 Value *Cmp =
1795 B.CreateICmp(IsOne ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE, Call.VTable,
1796 B.CreateBitCast(UniqueMemberAddr, Call.VTable->getType()));
1797 Cmp = B.CreateZExt(Cmp, Call.CB.getType());
1798 NumUniqueRetVal++;
1799 Call.replaceAndErase("unique-ret-val", FnName, RemarksEnabled, OREGetter,
1800 Cmp);
1801 }
1802 CSInfo.markDevirt();
1803}
1804
1805Constant *DevirtModule::getMemberAddr(const TypeMemberInfo *M) {
1806 return ConstantExpr::getGetElementPtr(Int8Ty, M->Bits->GV,
1807 ConstantInt::get(Int64Ty, M->Offset));
1808}
1809
1810bool DevirtModule::tryUniqueRetValOpt(
1811 unsigned BitWidth, MutableArrayRef<VirtualCallTarget> TargetsForSlot,
1812 CallSiteInfo &CSInfo, WholeProgramDevirtResolution::ByArg *Res,
1813 VTableSlot Slot, ArrayRef<uint64_t> Args) {
1814 // IsOne controls whether we look for a 0 or a 1.
1815 auto tryUniqueRetValOptFor = [&](bool IsOne) {
1816 const TypeMemberInfo *UniqueMember = nullptr;
1817 for (const VirtualCallTarget &Target : TargetsForSlot) {
1818 if (Target.RetVal == (IsOne ? 1 : 0)) {
1819 if (UniqueMember)
1820 return false;
1821 UniqueMember = Target.TM;
1822 }
1823 }
1824
1825 // We should have found a unique member or bailed out by now. We already
1826 // checked for a uniform return value in tryUniformRetValOpt.
1827 assert(UniqueMember);
1828
1829 Constant *UniqueMemberAddr = getMemberAddr(UniqueMember);
1830 if (CSInfo.isExported()) {
1832 Res->Info = IsOne;
1833
1834 exportGlobal(Slot, Args, "unique_member", UniqueMemberAddr);
1835 }
1836
1837 // Replace each call with the comparison.
1838 applyUniqueRetValOpt(CSInfo, TargetsForSlot[0].Fn->getName(), IsOne,
1839 UniqueMemberAddr);
1840
1841 // Update devirtualization statistics for targets.
1842 if (RemarksEnabled || AreStatisticsEnabled())
1843 for (auto &&Target : TargetsForSlot)
1844 Target.WasDevirt = true;
1845
1846 return true;
1847 };
1848
1849 if (BitWidth == 1) {
1850 if (tryUniqueRetValOptFor(true))
1851 return true;
1852 if (tryUniqueRetValOptFor(false))
1853 return true;
1854 }
1855 return false;
1856}
1857
1858void DevirtModule::applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
1859 Constant *Byte, Constant *Bit) {
1860 for (auto Call : CSInfo.CallSites) {
1861 if (!OptimizedCalls.insert(&Call.CB).second)
1862 continue;
1863 auto *RetType = cast<IntegerType>(Call.CB.getType());
1864 IRBuilder<> B(&Call.CB);
1865 Value *Addr = B.CreatePtrAdd(Call.VTable, Byte);
1866 if (RetType->getBitWidth() == 1) {
1867 Value *Bits = B.CreateLoad(Int8Ty, Addr);
1868 Value *BitsAndBit = B.CreateAnd(Bits, Bit);
1869 auto IsBitSet = B.CreateICmpNE(BitsAndBit, ConstantInt::get(Int8Ty, 0));
1870 NumVirtConstProp1Bit++;
1871 Call.replaceAndErase("virtual-const-prop-1-bit", FnName, RemarksEnabled,
1872 OREGetter, IsBitSet);
1873 } else {
1874 Value *Val = B.CreateLoad(RetType, Addr);
1875 NumVirtConstProp++;
1876 Call.replaceAndErase("virtual-const-prop", FnName, RemarksEnabled,
1877 OREGetter, Val);
1878 }
1879 }
1880 CSInfo.markDevirt();
1881}
1882
1883bool DevirtModule::tryVirtualConstProp(
1884 MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
1885 WholeProgramDevirtResolution *Res, VTableSlot Slot) {
1886 // TODO: Skip for now if the vtable symbol was an alias to a function,
1887 // need to evaluate whether it would be correct to analyze the aliasee
1888 // function for this optimization.
1889 auto *Fn = dyn_cast<Function>(TargetsForSlot[0].Fn);
1890 if (!Fn)
1891 return false;
1892 // This only works if the function returns an integer.
1893 auto *RetType = dyn_cast<IntegerType>(Fn->getReturnType());
1894 if (!RetType)
1895 return false;
1896 unsigned BitWidth = RetType->getBitWidth();
1897
1898 // TODO: Since we can evaluated these constants at compile-time, we can save
1899 // some space by calculating the smallest range of values that all these
1900 // constants can fit in, then only allocate enough space to fit those values.
1901 // At each callsite, we can get the original type by doing a sign/zero
1902 // extension. For example, if we would store an i64, but we can see that all
1903 // the values fit into an i16, then we can store an i16 before/after the
1904 // vtable and at each callsite do a s/zext.
1905 if (BitWidth > 64)
1906 return false;
1907
1908 Align TypeAlignment = M.getDataLayout().getABIIntegerTypeAlignment(BitWidth);
1909
1910 // Make sure that each function is defined, does not access memory, takes at
1911 // least one argument, does not use its first argument (which we assume is
1912 // 'this'), and has the same return type.
1913 //
1914 // Note that we test whether this copy of the function is readnone, rather
1915 // than testing function attributes, which must hold for any copy of the
1916 // function, even a less optimized version substituted at link time. This is
1917 // sound because the virtual constant propagation optimizations effectively
1918 // inline all implementations of the virtual function into each call site,
1919 // rather than using function attributes to perform local optimization.
1920 for (VirtualCallTarget &Target : TargetsForSlot) {
1921 // TODO: Skip for now if the vtable symbol was an alias to a function,
1922 // need to evaluate whether it would be correct to analyze the aliasee
1923 // function for this optimization.
1924 auto *Fn = dyn_cast<Function>(Target.Fn);
1925 if (!Fn)
1926 return false;
1927
1928 if (Fn->isDeclaration() ||
1929 !computeFunctionBodyMemoryAccess(*Fn, FAM.getResult<AAManager>(*Fn))
1931 Fn->arg_empty() || !Fn->arg_begin()->use_empty() ||
1932 Fn->getReturnType() != RetType)
1933 return false;
1934
1935 // This only works if the integer size is at most the alignment of the
1936 // vtable. If the table is underaligned, then we can't guarantee that the
1937 // constant will always be aligned to the integer type alignment. For
1938 // example, if the table is `align 1`, we can never guarantee that an i32
1939 // stored before/after the vtable is 32-bit aligned without changing the
1940 // alignment of the new global.
1941 GlobalVariable *GV = Target.TM->Bits->GV;
1942 Align TableAlignment = M.getDataLayout().getValueOrABITypeAlignment(
1943 GV->getAlign(), GV->getValueType());
1944 if (TypeAlignment > TableAlignment)
1945 return false;
1946 }
1947
1948 for (auto &&CSByConstantArg : SlotInfo.ConstCSInfo) {
1949 if (!tryEvaluateFunctionsWithArgs(TargetsForSlot, CSByConstantArg.first))
1950 continue;
1951
1952 WholeProgramDevirtResolution::ByArg *ResByArg = nullptr;
1953 if (Res)
1954 ResByArg = &Res->ResByArg[CSByConstantArg.first];
1955
1956 if (tryUniformRetValOpt(TargetsForSlot, CSByConstantArg.second, ResByArg))
1957 continue;
1958
1959 if (tryUniqueRetValOpt(BitWidth, TargetsForSlot, CSByConstantArg.second,
1960 ResByArg, Slot, CSByConstantArg.first))
1961 continue;
1962
1963 // Find an allocation offset in bits in all vtables associated with the
1964 // type.
1965 // TODO: If there would be "holes" in the vtable that were added by
1966 // padding, we could place i1s there to reduce any extra padding that
1967 // would be introduced by the i1s.
1968 uint64_t AllocBefore =
1969 findLowestOffset(TargetsForSlot, /*IsAfter=*/false, BitWidth);
1970 uint64_t AllocAfter =
1971 findLowestOffset(TargetsForSlot, /*IsAfter=*/true, BitWidth);
1972
1973 // Calculate the total amount of padding needed to store a value at both
1974 // ends of the object.
1975 uint64_t TotalPaddingBefore = 0, TotalPaddingAfter = 0;
1976 for (auto &&Target : TargetsForSlot) {
1977 TotalPaddingBefore += std::max<int64_t>(
1978 (AllocBefore + 7) / 8 - Target.allocatedBeforeBytes() - 1, 0);
1979 TotalPaddingAfter += std::max<int64_t>(
1980 (AllocAfter + 7) / 8 - Target.allocatedAfterBytes() - 1, 0);
1981 }
1982
1983 // If the amount of padding is too large, give up.
1984 // FIXME: do something smarter here.
1985 if (std::min(TotalPaddingBefore, TotalPaddingAfter) > 128)
1986 continue;
1987
1988 // Calculate the offset to the value as a (possibly negative) byte offset
1989 // and (if applicable) a bit offset, and store the values in the targets.
1990 int64_t OffsetByte;
1991 uint64_t OffsetBit;
1992 if (TotalPaddingBefore <= TotalPaddingAfter)
1993 setBeforeReturnValues(TargetsForSlot, AllocBefore, BitWidth, OffsetByte,
1994 OffsetBit);
1995 else
1996 setAfterReturnValues(TargetsForSlot, AllocAfter, BitWidth, OffsetByte,
1997 OffsetBit);
1998
1999 // In an earlier check we forbade constant propagation from operating on
2000 // tables whose alignment is less than the alignment needed for loading
2001 // the constant. Thus, the address we take the offset from will always be
2002 // aligned to at least this integer alignment. Now, we need to ensure that
2003 // the offset is also aligned to this integer alignment to ensure we always
2004 // have an aligned load.
2005 assert(OffsetByte % TypeAlignment.value() == 0);
2006
2007 if (RemarksEnabled || AreStatisticsEnabled())
2008 for (auto &&Target : TargetsForSlot)
2009 Target.WasDevirt = true;
2010
2011
2012 if (CSByConstantArg.second.isExported()) {
2014 exportConstant(Slot, CSByConstantArg.first, "byte", OffsetByte,
2015 ResByArg->Byte);
2016 exportConstant(Slot, CSByConstantArg.first, "bit", 1ULL << OffsetBit,
2017 ResByArg->Bit);
2018 }
2019
2020 // Rewrite each call to a load from OffsetByte/OffsetBit.
2021 Constant *ByteConst = ConstantInt::getSigned(Int32Ty, OffsetByte);
2022 Constant *BitConst = ConstantInt::get(Int8Ty, 1ULL << OffsetBit);
2023 applyVirtualConstProp(CSByConstantArg.second,
2024 TargetsForSlot[0].Fn->getName(), ByteConst, BitConst);
2025 }
2026 return true;
2027}
2028
2029void DevirtModule::rebuildGlobal(VTableBits &B) {
2030 if (B.Before.Bytes.empty() && B.After.Bytes.empty())
2031 return;
2032
2033 // Align the before byte array to the global's minimum alignment so that we
2034 // don't break any alignment requirements on the global.
2035 Align Alignment = M.getDataLayout().getValueOrABITypeAlignment(
2036 B.GV->getAlign(), B.GV->getValueType());
2037 B.Before.Bytes.resize(alignTo(B.Before.Bytes.size(), Alignment));
2038
2039 // Before was stored in reverse order; flip it now.
2040 for (size_t I = 0, Size = B.Before.Bytes.size(); I != Size / 2; ++I)
2041 std::swap(B.Before.Bytes[I], B.Before.Bytes[Size - 1 - I]);
2042
2043 // Build an anonymous global containing the before bytes, followed by the
2044 // original initializer, followed by the after bytes.
2045 auto *NewInit = ConstantStruct::getAnon(
2046 {ConstantDataArray::get(M.getContext(), B.Before.Bytes),
2047 B.GV->getInitializer(),
2048 ConstantDataArray::get(M.getContext(), B.After.Bytes)});
2049 auto *NewGV =
2050 new GlobalVariable(M, NewInit->getType(), B.GV->isConstant(),
2051 GlobalVariable::PrivateLinkage, NewInit, "", B.GV);
2052 NewGV->setSection(B.GV->getSection());
2053 NewGV->setComdat(B.GV->getComdat());
2054 NewGV->setAlignment(B.GV->getAlign());
2055
2056 // Copy the original vtable's metadata to the anonymous global, adjusting
2057 // offsets as required.
2058 NewGV->copyMetadata(B.GV, B.Before.Bytes.size());
2059
2060 // Build an alias named after the original global, pointing at the second
2061 // element (the original initializer).
2062 auto *Alias = GlobalAlias::create(
2063 B.GV->getInitializer()->getType(), 0, B.GV->getLinkage(), "",
2065 NewInit->getType(), NewGV,
2066 ArrayRef<Constant *>{ConstantInt::get(Int32Ty, 0),
2067 ConstantInt::get(Int32Ty, 1)}),
2068 &M);
2069 Alias->setVisibility(B.GV->getVisibility());
2070 Alias->takeName(B.GV);
2071
2072 B.GV->replaceAllUsesWith(Alias);
2073 B.GV->eraseFromParent();
2074}
2075
2076bool DevirtModule::areRemarksEnabled() {
2077 const auto &FL = M.getFunctionList();
2078 for (const Function &Fn : FL) {
2079 if (Fn.empty())
2080 continue;
2081 auto DI = OptimizationRemark(DEBUG_TYPE, "", DebugLoc(), &Fn.front());
2082 return DI.isEnabled();
2083 }
2084 return false;
2085}
2086
2087void DevirtModule::scanTypeTestUsers(
2088 Function *TypeTestFunc,
2089 DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) {
2090 // Find all virtual calls via a virtual table pointer %p under an assumption
2091 // of the form llvm.assume(llvm.type.test(%p, %md)) or
2092 // llvm.assume(llvm.public.type.test(%p, %md)).
2093 // This indicates that %p points to a member of the type identifier %md.
2094 // Group calls by (type ID, offset) pair (effectively the identity of the
2095 // virtual function) and store to CallSlots.
2096 for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) {
2097 auto *CI = dyn_cast<CallInst>(U.getUser());
2098 if (!CI)
2099 continue;
2100 // Search for virtual calls based on %p and add them to DevirtCalls.
2103 auto &DT = FAM.getResult<DominatorTreeAnalysis>(*CI->getFunction());
2104 findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT);
2105
2106 Metadata *TypeId =
2107 cast<MetadataAsValue>(CI->getArgOperand(1))->getMetadata();
2108 // If we found any, add them to CallSlots.
2109 if (!Assumes.empty()) {
2110 Value *Ptr = CI->getArgOperand(0)->stripPointerCasts();
2111 for (DevirtCallSite Call : DevirtCalls)
2112 CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CB, nullptr);
2113 }
2114
2115 auto RemoveTypeTestAssumes = [&]() {
2116 // We no longer need the assumes or the type test.
2117 for (auto *Assume : Assumes)
2118 Assume->eraseFromParent();
2119 // We can't use RecursivelyDeleteTriviallyDeadInstructions here because we
2120 // may use the vtable argument later.
2121 if (CI->use_empty())
2122 CI->eraseFromParent();
2123 };
2124
2125 // At this point we could remove all type test assume sequences, as they
2126 // were originally inserted for WPD. However, we can keep these in the
2127 // code stream for later analysis (e.g. to help drive more efficient ICP
2128 // sequences). They will eventually be removed by a second LowerTypeTests
2129 // invocation that cleans them up. In order to do this correctly, the first
2130 // LowerTypeTests invocation needs to know that they have "Unknown" type
2131 // test resolution, so that they aren't treated as Unsat and lowered to
2132 // False, which will break any uses on assumes. Below we remove any type
2133 // test assumes that will not be treated as Unknown by LTT.
2134
2135 // The type test assumes will be treated by LTT as Unsat if the type id is
2136 // not used on a global (in which case it has no entry in the TypeIdMap).
2137 if (!TypeIdMap.count(TypeId))
2138 RemoveTypeTestAssumes();
2139
2140 // For ThinLTO importing, we need to remove the type test assumes if this is
2141 // an MDString type id without a corresponding TypeIdSummary. Any
2142 // non-MDString type ids are ignored and treated as Unknown by LTT, so their
2143 // type test assumes can be kept. If the MDString type id is missing a
2144 // TypeIdSummary (e.g. because there was no use on a vcall, preventing the
2145 // exporting phase of WPD from analyzing it), then it would be treated as
2146 // Unsat by LTT and we need to remove its type test assumes here. If not
2147 // used on a vcall we don't need them for later optimization use in any
2148 // case.
2149 else if (ImportSummary && isa<MDString>(TypeId)) {
2150 const TypeIdSummary *TidSummary =
2151 ImportSummary->getTypeIdSummary(cast<MDString>(TypeId)->getString());
2152 if (!TidSummary)
2153 RemoveTypeTestAssumes();
2154 else
2155 // If one was created it should not be Unsat, because if we reached here
2156 // the type id was used on a global.
2158 }
2159 }
2160}
2161
2162void DevirtModule::scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc) {
2163 Function *TypeTestFunc =
2164 Intrinsic::getOrInsertDeclaration(&M, Intrinsic::type_test);
2165
2166 for (Use &U : llvm::make_early_inc_range(TypeCheckedLoadFunc->uses())) {
2167 auto *CI = dyn_cast<CallInst>(U.getUser());
2168 if (!CI)
2169 continue;
2170
2171 Value *Ptr = CI->getArgOperand(0);
2172 Value *Offset = CI->getArgOperand(1);
2173 Value *TypeIdValue = CI->getArgOperand(2);
2174 Metadata *TypeId = cast<MetadataAsValue>(TypeIdValue)->getMetadata();
2175
2179 bool HasNonCallUses = false;
2180 auto &DT = FAM.getResult<DominatorTreeAnalysis>(*CI->getFunction());
2181 findDevirtualizableCallsForTypeCheckedLoad(DevirtCalls, LoadedPtrs, Preds,
2182 HasNonCallUses, CI, DT);
2183
2184 // Start by generating "pessimistic" code that explicitly loads the function
2185 // pointer from the vtable and performs the type check. If possible, we will
2186 // eliminate the load and the type check later.
2187
2188 // If possible, only generate the load at the point where it is used.
2189 // This helps avoid unnecessary spills.
2190 IRBuilder<> LoadB(
2191 (LoadedPtrs.size() == 1 && !HasNonCallUses) ? LoadedPtrs[0] : CI);
2192
2193 Value *LoadedValue = nullptr;
2194 if (TypeCheckedLoadFunc->getIntrinsicID() ==
2195 Intrinsic::type_checked_load_relative) {
2197 &M, Intrinsic::load_relative, {Int32Ty});
2198 LoadedValue = LoadB.CreateCall(LoadRelFunc, {Ptr, Offset});
2199 } else {
2200 Value *GEP = LoadB.CreatePtrAdd(Ptr, Offset);
2201 LoadedValue = LoadB.CreateLoad(Int8PtrTy, GEP);
2202 }
2203
2204 for (Instruction *LoadedPtr : LoadedPtrs) {
2205 LoadedPtr->replaceAllUsesWith(LoadedValue);
2206 LoadedPtr->eraseFromParent();
2207 }
2208
2209 // Likewise for the type test.
2210 IRBuilder<> CallB((Preds.size() == 1 && !HasNonCallUses) ? Preds[0] : CI);
2211 CallInst *TypeTestCall = CallB.CreateCall(TypeTestFunc, {Ptr, TypeIdValue});
2212
2213 for (Instruction *Pred : Preds) {
2214 Pred->replaceAllUsesWith(TypeTestCall);
2215 Pred->eraseFromParent();
2216 }
2217
2218 // We have already erased any extractvalue instructions that refer to the
2219 // intrinsic call, but the intrinsic may have other non-extractvalue uses
2220 // (although this is unlikely). In that case, explicitly build a pair and
2221 // RAUW it.
2222 if (!CI->use_empty()) {
2223 Value *Pair = PoisonValue::get(CI->getType());
2224 IRBuilder<> B(CI);
2225 Pair = B.CreateInsertValue(Pair, LoadedValue, {0});
2226 Pair = B.CreateInsertValue(Pair, TypeTestCall, {1});
2227 CI->replaceAllUsesWith(Pair);
2228 }
2229
2230 // The number of unsafe uses is initially the number of uses.
2231 auto &NumUnsafeUses = NumUnsafeUsesForTypeTest[TypeTestCall];
2232 NumUnsafeUses = DevirtCalls.size();
2233
2234 // If the function pointer has a non-call user, we cannot eliminate the type
2235 // check, as one of those users may eventually call the pointer. Increment
2236 // the unsafe use count to make sure it cannot reach zero.
2237 if (HasNonCallUses)
2238 ++NumUnsafeUses;
2239 for (DevirtCallSite Call : DevirtCalls) {
2240 CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CB,
2241 &NumUnsafeUses);
2242 }
2243
2244 CI->eraseFromParent();
2245 }
2246}
2247
2248void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) {
2249 auto *TypeId = dyn_cast<MDString>(Slot.TypeID);
2250 if (!TypeId)
2251 return;
2252 const TypeIdSummary *TidSummary =
2253 ImportSummary->getTypeIdSummary(TypeId->getString());
2254 if (!TidSummary)
2255 return;
2256 auto ResI = TidSummary->WPDRes.find(Slot.ByteOffset);
2257 if (ResI == TidSummary->WPDRes.end())
2258 return;
2259 const WholeProgramDevirtResolution &Res = ResI->second;
2260
2262 assert(!Res.SingleImplName.empty());
2263 // The type of the function in the declaration is irrelevant because every
2264 // call site will cast it to the correct type.
2265 Constant *SingleImpl =
2266 cast<Constant>(M.getOrInsertFunction(Res.SingleImplName,
2267 Type::getVoidTy(M.getContext()))
2268 .getCallee());
2269
2270 // This is the import phase so we should not be exporting anything.
2271 bool IsExported = false;
2272 applySingleImplDevirt(SlotInfo, SingleImpl, IsExported);
2273 assert(!IsExported);
2274 }
2275
2276 for (auto &CSByConstantArg : SlotInfo.ConstCSInfo) {
2277 auto I = Res.ResByArg.find(CSByConstantArg.first);
2278 if (I == Res.ResByArg.end())
2279 continue;
2280 auto &ResByArg = I->second;
2281 // FIXME: We should figure out what to do about the "function name" argument
2282 // to the apply* functions, as the function names are unavailable during the
2283 // importing phase. For now we just pass the empty string. This does not
2284 // impact correctness because the function names are just used for remarks.
2285 switch (ResByArg.TheKind) {
2287 applyUniformRetValOpt(CSByConstantArg.second, "", ResByArg.Info);
2288 break;
2290 Constant *UniqueMemberAddr =
2291 importGlobal(Slot, CSByConstantArg.first, "unique_member");
2292 applyUniqueRetValOpt(CSByConstantArg.second, "", ResByArg.Info,
2293 UniqueMemberAddr);
2294 break;
2295 }
2297 Constant *Byte = importConstant(Slot, CSByConstantArg.first, "byte",
2298 Int32Ty, ResByArg.Byte);
2299 Constant *Bit = importConstant(Slot, CSByConstantArg.first, "bit", Int8Ty,
2300 ResByArg.Bit);
2301 applyVirtualConstProp(CSByConstantArg.second, "", Byte, Bit);
2302 break;
2303 }
2304 default:
2305 break;
2306 }
2307 }
2308
2310 // The type of the function is irrelevant, because it's bitcast at calls
2311 // anyhow.
2312 auto *JT = cast<Function>(
2313 M.getOrInsertFunction(getGlobalName(Slot, {}, "branch_funnel"),
2314 Type::getVoidTy(M.getContext()))
2315 .getCallee());
2316 bool IsExported = false;
2317 applyICallBranchFunnel(SlotInfo, *JT, IsExported);
2318 assert(!IsExported);
2319 }
2320}
2321
2322void DevirtModule::removeRedundantTypeTests() {
2323 auto *True = ConstantInt::getTrue(M.getContext());
2324 for (auto &&U : NumUnsafeUsesForTypeTest) {
2325 if (U.second == 0) {
2326 U.first->replaceAllUsesWith(True);
2327 U.first->eraseFromParent();
2328 }
2329 }
2330}
2331
2332ValueInfo
2333DevirtModule::lookUpFunctionValueInfo(Function *TheFn,
2334 ModuleSummaryIndex *ExportSummary) {
2335 assert((ExportSummary != nullptr) &&
2336 "Caller guarantees ExportSummary is not nullptr");
2337
2338 const auto TheFnGUID = TheFn->getGUID();
2339 const auto TheFnGUIDWithExportedName =
2341 // Look up ValueInfo with the GUID in the current linkage.
2342 ValueInfo TheFnVI = ExportSummary->getValueInfo(TheFnGUID);
2343 // If no entry is found and GUID is different from GUID computed using
2344 // exported name, look up ValueInfo with the exported name unconditionally.
2345 // This is a fallback.
2346 //
2347 // The reason to have a fallback:
2348 // 1. LTO could enable global value internalization via
2349 // `enable-lto-internalization`.
2350 // 2. The GUID in ExportedSummary is computed using exported name.
2351 if ((!TheFnVI) && (TheFnGUID != TheFnGUIDWithExportedName)) {
2352 TheFnVI = ExportSummary->getValueInfo(TheFnGUIDWithExportedName);
2353 }
2354 return TheFnVI;
2355}
2356
2357bool DevirtModule::mustBeUnreachableFunction(
2358 Function *const F, ModuleSummaryIndex *ExportSummary) {
2360 return false;
2361 // First, learn unreachability by analyzing function IR.
2362 if (!F->isDeclaration()) {
2363 // A function must be unreachable if its entry block ends with an
2364 // 'unreachable'.
2365 return isa<UnreachableInst>(F->getEntryBlock().getTerminator());
2366 }
2367 // Learn unreachability from ExportSummary if ExportSummary is present.
2368 return ExportSummary &&
2370 DevirtModule::lookUpFunctionValueInfo(F, ExportSummary));
2371}
2372
2373bool DevirtModule::run() {
2374 // If only some of the modules were split, we cannot correctly perform
2375 // this transformation. We already checked for the presense of type tests
2376 // with partially split modules during the thin link, and would have emitted
2377 // an error if any were found, so here we can simply return.
2378 if ((ExportSummary && ExportSummary->partiallySplitLTOUnits()) ||
2379 (ImportSummary && ImportSummary->partiallySplitLTOUnits()))
2380 return false;
2381
2382 Function *PublicTypeTestFunc = nullptr;
2383 // If we are in speculative devirtualization mode, we can work on the public
2384 // type test intrinsics.
2385 if (DevirtSpeculatively)
2386 PublicTypeTestFunc =
2387 Intrinsic::getDeclarationIfExists(&M, Intrinsic::public_type_test);
2388 Function *TypeTestFunc =
2389 Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_test);
2390 Function *TypeCheckedLoadFunc =
2391 Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_checked_load);
2392 Function *TypeCheckedLoadRelativeFunc = Intrinsic::getDeclarationIfExists(
2393 &M, Intrinsic::type_checked_load_relative);
2394 Function *AssumeFunc =
2395 Intrinsic::getDeclarationIfExists(&M, Intrinsic::assume);
2396
2397 // Normally if there are no users of the devirtualization intrinsics in the
2398 // module, this pass has nothing to do. But if we are exporting, we also need
2399 // to handle any users that appear only in the function summaries.
2400 if (!ExportSummary &&
2401 (((!PublicTypeTestFunc || PublicTypeTestFunc->use_empty()) &&
2402 (!TypeTestFunc || TypeTestFunc->use_empty())) ||
2403 !AssumeFunc || AssumeFunc->use_empty()) &&
2404 (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()) &&
2405 (!TypeCheckedLoadRelativeFunc ||
2406 TypeCheckedLoadRelativeFunc->use_empty()))
2407 return false;
2408
2409 // Rebuild type metadata into a map for easy lookup.
2410 std::vector<VTableBits> Bits;
2411 DenseMap<Metadata *, std::set<TypeMemberInfo>> TypeIdMap;
2412 buildTypeIdentifierMap(Bits, TypeIdMap);
2413
2414 if (PublicTypeTestFunc && AssumeFunc)
2415 scanTypeTestUsers(PublicTypeTestFunc, TypeIdMap);
2416
2417 if (TypeTestFunc && AssumeFunc)
2418 scanTypeTestUsers(TypeTestFunc, TypeIdMap);
2419
2420 if (TypeCheckedLoadFunc)
2421 scanTypeCheckedLoadUsers(TypeCheckedLoadFunc);
2422
2423 if (TypeCheckedLoadRelativeFunc)
2424 scanTypeCheckedLoadUsers(TypeCheckedLoadRelativeFunc);
2425
2426 if (ImportSummary) {
2427 for (auto &S : CallSlots)
2428 importResolution(S.first, S.second);
2429
2430 removeRedundantTypeTests();
2431
2432 // We have lowered or deleted the type intrinsics, so we will no longer have
2433 // enough information to reason about the liveness of virtual function
2434 // pointers in GlobalDCE.
2435 for (GlobalVariable &GV : M.globals())
2436 GV.eraseMetadata(LLVMContext::MD_vcall_visibility);
2437
2438 // The rest of the code is only necessary when exporting or during regular
2439 // LTO, so we are done.
2440 return true;
2441 }
2442
2443 if (TypeIdMap.empty())
2444 return true;
2445
2446 // Collect information from summary about which calls to try to devirtualize.
2447 if (ExportSummary) {
2448 DenseMap<GlobalValue::GUID, TinyPtrVector<Metadata *>> MetadataByGUID;
2449 for (auto &P : TypeIdMap) {
2450 if (auto *TypeId = dyn_cast<MDString>(P.first))
2452 TypeId->getString())]
2453 .push_back(TypeId);
2454 }
2455
2456 for (auto &P : *ExportSummary) {
2457 for (auto &S : P.second.getSummaryList()) {
2458 auto *FS = dyn_cast<FunctionSummary>(S.get());
2459 if (!FS)
2460 continue;
2461 // FIXME: Only add live functions.
2462 for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
2463 for (Metadata *MD : MetadataByGUID[VF.GUID]) {
2464 CallSlots[{MD, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS);
2465 }
2466 }
2467 for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
2468 for (Metadata *MD : MetadataByGUID[VF.GUID]) {
2469 CallSlots[{MD, VF.Offset}].CSInfo.addSummaryTypeCheckedLoadUser(FS);
2470 }
2471 }
2472 for (const FunctionSummary::ConstVCall &VC :
2473 FS->type_test_assume_const_vcalls()) {
2474 for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
2475 CallSlots[{MD, VC.VFunc.Offset}]
2476 .ConstCSInfo[VC.Args]
2477 .addSummaryTypeTestAssumeUser(FS);
2478 }
2479 }
2480 for (const FunctionSummary::ConstVCall &VC :
2481 FS->type_checked_load_const_vcalls()) {
2482 for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
2483 CallSlots[{MD, VC.VFunc.Offset}]
2484 .ConstCSInfo[VC.Args]
2485 .addSummaryTypeCheckedLoadUser(FS);
2486 }
2487 }
2488 }
2489 }
2490 }
2491
2492 // For each (type, offset) pair:
2493 bool DidVirtualConstProp = false;
2494 std::map<std::string, GlobalValue *> DevirtTargets;
2495 for (auto &S : CallSlots) {
2496 // Search each of the members of the type identifier for the virtual
2497 // function implementation at offset S.first.ByteOffset, and add to
2498 // TargetsForSlot.
2499 std::vector<VirtualCallTarget> TargetsForSlot;
2500 WholeProgramDevirtResolution *Res = nullptr;
2501 const std::set<TypeMemberInfo> &TypeMemberInfos = TypeIdMap[S.first.TypeID];
2502 if (ExportSummary && isa<MDString>(S.first.TypeID) &&
2503 TypeMemberInfos.size())
2504 // For any type id used on a global's type metadata, create the type id
2505 // summary resolution regardless of whether we can devirtualize, so that
2506 // lower type tests knows the type id is not Unsat. If it was not used on
2507 // a global's type metadata, the TypeIdMap entry set will be empty, and
2508 // we don't want to create an entry (with the default Unknown type
2509 // resolution), which can prevent detection of the Unsat.
2510 Res = &ExportSummary
2511 ->getOrInsertTypeIdSummary(
2512 cast<MDString>(S.first.TypeID)->getString())
2513 .WPDRes[S.first.ByteOffset];
2514 if (tryFindVirtualCallTargets(TargetsForSlot, TypeMemberInfos,
2515 S.first.ByteOffset, ExportSummary)) {
2516 bool SingleImplDevirt =
2517 trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res);
2518 // Out of speculative devirtualization mode, Try to apply virtual constant
2519 // propagation or branch funneling.
2520 // TODO: This should eventually be enabled for non-public type tests.
2521 if (!SingleImplDevirt && !DevirtSpeculatively) {
2522 DidVirtualConstProp |=
2523 tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first);
2524
2525 tryICallBranchFunnel(TargetsForSlot, S.second, Res, S.first);
2526 }
2527
2528 // Collect functions devirtualized at least for one call site for stats.
2529 if (RemarksEnabled || AreStatisticsEnabled())
2530 for (const auto &T : TargetsForSlot)
2531 if (T.WasDevirt)
2532 DevirtTargets[std::string(T.Fn->getName())] = T.Fn;
2533 }
2534
2535 // CFI-specific: if we are exporting and any llvm.type.checked.load
2536 // intrinsics were *not* devirtualized, we need to add the resulting
2537 // llvm.type.test intrinsics to the function summaries so that the
2538 // LowerTypeTests pass will export them.
2539 if (ExportSummary && isa<MDString>(S.first.TypeID)) {
2541 cast<MDString>(S.first.TypeID)->getString());
2542 auto AddTypeTestsForTypeCheckedLoads = [&](CallSiteInfo &CSI) {
2543 if (!CSI.AllCallSitesDevirted)
2544 for (auto *FS : CSI.SummaryTypeCheckedLoadUsers)
2545 FS->addTypeTest(GUID);
2546 };
2547 AddTypeTestsForTypeCheckedLoads(S.second.CSInfo);
2548 for (auto &CCS : S.second.ConstCSInfo)
2549 AddTypeTestsForTypeCheckedLoads(CCS.second);
2550 }
2551 }
2552
2553 if (RemarksEnabled) {
2554 // Generate remarks for each devirtualized function.
2555 for (const auto &DT : DevirtTargets) {
2556 GlobalValue *GV = DT.second;
2557 auto *F = dyn_cast<Function>(GV);
2558 if (!F) {
2559 auto *A = dyn_cast<GlobalAlias>(GV);
2560 assert(A && isa<Function>(A->getAliasee()));
2561 F = dyn_cast<Function>(A->getAliasee());
2562 assert(F);
2563 }
2564
2565 using namespace ore;
2566 OREGetter(*F).emit(OptimizationRemark(DEBUG_TYPE, "Devirtualized", F)
2567 << "devirtualized " << NV("FunctionName", DT.first));
2568 }
2569 }
2570
2571 NumDevirtTargets += DevirtTargets.size();
2572
2573 removeRedundantTypeTests();
2574
2575 // Rebuild each global we touched as part of virtual constant propagation to
2576 // include the before and after bytes.
2577 if (DidVirtualConstProp)
2578 for (VTableBits &B : Bits)
2579 rebuildGlobal(B);
2580
2581 // We have lowered or deleted the type intrinsics, so we will no longer have
2582 // enough information to reason about the liveness of virtual function
2583 // pointers in GlobalDCE.
2584 for (GlobalVariable &GV : M.globals())
2585 GV.eraseMetadata(LLVMContext::MD_vcall_visibility);
2586
2587 for (auto *CI : CallsWithPtrAuthBundleRemoved)
2588 CI->eraseFromParent();
2589
2590 return true;
2591}
2592
2593void DevirtIndex::run() {
2594 if (ExportSummary.typeIdCompatibleVtableMap().empty())
2595 return;
2596
2597 // Assert that we haven't made any changes that would affect the hasLocal()
2598 // flag on the GUID summary info.
2599 assert(!ExportSummary.withInternalizeAndPromote() &&
2600 "Expect index-based WPD to run before internalization and promotion");
2601
2602 DenseMap<GlobalValue::GUID, std::vector<StringRef>> NameByGUID;
2603 for (const auto &P : ExportSummary.typeIdCompatibleVtableMap()) {
2604 NameByGUID[GlobalValue::getGUIDAssumingExternalLinkage(P.first)].push_back(
2605 P.first);
2606 // Create the type id summary resolution regardlness of whether we can
2607 // devirtualize, so that lower type tests knows the type id is used on
2608 // a global and not Unsat. We do this here rather than in the loop over the
2609 // CallSlots, since that handling will only see type tests that directly
2610 // feed assumes, and we would miss any that aren't currently handled by WPD
2611 // (such as type tests that feed assumes via phis).
2612 ExportSummary.getOrInsertTypeIdSummary(P.first);
2613 }
2614
2615 // Collect information from summary about which calls to try to devirtualize.
2616 for (auto &P : ExportSummary) {
2617 for (auto &S : P.second.getSummaryList()) {
2618 auto *FS = dyn_cast<FunctionSummary>(S.get());
2619 if (!FS)
2620 continue;
2621 // FIXME: Only add live functions.
2622 for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
2623 for (StringRef Name : NameByGUID[VF.GUID]) {
2624 CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS);
2625 }
2626 }
2627 for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
2628 for (StringRef Name : NameByGUID[VF.GUID]) {
2629 CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeCheckedLoadUser(FS);
2630 }
2631 }
2632 for (const FunctionSummary::ConstVCall &VC :
2633 FS->type_test_assume_const_vcalls()) {
2634 for (StringRef Name : NameByGUID[VC.VFunc.GUID]) {
2635 CallSlots[{Name, VC.VFunc.Offset}]
2636 .ConstCSInfo[VC.Args]
2637 .addSummaryTypeTestAssumeUser(FS);
2638 }
2639 }
2640 for (const FunctionSummary::ConstVCall &VC :
2641 FS->type_checked_load_const_vcalls()) {
2642 for (StringRef Name : NameByGUID[VC.VFunc.GUID]) {
2643 CallSlots[{Name, VC.VFunc.Offset}]
2644 .ConstCSInfo[VC.Args]
2645 .addSummaryTypeCheckedLoadUser(FS);
2646 }
2647 }
2648 }
2649 }
2650
2651 std::set<ValueInfo> DevirtTargets;
2652 // For each (type, offset) pair:
2653 for (auto &S : CallSlots) {
2654 // Search each of the members of the type identifier for the virtual
2655 // function implementation at offset S.first.ByteOffset, and add to
2656 // TargetsForSlot.
2657 std::vector<ValueInfo> TargetsForSlot;
2658 auto TidSummary = ExportSummary.getTypeIdCompatibleVtableSummary(S.first.TypeID);
2659 assert(TidSummary);
2660 // The type id summary would have been created while building the NameByGUID
2661 // map earlier.
2662 WholeProgramDevirtResolution *Res =
2663 &ExportSummary.getTypeIdSummary(S.first.TypeID)
2664 ->WPDRes[S.first.ByteOffset];
2665 if (tryFindVirtualCallTargets(TargetsForSlot, *TidSummary,
2666 S.first.ByteOffset)) {
2667
2668 if (!trySingleImplDevirt(TargetsForSlot, S.first, S.second, Res,
2669 DevirtTargets))
2670 continue;
2671 }
2672 }
2673
2674 // Optionally have the thin link print message for each devirtualized
2675 // function.
2677 for (const auto &DT : DevirtTargets)
2678 errs() << "Devirtualized call to " << DT << "\n";
2679
2680 NumDevirtTargets += DevirtTargets.size();
2681}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
dxil translate DXIL Translate Metadata
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
This file defines DenseMapInfo traits for DenseMap.
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
Provides passes for computing function attributes based on interprocedural analyses.
@ CallSiteInfo
#define DEBUG_TYPE
static void emitRemark(const Function &F, OptimizationRemarkEmitter &ORE, bool Skip)
Hexagon Common GEP
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
static cl::opt< PassSummaryAction > ClSummaryAction("lowertypetests-summary-action", cl::desc("What to do with the summary when running this pass"), cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"), clEnumValN(PassSummaryAction::Import, "import", "Import typeid resolutions from summary and globals"), clEnumValN(PassSummaryAction::Export, "export", "Export typeid resolutions to summary and globals")), cl::Hidden)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
This file contains the declarations for metadata subclasses.
Type::TypeID TypeID
#define T
static bool mustBeUnreachableFunction(const Function &F)
This is the interface to build a ModuleSummaryIndex for a module.
uint64_t IntrinsicInst * II
#define P(N)
FunctionAnalysisManager FAM
ModuleAnalysisManager MAM
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
WPDCheckMode
Mechanism to add runtime checking of devirtualization decisions, optionally trapping or falling back ...
static bool typeIDVisibleToRegularObj(StringRef TypeID, function_ref< bool(StringRef)> IsVisibleToRegularObj)
static Error checkCombinedSummaryForTesting(ModuleSummaryIndex *Summary)
static bool addCalls(VTableSlotInfo &SlotInfo, const ValueInfo &Callee)
static cl::opt< WPDCheckMode > DevirtCheckMode("wholeprogramdevirt-check", cl::Hidden, cl::desc("Type of checking for incorrect devirtualizations"), cl::values(clEnumValN(WPDCheckMode::None, "none", "No checking"), clEnumValN(WPDCheckMode::Trap, "trap", "Trap when incorrect"), clEnumValN(WPDCheckMode::Fallback, "fallback", "Fallback to indirect when incorrect")))
static cl::opt< bool > WholeProgramDevirtKeepUnreachableFunction("wholeprogramdevirt-keep-unreachable-function", cl::desc("Regard unreachable functions as possible devirtualize targets."), cl::Hidden, cl::init(true))
With Clang, a pure virtual class's deleting destructor is emitted as a llvm.trap intrinsic followed b...
static bool skipUpdateDueToValidation(GlobalVariable &GV, function_ref< bool(StringRef)> IsVisibleToRegularObj)
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition ArrayRef.h:186
static LLVM_ABI AttributeSet get(LLVMContext &C, const AttrBuilder &B)
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
bool isValid() const
Return true if the attribute is any kind of attribute.
Definition Attributes.h:259
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
void setCallingConv(CallingConv::ID CC)
bool arg_empty() const
std::optional< OperandBundleUse > getOperandBundle(StringRef Name) const
Return an operand bundle by name, if present.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
CallingConv::ID getCallingConv() const
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
FunctionType * getFunctionType() const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
static LLVM_ABI CallBase * removeOperandBundle(CallBase *CB, uint32_t ID, InsertPosition InsertPt=nullptr)
Create a clone of CB with operand bundle ID removed.
AttributeList getAttributes() const
Return the attributes for this call.
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
@ ICMP_NE
not equal
Definition InstrTypes.h:698
void setSelectionKind(SelectionKind Val)
Definition Comdat.h:48
static ConstantAsMetadata * get(Constant *C)
Definition Metadata.h:537
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition Constants.h:720
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static Constant * getInBoundsGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList)
Create an "inbounds" getelementptr.
Definition Constants.h:1311
static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, GEPNoWrapFlags NW=GEPNoWrapFlags::none(), std::optional< ConstantRange > InRange=std::nullopt, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
Definition Constants.h:1284
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:135
static Constant * getAnon(ArrayRef< Constant * > V, bool Packed=false)
Return an anonymous struct that has the specified elements.
Definition Constants.h:491
const Constant * stripPointerCasts() const
Definition Constant.h:222
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
static bool shouldExecute(CounterInfo &Counter)
bool empty() const
Definition DenseMap.h:109
Implements a dense probed hash-table based set.
Definition DenseSet.h:279
Subclass of Error for the sole purpose of identifying the success path in the type system.
Definition Error.h:334
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
Tagged union holding either a T or a Error.
Definition Error.h:485
Type * getParamType(unsigned i) const
Parameter type accessors.
ArrayRef< Type * > params() const
bool isVarArg() const
Type * getReturnType() const
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition Function.h:166
bool empty() const
Definition Function.h:863
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:209
bool arg_empty() const
Definition Function.h:906
const BasicBlock & front() const
Definition Function.h:864
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:764
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition Function.h:244
arg_iterator arg_begin()
Definition Function.h:872
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
adds the attribute to the list of attributes for the given arg.
Definition Function.cpp:667
std::optional< ProfileCount > getEntryCount(bool AllowSynthetic=false) const
Get the entry count for this function.
size_t arg_size() const
Definition Function.h:905
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:214
unsigned getInstructionCount() const
Returns the number of non-debug IR instructions in this function.
Definition Function.cpp:366
static LLVM_ABI Expected< GlobPattern > create(StringRef Pat, std::optional< size_t > MaxSubPatterns={})
static LLVM_ABI GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition Globals.cpp:613
bool hasMetadata() const
Return true if this value has any metadata attached to it.
Definition Value.h:602
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set a particular kind of metadata attachment.
LLVM_ABI VCallVisibility getVCallVisibility() const
LLVM_ABI bool eraseMetadata(unsigned KindID)
Erase all metadata attachments with the given kind.
MDNode * getMetadata(unsigned KindID) const
Get the current metadata attachments for the given kind, if any.
Definition Value.h:576
LLVM_ABI void setVCallVisibilityMetadata(VCallVisibility Visibility)
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
Definition Globals.cpp:78
static bool isLocalLinkage(LinkageTypes Linkage)
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:329
static bool isAvailableExternallyLinkage(LinkageTypes Linkage)
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e.
@ HiddenVisibility
The GV is hidden.
Definition GlobalValue.h:69
void setVisibility(VisibilityTypes V)
@ PrivateLinkage
Like Internal, but omit from symbol table.
Definition GlobalValue.h:61
@ InternalLinkage
Rename collisions when linking (static functions).
Definition GlobalValue.h:60
@ ExternalLinkage
Externally visible function.
Definition GlobalValue.h:53
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
MaybeAlign getAlign() const
Returns the alignment of the given variable.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
uint64_t getBitMask() const
Return a bitmask with ones set for all of the bits that can be set by an unsigned version of this typ...
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1572
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
bool doesNotAccessMemory() const
Whether this function accesses no memory.
Definition ModRef.h:220
Root of the metadata hierarchy.
Definition Metadata.h:64
Class to hold module path string table and global value map, and encapsulate methods for operating on...
const TypeIdSummary * getTypeIdSummary(StringRef TypeId) const
This returns either a pointer to the type id summary (if present in the summary map) or null (if not ...
ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const
Return a ValueInfo for the index value_type (convenient when iterating index).
const ModuleHash & getModuleHash(const StringRef ModPath) const
Get the module SHA1 hash recorded for the given module path.
static constexpr const char * getRegularLTOModuleName()
static std::string getGlobalNameForLocal(StringRef Name, ModuleHash ModHash)
Convenience method for creating a promoted global name for the given value name of a local,...
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
Analysis providing profile information.
static ReturnInst * Create(LLVMContext &C, Value *retVal=nullptr, InsertPosition InsertBefore=nullptr)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool contains(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
Definition StringRef.h:426
Target - Wrapper for Target specific information.
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:280
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:139
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:397
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
bool use_empty() const
Definition Value.h:346
LLVM_ABI bool eraseMetadata(unsigned KindID)
Erase all metadata attachments with the given kind.
iterator_range< use_iterator > uses()
Definition Value.h:380
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition DenseSet.h:180
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
CallInst * Call
Changed
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI Function * getDeclarationIfExists(const Module *M, ID id)
Look up the Function declaration of the intrinsic id in the Module M and return it if it exists.
bool match(Val *V, const Pattern &P)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
@ Assume
Do not drop type tests (default).
DiagnosticInfoOptimizationBase::Argument NV
Context & getContext() const
Definition BasicBlock.h:99
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
@ OF_TextWithCRLF
The file should be opened in text mode and use a carriage linefeed '\r '.
Definition FileSystem.h:764
LLVM_ABI uint64_t findLowestOffset(ArrayRef< VirtualCallTarget > Targets, bool IsAfter, uint64_t Size)
LLVM_ABI void setAfterReturnValues(MutableArrayRef< VirtualCallTarget > Targets, uint64_t AllocAfter, unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit)
LLVM_ABI void setBeforeReturnValues(MutableArrayRef< VirtualCallTarget > Targets, uint64_t AllocBefore, unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit)
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI MemoryEffects computeFunctionBodyMemoryAccess(Function &F, AAResults &AAR)
Returns the memory access properties of this copy of the function.
static cl::opt< bool > DisableWholeProgramVisibility("disable-whole-program-visibility", cl::Hidden, cl::desc("Disable whole program visibility (overrides enabling options)"))
Provide a way to force disable whole program for debugging or workarounds, when enabled via the linke...
static cl::opt< bool > WholeProgramVisibility("whole-program-visibility", cl::Hidden, cl::desc("Enable whole program visibility"))
Provide a way to force enable whole program visibility in tests.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
Definition InstrProf.h:296
static cl::opt< unsigned > ClThreshold("wholeprogramdevirt-branch-funnel-threshold", cl::Hidden, cl::init(10), cl::desc("Maximum number of call targets per " "call site to enable branch funnels"))
@ Export
Export information to summary.
Definition IPO.h:57
@ None
Do nothing.
Definition IPO.h:55
@ Import
Import information from summary.
Definition IPO.h:56
static cl::opt< std::string > ClReadSummary("wholeprogramdevirt-read-summary", cl::desc("Read summary from given bitcode or YAML file before running pass"), cl::Hidden)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2198
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
LLVM_ABI ModuleSummaryIndex buildModuleSummaryIndex(const Module &M, std::function< BlockFrequencyInfo *(const Function &F)> GetBFICallback, ProfileSummaryInfo *PSI, std::function< const StackSafetyInfo *(const Function &F)> GetSSICallback=[](const Function &F) -> const StackSafetyInfo *{ return nullptr;})
Direct function to compute a ModuleSummaryIndex from a given module.
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1305
LLVM_ABI bool hasWholeProgramVisibility(bool WholeProgramVisibilityEnabledInLTO)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
LLVM_ABI void writeIndexToFile(const ModuleSummaryIndex &Index, raw_ostream &Out, const ModuleToSummariesForIndexTy *ModuleToSummariesForIndex=nullptr, const GVSummaryPtrSet *DecSummaries=nullptr)
Write the specified module summary index to the given raw output stream, where it will be written in ...
LLVM_ABI Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
@ invalid_argument
Definition Errc.h:56
static cl::opt< std::string > ClWriteSummary("wholeprogramdevirt-write-summary", cl::desc("Write summary to given bitcode or YAML file after running pass. " "Output file format is deduced from extension: *.bc means writing " "bitcode, otherwise YAML"), cl::Hidden)
LLVM_ABI void updatePublicTypeTestCalls(Module &M, bool WholeProgramVisibilityEnabledInLTO)
LLVM_ABI void getVisibleToRegularObjVtableGUIDs(ModuleSummaryIndex &Index, DenseSet< GlobalValue::GUID > &VisibleToRegularObjSymbols, function_ref< bool(StringRef)> IsVisibleToRegularObj)
Based on typeID string, get all associated vtable GUIDS that are visible to regular objects.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void findDevirtualizableCallsForTypeCheckedLoad(SmallVectorImpl< DevirtCallSite > &DevirtCalls, SmallVectorImpl< Instruction * > &LoadedPtrs, SmallVectorImpl< Instruction * > &Preds, bool &HasNonCallUses, const CallInst *CI, DominatorTree &DT)
Given a call to the intrinsic @llvm.type.checked.load, find all devirtualizable call sites based on t...
LLVM_ABI CallBase & versionCallSite(CallBase &CB, Value *Callee, MDNode *BranchWeights)
Predicate and clone the given call site.
LLVM_ABI bool AreStatisticsEnabled()
Check if statistics are enabled.
LLVM_ABI void updateIndexWPDForExports(ModuleSummaryIndex &Summary, function_ref< bool(StringRef, ValueInfo)> isExported, std::map< ValueInfo, std::vector< VTableSlotSummary > > &LocalWPDTargetsMap)
Call after cross-module importing to update the recorded single impl devirt target names for any loca...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI void setExplicitlyUnknownFunctionEntryCount(Function &F, StringRef PassName)
Analogous to setExplicitlyUnknownBranchWeights, but for functions and their entry counts.
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
static cl::list< std::string > SkipFunctionNames("wholeprogramdevirt-skip", cl::desc("Prevent function(s) from being devirtualized"), cl::Hidden, cl::CommaSeparated)
Provide way to prevent certain function from being devirtualized.
LLVM_ABI void runWholeProgramDevirtOnIndex(ModuleSummaryIndex &Summary, std::set< GlobalValue::GUID > &ExportedGUIDs, std::map< ValueInfo, std::vector< VTableSlotSummary > > &LocalWPDTargetsMap)
Perform index-based whole program devirtualization on the Summary index.
static cl::opt< PassSummaryAction > ClSummaryAction("wholeprogramdevirt-summary-action", cl::desc("What to do with the summary when running this pass"), cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"), clEnumValN(PassSummaryAction::Import, "import", "Import typeid resolutions from summary and globals"), clEnumValN(PassSummaryAction::Export, "export", "Export typeid resolutions to summary and globals")), cl::Hidden)
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
Expected< T > errorOrToExpected(ErrorOr< T > &&EO)
Convert an ErrorOr<T> to an Expected<T>.
Definition Error.h:1245
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
static cl::opt< bool > ClDevirtualizeSpeculatively("devirtualize-speculatively", cl::desc("Enable speculative devirtualization optimization"), cl::init(false))
LLVM_ABI Error errorCodeToError(std::error_code EC)
Helper for converting an std::error_code to a Error.
Definition Error.cpp:107
cl::opt< bool > ProfcheckDisableMetadataFixes("profcheck-disable-metadata-fixes", cl::Hidden, cl::init(false), cl::desc("Disable metadata propagation fixes discovered through Issue #147390"))
Definition Metadata.cpp:64
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
std::vector< TypeIdOffsetVtableInfo > TypeIdCompatibleVtableInfo
List of vtable definitions decorated by a particular type identifier, and their corresponding offsets...
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
static cl::opt< bool > PrintSummaryDevirt("wholeprogramdevirt-print-index-based", cl::Hidden, cl::desc("Print index-based devirtualization messages"))
void consumeError(Error Err)
Consume a Error without doing anything.
Definition Error.h:1083
void findDevirtualizableCallsForTypeTest(SmallVectorImpl< DevirtCallSite > &DevirtCalls, SmallVectorImpl< CallInst * > &Assumes, const CallInst *CI, DominatorTree &DT)
Given a call to the intrinsic @llvm.type.test, find all devirtualizable call sites based on the call ...
LLVM_ABI void updateVCallVisibilityInModule(Module &M, bool WholeProgramVisibilityEnabledInLTO, const DenseSet< GlobalValue::GUID > &DynamicExportSymbols, bool ValidateAllVtablesHaveTypeInfos, function_ref< bool(StringRef)> IsVisibleToRegularObj)
If whole program visibility asserted, then upgrade all public vcall visibility metadata on vtable def...
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
std::pair< Function *, Constant * > getFunctionAtVTableOffset(GlobalVariable *GV, uint64_t Offset, Module &M)
Given a vtable and a specified offset, returns the function and the trivial pointer at the specified ...
LLVM_ABI void updateVCallVisibilityInIndex(ModuleSummaryIndex &Index, bool WholeProgramVisibilityEnabledInLTO, const DenseSet< GlobalValue::GUID > &DynamicExportSymbols, const DenseSet< GlobalValue::GUID > &VisibleToRegularObjSymbols)
If whole program visibility asserted, then upgrade all public vcall visibility metadata on vtable def...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Class to accumulate and hold information about a callee.
static unsigned getHashValue(const VTableSlotSummary &I)
static bool isEqual(const VTableSlotSummary &LHS, const VTableSlotSummary &RHS)
static bool isEqual(const VTableSlot &LHS, const VTableSlot &RHS)
static unsigned getHashValue(const VTableSlot &I)
An information struct used to provide DenseMap with the various necessary components for a given valu...
The following data structures summarize type metadata information.
std::map< uint64_t, WholeProgramDevirtResolution > WPDRes
Mapping from byte offset to whole-program devirt resolution for that (typeid, byte offset) pair.
TypeTestResolution TTRes
@ Unsat
Unsatisfiable type (i.e. no global has this type metadata)
enum llvm::TypeTestResolution::Kind TheKind
Struct that holds a reference to a particular GUID in a global value summary.
ArrayRef< std::unique_ptr< GlobalValueSummary > > getSummaryList() const
const ModuleSummaryIndex * ImportSummary
ModuleSummaryIndex * ExportSummary
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &)
@ UniformRetVal
Uniform return value optimization.
@ VirtualConstProp
Virtual constant propagation.
@ UniqueRetVal
Unique return value optimization.
uint64_t Info
Additional information for the resolution:
enum llvm::WholeProgramDevirtResolution::ByArg::Kind TheKind
enum llvm::WholeProgramDevirtResolution::Kind TheKind
std::map< std::vector< uint64_t >, ByArg > ResByArg
Resolutions for calls with all constant integer arguments (excluding the first argument,...
@ SingleImpl
Single implementation devirtualization.
@ BranchFunnel
When retpoline mitigation is enabled, use a branch funnel that is defined in the merged module.
LLVM_ABI VirtualCallTarget(GlobalValue *Fn, const TypeMemberInfo *TM)