LLVM 22.0.0git
WholeProgramDevirt.cpp
Go to the documentation of this file.
1//===- WholeProgramDevirt.cpp - Whole program virtual call optimization ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass implements whole program optimization of virtual calls in cases
10// where we know (via !type metadata) that the list of callees is fixed. This
11// includes the following:
12// - Single implementation devirtualization: if a virtual call has a single
13// possible callee, replace all calls with a direct call to that callee.
14// - Virtual constant propagation: if the virtual function's return type is an
15// integer <=64 bits and all possible callees are readnone, for each class and
16// each list of constant arguments: evaluate the function, store the return
17// value alongside the virtual table, and rewrite each virtual call as a load
18// from the virtual table.
19// - Uniform return value optimization: if the conditions for virtual constant
20// propagation hold and each function returns the same constant value, replace
21// each virtual call with that constant.
22// - Unique return value optimization for i1 return values: if the conditions
23// for virtual constant propagation hold and a single vtable's function
24// returns 0, or a single vtable's function returns 1, replace each virtual
25// call with a comparison of the vptr against that vtable's address.
26//
27// This pass is intended to be used during the regular/thin and non-LTO
28// pipelines:
29//
30// During regular LTO, the pass determines the best optimization for each
31// virtual call and applies the resolutions directly to virtual calls that are
32// eligible for virtual call optimization (i.e. calls that use either of the
33// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics).
34//
35// During hybrid Regular/ThinLTO, the pass operates in two phases:
36// - Export phase: this is run during the thin link over a single merged module
37// that contains all vtables with !type metadata that participate in the link.
38// The pass computes a resolution for each virtual call and stores it in the
39// type identifier summary.
40// - Import phase: this is run during the thin backends over the individual
41// modules. The pass applies the resolutions previously computed during the
42// import phase to each eligible virtual call.
43//
44// During ThinLTO, the pass operates in two phases:
45// - Export phase: this is run during the thin link over the index which
46// contains a summary of all vtables with !type metadata that participate in
47// the link. It computes a resolution for each virtual call and stores it in
48// the type identifier summary. Only single implementation devirtualization
49// is supported.
50// - Import phase: (same as with hybrid case above).
51//
52// During Speculative devirtualization mode -not restricted to LTO-:
53// - The pass applies speculative devirtualization without requiring any type of
54// visibility.
55// - Skips other features like virtual constant propagation, uniform return
56// value optimization, unique return value optimization and branch funnels as
57// they need LTO.
58// - This mode is enabled via 'devirtualize-speculatively' flag.
59//
60//===----------------------------------------------------------------------===//
61
63#include "llvm/ADT/ArrayRef.h"
64#include "llvm/ADT/DenseMap.h"
66#include "llvm/ADT/DenseSet.h"
67#include "llvm/ADT/MapVector.h"
69#include "llvm/ADT/Statistic.h"
79#include "llvm/IR/Constants.h"
80#include "llvm/IR/DataLayout.h"
81#include "llvm/IR/DebugLoc.h"
84#include "llvm/IR/Dominators.h"
85#include "llvm/IR/Function.h"
86#include "llvm/IR/GlobalAlias.h"
88#include "llvm/IR/IRBuilder.h"
89#include "llvm/IR/InstrTypes.h"
90#include "llvm/IR/Instruction.h"
92#include "llvm/IR/Intrinsics.h"
93#include "llvm/IR/LLVMContext.h"
94#include "llvm/IR/MDBuilder.h"
95#include "llvm/IR/Metadata.h"
96#include "llvm/IR/Module.h"
98#include "llvm/IR/PassManager.h"
100#include "llvm/Support/Casting.h"
103#include "llvm/Support/Errc.h"
104#include "llvm/Support/Error.h"
109#include "llvm/Transforms/IPO.h"
114#include <algorithm>
115#include <cmath>
116#include <cstddef>
117#include <map>
118#include <set>
119#include <string>
120
121using namespace llvm;
122using namespace wholeprogramdevirt;
123
124#define DEBUG_TYPE "wholeprogramdevirt"
125
126STATISTIC(NumDevirtTargets, "Number of whole program devirtualization targets");
127STATISTIC(NumSingleImpl, "Number of single implementation devirtualizations");
128STATISTIC(NumBranchFunnel, "Number of branch funnels");
129STATISTIC(NumUniformRetVal, "Number of uniform return value optimizations");
130STATISTIC(NumUniqueRetVal, "Number of unique return value optimizations");
131STATISTIC(NumVirtConstProp1Bit,
132 "Number of 1 bit virtual constant propagations");
133STATISTIC(NumVirtConstProp, "Number of virtual constant propagations");
134DEBUG_COUNTER(CallsToDevirt, "calls-to-devirt",
135 "Controls how many calls should be devirtualized.");
136
137namespace llvm {
138
140 "wholeprogramdevirt-summary-action",
141 cl::desc("What to do with the summary when running this pass"),
142 cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"),
144 "Import typeid resolutions from summary and globals"),
146 "Export typeid resolutions to summary and globals")),
147 cl::Hidden);
148
150 "wholeprogramdevirt-read-summary",
151 cl::desc(
152 "Read summary from given bitcode or YAML file before running pass"),
153 cl::Hidden);
154
156 "wholeprogramdevirt-write-summary",
157 cl::desc("Write summary to given bitcode or YAML file after running pass. "
158 "Output file format is deduced from extension: *.bc means writing "
159 "bitcode, otherwise YAML"),
160 cl::Hidden);
161
162// TODO: This option eventually should support any public visibility vtables
163// with/out LTO.
165 "devirtualize-speculatively",
166 cl::desc("Enable speculative devirtualization optimization"),
167 cl::init(false));
168
170 ClThreshold("wholeprogramdevirt-branch-funnel-threshold", cl::Hidden,
171 cl::init(10),
172 cl::desc("Maximum number of call targets per "
173 "call site to enable branch funnels"));
174
175static cl::opt<bool>
176 PrintSummaryDevirt("wholeprogramdevirt-print-index-based", cl::Hidden,
177 cl::desc("Print index-based devirtualization messages"));
178
179/// Provide a way to force enable whole program visibility in tests.
180/// This is needed to support legacy tests that don't contain
181/// !vcall_visibility metadata (the mere presense of type tests
182/// previously implied hidden visibility).
183static cl::opt<bool>
184 WholeProgramVisibility("whole-program-visibility", cl::Hidden,
185 cl::desc("Enable whole program visibility"));
186
187/// Provide a way to force disable whole program for debugging or workarounds,
188/// when enabled via the linker.
190 "disable-whole-program-visibility", cl::Hidden,
191 cl::desc("Disable whole program visibility (overrides enabling options)"));
192
193/// Provide way to prevent certain function from being devirtualized
195 SkipFunctionNames("wholeprogramdevirt-skip",
196 cl::desc("Prevent function(s) from being devirtualized"),
198
200
201} // end namespace llvm
202
203/// With Clang, a pure virtual class's deleting destructor is emitted as a
204/// `llvm.trap` intrinsic followed by an unreachable IR instruction. In the
205/// context of whole program devirtualization, the deleting destructor of a pure
206/// virtual class won't be invoked by the source code so safe to skip as a
207/// devirtualize target.
208///
209/// However, not all unreachable functions are safe to skip. In some cases, the
210/// program intends to run such functions and terminate, for instance, a unit
211/// test may run a death test. A non-test program might (or allowed to) invoke
212/// such functions to report failures (whether/when it's a good practice or not
213/// is a different topic).
214///
215/// This option is enabled to keep an unreachable function as a possible
216/// devirtualize target to conservatively keep the program behavior.
217///
218/// TODO: Make a pure virtual class's deleting destructor precisely identifiable
219/// in Clang's codegen for more devirtualization in LLVM.
221 "wholeprogramdevirt-keep-unreachable-function",
222 cl::desc("Regard unreachable functions as possible devirtualize targets."),
223 cl::Hidden, cl::init(true));
224
225/// Mechanism to add runtime checking of devirtualization decisions, optionally
226/// trapping or falling back to indirect call on any that are not correct.
227/// Trapping mode is useful for debugging undefined behavior leading to failures
228/// with WPD. Fallback mode is useful for ensuring safety when whole program
229/// visibility may be compromised.
232 "wholeprogramdevirt-check", cl::Hidden,
233 cl::desc("Type of checking for incorrect devirtualizations"),
234 cl::values(clEnumValN(WPDCheckMode::None, "none", "No checking"),
235 clEnumValN(WPDCheckMode::Trap, "trap", "Trap when incorrect"),
237 "Fallback to indirect when incorrect")));
238
239namespace {
240struct PatternList {
241 std::vector<GlobPattern> Patterns;
242 template <class T> void init(const T &StringList) {
243 for (const auto &S : StringList)
245 Patterns.push_back(std::move(*Pat));
246 }
247 bool match(StringRef S) {
248 for (const GlobPattern &P : Patterns)
249 if (P.match(S))
250 return true;
251 return false;
252 }
253};
254} // namespace
255
256// Find the minimum offset that we may store a value of size Size bits at. If
257// IsAfter is set, look for an offset before the object, otherwise look for an
258// offset after the object.
261 bool IsAfter, uint64_t Size) {
262 // Find a minimum offset taking into account only vtable sizes.
263 uint64_t MinByte = 0;
264 for (const VirtualCallTarget &Target : Targets) {
265 if (IsAfter)
266 MinByte = std::max(MinByte, Target.minAfterBytes());
267 else
268 MinByte = std::max(MinByte, Target.minBeforeBytes());
269 }
270
271 // Build a vector of arrays of bytes covering, for each target, a slice of the
272 // used region (see AccumBitVector::BytesUsed in
273 // llvm/Transforms/IPO/WholeProgramDevirt.h) starting at MinByte. Effectively,
274 // this aligns the used regions to start at MinByte.
275 //
276 // In this example, A, B and C are vtables, # is a byte already allocated for
277 // a virtual function pointer, AAAA... (etc.) are the used regions for the
278 // vtables and Offset(X) is the value computed for the Offset variable below
279 // for X.
280 //
281 // Offset(A)
282 // | |
283 // |MinByte
284 // A: ################AAAAAAAA|AAAAAAAA
285 // B: ########BBBBBBBBBBBBBBBB|BBBB
286 // C: ########################|CCCCCCCCCCCCCCCC
287 // | Offset(B) |
288 //
289 // This code produces the slices of A, B and C that appear after the divider
290 // at MinByte.
291 std::vector<ArrayRef<uint8_t>> Used;
292 for (const VirtualCallTarget &Target : Targets) {
293 ArrayRef<uint8_t> VTUsed = IsAfter ? Target.TM->Bits->After.BytesUsed
294 : Target.TM->Bits->Before.BytesUsed;
295 uint64_t Offset = IsAfter ? MinByte - Target.minAfterBytes()
296 : MinByte - Target.minBeforeBytes();
297
298 // Disregard used regions that are smaller than Offset. These are
299 // effectively all-free regions that do not need to be checked.
300 if (VTUsed.size() > Offset)
301 Used.push_back(VTUsed.slice(Offset));
302 }
303
304 if (Size == 1) {
305 // Find a free bit in each member of Used.
306 for (unsigned I = 0;; ++I) {
307 uint8_t BitsUsed = 0;
308 for (auto &&B : Used)
309 if (I < B.size())
310 BitsUsed |= B[I];
311 if (BitsUsed != 0xff)
312 return (MinByte + I) * 8 + llvm::countr_zero(uint8_t(~BitsUsed));
313 }
314 } else {
315 // Find a free (Size/8) byte region in each member of Used.
316 // FIXME: see if alignment helps.
317 for (unsigned I = 0;; ++I) {
318 for (auto &&B : Used) {
319 unsigned Byte = 0;
320 while ((I + Byte) < B.size() && Byte < (Size / 8)) {
321 if (B[I + Byte])
322 goto NextI;
323 ++Byte;
324 }
325 }
326 // Rounding up ensures the constant is always stored at address we
327 // can directly load from without misalignment.
328 return alignTo((MinByte + I) * 8, Size);
329 NextI:;
330 }
331 }
332}
333
335 MutableArrayRef<VirtualCallTarget> Targets, uint64_t AllocBefore,
336 unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit) {
337 if (BitWidth == 1)
338 OffsetByte = -(AllocBefore / 8 + 1);
339 else
340 OffsetByte = -((AllocBefore + 7) / 8 + (BitWidth + 7) / 8);
341 OffsetBit = AllocBefore % 8;
342
343 for (VirtualCallTarget &Target : Targets) {
344 if (BitWidth == 1)
345 Target.setBeforeBit(AllocBefore);
346 else
347 Target.setBeforeBytes(AllocBefore, (BitWidth + 7) / 8);
348 }
349}
350
353 unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit) {
354 if (BitWidth == 1)
355 OffsetByte = AllocAfter / 8;
356 else
357 OffsetByte = (AllocAfter + 7) / 8;
358 OffsetBit = AllocAfter % 8;
359
360 for (VirtualCallTarget &Target : Targets) {
361 if (BitWidth == 1)
362 Target.setAfterBit(AllocAfter);
363 else
364 Target.setAfterBytes(AllocAfter, (BitWidth + 7) / 8);
365 }
366}
367
372
373namespace {
374
375// A slot in a set of virtual tables. The TypeID identifies the set of virtual
376// tables, and the ByteOffset is the offset in bytes from the address point to
377// the virtual function pointer.
378struct VTableSlot {
379 Metadata *TypeID;
380 uint64_t ByteOffset;
381};
382
383} // end anonymous namespace
384
385template <> struct llvm::DenseMapInfo<VTableSlot> {
394 static unsigned getHashValue(const VTableSlot &I) {
397 }
398 static bool isEqual(const VTableSlot &LHS,
399 const VTableSlot &RHS) {
400 return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset;
401 }
402};
403
413 static unsigned getHashValue(const VTableSlotSummary &I) {
416 }
417 static bool isEqual(const VTableSlotSummary &LHS,
418 const VTableSlotSummary &RHS) {
419 return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset;
420 }
421};
422
423// Returns true if the function must be unreachable based on ValueInfo.
424//
425// In particular, identifies a function as unreachable in the following
426// conditions
427// 1) All summaries are live.
428// 2) All function summaries indicate it's unreachable
429// 3) There is no non-function with the same GUID (which is rare)
432 return false;
433
434 if ((!TheFnVI) || TheFnVI.getSummaryList().empty()) {
435 // Returns false if ValueInfo is absent, or the summary list is empty
436 // (e.g., function declarations).
437 return false;
438 }
439
440 for (const auto &Summary : TheFnVI.getSummaryList()) {
441 // Conservatively returns false if any non-live functions are seen.
442 // In general either all summaries should be live or all should be dead.
443 if (!Summary->isLive())
444 return false;
445 if (auto *FS = dyn_cast<FunctionSummary>(Summary->getBaseObject())) {
446 if (!FS->fflags().MustBeUnreachable)
447 return false;
448 }
449 // Be conservative if a non-function has the same GUID (which is rare).
450 else
451 return false;
452 }
453 // All function summaries are live and all of them agree that the function is
454 // unreachble.
455 return true;
456}
457
458namespace {
459// A virtual call site. VTable is the loaded virtual table pointer, and CS is
460// the indirect virtual call.
461struct VirtualCallSite {
462 Value *VTable = nullptr;
463 CallBase &CB;
464
465 // If non-null, this field points to the associated unsafe use count stored in
466 // the DevirtModule::NumUnsafeUsesForTypeTest map below. See the description
467 // of that field for details.
468 unsigned *NumUnsafeUses = nullptr;
469
470 void
471 emitRemark(const StringRef OptName, const StringRef TargetName,
472 function_ref<OptimizationRemarkEmitter &(Function &)> OREGetter) {
473 Function *F = CB.getCaller();
474 DebugLoc DLoc = CB.getDebugLoc();
475 BasicBlock *Block = CB.getParent();
476
477 using namespace ore;
478 OREGetter(*F).emit(OptimizationRemark(DEBUG_TYPE, OptName, DLoc, Block)
479 << NV("Optimization", OptName)
480 << ": devirtualized a call to "
481 << NV("FunctionName", TargetName));
482 }
483
484 void replaceAndErase(
485 const StringRef OptName, const StringRef TargetName, bool RemarksEnabled,
486 function_ref<OptimizationRemarkEmitter &(Function &)> OREGetter,
487 Value *New) {
488 if (RemarksEnabled)
489 emitRemark(OptName, TargetName, OREGetter);
490 CB.replaceAllUsesWith(New);
491 if (auto *II = dyn_cast<InvokeInst>(&CB)) {
492 BranchInst::Create(II->getNormalDest(), CB.getIterator());
493 II->getUnwindDest()->removePredecessor(II->getParent());
494 }
495 CB.eraseFromParent();
496 // This use is no longer unsafe.
497 if (NumUnsafeUses)
498 --*NumUnsafeUses;
499 }
500};
501
502// Call site information collected for a specific VTableSlot and possibly a list
503// of constant integer arguments. The grouping by arguments is handled by the
504// VTableSlotInfo class.
505struct CallSiteInfo {
506 /// The set of call sites for this slot. Used during regular LTO and the
507 /// import phase of ThinLTO (as well as the export phase of ThinLTO for any
508 /// call sites that appear in the merged module itself); in each of these
509 /// cases we are directly operating on the call sites at the IR level.
510 std::vector<VirtualCallSite> CallSites;
511
512 /// Whether all call sites represented by this CallSiteInfo, including those
513 /// in summaries, have been devirtualized. This starts off as true because a
514 /// default constructed CallSiteInfo represents no call sites.
515 ///
516 /// If at the end of the pass there are still undevirtualized calls, we will
517 /// need to add a use of llvm.type.test to each of the function summaries in
518 /// the vector.
519 bool AllCallSitesDevirted = true;
520
521 // These fields are used during the export phase of ThinLTO and reflect
522 // information collected from function summaries.
523
524 /// CFI-specific: a vector containing the list of function summaries that use
525 /// the llvm.type.checked.load intrinsic and therefore will require
526 /// resolutions for llvm.type.test in order to implement CFI checks if
527 /// devirtualization was unsuccessful.
528 std::vector<FunctionSummary *> SummaryTypeCheckedLoadUsers;
529
530 /// A vector containing the list of function summaries that use
531 /// assume(llvm.type.test).
532 std::vector<FunctionSummary *> SummaryTypeTestAssumeUsers;
533
534 bool isExported() const {
535 return !SummaryTypeCheckedLoadUsers.empty() ||
536 !SummaryTypeTestAssumeUsers.empty();
537 }
538
539 void addSummaryTypeCheckedLoadUser(FunctionSummary *FS) {
540 SummaryTypeCheckedLoadUsers.push_back(FS);
541 AllCallSitesDevirted = false;
542 }
543
544 void addSummaryTypeTestAssumeUser(FunctionSummary *FS) {
545 SummaryTypeTestAssumeUsers.push_back(FS);
546 AllCallSitesDevirted = false;
547 }
548
549 void markDevirt() { AllCallSitesDevirted = true; }
550};
551
552// Call site information collected for a specific VTableSlot.
553struct VTableSlotInfo {
554 // The set of call sites which do not have all constant integer arguments
555 // (excluding "this").
556 CallSiteInfo CSInfo;
557
558 // The set of call sites with all constant integer arguments (excluding
559 // "this"), grouped by argument list.
560 std::map<std::vector<uint64_t>, CallSiteInfo> ConstCSInfo;
561
562 void addCallSite(Value *VTable, CallBase &CB, unsigned *NumUnsafeUses);
563
564private:
565 CallSiteInfo &findCallSiteInfo(CallBase &CB);
566};
567
568CallSiteInfo &VTableSlotInfo::findCallSiteInfo(CallBase &CB) {
569 std::vector<uint64_t> Args;
570 auto *CBType = dyn_cast<IntegerType>(CB.getType());
571 if (!CBType || CBType->getBitWidth() > 64 || CB.arg_empty())
572 return CSInfo;
573 for (auto &&Arg : drop_begin(CB.args())) {
574 auto *CI = dyn_cast<ConstantInt>(Arg);
575 if (!CI || CI->getBitWidth() > 64)
576 return CSInfo;
577 Args.push_back(CI->getZExtValue());
578 }
579 return ConstCSInfo[Args];
580}
581
582void VTableSlotInfo::addCallSite(Value *VTable, CallBase &CB,
583 unsigned *NumUnsafeUses) {
584 auto &CSI = findCallSiteInfo(CB);
585 CSI.AllCallSitesDevirted = false;
586 CSI.CallSites.push_back({VTable, CB, NumUnsafeUses});
587}
588
589struct DevirtModule {
590 Module &M;
593
594 ModuleSummaryIndex *const ExportSummary;
595 const ModuleSummaryIndex *const ImportSummary;
596
597 IntegerType *const Int8Ty;
598 PointerType *const Int8PtrTy;
599 IntegerType *const Int32Ty;
600 IntegerType *const Int64Ty;
601 IntegerType *const IntPtrTy;
602 /// Sizeless array type, used for imported vtables. This provides a signal
603 /// to analyzers that these imports may alias, as they do for example
604 /// when multiple unique return values occur in the same vtable.
605 ArrayType *const Int8Arr0Ty;
606
607 const bool RemarksEnabled;
608 std::function<OptimizationRemarkEmitter &(Function &)> OREGetter;
609 MapVector<VTableSlot, VTableSlotInfo> CallSlots;
610
611 // Calls that have already been optimized. We may add a call to multiple
612 // VTableSlotInfos if vtable loads are coalesced and need to make sure not to
613 // optimize a call more than once.
614 SmallPtrSet<CallBase *, 8> OptimizedCalls;
615
616 // Store calls that had their ptrauth bundle removed. They are to be deleted
617 // at the end of the optimization.
618 SmallVector<CallBase *, 8> CallsWithPtrAuthBundleRemoved;
619
620 // This map keeps track of the number of "unsafe" uses of a loaded function
621 // pointer. The key is the associated llvm.type.test intrinsic call generated
622 // by this pass. An unsafe use is one that calls the loaded function pointer
623 // directly. Every time we eliminate an unsafe use (for example, by
624 // devirtualizing it or by applying virtual constant propagation), we
625 // decrement the value stored in this map. If a value reaches zero, we can
626 // eliminate the type check by RAUWing the associated llvm.type.test call with
627 // true.
628 std::map<CallInst *, unsigned> NumUnsafeUsesForTypeTest;
629 PatternList FunctionsToSkip;
630
631 const bool DevirtSpeculatively;
632 DevirtModule(Module &M, ModuleAnalysisManager &MAM,
633 ModuleSummaryIndex *ExportSummary,
634 const ModuleSummaryIndex *ImportSummary,
635 bool DevirtSpeculatively)
636 : M(M), MAM(MAM),
637 FAM(MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()),
638 ExportSummary(ExportSummary), ImportSummary(ImportSummary),
639 Int8Ty(Type::getInt8Ty(M.getContext())),
640 Int8PtrTy(PointerType::getUnqual(M.getContext())),
641 Int32Ty(Type::getInt32Ty(M.getContext())),
642 Int64Ty(Type::getInt64Ty(M.getContext())),
643 IntPtrTy(M.getDataLayout().getIntPtrType(M.getContext(), 0)),
644 Int8Arr0Ty(ArrayType::get(Type::getInt8Ty(M.getContext()), 0)),
645 RemarksEnabled(areRemarksEnabled()),
646 OREGetter([&](Function &F) -> OptimizationRemarkEmitter & {
647 return FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
648 }),
649 DevirtSpeculatively(DevirtSpeculatively) {
650 assert(!(ExportSummary && ImportSummary));
651 FunctionsToSkip.init(SkipFunctionNames);
652 }
653
654 bool areRemarksEnabled();
655
656 void
657 scanTypeTestUsers(Function *TypeTestFunc,
658 DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap);
659 void scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc);
660
661 void buildTypeIdentifierMap(
662 std::vector<VTableBits> &Bits,
663 DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap);
664
665 bool
666 tryFindVirtualCallTargets(std::vector<VirtualCallTarget> &TargetsForSlot,
667 const std::set<TypeMemberInfo> &TypeMemberInfos,
668 uint64_t ByteOffset,
669 ModuleSummaryIndex *ExportSummary);
670
671 void applySingleImplDevirt(VTableSlotInfo &SlotInfo, Constant *TheFn,
672 bool &IsExported);
673 bool trySingleImplDevirt(ModuleSummaryIndex *ExportSummary,
675 VTableSlotInfo &SlotInfo,
676 WholeProgramDevirtResolution *Res);
677
678 void applyICallBranchFunnel(VTableSlotInfo &SlotInfo, Function &JT,
679 bool &IsExported);
680 void tryICallBranchFunnel(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
681 VTableSlotInfo &SlotInfo,
682 WholeProgramDevirtResolution *Res, VTableSlot Slot);
683
684 bool tryEvaluateFunctionsWithArgs(
686 ArrayRef<uint64_t> Args);
687
688 void applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
689 uint64_t TheRetVal);
690 bool tryUniformRetValOpt(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
691 CallSiteInfo &CSInfo,
692 WholeProgramDevirtResolution::ByArg *Res);
693
694 // Returns the global symbol name that is used to export information about the
695 // given vtable slot and list of arguments.
696 std::string getGlobalName(VTableSlot Slot, ArrayRef<uint64_t> Args,
697 StringRef Name);
698
699 bool shouldExportConstantsAsAbsoluteSymbols();
700
701 // This function is called during the export phase to create a symbol
702 // definition containing information about the given vtable slot and list of
703 // arguments.
704 void exportGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args, StringRef Name,
705 Constant *C);
706 void exportConstant(VTableSlot Slot, ArrayRef<uint64_t> Args, StringRef Name,
707 uint32_t Const, uint32_t &Storage);
708
709 // This function is called during the import phase to create a reference to
710 // the symbol definition created during the export phase.
711 Constant *importGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
712 StringRef Name);
713 Constant *importConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
714 StringRef Name, IntegerType *IntTy,
715 uint32_t Storage);
716
717 Constant *getMemberAddr(const TypeMemberInfo *M);
718
719 void applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName, bool IsOne,
720 Constant *UniqueMemberAddr);
721 bool tryUniqueRetValOpt(unsigned BitWidth,
723 CallSiteInfo &CSInfo,
724 WholeProgramDevirtResolution::ByArg *Res,
725 VTableSlot Slot, ArrayRef<uint64_t> Args);
726
727 void applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
728 Constant *Byte, Constant *Bit);
729 bool tryVirtualConstProp(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
730 VTableSlotInfo &SlotInfo,
731 WholeProgramDevirtResolution *Res, VTableSlot Slot);
732
733 void rebuildGlobal(VTableBits &B);
734
735 // Apply the summary resolution for Slot to all virtual calls in SlotInfo.
736 void importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo);
737
738 // If we were able to eliminate all unsafe uses for a type checked load,
739 // eliminate the associated type tests by replacing them with true.
740 void removeRedundantTypeTests();
741
742 bool run();
743
744 // Look up the corresponding ValueInfo entry of `TheFn` in `ExportSummary`.
745 //
746 // Caller guarantees that `ExportSummary` is not nullptr.
747 static ValueInfo lookUpFunctionValueInfo(Function *TheFn,
748 ModuleSummaryIndex *ExportSummary);
749
750 // Returns true if the function definition must be unreachable.
751 //
752 // Note if this helper function returns true, `F` is guaranteed
753 // to be unreachable; if it returns false, `F` might still
754 // be unreachable but not covered by this helper function.
755 //
756 // Implementation-wise, if function definition is present, IR is analyzed; if
757 // not, look up function flags from ExportSummary as a fallback.
758 static bool mustBeUnreachableFunction(Function *const F,
759 ModuleSummaryIndex *ExportSummary);
760
761 // Lower the module using the action and summary passed as command line
762 // arguments. For testing purposes only.
763 static bool runForTesting(Module &M, ModuleAnalysisManager &MAM,
764 bool DevirtSpeculatively);
765};
766
767struct DevirtIndex {
768 ModuleSummaryIndex &ExportSummary;
769 // The set in which to record GUIDs exported from their module by
770 // devirtualization, used by client to ensure they are not internalized.
771 std::set<GlobalValue::GUID> &ExportedGUIDs;
772 // A map in which to record the information necessary to locate the WPD
773 // resolution for local targets in case they are exported by cross module
774 // importing.
775 std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap;
776
777 MapVector<VTableSlotSummary, VTableSlotInfo> CallSlots;
778
779 PatternList FunctionsToSkip;
780
781 DevirtIndex(
782 ModuleSummaryIndex &ExportSummary,
783 std::set<GlobalValue::GUID> &ExportedGUIDs,
784 std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap)
785 : ExportSummary(ExportSummary), ExportedGUIDs(ExportedGUIDs),
786 LocalWPDTargetsMap(LocalWPDTargetsMap) {
787 FunctionsToSkip.init(SkipFunctionNames);
788 }
789
790 bool tryFindVirtualCallTargets(std::vector<ValueInfo> &TargetsForSlot,
791 const TypeIdCompatibleVtableInfo TIdInfo,
792 uint64_t ByteOffset);
793
794 bool trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
795 VTableSlotSummary &SlotSummary,
796 VTableSlotInfo &SlotInfo,
797 WholeProgramDevirtResolution *Res,
798 std::set<ValueInfo> &DevirtTargets);
799
800 void run();
801};
802} // end anonymous namespace
803
806 if (UseCommandLine) {
807 if (!DevirtModule::runForTesting(M, MAM, ClDevirtualizeSpeculatively))
808 return PreservedAnalyses::all();
810 }
811
812 std::optional<ModuleSummaryIndex> Index;
814 // Build the ExportSummary from the module.
816 "ExportSummary is expected to be empty in non-LTO mode");
817 ProfileSummaryInfo PSI(M);
818 Index.emplace(buildModuleSummaryIndex(M, nullptr, &PSI));
819 ExportSummary = Index.has_value() ? &Index.value() : nullptr;
820 }
821 if (!DevirtModule(M, MAM, ExportSummary, ImportSummary, DevirtSpeculatively)
822 .run())
823 return PreservedAnalyses::all();
825}
826
827// Enable whole program visibility if enabled by client (e.g. linker) or
828// internal option, and not force disabled.
829bool llvm::hasWholeProgramVisibility(bool WholeProgramVisibilityEnabledInLTO) {
830 return (WholeProgramVisibilityEnabledInLTO || WholeProgramVisibility) &&
832}
833
834static bool
836 function_ref<bool(StringRef)> IsVisibleToRegularObj) {
837 // TypeID for member function pointer type is an internal construct
838 // and won't exist in IsVisibleToRegularObj. The full TypeID
839 // will be present and participate in invalidation.
840 if (TypeID.ends_with(".virtual"))
841 return false;
842
843 // TypeID that doesn't start with Itanium mangling (_ZTS) will be
844 // non-externally visible types which cannot interact with
845 // external native files. See CodeGenModule::CreateMetadataIdentifierImpl.
846 if (!TypeID.consume_front("_ZTS"))
847 return false;
848
849 // TypeID is keyed off the type name symbol (_ZTS). However, the native
850 // object may not contain this symbol if it does not contain a key
851 // function for the base type and thus only contains a reference to the
852 // type info (_ZTI). To catch this case we query using the type info
853 // symbol corresponding to the TypeID.
854 std::string TypeInfo = ("_ZTI" + TypeID).str();
855 return IsVisibleToRegularObj(TypeInfo);
856}
857
858static bool
860 function_ref<bool(StringRef)> IsVisibleToRegularObj) {
862 GV.getMetadata(LLVMContext::MD_type, Types);
863
864 for (auto *Type : Types)
865 if (auto *TypeID = dyn_cast<MDString>(Type->getOperand(1).get()))
866 return typeIDVisibleToRegularObj(TypeID->getString(),
867 IsVisibleToRegularObj);
868
869 return false;
870}
871
872/// If whole program visibility asserted, then upgrade all public vcall
873/// visibility metadata on vtable definitions to linkage unit visibility in
874/// Module IR (for regular or hybrid LTO).
876 Module &M, bool WholeProgramVisibilityEnabledInLTO,
877 const DenseSet<GlobalValue::GUID> &DynamicExportSymbols,
878 bool ValidateAllVtablesHaveTypeInfos,
879 function_ref<bool(StringRef)> IsVisibleToRegularObj) {
880 if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO))
881 return;
882 for (GlobalVariable &GV : M.globals()) {
883 // Add linkage unit visibility to any variable with type metadata, which are
884 // the vtable definitions. We won't have an existing vcall_visibility
885 // metadata on vtable definitions with public visibility.
886 if (GV.hasMetadata(LLVMContext::MD_type) &&
888 // Don't upgrade the visibility for symbols exported to the dynamic
889 // linker, as we have no information on their eventual use.
890 !DynamicExportSymbols.count(GV.getGUID()) &&
891 // With validation enabled, we want to exclude symbols visible to
892 // regular objects. Local symbols will be in this group due to the
893 // current implementation but those with VCallVisibilityTranslationUnit
894 // will have already been marked in clang so are unaffected.
895 !(ValidateAllVtablesHaveTypeInfos &&
896 skipUpdateDueToValidation(GV, IsVisibleToRegularObj)))
898 }
899}
900
902 bool WholeProgramVisibilityEnabledInLTO) {
903 llvm::TimeTraceScope timeScope("Update public type test calls");
904 Function *PublicTypeTestFunc =
905 Intrinsic::getDeclarationIfExists(&M, Intrinsic::public_type_test);
906 if (!PublicTypeTestFunc)
907 return;
908 if (hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO)) {
909 Function *TypeTestFunc =
910 Intrinsic::getOrInsertDeclaration(&M, Intrinsic::type_test);
911 for (Use &U : make_early_inc_range(PublicTypeTestFunc->uses())) {
912 auto *CI = cast<CallInst>(U.getUser());
913 auto *NewCI = CallInst::Create(
914 TypeTestFunc, {CI->getArgOperand(0), CI->getArgOperand(1)}, {}, "",
915 CI->getIterator());
916 CI->replaceAllUsesWith(NewCI);
917 CI->eraseFromParent();
918 }
919 } else {
920 // TODO: Don't replace public type tests when speculative devirtualization
921 // gets enabled in LTO mode.
922 auto *True = ConstantInt::getTrue(M.getContext());
923 for (Use &U : make_early_inc_range(PublicTypeTestFunc->uses())) {
924 auto *CI = cast<CallInst>(U.getUser());
925 CI->replaceAllUsesWith(True);
926 CI->eraseFromParent();
927 }
928 }
929}
930
931/// Based on typeID string, get all associated vtable GUIDS that are
932/// visible to regular objects.
934 ModuleSummaryIndex &Index,
935 DenseSet<GlobalValue::GUID> &VisibleToRegularObjSymbols,
936 function_ref<bool(StringRef)> IsVisibleToRegularObj) {
937 for (const auto &TypeID : Index.typeIdCompatibleVtableMap()) {
938 if (typeIDVisibleToRegularObj(TypeID.first, IsVisibleToRegularObj))
939 for (const TypeIdOffsetVtableInfo &P : TypeID.second)
940 VisibleToRegularObjSymbols.insert(P.VTableVI.getGUID());
941 }
942}
943
944/// If whole program visibility asserted, then upgrade all public vcall
945/// visibility metadata on vtable definition summaries to linkage unit
946/// visibility in Module summary index (for ThinLTO).
948 ModuleSummaryIndex &Index, bool WholeProgramVisibilityEnabledInLTO,
949 const DenseSet<GlobalValue::GUID> &DynamicExportSymbols,
950 const DenseSet<GlobalValue::GUID> &VisibleToRegularObjSymbols) {
951 if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO))
952 return;
953 for (auto &P : Index) {
954 // Don't upgrade the visibility for symbols exported to the dynamic
955 // linker, as we have no information on their eventual use.
956 if (DynamicExportSymbols.count(P.first))
957 continue;
958 // With validation enabled, we want to exclude symbols visible to regular
959 // objects. Local symbols will be in this group due to the current
960 // implementation but those with VCallVisibilityTranslationUnit will have
961 // already been marked in clang so are unaffected.
962 if (VisibleToRegularObjSymbols.count(P.first))
963 continue;
964 for (auto &S : P.second.getSummaryList()) {
965 auto *GVar = dyn_cast<GlobalVarSummary>(S.get());
966 if (!GVar ||
967 GVar->getVCallVisibility() != GlobalObject::VCallVisibilityPublic)
968 continue;
969 GVar->setVCallVisibility(GlobalObject::VCallVisibilityLinkageUnit);
970 }
971 }
972}
973
975 ModuleSummaryIndex &Summary, std::set<GlobalValue::GUID> &ExportedGUIDs,
976 std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
977 DevirtIndex(Summary, ExportedGUIDs, LocalWPDTargetsMap).run();
978}
979
981 ModuleSummaryIndex &Summary,
982 function_ref<bool(StringRef, ValueInfo)> IsExported,
983 std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
984 for (auto &T : LocalWPDTargetsMap) {
985 auto &VI = T.first;
986 // This was enforced earlier during trySingleImplDevirt.
987 assert(VI.getSummaryList().size() == 1 &&
988 "Devirt of local target has more than one copy");
989 auto &S = VI.getSummaryList()[0];
990 if (!IsExported(S->modulePath(), VI))
991 continue;
992
993 // It's been exported by a cross module import.
994 for (auto &SlotSummary : T.second) {
995 auto *TIdSum = Summary.getTypeIdSummary(SlotSummary.TypeID);
996 assert(TIdSum);
997 auto WPDRes = TIdSum->WPDRes.find(SlotSummary.ByteOffset);
998 assert(WPDRes != TIdSum->WPDRes.end());
999 WPDRes->second.SingleImplName = ModuleSummaryIndex::getGlobalNameForLocal(
1000 WPDRes->second.SingleImplName,
1001 Summary.getModuleHash(S->modulePath()));
1002 }
1003 }
1004}
1005
1007 // Check that summary index contains regular LTO module when performing
1008 // export to prevent occasional use of index from pure ThinLTO compilation
1009 // (-fno-split-lto-module). This kind of summary index is passed to
1010 // DevirtIndex::run, not to DevirtModule::run used by opt/runForTesting.
1011 const auto &ModPaths = Summary->modulePaths();
1013 !ModPaths.contains(ModuleSummaryIndex::getRegularLTOModuleName()))
1014 return createStringError(
1016 "combined summary should contain Regular LTO module");
1017 return ErrorSuccess();
1018}
1019
1020bool DevirtModule::runForTesting(Module &M, ModuleAnalysisManager &MAM,
1021 bool DevirtSpeculatively) {
1022 std::unique_ptr<ModuleSummaryIndex> Summary =
1023 std::make_unique<ModuleSummaryIndex>(/*HaveGVs=*/false);
1024
1025 // Handle the command-line summary arguments. This code is for testing
1026 // purposes only, so we handle errors directly.
1027 if (!ClReadSummary.empty()) {
1028 ExitOnError ExitOnErr("-wholeprogramdevirt-read-summary: " + ClReadSummary +
1029 ": ");
1030 auto ReadSummaryFile =
1032 if (Expected<std::unique_ptr<ModuleSummaryIndex>> SummaryOrErr =
1033 getModuleSummaryIndex(*ReadSummaryFile)) {
1034 Summary = std::move(*SummaryOrErr);
1035 ExitOnErr(checkCombinedSummaryForTesting(Summary.get()));
1036 } else {
1037 // Try YAML if we've failed with bitcode.
1038 consumeError(SummaryOrErr.takeError());
1039 yaml::Input In(ReadSummaryFile->getBuffer());
1040 In >> *Summary;
1041 ExitOnErr(errorCodeToError(In.error()));
1042 }
1043 }
1044
1045 bool Changed =
1046 DevirtModule(M, MAM,
1048 : nullptr,
1050 : nullptr,
1051 DevirtSpeculatively)
1052 .run();
1053
1054 if (!ClWriteSummary.empty()) {
1055 ExitOnError ExitOnErr(
1056 "-wholeprogramdevirt-write-summary: " + ClWriteSummary + ": ");
1057 std::error_code EC;
1058 if (StringRef(ClWriteSummary).ends_with(".bc")) {
1059 raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_None);
1060 ExitOnErr(errorCodeToError(EC));
1061 writeIndexToFile(*Summary, OS);
1062 } else {
1063 raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_TextWithCRLF);
1064 ExitOnErr(errorCodeToError(EC));
1065 yaml::Output Out(OS);
1066 Out << *Summary;
1067 }
1068 }
1069
1070 return Changed;
1071}
1072
1073void DevirtModule::buildTypeIdentifierMap(
1074 std::vector<VTableBits> &Bits,
1075 DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) {
1076 DenseMap<GlobalVariable *, VTableBits *> GVToBits;
1077 Bits.reserve(M.global_size());
1079 for (GlobalVariable &GV : M.globals()) {
1080 Types.clear();
1081 GV.getMetadata(LLVMContext::MD_type, Types);
1082 if (GV.isDeclaration() || Types.empty())
1083 continue;
1084
1085 VTableBits *&BitsPtr = GVToBits[&GV];
1086 if (!BitsPtr) {
1087 Bits.emplace_back();
1088 Bits.back().GV = &GV;
1089 Bits.back().ObjectSize =
1090 M.getDataLayout().getTypeAllocSize(GV.getInitializer()->getType());
1091 BitsPtr = &Bits.back();
1092 }
1093
1094 for (MDNode *Type : Types) {
1095 auto *TypeID = Type->getOperand(1).get();
1096
1097 uint64_t Offset =
1099 cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
1100 ->getZExtValue();
1101
1102 TypeIdMap[TypeID].insert({BitsPtr, Offset});
1103 }
1104 }
1105}
1106
1107bool DevirtModule::tryFindVirtualCallTargets(
1108 std::vector<VirtualCallTarget> &TargetsForSlot,
1109 const std::set<TypeMemberInfo> &TypeMemberInfos, uint64_t ByteOffset,
1110 ModuleSummaryIndex *ExportSummary) {
1111 for (const TypeMemberInfo &TM : TypeMemberInfos) {
1112 if (!TM.Bits->GV->isConstant())
1113 return false;
1114
1115 // Without DevirtSpeculatively, we cannot perform whole program
1116 // devirtualization analysis on a vtable with public LTO visibility.
1117 if (!DevirtSpeculatively && TM.Bits->GV->getVCallVisibility() ==
1119 return false;
1120
1121 Function *Fn = nullptr;
1122 Constant *C = nullptr;
1123 std::tie(Fn, C) =
1124 getFunctionAtVTableOffset(TM.Bits->GV, TM.Offset + ByteOffset, M);
1125
1126 if (!Fn)
1127 return false;
1128
1129 if (FunctionsToSkip.match(Fn->getName()))
1130 return false;
1131
1132 // We can disregard __cxa_pure_virtual as a possible call target, as
1133 // calls to pure virtuals are UB.
1134 if (Fn->getName() == "__cxa_pure_virtual")
1135 continue;
1136
1137 // In most cases empty functions will be overridden by the
1138 // implementation of the derived class, so we can skip them.
1139 if (DevirtSpeculatively && Fn->getReturnType()->isVoidTy() &&
1140 Fn->getInstructionCount() <= 1)
1141 continue;
1142
1143 // We can disregard unreachable functions as possible call targets, as
1144 // unreachable functions shouldn't be called.
1145 if (mustBeUnreachableFunction(Fn, ExportSummary))
1146 continue;
1147
1148 // Save the symbol used in the vtable to use as the devirtualization
1149 // target.
1150 auto *GV = dyn_cast<GlobalValue>(C);
1151 assert(GV);
1152 TargetsForSlot.push_back({GV, &TM});
1153 }
1154
1155 // Give up if we couldn't find any targets.
1156 return !TargetsForSlot.empty();
1157}
1158
1159bool DevirtIndex::tryFindVirtualCallTargets(
1160 std::vector<ValueInfo> &TargetsForSlot,
1161 const TypeIdCompatibleVtableInfo TIdInfo, uint64_t ByteOffset) {
1162 for (const TypeIdOffsetVtableInfo &P : TIdInfo) {
1163 // Find a representative copy of the vtable initializer.
1164 // We can have multiple available_externally, linkonce_odr and weak_odr
1165 // vtable initializers. We can also have multiple external vtable
1166 // initializers in the case of comdats, which we cannot check here.
1167 // The linker should give an error in this case.
1168 //
1169 // Also, handle the case of same-named local Vtables with the same path
1170 // and therefore the same GUID. This can happen if there isn't enough
1171 // distinguishing path when compiling the source file. In that case we
1172 // conservatively return false early.
1173 if (P.VTableVI.hasLocal() && P.VTableVI.getSummaryList().size() > 1)
1174 return false;
1175 const GlobalVarSummary *VS = nullptr;
1176 for (const auto &S : P.VTableVI.getSummaryList()) {
1177 auto *CurVS = cast<GlobalVarSummary>(S->getBaseObject());
1178 if (!CurVS->vTableFuncs().empty() ||
1179 // Previously clang did not attach the necessary type metadata to
1180 // available_externally vtables, in which case there would not
1181 // be any vtable functions listed in the summary and we need
1182 // to treat this case conservatively (in case the bitcode is old).
1183 // However, we will also not have any vtable functions in the
1184 // case of a pure virtual base class. In that case we do want
1185 // to set VS to avoid treating it conservatively.
1187 VS = CurVS;
1188 // We cannot perform whole program devirtualization analysis on a vtable
1189 // with public LTO visibility.
1190 if (VS->getVCallVisibility() == GlobalObject::VCallVisibilityPublic)
1191 return false;
1192 break;
1193 }
1194 }
1195 // There will be no VS if all copies are available_externally having no
1196 // type metadata. In that case we can't safely perform WPD.
1197 if (!VS)
1198 return false;
1199 if (!VS->isLive())
1200 continue;
1201 for (auto VTP : VS->vTableFuncs()) {
1202 if (VTP.VTableOffset != P.AddressPointOffset + ByteOffset)
1203 continue;
1204
1205 if (mustBeUnreachableFunction(VTP.FuncVI))
1206 continue;
1207
1208 TargetsForSlot.push_back(VTP.FuncVI);
1209 }
1210 }
1211
1212 // Give up if we couldn't find any targets.
1213 return !TargetsForSlot.empty();
1214}
1215
1216void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
1217 Constant *TheFn, bool &IsExported) {
1218 // Don't devirtualize function if we're told to skip it
1219 // in -wholeprogramdevirt-skip.
1220 if (FunctionsToSkip.match(TheFn->stripPointerCasts()->getName()))
1221 return;
1222 auto Apply = [&](CallSiteInfo &CSInfo) {
1223 for (auto &&VCallSite : CSInfo.CallSites) {
1224 if (!OptimizedCalls.insert(&VCallSite.CB).second)
1225 continue;
1226
1227 // Stop when the number of devirted calls reaches the cutoff.
1228 if (!DebugCounter::shouldExecute(CallsToDevirt))
1229 continue;
1230
1231 if (RemarksEnabled)
1232 VCallSite.emitRemark("single-impl",
1233 TheFn->stripPointerCasts()->getName(), OREGetter);
1234 NumSingleImpl++;
1235 auto &CB = VCallSite.CB;
1236 assert(!CB.getCalledFunction() && "devirtualizing direct call?");
1237 IRBuilder<> Builder(&CB);
1238 Value *Callee =
1239 Builder.CreateBitCast(TheFn, CB.getCalledOperand()->getType());
1240
1241 // If trap checking is enabled, add support to compare the virtual
1242 // function pointer to the devirtualized target. In case of a mismatch,
1243 // perform a debug trap.
1245 auto *Cond = Builder.CreateICmpNE(CB.getCalledOperand(), Callee);
1247 Cond, &CB, /*Unreachable=*/false,
1248 MDBuilder(M.getContext()).createUnlikelyBranchWeights());
1249 Builder.SetInsertPoint(ThenTerm);
1250 Function *TrapFn =
1251 Intrinsic::getOrInsertDeclaration(&M, Intrinsic::debugtrap);
1252 auto *CallTrap = Builder.CreateCall(TrapFn);
1253 CallTrap->setDebugLoc(CB.getDebugLoc());
1254 }
1255
1256 // If fallback checking or speculative devirtualization are enabled,
1257 // add support to compare the virtual function pointer to the
1258 // devirtualized target. In case of a mismatch, fall back to indirect
1259 // call.
1260 if (DevirtCheckMode == WPDCheckMode::Fallback || DevirtSpeculatively) {
1261 MDNode *Weights = MDBuilder(M.getContext()).createLikelyBranchWeights();
1262 // Version the indirect call site. If the called value is equal to the
1263 // given callee, 'NewInst' will be executed, otherwise the original call
1264 // site will be executed.
1265 CallBase &NewInst = versionCallSite(CB, Callee, Weights);
1266 NewInst.setCalledOperand(Callee);
1267 // Since the new call site is direct, we must clear metadata that
1268 // is only appropriate for indirect calls. This includes !prof and
1269 // !callees metadata.
1270 NewInst.setMetadata(LLVMContext::MD_prof, nullptr);
1271 NewInst.setMetadata(LLVMContext::MD_callees, nullptr);
1272 // Additionally, we should remove them from the fallback indirect call,
1273 // so that we don't attempt to perform indirect call promotion later.
1274 CB.setMetadata(LLVMContext::MD_prof, nullptr);
1275 CB.setMetadata(LLVMContext::MD_callees, nullptr);
1276 }
1277
1278 // In either trapping or non-checking mode, devirtualize original call.
1279 else {
1280 // Devirtualize unconditionally.
1281 CB.setCalledOperand(Callee);
1282 // Since the call site is now direct, we must clear metadata that
1283 // is only appropriate for indirect calls. This includes !prof and
1284 // !callees metadata.
1285 CB.setMetadata(LLVMContext::MD_prof, nullptr);
1286 CB.setMetadata(LLVMContext::MD_callees, nullptr);
1287 if (CB.getCalledOperand() &&
1289 auto *NewCS = CallBase::removeOperandBundle(
1291 CB.replaceAllUsesWith(NewCS);
1292 // Schedule for deletion at the end of pass run.
1293 CallsWithPtrAuthBundleRemoved.push_back(&CB);
1294 }
1295 }
1296
1297 // This use is no longer unsafe.
1298 if (VCallSite.NumUnsafeUses)
1299 --*VCallSite.NumUnsafeUses;
1300 }
1301 if (CSInfo.isExported())
1302 IsExported = true;
1303 CSInfo.markDevirt();
1304 };
1305 Apply(SlotInfo.CSInfo);
1306 for (auto &P : SlotInfo.ConstCSInfo)
1307 Apply(P.second);
1308}
1309
1310static bool addCalls(VTableSlotInfo &SlotInfo, const ValueInfo &Callee) {
1311 // We can't add calls if we haven't seen a definition
1312 if (Callee.getSummaryList().empty())
1313 return false;
1314
1315 // Insert calls into the summary index so that the devirtualized targets
1316 // are eligible for import.
1317 // FIXME: Annotate type tests with hotness. For now, mark these as hot
1318 // to better ensure we have the opportunity to inline them.
1319 bool IsExported = false;
1320 auto &S = Callee.getSummaryList()[0];
1321 CalleeInfo CI(CalleeInfo::HotnessType::Hot, /* HasTailCall = */ false,
1322 /* RelBF = */ 0);
1323 auto AddCalls = [&](CallSiteInfo &CSInfo) {
1324 for (auto *FS : CSInfo.SummaryTypeCheckedLoadUsers) {
1325 FS->addCall({Callee, CI});
1326 IsExported |= S->modulePath() != FS->modulePath();
1327 }
1328 for (auto *FS : CSInfo.SummaryTypeTestAssumeUsers) {
1329 FS->addCall({Callee, CI});
1330 IsExported |= S->modulePath() != FS->modulePath();
1331 }
1332 };
1333 AddCalls(SlotInfo.CSInfo);
1334 for (auto &P : SlotInfo.ConstCSInfo)
1335 AddCalls(P.second);
1336 return IsExported;
1337}
1338
1339bool DevirtModule::trySingleImplDevirt(
1340 ModuleSummaryIndex *ExportSummary,
1341 MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
1342 WholeProgramDevirtResolution *Res) {
1343 // See if the program contains a single implementation of this virtual
1344 // function.
1345 auto *TheFn = TargetsForSlot[0].Fn;
1346 for (auto &&Target : TargetsForSlot)
1347 if (TheFn != Target.Fn)
1348 return false;
1349
1350 // If so, update each call site to call that implementation directly.
1351 if (RemarksEnabled || AreStatisticsEnabled())
1352 TargetsForSlot[0].WasDevirt = true;
1353
1354 bool IsExported = false;
1355 applySingleImplDevirt(SlotInfo, TheFn, IsExported);
1356 if (!IsExported)
1357 return false;
1358
1359 // If the only implementation has local linkage, we must promote to external
1360 // to make it visible to thin LTO objects. We can only get here during the
1361 // ThinLTO export phase.
1362 if (TheFn->hasLocalLinkage()) {
1363 std::string NewName = (TheFn->getName() + ".llvm.merged").str();
1364
1365 // Since we are renaming the function, any comdats with the same name must
1366 // also be renamed. This is required when targeting COFF, as the comdat name
1367 // must match one of the names of the symbols in the comdat.
1368 if (Comdat *C = TheFn->getComdat()) {
1369 if (C->getName() == TheFn->getName()) {
1370 Comdat *NewC = M.getOrInsertComdat(NewName);
1371 NewC->setSelectionKind(C->getSelectionKind());
1372 for (GlobalObject &GO : M.global_objects())
1373 if (GO.getComdat() == C)
1374 GO.setComdat(NewC);
1375 }
1376 }
1377
1378 TheFn->setLinkage(GlobalValue::ExternalLinkage);
1379 TheFn->setVisibility(GlobalValue::HiddenVisibility);
1380 TheFn->setName(NewName);
1381 }
1382 if (ValueInfo TheFnVI = ExportSummary->getValueInfo(TheFn->getGUID()))
1383 // Any needed promotion of 'TheFn' has already been done during
1384 // LTO unit split, so we can ignore return value of AddCalls.
1385 addCalls(SlotInfo, TheFnVI);
1386
1388 Res->SingleImplName = std::string(TheFn->getName());
1389
1390 return true;
1391}
1392
1393bool DevirtIndex::trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
1394 VTableSlotSummary &SlotSummary,
1395 VTableSlotInfo &SlotInfo,
1396 WholeProgramDevirtResolution *Res,
1397 std::set<ValueInfo> &DevirtTargets) {
1398 // See if the program contains a single implementation of this virtual
1399 // function.
1400 auto TheFn = TargetsForSlot[0];
1401 for (auto &&Target : TargetsForSlot)
1402 if (TheFn != Target)
1403 return false;
1404
1405 // Don't devirtualize if we don't have target definition.
1406 auto Size = TheFn.getSummaryList().size();
1407 if (!Size)
1408 return false;
1409
1410 // Don't devirtualize function if we're told to skip it
1411 // in -wholeprogramdevirt-skip.
1412 if (FunctionsToSkip.match(TheFn.name()))
1413 return false;
1414
1415 // If the summary list contains multiple summaries where at least one is
1416 // a local, give up, as we won't know which (possibly promoted) name to use.
1417 if (TheFn.hasLocal() && Size > 1)
1418 return false;
1419
1420 // Collect functions devirtualized at least for one call site for stats.
1422 DevirtTargets.insert(TheFn);
1423
1424 auto &S = TheFn.getSummaryList()[0];
1425 bool IsExported = addCalls(SlotInfo, TheFn);
1426 if (IsExported)
1427 ExportedGUIDs.insert(TheFn.getGUID());
1428
1429 // Record in summary for use in devirtualization during the ThinLTO import
1430 // step.
1432 if (GlobalValue::isLocalLinkage(S->linkage())) {
1433 if (IsExported)
1434 // If target is a local function and we are exporting it by
1435 // devirtualizing a call in another module, we need to record the
1436 // promoted name.
1438 TheFn.name(), ExportSummary.getModuleHash(S->modulePath()));
1439 else {
1440 LocalWPDTargetsMap[TheFn].push_back(SlotSummary);
1441 Res->SingleImplName = std::string(TheFn.name());
1442 }
1443 } else
1444 Res->SingleImplName = std::string(TheFn.name());
1445
1446 // Name will be empty if this thin link driven off of serialized combined
1447 // index (e.g. llvm-lto). However, WPD is not supported/invoked for the
1448 // legacy LTO API anyway.
1449 assert(!Res->SingleImplName.empty());
1450
1451 return true;
1452}
1453
1454void DevirtModule::tryICallBranchFunnel(
1455 MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
1456 WholeProgramDevirtResolution *Res, VTableSlot Slot) {
1457 Triple T(M.getTargetTriple());
1458 if (T.getArch() != Triple::x86_64)
1459 return;
1460
1461 if (TargetsForSlot.size() > ClThreshold)
1462 return;
1463
1464 bool HasNonDevirt = !SlotInfo.CSInfo.AllCallSitesDevirted;
1465 if (!HasNonDevirt)
1466 for (auto &P : SlotInfo.ConstCSInfo)
1467 if (!P.second.AllCallSitesDevirted) {
1468 HasNonDevirt = true;
1469 break;
1470 }
1471
1472 if (!HasNonDevirt)
1473 return;
1474
1475 // If any GV is AvailableExternally, not to generate branch.funnel.
1476 // NOTE: It is to avoid crash in LowerTypeTest.
1477 // If the branch.funnel is generated, because GV.isDeclarationForLinker(),
1478 // in LowerTypeTestsModule::lower(), its GlobalTypeMember would NOT
1479 // be saved in GlobalTypeMembers[&GV]. Then crash happens in
1480 // buildBitSetsFromDisjointSet due to GlobalTypeMembers[&GV] is NULL.
1481 // Even doing experiment to save it in GlobalTypeMembers[&GV] and
1482 // making GlobalTypeMembers[&GV] be not NULL, crash could avoid from
1483 // buildBitSetsFromDisjointSet. But still report_fatal_error in Verifier
1484 // or SelectionDAGBuilder later, because operands linkage type consistency
1485 // check of icall.branch.funnel can not pass.
1486 for (auto &T : TargetsForSlot) {
1487 if (T.TM->Bits->GV->hasAvailableExternallyLinkage())
1488 return;
1489 }
1490
1491 FunctionType *FT =
1492 FunctionType::get(Type::getVoidTy(M.getContext()), {Int8PtrTy}, true);
1493 Function *JT;
1494 if (isa<MDString>(Slot.TypeID)) {
1496 M.getDataLayout().getProgramAddressSpace(),
1497 getGlobalName(Slot, {}, "branch_funnel"), &M);
1498 JT->setVisibility(GlobalValue::HiddenVisibility);
1499 } else {
1501 M.getDataLayout().getProgramAddressSpace(),
1502 "branch_funnel", &M);
1503 }
1504 JT->addParamAttr(0, Attribute::Nest);
1505
1506 std::vector<Value *> JTArgs;
1507 JTArgs.push_back(JT->arg_begin());
1508 for (auto &T : TargetsForSlot) {
1509 JTArgs.push_back(getMemberAddr(T.TM));
1510 JTArgs.push_back(T.Fn);
1511 }
1512
1513 BasicBlock *BB = BasicBlock::Create(M.getContext(), "", JT, nullptr);
1515 &M, llvm::Intrinsic::icall_branch_funnel, {});
1516
1517 auto *CI = CallInst::Create(Intr, JTArgs, "", BB);
1518 CI->setTailCallKind(CallInst::TCK_MustTail);
1519 ReturnInst::Create(M.getContext(), nullptr, BB);
1520
1521 bool IsExported = false;
1522 applyICallBranchFunnel(SlotInfo, *JT, IsExported);
1523 if (IsExported)
1525
1526 if (!JT->getEntryCount().has_value()) {
1527 // FIXME: we could pass through thinlto the necessary information.
1529 }
1530}
1531
1532void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
1533 Function &JT, bool &IsExported) {
1534 DenseMap<Function *, double> FunctionEntryCounts;
1535 auto Apply = [&](CallSiteInfo &CSInfo) {
1536 if (CSInfo.isExported())
1537 IsExported = true;
1538 if (CSInfo.AllCallSitesDevirted)
1539 return;
1540
1541 std::map<CallBase *, CallBase *> CallBases;
1542 for (auto &&VCallSite : CSInfo.CallSites) {
1543 CallBase &CB = VCallSite.CB;
1544
1545 if (CallBases.find(&CB) != CallBases.end()) {
1546 // When finding devirtualizable calls, it's possible to find the same
1547 // vtable passed to multiple llvm.type.test or llvm.type.checked.load
1548 // calls, which can cause duplicate call sites to be recorded in
1549 // [Const]CallSites. If we've already found one of these
1550 // call instances, just ignore it. It will be replaced later.
1551 continue;
1552 }
1553
1554 // Jump tables are only profitable if the retpoline mitigation is enabled.
1555 Attribute FSAttr = CB.getCaller()->getFnAttribute("target-features");
1556 if (!FSAttr.isValid() ||
1557 !FSAttr.getValueAsString().contains("+retpoline"))
1558 continue;
1559
1560 NumBranchFunnel++;
1561 if (RemarksEnabled)
1562 VCallSite.emitRemark("branch-funnel", JT.getName(), OREGetter);
1563
1564 // Pass the address of the vtable in the nest register, which is r10 on
1565 // x86_64.
1566 std::vector<Type *> NewArgs;
1567 NewArgs.push_back(Int8PtrTy);
1568 append_range(NewArgs, CB.getFunctionType()->params());
1569 FunctionType *NewFT =
1571 CB.getFunctionType()->isVarArg());
1572 IRBuilder<> IRB(&CB);
1573 std::vector<Value *> Args;
1574 Args.push_back(VCallSite.VTable);
1575 llvm::append_range(Args, CB.args());
1576
1577 CallBase *NewCS = nullptr;
1578 if (!JT.isDeclaration() && !ProfcheckDisableMetadataFixes) {
1579 // Accumulate the call frequencies of the original call site, and use
1580 // that as total entry count for the funnel function.
1581 auto &F = *CB.getCaller();
1582 auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
1583 auto EC = BFI.getBlockFreq(&F.getEntryBlock());
1584 auto CC = F.getEntryCount(/*AllowSynthetic=*/true);
1585 double CallCount = 0.0;
1586 if (EC.getFrequency() != 0 && CC && CC->getCount() != 0) {
1587 double CallFreq =
1588 static_cast<double>(
1589 BFI.getBlockFreq(CB.getParent()).getFrequency()) /
1590 EC.getFrequency();
1591 CallCount = CallFreq * CC->getCount();
1592 }
1593 FunctionEntryCounts[&JT] += CallCount;
1594 }
1595 if (isa<CallInst>(CB))
1596 NewCS = IRB.CreateCall(NewFT, &JT, Args);
1597 else
1598 NewCS =
1599 IRB.CreateInvoke(NewFT, &JT, cast<InvokeInst>(CB).getNormalDest(),
1600 cast<InvokeInst>(CB).getUnwindDest(), Args);
1601 NewCS->setCallingConv(CB.getCallingConv());
1602
1603 AttributeList Attrs = CB.getAttributes();
1604 std::vector<AttributeSet> NewArgAttrs;
1605 NewArgAttrs.push_back(AttributeSet::get(
1606 M.getContext(), ArrayRef<Attribute>{Attribute::get(
1607 M.getContext(), Attribute::Nest)}));
1608 for (unsigned I = 0; I + 2 < Attrs.getNumAttrSets(); ++I)
1609 NewArgAttrs.push_back(Attrs.getParamAttrs(I));
1610 NewCS->setAttributes(
1611 AttributeList::get(M.getContext(), Attrs.getFnAttrs(),
1612 Attrs.getRetAttrs(), NewArgAttrs));
1613
1614 CallBases[&CB] = NewCS;
1615
1616 // This use is no longer unsafe.
1617 if (VCallSite.NumUnsafeUses)
1618 --*VCallSite.NumUnsafeUses;
1619 }
1620 // Don't mark as devirtualized because there may be callers compiled without
1621 // retpoline mitigation, which would mean that they are lowered to
1622 // llvm.type.test and therefore require an llvm.type.test resolution for the
1623 // type identifier.
1624
1625 for (auto &[Old, New] : CallBases) {
1626 Old->replaceAllUsesWith(New);
1627 Old->eraseFromParent();
1628 }
1629 };
1630 Apply(SlotInfo.CSInfo);
1631 for (auto &P : SlotInfo.ConstCSInfo)
1632 Apply(P.second);
1633 for (auto &[F, C] : FunctionEntryCounts) {
1634 assert(!F->getEntryCount(/*AllowSynthetic=*/true) &&
1635 "Unexpected entry count for funnel that was freshly synthesized");
1636 F->setEntryCount(static_cast<uint64_t>(std::round(C)));
1637 }
1638}
1639
1640bool DevirtModule::tryEvaluateFunctionsWithArgs(
1642 ArrayRef<uint64_t> Args) {
1643 // Evaluate each function and store the result in each target's RetVal
1644 // field.
1645 for (VirtualCallTarget &Target : TargetsForSlot) {
1646 // TODO: Skip for now if the vtable symbol was an alias to a function,
1647 // need to evaluate whether it would be correct to analyze the aliasee
1648 // function for this optimization.
1649 auto *Fn = dyn_cast<Function>(Target.Fn);
1650 if (!Fn)
1651 return false;
1652
1653 if (Fn->arg_size() != Args.size() + 1)
1654 return false;
1655
1656 Evaluator Eval(M.getDataLayout(), nullptr);
1658 EvalArgs.push_back(
1660 for (unsigned I = 0; I != Args.size(); ++I) {
1661 auto *ArgTy =
1663 if (!ArgTy)
1664 return false;
1665 EvalArgs.push_back(ConstantInt::get(ArgTy, Args[I]));
1666 }
1667
1668 Constant *RetVal;
1669 if (!Eval.EvaluateFunction(Fn, RetVal, EvalArgs) ||
1670 !isa<ConstantInt>(RetVal))
1671 return false;
1672 Target.RetVal = cast<ConstantInt>(RetVal)->getZExtValue();
1673 }
1674 return true;
1675}
1676
1677void DevirtModule::applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
1678 uint64_t TheRetVal) {
1679 for (auto Call : CSInfo.CallSites) {
1680 if (!OptimizedCalls.insert(&Call.CB).second)
1681 continue;
1682 NumUniformRetVal++;
1683 Call.replaceAndErase(
1684 "uniform-ret-val", FnName, RemarksEnabled, OREGetter,
1685 ConstantInt::get(cast<IntegerType>(Call.CB.getType()), TheRetVal));
1686 }
1687 CSInfo.markDevirt();
1688}
1689
1690bool DevirtModule::tryUniformRetValOpt(
1691 MutableArrayRef<VirtualCallTarget> TargetsForSlot, CallSiteInfo &CSInfo,
1692 WholeProgramDevirtResolution::ByArg *Res) {
1693 // Uniform return value optimization. If all functions return the same
1694 // constant, replace all calls with that constant.
1695 uint64_t TheRetVal = TargetsForSlot[0].RetVal;
1696 for (const VirtualCallTarget &Target : TargetsForSlot)
1697 if (Target.RetVal != TheRetVal)
1698 return false;
1699
1700 if (CSInfo.isExported()) {
1702 Res->Info = TheRetVal;
1703 }
1704
1705 applyUniformRetValOpt(CSInfo, TargetsForSlot[0].Fn->getName(), TheRetVal);
1706 if (RemarksEnabled || AreStatisticsEnabled())
1707 for (auto &&Target : TargetsForSlot)
1708 Target.WasDevirt = true;
1709 return true;
1710}
1711
1712std::string DevirtModule::getGlobalName(VTableSlot Slot,
1713 ArrayRef<uint64_t> Args,
1714 StringRef Name) {
1715 std::string FullName = "__typeid_";
1716 raw_string_ostream OS(FullName);
1717 OS << cast<MDString>(Slot.TypeID)->getString() << '_' << Slot.ByteOffset;
1718 for (uint64_t Arg : Args)
1719 OS << '_' << Arg;
1720 OS << '_' << Name;
1721 return FullName;
1722}
1723
1724bool DevirtModule::shouldExportConstantsAsAbsoluteSymbols() {
1725 Triple T(M.getTargetTriple());
1726 return T.isX86() && T.getObjectFormat() == Triple::ELF;
1727}
1728
1729void DevirtModule::exportGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
1730 StringRef Name, Constant *C) {
1731 GlobalAlias *GA = GlobalAlias::create(Int8Ty, 0, GlobalValue::ExternalLinkage,
1732 getGlobalName(Slot, Args, Name), C, &M);
1734}
1735
1736void DevirtModule::exportConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
1737 StringRef Name, uint32_t Const,
1738 uint32_t &Storage) {
1739 if (shouldExportConstantsAsAbsoluteSymbols()) {
1740 exportGlobal(
1741 Slot, Args, Name,
1742 ConstantExpr::getIntToPtr(ConstantInt::get(Int32Ty, Const), Int8PtrTy));
1743 return;
1744 }
1745
1746 Storage = Const;
1747}
1748
1749Constant *DevirtModule::importGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
1750 StringRef Name) {
1751 GlobalVariable *GV =
1752 M.getOrInsertGlobal(getGlobalName(Slot, Args, Name), Int8Arr0Ty);
1754 return GV;
1755}
1756
1757Constant *DevirtModule::importConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
1758 StringRef Name, IntegerType *IntTy,
1759 uint32_t Storage) {
1760 if (!shouldExportConstantsAsAbsoluteSymbols())
1761 return ConstantInt::get(IntTy, Storage);
1762
1763 Constant *C = importGlobal(Slot, Args, Name);
1764 auto *GV = cast<GlobalVariable>(C->stripPointerCasts());
1765 C = ConstantExpr::getPtrToInt(C, IntTy);
1766
1767 // We only need to set metadata if the global is newly created, in which
1768 // case it would not have hidden visibility.
1769 if (GV->hasMetadata(LLVMContext::MD_absolute_symbol))
1770 return C;
1771
1772 auto SetAbsRange = [&](uint64_t Min, uint64_t Max) {
1773 auto *MinC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Min));
1774 auto *MaxC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Max));
1775 GV->setMetadata(LLVMContext::MD_absolute_symbol,
1776 MDNode::get(M.getContext(), {MinC, MaxC}));
1777 };
1778 unsigned AbsWidth = IntTy->getBitWidth();
1779 if (AbsWidth == IntPtrTy->getBitWidth()) {
1780 uint64_t AllOnes = IntTy->getBitMask();
1781 SetAbsRange(AllOnes, AllOnes); // Full set.
1782 } else {
1783 SetAbsRange(0, 1ull << AbsWidth);
1784 }
1785 return C;
1786}
1787
1788void DevirtModule::applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
1789 bool IsOne,
1790 Constant *UniqueMemberAddr) {
1791 for (auto &&Call : CSInfo.CallSites) {
1792 if (!OptimizedCalls.insert(&Call.CB).second)
1793 continue;
1794 IRBuilder<> B(&Call.CB);
1795 Value *Cmp =
1796 B.CreateICmp(IsOne ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE, Call.VTable,
1797 B.CreateBitCast(UniqueMemberAddr, Call.VTable->getType()));
1798 Cmp = B.CreateZExt(Cmp, Call.CB.getType());
1799 NumUniqueRetVal++;
1800 Call.replaceAndErase("unique-ret-val", FnName, RemarksEnabled, OREGetter,
1801 Cmp);
1802 }
1803 CSInfo.markDevirt();
1804}
1805
1806Constant *DevirtModule::getMemberAddr(const TypeMemberInfo *M) {
1807 return ConstantExpr::getGetElementPtr(Int8Ty, M->Bits->GV,
1808 ConstantInt::get(Int64Ty, M->Offset));
1809}
1810
1811bool DevirtModule::tryUniqueRetValOpt(
1812 unsigned BitWidth, MutableArrayRef<VirtualCallTarget> TargetsForSlot,
1813 CallSiteInfo &CSInfo, WholeProgramDevirtResolution::ByArg *Res,
1814 VTableSlot Slot, ArrayRef<uint64_t> Args) {
1815 // IsOne controls whether we look for a 0 or a 1.
1816 auto tryUniqueRetValOptFor = [&](bool IsOne) {
1817 const TypeMemberInfo *UniqueMember = nullptr;
1818 for (const VirtualCallTarget &Target : TargetsForSlot) {
1819 if (Target.RetVal == (IsOne ? 1 : 0)) {
1820 if (UniqueMember)
1821 return false;
1822 UniqueMember = Target.TM;
1823 }
1824 }
1825
1826 // We should have found a unique member or bailed out by now. We already
1827 // checked for a uniform return value in tryUniformRetValOpt.
1828 assert(UniqueMember);
1829
1830 Constant *UniqueMemberAddr = getMemberAddr(UniqueMember);
1831 if (CSInfo.isExported()) {
1833 Res->Info = IsOne;
1834
1835 exportGlobal(Slot, Args, "unique_member", UniqueMemberAddr);
1836 }
1837
1838 // Replace each call with the comparison.
1839 applyUniqueRetValOpt(CSInfo, TargetsForSlot[0].Fn->getName(), IsOne,
1840 UniqueMemberAddr);
1841
1842 // Update devirtualization statistics for targets.
1843 if (RemarksEnabled || AreStatisticsEnabled())
1844 for (auto &&Target : TargetsForSlot)
1845 Target.WasDevirt = true;
1846
1847 return true;
1848 };
1849
1850 if (BitWidth == 1) {
1851 if (tryUniqueRetValOptFor(true))
1852 return true;
1853 if (tryUniqueRetValOptFor(false))
1854 return true;
1855 }
1856 return false;
1857}
1858
1859void DevirtModule::applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
1860 Constant *Byte, Constant *Bit) {
1861 for (auto Call : CSInfo.CallSites) {
1862 if (!OptimizedCalls.insert(&Call.CB).second)
1863 continue;
1864 auto *RetType = cast<IntegerType>(Call.CB.getType());
1865 IRBuilder<> B(&Call.CB);
1866 Value *Addr = B.CreatePtrAdd(Call.VTable, Byte);
1867 if (RetType->getBitWidth() == 1) {
1868 Value *Bits = B.CreateLoad(Int8Ty, Addr);
1869 Value *BitsAndBit = B.CreateAnd(Bits, Bit);
1870 auto IsBitSet = B.CreateICmpNE(BitsAndBit, ConstantInt::get(Int8Ty, 0));
1871 NumVirtConstProp1Bit++;
1872 Call.replaceAndErase("virtual-const-prop-1-bit", FnName, RemarksEnabled,
1873 OREGetter, IsBitSet);
1874 } else {
1875 Value *Val = B.CreateLoad(RetType, Addr);
1876 NumVirtConstProp++;
1877 Call.replaceAndErase("virtual-const-prop", FnName, RemarksEnabled,
1878 OREGetter, Val);
1879 }
1880 }
1881 CSInfo.markDevirt();
1882}
1883
1884bool DevirtModule::tryVirtualConstProp(
1885 MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
1886 WholeProgramDevirtResolution *Res, VTableSlot Slot) {
1887 // TODO: Skip for now if the vtable symbol was an alias to a function,
1888 // need to evaluate whether it would be correct to analyze the aliasee
1889 // function for this optimization.
1890 auto *Fn = dyn_cast<Function>(TargetsForSlot[0].Fn);
1891 if (!Fn)
1892 return false;
1893 // This only works if the function returns an integer.
1894 auto *RetType = dyn_cast<IntegerType>(Fn->getReturnType());
1895 if (!RetType)
1896 return false;
1897 unsigned BitWidth = RetType->getBitWidth();
1898
1899 // TODO: Since we can evaluated these constants at compile-time, we can save
1900 // some space by calculating the smallest range of values that all these
1901 // constants can fit in, then only allocate enough space to fit those values.
1902 // At each callsite, we can get the original type by doing a sign/zero
1903 // extension. For example, if we would store an i64, but we can see that all
1904 // the values fit into an i16, then we can store an i16 before/after the
1905 // vtable and at each callsite do a s/zext.
1906 if (BitWidth > 64)
1907 return false;
1908
1909 Align TypeAlignment = M.getDataLayout().getABIIntegerTypeAlignment(BitWidth);
1910
1911 // Make sure that each function is defined, does not access memory, takes at
1912 // least one argument, does not use its first argument (which we assume is
1913 // 'this'), and has the same return type.
1914 //
1915 // Note that we test whether this copy of the function is readnone, rather
1916 // than testing function attributes, which must hold for any copy of the
1917 // function, even a less optimized version substituted at link time. This is
1918 // sound because the virtual constant propagation optimizations effectively
1919 // inline all implementations of the virtual function into each call site,
1920 // rather than using function attributes to perform local optimization.
1921 for (VirtualCallTarget &Target : TargetsForSlot) {
1922 // TODO: Skip for now if the vtable symbol was an alias to a function,
1923 // need to evaluate whether it would be correct to analyze the aliasee
1924 // function for this optimization.
1925 auto *Fn = dyn_cast<Function>(Target.Fn);
1926 if (!Fn)
1927 return false;
1928
1929 if (Fn->isDeclaration() ||
1930 !computeFunctionBodyMemoryAccess(*Fn, FAM.getResult<AAManager>(*Fn))
1932 Fn->arg_empty() || !Fn->arg_begin()->use_empty() ||
1933 Fn->getReturnType() != RetType)
1934 return false;
1935
1936 // This only works if the integer size is at most the alignment of the
1937 // vtable. If the table is underaligned, then we can't guarantee that the
1938 // constant will always be aligned to the integer type alignment. For
1939 // example, if the table is `align 1`, we can never guarantee that an i32
1940 // stored before/after the vtable is 32-bit aligned without changing the
1941 // alignment of the new global.
1942 GlobalVariable *GV = Target.TM->Bits->GV;
1943 Align TableAlignment = M.getDataLayout().getValueOrABITypeAlignment(
1944 GV->getAlign(), GV->getValueType());
1945 if (TypeAlignment > TableAlignment)
1946 return false;
1947 }
1948
1949 for (auto &&CSByConstantArg : SlotInfo.ConstCSInfo) {
1950 if (!tryEvaluateFunctionsWithArgs(TargetsForSlot, CSByConstantArg.first))
1951 continue;
1952
1953 WholeProgramDevirtResolution::ByArg *ResByArg = nullptr;
1954 if (Res)
1955 ResByArg = &Res->ResByArg[CSByConstantArg.first];
1956
1957 if (tryUniformRetValOpt(TargetsForSlot, CSByConstantArg.second, ResByArg))
1958 continue;
1959
1960 if (tryUniqueRetValOpt(BitWidth, TargetsForSlot, CSByConstantArg.second,
1961 ResByArg, Slot, CSByConstantArg.first))
1962 continue;
1963
1964 // Find an allocation offset in bits in all vtables associated with the
1965 // type.
1966 // TODO: If there would be "holes" in the vtable that were added by
1967 // padding, we could place i1s there to reduce any extra padding that
1968 // would be introduced by the i1s.
1969 uint64_t AllocBefore =
1970 findLowestOffset(TargetsForSlot, /*IsAfter=*/false, BitWidth);
1971 uint64_t AllocAfter =
1972 findLowestOffset(TargetsForSlot, /*IsAfter=*/true, BitWidth);
1973
1974 // Calculate the total amount of padding needed to store a value at both
1975 // ends of the object.
1976 uint64_t TotalPaddingBefore = 0, TotalPaddingAfter = 0;
1977 for (auto &&Target : TargetsForSlot) {
1978 TotalPaddingBefore += std::max<int64_t>(
1979 (AllocBefore + 7) / 8 - Target.allocatedBeforeBytes() - 1, 0);
1980 TotalPaddingAfter += std::max<int64_t>(
1981 (AllocAfter + 7) / 8 - Target.allocatedAfterBytes() - 1, 0);
1982 }
1983
1984 // If the amount of padding is too large, give up.
1985 // FIXME: do something smarter here.
1986 if (std::min(TotalPaddingBefore, TotalPaddingAfter) > 128)
1987 continue;
1988
1989 // Calculate the offset to the value as a (possibly negative) byte offset
1990 // and (if applicable) a bit offset, and store the values in the targets.
1991 int64_t OffsetByte;
1992 uint64_t OffsetBit;
1993 if (TotalPaddingBefore <= TotalPaddingAfter)
1994 setBeforeReturnValues(TargetsForSlot, AllocBefore, BitWidth, OffsetByte,
1995 OffsetBit);
1996 else
1997 setAfterReturnValues(TargetsForSlot, AllocAfter, BitWidth, OffsetByte,
1998 OffsetBit);
1999
2000 // In an earlier check we forbade constant propagation from operating on
2001 // tables whose alignment is less than the alignment needed for loading
2002 // the constant. Thus, the address we take the offset from will always be
2003 // aligned to at least this integer alignment. Now, we need to ensure that
2004 // the offset is also aligned to this integer alignment to ensure we always
2005 // have an aligned load.
2006 assert(OffsetByte % TypeAlignment.value() == 0);
2007
2008 if (RemarksEnabled || AreStatisticsEnabled())
2009 for (auto &&Target : TargetsForSlot)
2010 Target.WasDevirt = true;
2011
2012
2013 if (CSByConstantArg.second.isExported()) {
2015 exportConstant(Slot, CSByConstantArg.first, "byte", OffsetByte,
2016 ResByArg->Byte);
2017 exportConstant(Slot, CSByConstantArg.first, "bit", 1ULL << OffsetBit,
2018 ResByArg->Bit);
2019 }
2020
2021 // Rewrite each call to a load from OffsetByte/OffsetBit.
2022 Constant *ByteConst = ConstantInt::getSigned(Int32Ty, OffsetByte);
2023 Constant *BitConst = ConstantInt::get(Int8Ty, 1ULL << OffsetBit);
2024 applyVirtualConstProp(CSByConstantArg.second,
2025 TargetsForSlot[0].Fn->getName(), ByteConst, BitConst);
2026 }
2027 return true;
2028}
2029
2030void DevirtModule::rebuildGlobal(VTableBits &B) {
2031 if (B.Before.Bytes.empty() && B.After.Bytes.empty())
2032 return;
2033
2034 // Align the before byte array to the global's minimum alignment so that we
2035 // don't break any alignment requirements on the global.
2036 Align Alignment = M.getDataLayout().getValueOrABITypeAlignment(
2037 B.GV->getAlign(), B.GV->getValueType());
2038 B.Before.Bytes.resize(alignTo(B.Before.Bytes.size(), Alignment));
2039
2040 // Before was stored in reverse order; flip it now.
2041 for (size_t I = 0, Size = B.Before.Bytes.size(); I != Size / 2; ++I)
2042 std::swap(B.Before.Bytes[I], B.Before.Bytes[Size - 1 - I]);
2043
2044 // Build an anonymous global containing the before bytes, followed by the
2045 // original initializer, followed by the after bytes.
2046 auto *NewInit = ConstantStruct::getAnon(
2047 {ConstantDataArray::get(M.getContext(), B.Before.Bytes),
2048 B.GV->getInitializer(),
2049 ConstantDataArray::get(M.getContext(), B.After.Bytes)});
2050 auto *NewGV =
2051 new GlobalVariable(M, NewInit->getType(), B.GV->isConstant(),
2052 GlobalVariable::PrivateLinkage, NewInit, "", B.GV);
2053 NewGV->setSection(B.GV->getSection());
2054 NewGV->setComdat(B.GV->getComdat());
2055 NewGV->setAlignment(B.GV->getAlign());
2056
2057 // Copy the original vtable's metadata to the anonymous global, adjusting
2058 // offsets as required.
2059 NewGV->copyMetadata(B.GV, B.Before.Bytes.size());
2060
2061 // Build an alias named after the original global, pointing at the second
2062 // element (the original initializer).
2063 auto *Alias = GlobalAlias::create(
2064 B.GV->getInitializer()->getType(), 0, B.GV->getLinkage(), "",
2066 NewInit->getType(), NewGV,
2067 ArrayRef<Constant *>{ConstantInt::get(Int32Ty, 0),
2068 ConstantInt::get(Int32Ty, 1)}),
2069 &M);
2070 Alias->setVisibility(B.GV->getVisibility());
2071 Alias->takeName(B.GV);
2072
2073 B.GV->replaceAllUsesWith(Alias);
2074 B.GV->eraseFromParent();
2075}
2076
2077bool DevirtModule::areRemarksEnabled() {
2078 const auto &FL = M.getFunctionList();
2079 for (const Function &Fn : FL) {
2080 if (Fn.empty())
2081 continue;
2082 auto DI = OptimizationRemark(DEBUG_TYPE, "", DebugLoc(), &Fn.front());
2083 return DI.isEnabled();
2084 }
2085 return false;
2086}
2087
2088void DevirtModule::scanTypeTestUsers(
2089 Function *TypeTestFunc,
2090 DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) {
2091 // Find all virtual calls via a virtual table pointer %p under an assumption
2092 // of the form llvm.assume(llvm.type.test(%p, %md)) or
2093 // llvm.assume(llvm.public.type.test(%p, %md)).
2094 // This indicates that %p points to a member of the type identifier %md.
2095 // Group calls by (type ID, offset) pair (effectively the identity of the
2096 // virtual function) and store to CallSlots.
2097 for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) {
2098 auto *CI = dyn_cast<CallInst>(U.getUser());
2099 if (!CI)
2100 continue;
2101 // Search for virtual calls based on %p and add them to DevirtCalls.
2104 auto &DT = FAM.getResult<DominatorTreeAnalysis>(*CI->getFunction());
2105 findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT);
2106
2107 Metadata *TypeId =
2108 cast<MetadataAsValue>(CI->getArgOperand(1))->getMetadata();
2109 // If we found any, add them to CallSlots.
2110 if (!Assumes.empty()) {
2111 Value *Ptr = CI->getArgOperand(0)->stripPointerCasts();
2112 for (DevirtCallSite Call : DevirtCalls)
2113 CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CB, nullptr);
2114 }
2115
2116 auto RemoveTypeTestAssumes = [&]() {
2117 // We no longer need the assumes or the type test.
2118 for (auto *Assume : Assumes)
2119 Assume->eraseFromParent();
2120 // We can't use RecursivelyDeleteTriviallyDeadInstructions here because we
2121 // may use the vtable argument later.
2122 if (CI->use_empty())
2123 CI->eraseFromParent();
2124 };
2125
2126 // At this point we could remove all type test assume sequences, as they
2127 // were originally inserted for WPD. However, we can keep these in the
2128 // code stream for later analysis (e.g. to help drive more efficient ICP
2129 // sequences). They will eventually be removed by a second LowerTypeTests
2130 // invocation that cleans them up. In order to do this correctly, the first
2131 // LowerTypeTests invocation needs to know that they have "Unknown" type
2132 // test resolution, so that they aren't treated as Unsat and lowered to
2133 // False, which will break any uses on assumes. Below we remove any type
2134 // test assumes that will not be treated as Unknown by LTT.
2135
2136 // The type test assumes will be treated by LTT as Unsat if the type id is
2137 // not used on a global (in which case it has no entry in the TypeIdMap).
2138 if (!TypeIdMap.count(TypeId))
2139 RemoveTypeTestAssumes();
2140
2141 // For ThinLTO importing, we need to remove the type test assumes if this is
2142 // an MDString type id without a corresponding TypeIdSummary. Any
2143 // non-MDString type ids are ignored and treated as Unknown by LTT, so their
2144 // type test assumes can be kept. If the MDString type id is missing a
2145 // TypeIdSummary (e.g. because there was no use on a vcall, preventing the
2146 // exporting phase of WPD from analyzing it), then it would be treated as
2147 // Unsat by LTT and we need to remove its type test assumes here. If not
2148 // used on a vcall we don't need them for later optimization use in any
2149 // case.
2150 else if (ImportSummary && isa<MDString>(TypeId)) {
2151 const TypeIdSummary *TidSummary =
2152 ImportSummary->getTypeIdSummary(cast<MDString>(TypeId)->getString());
2153 if (!TidSummary)
2154 RemoveTypeTestAssumes();
2155 else
2156 // If one was created it should not be Unsat, because if we reached here
2157 // the type id was used on a global.
2159 }
2160 }
2161}
2162
2163void DevirtModule::scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc) {
2164 Function *TypeTestFunc =
2165 Intrinsic::getOrInsertDeclaration(&M, Intrinsic::type_test);
2166
2167 for (Use &U : llvm::make_early_inc_range(TypeCheckedLoadFunc->uses())) {
2168 auto *CI = dyn_cast<CallInst>(U.getUser());
2169 if (!CI)
2170 continue;
2171
2172 Value *Ptr = CI->getArgOperand(0);
2173 Value *Offset = CI->getArgOperand(1);
2174 Value *TypeIdValue = CI->getArgOperand(2);
2175 Metadata *TypeId = cast<MetadataAsValue>(TypeIdValue)->getMetadata();
2176
2180 bool HasNonCallUses = false;
2181 auto &DT = FAM.getResult<DominatorTreeAnalysis>(*CI->getFunction());
2182 findDevirtualizableCallsForTypeCheckedLoad(DevirtCalls, LoadedPtrs, Preds,
2183 HasNonCallUses, CI, DT);
2184
2185 // Start by generating "pessimistic" code that explicitly loads the function
2186 // pointer from the vtable and performs the type check. If possible, we will
2187 // eliminate the load and the type check later.
2188
2189 // If possible, only generate the load at the point where it is used.
2190 // This helps avoid unnecessary spills.
2191 IRBuilder<> LoadB(
2192 (LoadedPtrs.size() == 1 && !HasNonCallUses) ? LoadedPtrs[0] : CI);
2193
2194 Value *LoadedValue = nullptr;
2195 if (TypeCheckedLoadFunc->getIntrinsicID() ==
2196 Intrinsic::type_checked_load_relative) {
2198 &M, Intrinsic::load_relative, {Int32Ty});
2199 LoadedValue = LoadB.CreateCall(LoadRelFunc, {Ptr, Offset});
2200 } else {
2201 Value *GEP = LoadB.CreatePtrAdd(Ptr, Offset);
2202 LoadedValue = LoadB.CreateLoad(Int8PtrTy, GEP);
2203 }
2204
2205 for (Instruction *LoadedPtr : LoadedPtrs) {
2206 LoadedPtr->replaceAllUsesWith(LoadedValue);
2207 LoadedPtr->eraseFromParent();
2208 }
2209
2210 // Likewise for the type test.
2211 IRBuilder<> CallB((Preds.size() == 1 && !HasNonCallUses) ? Preds[0] : CI);
2212 CallInst *TypeTestCall = CallB.CreateCall(TypeTestFunc, {Ptr, TypeIdValue});
2213
2214 for (Instruction *Pred : Preds) {
2215 Pred->replaceAllUsesWith(TypeTestCall);
2216 Pred->eraseFromParent();
2217 }
2218
2219 // We have already erased any extractvalue instructions that refer to the
2220 // intrinsic call, but the intrinsic may have other non-extractvalue uses
2221 // (although this is unlikely). In that case, explicitly build a pair and
2222 // RAUW it.
2223 if (!CI->use_empty()) {
2224 Value *Pair = PoisonValue::get(CI->getType());
2225 IRBuilder<> B(CI);
2226 Pair = B.CreateInsertValue(Pair, LoadedValue, {0});
2227 Pair = B.CreateInsertValue(Pair, TypeTestCall, {1});
2228 CI->replaceAllUsesWith(Pair);
2229 }
2230
2231 // The number of unsafe uses is initially the number of uses.
2232 auto &NumUnsafeUses = NumUnsafeUsesForTypeTest[TypeTestCall];
2233 NumUnsafeUses = DevirtCalls.size();
2234
2235 // If the function pointer has a non-call user, we cannot eliminate the type
2236 // check, as one of those users may eventually call the pointer. Increment
2237 // the unsafe use count to make sure it cannot reach zero.
2238 if (HasNonCallUses)
2239 ++NumUnsafeUses;
2240 for (DevirtCallSite Call : DevirtCalls) {
2241 CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CB,
2242 &NumUnsafeUses);
2243 }
2244
2245 CI->eraseFromParent();
2246 }
2247}
2248
2249void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) {
2250 auto *TypeId = dyn_cast<MDString>(Slot.TypeID);
2251 if (!TypeId)
2252 return;
2253 const TypeIdSummary *TidSummary =
2254 ImportSummary->getTypeIdSummary(TypeId->getString());
2255 if (!TidSummary)
2256 return;
2257 auto ResI = TidSummary->WPDRes.find(Slot.ByteOffset);
2258 if (ResI == TidSummary->WPDRes.end())
2259 return;
2260 const WholeProgramDevirtResolution &Res = ResI->second;
2261
2263 assert(!Res.SingleImplName.empty());
2264 // The type of the function in the declaration is irrelevant because every
2265 // call site will cast it to the correct type.
2266 Constant *SingleImpl =
2267 cast<Constant>(M.getOrInsertFunction(Res.SingleImplName,
2268 Type::getVoidTy(M.getContext()))
2269 .getCallee());
2270
2271 // This is the import phase so we should not be exporting anything.
2272 bool IsExported = false;
2273 applySingleImplDevirt(SlotInfo, SingleImpl, IsExported);
2274 assert(!IsExported);
2275 }
2276
2277 for (auto &CSByConstantArg : SlotInfo.ConstCSInfo) {
2278 auto I = Res.ResByArg.find(CSByConstantArg.first);
2279 if (I == Res.ResByArg.end())
2280 continue;
2281 auto &ResByArg = I->second;
2282 // FIXME: We should figure out what to do about the "function name" argument
2283 // to the apply* functions, as the function names are unavailable during the
2284 // importing phase. For now we just pass the empty string. This does not
2285 // impact correctness because the function names are just used for remarks.
2286 switch (ResByArg.TheKind) {
2288 applyUniformRetValOpt(CSByConstantArg.second, "", ResByArg.Info);
2289 break;
2291 Constant *UniqueMemberAddr =
2292 importGlobal(Slot, CSByConstantArg.first, "unique_member");
2293 applyUniqueRetValOpt(CSByConstantArg.second, "", ResByArg.Info,
2294 UniqueMemberAddr);
2295 break;
2296 }
2298 Constant *Byte = importConstant(Slot, CSByConstantArg.first, "byte",
2299 Int32Ty, ResByArg.Byte);
2300 Constant *Bit = importConstant(Slot, CSByConstantArg.first, "bit", Int8Ty,
2301 ResByArg.Bit);
2302 applyVirtualConstProp(CSByConstantArg.second, "", Byte, Bit);
2303 break;
2304 }
2305 default:
2306 break;
2307 }
2308 }
2309
2311 // The type of the function is irrelevant, because it's bitcast at calls
2312 // anyhow.
2313 auto *JT = cast<Function>(
2314 M.getOrInsertFunction(getGlobalName(Slot, {}, "branch_funnel"),
2315 Type::getVoidTy(M.getContext()))
2316 .getCallee());
2317 bool IsExported = false;
2318 applyICallBranchFunnel(SlotInfo, *JT, IsExported);
2319 assert(!IsExported);
2320 }
2321}
2322
2323void DevirtModule::removeRedundantTypeTests() {
2324 auto *True = ConstantInt::getTrue(M.getContext());
2325 for (auto &&U : NumUnsafeUsesForTypeTest) {
2326 if (U.second == 0) {
2327 U.first->replaceAllUsesWith(True);
2328 U.first->eraseFromParent();
2329 }
2330 }
2331}
2332
2333ValueInfo
2334DevirtModule::lookUpFunctionValueInfo(Function *TheFn,
2335 ModuleSummaryIndex *ExportSummary) {
2336 assert((ExportSummary != nullptr) &&
2337 "Caller guarantees ExportSummary is not nullptr");
2338
2339 const auto TheFnGUID = TheFn->getGUID();
2340 const auto TheFnGUIDWithExportedName =
2342 // Look up ValueInfo with the GUID in the current linkage.
2343 ValueInfo TheFnVI = ExportSummary->getValueInfo(TheFnGUID);
2344 // If no entry is found and GUID is different from GUID computed using
2345 // exported name, look up ValueInfo with the exported name unconditionally.
2346 // This is a fallback.
2347 //
2348 // The reason to have a fallback:
2349 // 1. LTO could enable global value internalization via
2350 // `enable-lto-internalization`.
2351 // 2. The GUID in ExportedSummary is computed using exported name.
2352 if ((!TheFnVI) && (TheFnGUID != TheFnGUIDWithExportedName)) {
2353 TheFnVI = ExportSummary->getValueInfo(TheFnGUIDWithExportedName);
2354 }
2355 return TheFnVI;
2356}
2357
2358bool DevirtModule::mustBeUnreachableFunction(
2359 Function *const F, ModuleSummaryIndex *ExportSummary) {
2361 return false;
2362 // First, learn unreachability by analyzing function IR.
2363 if (!F->isDeclaration()) {
2364 // A function must be unreachable if its entry block ends with an
2365 // 'unreachable'.
2366 return isa<UnreachableInst>(F->getEntryBlock().getTerminator());
2367 }
2368 // Learn unreachability from ExportSummary if ExportSummary is present.
2369 return ExportSummary &&
2371 DevirtModule::lookUpFunctionValueInfo(F, ExportSummary));
2372}
2373
2374bool DevirtModule::run() {
2375 // If only some of the modules were split, we cannot correctly perform
2376 // this transformation. We already checked for the presense of type tests
2377 // with partially split modules during the thin link, and would have emitted
2378 // an error if any were found, so here we can simply return.
2379 if ((ExportSummary && ExportSummary->partiallySplitLTOUnits()) ||
2380 (ImportSummary && ImportSummary->partiallySplitLTOUnits()))
2381 return false;
2382
2383 Function *PublicTypeTestFunc = nullptr;
2384 // If we are in speculative devirtualization mode, we can work on the public
2385 // type test intrinsics.
2386 if (DevirtSpeculatively)
2387 PublicTypeTestFunc =
2388 Intrinsic::getDeclarationIfExists(&M, Intrinsic::public_type_test);
2389 Function *TypeTestFunc =
2390 Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_test);
2391 Function *TypeCheckedLoadFunc =
2392 Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_checked_load);
2393 Function *TypeCheckedLoadRelativeFunc = Intrinsic::getDeclarationIfExists(
2394 &M, Intrinsic::type_checked_load_relative);
2395 Function *AssumeFunc =
2396 Intrinsic::getDeclarationIfExists(&M, Intrinsic::assume);
2397
2398 // Normally if there are no users of the devirtualization intrinsics in the
2399 // module, this pass has nothing to do. But if we are exporting, we also need
2400 // to handle any users that appear only in the function summaries.
2401 if (!ExportSummary &&
2402 (((!PublicTypeTestFunc || PublicTypeTestFunc->use_empty()) &&
2403 (!TypeTestFunc || TypeTestFunc->use_empty())) ||
2404 !AssumeFunc || AssumeFunc->use_empty()) &&
2405 (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()) &&
2406 (!TypeCheckedLoadRelativeFunc ||
2407 TypeCheckedLoadRelativeFunc->use_empty()))
2408 return false;
2409
2410 // Rebuild type metadata into a map for easy lookup.
2411 std::vector<VTableBits> Bits;
2412 DenseMap<Metadata *, std::set<TypeMemberInfo>> TypeIdMap;
2413 buildTypeIdentifierMap(Bits, TypeIdMap);
2414
2415 if (PublicTypeTestFunc && AssumeFunc)
2416 scanTypeTestUsers(PublicTypeTestFunc, TypeIdMap);
2417
2418 if (TypeTestFunc && AssumeFunc)
2419 scanTypeTestUsers(TypeTestFunc, TypeIdMap);
2420
2421 if (TypeCheckedLoadFunc)
2422 scanTypeCheckedLoadUsers(TypeCheckedLoadFunc);
2423
2424 if (TypeCheckedLoadRelativeFunc)
2425 scanTypeCheckedLoadUsers(TypeCheckedLoadRelativeFunc);
2426
2427 if (ImportSummary) {
2428 for (auto &S : CallSlots)
2429 importResolution(S.first, S.second);
2430
2431 removeRedundantTypeTests();
2432
2433 // We have lowered or deleted the type intrinsics, so we will no longer have
2434 // enough information to reason about the liveness of virtual function
2435 // pointers in GlobalDCE.
2436 for (GlobalVariable &GV : M.globals())
2437 GV.eraseMetadata(LLVMContext::MD_vcall_visibility);
2438
2439 // The rest of the code is only necessary when exporting or during regular
2440 // LTO, so we are done.
2441 return true;
2442 }
2443
2444 if (TypeIdMap.empty())
2445 return true;
2446
2447 // Collect information from summary about which calls to try to devirtualize.
2448 if (ExportSummary) {
2449 DenseMap<GlobalValue::GUID, TinyPtrVector<Metadata *>> MetadataByGUID;
2450 for (auto &P : TypeIdMap) {
2451 if (auto *TypeId = dyn_cast<MDString>(P.first))
2453 TypeId->getString())]
2454 .push_back(TypeId);
2455 }
2456
2457 for (auto &P : *ExportSummary) {
2458 for (auto &S : P.second.getSummaryList()) {
2459 auto *FS = dyn_cast<FunctionSummary>(S.get());
2460 if (!FS)
2461 continue;
2462 // FIXME: Only add live functions.
2463 for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
2464 for (Metadata *MD : MetadataByGUID[VF.GUID]) {
2465 CallSlots[{MD, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS);
2466 }
2467 }
2468 for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
2469 for (Metadata *MD : MetadataByGUID[VF.GUID]) {
2470 CallSlots[{MD, VF.Offset}].CSInfo.addSummaryTypeCheckedLoadUser(FS);
2471 }
2472 }
2473 for (const FunctionSummary::ConstVCall &VC :
2474 FS->type_test_assume_const_vcalls()) {
2475 for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
2476 CallSlots[{MD, VC.VFunc.Offset}]
2477 .ConstCSInfo[VC.Args]
2478 .addSummaryTypeTestAssumeUser(FS);
2479 }
2480 }
2481 for (const FunctionSummary::ConstVCall &VC :
2482 FS->type_checked_load_const_vcalls()) {
2483 for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
2484 CallSlots[{MD, VC.VFunc.Offset}]
2485 .ConstCSInfo[VC.Args]
2486 .addSummaryTypeCheckedLoadUser(FS);
2487 }
2488 }
2489 }
2490 }
2491 }
2492
2493 // For each (type, offset) pair:
2494 bool DidVirtualConstProp = false;
2495 std::map<std::string, GlobalValue *> DevirtTargets;
2496 for (auto &S : CallSlots) {
2497 // Search each of the members of the type identifier for the virtual
2498 // function implementation at offset S.first.ByteOffset, and add to
2499 // TargetsForSlot.
2500 std::vector<VirtualCallTarget> TargetsForSlot;
2501 WholeProgramDevirtResolution *Res = nullptr;
2502 const std::set<TypeMemberInfo> &TypeMemberInfos = TypeIdMap[S.first.TypeID];
2503 if (ExportSummary && isa<MDString>(S.first.TypeID) &&
2504 TypeMemberInfos.size())
2505 // For any type id used on a global's type metadata, create the type id
2506 // summary resolution regardless of whether we can devirtualize, so that
2507 // lower type tests knows the type id is not Unsat. If it was not used on
2508 // a global's type metadata, the TypeIdMap entry set will be empty, and
2509 // we don't want to create an entry (with the default Unknown type
2510 // resolution), which can prevent detection of the Unsat.
2511 Res = &ExportSummary
2512 ->getOrInsertTypeIdSummary(
2513 cast<MDString>(S.first.TypeID)->getString())
2514 .WPDRes[S.first.ByteOffset];
2515 if (tryFindVirtualCallTargets(TargetsForSlot, TypeMemberInfos,
2516 S.first.ByteOffset, ExportSummary)) {
2517 bool SingleImplDevirt =
2518 trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res);
2519 // Out of speculative devirtualization mode, Try to apply virtual constant
2520 // propagation or branch funneling.
2521 // TODO: This should eventually be enabled for non-public type tests.
2522 if (!SingleImplDevirt && !DevirtSpeculatively) {
2523 DidVirtualConstProp |=
2524 tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first);
2525
2526 tryICallBranchFunnel(TargetsForSlot, S.second, Res, S.first);
2527 }
2528
2529 // Collect functions devirtualized at least for one call site for stats.
2530 if (RemarksEnabled || AreStatisticsEnabled())
2531 for (const auto &T : TargetsForSlot)
2532 if (T.WasDevirt)
2533 DevirtTargets[std::string(T.Fn->getName())] = T.Fn;
2534 }
2535
2536 // CFI-specific: if we are exporting and any llvm.type.checked.load
2537 // intrinsics were *not* devirtualized, we need to add the resulting
2538 // llvm.type.test intrinsics to the function summaries so that the
2539 // LowerTypeTests pass will export them.
2540 if (ExportSummary && isa<MDString>(S.first.TypeID)) {
2542 cast<MDString>(S.first.TypeID)->getString());
2543 auto AddTypeTestsForTypeCheckedLoads = [&](CallSiteInfo &CSI) {
2544 if (!CSI.AllCallSitesDevirted)
2545 for (auto *FS : CSI.SummaryTypeCheckedLoadUsers)
2546 FS->addTypeTest(GUID);
2547 };
2548 AddTypeTestsForTypeCheckedLoads(S.second.CSInfo);
2549 for (auto &CCS : S.second.ConstCSInfo)
2550 AddTypeTestsForTypeCheckedLoads(CCS.second);
2551 }
2552 }
2553
2554 if (RemarksEnabled) {
2555 // Generate remarks for each devirtualized function.
2556 for (const auto &DT : DevirtTargets) {
2557 GlobalValue *GV = DT.second;
2558 auto *F = dyn_cast<Function>(GV);
2559 if (!F) {
2560 auto *A = dyn_cast<GlobalAlias>(GV);
2561 assert(A && isa<Function>(A->getAliasee()));
2562 F = dyn_cast<Function>(A->getAliasee());
2563 assert(F);
2564 }
2565
2566 using namespace ore;
2567 OREGetter(*F).emit(OptimizationRemark(DEBUG_TYPE, "Devirtualized", F)
2568 << "devirtualized " << NV("FunctionName", DT.first));
2569 }
2570 }
2571
2572 NumDevirtTargets += DevirtTargets.size();
2573
2574 removeRedundantTypeTests();
2575
2576 // Rebuild each global we touched as part of virtual constant propagation to
2577 // include the before and after bytes.
2578 if (DidVirtualConstProp)
2579 for (VTableBits &B : Bits)
2580 rebuildGlobal(B);
2581
2582 // We have lowered or deleted the type intrinsics, so we will no longer have
2583 // enough information to reason about the liveness of virtual function
2584 // pointers in GlobalDCE.
2585 for (GlobalVariable &GV : M.globals())
2586 GV.eraseMetadata(LLVMContext::MD_vcall_visibility);
2587
2588 for (auto *CI : CallsWithPtrAuthBundleRemoved)
2589 CI->eraseFromParent();
2590
2591 return true;
2592}
2593
2594void DevirtIndex::run() {
2595 if (ExportSummary.typeIdCompatibleVtableMap().empty())
2596 return;
2597
2598 // Assert that we haven't made any changes that would affect the hasLocal()
2599 // flag on the GUID summary info.
2600 assert(!ExportSummary.withInternalizeAndPromote() &&
2601 "Expect index-based WPD to run before internalization and promotion");
2602
2603 DenseMap<GlobalValue::GUID, std::vector<StringRef>> NameByGUID;
2604 for (const auto &P : ExportSummary.typeIdCompatibleVtableMap()) {
2605 NameByGUID[GlobalValue::getGUIDAssumingExternalLinkage(P.first)].push_back(
2606 P.first);
2607 // Create the type id summary resolution regardlness of whether we can
2608 // devirtualize, so that lower type tests knows the type id is used on
2609 // a global and not Unsat. We do this here rather than in the loop over the
2610 // CallSlots, since that handling will only see type tests that directly
2611 // feed assumes, and we would miss any that aren't currently handled by WPD
2612 // (such as type tests that feed assumes via phis).
2613 ExportSummary.getOrInsertTypeIdSummary(P.first);
2614 }
2615
2616 // Collect information from summary about which calls to try to devirtualize.
2617 for (auto &P : ExportSummary) {
2618 for (auto &S : P.second.getSummaryList()) {
2619 auto *FS = dyn_cast<FunctionSummary>(S.get());
2620 if (!FS)
2621 continue;
2622 // FIXME: Only add live functions.
2623 for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
2624 for (StringRef Name : NameByGUID[VF.GUID]) {
2625 CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS);
2626 }
2627 }
2628 for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
2629 for (StringRef Name : NameByGUID[VF.GUID]) {
2630 CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeCheckedLoadUser(FS);
2631 }
2632 }
2633 for (const FunctionSummary::ConstVCall &VC :
2634 FS->type_test_assume_const_vcalls()) {
2635 for (StringRef Name : NameByGUID[VC.VFunc.GUID]) {
2636 CallSlots[{Name, VC.VFunc.Offset}]
2637 .ConstCSInfo[VC.Args]
2638 .addSummaryTypeTestAssumeUser(FS);
2639 }
2640 }
2641 for (const FunctionSummary::ConstVCall &VC :
2642 FS->type_checked_load_const_vcalls()) {
2643 for (StringRef Name : NameByGUID[VC.VFunc.GUID]) {
2644 CallSlots[{Name, VC.VFunc.Offset}]
2645 .ConstCSInfo[VC.Args]
2646 .addSummaryTypeCheckedLoadUser(FS);
2647 }
2648 }
2649 }
2650 }
2651
2652 std::set<ValueInfo> DevirtTargets;
2653 // For each (type, offset) pair:
2654 for (auto &S : CallSlots) {
2655 // Search each of the members of the type identifier for the virtual
2656 // function implementation at offset S.first.ByteOffset, and add to
2657 // TargetsForSlot.
2658 std::vector<ValueInfo> TargetsForSlot;
2659 auto TidSummary = ExportSummary.getTypeIdCompatibleVtableSummary(S.first.TypeID);
2660 assert(TidSummary);
2661 // The type id summary would have been created while building the NameByGUID
2662 // map earlier.
2663 WholeProgramDevirtResolution *Res =
2664 &ExportSummary.getTypeIdSummary(S.first.TypeID)
2665 ->WPDRes[S.first.ByteOffset];
2666 if (tryFindVirtualCallTargets(TargetsForSlot, *TidSummary,
2667 S.first.ByteOffset)) {
2668
2669 if (!trySingleImplDevirt(TargetsForSlot, S.first, S.second, Res,
2670 DevirtTargets))
2671 continue;
2672 }
2673 }
2674
2675 // Optionally have the thin link print message for each devirtualized
2676 // function.
2678 for (const auto &DT : DevirtTargets)
2679 errs() << "Devirtualized call to " << DT << "\n";
2680
2681 NumDevirtTargets += DevirtTargets.size();
2682}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
dxil translate DXIL Translate Metadata
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
This file defines DenseMapInfo traits for DenseMap.
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
Provides passes for computing function attributes based on interprocedural analyses.
@ CallSiteInfo
#define DEBUG_TYPE
static void emitRemark(const Function &F, OptimizationRemarkEmitter &ORE, bool Skip)
Hexagon Common GEP
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
static cl::opt< PassSummaryAction > ClSummaryAction("lowertypetests-summary-action", cl::desc("What to do with the summary when running this pass"), cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"), clEnumValN(PassSummaryAction::Import, "import", "Import typeid resolutions from summary and globals"), clEnumValN(PassSummaryAction::Export, "export", "Export typeid resolutions to summary and globals")), cl::Hidden)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
This file contains the declarations for metadata subclasses.
Type::TypeID TypeID
#define T
static bool mustBeUnreachableFunction(const Function &F)
This is the interface to build a ModuleSummaryIndex for a module.
uint64_t IntrinsicInst * II
#define P(N)
FunctionAnalysisManager FAM
ModuleAnalysisManager MAM
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
WPDCheckMode
Mechanism to add runtime checking of devirtualization decisions, optionally trapping or falling back ...
static bool typeIDVisibleToRegularObj(StringRef TypeID, function_ref< bool(StringRef)> IsVisibleToRegularObj)
static Error checkCombinedSummaryForTesting(ModuleSummaryIndex *Summary)
static bool addCalls(VTableSlotInfo &SlotInfo, const ValueInfo &Callee)
static cl::opt< WPDCheckMode > DevirtCheckMode("wholeprogramdevirt-check", cl::Hidden, cl::desc("Type of checking for incorrect devirtualizations"), cl::values(clEnumValN(WPDCheckMode::None, "none", "No checking"), clEnumValN(WPDCheckMode::Trap, "trap", "Trap when incorrect"), clEnumValN(WPDCheckMode::Fallback, "fallback", "Fallback to indirect when incorrect")))
static cl::opt< bool > WholeProgramDevirtKeepUnreachableFunction("wholeprogramdevirt-keep-unreachable-function", cl::desc("Regard unreachable functions as possible devirtualize targets."), cl::Hidden, cl::init(true))
With Clang, a pure virtual class's deleting destructor is emitted as a llvm.trap intrinsic followed b...
static bool skipUpdateDueToValidation(GlobalVariable &GV, function_ref< bool(StringRef)> IsVisibleToRegularObj)
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition ArrayRef.h:186
static LLVM_ABI AttributeSet get(LLVMContext &C, const AttrBuilder &B)
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
bool isValid() const
Return true if the attribute is any kind of attribute.
Definition Attributes.h:223
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
void setCallingConv(CallingConv::ID CC)
bool arg_empty() const
std::optional< OperandBundleUse > getOperandBundle(StringRef Name) const
Return an operand bundle by name, if present.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
CallingConv::ID getCallingConv() const
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
FunctionType * getFunctionType() const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
static LLVM_ABI CallBase * removeOperandBundle(CallBase *CB, uint32_t ID, InsertPosition InsertPt=nullptr)
Create a clone of CB with operand bundle ID removed.
AttributeList getAttributes() const
Return the attributes for this call.
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
@ ICMP_NE
not equal
Definition InstrTypes.h:698
void setSelectionKind(SelectionKind Val)
Definition Comdat.h:48
static ConstantAsMetadata * get(Constant *C)
Definition Metadata.h:536
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition Constants.h:720
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static Constant * getInBoundsGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList)
Create an "inbounds" getelementptr.
Definition Constants.h:1311
static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, GEPNoWrapFlags NW=GEPNoWrapFlags::none(), std::optional< ConstantRange > InRange=std::nullopt, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
Definition Constants.h:1284
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:136
static Constant * getAnon(ArrayRef< Constant * > V, bool Packed=false)
Return an anonymous struct that has the specified elements.
Definition Constants.h:491
const Constant * stripPointerCasts() const
Definition Constant.h:222
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
static bool shouldExecute(CounterInfo &Counter)
bool empty() const
Definition DenseMap.h:109
Implements a dense probed hash-table based set.
Definition DenseSet.h:279
Subclass of Error for the sole purpose of identifying the success path in the type system.
Definition Error.h:334
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
Tagged union holding either a T or a Error.
Definition Error.h:485
Type * getParamType(unsigned i) const
Parameter type accessors.
ArrayRef< Type * > params() const
bool isVarArg() const
Type * getReturnType() const
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition Function.h:166
bool empty() const
Definition Function.h:857
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:209
bool arg_empty() const
Definition Function.h:900
const BasicBlock & front() const
Definition Function.h:858
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:765
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition Function.h:244
arg_iterator arg_begin()
Definition Function.h:866
size_t arg_size() const
Definition Function.h:899
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:214
unsigned getInstructionCount() const
Returns the number of non-debug IR instructions in this function.
Definition Function.cpp:367
static LLVM_ABI Expected< GlobPattern > create(StringRef Pat, std::optional< size_t > MaxSubPatterns={})
static LLVM_ABI GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition Globals.cpp:598
bool hasMetadata() const
Return true if this value has any metadata attached to it.
Definition Value.h:602
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set a particular kind of metadata attachment.
LLVM_ABI VCallVisibility getVCallVisibility() const
LLVM_ABI bool eraseMetadata(unsigned KindID)
Erase all metadata attachments with the given kind.
MDNode * getMetadata(unsigned KindID) const
Get the current metadata attachments for the given kind, if any.
Definition Value.h:576
LLVM_ABI void setVCallVisibilityMetadata(VCallVisibility Visibility)
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
Definition Globals.cpp:77
static bool isLocalLinkage(LinkageTypes Linkage)
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:328
static bool isAvailableExternallyLinkage(LinkageTypes Linkage)
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e.
@ HiddenVisibility
The GV is hidden.
Definition GlobalValue.h:69
void setVisibility(VisibilityTypes V)
@ PrivateLinkage
Like Internal, but omit from symbol table.
Definition GlobalValue.h:61
@ InternalLinkage
Rename collisions when linking (static functions).
Definition GlobalValue.h:60
@ ExternalLinkage
Externally visible function.
Definition GlobalValue.h:53
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
MaybeAlign getAlign() const
Returns the alignment of the given variable.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
uint64_t getBitMask() const
Return a bitmask with ones set for all of the bits that can be set by an unsigned version of this typ...
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1569
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
bool doesNotAccessMemory() const
Whether this function accesses no memory.
Definition ModRef.h:220
Root of the metadata hierarchy.
Definition Metadata.h:64
Class to hold module path string table and global value map, and encapsulate methods for operating on...
const TypeIdSummary * getTypeIdSummary(StringRef TypeId) const
This returns either a pointer to the type id summary (if present in the summary map) or null (if not ...
ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const
Return a ValueInfo for the index value_type (convenient when iterating index).
const ModuleHash & getModuleHash(const StringRef ModPath) const
Get the module SHA1 hash recorded for the given module path.
static constexpr const char * getRegularLTOModuleName()
static std::string getGlobalNameForLocal(StringRef Name, ModuleHash ModHash)
Convenience method for creating a promoted global name for the given value name of a local,...
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
Analysis providing profile information.
static ReturnInst * Create(LLVMContext &C, Value *retVal=nullptr, InsertPosition InsertBefore=nullptr)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool contains(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
Definition StringRef.h:426
Target - Wrapper for Target specific information.
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:280
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:139
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:390
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
bool use_empty() const
Definition Value.h:346
LLVM_ABI bool eraseMetadata(unsigned KindID)
Erase all metadata attachments with the given kind.
iterator_range< use_iterator > uses()
Definition Value.h:380
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition DenseSet.h:180
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
CallInst * Call
Changed
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI Function * getDeclarationIfExists(const Module *M, ID id)
Look up the Function declaration of the intrinsic id in the Module M and return it if it exists.
bool match(Val *V, const Pattern &P)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
@ Assume
Do not drop type tests (default).
DiagnosticInfoOptimizationBase::Argument NV
Context & getContext() const
Definition BasicBlock.h:99
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
@ OF_TextWithCRLF
The file should be opened in text mode and use a carriage linefeed '\r '.
Definition FileSystem.h:764
LLVM_ABI uint64_t findLowestOffset(ArrayRef< VirtualCallTarget > Targets, bool IsAfter, uint64_t Size)
LLVM_ABI void setAfterReturnValues(MutableArrayRef< VirtualCallTarget > Targets, uint64_t AllocAfter, unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit)
LLVM_ABI void setBeforeReturnValues(MutableArrayRef< VirtualCallTarget > Targets, uint64_t AllocBefore, unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI MemoryEffects computeFunctionBodyMemoryAccess(Function &F, AAResults &AAR)
Returns the memory access properties of this copy of the function.
static cl::opt< bool > DisableWholeProgramVisibility("disable-whole-program-visibility", cl::Hidden, cl::desc("Disable whole program visibility (overrides enabling options)"))
Provide a way to force disable whole program for debugging or workarounds, when enabled via the linke...
static cl::opt< bool > WholeProgramVisibility("whole-program-visibility", cl::Hidden, cl::desc("Enable whole program visibility"))
Provide a way to force enable whole program visibility in tests.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
Definition InstrProf.h:296
static cl::opt< unsigned > ClThreshold("wholeprogramdevirt-branch-funnel-threshold", cl::Hidden, cl::init(10), cl::desc("Maximum number of call targets per " "call site to enable branch funnels"))
@ Export
Export information to summary.
Definition IPO.h:57
@ None
Do nothing.
Definition IPO.h:55
@ Import
Import information from summary.
Definition IPO.h:56
static cl::opt< std::string > ClReadSummary("wholeprogramdevirt-read-summary", cl::desc("Read summary from given bitcode or YAML file before running pass"), cl::Hidden)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2148
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
LLVM_ABI ModuleSummaryIndex buildModuleSummaryIndex(const Module &M, std::function< BlockFrequencyInfo *(const Function &F)> GetBFICallback, ProfileSummaryInfo *PSI, std::function< const StackSafetyInfo *(const Function &F)> GetSSICallback=[](const Function &F) -> const StackSafetyInfo *{ return nullptr;})
Direct function to compute a ModuleSummaryIndex from a given module.
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1305
LLVM_ABI bool hasWholeProgramVisibility(bool WholeProgramVisibilityEnabledInLTO)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
LLVM_ABI void writeIndexToFile(const ModuleSummaryIndex &Index, raw_ostream &Out, const ModuleToSummariesForIndexTy *ModuleToSummariesForIndex=nullptr, const GVSummaryPtrSet *DecSummaries=nullptr)
Write the specified module summary index to the given raw output stream, where it will be written in ...
LLVM_ABI Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
@ invalid_argument
Definition Errc.h:56
static cl::opt< std::string > ClWriteSummary("wholeprogramdevirt-write-summary", cl::desc("Write summary to given bitcode or YAML file after running pass. " "Output file format is deduced from extension: *.bc means writing " "bitcode, otherwise YAML"), cl::Hidden)
LLVM_ABI void updatePublicTypeTestCalls(Module &M, bool WholeProgramVisibilityEnabledInLTO)
LLVM_ABI void getVisibleToRegularObjVtableGUIDs(ModuleSummaryIndex &Index, DenseSet< GlobalValue::GUID > &VisibleToRegularObjSymbols, function_ref< bool(StringRef)> IsVisibleToRegularObj)
Based on typeID string, get all associated vtable GUIDS that are visible to regular objects.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void findDevirtualizableCallsForTypeCheckedLoad(SmallVectorImpl< DevirtCallSite > &DevirtCalls, SmallVectorImpl< Instruction * > &LoadedPtrs, SmallVectorImpl< Instruction * > &Preds, bool &HasNonCallUses, const CallInst *CI, DominatorTree &DT)
Given a call to the intrinsic @llvm.type.checked.load, find all devirtualizable call sites based on t...
LLVM_ABI CallBase & versionCallSite(CallBase &CB, Value *Callee, MDNode *BranchWeights)
Predicate and clone the given call site.
LLVM_ABI bool AreStatisticsEnabled()
Check if statistics are enabled.
LLVM_ABI void updateIndexWPDForExports(ModuleSummaryIndex &Summary, function_ref< bool(StringRef, ValueInfo)> isExported, std::map< ValueInfo, std::vector< VTableSlotSummary > > &LocalWPDTargetsMap)
Call after cross-module importing to update the recorded single impl devirt target names for any loca...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI void setExplicitlyUnknownFunctionEntryCount(Function &F, StringRef PassName)
Analogous to setExplicitlyUnknownBranchWeights, but for functions and their entry counts.
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
static cl::list< std::string > SkipFunctionNames("wholeprogramdevirt-skip", cl::desc("Prevent function(s) from being devirtualized"), cl::Hidden, cl::CommaSeparated)
Provide way to prevent certain function from being devirtualized.
LLVM_ABI void runWholeProgramDevirtOnIndex(ModuleSummaryIndex &Summary, std::set< GlobalValue::GUID > &ExportedGUIDs, std::map< ValueInfo, std::vector< VTableSlotSummary > > &LocalWPDTargetsMap)
Perform index-based whole program devirtualization on the Summary index.
static cl::opt< PassSummaryAction > ClSummaryAction("wholeprogramdevirt-summary-action", cl::desc("What to do with the summary when running this pass"), cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"), clEnumValN(PassSummaryAction::Import, "import", "Import typeid resolutions from summary and globals"), clEnumValN(PassSummaryAction::Export, "export", "Export typeid resolutions to summary and globals")), cl::Hidden)
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
Expected< T > errorOrToExpected(ErrorOr< T > &&EO)
Convert an ErrorOr<T> to an Expected<T>.
Definition Error.h:1245
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
static cl::opt< bool > ClDevirtualizeSpeculatively("devirtualize-speculatively", cl::desc("Enable speculative devirtualization optimization"), cl::init(false))
LLVM_ABI Error errorCodeToError(std::error_code EC)
Helper for converting an std::error_code to a Error.
Definition Error.cpp:111
cl::opt< bool > ProfcheckDisableMetadataFixes("profcheck-disable-metadata-fixes", cl::Hidden, cl::init(false), cl::desc("Disable metadata propagation fixes discovered through Issue #147390"))
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
std::vector< TypeIdOffsetVtableInfo > TypeIdCompatibleVtableInfo
List of vtable definitions decorated by a particular type identifier, and their corresponding offsets...
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
static cl::opt< bool > PrintSummaryDevirt("wholeprogramdevirt-print-index-based", cl::Hidden, cl::desc("Print index-based devirtualization messages"))
void consumeError(Error Err)
Consume a Error without doing anything.
Definition Error.h:1083
void findDevirtualizableCallsForTypeTest(SmallVectorImpl< DevirtCallSite > &DevirtCalls, SmallVectorImpl< CallInst * > &Assumes, const CallInst *CI, DominatorTree &DT)
Given a call to the intrinsic @llvm.type.test, find all devirtualizable call sites based on the call ...
LLVM_ABI void updateVCallVisibilityInModule(Module &M, bool WholeProgramVisibilityEnabledInLTO, const DenseSet< GlobalValue::GUID > &DynamicExportSymbols, bool ValidateAllVtablesHaveTypeInfos, function_ref< bool(StringRef)> IsVisibleToRegularObj)
If whole program visibility asserted, then upgrade all public vcall visibility metadata on vtable def...
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
std::pair< Function *, Constant * > getFunctionAtVTableOffset(GlobalVariable *GV, uint64_t Offset, Module &M)
Given a vtable and a specified offset, returns the function and the trivial pointer at the specified ...
LLVM_ABI void updateVCallVisibilityInIndex(ModuleSummaryIndex &Index, bool WholeProgramVisibilityEnabledInLTO, const DenseSet< GlobalValue::GUID > &DynamicExportSymbols, const DenseSet< GlobalValue::GUID > &VisibleToRegularObjSymbols)
If whole program visibility asserted, then upgrade all public vcall visibility metadata on vtable def...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Class to accumulate and hold information about a callee.
static unsigned getHashValue(const VTableSlotSummary &I)
static bool isEqual(const VTableSlotSummary &LHS, const VTableSlotSummary &RHS)
static bool isEqual(const VTableSlot &LHS, const VTableSlot &RHS)
static unsigned getHashValue(const VTableSlot &I)
An information struct used to provide DenseMap with the various necessary components for a given valu...
The following data structures summarize type metadata information.
std::map< uint64_t, WholeProgramDevirtResolution > WPDRes
Mapping from byte offset to whole-program devirt resolution for that (typeid, byte offset) pair.
TypeTestResolution TTRes
@ Unsat
Unsatisfiable type (i.e. no global has this type metadata)
enum llvm::TypeTestResolution::Kind TheKind
Struct that holds a reference to a particular GUID in a global value summary.
ArrayRef< std::unique_ptr< GlobalValueSummary > > getSummaryList() const
const ModuleSummaryIndex * ImportSummary
ModuleSummaryIndex * ExportSummary
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &)
@ UniformRetVal
Uniform return value optimization.
@ VirtualConstProp
Virtual constant propagation.
@ UniqueRetVal
Unique return value optimization.
uint64_t Info
Additional information for the resolution:
enum llvm::WholeProgramDevirtResolution::ByArg::Kind TheKind
enum llvm::WholeProgramDevirtResolution::Kind TheKind
std::map< std::vector< uint64_t >, ByArg > ResByArg
Resolutions for calls with all constant integer arguments (excluding the first argument,...
@ SingleImpl
Single implementation devirtualization.
@ BranchFunnel
When retpoline mitigation is enabled, use a branch funnel that is defined in the merged module.
LLVM_ABI VirtualCallTarget(GlobalValue *Fn, const TypeMemberInfo *TM)