//===- WholeProgramDevirt.cpp - Whole program virtual call optimization ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass implements whole program optimization of virtual calls in cases
// where we know (via !type metadata) that the list of callees is fixed. This
// includes the following:
// - Single implementation devirtualization: if a virtual call has a single
//   possible callee, replace all calls with a direct call to that callee.
// - Virtual constant propagation: if the virtual function's return type is an
//   integer <=64 bits and all possible callees are readnone, for each class and
//   each list of constant arguments: evaluate the function, store the return
//   value alongside the virtual table, and rewrite each virtual call as a load
//   from the virtual table.
// - Uniform return value optimization: if the conditions for virtual constant
//   propagation hold and each function returns the same constant value, replace
//   each virtual call with that constant.
// - Unique return value optimization for i1 return values: if the conditions
//   for virtual constant propagation hold and a single vtable's function
//   returns 0, or a single vtable's function returns 1, replace each virtual
//   call with a comparison of the vptr against that vtable's address.
//
// This pass is intended to be used during the regular and thin LTO pipelines:
//
// During regular LTO, the pass determines the best optimization for each
// virtual call and applies the resolutions directly to virtual calls that are
// eligible for virtual call optimization (i.e. calls that use either of the
// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics).
//
// During hybrid Regular/ThinLTO, the pass operates in two phases:
// - Export phase: this is run during the thin link over a single merged module
//   that contains all vtables with !type metadata that participate in the link.
//   The pass computes a resolution for each virtual call and stores it in the
//   type identifier summary.
// - Import phase: this is run during the thin backends over the individual
//   modules. The pass applies the resolutions previously computed during the
//   export phase to each eligible virtual call.
//
// During ThinLTO, the pass operates in two phases:
// - Export phase: this is run during the thin link over the index which
//   contains a summary of all vtables with !type metadata that participate in
//   the link. It computes a resolution for each virtual call and stores it in
//   the type identifier summary. Only single implementation devirtualization
//   is supported.
// - Import phase: (same as with hybrid case above).
//
//===----------------------------------------------------------------------===//
52
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/GlobPattern.h"
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Transforms/IPO.h"
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <map>
#include <set>
#include <string>
#include <vector>
109using namespace llvm;
110using namespace wholeprogramdevirt;
111
112#define DEBUG_TYPE "wholeprogramdevirt"
113
114STATISTIC(NumDevirtTargets, "Number of whole program devirtualization targets");
115STATISTIC(NumSingleImpl, "Number of single implementation devirtualizations");
116STATISTIC(NumBranchFunnel, "Number of branch funnels");
117STATISTIC(NumUniformRetVal, "Number of uniform return value optimizations");
118STATISTIC(NumUniqueRetVal, "Number of unique return value optimizations");
119STATISTIC(NumVirtConstProp1Bit,
120 "Number of 1 bit virtual constant propagations");
121STATISTIC(NumVirtConstProp, "Number of virtual constant propagations");
122
123namespace llvm {
124
126 "wholeprogramdevirt-summary-action",
127 cl::desc("What to do with the summary when running this pass"),
128 cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"),
130 "Import typeid resolutions from summary and globals"),
132 "Export typeid resolutions to summary and globals")),
133 cl::Hidden);
134
136 "wholeprogramdevirt-read-summary",
137 cl::desc(
138 "Read summary from given bitcode or YAML file before running pass"),
139 cl::Hidden);
140
142 "wholeprogramdevirt-write-summary",
143 cl::desc("Write summary to given bitcode or YAML file after running pass. "
144 "Output file format is deduced from extension: *.bc means writing "
145 "bitcode, otherwise YAML"),
146 cl::Hidden);
147
149 ClThreshold("wholeprogramdevirt-branch-funnel-threshold", cl::Hidden,
150 cl::init(10),
151 cl::desc("Maximum number of call targets per "
152 "call site to enable branch funnels"));
153
154static cl::opt<bool>
155 PrintSummaryDevirt("wholeprogramdevirt-print-index-based", cl::Hidden,
156 cl::desc("Print index-based devirtualization messages"));
157
158/// Provide a way to force enable whole program visibility in tests.
159/// This is needed to support legacy tests that don't contain
160/// !vcall_visibility metadata (the mere presense of type tests
161/// previously implied hidden visibility).
162static cl::opt<bool>
163 WholeProgramVisibility("whole-program-visibility", cl::Hidden,
164 cl::desc("Enable whole program visibility"));
165
166/// Provide a way to force disable whole program for debugging or workarounds,
167/// when enabled via the linker.
169 "disable-whole-program-visibility", cl::Hidden,
170 cl::desc("Disable whole program visibility (overrides enabling options)"));
171
172/// Provide way to prevent certain function from being devirtualized
174 SkipFunctionNames("wholeprogramdevirt-skip",
175 cl::desc("Prevent function(s) from being devirtualized"),
177
179
180} // end namespace llvm
181
182/// With Clang, a pure virtual class's deleting destructor is emitted as a
183/// `llvm.trap` intrinsic followed by an unreachable IR instruction. In the
184/// context of whole program devirtualization, the deleting destructor of a pure
185/// virtual class won't be invoked by the source code so safe to skip as a
186/// devirtualize target.
187///
188/// However, not all unreachable functions are safe to skip. In some cases, the
189/// program intends to run such functions and terminate, for instance, a unit
190/// test may run a death test. A non-test program might (or allowed to) invoke
191/// such functions to report failures (whether/when it's a good practice or not
192/// is a different topic).
193///
194/// This option is enabled to keep an unreachable function as a possible
195/// devirtualize target to conservatively keep the program behavior.
196///
197/// TODO: Make a pure virtual class's deleting destructor precisely identifiable
198/// in Clang's codegen for more devirtualization in LLVM.
200 "wholeprogramdevirt-keep-unreachable-function",
201 cl::desc("Regard unreachable functions as possible devirtualize targets."),
202 cl::Hidden, cl::init(true));
203
204/// If explicitly specified, the devirt module pass will stop transformation
205/// once the total number of devirtualizations reach the cutoff value. Setting
206/// this option to 0 explicitly will do 0 devirtualization.
208 "wholeprogramdevirt-cutoff",
209 cl::desc("Max number of devirtualizations for devirt module pass"),
210 cl::init(0));
211
212/// Mechanism to add runtime checking of devirtualization decisions, optionally
213/// trapping or falling back to indirect call on any that are not correct.
214/// Trapping mode is useful for debugging undefined behavior leading to failures
215/// with WPD. Fallback mode is useful for ensuring safety when whole program
216/// visibility may be compromised.
219 "wholeprogramdevirt-check", cl::Hidden,
220 cl::desc("Type of checking for incorrect devirtualizations"),
221 cl::values(clEnumValN(WPDCheckMode::None, "none", "No checking"),
222 clEnumValN(WPDCheckMode::Trap, "trap", "Trap when incorrect"),
224 "Fallback to indirect when incorrect")));
225
226namespace {
227struct PatternList {
228 std::vector<GlobPattern> Patterns;
229 template <class T> void init(const T &StringList) {
230 for (const auto &S : StringList)
232 Patterns.push_back(std::move(*Pat));
233 }
234 bool match(StringRef S) {
235 for (const GlobPattern &P : Patterns)
236 if (P.match(S))
237 return true;
238 return false;
239 }
240};
241} // namespace
242
243// Find the minimum offset that we may store a value of size Size bits at. If
244// IsAfter is set, look for an offset before the object, otherwise look for an
245// offset after the object.
248 bool IsAfter, uint64_t Size) {
249 // Find a minimum offset taking into account only vtable sizes.
250 uint64_t MinByte = 0;
251 for (const VirtualCallTarget &Target : Targets) {
252 if (IsAfter)
253 MinByte = std::max(MinByte, Target.minAfterBytes());
254 else
255 MinByte = std::max(MinByte, Target.minBeforeBytes());
256 }
257
258 // Build a vector of arrays of bytes covering, for each target, a slice of the
259 // used region (see AccumBitVector::BytesUsed in
260 // llvm/Transforms/IPO/WholeProgramDevirt.h) starting at MinByte. Effectively,
261 // this aligns the used regions to start at MinByte.
262 //
263 // In this example, A, B and C are vtables, # is a byte already allocated for
264 // a virtual function pointer, AAAA... (etc.) are the used regions for the
265 // vtables and Offset(X) is the value computed for the Offset variable below
266 // for X.
267 //
268 // Offset(A)
269 // | |
270 // |MinByte
271 // A: ################AAAAAAAA|AAAAAAAA
272 // B: ########BBBBBBBBBBBBBBBB|BBBB
273 // C: ########################|CCCCCCCCCCCCCCCC
274 // | Offset(B) |
275 //
276 // This code produces the slices of A, B and C that appear after the divider
277 // at MinByte.
278 std::vector<ArrayRef<uint8_t>> Used;
279 for (const VirtualCallTarget &Target : Targets) {
280 ArrayRef<uint8_t> VTUsed = IsAfter ? Target.TM->Bits->After.BytesUsed
281 : Target.TM->Bits->Before.BytesUsed;
282 uint64_t Offset = IsAfter ? MinByte - Target.minAfterBytes()
283 : MinByte - Target.minBeforeBytes();
284
285 // Disregard used regions that are smaller than Offset. These are
286 // effectively all-free regions that do not need to be checked.
287 if (VTUsed.size() > Offset)
288 Used.push_back(VTUsed.slice(Offset));
289 }
290
291 if (Size == 1) {
292 // Find a free bit in each member of Used.
293 for (unsigned I = 0;; ++I) {
294 uint8_t BitsUsed = 0;
295 for (auto &&B : Used)
296 if (I < B.size())
297 BitsUsed |= B[I];
298 if (BitsUsed != 0xff)
299 return (MinByte + I) * 8 + llvm::countr_zero(uint8_t(~BitsUsed));
300 }
301 } else {
302 // Find a free (Size/8) byte region in each member of Used.
303 // FIXME: see if alignment helps.
304 for (unsigned I = 0;; ++I) {
305 for (auto &&B : Used) {
306 unsigned Byte = 0;
307 while ((I + Byte) < B.size() && Byte < (Size / 8)) {
308 if (B[I + Byte])
309 goto NextI;
310 ++Byte;
311 }
312 }
313 // Rounding up ensures the constant is always stored at address we
314 // can directly load from without misalignment.
315 return alignTo((MinByte + I) * 8, Size);
316 NextI:;
317 }
318 }
319}
320
322 MutableArrayRef<VirtualCallTarget> Targets, uint64_t AllocBefore,
323 unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit) {
324 if (BitWidth == 1)
325 OffsetByte = -(AllocBefore / 8 + 1);
326 else
327 OffsetByte = -((AllocBefore + 7) / 8 + (BitWidth + 7) / 8);
328 OffsetBit = AllocBefore % 8;
329
330 for (VirtualCallTarget &Target : Targets) {
331 if (BitWidth == 1)
332 Target.setBeforeBit(AllocBefore);
333 else
334 Target.setBeforeBytes(AllocBefore, (BitWidth + 7) / 8);
335 }
336}
337
340 unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit) {
341 if (BitWidth == 1)
342 OffsetByte = AllocAfter / 8;
343 else
344 OffsetByte = (AllocAfter + 7) / 8;
345 OffsetBit = AllocAfter % 8;
346
347 for (VirtualCallTarget &Target : Targets) {
348 if (BitWidth == 1)
349 Target.setAfterBit(AllocAfter);
350 else
351 Target.setAfterBytes(AllocAfter, (BitWidth + 7) / 8);
352 }
353}
354
359
360namespace {
361
362// Tracks the number of devirted calls in the IR transformation.
363static unsigned NumDevirtCalls = 0;
364
365// A slot in a set of virtual tables. The TypeID identifies the set of virtual
366// tables, and the ByteOffset is the offset in bytes from the address point to
367// the virtual function pointer.
368struct VTableSlot {
369 Metadata *TypeID;
370 uint64_t ByteOffset;
371};
372
373} // end anonymous namespace
374
375namespace llvm {
376
377template <> struct DenseMapInfo<VTableSlot> {
386 static unsigned getHashValue(const VTableSlot &I) {
389 }
390 static bool isEqual(const VTableSlot &LHS,
391 const VTableSlot &RHS) {
392 return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset;
393 }
394};
395
396template <> struct DenseMapInfo<VTableSlotSummary> {
405 static unsigned getHashValue(const VTableSlotSummary &I) {
408 }
409 static bool isEqual(const VTableSlotSummary &LHS,
410 const VTableSlotSummary &RHS) {
411 return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset;
412 }
413};
414
415} // end namespace llvm
416
417// Returns true if the function must be unreachable based on ValueInfo.
418//
419// In particular, identifies a function as unreachable in the following
420// conditions
421// 1) All summaries are live.
422// 2) All function summaries indicate it's unreachable
423// 3) There is no non-function with the same GUID (which is rare)
426 return false;
427
428 if ((!TheFnVI) || TheFnVI.getSummaryList().empty()) {
429 // Returns false if ValueInfo is absent, or the summary list is empty
430 // (e.g., function declarations).
431 return false;
432 }
433
434 for (const auto &Summary : TheFnVI.getSummaryList()) {
435 // Conservatively returns false if any non-live functions are seen.
436 // In general either all summaries should be live or all should be dead.
437 if (!Summary->isLive())
438 return false;
439 if (auto *FS = dyn_cast<FunctionSummary>(Summary->getBaseObject())) {
440 if (!FS->fflags().MustBeUnreachable)
441 return false;
442 }
443 // Be conservative if a non-function has the same GUID (which is rare).
444 else
445 return false;
446 }
447 // All function summaries are live and all of them agree that the function is
448 // unreachble.
449 return true;
450}
451
452namespace {
453// A virtual call site. VTable is the loaded virtual table pointer, and CS is
454// the indirect virtual call.
455struct VirtualCallSite {
456 Value *VTable = nullptr;
457 CallBase &CB;
458
459 // If non-null, this field points to the associated unsafe use count stored in
460 // the DevirtModule::NumUnsafeUsesForTypeTest map below. See the description
461 // of that field for details.
462 unsigned *NumUnsafeUses = nullptr;
463
464 void
465 emitRemark(const StringRef OptName, const StringRef TargetName,
466 function_ref<OptimizationRemarkEmitter &(Function &)> OREGetter) {
467 Function *F = CB.getCaller();
468 DebugLoc DLoc = CB.getDebugLoc();
469 BasicBlock *Block = CB.getParent();
470
471 using namespace ore;
472 OREGetter(*F).emit(OptimizationRemark(DEBUG_TYPE, OptName, DLoc, Block)
473 << NV("Optimization", OptName)
474 << ": devirtualized a call to "
475 << NV("FunctionName", TargetName));
476 }
477
478 void replaceAndErase(
479 const StringRef OptName, const StringRef TargetName, bool RemarksEnabled,
480 function_ref<OptimizationRemarkEmitter &(Function &)> OREGetter,
481 Value *New) {
482 if (RemarksEnabled)
483 emitRemark(OptName, TargetName, OREGetter);
484 CB.replaceAllUsesWith(New);
485 if (auto *II = dyn_cast<InvokeInst>(&CB)) {
486 BranchInst::Create(II->getNormalDest(), CB.getIterator());
487 II->getUnwindDest()->removePredecessor(II->getParent());
488 }
489 CB.eraseFromParent();
490 // This use is no longer unsafe.
491 if (NumUnsafeUses)
492 --*NumUnsafeUses;
493 }
494};
495
496// Call site information collected for a specific VTableSlot and possibly a list
497// of constant integer arguments. The grouping by arguments is handled by the
498// VTableSlotInfo class.
499struct CallSiteInfo {
500 /// The set of call sites for this slot. Used during regular LTO and the
501 /// import phase of ThinLTO (as well as the export phase of ThinLTO for any
502 /// call sites that appear in the merged module itself); in each of these
503 /// cases we are directly operating on the call sites at the IR level.
504 std::vector<VirtualCallSite> CallSites;
505
506 /// Whether all call sites represented by this CallSiteInfo, including those
507 /// in summaries, have been devirtualized. This starts off as true because a
508 /// default constructed CallSiteInfo represents no call sites.
509 ///
510 /// If at the end of the pass there are still undevirtualized calls, we will
511 /// need to add a use of llvm.type.test to each of the function summaries in
512 /// the vector.
513 bool AllCallSitesDevirted = true;
514
515 // These fields are used during the export phase of ThinLTO and reflect
516 // information collected from function summaries.
517
518 /// CFI-specific: a vector containing the list of function summaries that use
519 /// the llvm.type.checked.load intrinsic and therefore will require
520 /// resolutions for llvm.type.test in order to implement CFI checks if
521 /// devirtualization was unsuccessful.
522 std::vector<FunctionSummary *> SummaryTypeCheckedLoadUsers;
523
524 /// A vector containing the list of function summaries that use
525 /// assume(llvm.type.test).
526 std::vector<FunctionSummary *> SummaryTypeTestAssumeUsers;
527
528 bool isExported() const {
529 return !SummaryTypeCheckedLoadUsers.empty() ||
530 !SummaryTypeTestAssumeUsers.empty();
531 }
532
533 void addSummaryTypeCheckedLoadUser(FunctionSummary *FS) {
534 SummaryTypeCheckedLoadUsers.push_back(FS);
535 AllCallSitesDevirted = false;
536 }
537
538 void addSummaryTypeTestAssumeUser(FunctionSummary *FS) {
539 SummaryTypeTestAssumeUsers.push_back(FS);
540 AllCallSitesDevirted = false;
541 }
542
543 void markDevirt() { AllCallSitesDevirted = true; }
544};
545
546// Call site information collected for a specific VTableSlot.
547struct VTableSlotInfo {
548 // The set of call sites which do not have all constant integer arguments
549 // (excluding "this").
550 CallSiteInfo CSInfo;
551
552 // The set of call sites with all constant integer arguments (excluding
553 // "this"), grouped by argument list.
554 std::map<std::vector<uint64_t>, CallSiteInfo> ConstCSInfo;
555
556 void addCallSite(Value *VTable, CallBase &CB, unsigned *NumUnsafeUses);
557
558private:
559 CallSiteInfo &findCallSiteInfo(CallBase &CB);
560};
561
562CallSiteInfo &VTableSlotInfo::findCallSiteInfo(CallBase &CB) {
563 std::vector<uint64_t> Args;
564 auto *CBType = dyn_cast<IntegerType>(CB.getType());
565 if (!CBType || CBType->getBitWidth() > 64 || CB.arg_empty())
566 return CSInfo;
567 for (auto &&Arg : drop_begin(CB.args())) {
568 auto *CI = dyn_cast<ConstantInt>(Arg);
569 if (!CI || CI->getBitWidth() > 64)
570 return CSInfo;
571 Args.push_back(CI->getZExtValue());
572 }
573 return ConstCSInfo[Args];
574}
575
576void VTableSlotInfo::addCallSite(Value *VTable, CallBase &CB,
577 unsigned *NumUnsafeUses) {
578 auto &CSI = findCallSiteInfo(CB);
579 CSI.AllCallSitesDevirted = false;
580 CSI.CallSites.push_back({VTable, CB, NumUnsafeUses});
581}
582
583struct DevirtModule {
584 Module &M;
587
588 ModuleSummaryIndex *const ExportSummary;
589 const ModuleSummaryIndex *const ImportSummary;
590
591 IntegerType *const Int8Ty;
592 PointerType *const Int8PtrTy;
593 IntegerType *const Int32Ty;
594 IntegerType *const Int64Ty;
595 IntegerType *const IntPtrTy;
596 /// Sizeless array type, used for imported vtables. This provides a signal
597 /// to analyzers that these imports may alias, as they do for example
598 /// when multiple unique return values occur in the same vtable.
599 ArrayType *const Int8Arr0Ty;
600
601 const bool RemarksEnabled;
602 std::function<OptimizationRemarkEmitter &(Function &)> OREGetter;
603 MapVector<VTableSlot, VTableSlotInfo> CallSlots;
604
605 // Calls that have already been optimized. We may add a call to multiple
606 // VTableSlotInfos if vtable loads are coalesced and need to make sure not to
607 // optimize a call more than once.
608 SmallPtrSet<CallBase *, 8> OptimizedCalls;
609
610 // Store calls that had their ptrauth bundle removed. They are to be deleted
611 // at the end of the optimization.
612 SmallVector<CallBase *, 8> CallsWithPtrAuthBundleRemoved;
613
614 // This map keeps track of the number of "unsafe" uses of a loaded function
615 // pointer. The key is the associated llvm.type.test intrinsic call generated
616 // by this pass. An unsafe use is one that calls the loaded function pointer
617 // directly. Every time we eliminate an unsafe use (for example, by
618 // devirtualizing it or by applying virtual constant propagation), we
619 // decrement the value stored in this map. If a value reaches zero, we can
620 // eliminate the type check by RAUWing the associated llvm.type.test call with
621 // true.
622 std::map<CallInst *, unsigned> NumUnsafeUsesForTypeTest;
623 PatternList FunctionsToSkip;
624
625 DevirtModule(Module &M, ModuleAnalysisManager &MAM,
626 ModuleSummaryIndex *ExportSummary,
627 const ModuleSummaryIndex *ImportSummary)
628 : M(M), MAM(MAM),
629 FAM(MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()),
630 ExportSummary(ExportSummary), ImportSummary(ImportSummary),
631 Int8Ty(Type::getInt8Ty(M.getContext())),
632 Int8PtrTy(PointerType::getUnqual(M.getContext())),
633 Int32Ty(Type::getInt32Ty(M.getContext())),
634 Int64Ty(Type::getInt64Ty(M.getContext())),
635 IntPtrTy(M.getDataLayout().getIntPtrType(M.getContext(), 0)),
636 Int8Arr0Ty(ArrayType::get(Type::getInt8Ty(M.getContext()), 0)),
637 RemarksEnabled(areRemarksEnabled()),
638 OREGetter([&](Function &F) -> OptimizationRemarkEmitter & {
639 return FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
640 }) {
641 assert(!(ExportSummary && ImportSummary));
642 FunctionsToSkip.init(SkipFunctionNames);
643 }
644
645 bool areRemarksEnabled();
646
647 void
648 scanTypeTestUsers(Function *TypeTestFunc,
649 DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap);
650 void scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc);
651
652 void buildTypeIdentifierMap(
653 std::vector<VTableBits> &Bits,
654 DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap);
655
656 bool
657 tryFindVirtualCallTargets(std::vector<VirtualCallTarget> &TargetsForSlot,
658 const std::set<TypeMemberInfo> &TypeMemberInfos,
659 uint64_t ByteOffset,
660 ModuleSummaryIndex *ExportSummary);
661
662 void applySingleImplDevirt(VTableSlotInfo &SlotInfo, Constant *TheFn,
663 bool &IsExported);
664 bool trySingleImplDevirt(ModuleSummaryIndex *ExportSummary,
666 VTableSlotInfo &SlotInfo,
667 WholeProgramDevirtResolution *Res);
668
669 void applyICallBranchFunnel(VTableSlotInfo &SlotInfo, Function &JT,
670 bool &IsExported);
671 void tryICallBranchFunnel(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
672 VTableSlotInfo &SlotInfo,
673 WholeProgramDevirtResolution *Res, VTableSlot Slot);
674
675 bool tryEvaluateFunctionsWithArgs(
677 ArrayRef<uint64_t> Args);
678
679 void applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
680 uint64_t TheRetVal);
681 bool tryUniformRetValOpt(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
682 CallSiteInfo &CSInfo,
683 WholeProgramDevirtResolution::ByArg *Res);
684
685 // Returns the global symbol name that is used to export information about the
686 // given vtable slot and list of arguments.
687 std::string getGlobalName(VTableSlot Slot, ArrayRef<uint64_t> Args,
688 StringRef Name);
689
690 bool shouldExportConstantsAsAbsoluteSymbols();
691
692 // This function is called during the export phase to create a symbol
693 // definition containing information about the given vtable slot and list of
694 // arguments.
695 void exportGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args, StringRef Name,
696 Constant *C);
697 void exportConstant(VTableSlot Slot, ArrayRef<uint64_t> Args, StringRef Name,
698 uint32_t Const, uint32_t &Storage);
699
700 // This function is called during the import phase to create a reference to
701 // the symbol definition created during the export phase.
702 Constant *importGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
703 StringRef Name);
704 Constant *importConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
705 StringRef Name, IntegerType *IntTy,
706 uint32_t Storage);
707
708 Constant *getMemberAddr(const TypeMemberInfo *M);
709
710 void applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName, bool IsOne,
711 Constant *UniqueMemberAddr);
712 bool tryUniqueRetValOpt(unsigned BitWidth,
714 CallSiteInfo &CSInfo,
715 WholeProgramDevirtResolution::ByArg *Res,
716 VTableSlot Slot, ArrayRef<uint64_t> Args);
717
718 void applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
719 Constant *Byte, Constant *Bit);
720 bool tryVirtualConstProp(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
721 VTableSlotInfo &SlotInfo,
722 WholeProgramDevirtResolution *Res, VTableSlot Slot);
723
724 void rebuildGlobal(VTableBits &B);
725
726 // Apply the summary resolution for Slot to all virtual calls in SlotInfo.
727 void importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo);
728
729 // If we were able to eliminate all unsafe uses for a type checked load,
730 // eliminate the associated type tests by replacing them with true.
731 void removeRedundantTypeTests();
732
733 bool run();
734
735 // Look up the corresponding ValueInfo entry of `TheFn` in `ExportSummary`.
736 //
737 // Caller guarantees that `ExportSummary` is not nullptr.
738 static ValueInfo lookUpFunctionValueInfo(Function *TheFn,
739 ModuleSummaryIndex *ExportSummary);
740
741 // Returns true if the function definition must be unreachable.
742 //
743 // Note if this helper function returns true, `F` is guaranteed
744 // to be unreachable; if it returns false, `F` might still
745 // be unreachable but not covered by this helper function.
746 //
747 // Implementation-wise, if function definition is present, IR is analyzed; if
748 // not, look up function flags from ExportSummary as a fallback.
749 static bool mustBeUnreachableFunction(Function *const F,
750 ModuleSummaryIndex *ExportSummary);
751
752 // Lower the module using the action and summary passed as command line
753 // arguments. For testing purposes only.
754 static bool runForTesting(Module &M, ModuleAnalysisManager &MAM);
755};
756
757struct DevirtIndex {
758 ModuleSummaryIndex &ExportSummary;
759 // The set in which to record GUIDs exported from their module by
760 // devirtualization, used by client to ensure they are not internalized.
761 std::set<GlobalValue::GUID> &ExportedGUIDs;
762 // A map in which to record the information necessary to locate the WPD
763 // resolution for local targets in case they are exported by cross module
764 // importing.
765 std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap;
766
767 MapVector<VTableSlotSummary, VTableSlotInfo> CallSlots;
768
769 PatternList FunctionsToSkip;
770
771 DevirtIndex(
772 ModuleSummaryIndex &ExportSummary,
773 std::set<GlobalValue::GUID> &ExportedGUIDs,
774 std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap)
775 : ExportSummary(ExportSummary), ExportedGUIDs(ExportedGUIDs),
776 LocalWPDTargetsMap(LocalWPDTargetsMap) {
777 FunctionsToSkip.init(SkipFunctionNames);
778 }
779
780 bool tryFindVirtualCallTargets(std::vector<ValueInfo> &TargetsForSlot,
781 const TypeIdCompatibleVtableInfo TIdInfo,
782 uint64_t ByteOffset);
783
784 bool trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
785 VTableSlotSummary &SlotSummary,
786 VTableSlotInfo &SlotInfo,
787 WholeProgramDevirtResolution *Res,
788 std::set<ValueInfo> &DevirtTargets);
789
790 void run();
791};
792} // end anonymous namespace
793
796 if (UseCommandLine) {
797 if (!DevirtModule::runForTesting(M, MAM))
798 return PreservedAnalyses::all();
800 }
801 if (!DevirtModule(M, MAM, ExportSummary, ImportSummary).run())
802 return PreservedAnalyses::all();
804}
805
806// Enable whole program visibility if enabled by client (e.g. linker) or
807// internal option, and not force disabled.
808bool llvm::hasWholeProgramVisibility(bool WholeProgramVisibilityEnabledInLTO) {
809 return (WholeProgramVisibilityEnabledInLTO || WholeProgramVisibility) &&
811}
812
813static bool
815 function_ref<bool(StringRef)> IsVisibleToRegularObj) {
816 // TypeID for member function pointer type is an internal construct
817 // and won't exist in IsVisibleToRegularObj. The full TypeID
818 // will be present and participate in invalidation.
819 if (TypeID.ends_with(".virtual"))
820 return false;
821
822 // TypeID that doesn't start with Itanium mangling (_ZTS) will be
823 // non-externally visible types which cannot interact with
824 // external native files. See CodeGenModule::CreateMetadataIdentifierImpl.
825 if (!TypeID.consume_front("_ZTS"))
826 return false;
827
828 // TypeID is keyed off the type name symbol (_ZTS). However, the native
829 // object may not contain this symbol if it does not contain a key
830 // function for the base type and thus only contains a reference to the
831 // type info (_ZTI). To catch this case we query using the type info
832 // symbol corresponding to the TypeID.
833 std::string TypeInfo = ("_ZTI" + TypeID).str();
834 return IsVisibleToRegularObj(TypeInfo);
835}
836
837static bool
839 function_ref<bool(StringRef)> IsVisibleToRegularObj) {
841 GV.getMetadata(LLVMContext::MD_type, Types);
842
843 for (auto *Type : Types)
844 if (auto *TypeID = dyn_cast<MDString>(Type->getOperand(1).get()))
845 return typeIDVisibleToRegularObj(TypeID->getString(),
846 IsVisibleToRegularObj);
847
848 return false;
849}
850
851/// If whole program visibility asserted, then upgrade all public vcall
852/// visibility metadata on vtable definitions to linkage unit visibility in
853/// Module IR (for regular or hybrid LTO).
855 Module &M, bool WholeProgramVisibilityEnabledInLTO,
856 const DenseSet<GlobalValue::GUID> &DynamicExportSymbols,
857 bool ValidateAllVtablesHaveTypeInfos,
858 function_ref<bool(StringRef)> IsVisibleToRegularObj) {
859 if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO))
860 return;
861 for (GlobalVariable &GV : M.globals()) {
862 // Add linkage unit visibility to any variable with type metadata, which are
863 // the vtable definitions. We won't have an existing vcall_visibility
864 // metadata on vtable definitions with public visibility.
865 if (GV.hasMetadata(LLVMContext::MD_type) &&
867 // Don't upgrade the visibility for symbols exported to the dynamic
868 // linker, as we have no information on their eventual use.
869 !DynamicExportSymbols.count(GV.getGUID()) &&
870 // With validation enabled, we want to exclude symbols visible to
871 // regular objects. Local symbols will be in this group due to the
872 // current implementation but those with VCallVisibilityTranslationUnit
873 // will have already been marked in clang so are unaffected.
874 !(ValidateAllVtablesHaveTypeInfos &&
875 skipUpdateDueToValidation(GV, IsVisibleToRegularObj)))
877 }
878}
879
881 bool WholeProgramVisibilityEnabledInLTO) {
882 llvm::TimeTraceScope timeScope("Update public type test calls");
883 Function *PublicTypeTestFunc =
884 Intrinsic::getDeclarationIfExists(&M, Intrinsic::public_type_test);
885 if (!PublicTypeTestFunc)
886 return;
887 if (hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO)) {
888 Function *TypeTestFunc =
889 Intrinsic::getOrInsertDeclaration(&M, Intrinsic::type_test);
890 for (Use &U : make_early_inc_range(PublicTypeTestFunc->uses())) {
891 auto *CI = cast<CallInst>(U.getUser());
892 auto *NewCI = CallInst::Create(
893 TypeTestFunc, {CI->getArgOperand(0), CI->getArgOperand(1)}, {}, "",
894 CI->getIterator());
895 CI->replaceAllUsesWith(NewCI);
896 CI->eraseFromParent();
897 }
898 } else {
899 auto *True = ConstantInt::getTrue(M.getContext());
900 for (Use &U : make_early_inc_range(PublicTypeTestFunc->uses())) {
901 auto *CI = cast<CallInst>(U.getUser());
902 CI->replaceAllUsesWith(True);
903 CI->eraseFromParent();
904 }
905 }
906}
907
908/// Based on typeID string, get all associated vtable GUIDS that are
909/// visible to regular objects.
911 ModuleSummaryIndex &Index,
912 DenseSet<GlobalValue::GUID> &VisibleToRegularObjSymbols,
913 function_ref<bool(StringRef)> IsVisibleToRegularObj) {
914 for (const auto &TypeID : Index.typeIdCompatibleVtableMap()) {
915 if (typeIDVisibleToRegularObj(TypeID.first, IsVisibleToRegularObj))
916 for (const TypeIdOffsetVtableInfo &P : TypeID.second)
917 VisibleToRegularObjSymbols.insert(P.VTableVI.getGUID());
918 }
919}
920
921/// If whole program visibility asserted, then upgrade all public vcall
922/// visibility metadata on vtable definition summaries to linkage unit
923/// visibility in Module summary index (for ThinLTO).
925 ModuleSummaryIndex &Index, bool WholeProgramVisibilityEnabledInLTO,
926 const DenseSet<GlobalValue::GUID> &DynamicExportSymbols,
927 const DenseSet<GlobalValue::GUID> &VisibleToRegularObjSymbols) {
928 if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO))
929 return;
930 for (auto &P : Index) {
931 // Don't upgrade the visibility for symbols exported to the dynamic
932 // linker, as we have no information on their eventual use.
933 if (DynamicExportSymbols.count(P.first))
934 continue;
935 for (auto &S : P.second.SummaryList) {
936 auto *GVar = dyn_cast<GlobalVarSummary>(S.get());
937 if (!GVar ||
938 GVar->getVCallVisibility() != GlobalObject::VCallVisibilityPublic)
939 continue;
940 // With validation enabled, we want to exclude symbols visible to regular
941 // objects. Local symbols will be in this group due to the current
942 // implementation but those with VCallVisibilityTranslationUnit will have
943 // already been marked in clang so are unaffected.
944 if (VisibleToRegularObjSymbols.count(P.first))
945 continue;
946 GVar->setVCallVisibility(GlobalObject::VCallVisibilityLinkageUnit);
947 }
948 }
949}
950
952 ModuleSummaryIndex &Summary, std::set<GlobalValue::GUID> &ExportedGUIDs,
953 std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
954 DevirtIndex(Summary, ExportedGUIDs, LocalWPDTargetsMap).run();
955}
956
958 ModuleSummaryIndex &Summary,
959 function_ref<bool(StringRef, ValueInfo)> IsExported,
960 std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
961 for (auto &T : LocalWPDTargetsMap) {
962 auto &VI = T.first;
963 // This was enforced earlier during trySingleImplDevirt.
964 assert(VI.getSummaryList().size() == 1 &&
965 "Devirt of local target has more than one copy");
966 auto &S = VI.getSummaryList()[0];
967 if (!IsExported(S->modulePath(), VI))
968 continue;
969
970 // It's been exported by a cross module import.
971 for (auto &SlotSummary : T.second) {
972 auto *TIdSum = Summary.getTypeIdSummary(SlotSummary.TypeID);
973 assert(TIdSum);
974 auto WPDRes = TIdSum->WPDRes.find(SlotSummary.ByteOffset);
975 assert(WPDRes != TIdSum->WPDRes.end());
976 WPDRes->second.SingleImplName = ModuleSummaryIndex::getGlobalNameForLocal(
977 WPDRes->second.SingleImplName,
978 Summary.getModuleHash(S->modulePath()));
979 }
980 }
981}
982
984 // Check that summary index contains regular LTO module when performing
985 // export to prevent occasional use of index from pure ThinLTO compilation
986 // (-fno-split-lto-module). This kind of summary index is passed to
987 // DevirtIndex::run, not to DevirtModule::run used by opt/runForTesting.
988 const auto &ModPaths = Summary->modulePaths();
991 return createStringError(
993 "combined summary should contain Regular LTO module");
994 return ErrorSuccess();
995}
996
997bool DevirtModule::runForTesting(Module &M, ModuleAnalysisManager &MAM) {
998 std::unique_ptr<ModuleSummaryIndex> Summary =
999 std::make_unique<ModuleSummaryIndex>(/*HaveGVs=*/false);
1000
1001 // Handle the command-line summary arguments. This code is for testing
1002 // purposes only, so we handle errors directly.
1003 if (!ClReadSummary.empty()) {
1004 ExitOnError ExitOnErr("-wholeprogramdevirt-read-summary: " + ClReadSummary +
1005 ": ");
1006 auto ReadSummaryFile =
1008 if (Expected<std::unique_ptr<ModuleSummaryIndex>> SummaryOrErr =
1009 getModuleSummaryIndex(*ReadSummaryFile)) {
1010 Summary = std::move(*SummaryOrErr);
1011 ExitOnErr(checkCombinedSummaryForTesting(Summary.get()));
1012 } else {
1013 // Try YAML if we've failed with bitcode.
1014 consumeError(SummaryOrErr.takeError());
1015 yaml::Input In(ReadSummaryFile->getBuffer());
1016 In >> *Summary;
1017 ExitOnErr(errorCodeToError(In.error()));
1018 }
1019 }
1020
1021 bool Changed =
1022 DevirtModule(M, MAM,
1023 ClSummaryAction == PassSummaryAction::Export ? Summary.get()
1024 : nullptr,
1025 ClSummaryAction == PassSummaryAction::Import ? Summary.get()
1026 : nullptr)
1027 .run();
1028
1029 if (!ClWriteSummary.empty()) {
1030 ExitOnError ExitOnErr(
1031 "-wholeprogramdevirt-write-summary: " + ClWriteSummary + ": ");
1032 std::error_code EC;
1033 if (StringRef(ClWriteSummary).ends_with(".bc")) {
1034 raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_None);
1035 ExitOnErr(errorCodeToError(EC));
1036 writeIndexToFile(*Summary, OS);
1037 } else {
1038 raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_TextWithCRLF);
1039 ExitOnErr(errorCodeToError(EC));
1040 yaml::Output Out(OS);
1041 Out << *Summary;
1042 }
1043 }
1044
1045 return Changed;
1046}
1047
1048void DevirtModule::buildTypeIdentifierMap(
1049 std::vector<VTableBits> &Bits,
1050 DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) {
1051 DenseMap<GlobalVariable *, VTableBits *> GVToBits;
1052 Bits.reserve(M.global_size());
1054 for (GlobalVariable &GV : M.globals()) {
1055 Types.clear();
1056 GV.getMetadata(LLVMContext::MD_type, Types);
1057 if (GV.isDeclaration() || Types.empty())
1058 continue;
1059
1060 VTableBits *&BitsPtr = GVToBits[&GV];
1061 if (!BitsPtr) {
1062 Bits.emplace_back();
1063 Bits.back().GV = &GV;
1064 Bits.back().ObjectSize =
1065 M.getDataLayout().getTypeAllocSize(GV.getInitializer()->getType());
1066 BitsPtr = &Bits.back();
1067 }
1068
1069 for (MDNode *Type : Types) {
1070 auto *TypeID = Type->getOperand(1).get();
1071
1072 uint64_t Offset =
1074 cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
1075 ->getZExtValue();
1076
1077 TypeIdMap[TypeID].insert({BitsPtr, Offset});
1078 }
1079 }
1080}
1081
1082bool DevirtModule::tryFindVirtualCallTargets(
1083 std::vector<VirtualCallTarget> &TargetsForSlot,
1084 const std::set<TypeMemberInfo> &TypeMemberInfos, uint64_t ByteOffset,
1085 ModuleSummaryIndex *ExportSummary) {
1086 for (const TypeMemberInfo &TM : TypeMemberInfos) {
1087 if (!TM.Bits->GV->isConstant())
1088 return false;
1089
1090 // We cannot perform whole program devirtualization analysis on a vtable
1091 // with public LTO visibility.
1092 if (TM.Bits->GV->getVCallVisibility() ==
1094 return false;
1095
1096 Function *Fn = nullptr;
1097 Constant *C = nullptr;
1098 std::tie(Fn, C) =
1099 getFunctionAtVTableOffset(TM.Bits->GV, TM.Offset + ByteOffset, M);
1100
1101 if (!Fn)
1102 return false;
1103
1104 if (FunctionsToSkip.match(Fn->getName()))
1105 return false;
1106
1107 // We can disregard __cxa_pure_virtual as a possible call target, as
1108 // calls to pure virtuals are UB.
1109 if (Fn->getName() == "__cxa_pure_virtual")
1110 continue;
1111
1112 // We can disregard unreachable functions as possible call targets, as
1113 // unreachable functions shouldn't be called.
1114 if (mustBeUnreachableFunction(Fn, ExportSummary))
1115 continue;
1116
1117 // Save the symbol used in the vtable to use as the devirtualization
1118 // target.
1119 auto *GV = dyn_cast<GlobalValue>(C);
1120 assert(GV);
1121 TargetsForSlot.push_back({GV, &TM});
1122 }
1123
1124 // Give up if we couldn't find any targets.
1125 return !TargetsForSlot.empty();
1126}
1127
1128bool DevirtIndex::tryFindVirtualCallTargets(
1129 std::vector<ValueInfo> &TargetsForSlot,
1130 const TypeIdCompatibleVtableInfo TIdInfo, uint64_t ByteOffset) {
1131 for (const TypeIdOffsetVtableInfo &P : TIdInfo) {
1132 // Find a representative copy of the vtable initializer.
1133 // We can have multiple available_externally, linkonce_odr and weak_odr
1134 // vtable initializers. We can also have multiple external vtable
1135 // initializers in the case of comdats, which we cannot check here.
1136 // The linker should give an error in this case.
1137 //
1138 // Also, handle the case of same-named local Vtables with the same path
1139 // and therefore the same GUID. This can happen if there isn't enough
1140 // distinguishing path when compiling the source file. In that case we
1141 // conservatively return false early.
1142 const GlobalVarSummary *VS = nullptr;
1143 bool LocalFound = false;
1144 for (const auto &S : P.VTableVI.getSummaryList()) {
1145 if (GlobalValue::isLocalLinkage(S->linkage())) {
1146 if (LocalFound)
1147 return false;
1148 LocalFound = true;
1149 }
1150 auto *CurVS = cast<GlobalVarSummary>(S->getBaseObject());
1151 if (!CurVS->vTableFuncs().empty() ||
1152 // Previously clang did not attach the necessary type metadata to
1153 // available_externally vtables, in which case there would not
1154 // be any vtable functions listed in the summary and we need
1155 // to treat this case conservatively (in case the bitcode is old).
1156 // However, we will also not have any vtable functions in the
1157 // case of a pure virtual base class. In that case we do want
1158 // to set VS to avoid treating it conservatively.
1160 VS = CurVS;
1161 // We cannot perform whole program devirtualization analysis on a vtable
1162 // with public LTO visibility.
1163 if (VS->getVCallVisibility() == GlobalObject::VCallVisibilityPublic)
1164 return false;
1165 }
1166 }
1167 // There will be no VS if all copies are available_externally having no
1168 // type metadata. In that case we can't safely perform WPD.
1169 if (!VS)
1170 return false;
1171 if (!VS->isLive())
1172 continue;
1173 for (auto VTP : VS->vTableFuncs()) {
1174 if (VTP.VTableOffset != P.AddressPointOffset + ByteOffset)
1175 continue;
1176
1177 if (mustBeUnreachableFunction(VTP.FuncVI))
1178 continue;
1179
1180 TargetsForSlot.push_back(VTP.FuncVI);
1181 }
1182 }
1183
1184 // Give up if we couldn't find any targets.
1185 return !TargetsForSlot.empty();
1186}
1187
1188void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
1189 Constant *TheFn, bool &IsExported) {
1190 // Don't devirtualize function if we're told to skip it
1191 // in -wholeprogramdevirt-skip.
1192 if (FunctionsToSkip.match(TheFn->stripPointerCasts()->getName()))
1193 return;
1194 auto Apply = [&](CallSiteInfo &CSInfo) {
1195 for (auto &&VCallSite : CSInfo.CallSites) {
1196 if (!OptimizedCalls.insert(&VCallSite.CB).second)
1197 continue;
1198
1199 // Stop when the number of devirted calls reaches the cutoff.
1200 if (WholeProgramDevirtCutoff.getNumOccurrences() > 0 &&
1201 NumDevirtCalls >= WholeProgramDevirtCutoff)
1202 return;
1203
1204 if (RemarksEnabled)
1205 VCallSite.emitRemark("single-impl",
1206 TheFn->stripPointerCasts()->getName(), OREGetter);
1207 NumSingleImpl++;
1208 NumDevirtCalls++;
1209 auto &CB = VCallSite.CB;
1210 assert(!CB.getCalledFunction() && "devirtualizing direct call?");
1211 IRBuilder<> Builder(&CB);
1212 Value *Callee =
1213 Builder.CreateBitCast(TheFn, CB.getCalledOperand()->getType());
1214
1215 // If trap checking is enabled, add support to compare the virtual
1216 // function pointer to the devirtualized target. In case of a mismatch,
1217 // perform a debug trap.
1219 auto *Cond = Builder.CreateICmpNE(CB.getCalledOperand(), Callee);
1221 Cond, &CB, /*Unreachable=*/false,
1222 MDBuilder(M.getContext()).createUnlikelyBranchWeights());
1223 Builder.SetInsertPoint(ThenTerm);
1224 Function *TrapFn =
1225 Intrinsic::getOrInsertDeclaration(&M, Intrinsic::debugtrap);
1226 auto *CallTrap = Builder.CreateCall(TrapFn);
1227 CallTrap->setDebugLoc(CB.getDebugLoc());
1228 }
1229
1230 // If fallback checking is enabled, add support to compare the virtual
1231 // function pointer to the devirtualized target. In case of a mismatch,
1232 // fall back to indirect call.
1234 MDNode *Weights = MDBuilder(M.getContext()).createLikelyBranchWeights();
1235 // Version the indirect call site. If the called value is equal to the
1236 // given callee, 'NewInst' will be executed, otherwise the original call
1237 // site will be executed.
1238 CallBase &NewInst = versionCallSite(CB, Callee, Weights);
1239 NewInst.setCalledOperand(Callee);
1240 // Since the new call site is direct, we must clear metadata that
1241 // is only appropriate for indirect calls. This includes !prof and
1242 // !callees metadata.
1243 NewInst.setMetadata(LLVMContext::MD_prof, nullptr);
1244 NewInst.setMetadata(LLVMContext::MD_callees, nullptr);
1245 // Additionally, we should remove them from the fallback indirect call,
1246 // so that we don't attempt to perform indirect call promotion later.
1247 CB.setMetadata(LLVMContext::MD_prof, nullptr);
1248 CB.setMetadata(LLVMContext::MD_callees, nullptr);
1249 }
1250
1251 // In either trapping or non-checking mode, devirtualize original call.
1252 else {
1253 // Devirtualize unconditionally.
1254 CB.setCalledOperand(Callee);
1255 // Since the call site is now direct, we must clear metadata that
1256 // is only appropriate for indirect calls. This includes !prof and
1257 // !callees metadata.
1258 CB.setMetadata(LLVMContext::MD_prof, nullptr);
1259 CB.setMetadata(LLVMContext::MD_callees, nullptr);
1260 if (CB.getCalledOperand() &&
1262 auto *NewCS = CallBase::removeOperandBundle(
1264 CB.replaceAllUsesWith(NewCS);
1265 // Schedule for deletion at the end of pass run.
1266 CallsWithPtrAuthBundleRemoved.push_back(&CB);
1267 }
1268 }
1269
1270 // This use is no longer unsafe.
1271 if (VCallSite.NumUnsafeUses)
1272 --*VCallSite.NumUnsafeUses;
1273 }
1274 if (CSInfo.isExported())
1275 IsExported = true;
1276 CSInfo.markDevirt();
1277 };
1278 Apply(SlotInfo.CSInfo);
1279 for (auto &P : SlotInfo.ConstCSInfo)
1280 Apply(P.second);
1281}
1282
1283static bool addCalls(VTableSlotInfo &SlotInfo, const ValueInfo &Callee) {
1284 // We can't add calls if we haven't seen a definition
1285 if (Callee.getSummaryList().empty())
1286 return false;
1287
1288 // Insert calls into the summary index so that the devirtualized targets
1289 // are eligible for import.
1290 // FIXME: Annotate type tests with hotness. For now, mark these as hot
1291 // to better ensure we have the opportunity to inline them.
1292 bool IsExported = false;
1293 auto &S = Callee.getSummaryList()[0];
1294 CalleeInfo CI(CalleeInfo::HotnessType::Hot, /* HasTailCall = */ false,
1295 /* RelBF = */ 0);
1296 auto AddCalls = [&](CallSiteInfo &CSInfo) {
1297 for (auto *FS : CSInfo.SummaryTypeCheckedLoadUsers) {
1298 FS->addCall({Callee, CI});
1299 IsExported |= S->modulePath() != FS->modulePath();
1300 }
1301 for (auto *FS : CSInfo.SummaryTypeTestAssumeUsers) {
1302 FS->addCall({Callee, CI});
1303 IsExported |= S->modulePath() != FS->modulePath();
1304 }
1305 };
1306 AddCalls(SlotInfo.CSInfo);
1307 for (auto &P : SlotInfo.ConstCSInfo)
1308 AddCalls(P.second);
1309 return IsExported;
1310}
1311
1312bool DevirtModule::trySingleImplDevirt(
1313 ModuleSummaryIndex *ExportSummary,
1314 MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
1315 WholeProgramDevirtResolution *Res) {
1316 // See if the program contains a single implementation of this virtual
1317 // function.
1318 auto *TheFn = TargetsForSlot[0].Fn;
1319 for (auto &&Target : TargetsForSlot)
1320 if (TheFn != Target.Fn)
1321 return false;
1322
1323 // If so, update each call site to call that implementation directly.
1324 if (RemarksEnabled || AreStatisticsEnabled())
1325 TargetsForSlot[0].WasDevirt = true;
1326
1327 bool IsExported = false;
1328 applySingleImplDevirt(SlotInfo, TheFn, IsExported);
1329 if (!IsExported)
1330 return false;
1331
1332 // If the only implementation has local linkage, we must promote to external
1333 // to make it visible to thin LTO objects. We can only get here during the
1334 // ThinLTO export phase.
1335 if (TheFn->hasLocalLinkage()) {
1336 std::string NewName = (TheFn->getName() + ".llvm.merged").str();
1337
1338 // Since we are renaming the function, any comdats with the same name must
1339 // also be renamed. This is required when targeting COFF, as the comdat name
1340 // must match one of the names of the symbols in the comdat.
1341 if (Comdat *C = TheFn->getComdat()) {
1342 if (C->getName() == TheFn->getName()) {
1343 Comdat *NewC = M.getOrInsertComdat(NewName);
1344 NewC->setSelectionKind(C->getSelectionKind());
1345 for (GlobalObject &GO : M.global_objects())
1346 if (GO.getComdat() == C)
1347 GO.setComdat(NewC);
1348 }
1349 }
1350
1351 TheFn->setLinkage(GlobalValue::ExternalLinkage);
1352 TheFn->setVisibility(GlobalValue::HiddenVisibility);
1353 TheFn->setName(NewName);
1354 }
1355 if (ValueInfo TheFnVI = ExportSummary->getValueInfo(TheFn->getGUID()))
1356 // Any needed promotion of 'TheFn' has already been done during
1357 // LTO unit split, so we can ignore return value of AddCalls.
1358 addCalls(SlotInfo, TheFnVI);
1359
1361 Res->SingleImplName = std::string(TheFn->getName());
1362
1363 return true;
1364}
1365
1366bool DevirtIndex::trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
1367 VTableSlotSummary &SlotSummary,
1368 VTableSlotInfo &SlotInfo,
1369 WholeProgramDevirtResolution *Res,
1370 std::set<ValueInfo> &DevirtTargets) {
1371 // See if the program contains a single implementation of this virtual
1372 // function.
1373 auto TheFn = TargetsForSlot[0];
1374 for (auto &&Target : TargetsForSlot)
1375 if (TheFn != Target)
1376 return false;
1377
1378 // Don't devirtualize if we don't have target definition.
1379 auto Size = TheFn.getSummaryList().size();
1380 if (!Size)
1381 return false;
1382
1383 // Don't devirtualize function if we're told to skip it
1384 // in -wholeprogramdevirt-skip.
1385 if (FunctionsToSkip.match(TheFn.name()))
1386 return false;
1387
1388 // If the summary list contains multiple summaries where at least one is
1389 // a local, give up, as we won't know which (possibly promoted) name to use.
1390 for (const auto &S : TheFn.getSummaryList())
1391 if (GlobalValue::isLocalLinkage(S->linkage()) && Size > 1)
1392 return false;
1393
1394 // Collect functions devirtualized at least for one call site for stats.
1396 DevirtTargets.insert(TheFn);
1397
1398 auto &S = TheFn.getSummaryList()[0];
1399 bool IsExported = addCalls(SlotInfo, TheFn);
1400 if (IsExported)
1401 ExportedGUIDs.insert(TheFn.getGUID());
1402
1403 // Record in summary for use in devirtualization during the ThinLTO import
1404 // step.
1406 if (GlobalValue::isLocalLinkage(S->linkage())) {
1407 if (IsExported)
1408 // If target is a local function and we are exporting it by
1409 // devirtualizing a call in another module, we need to record the
1410 // promoted name.
1412 TheFn.name(), ExportSummary.getModuleHash(S->modulePath()));
1413 else {
1414 LocalWPDTargetsMap[TheFn].push_back(SlotSummary);
1415 Res->SingleImplName = std::string(TheFn.name());
1416 }
1417 } else
1418 Res->SingleImplName = std::string(TheFn.name());
1419
1420 // Name will be empty if this thin link driven off of serialized combined
1421 // index (e.g. llvm-lto). However, WPD is not supported/invoked for the
1422 // legacy LTO API anyway.
1423 assert(!Res->SingleImplName.empty());
1424
1425 return true;
1426}
1427
1428void DevirtModule::tryICallBranchFunnel(
1429 MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
1430 WholeProgramDevirtResolution *Res, VTableSlot Slot) {
1431 Triple T(M.getTargetTriple());
1432 if (T.getArch() != Triple::x86_64)
1433 return;
1434
1435 if (TargetsForSlot.size() > ClThreshold)
1436 return;
1437
1438 bool HasNonDevirt = !SlotInfo.CSInfo.AllCallSitesDevirted;
1439 if (!HasNonDevirt)
1440 for (auto &P : SlotInfo.ConstCSInfo)
1441 if (!P.second.AllCallSitesDevirted) {
1442 HasNonDevirt = true;
1443 break;
1444 }
1445
1446 if (!HasNonDevirt)
1447 return;
1448
1449 // If any GV is AvailableExternally, not to generate branch.funnel.
1450 // NOTE: It is to avoid crash in LowerTypeTest.
1451 // If the branch.funnel is generated, because GV.isDeclarationForLinker(),
1452 // in LowerTypeTestsModule::lower(), its GlobalTypeMember would NOT
1453 // be saved in GlobalTypeMembers[&GV]. Then crash happens in
1454 // buildBitSetsFromDisjointSet due to GlobalTypeMembers[&GV] is NULL.
1455 // Even doing experiment to save it in GlobalTypeMembers[&GV] and
1456 // making GlobalTypeMembers[&GV] be not NULL, crash could avoid from
1457 // buildBitSetsFromDisjointSet. But still report_fatal_error in Verifier
1458 // or SelectionDAGBuilder later, because operands linkage type consistency
1459 // check of icall.branch.funnel can not pass.
1460 for (auto &T : TargetsForSlot) {
1461 if (T.TM->Bits->GV->hasAvailableExternallyLinkage())
1462 return;
1463 }
1464
1465 FunctionType *FT =
1466 FunctionType::get(Type::getVoidTy(M.getContext()), {Int8PtrTy}, true);
1467 Function *JT;
1468 if (isa<MDString>(Slot.TypeID)) {
1469 JT = Function::Create(FT, Function::ExternalLinkage,
1470 M.getDataLayout().getProgramAddressSpace(),
1471 getGlobalName(Slot, {}, "branch_funnel"), &M);
1472 JT->setVisibility(GlobalValue::HiddenVisibility);
1473 } else {
1474 JT = Function::Create(FT, Function::InternalLinkage,
1475 M.getDataLayout().getProgramAddressSpace(),
1476 "branch_funnel", &M);
1477 }
1478 JT->addParamAttr(0, Attribute::Nest);
1479
1480 std::vector<Value *> JTArgs;
1481 JTArgs.push_back(JT->arg_begin());
1482 for (auto &T : TargetsForSlot) {
1483 JTArgs.push_back(getMemberAddr(T.TM));
1484 JTArgs.push_back(T.Fn);
1485 }
1486
1487 BasicBlock *BB = BasicBlock::Create(M.getContext(), "", JT, nullptr);
1489 &M, llvm::Intrinsic::icall_branch_funnel, {});
1490
1491 auto *CI = CallInst::Create(Intr, JTArgs, "", BB);
1492 CI->setTailCallKind(CallInst::TCK_MustTail);
1493 ReturnInst::Create(M.getContext(), nullptr, BB);
1494
1495 bool IsExported = false;
1496 applyICallBranchFunnel(SlotInfo, *JT, IsExported);
1497 if (IsExported)
1499
1500 if (!JT->getEntryCount().has_value()) {
1501 // FIXME: we could pass through thinlto the necessary information.
1503 }
1504}
1505
1506void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
1507 Function &JT, bool &IsExported) {
1508 DenseMap<Function *, double> FunctionEntryCounts;
1509 auto Apply = [&](CallSiteInfo &CSInfo) {
1510 if (CSInfo.isExported())
1511 IsExported = true;
1512 if (CSInfo.AllCallSitesDevirted)
1513 return;
1514
1515 std::map<CallBase *, CallBase *> CallBases;
1516 for (auto &&VCallSite : CSInfo.CallSites) {
1517 CallBase &CB = VCallSite.CB;
1518
1519 if (CallBases.find(&CB) != CallBases.end()) {
1520 // When finding devirtualizable calls, it's possible to find the same
1521 // vtable passed to multiple llvm.type.test or llvm.type.checked.load
1522 // calls, which can cause duplicate call sites to be recorded in
1523 // [Const]CallSites. If we've already found one of these
1524 // call instances, just ignore it. It will be replaced later.
1525 continue;
1526 }
1527
1528 // Jump tables are only profitable if the retpoline mitigation is enabled.
1529 Attribute FSAttr = CB.getCaller()->getFnAttribute("target-features");
1530 if (!FSAttr.isValid() ||
1531 !FSAttr.getValueAsString().contains("+retpoline"))
1532 continue;
1533
1534 NumBranchFunnel++;
1535 if (RemarksEnabled)
1536 VCallSite.emitRemark("branch-funnel", JT.getName(), OREGetter);
1537
1538 // Pass the address of the vtable in the nest register, which is r10 on
1539 // x86_64.
1540 std::vector<Type *> NewArgs;
1541 NewArgs.push_back(Int8PtrTy);
1542 append_range(NewArgs, CB.getFunctionType()->params());
1543 FunctionType *NewFT =
1544 FunctionType::get(CB.getFunctionType()->getReturnType(), NewArgs,
1545 CB.getFunctionType()->isVarArg());
1546 IRBuilder<> IRB(&CB);
1547 std::vector<Value *> Args;
1548 Args.push_back(VCallSite.VTable);
1549 llvm::append_range(Args, CB.args());
1550
1551 CallBase *NewCS = nullptr;
1552 if (!JT.isDeclaration() && !ProfcheckDisableMetadataFixes) {
1553 // Accumulate the call frequencies of the original call site, and use
1554 // that as total entry count for the funnel function.
1555 auto &F = *CB.getCaller();
1556 auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
1557 auto EC = BFI.getBlockFreq(&F.getEntryBlock());
1558 auto CC = F.getEntryCount(/*AllowSynthetic=*/true);
1559 double CallCount = 0.0;
1560 if (EC.getFrequency() != 0 && CC && CC->getCount() != 0) {
1561 double CallFreq =
1562 static_cast<double>(
1563 BFI.getBlockFreq(CB.getParent()).getFrequency()) /
1564 EC.getFrequency();
1565 CallCount = CallFreq * CC->getCount();
1566 }
1567 FunctionEntryCounts[&JT] += CallCount;
1568 }
1569 if (isa<CallInst>(CB))
1570 NewCS = IRB.CreateCall(NewFT, &JT, Args);
1571 else
1572 NewCS =
1573 IRB.CreateInvoke(NewFT, &JT, cast<InvokeInst>(CB).getNormalDest(),
1574 cast<InvokeInst>(CB).getUnwindDest(), Args);
1575 NewCS->setCallingConv(CB.getCallingConv());
1576
1577 AttributeList Attrs = CB.getAttributes();
1578 std::vector<AttributeSet> NewArgAttrs;
1579 NewArgAttrs.push_back(AttributeSet::get(
1580 M.getContext(), ArrayRef<Attribute>{Attribute::get(
1581 M.getContext(), Attribute::Nest)}));
1582 for (unsigned I = 0; I + 2 < Attrs.getNumAttrSets(); ++I)
1583 NewArgAttrs.push_back(Attrs.getParamAttrs(I));
1584 NewCS->setAttributes(
1585 AttributeList::get(M.getContext(), Attrs.getFnAttrs(),
1586 Attrs.getRetAttrs(), NewArgAttrs));
1587
1588 CallBases[&CB] = NewCS;
1589
1590 // This use is no longer unsafe.
1591 if (VCallSite.NumUnsafeUses)
1592 --*VCallSite.NumUnsafeUses;
1593 }
1594 // Don't mark as devirtualized because there may be callers compiled without
1595 // retpoline mitigation, which would mean that they are lowered to
1596 // llvm.type.test and therefore require an llvm.type.test resolution for the
1597 // type identifier.
1598
1599 for (auto &[Old, New] : CallBases) {
1600 Old->replaceAllUsesWith(New);
1601 Old->eraseFromParent();
1602 }
1603 };
1604 Apply(SlotInfo.CSInfo);
1605 for (auto &P : SlotInfo.ConstCSInfo)
1606 Apply(P.second);
1607 for (auto &[F, C] : FunctionEntryCounts) {
1608 assert(!F->getEntryCount(/*AllowSynthetic=*/true) &&
1609 "Unexpected entry count for funnel that was freshly synthesized");
1610 F->setEntryCount(static_cast<uint64_t>(std::round(C)));
1611 }
1612}
1613
1614bool DevirtModule::tryEvaluateFunctionsWithArgs(
1616 ArrayRef<uint64_t> Args) {
1617 // Evaluate each function and store the result in each target's RetVal
1618 // field.
1619 for (VirtualCallTarget &Target : TargetsForSlot) {
1620 // TODO: Skip for now if the vtable symbol was an alias to a function,
1621 // need to evaluate whether it would be correct to analyze the aliasee
1622 // function for this optimization.
1623 auto *Fn = dyn_cast<Function>(Target.Fn);
1624 if (!Fn)
1625 return false;
1626
1627 if (Fn->arg_size() != Args.size() + 1)
1628 return false;
1629
1630 Evaluator Eval(M.getDataLayout(), nullptr);
1632 EvalArgs.push_back(
1634 for (unsigned I = 0; I != Args.size(); ++I) {
1635 auto *ArgTy =
1637 if (!ArgTy)
1638 return false;
1639 EvalArgs.push_back(ConstantInt::get(ArgTy, Args[I]));
1640 }
1641
1642 Constant *RetVal;
1643 if (!Eval.EvaluateFunction(Fn, RetVal, EvalArgs) ||
1644 !isa<ConstantInt>(RetVal))
1645 return false;
1646 Target.RetVal = cast<ConstantInt>(RetVal)->getZExtValue();
1647 }
1648 return true;
1649}
1650
1651void DevirtModule::applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
1652 uint64_t TheRetVal) {
1653 for (auto Call : CSInfo.CallSites) {
1654 if (!OptimizedCalls.insert(&Call.CB).second)
1655 continue;
1656 NumUniformRetVal++;
1657 Call.replaceAndErase(
1658 "uniform-ret-val", FnName, RemarksEnabled, OREGetter,
1659 ConstantInt::get(cast<IntegerType>(Call.CB.getType()), TheRetVal));
1660 }
1661 CSInfo.markDevirt();
1662}
1663
1664bool DevirtModule::tryUniformRetValOpt(
1665 MutableArrayRef<VirtualCallTarget> TargetsForSlot, CallSiteInfo &CSInfo,
1666 WholeProgramDevirtResolution::ByArg *Res) {
1667 // Uniform return value optimization. If all functions return the same
1668 // constant, replace all calls with that constant.
1669 uint64_t TheRetVal = TargetsForSlot[0].RetVal;
1670 for (const VirtualCallTarget &Target : TargetsForSlot)
1671 if (Target.RetVal != TheRetVal)
1672 return false;
1673
1674 if (CSInfo.isExported()) {
1676 Res->Info = TheRetVal;
1677 }
1678
1679 applyUniformRetValOpt(CSInfo, TargetsForSlot[0].Fn->getName(), TheRetVal);
1680 if (RemarksEnabled || AreStatisticsEnabled())
1681 for (auto &&Target : TargetsForSlot)
1682 Target.WasDevirt = true;
1683 return true;
1684}
1685
1686std::string DevirtModule::getGlobalName(VTableSlot Slot,
1687 ArrayRef<uint64_t> Args,
1688 StringRef Name) {
1689 std::string FullName = "__typeid_";
1690 raw_string_ostream OS(FullName);
1691 OS << cast<MDString>(Slot.TypeID)->getString() << '_' << Slot.ByteOffset;
1692 for (uint64_t Arg : Args)
1693 OS << '_' << Arg;
1694 OS << '_' << Name;
1695 return FullName;
1696}
1697
1698bool DevirtModule::shouldExportConstantsAsAbsoluteSymbols() {
1699 Triple T(M.getTargetTriple());
1700 return T.isX86() && T.getObjectFormat() == Triple::ELF;
1701}
1702
1703void DevirtModule::exportGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
1704 StringRef Name, Constant *C) {
1705 GlobalAlias *GA = GlobalAlias::create(Int8Ty, 0, GlobalValue::ExternalLinkage,
1706 getGlobalName(Slot, Args, Name), C, &M);
1708}
1709
1710void DevirtModule::exportConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
1711 StringRef Name, uint32_t Const,
1712 uint32_t &Storage) {
1713 if (shouldExportConstantsAsAbsoluteSymbols()) {
1714 exportGlobal(
1715 Slot, Args, Name,
1716 ConstantExpr::getIntToPtr(ConstantInt::get(Int32Ty, Const), Int8PtrTy));
1717 return;
1718 }
1719
1720 Storage = Const;
1721}
1722
1723Constant *DevirtModule::importGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
1724 StringRef Name) {
1725 GlobalVariable *GV =
1726 M.getOrInsertGlobal(getGlobalName(Slot, Args, Name), Int8Arr0Ty);
1728 return GV;
1729}
1730
1731Constant *DevirtModule::importConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
1732 StringRef Name, IntegerType *IntTy,
1733 uint32_t Storage) {
1734 if (!shouldExportConstantsAsAbsoluteSymbols())
1735 return ConstantInt::get(IntTy, Storage);
1736
1737 Constant *C = importGlobal(Slot, Args, Name);
1738 auto *GV = cast<GlobalVariable>(C->stripPointerCasts());
1739 C = ConstantExpr::getPtrToInt(C, IntTy);
1740
1741 // We only need to set metadata if the global is newly created, in which
1742 // case it would not have hidden visibility.
1743 if (GV->hasMetadata(LLVMContext::MD_absolute_symbol))
1744 return C;
1745
1746 auto SetAbsRange = [&](uint64_t Min, uint64_t Max) {
1747 auto *MinC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Min));
1748 auto *MaxC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Max));
1749 GV->setMetadata(LLVMContext::MD_absolute_symbol,
1750 MDNode::get(M.getContext(), {MinC, MaxC}));
1751 };
1752 unsigned AbsWidth = IntTy->getBitWidth();
1753 if (AbsWidth == IntPtrTy->getBitWidth())
1754 SetAbsRange(~0ull, ~0ull); // Full set.
1755 else
1756 SetAbsRange(0, 1ull << AbsWidth);
1757 return C;
1758}
1759
1760void DevirtModule::applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
1761 bool IsOne,
1762 Constant *UniqueMemberAddr) {
1763 for (auto &&Call : CSInfo.CallSites) {
1764 if (!OptimizedCalls.insert(&Call.CB).second)
1765 continue;
1766 IRBuilder<> B(&Call.CB);
1767 Value *Cmp =
1768 B.CreateICmp(IsOne ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE, Call.VTable,
1769 B.CreateBitCast(UniqueMemberAddr, Call.VTable->getType()));
1770 Cmp = B.CreateZExt(Cmp, Call.CB.getType());
1771 NumUniqueRetVal++;
1772 Call.replaceAndErase("unique-ret-val", FnName, RemarksEnabled, OREGetter,
1773 Cmp);
1774 }
1775 CSInfo.markDevirt();
1776}
1777
1778Constant *DevirtModule::getMemberAddr(const TypeMemberInfo *M) {
1779 return ConstantExpr::getGetElementPtr(Int8Ty, M->Bits->GV,
1780 ConstantInt::get(Int64Ty, M->Offset));
1781}
1782
1783bool DevirtModule::tryUniqueRetValOpt(
1784 unsigned BitWidth, MutableArrayRef<VirtualCallTarget> TargetsForSlot,
1785 CallSiteInfo &CSInfo, WholeProgramDevirtResolution::ByArg *Res,
1786 VTableSlot Slot, ArrayRef<uint64_t> Args) {
1787 // IsOne controls whether we look for a 0 or a 1.
1788 auto tryUniqueRetValOptFor = [&](bool IsOne) {
1789 const TypeMemberInfo *UniqueMember = nullptr;
1790 for (const VirtualCallTarget &Target : TargetsForSlot) {
1791 if (Target.RetVal == (IsOne ? 1 : 0)) {
1792 if (UniqueMember)
1793 return false;
1794 UniqueMember = Target.TM;
1795 }
1796 }
1797
1798 // We should have found a unique member or bailed out by now. We already
1799 // checked for a uniform return value in tryUniformRetValOpt.
1800 assert(UniqueMember);
1801
1802 Constant *UniqueMemberAddr = getMemberAddr(UniqueMember);
1803 if (CSInfo.isExported()) {
1805 Res->Info = IsOne;
1806
1807 exportGlobal(Slot, Args, "unique_member", UniqueMemberAddr);
1808 }
1809
1810 // Replace each call with the comparison.
1811 applyUniqueRetValOpt(CSInfo, TargetsForSlot[0].Fn->getName(), IsOne,
1812 UniqueMemberAddr);
1813
1814 // Update devirtualization statistics for targets.
1815 if (RemarksEnabled || AreStatisticsEnabled())
1816 for (auto &&Target : TargetsForSlot)
1817 Target.WasDevirt = true;
1818
1819 return true;
1820 };
1821
1822 if (BitWidth == 1) {
1823 if (tryUniqueRetValOptFor(true))
1824 return true;
1825 if (tryUniqueRetValOptFor(false))
1826 return true;
1827 }
1828 return false;
1829}
1830
1831void DevirtModule::applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
1832 Constant *Byte, Constant *Bit) {
1833 for (auto Call : CSInfo.CallSites) {
1834 if (!OptimizedCalls.insert(&Call.CB).second)
1835 continue;
1836 auto *RetType = cast<IntegerType>(Call.CB.getType());
1837 IRBuilder<> B(&Call.CB);
1838 Value *Addr = B.CreatePtrAdd(Call.VTable, Byte);
1839 if (RetType->getBitWidth() == 1) {
1840 Value *Bits = B.CreateLoad(Int8Ty, Addr);
1841 Value *BitsAndBit = B.CreateAnd(Bits, Bit);
1842 auto IsBitSet = B.CreateICmpNE(BitsAndBit, ConstantInt::get(Int8Ty, 0));
1843 NumVirtConstProp1Bit++;
1844 Call.replaceAndErase("virtual-const-prop-1-bit", FnName, RemarksEnabled,
1845 OREGetter, IsBitSet);
1846 } else {
1847 Value *Val = B.CreateLoad(RetType, Addr);
1848 NumVirtConstProp++;
1849 Call.replaceAndErase("virtual-const-prop", FnName, RemarksEnabled,
1850 OREGetter, Val);
1851 }
1852 }
1853 CSInfo.markDevirt();
1854}
1855
1856bool DevirtModule::tryVirtualConstProp(
1857 MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
1858 WholeProgramDevirtResolution *Res, VTableSlot Slot) {
1859 // TODO: Skip for now if the vtable symbol was an alias to a function,
1860 // need to evaluate whether it would be correct to analyze the aliasee
1861 // function for this optimization.
1862 auto *Fn = dyn_cast<Function>(TargetsForSlot[0].Fn);
1863 if (!Fn)
1864 return false;
1865 // This only works if the function returns an integer.
1866 auto *RetType = dyn_cast<IntegerType>(Fn->getReturnType());
1867 if (!RetType)
1868 return false;
1869 unsigned BitWidth = RetType->getBitWidth();
1870
1871 // TODO: Since we can evaluated these constants at compile-time, we can save
1872 // some space by calculating the smallest range of values that all these
1873 // constants can fit in, then only allocate enough space to fit those values.
1874 // At each callsite, we can get the original type by doing a sign/zero
1875 // extension. For example, if we would store an i64, but we can see that all
1876 // the values fit into an i16, then we can store an i16 before/after the
1877 // vtable and at each callsite do a s/zext.
1878 if (BitWidth > 64)
1879 return false;
1880
1881 Align TypeAlignment = M.getDataLayout().getABIIntegerTypeAlignment(BitWidth);
1882
1883 // Make sure that each function is defined, does not access memory, takes at
1884 // least one argument, does not use its first argument (which we assume is
1885 // 'this'), and has the same return type.
1886 //
1887 // Note that we test whether this copy of the function is readnone, rather
1888 // than testing function attributes, which must hold for any copy of the
1889 // function, even a less optimized version substituted at link time. This is
1890 // sound because the virtual constant propagation optimizations effectively
1891 // inline all implementations of the virtual function into each call site,
1892 // rather than using function attributes to perform local optimization.
1893 for (VirtualCallTarget &Target : TargetsForSlot) {
1894 // TODO: Skip for now if the vtable symbol was an alias to a function,
1895 // need to evaluate whether it would be correct to analyze the aliasee
1896 // function for this optimization.
1897 auto *Fn = dyn_cast<Function>(Target.Fn);
1898 if (!Fn)
1899 return false;
1900
1901 if (Fn->isDeclaration() ||
1902 !computeFunctionBodyMemoryAccess(*Fn, FAM.getResult<AAManager>(*Fn))
1904 Fn->arg_empty() || !Fn->arg_begin()->use_empty() ||
1905 Fn->getReturnType() != RetType)
1906 return false;
1907
1908 // This only works if the integer size is at most the alignment of the
1909 // vtable. If the table is underaligned, then we can't guarantee that the
1910 // constant will always be aligned to the integer type alignment. For
1911 // example, if the table is `align 1`, we can never guarantee that an i32
1912 // stored before/after the vtable is 32-bit aligned without changing the
1913 // alignment of the new global.
1914 GlobalVariable *GV = Target.TM->Bits->GV;
1915 Align TableAlignment = M.getDataLayout().getValueOrABITypeAlignment(
1916 GV->getAlign(), GV->getValueType());
1917 if (TypeAlignment > TableAlignment)
1918 return false;
1919 }
1920
1921 for (auto &&CSByConstantArg : SlotInfo.ConstCSInfo) {
1922 if (!tryEvaluateFunctionsWithArgs(TargetsForSlot, CSByConstantArg.first))
1923 continue;
1924
1925 WholeProgramDevirtResolution::ByArg *ResByArg = nullptr;
1926 if (Res)
1927 ResByArg = &Res->ResByArg[CSByConstantArg.first];
1928
1929 if (tryUniformRetValOpt(TargetsForSlot, CSByConstantArg.second, ResByArg))
1930 continue;
1931
1932 if (tryUniqueRetValOpt(BitWidth, TargetsForSlot, CSByConstantArg.second,
1933 ResByArg, Slot, CSByConstantArg.first))
1934 continue;
1935
1936 // Find an allocation offset in bits in all vtables associated with the
1937 // type.
1938 // TODO: If there would be "holes" in the vtable that were added by
1939 // padding, we could place i1s there to reduce any extra padding that
1940 // would be introduced by the i1s.
1941 uint64_t AllocBefore =
1942 findLowestOffset(TargetsForSlot, /*IsAfter=*/false, BitWidth);
1943 uint64_t AllocAfter =
1944 findLowestOffset(TargetsForSlot, /*IsAfter=*/true, BitWidth);
1945
1946 // Calculate the total amount of padding needed to store a value at both
1947 // ends of the object.
1948 uint64_t TotalPaddingBefore = 0, TotalPaddingAfter = 0;
1949 for (auto &&Target : TargetsForSlot) {
1950 TotalPaddingBefore += std::max<int64_t>(
1951 (AllocBefore + 7) / 8 - Target.allocatedBeforeBytes() - 1, 0);
1952 TotalPaddingAfter += std::max<int64_t>(
1953 (AllocAfter + 7) / 8 - Target.allocatedAfterBytes() - 1, 0);
1954 }
1955
1956 // If the amount of padding is too large, give up.
1957 // FIXME: do something smarter here.
1958 if (std::min(TotalPaddingBefore, TotalPaddingAfter) > 128)
1959 continue;
1960
1961 // Calculate the offset to the value as a (possibly negative) byte offset
1962 // and (if applicable) a bit offset, and store the values in the targets.
1963 int64_t OffsetByte;
1964 uint64_t OffsetBit;
1965 if (TotalPaddingBefore <= TotalPaddingAfter)
1966 setBeforeReturnValues(TargetsForSlot, AllocBefore, BitWidth, OffsetByte,
1967 OffsetBit);
1968 else
1969 setAfterReturnValues(TargetsForSlot, AllocAfter, BitWidth, OffsetByte,
1970 OffsetBit);
1971
1972 // In an earlier check we forbade constant propagation from operating on
1973 // tables whose alignment is less than the alignment needed for loading
1974 // the constant. Thus, the address we take the offset from will always be
1975 // aligned to at least this integer alignment. Now, we need to ensure that
1976 // the offset is also aligned to this integer alignment to ensure we always
1977 // have an aligned load.
1978 assert(OffsetByte % TypeAlignment.value() == 0);
1979
1980 if (RemarksEnabled || AreStatisticsEnabled())
1981 for (auto &&Target : TargetsForSlot)
1982 Target.WasDevirt = true;
1983
1984
1985 if (CSByConstantArg.second.isExported()) {
1987 exportConstant(Slot, CSByConstantArg.first, "byte", OffsetByte,
1988 ResByArg->Byte);
1989 exportConstant(Slot, CSByConstantArg.first, "bit", 1ULL << OffsetBit,
1990 ResByArg->Bit);
1991 }
1992
1993 // Rewrite each call to a load from OffsetByte/OffsetBit.
1994 Constant *ByteConst = ConstantInt::get(Int32Ty, OffsetByte);
1995 Constant *BitConst = ConstantInt::get(Int8Ty, 1ULL << OffsetBit);
1996 applyVirtualConstProp(CSByConstantArg.second,
1997 TargetsForSlot[0].Fn->getName(), ByteConst, BitConst);
1998 }
1999 return true;
2000}
2001
2002void DevirtModule::rebuildGlobal(VTableBits &B) {
2003 if (B.Before.Bytes.empty() && B.After.Bytes.empty())
2004 return;
2005
2006 // Align the before byte array to the global's minimum alignment so that we
2007 // don't break any alignment requirements on the global.
2008 Align Alignment = M.getDataLayout().getValueOrABITypeAlignment(
2009 B.GV->getAlign(), B.GV->getValueType());
2010 B.Before.Bytes.resize(alignTo(B.Before.Bytes.size(), Alignment));
2011
2012 // Before was stored in reverse order; flip it now.
2013 for (size_t I = 0, Size = B.Before.Bytes.size(); I != Size / 2; ++I)
2014 std::swap(B.Before.Bytes[I], B.Before.Bytes[Size - 1 - I]);
2015
2016 // Build an anonymous global containing the before bytes, followed by the
2017 // original initializer, followed by the after bytes.
2018 auto *NewInit = ConstantStruct::getAnon(
2019 {ConstantDataArray::get(M.getContext(), B.Before.Bytes),
2020 B.GV->getInitializer(),
2021 ConstantDataArray::get(M.getContext(), B.After.Bytes)});
2022 auto *NewGV =
2023 new GlobalVariable(M, NewInit->getType(), B.GV->isConstant(),
2024 GlobalVariable::PrivateLinkage, NewInit, "", B.GV);
2025 NewGV->setSection(B.GV->getSection());
2026 NewGV->setComdat(B.GV->getComdat());
2027 NewGV->setAlignment(B.GV->getAlign());
2028
2029 // Copy the original vtable's metadata to the anonymous global, adjusting
2030 // offsets as required.
2031 NewGV->copyMetadata(B.GV, B.Before.Bytes.size());
2032
2033 // Build an alias named after the original global, pointing at the second
2034 // element (the original initializer).
2035 auto *Alias = GlobalAlias::create(
2036 B.GV->getInitializer()->getType(), 0, B.GV->getLinkage(), "",
2038 NewInit->getType(), NewGV,
2039 ArrayRef<Constant *>{ConstantInt::get(Int32Ty, 0),
2040 ConstantInt::get(Int32Ty, 1)}),
2041 &M);
2042 Alias->setVisibility(B.GV->getVisibility());
2043 Alias->takeName(B.GV);
2044
2045 B.GV->replaceAllUsesWith(Alias);
2046 B.GV->eraseFromParent();
2047}
2048
2049bool DevirtModule::areRemarksEnabled() {
2050 const auto &FL = M.getFunctionList();
2051 for (const Function &Fn : FL) {
2052 if (Fn.empty())
2053 continue;
2054 auto DI = OptimizationRemark(DEBUG_TYPE, "", DebugLoc(), &Fn.front());
2055 return DI.isEnabled();
2056 }
2057 return false;
2058}
2059
2060void DevirtModule::scanTypeTestUsers(
2061 Function *TypeTestFunc,
2062 DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) {
2063 // Find all virtual calls via a virtual table pointer %p under an assumption
2064 // of the form llvm.assume(llvm.type.test(%p, %md)). This indicates that %p
2065 // points to a member of the type identifier %md. Group calls by (type ID,
2066 // offset) pair (effectively the identity of the virtual function) and store
2067 // to CallSlots.
2068 for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) {
2069 auto *CI = dyn_cast<CallInst>(U.getUser());
2070 if (!CI)
2071 continue;
2072
2073 // Search for virtual calls based on %p and add them to DevirtCalls.
2076 auto &DT = FAM.getResult<DominatorTreeAnalysis>(*CI->getFunction());
2077 findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT);
2078
2079 Metadata *TypeId =
2080 cast<MetadataAsValue>(CI->getArgOperand(1))->getMetadata();
2081 // If we found any, add them to CallSlots.
2082 if (!Assumes.empty()) {
2083 Value *Ptr = CI->getArgOperand(0)->stripPointerCasts();
2084 for (DevirtCallSite Call : DevirtCalls)
2085 CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CB, nullptr);
2086 }
2087
2088 auto RemoveTypeTestAssumes = [&]() {
2089 // We no longer need the assumes or the type test.
2090 for (auto *Assume : Assumes)
2091 Assume->eraseFromParent();
2092 // We can't use RecursivelyDeleteTriviallyDeadInstructions here because we
2093 // may use the vtable argument later.
2094 if (CI->use_empty())
2095 CI->eraseFromParent();
2096 };
2097
2098 // At this point we could remove all type test assume sequences, as they
2099 // were originally inserted for WPD. However, we can keep these in the
2100 // code stream for later analysis (e.g. to help drive more efficient ICP
2101 // sequences). They will eventually be removed by a second LowerTypeTests
2102 // invocation that cleans them up. In order to do this correctly, the first
2103 // LowerTypeTests invocation needs to know that they have "Unknown" type
2104 // test resolution, so that they aren't treated as Unsat and lowered to
2105 // False, which will break any uses on assumes. Below we remove any type
2106 // test assumes that will not be treated as Unknown by LTT.
2107
2108 // The type test assumes will be treated by LTT as Unsat if the type id is
2109 // not used on a global (in which case it has no entry in the TypeIdMap).
2110 if (!TypeIdMap.count(TypeId))
2111 RemoveTypeTestAssumes();
2112
2113 // For ThinLTO importing, we need to remove the type test assumes if this is
2114 // an MDString type id without a corresponding TypeIdSummary. Any
2115 // non-MDString type ids are ignored and treated as Unknown by LTT, so their
2116 // type test assumes can be kept. If the MDString type id is missing a
2117 // TypeIdSummary (e.g. because there was no use on a vcall, preventing the
2118 // exporting phase of WPD from analyzing it), then it would be treated as
2119 // Unsat by LTT and we need to remove its type test assumes here. If not
2120 // used on a vcall we don't need them for later optimization use in any
2121 // case.
2122 else if (ImportSummary && isa<MDString>(TypeId)) {
2123 const TypeIdSummary *TidSummary =
2124 ImportSummary->getTypeIdSummary(cast<MDString>(TypeId)->getString());
2125 if (!TidSummary)
2126 RemoveTypeTestAssumes();
2127 else
2128 // If one was created it should not be Unsat, because if we reached here
2129 // the type id was used on a global.
2131 }
2132 }
2133}
2134
2135void DevirtModule::scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc) {
2136 Function *TypeTestFunc =
2137 Intrinsic::getOrInsertDeclaration(&M, Intrinsic::type_test);
2138
2139 for (Use &U : llvm::make_early_inc_range(TypeCheckedLoadFunc->uses())) {
2140 auto *CI = dyn_cast<CallInst>(U.getUser());
2141 if (!CI)
2142 continue;
2143
2144 Value *Ptr = CI->getArgOperand(0);
2145 Value *Offset = CI->getArgOperand(1);
2146 Value *TypeIdValue = CI->getArgOperand(2);
2147 Metadata *TypeId = cast<MetadataAsValue>(TypeIdValue)->getMetadata();
2148
2152 bool HasNonCallUses = false;
2153 auto &DT = FAM.getResult<DominatorTreeAnalysis>(*CI->getFunction());
2154 findDevirtualizableCallsForTypeCheckedLoad(DevirtCalls, LoadedPtrs, Preds,
2155 HasNonCallUses, CI, DT);
2156
2157 // Start by generating "pessimistic" code that explicitly loads the function
2158 // pointer from the vtable and performs the type check. If possible, we will
2159 // eliminate the load and the type check later.
2160
2161 // If possible, only generate the load at the point where it is used.
2162 // This helps avoid unnecessary spills.
2163 IRBuilder<> LoadB(
2164 (LoadedPtrs.size() == 1 && !HasNonCallUses) ? LoadedPtrs[0] : CI);
2165
2166 Value *LoadedValue = nullptr;
2167 if (TypeCheckedLoadFunc->getIntrinsicID() ==
2168 Intrinsic::type_checked_load_relative) {
2170 &M, Intrinsic::load_relative, {Int32Ty});
2171 LoadedValue = LoadB.CreateCall(LoadRelFunc, {Ptr, Offset});
2172 } else {
2173 Value *GEP = LoadB.CreatePtrAdd(Ptr, Offset);
2174 LoadedValue = LoadB.CreateLoad(Int8PtrTy, GEP);
2175 }
2176
2177 for (Instruction *LoadedPtr : LoadedPtrs) {
2178 LoadedPtr->replaceAllUsesWith(LoadedValue);
2179 LoadedPtr->eraseFromParent();
2180 }
2181
2182 // Likewise for the type test.
2183 IRBuilder<> CallB((Preds.size() == 1 && !HasNonCallUses) ? Preds[0] : CI);
2184 CallInst *TypeTestCall = CallB.CreateCall(TypeTestFunc, {Ptr, TypeIdValue});
2185
2186 for (Instruction *Pred : Preds) {
2187 Pred->replaceAllUsesWith(TypeTestCall);
2188 Pred->eraseFromParent();
2189 }
2190
2191 // We have already erased any extractvalue instructions that refer to the
2192 // intrinsic call, but the intrinsic may have other non-extractvalue uses
2193 // (although this is unlikely). In that case, explicitly build a pair and
2194 // RAUW it.
2195 if (!CI->use_empty()) {
2196 Value *Pair = PoisonValue::get(CI->getType());
2197 IRBuilder<> B(CI);
2198 Pair = B.CreateInsertValue(Pair, LoadedValue, {0});
2199 Pair = B.CreateInsertValue(Pair, TypeTestCall, {1});
2200 CI->replaceAllUsesWith(Pair);
2201 }
2202
2203 // The number of unsafe uses is initially the number of uses.
2204 auto &NumUnsafeUses = NumUnsafeUsesForTypeTest[TypeTestCall];
2205 NumUnsafeUses = DevirtCalls.size();
2206
2207 // If the function pointer has a non-call user, we cannot eliminate the type
2208 // check, as one of those users may eventually call the pointer. Increment
2209 // the unsafe use count to make sure it cannot reach zero.
2210 if (HasNonCallUses)
2211 ++NumUnsafeUses;
2212 for (DevirtCallSite Call : DevirtCalls) {
2213 CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CB,
2214 &NumUnsafeUses);
2215 }
2216
2217 CI->eraseFromParent();
2218 }
2219}
2220
2221void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) {
2222 auto *TypeId = dyn_cast<MDString>(Slot.TypeID);
2223 if (!TypeId)
2224 return;
2225 const TypeIdSummary *TidSummary =
2226 ImportSummary->getTypeIdSummary(TypeId->getString());
2227 if (!TidSummary)
2228 return;
2229 auto ResI = TidSummary->WPDRes.find(Slot.ByteOffset);
2230 if (ResI == TidSummary->WPDRes.end())
2231 return;
2232 const WholeProgramDevirtResolution &Res = ResI->second;
2233
2235 assert(!Res.SingleImplName.empty());
2236 // The type of the function in the declaration is irrelevant because every
2237 // call site will cast it to the correct type.
2238 Constant *SingleImpl =
2239 cast<Constant>(M.getOrInsertFunction(Res.SingleImplName,
2240 Type::getVoidTy(M.getContext()))
2241 .getCallee());
2242
2243 // This is the import phase so we should not be exporting anything.
2244 bool IsExported = false;
2245 applySingleImplDevirt(SlotInfo, SingleImpl, IsExported);
2246 assert(!IsExported);
2247 }
2248
2249 for (auto &CSByConstantArg : SlotInfo.ConstCSInfo) {
2250 auto I = Res.ResByArg.find(CSByConstantArg.first);
2251 if (I == Res.ResByArg.end())
2252 continue;
2253 auto &ResByArg = I->second;
2254 // FIXME: We should figure out what to do about the "function name" argument
2255 // to the apply* functions, as the function names are unavailable during the
2256 // importing phase. For now we just pass the empty string. This does not
2257 // impact correctness because the function names are just used for remarks.
2258 switch (ResByArg.TheKind) {
2260 applyUniformRetValOpt(CSByConstantArg.second, "", ResByArg.Info);
2261 break;
2263 Constant *UniqueMemberAddr =
2264 importGlobal(Slot, CSByConstantArg.first, "unique_member");
2265 applyUniqueRetValOpt(CSByConstantArg.second, "", ResByArg.Info,
2266 UniqueMemberAddr);
2267 break;
2268 }
2270 Constant *Byte = importConstant(Slot, CSByConstantArg.first, "byte",
2271 Int32Ty, ResByArg.Byte);
2272 Constant *Bit = importConstant(Slot, CSByConstantArg.first, "bit", Int8Ty,
2273 ResByArg.Bit);
2274 applyVirtualConstProp(CSByConstantArg.second, "", Byte, Bit);
2275 break;
2276 }
2277 default:
2278 break;
2279 }
2280 }
2281
2283 // The type of the function is irrelevant, because it's bitcast at calls
2284 // anyhow.
2285 auto *JT = cast<Function>(
2286 M.getOrInsertFunction(getGlobalName(Slot, {}, "branch_funnel"),
2287 Type::getVoidTy(M.getContext()))
2288 .getCallee());
2289 bool IsExported = false;
2290 applyICallBranchFunnel(SlotInfo, *JT, IsExported);
2291 assert(!IsExported);
2292 }
2293}
2294
2295void DevirtModule::removeRedundantTypeTests() {
2296 auto *True = ConstantInt::getTrue(M.getContext());
2297 for (auto &&U : NumUnsafeUsesForTypeTest) {
2298 if (U.second == 0) {
2299 U.first->replaceAllUsesWith(True);
2300 U.first->eraseFromParent();
2301 }
2302 }
2303}
2304
2305ValueInfo
2306DevirtModule::lookUpFunctionValueInfo(Function *TheFn,
2307 ModuleSummaryIndex *ExportSummary) {
2308 assert((ExportSummary != nullptr) &&
2309 "Caller guarantees ExportSummary is not nullptr");
2310
2311 const auto TheFnGUID = TheFn->getGUID();
2312 const auto TheFnGUIDWithExportedName =
2314 // Look up ValueInfo with the GUID in the current linkage.
2315 ValueInfo TheFnVI = ExportSummary->getValueInfo(TheFnGUID);
2316 // If no entry is found and GUID is different from GUID computed using
2317 // exported name, look up ValueInfo with the exported name unconditionally.
2318 // This is a fallback.
2319 //
2320 // The reason to have a fallback:
2321 // 1. LTO could enable global value internalization via
2322 // `enable-lto-internalization`.
2323 // 2. The GUID in ExportedSummary is computed using exported name.
2324 if ((!TheFnVI) && (TheFnGUID != TheFnGUIDWithExportedName)) {
2325 TheFnVI = ExportSummary->getValueInfo(TheFnGUIDWithExportedName);
2326 }
2327 return TheFnVI;
2328}
2329
2330bool DevirtModule::mustBeUnreachableFunction(
2331 Function *const F, ModuleSummaryIndex *ExportSummary) {
2333 return false;
2334 // First, learn unreachability by analyzing function IR.
2335 if (!F->isDeclaration()) {
2336 // A function must be unreachable if its entry block ends with an
2337 // 'unreachable'.
2338 return isa<UnreachableInst>(F->getEntryBlock().getTerminator());
2339 }
2340 // Learn unreachability from ExportSummary if ExportSummary is present.
2341 return ExportSummary &&
2343 DevirtModule::lookUpFunctionValueInfo(F, ExportSummary));
2344}
2345
2346bool DevirtModule::run() {
2347 // If only some of the modules were split, we cannot correctly perform
2348 // this transformation. We already checked for the presense of type tests
2349 // with partially split modules during the thin link, and would have emitted
2350 // an error if any were found, so here we can simply return.
2351 if ((ExportSummary && ExportSummary->partiallySplitLTOUnits()) ||
2352 (ImportSummary && ImportSummary->partiallySplitLTOUnits()))
2353 return false;
2354
2355 Function *TypeTestFunc =
2356 Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_test);
2357 Function *TypeCheckedLoadFunc =
2358 Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_checked_load);
2359 Function *TypeCheckedLoadRelativeFunc = Intrinsic::getDeclarationIfExists(
2360 &M, Intrinsic::type_checked_load_relative);
2361 Function *AssumeFunc =
2362 Intrinsic::getDeclarationIfExists(&M, Intrinsic::assume);
2363
2364 // Normally if there are no users of the devirtualization intrinsics in the
2365 // module, this pass has nothing to do. But if we are exporting, we also need
2366 // to handle any users that appear only in the function summaries.
2367 if (!ExportSummary &&
2368 (!TypeTestFunc || TypeTestFunc->use_empty() || !AssumeFunc ||
2369 AssumeFunc->use_empty()) &&
2370 (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()) &&
2371 (!TypeCheckedLoadRelativeFunc ||
2372 TypeCheckedLoadRelativeFunc->use_empty()))
2373 return false;
2374
2375 // Rebuild type metadata into a map for easy lookup.
2376 std::vector<VTableBits> Bits;
2377 DenseMap<Metadata *, std::set<TypeMemberInfo>> TypeIdMap;
2378 buildTypeIdentifierMap(Bits, TypeIdMap);
2379
2380 if (TypeTestFunc && AssumeFunc)
2381 scanTypeTestUsers(TypeTestFunc, TypeIdMap);
2382
2383 if (TypeCheckedLoadFunc)
2384 scanTypeCheckedLoadUsers(TypeCheckedLoadFunc);
2385
2386 if (TypeCheckedLoadRelativeFunc)
2387 scanTypeCheckedLoadUsers(TypeCheckedLoadRelativeFunc);
2388
2389 if (ImportSummary) {
2390 for (auto &S : CallSlots)
2391 importResolution(S.first, S.second);
2392
2393 removeRedundantTypeTests();
2394
2395 // We have lowered or deleted the type intrinsics, so we will no longer have
2396 // enough information to reason about the liveness of virtual function
2397 // pointers in GlobalDCE.
2398 for (GlobalVariable &GV : M.globals())
2399 GV.eraseMetadata(LLVMContext::MD_vcall_visibility);
2400
2401 // The rest of the code is only necessary when exporting or during regular
2402 // LTO, so we are done.
2403 return true;
2404 }
2405
2406 if (TypeIdMap.empty())
2407 return true;
2408
2409 // Collect information from summary about which calls to try to devirtualize.
2410 if (ExportSummary) {
2411 DenseMap<GlobalValue::GUID, TinyPtrVector<Metadata *>> MetadataByGUID;
2412 for (auto &P : TypeIdMap) {
2413 if (auto *TypeId = dyn_cast<MDString>(P.first))
2415 TypeId->getString())]
2416 .push_back(TypeId);
2417 }
2418
2419 for (auto &P : *ExportSummary) {
2420 for (auto &S : P.second.SummaryList) {
2421 auto *FS = dyn_cast<FunctionSummary>(S.get());
2422 if (!FS)
2423 continue;
2424 // FIXME: Only add live functions.
2425 for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
2426 for (Metadata *MD : MetadataByGUID[VF.GUID]) {
2427 CallSlots[{MD, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS);
2428 }
2429 }
2430 for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
2431 for (Metadata *MD : MetadataByGUID[VF.GUID]) {
2432 CallSlots[{MD, VF.Offset}].CSInfo.addSummaryTypeCheckedLoadUser(FS);
2433 }
2434 }
2435 for (const FunctionSummary::ConstVCall &VC :
2436 FS->type_test_assume_const_vcalls()) {
2437 for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
2438 CallSlots[{MD, VC.VFunc.Offset}]
2439 .ConstCSInfo[VC.Args]
2440 .addSummaryTypeTestAssumeUser(FS);
2441 }
2442 }
2443 for (const FunctionSummary::ConstVCall &VC :
2444 FS->type_checked_load_const_vcalls()) {
2445 for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
2446 CallSlots[{MD, VC.VFunc.Offset}]
2447 .ConstCSInfo[VC.Args]
2448 .addSummaryTypeCheckedLoadUser(FS);
2449 }
2450 }
2451 }
2452 }
2453 }
2454
2455 // For each (type, offset) pair:
2456 bool DidVirtualConstProp = false;
2457 std::map<std::string, GlobalValue *> DevirtTargets;
2458 for (auto &S : CallSlots) {
2459 // Search each of the members of the type identifier for the virtual
2460 // function implementation at offset S.first.ByteOffset, and add to
2461 // TargetsForSlot.
2462 std::vector<VirtualCallTarget> TargetsForSlot;
2463 WholeProgramDevirtResolution *Res = nullptr;
2464 const std::set<TypeMemberInfo> &TypeMemberInfos = TypeIdMap[S.first.TypeID];
2465 if (ExportSummary && isa<MDString>(S.first.TypeID) &&
2466 TypeMemberInfos.size())
2467 // For any type id used on a global's type metadata, create the type id
2468 // summary resolution regardless of whether we can devirtualize, so that
2469 // lower type tests knows the type id is not Unsat. If it was not used on
2470 // a global's type metadata, the TypeIdMap entry set will be empty, and
2471 // we don't want to create an entry (with the default Unknown type
2472 // resolution), which can prevent detection of the Unsat.
2473 Res = &ExportSummary
2474 ->getOrInsertTypeIdSummary(
2475 cast<MDString>(S.first.TypeID)->getString())
2476 .WPDRes[S.first.ByteOffset];
2477 if (tryFindVirtualCallTargets(TargetsForSlot, TypeMemberInfos,
2478 S.first.ByteOffset, ExportSummary)) {
2479
2480 if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res)) {
2481 DidVirtualConstProp |=
2482 tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first);
2483
2484 tryICallBranchFunnel(TargetsForSlot, S.second, Res, S.first);
2485 }
2486
2487 // Collect functions devirtualized at least for one call site for stats.
2488 if (RemarksEnabled || AreStatisticsEnabled())
2489 for (const auto &T : TargetsForSlot)
2490 if (T.WasDevirt)
2491 DevirtTargets[std::string(T.Fn->getName())] = T.Fn;
2492 }
2493
2494 // CFI-specific: if we are exporting and any llvm.type.checked.load
2495 // intrinsics were *not* devirtualized, we need to add the resulting
2496 // llvm.type.test intrinsics to the function summaries so that the
2497 // LowerTypeTests pass will export them.
2498 if (ExportSummary && isa<MDString>(S.first.TypeID)) {
2500 cast<MDString>(S.first.TypeID)->getString());
2501 auto AddTypeTestsForTypeCheckedLoads = [&](CallSiteInfo &CSI) {
2502 if (!CSI.AllCallSitesDevirted)
2503 for (auto *FS : CSI.SummaryTypeCheckedLoadUsers)
2504 FS->addTypeTest(GUID);
2505 };
2506 AddTypeTestsForTypeCheckedLoads(S.second.CSInfo);
2507 for (auto &CCS : S.second.ConstCSInfo)
2508 AddTypeTestsForTypeCheckedLoads(CCS.second);
2509 }
2510 }
2511
2512 if (RemarksEnabled) {
2513 // Generate remarks for each devirtualized function.
2514 for (const auto &DT : DevirtTargets) {
2515 GlobalValue *GV = DT.second;
2516 auto *F = dyn_cast<Function>(GV);
2517 if (!F) {
2518 auto *A = dyn_cast<GlobalAlias>(GV);
2519 assert(A && isa<Function>(A->getAliasee()));
2520 F = dyn_cast<Function>(A->getAliasee());
2521 assert(F);
2522 }
2523
2524 using namespace ore;
2525 OREGetter(*F).emit(OptimizationRemark(DEBUG_TYPE, "Devirtualized", F)
2526 << "devirtualized " << NV("FunctionName", DT.first));
2527 }
2528 }
2529
2530 NumDevirtTargets += DevirtTargets.size();
2531
2532 removeRedundantTypeTests();
2533
2534 // Rebuild each global we touched as part of virtual constant propagation to
2535 // include the before and after bytes.
2536 if (DidVirtualConstProp)
2537 for (VTableBits &B : Bits)
2538 rebuildGlobal(B);
2539
2540 // We have lowered or deleted the type intrinsics, so we will no longer have
2541 // enough information to reason about the liveness of virtual function
2542 // pointers in GlobalDCE.
2543 for (GlobalVariable &GV : M.globals())
2544 GV.eraseMetadata(LLVMContext::MD_vcall_visibility);
2545
2546 for (auto *CI : CallsWithPtrAuthBundleRemoved)
2547 CI->eraseFromParent();
2548
2549 return true;
2550}
2551
2552void DevirtIndex::run() {
2553 if (ExportSummary.typeIdCompatibleVtableMap().empty())
2554 return;
2555
2556 DenseMap<GlobalValue::GUID, std::vector<StringRef>> NameByGUID;
2557 for (const auto &P : ExportSummary.typeIdCompatibleVtableMap()) {
2558 NameByGUID[GlobalValue::getGUIDAssumingExternalLinkage(P.first)].push_back(
2559 P.first);
2560 // Create the type id summary resolution regardlness of whether we can
2561 // devirtualize, so that lower type tests knows the type id is used on
2562 // a global and not Unsat. We do this here rather than in the loop over the
2563 // CallSlots, since that handling will only see type tests that directly
2564 // feed assumes, and we would miss any that aren't currently handled by WPD
2565 // (such as type tests that feed assumes via phis).
2566 ExportSummary.getOrInsertTypeIdSummary(P.first);
2567 }
2568
2569 // Collect information from summary about which calls to try to devirtualize.
2570 for (auto &P : ExportSummary) {
2571 for (auto &S : P.second.SummaryList) {
2572 auto *FS = dyn_cast<FunctionSummary>(S.get());
2573 if (!FS)
2574 continue;
2575 // FIXME: Only add live functions.
2576 for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
2577 for (StringRef Name : NameByGUID[VF.GUID]) {
2578 CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS);
2579 }
2580 }
2581 for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
2582 for (StringRef Name : NameByGUID[VF.GUID]) {
2583 CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeCheckedLoadUser(FS);
2584 }
2585 }
2586 for (const FunctionSummary::ConstVCall &VC :
2587 FS->type_test_assume_const_vcalls()) {
2588 for (StringRef Name : NameByGUID[VC.VFunc.GUID]) {
2589 CallSlots[{Name, VC.VFunc.Offset}]
2590 .ConstCSInfo[VC.Args]
2591 .addSummaryTypeTestAssumeUser(FS);
2592 }
2593 }
2594 for (const FunctionSummary::ConstVCall &VC :
2595 FS->type_checked_load_const_vcalls()) {
2596 for (StringRef Name : NameByGUID[VC.VFunc.GUID]) {
2597 CallSlots[{Name, VC.VFunc.Offset}]
2598 .ConstCSInfo[VC.Args]
2599 .addSummaryTypeCheckedLoadUser(FS);
2600 }
2601 }
2602 }
2603 }
2604
2605 std::set<ValueInfo> DevirtTargets;
2606 // For each (type, offset) pair:
2607 for (auto &S : CallSlots) {
2608 // Search each of the members of the type identifier for the virtual
2609 // function implementation at offset S.first.ByteOffset, and add to
2610 // TargetsForSlot.
2611 std::vector<ValueInfo> TargetsForSlot;
2612 auto TidSummary = ExportSummary.getTypeIdCompatibleVtableSummary(S.first.TypeID);
2613 assert(TidSummary);
2614 // The type id summary would have been created while building the NameByGUID
2615 // map earlier.
2616 WholeProgramDevirtResolution *Res =
2617 &ExportSummary.getTypeIdSummary(S.first.TypeID)
2618 ->WPDRes[S.first.ByteOffset];
2619 if (tryFindVirtualCallTargets(TargetsForSlot, *TidSummary,
2620 S.first.ByteOffset)) {
2621
2622 if (!trySingleImplDevirt(TargetsForSlot, S.first, S.second, Res,
2623 DevirtTargets))
2624 continue;
2625 }
2626 }
2627
2628 // Optionally have the thin link print message for each devirtualized
2629 // function.
2631 for (const auto &DT : DevirtTargets)
2632 errs() << "Devirtualized call to " << DT << "\n";
2633
2634 NumDevirtTargets += DevirtTargets.size();
2635}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
dxil translate DXIL Translate Metadata
This file defines DenseMapInfo traits for DenseMap.
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
Provides passes for computing function attributes based on interprocedural analyses.
@ CallSiteInfo
#define DEBUG_TYPE
static void emitRemark(const Function &F, OptimizationRemarkEmitter &ORE, bool Skip)
Hexagon Common GEP
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
static cl::opt< std::string > ClReadSummary("lowertypetests-read-summary", cl::desc("Read summary from given YAML file before running pass"), cl::Hidden)
static cl::opt< PassSummaryAction > ClSummaryAction("lowertypetests-summary-action", cl::desc("What to do with the summary when running this pass"), cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"), clEnumValN(PassSummaryAction::Import, "import", "Import typeid resolutions from summary and globals"), clEnumValN(PassSummaryAction::Export, "export", "Export typeid resolutions to summary and globals")), cl::Hidden)
static cl::opt< std::string > ClWriteSummary("lowertypetests-write-summary", cl::desc("Write summary to given YAML file after running pass"), cl::Hidden)
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
This file contains the declarations for metadata subclasses.
Type::TypeID TypeID
#define T
static bool mustBeUnreachableFunction(const Function &F)
uint64_t IntrinsicInst * II
#define P(N)
FunctionAnalysisManager FAM
ModuleAnalysisManager MAM
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
WPDCheckMode
Mechanism to add runtime checking of devirtualization decisions, optionally trapping or falling back ...
static bool typeIDVisibleToRegularObj(StringRef TypeID, function_ref< bool(StringRef)> IsVisibleToRegularObj)
static Error checkCombinedSummaryForTesting(ModuleSummaryIndex *Summary)
static bool addCalls(VTableSlotInfo &SlotInfo, const ValueInfo &Callee)
static cl::opt< WPDCheckMode > DevirtCheckMode("wholeprogramdevirt-check", cl::Hidden, cl::desc("Type of checking for incorrect devirtualizations"), cl::values(clEnumValN(WPDCheckMode::None, "none", "No checking"), clEnumValN(WPDCheckMode::Trap, "trap", "Trap when incorrect"), clEnumValN(WPDCheckMode::Fallback, "fallback", "Fallback to indirect when incorrect")))
static cl::opt< bool > WholeProgramDevirtKeepUnreachableFunction("wholeprogramdevirt-keep-unreachable-function", cl::desc("Regard unreachable functions as possible devirtualize targets."), cl::Hidden, cl::init(true))
With Clang, a pure virtual class's deleting destructor is emitted as a llvm.trap intrinsic followed b...
static bool skipUpdateDueToValidation(GlobalVariable &GV, function_ref< bool(StringRef)> IsVisibleToRegularObj)
static cl::opt< unsigned > WholeProgramDevirtCutoff("wholeprogramdevirt-cutoff", cl::desc("Max number of devirtualizations for devirt module pass"), cl::init(0))
If explicitly specified, the devirt module pass will stop transformation once the total number of dev...
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition ArrayRef.h:191
static LLVM_ABI AttributeSet get(LLVMContext &C, const AttrBuilder &B)
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
bool isValid() const
Return true if the attribute is any kind of attribute.
Definition Attributes.h:223
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
void setCallingConv(CallingConv::ID CC)
bool arg_empty() const
std::optional< OperandBundleUse > getOperandBundle(StringRef Name) const
Return an operand bundle by name, if present.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
CallingConv::ID getCallingConv() const
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
FunctionType * getFunctionType() const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
static LLVM_ABI CallBase * removeOperandBundle(CallBase *CB, uint32_t ID, InsertPosition InsertPt=nullptr)
Create a clone of CB with operand bundle ID removed.
AttributeList getAttributes() const
Return the attributes for this call.
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
void setSelectionKind(SelectionKind Val)
Definition Comdat.h:48
static ConstantAsMetadata * get(Constant *C)
Definition Metadata.h:536
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition Constants.h:715
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static Constant * getInBoundsGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList)
Create an "inbounds" getelementptr.
Definition Constants.h:1301
static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, GEPNoWrapFlags NW=GEPNoWrapFlags::none(), std::optional< ConstantRange > InRange=std::nullopt, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
Definition Constants.h:1274
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static Constant * getAnon(ArrayRef< Constant * > V, bool Packed=false)
Return an anonymous struct that has the specified elements.
Definition Constants.h:486
const Constant * stripPointerCasts() const
Definition Constant.h:219
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
bool empty() const
Definition DenseMap.h:109
Implements a dense probed hash-table based set.
Definition DenseSet.h:279
Subclass of Error for the sole purpose of identifying the success path in the type system.
Definition Error.h:334
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
Tagged union holding either a T or a Error.
Definition Error.h:485
Type * getParamType(unsigned i) const
Parameter type accessors.
ArrayRef< Type * > params() const
bool isVarArg() const
Type * getReturnType() const
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition Function.h:166
bool empty() const
Definition Function.h:857
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:209
bool arg_empty() const
Definition Function.h:900
const BasicBlock & front() const
Definition Function.h:858
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:762
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition Function.h:244
arg_iterator arg_begin()
Definition Function.h:866
size_t arg_size() const
Definition Function.h:899
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:214
static LLVM_ABI Expected< GlobPattern > create(StringRef Pat, std::optional< size_t > MaxSubPatterns={})
static LLVM_ABI GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition Globals.cpp:597
bool hasMetadata() const
Return true if this value has any metadata attached to it.
Definition Value.h:602
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set a particular kind of metadata attachment.
LLVM_ABI VCallVisibility getVCallVisibility() const
LLVM_ABI bool eraseMetadata(unsigned KindID)
Erase all metadata attachments with the given kind.
MDNode * getMetadata(unsigned KindID) const
Get the current metadata attachments for the given kind, if any.
Definition Value.h:576
LLVM_ABI void setVCallVisibilityMetadata(VCallVisibility Visibility)
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
Definition Globals.cpp:77
static bool isLocalLinkage(LinkageTypes Linkage)
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:328
static bool isAvailableExternallyLinkage(LinkageTypes Linkage)
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e.
@ HiddenVisibility
The GV is hidden.
Definition GlobalValue.h:69
void setVisibility(VisibilityTypes V)
@ ExternalLinkage
Externally visible function.
Definition GlobalValue.h:53
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
MaybeAlign getAlign() const
Returns the alignment of the given variable.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1569
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
bool doesNotAccessMemory() const
Whether this function accesses no memory.
Definition ModRef.h:215
Root of the metadata hierarchy.
Definition Metadata.h:64
Class to hold module path string table and global value map, and encapsulate methods for operating on...
const TypeIdSummary * getTypeIdSummary(StringRef TypeId) const
This returns either a pointer to the type id summary (if present in the summary map) or null (if not ...
ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const
Return a ValueInfo for the index value_type (convenient when iterating index).
const ModuleHash & getModuleHash(const StringRef ModPath) const
Get the module SHA1 hash recorded for the given module path.
static constexpr const char * getRegularLTOModuleName()
static std::string getGlobalNameForLocal(StringRef Name, ModuleHash ModHash)
Convenience method for creating a promoted global name for the given value name of a local,...
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:303
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
static ReturnInst * Create(LLVMContext &C, Value *retVal=nullptr, InsertPosition InsertBefore=nullptr)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool contains(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
Definition StringRef.h:426
Target - Wrapper for Target specific information.
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:390
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
bool use_empty() const
Definition Value.h:346
LLVM_ABI bool eraseMetadata(unsigned KindID)
Erase all metadata attachments with the given kind.
iterator_range< use_iterator > uses()
Definition Value.h:380
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition DenseSet.h:180
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
CallInst * Call
Changed
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI Function * getDeclarationIfExists(const Module *M, ID id)
Look up the Function declaration of the intrinsic id in the Module M and return it if it exists.
bool match(Val *V, const Pattern &P)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
@ Assume
Do not drop type tests (default).
DiagnosticInfoOptimizationBase::Argument NV
Context & getContext() const
Definition BasicBlock.h:99
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
@ OF_TextWithCRLF
The file should be opened in text mode and use a carriage linefeed '\r '.
Definition FileSystem.h:764
LLVM_ABI uint64_t findLowestOffset(ArrayRef< VirtualCallTarget > Targets, bool IsAfter, uint64_t Size)
LLVM_ABI void setAfterReturnValues(MutableArrayRef< VirtualCallTarget > Targets, uint64_t AllocAfter, unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit)
LLVM_ABI void setBeforeReturnValues(MutableArrayRef< VirtualCallTarget > Targets, uint64_t AllocBefore, unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:318
@ Offset
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI MemoryEffects computeFunctionBodyMemoryAccess(Function &F, AAResults &AAR)
Returns the memory access properties of this copy of the function.
static cl::opt< bool > DisableWholeProgramVisibility("disable-whole-program-visibility", cl::Hidden, cl::desc("Disable whole program visibility (overrides enabling options)"))
Provide a way to force disable whole program for debugging or workarounds, when enabled via the linke...
static cl::opt< bool > WholeProgramVisibility("whole-program-visibility", cl::Hidden, cl::desc("Enable whole program visibility"))
Provide a way to force enable whole program visibility in tests.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:644
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
Definition InstrProf.h:296
static cl::opt< unsigned > ClThreshold("wholeprogramdevirt-branch-funnel-threshold", cl::Hidden, cl::init(10), cl::desc("Maximum number of call targets per " "call site to enable branch funnels"))
@ Export
Export information to summary.
Definition IPO.h:57
@ None
Do nothing.
Definition IPO.h:55
@ Import
Import information from summary.
Definition IPO.h:56
static cl::opt< std::string > ClReadSummary("wholeprogramdevirt-read-summary", cl::desc("Read summary from given bitcode or YAML file before running pass"), cl::Hidden)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2116
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1305
LLVM_ABI bool hasWholeProgramVisibility(bool WholeProgramVisibilityEnabledInLTO)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:186
LLVM_ABI void writeIndexToFile(const ModuleSummaryIndex &Index, raw_ostream &Out, const ModuleToSummariesForIndexTy *ModuleToSummariesForIndex=nullptr, const GVSummaryPtrSet *DecSummaries=nullptr)
Write the specified module summary index to the given raw output stream, where it will be written in ...
LLVM_ABI Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
@ invalid_argument
Definition Errc.h:56
static cl::opt< std::string > ClWriteSummary("wholeprogramdevirt-write-summary", cl::desc("Write summary to given bitcode or YAML file after running pass. " "Output file format is deduced from extension: *.bc means writing " "bitcode, otherwise YAML"), cl::Hidden)
LLVM_ABI void updatePublicTypeTestCalls(Module &M, bool WholeProgramVisibilityEnabledInLTO)
LLVM_ABI void getVisibleToRegularObjVtableGUIDs(ModuleSummaryIndex &Index, DenseSet< GlobalValue::GUID > &VisibleToRegularObjSymbols, function_ref< bool(StringRef)> IsVisibleToRegularObj)
Based on typeID string, get all associated vtable GUIDS that are visible to regular objects.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void findDevirtualizableCallsForTypeCheckedLoad(SmallVectorImpl< DevirtCallSite > &DevirtCalls, SmallVectorImpl< Instruction * > &LoadedPtrs, SmallVectorImpl< Instruction * > &Preds, bool &HasNonCallUses, const CallInst *CI, DominatorTree &DT)
Given a call to the intrinsic @llvm.type.checked.load, find all devirtualizable call sites based on t...
LLVM_ABI CallBase & versionCallSite(CallBase &CB, Value *Callee, MDNode *BranchWeights)
Predicate and clone the given call site.
LLVM_ABI bool AreStatisticsEnabled()
Check if statistics are enabled.
LLVM_ABI void updateIndexWPDForExports(ModuleSummaryIndex &Summary, function_ref< bool(StringRef, ValueInfo)> isExported, std::map< ValueInfo, std::vector< VTableSlotSummary > > &LocalWPDTargetsMap)
Call after cross-module importing to update the recorded single impl devirt target names for any loca...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
LLVM_ABI void setExplicitlyUnknownFunctionEntryCount(Function &F, StringRef PassName)
Analogous to setExplicitlyUnknownBranchWeights, but for functions and their entry counts.
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
static cl::list< std::string > SkipFunctionNames("wholeprogramdevirt-skip", cl::desc("Prevent function(s) from being devirtualized"), cl::Hidden, cl::CommaSeparated)
Provide way to prevent certain function from being devirtualized.
LLVM_ABI void runWholeProgramDevirtOnIndex(ModuleSummaryIndex &Summary, std::set< GlobalValue::GUID > &ExportedGUIDs, std::map< ValueInfo, std::vector< VTableSlotSummary > > &LocalWPDTargetsMap)
Perform index-based whole program devirtualization on the Summary index.
static cl::opt< PassSummaryAction > ClSummaryAction("wholeprogramdevirt-summary-action", cl::desc("What to do with the summary when running this pass"), cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"), clEnumValN(PassSummaryAction::Import, "import", "Import typeid resolutions from summary and globals"), clEnumValN(PassSummaryAction::Export, "export", "Export typeid resolutions to summary and globals")), cl::Hidden)
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
Expected< T > errorOrToExpected(ErrorOr< T > &&EO)
Convert an ErrorOr<T> to an Expected<T>.
Definition Error.h:1245
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:560
LLVM_ABI Error errorCodeToError(std::error_code EC)
Helper for converting an std::error_code to a Error.
Definition Error.cpp:111
cl::opt< bool > ProfcheckDisableMetadataFixes("profcheck-disable-metadata-fixes", cl::Hidden, cl::init(false), cl::desc("Disable metadata propagation fixes discovered through Issue #147390"))
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
std::vector< TypeIdOffsetVtableInfo > TypeIdCompatibleVtableInfo
List of vtable definitions decorated by a particular type identifier, and their corresponding offsets...
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
static cl::opt< bool > PrintSummaryDevirt("wholeprogramdevirt-print-index-based", cl::Hidden, cl::desc("Print index-based devirtualization messages"))
void consumeError(Error Err)
Consume a Error without doing anything.
Definition Error.h:1083
void findDevirtualizableCallsForTypeTest(SmallVectorImpl< DevirtCallSite > &DevirtCalls, SmallVectorImpl< CallInst * > &Assumes, const CallInst *CI, DominatorTree &DT)
Given a call to the intrinsic @llvm.type.test, find all devirtualizable call sites based on the call ...
LLVM_ABI void updateVCallVisibilityInModule(Module &M, bool WholeProgramVisibilityEnabledInLTO, const DenseSet< GlobalValue::GUID > &DynamicExportSymbols, bool ValidateAllVtablesHaveTypeInfos, function_ref< bool(StringRef)> IsVisibleToRegularObj)
If whole program visibility asserted, then upgrade all public vcall visibility metadata on vtable def...
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
std::pair< Function *, Constant * > getFunctionAtVTableOffset(GlobalVariable *GV, uint64_t Offset, Module &M)
Given a vtable and a specified offset, returns the function and the trivial pointer at the specified ...
LLVM_ABI void updateVCallVisibilityInIndex(ModuleSummaryIndex &Index, bool WholeProgramVisibilityEnabledInLTO, const DenseSet< GlobalValue::GUID > &DynamicExportSymbols, const DenseSet< GlobalValue::GUID > &VisibleToRegularObjSymbols)
If whole program visibility asserted, then upgrade all public vcall visibility metadata on vtable def...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Class to accumulate and hold information about a callee.
static unsigned getHashValue(const VTableSlotSummary &I)
static bool isEqual(const VTableSlotSummary &LHS, const VTableSlotSummary &RHS)
static bool isEqual(const VTableSlot &LHS, const VTableSlot &RHS)
static unsigned getHashValue(const VTableSlot &I)
An information struct used to provide DenseMap with the various necessary components for a given valu...
The following data structures summarize type metadata information.
std::map< uint64_t, WholeProgramDevirtResolution > WPDRes
Mapping from byte offset to whole-program devirt resolution for that (typeid, byte offset) pair.
TypeTestResolution TTRes
@ Unsat
Unsatisfiable type (i.e. no global has this type metadata)
enum llvm::TypeTestResolution::Kind TheKind
Struct that holds a reference to a particular GUID in a global value summary.
ArrayRef< std::unique_ptr< GlobalValueSummary > > getSummaryList() const
const ModuleSummaryIndex * ImportSummary
ModuleSummaryIndex * ExportSummary
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &)
@ UniformRetVal
Uniform return value optimization.
@ VirtualConstProp
Virtual constant propagation.
@ UniqueRetVal
Unique return value optimization.
uint64_t Info
Additional information for the resolution:
enum llvm::WholeProgramDevirtResolution::ByArg::Kind TheKind
enum llvm::WholeProgramDevirtResolution::Kind TheKind
std::map< std::vector< uint64_t >, ByArg > ResByArg
Resolutions for calls with all constant integer arguments (excluding the first argument,...
@ SingleImpl
Single implementation devirtualization.
@ BranchFunnel
When retpoline mitigation is enabled, use a branch funnel that is defined in the merged module.
LLVM_ABI VirtualCallTarget(GlobalValue *Fn, const TypeMemberInfo *TM)