LLVM 20.0.0git
WholeProgramDevirt.cpp
Go to the documentation of this file.
1//===- WholeProgramDevirt.cpp - Whole program virtual call optimization ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass implements whole program optimization of virtual calls in cases
10// where we know (via !type metadata) that the list of callees is fixed. This
11// includes the following:
12// - Single implementation devirtualization: if a virtual call has a single
13// possible callee, replace all calls with a direct call to that callee.
14// - Virtual constant propagation: if the virtual function's return type is an
15// integer <=64 bits and all possible callees are readnone, for each class and
16// each list of constant arguments: evaluate the function, store the return
17// value alongside the virtual table, and rewrite each virtual call as a load
18// from the virtual table.
19// - Uniform return value optimization: if the conditions for virtual constant
20// propagation hold and each function returns the same constant value, replace
21// each virtual call with that constant.
22// - Unique return value optimization for i1 return values: if the conditions
23// for virtual constant propagation hold and a single vtable's function
24// returns 0, or a single vtable's function returns 1, replace each virtual
25// call with a comparison of the vptr against that vtable's address.
26//
27// This pass is intended to be used during the regular and thin LTO pipelines:
28//
29// During regular LTO, the pass determines the best optimization for each
30// virtual call and applies the resolutions directly to virtual calls that are
31// eligible for virtual call optimization (i.e. calls that use either of the
32// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics).
33//
34// During hybrid Regular/ThinLTO, the pass operates in two phases:
35// - Export phase: this is run during the thin link over a single merged module
36// that contains all vtables with !type metadata that participate in the link.
37// The pass computes a resolution for each virtual call and stores it in the
38// type identifier summary.
39// - Import phase: this is run during the thin backends over the individual
40// modules. The pass applies the resolutions previously computed during the
41// import phase to each eligible virtual call.
42//
43// During ThinLTO, the pass operates in two phases:
44// - Export phase: this is run during the thin link over the index which
45// contains a summary of all vtables with !type metadata that participate in
46// the link. It computes a resolution for each virtual call and stores it in
47// the type identifier summary. Only single implementation devirtualization
48// is supported.
49// - Import phase: (same as with hybrid case above).
50//
51//===----------------------------------------------------------------------===//
52
54#include "llvm/ADT/ArrayRef.h"
55#include "llvm/ADT/DenseMap.h"
57#include "llvm/ADT/DenseSet.h"
58#include "llvm/ADT/MapVector.h"
60#include "llvm/ADT/Statistic.h"
67#include "llvm/IR/Constants.h"
68#include "llvm/IR/DataLayout.h"
69#include "llvm/IR/DebugLoc.h"
71#include "llvm/IR/Dominators.h"
72#include "llvm/IR/Function.h"
73#include "llvm/IR/GlobalAlias.h"
75#include "llvm/IR/IRBuilder.h"
76#include "llvm/IR/InstrTypes.h"
77#include "llvm/IR/Instruction.h"
79#include "llvm/IR/Intrinsics.h"
80#include "llvm/IR/LLVMContext.h"
81#include "llvm/IR/MDBuilder.h"
82#include "llvm/IR/Metadata.h"
83#include "llvm/IR/Module.h"
87#include "llvm/Support/Errc.h"
88#include "llvm/Support/Error.h"
93#include "llvm/Transforms/IPO.h"
98#include <algorithm>
99#include <cstddef>
100#include <map>
101#include <set>
102#include <string>
103
104using namespace llvm;
105using namespace wholeprogramdevirt;
106
107#define DEBUG_TYPE "wholeprogramdevirt"
108
109STATISTIC(NumDevirtTargets, "Number of whole program devirtualization targets");
110STATISTIC(NumSingleImpl, "Number of single implementation devirtualizations");
111STATISTIC(NumBranchFunnel, "Number of branch funnels");
112STATISTIC(NumUniformRetVal, "Number of uniform return value optimizations");
113STATISTIC(NumUniqueRetVal, "Number of unique return value optimizations");
114STATISTIC(NumVirtConstProp1Bit,
115 "Number of 1 bit virtual constant propagations");
116STATISTIC(NumVirtConstProp, "Number of virtual constant propagations");
117
119 "wholeprogramdevirt-summary-action",
120 cl::desc("What to do with the summary when running this pass"),
121 cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"),
122 clEnumValN(PassSummaryAction::Import, "import",
123 "Import typeid resolutions from summary and globals"),
124 clEnumValN(PassSummaryAction::Export, "export",
125 "Export typeid resolutions to summary and globals")),
126 cl::Hidden);
127
129 "wholeprogramdevirt-read-summary",
130 cl::desc(
131 "Read summary from given bitcode or YAML file before running pass"),
132 cl::Hidden);
133
135 "wholeprogramdevirt-write-summary",
136 cl::desc("Write summary to given bitcode or YAML file after running pass. "
137 "Output file format is deduced from extension: *.bc means writing "
138 "bitcode, otherwise YAML"),
139 cl::Hidden);
140
142 ClThreshold("wholeprogramdevirt-branch-funnel-threshold", cl::Hidden,
143 cl::init(10),
144 cl::desc("Maximum number of call targets per "
145 "call site to enable branch funnels"));
146
147static cl::opt<bool>
148 PrintSummaryDevirt("wholeprogramdevirt-print-index-based", cl::Hidden,
149 cl::desc("Print index-based devirtualization messages"));
150
151/// Provide a way to force enable whole program visibility in tests.
152/// This is needed to support legacy tests that don't contain
153/// !vcall_visibility metadata (the mere presense of type tests
154/// previously implied hidden visibility).
155static cl::opt<bool>
156 WholeProgramVisibility("whole-program-visibility", cl::Hidden,
157 cl::desc("Enable whole program visibility"));
158
159/// Provide a way to force disable whole program for debugging or workarounds,
160/// when enabled via the linker.
162 "disable-whole-program-visibility", cl::Hidden,
163 cl::desc("Disable whole program visibility (overrides enabling options)"));
164
165/// Provide way to prevent certain function from being devirtualized
167 SkipFunctionNames("wholeprogramdevirt-skip",
168 cl::desc("Prevent function(s) from being devirtualized"),
170
171/// Mechanism to add runtime checking of devirtualization decisions, optionally
172/// trapping or falling back to indirect call on any that are not correct.
173/// Trapping mode is useful for debugging undefined behavior leading to failures
174/// with WPD. Fallback mode is useful for ensuring safety when whole program
175/// visibility may be compromised.
178 "wholeprogramdevirt-check", cl::Hidden,
179 cl::desc("Type of checking for incorrect devirtualizations"),
180 cl::values(clEnumValN(WPDCheckMode::None, "none", "No checking"),
181 clEnumValN(WPDCheckMode::Trap, "trap", "Trap when incorrect"),
183 "Fallback to indirect when incorrect")));
184
185namespace {
186struct PatternList {
187 std::vector<GlobPattern> Patterns;
188 template <class T> void init(const T &StringList) {
189 for (const auto &S : StringList)
191 Patterns.push_back(std::move(*Pat));
192 }
193 bool match(StringRef S) {
194 for (const GlobPattern &P : Patterns)
195 if (P.match(S))
196 return true;
197 return false;
198 }
199};
200} // namespace
201
202// Find the minimum offset that we may store a value of size Size bits at. If
203// IsAfter is set, look for an offset before the object, otherwise look for an
204// offset after the object.
207 bool IsAfter, uint64_t Size) {
208 // Find a minimum offset taking into account only vtable sizes.
209 uint64_t MinByte = 0;
210 for (const VirtualCallTarget &Target : Targets) {
211 if (IsAfter)
212 MinByte = std::max(MinByte, Target.minAfterBytes());
213 else
214 MinByte = std::max(MinByte, Target.minBeforeBytes());
215 }
216
217 // Build a vector of arrays of bytes covering, for each target, a slice of the
218 // used region (see AccumBitVector::BytesUsed in
219 // llvm/Transforms/IPO/WholeProgramDevirt.h) starting at MinByte. Effectively,
220 // this aligns the used regions to start at MinByte.
221 //
222 // In this example, A, B and C are vtables, # is a byte already allocated for
223 // a virtual function pointer, AAAA... (etc.) are the used regions for the
224 // vtables and Offset(X) is the value computed for the Offset variable below
225 // for X.
226 //
227 // Offset(A)
228 // | |
229 // |MinByte
230 // A: ################AAAAAAAA|AAAAAAAA
231 // B: ########BBBBBBBBBBBBBBBB|BBBB
232 // C: ########################|CCCCCCCCCCCCCCCC
233 // | Offset(B) |
234 //
235 // This code produces the slices of A, B and C that appear after the divider
236 // at MinByte.
237 std::vector<ArrayRef<uint8_t>> Used;
238 for (const VirtualCallTarget &Target : Targets) {
239 ArrayRef<uint8_t> VTUsed = IsAfter ? Target.TM->Bits->After.BytesUsed
240 : Target.TM->Bits->Before.BytesUsed;
241 uint64_t Offset = IsAfter ? MinByte - Target.minAfterBytes()
242 : MinByte - Target.minBeforeBytes();
243
244 // Disregard used regions that are smaller than Offset. These are
245 // effectively all-free regions that do not need to be checked.
246 if (VTUsed.size() > Offset)
247 Used.push_back(VTUsed.slice(Offset));
248 }
249
250 if (Size == 1) {
251 // Find a free bit in each member of Used.
252 for (unsigned I = 0;; ++I) {
253 uint8_t BitsUsed = 0;
254 for (auto &&B : Used)
255 if (I < B.size())
256 BitsUsed |= B[I];
257 if (BitsUsed != 0xff)
258 return (MinByte + I) * 8 + llvm::countr_zero(uint8_t(~BitsUsed));
259 }
260 } else {
261 // Find a free (Size/8) byte region in each member of Used.
262 // FIXME: see if alignment helps.
263 for (unsigned I = 0;; ++I) {
264 for (auto &&B : Used) {
265 unsigned Byte = 0;
266 while ((I + Byte) < B.size() && Byte < (Size / 8)) {
267 if (B[I + Byte])
268 goto NextI;
269 ++Byte;
270 }
271 }
272 return (MinByte + I) * 8;
273 NextI:;
274 }
275 }
276}
277
279 MutableArrayRef<VirtualCallTarget> Targets, uint64_t AllocBefore,
280 unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit) {
281 if (BitWidth == 1)
282 OffsetByte = -(AllocBefore / 8 + 1);
283 else
284 OffsetByte = -((AllocBefore + 7) / 8 + (BitWidth + 7) / 8);
285 OffsetBit = AllocBefore % 8;
286
287 for (VirtualCallTarget &Target : Targets) {
288 if (BitWidth == 1)
289 Target.setBeforeBit(AllocBefore);
290 else
291 Target.setBeforeBytes(AllocBefore, (BitWidth + 7) / 8);
292 }
293}
294
297 unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit) {
298 if (BitWidth == 1)
299 OffsetByte = AllocAfter / 8;
300 else
301 OffsetByte = (AllocAfter + 7) / 8;
302 OffsetBit = AllocAfter % 8;
303
304 for (VirtualCallTarget &Target : Targets) {
305 if (BitWidth == 1)
306 Target.setAfterBit(AllocAfter);
307 else
308 Target.setAfterBytes(AllocAfter, (BitWidth + 7) / 8);
309 }
310}
311
313 : Fn(Fn), TM(TM),
314 IsBigEndian(Fn->getDataLayout().isBigEndian()),
315 WasDevirt(false) {}
316
317namespace {
318
319// A slot in a set of virtual tables. The TypeID identifies the set of virtual
320// tables, and the ByteOffset is the offset in bytes from the address point to
321// the virtual function pointer.
322struct VTableSlot {
324 uint64_t ByteOffset;
325};
326
327} // end anonymous namespace
328
329namespace llvm {
330
331template <> struct DenseMapInfo<VTableSlot> {
332 static VTableSlot getEmptyKey() {
335 }
336 static VTableSlot getTombstoneKey() {
339 }
340 static unsigned getHashValue(const VTableSlot &I) {
343 }
344 static bool isEqual(const VTableSlot &LHS,
345 const VTableSlot &RHS) {
346 return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset;
347 }
348};
349
350template <> struct DenseMapInfo<VTableSlotSummary> {
354 }
358 }
359 static unsigned getHashValue(const VTableSlotSummary &I) {
362 }
363 static bool isEqual(const VTableSlotSummary &LHS,
364 const VTableSlotSummary &RHS) {
365 return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset;
366 }
367};
368
369} // end namespace llvm
370
371// Returns true if the function must be unreachable based on ValueInfo.
372//
373// In particular, identifies a function as unreachable in the following
374// conditions
375// 1) All summaries are live.
376// 2) All function summaries indicate it's unreachable
377// 3) There is no non-function with the same GUID (which is rare)
379 if ((!TheFnVI) || TheFnVI.getSummaryList().empty()) {
380 // Returns false if ValueInfo is absent, or the summary list is empty
381 // (e.g., function declarations).
382 return false;
383 }
384
385 for (const auto &Summary : TheFnVI.getSummaryList()) {
386 // Conservatively returns false if any non-live functions are seen.
387 // In general either all summaries should be live or all should be dead.
388 if (!Summary->isLive())
389 return false;
390 if (auto *FS = dyn_cast<FunctionSummary>(Summary->getBaseObject())) {
391 if (!FS->fflags().MustBeUnreachable)
392 return false;
393 }
394 // Be conservative if a non-function has the same GUID (which is rare).
395 else
396 return false;
397 }
398 // All function summaries are live and all of them agree that the function is
399 // unreachble.
400 return true;
401}
402
403namespace {
404// A virtual call site. VTable is the loaded virtual table pointer, and CS is
405// the indirect virtual call.
406struct VirtualCallSite {
407 Value *VTable = nullptr;
408 CallBase &CB;
409
410 // If non-null, this field points to the associated unsafe use count stored in
411 // the DevirtModule::NumUnsafeUsesForTypeTest map below. See the description
412 // of that field for details.
413 unsigned *NumUnsafeUses = nullptr;
414
415 void
416 emitRemark(const StringRef OptName, const StringRef TargetName,
418 Function *F = CB.getCaller();
419 DebugLoc DLoc = CB.getDebugLoc();
420 BasicBlock *Block = CB.getParent();
421
422 using namespace ore;
423 OREGetter(F).emit(OptimizationRemark(DEBUG_TYPE, OptName, DLoc, Block)
424 << NV("Optimization", OptName)
425 << ": devirtualized a call to "
426 << NV("FunctionName", TargetName));
427 }
428
429 void replaceAndErase(
430 const StringRef OptName, const StringRef TargetName, bool RemarksEnabled,
432 Value *New) {
433 if (RemarksEnabled)
434 emitRemark(OptName, TargetName, OREGetter);
435 CB.replaceAllUsesWith(New);
436 if (auto *II = dyn_cast<InvokeInst>(&CB)) {
437 BranchInst::Create(II->getNormalDest(), CB.getIterator());
438 II->getUnwindDest()->removePredecessor(II->getParent());
439 }
440 CB.eraseFromParent();
441 // This use is no longer unsafe.
442 if (NumUnsafeUses)
443 --*NumUnsafeUses;
444 }
445};
446
447// Call site information collected for a specific VTableSlot and possibly a list
448// of constant integer arguments. The grouping by arguments is handled by the
449// VTableSlotInfo class.
450struct CallSiteInfo {
451 /// The set of call sites for this slot. Used during regular LTO and the
452 /// import phase of ThinLTO (as well as the export phase of ThinLTO for any
453 /// call sites that appear in the merged module itself); in each of these
454 /// cases we are directly operating on the call sites at the IR level.
455 std::vector<VirtualCallSite> CallSites;
456
457 /// Whether all call sites represented by this CallSiteInfo, including those
458 /// in summaries, have been devirtualized. This starts off as true because a
459 /// default constructed CallSiteInfo represents no call sites.
460 bool AllCallSitesDevirted = true;
461
462 // These fields are used during the export phase of ThinLTO and reflect
463 // information collected from function summaries.
464
465 /// Whether any function summary contains an llvm.assume(llvm.type.test) for
466 /// this slot.
467 bool SummaryHasTypeTestAssumeUsers = false;
468
469 /// CFI-specific: a vector containing the list of function summaries that use
470 /// the llvm.type.checked.load intrinsic and therefore will require
471 /// resolutions for llvm.type.test in order to implement CFI checks if
472 /// devirtualization was unsuccessful. If devirtualization was successful, the
473 /// pass will clear this vector by calling markDevirt(). If at the end of the
474 /// pass the vector is non-empty, we will need to add a use of llvm.type.test
475 /// to each of the function summaries in the vector.
476 std::vector<FunctionSummary *> SummaryTypeCheckedLoadUsers;
477 std::vector<FunctionSummary *> SummaryTypeTestAssumeUsers;
478
479 bool isExported() const {
480 return SummaryHasTypeTestAssumeUsers ||
481 !SummaryTypeCheckedLoadUsers.empty();
482 }
483
484 void addSummaryTypeCheckedLoadUser(FunctionSummary *FS) {
485 SummaryTypeCheckedLoadUsers.push_back(FS);
486 AllCallSitesDevirted = false;
487 }
488
489 void addSummaryTypeTestAssumeUser(FunctionSummary *FS) {
490 SummaryTypeTestAssumeUsers.push_back(FS);
491 SummaryHasTypeTestAssumeUsers = true;
492 AllCallSitesDevirted = false;
493 }
494
495 void markDevirt() {
496 AllCallSitesDevirted = true;
497
498 // As explained in the comment for SummaryTypeCheckedLoadUsers.
499 SummaryTypeCheckedLoadUsers.clear();
500 }
501};
502
503// Call site information collected for a specific VTableSlot.
504struct VTableSlotInfo {
505 // The set of call sites which do not have all constant integer arguments
506 // (excluding "this").
507 CallSiteInfo CSInfo;
508
509 // The set of call sites with all constant integer arguments (excluding
510 // "this"), grouped by argument list.
511 std::map<std::vector<uint64_t>, CallSiteInfo> ConstCSInfo;
512
513 void addCallSite(Value *VTable, CallBase &CB, unsigned *NumUnsafeUses);
514
515private:
516 CallSiteInfo &findCallSiteInfo(CallBase &CB);
517};
518
519CallSiteInfo &VTableSlotInfo::findCallSiteInfo(CallBase &CB) {
520 std::vector<uint64_t> Args;
521 auto *CBType = dyn_cast<IntegerType>(CB.getType());
522 if (!CBType || CBType->getBitWidth() > 64 || CB.arg_empty())
523 return CSInfo;
524 for (auto &&Arg : drop_begin(CB.args())) {
525 auto *CI = dyn_cast<ConstantInt>(Arg);
526 if (!CI || CI->getBitWidth() > 64)
527 return CSInfo;
528 Args.push_back(CI->getZExtValue());
529 }
530 return ConstCSInfo[Args];
531}
532
533void VTableSlotInfo::addCallSite(Value *VTable, CallBase &CB,
534 unsigned *NumUnsafeUses) {
535 auto &CSI = findCallSiteInfo(CB);
536 CSI.AllCallSitesDevirted = false;
537 CSI.CallSites.push_back({VTable, CB, NumUnsafeUses});
538}
539
540struct DevirtModule {
541 Module &M;
542 function_ref<AAResults &(Function &)> AARGetter;
543 function_ref<DominatorTree &(Function &)> LookupDomTree;
544
545 ModuleSummaryIndex *ExportSummary;
546 const ModuleSummaryIndex *ImportSummary;
547
548 IntegerType *Int8Ty;
549 PointerType *Int8PtrTy;
550 IntegerType *Int32Ty;
551 IntegerType *Int64Ty;
552 IntegerType *IntPtrTy;
553 /// Sizeless array type, used for imported vtables. This provides a signal
554 /// to analyzers that these imports may alias, as they do for example
555 /// when multiple unique return values occur in the same vtable.
556 ArrayType *Int8Arr0Ty;
557
558 bool RemarksEnabled;
560
562
563 // Calls that have already been optimized. We may add a call to multiple
564 // VTableSlotInfos if vtable loads are coalesced and need to make sure not to
565 // optimize a call more than once.
566 SmallPtrSet<CallBase *, 8> OptimizedCalls;
567
568 // Store calls that had their ptrauth bundle removed. They are to be deleted
569 // at the end of the optimization.
570 SmallVector<CallBase *, 8> CallsWithPtrAuthBundleRemoved;
571
572 // This map keeps track of the number of "unsafe" uses of a loaded function
573 // pointer. The key is the associated llvm.type.test intrinsic call generated
574 // by this pass. An unsafe use is one that calls the loaded function pointer
575 // directly. Every time we eliminate an unsafe use (for example, by
576 // devirtualizing it or by applying virtual constant propagation), we
577 // decrement the value stored in this map. If a value reaches zero, we can
578 // eliminate the type check by RAUWing the associated llvm.type.test call with
579 // true.
580 std::map<CallInst *, unsigned> NumUnsafeUsesForTypeTest;
581 PatternList FunctionsToSkip;
582
583 DevirtModule(Module &M, function_ref<AAResults &(Function &)> AARGetter,
585 function_ref<DominatorTree &(Function &)> LookupDomTree,
586 ModuleSummaryIndex *ExportSummary,
587 const ModuleSummaryIndex *ImportSummary)
588 : M(M), AARGetter(AARGetter), LookupDomTree(LookupDomTree),
589 ExportSummary(ExportSummary), ImportSummary(ImportSummary),
590 Int8Ty(Type::getInt8Ty(M.getContext())),
591 Int8PtrTy(PointerType::getUnqual(M.getContext())),
592 Int32Ty(Type::getInt32Ty(M.getContext())),
593 Int64Ty(Type::getInt64Ty(M.getContext())),
594 IntPtrTy(M.getDataLayout().getIntPtrType(M.getContext(), 0)),
595 Int8Arr0Ty(ArrayType::get(Type::getInt8Ty(M.getContext()), 0)),
596 RemarksEnabled(areRemarksEnabled()), OREGetter(OREGetter) {
597 assert(!(ExportSummary && ImportSummary));
598 FunctionsToSkip.init(SkipFunctionNames);
599 }
600
601 bool areRemarksEnabled();
602
603 void
604 scanTypeTestUsers(Function *TypeTestFunc,
605 DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap);
606 void scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc);
607
608 void buildTypeIdentifierMap(
609 std::vector<VTableBits> &Bits,
610 DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap);
611
612 bool
613 tryFindVirtualCallTargets(std::vector<VirtualCallTarget> &TargetsForSlot,
614 const std::set<TypeMemberInfo> &TypeMemberInfos,
615 uint64_t ByteOffset,
616 ModuleSummaryIndex *ExportSummary);
617
618 void applySingleImplDevirt(VTableSlotInfo &SlotInfo, Constant *TheFn,
619 bool &IsExported);
620 bool trySingleImplDevirt(ModuleSummaryIndex *ExportSummary,
622 VTableSlotInfo &SlotInfo,
624
625 void applyICallBranchFunnel(VTableSlotInfo &SlotInfo, Constant *JT,
626 bool &IsExported);
627 void tryICallBranchFunnel(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
628 VTableSlotInfo &SlotInfo,
629 WholeProgramDevirtResolution *Res, VTableSlot Slot);
630
631 bool tryEvaluateFunctionsWithArgs(
633 ArrayRef<uint64_t> Args);
634
635 void applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
636 uint64_t TheRetVal);
637 bool tryUniformRetValOpt(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
638 CallSiteInfo &CSInfo,
640
641 // Returns the global symbol name that is used to export information about the
642 // given vtable slot and list of arguments.
643 std::string getGlobalName(VTableSlot Slot, ArrayRef<uint64_t> Args,
645
646 bool shouldExportConstantsAsAbsoluteSymbols();
647
648 // This function is called during the export phase to create a symbol
649 // definition containing information about the given vtable slot and list of
650 // arguments.
651 void exportGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args, StringRef Name,
652 Constant *C);
653 void exportConstant(VTableSlot Slot, ArrayRef<uint64_t> Args, StringRef Name,
654 uint32_t Const, uint32_t &Storage);
655
656 // This function is called during the import phase to create a reference to
657 // the symbol definition created during the export phase.
658 Constant *importGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
660 Constant *importConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
661 StringRef Name, IntegerType *IntTy,
662 uint32_t Storage);
663
664 Constant *getMemberAddr(const TypeMemberInfo *M);
665
666 void applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName, bool IsOne,
667 Constant *UniqueMemberAddr);
668 bool tryUniqueRetValOpt(unsigned BitWidth,
670 CallSiteInfo &CSInfo,
672 VTableSlot Slot, ArrayRef<uint64_t> Args);
673
674 void applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
675 Constant *Byte, Constant *Bit);
676 bool tryVirtualConstProp(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
677 VTableSlotInfo &SlotInfo,
678 WholeProgramDevirtResolution *Res, VTableSlot Slot);
679
680 void rebuildGlobal(VTableBits &B);
681
682 // Apply the summary resolution for Slot to all virtual calls in SlotInfo.
683 void importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo);
684
685 // If we were able to eliminate all unsafe uses for a type checked load,
686 // eliminate the associated type tests by replacing them with true.
687 void removeRedundantTypeTests();
688
689 bool run();
690
691 // Look up the corresponding ValueInfo entry of `TheFn` in `ExportSummary`.
692 //
693 // Caller guarantees that `ExportSummary` is not nullptr.
694 static ValueInfo lookUpFunctionValueInfo(Function *TheFn,
695 ModuleSummaryIndex *ExportSummary);
696
697 // Returns true if the function definition must be unreachable.
698 //
699 // Note if this helper function returns true, `F` is guaranteed
700 // to be unreachable; if it returns false, `F` might still
701 // be unreachable but not covered by this helper function.
702 //
703 // Implementation-wise, if function definition is present, IR is analyzed; if
704 // not, look up function flags from ExportSummary as a fallback.
705 static bool mustBeUnreachableFunction(Function *const F,
706 ModuleSummaryIndex *ExportSummary);
707
708 // Lower the module using the action and summary passed as command line
709 // arguments. For testing purposes only.
710 static bool
711 runForTesting(Module &M, function_ref<AAResults &(Function &)> AARGetter,
713 function_ref<DominatorTree &(Function &)> LookupDomTree);
714};
715
716struct DevirtIndex {
717 ModuleSummaryIndex &ExportSummary;
718 // The set in which to record GUIDs exported from their module by
719 // devirtualization, used by client to ensure they are not internalized.
720 std::set<GlobalValue::GUID> &ExportedGUIDs;
721 // A map in which to record the information necessary to locate the WPD
722 // resolution for local targets in case they are exported by cross module
723 // importing.
724 std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap;
725
727
728 PatternList FunctionsToSkip;
729
730 DevirtIndex(
731 ModuleSummaryIndex &ExportSummary,
732 std::set<GlobalValue::GUID> &ExportedGUIDs,
733 std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap)
734 : ExportSummary(ExportSummary), ExportedGUIDs(ExportedGUIDs),
735 LocalWPDTargetsMap(LocalWPDTargetsMap) {
736 FunctionsToSkip.init(SkipFunctionNames);
737 }
738
739 bool tryFindVirtualCallTargets(std::vector<ValueInfo> &TargetsForSlot,
740 const TypeIdCompatibleVtableInfo TIdInfo,
741 uint64_t ByteOffset);
742
743 bool trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
744 VTableSlotSummary &SlotSummary,
745 VTableSlotInfo &SlotInfo,
747 std::set<ValueInfo> &DevirtTargets);
748
749 void run();
750};
751} // end anonymous namespace
752
755 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
756 auto AARGetter = [&](Function &F) -> AAResults & {
757 return FAM.getResult<AAManager>(F);
758 };
759 auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & {
761 };
762 auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & {
764 };
765 if (UseCommandLine) {
766 if (!DevirtModule::runForTesting(M, AARGetter, OREGetter, LookupDomTree))
767 return PreservedAnalyses::all();
769 }
770 if (!DevirtModule(M, AARGetter, OREGetter, LookupDomTree, ExportSummary,
771 ImportSummary)
772 .run())
773 return PreservedAnalyses::all();
775}
776
777// Enable whole program visibility if enabled by client (e.g. linker) or
778// internal option, and not force disabled.
779bool llvm::hasWholeProgramVisibility(bool WholeProgramVisibilityEnabledInLTO) {
780 return (WholeProgramVisibilityEnabledInLTO || WholeProgramVisibility) &&
782}
783
784static bool
786 function_ref<bool(StringRef)> IsVisibleToRegularObj) {
787 // TypeID for member function pointer type is an internal construct
788 // and won't exist in IsVisibleToRegularObj. The full TypeID
789 // will be present and participate in invalidation.
790 if (TypeID.ends_with(".virtual"))
791 return false;
792
793 // TypeID that doesn't start with Itanium mangling (_ZTS) will be
794 // non-externally visible types which cannot interact with
795 // external native files. See CodeGenModule::CreateMetadataIdentifierImpl.
796 if (!TypeID.consume_front("_ZTS"))
797 return false;
798
799 // TypeID is keyed off the type name symbol (_ZTS). However, the native
800 // object may not contain this symbol if it does not contain a key
801 // function for the base type and thus only contains a reference to the
802 // type info (_ZTI). To catch this case we query using the type info
803 // symbol corresponding to the TypeID.
804 std::string typeInfo = ("_ZTI" + TypeID).str();
805 return IsVisibleToRegularObj(typeInfo);
806}
807
808static bool
810 function_ref<bool(StringRef)> IsVisibleToRegularObj) {
812 GV.getMetadata(LLVMContext::MD_type, Types);
813
814 for (auto Type : Types)
815 if (auto *TypeID = dyn_cast<MDString>(Type->getOperand(1).get()))
816 return typeIDVisibleToRegularObj(TypeID->getString(),
817 IsVisibleToRegularObj);
818
819 return false;
820}
821
822/// If whole program visibility asserted, then upgrade all public vcall
823/// visibility metadata on vtable definitions to linkage unit visibility in
824/// Module IR (for regular or hybrid LTO).
826 Module &M, bool WholeProgramVisibilityEnabledInLTO,
827 const DenseSet<GlobalValue::GUID> &DynamicExportSymbols,
828 bool ValidateAllVtablesHaveTypeInfos,
829 function_ref<bool(StringRef)> IsVisibleToRegularObj) {
830 if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO))
831 return;
832 for (GlobalVariable &GV : M.globals()) {
833 // Add linkage unit visibility to any variable with type metadata, which are
834 // the vtable definitions. We won't have an existing vcall_visibility
835 // metadata on vtable definitions with public visibility.
836 if (GV.hasMetadata(LLVMContext::MD_type) &&
838 // Don't upgrade the visibility for symbols exported to the dynamic
839 // linker, as we have no information on their eventual use.
840 !DynamicExportSymbols.count(GV.getGUID()) &&
841 // With validation enabled, we want to exclude symbols visible to
842 // regular objects. Local symbols will be in this group due to the
843 // current implementation but those with VCallVisibilityTranslationUnit
844 // will have already been marked in clang so are unaffected.
845 !(ValidateAllVtablesHaveTypeInfos &&
846 skipUpdateDueToValidation(GV, IsVisibleToRegularObj)))
848 }
849}
850
852 bool WholeProgramVisibilityEnabledInLTO) {
853 Function *PublicTypeTestFunc =
854 M.getFunction(Intrinsic::getName(Intrinsic::public_type_test));
855 if (!PublicTypeTestFunc)
856 return;
857 if (hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO)) {
858 Function *TypeTestFunc =
859 Intrinsic::getDeclaration(&M, Intrinsic::type_test);
860 for (Use &U : make_early_inc_range(PublicTypeTestFunc->uses())) {
861 auto *CI = cast<CallInst>(U.getUser());
862 auto *NewCI = CallInst::Create(
863 TypeTestFunc, {CI->getArgOperand(0), CI->getArgOperand(1)},
864 std::nullopt, "", CI->getIterator());
865 CI->replaceAllUsesWith(NewCI);
866 CI->eraseFromParent();
867 }
868 } else {
869 auto *True = ConstantInt::getTrue(M.getContext());
870 for (Use &U : make_early_inc_range(PublicTypeTestFunc->uses())) {
871 auto *CI = cast<CallInst>(U.getUser());
872 CI->replaceAllUsesWith(True);
873 CI->eraseFromParent();
874 }
875 }
876}
877
878/// Based on typeID string, get all associated vtable GUIDS that are
879/// visible to regular objects.
882 DenseSet<GlobalValue::GUID> &VisibleToRegularObjSymbols,
883 function_ref<bool(StringRef)> IsVisibleToRegularObj) {
884 for (const auto &typeID : Index.typeIdCompatibleVtableMap()) {
885 if (typeIDVisibleToRegularObj(typeID.first, IsVisibleToRegularObj))
886 for (const TypeIdOffsetVtableInfo &P : typeID.second)
887 VisibleToRegularObjSymbols.insert(P.VTableVI.getGUID());
888 }
889}
890
891/// If whole program visibility asserted, then upgrade all public vcall
892/// visibility metadata on vtable definition summaries to linkage unit
893/// visibility in Module summary index (for ThinLTO).
895 ModuleSummaryIndex &Index, bool WholeProgramVisibilityEnabledInLTO,
896 const DenseSet<GlobalValue::GUID> &DynamicExportSymbols,
897 const DenseSet<GlobalValue::GUID> &VisibleToRegularObjSymbols) {
898 if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO))
899 return;
900 for (auto &P : Index) {
901 // Don't upgrade the visibility for symbols exported to the dynamic
902 // linker, as we have no information on their eventual use.
903 if (DynamicExportSymbols.count(P.first))
904 continue;
905 for (auto &S : P.second.SummaryList) {
906 auto *GVar = dyn_cast<GlobalVarSummary>(S.get());
907 if (!GVar ||
908 GVar->getVCallVisibility() != GlobalObject::VCallVisibilityPublic)
909 continue;
910 // With validation enabled, we want to exclude symbols visible to regular
911 // objects. Local symbols will be in this group due to the current
912 // implementation but those with VCallVisibilityTranslationUnit will have
913 // already been marked in clang so are unaffected.
914 if (VisibleToRegularObjSymbols.count(P.first))
915 continue;
916 GVar->setVCallVisibility(GlobalObject::VCallVisibilityLinkageUnit);
917 }
918 }
919}
920
922 ModuleSummaryIndex &Summary, std::set<GlobalValue::GUID> &ExportedGUIDs,
923 std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
924 DevirtIndex(Summary, ExportedGUIDs, LocalWPDTargetsMap).run();
925}
926
928 ModuleSummaryIndex &Summary,
929 function_ref<bool(StringRef, ValueInfo)> isExported,
930 std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
931 for (auto &T : LocalWPDTargetsMap) {
932 auto &VI = T.first;
933 // This was enforced earlier during trySingleImplDevirt.
934 assert(VI.getSummaryList().size() == 1 &&
935 "Devirt of local target has more than one copy");
936 auto &S = VI.getSummaryList()[0];
937 if (!isExported(S->modulePath(), VI))
938 continue;
939
940 // It's been exported by a cross module import.
941 for (auto &SlotSummary : T.second) {
942 auto *TIdSum = Summary.getTypeIdSummary(SlotSummary.TypeID);
943 assert(TIdSum);
944 auto WPDRes = TIdSum->WPDRes.find(SlotSummary.ByteOffset);
945 assert(WPDRes != TIdSum->WPDRes.end());
946 WPDRes->second.SingleImplName = ModuleSummaryIndex::getGlobalNameForLocal(
947 WPDRes->second.SingleImplName,
948 Summary.getModuleHash(S->modulePath()));
949 }
950 }
951}
952
954 // Check that summary index contains regular LTO module when performing
955 // export to prevent occasional use of index from pure ThinLTO compilation
956 // (-fno-split-lto-module). This kind of summary index is passed to
957 // DevirtIndex::run, not to DevirtModule::run used by opt/runForTesting.
958 const auto &ModPaths = Summary->modulePaths();
961 return createStringError(
963 "combined summary should contain Regular LTO module");
964 return ErrorSuccess();
965}
966
967bool DevirtModule::runForTesting(
968 Module &M, function_ref<AAResults &(Function &)> AARGetter,
970 function_ref<DominatorTree &(Function &)> LookupDomTree) {
971 std::unique_ptr<ModuleSummaryIndex> Summary =
972 std::make_unique<ModuleSummaryIndex>(/*HaveGVs=*/false);
973
974 // Handle the command-line summary arguments. This code is for testing
975 // purposes only, so we handle errors directly.
976 if (!ClReadSummary.empty()) {
977 ExitOnError ExitOnErr("-wholeprogramdevirt-read-summary: " + ClReadSummary +
978 ": ");
979 auto ReadSummaryFile =
981 if (Expected<std::unique_ptr<ModuleSummaryIndex>> SummaryOrErr =
982 getModuleSummaryIndex(*ReadSummaryFile)) {
983 Summary = std::move(*SummaryOrErr);
984 ExitOnErr(checkCombinedSummaryForTesting(Summary.get()));
985 } else {
986 // Try YAML if we've failed with bitcode.
987 consumeError(SummaryOrErr.takeError());
988 yaml::Input In(ReadSummaryFile->getBuffer());
989 In >> *Summary;
990 ExitOnErr(errorCodeToError(In.error()));
991 }
992 }
993
994 bool Changed =
995 DevirtModule(M, AARGetter, OREGetter, LookupDomTree,
997 : nullptr,
999 : nullptr)
1000 .run();
1001
1002 if (!ClWriteSummary.empty()) {
1003 ExitOnError ExitOnErr(
1004 "-wholeprogramdevirt-write-summary: " + ClWriteSummary + ": ");
1005 std::error_code EC;
1006 if (StringRef(ClWriteSummary).ends_with(".bc")) {
1008 ExitOnErr(errorCodeToError(EC));
1009 writeIndexToFile(*Summary, OS);
1010 } else {
1012 ExitOnErr(errorCodeToError(EC));
1013 yaml::Output Out(OS);
1014 Out << *Summary;
1015 }
1016 }
1017
1018 return Changed;
1019}
1020
1021void DevirtModule::buildTypeIdentifierMap(
1022 std::vector<VTableBits> &Bits,
1023 DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) {
1025 Bits.reserve(M.global_size());
1027 for (GlobalVariable &GV : M.globals()) {
1028 Types.clear();
1029 GV.getMetadata(LLVMContext::MD_type, Types);
1030 if (GV.isDeclaration() || Types.empty())
1031 continue;
1032
1033 VTableBits *&BitsPtr = GVToBits[&GV];
1034 if (!BitsPtr) {
1035 Bits.emplace_back();
1036 Bits.back().GV = &GV;
1037 Bits.back().ObjectSize =
1038 M.getDataLayout().getTypeAllocSize(GV.getInitializer()->getType());
1039 BitsPtr = &Bits.back();
1040 }
1041
1042 for (MDNode *Type : Types) {
1043 auto TypeID = Type->getOperand(1).get();
1044
1046 cast<ConstantInt>(
1047 cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
1048 ->getZExtValue();
1049
1050 TypeIdMap[TypeID].insert({BitsPtr, Offset});
1051 }
1052 }
1053}
1054
1055bool DevirtModule::tryFindVirtualCallTargets(
1056 std::vector<VirtualCallTarget> &TargetsForSlot,
1057 const std::set<TypeMemberInfo> &TypeMemberInfos, uint64_t ByteOffset,
1058 ModuleSummaryIndex *ExportSummary) {
1059 for (const TypeMemberInfo &TM : TypeMemberInfos) {
1060 if (!TM.Bits->GV->isConstant())
1061 return false;
1062
1063 // We cannot perform whole program devirtualization analysis on a vtable
1064 // with public LTO visibility.
1065 if (TM.Bits->GV->getVCallVisibility() ==
1067 return false;
1068
1069 Function *Fn = nullptr;
1070 Constant *C = nullptr;
1071 std::tie(Fn, C) =
1072 getFunctionAtVTableOffset(TM.Bits->GV, TM.Offset + ByteOffset, M);
1073
1074 if (!Fn)
1075 return false;
1076
1077 if (FunctionsToSkip.match(Fn->getName()))
1078 return false;
1079
1080 // We can disregard __cxa_pure_virtual as a possible call target, as
1081 // calls to pure virtuals are UB.
1082 if (Fn->getName() == "__cxa_pure_virtual")
1083 continue;
1084
1085 // We can disregard unreachable functions as possible call targets, as
1086 // unreachable functions shouldn't be called.
1087 if (mustBeUnreachableFunction(Fn, ExportSummary))
1088 continue;
1089
1090 // Save the symbol used in the vtable to use as the devirtualization
1091 // target.
1092 auto GV = dyn_cast<GlobalValue>(C);
1093 assert(GV);
1094 TargetsForSlot.push_back({GV, &TM});
1095 }
1096
1097 // Give up if we couldn't find any targets.
1098 return !TargetsForSlot.empty();
1099}
1100
1101bool DevirtIndex::tryFindVirtualCallTargets(
1102 std::vector<ValueInfo> &TargetsForSlot,
1103 const TypeIdCompatibleVtableInfo TIdInfo, uint64_t ByteOffset) {
1104 for (const TypeIdOffsetVtableInfo &P : TIdInfo) {
1105 // Find a representative copy of the vtable initializer.
1106 // We can have multiple available_externally, linkonce_odr and weak_odr
1107 // vtable initializers. We can also have multiple external vtable
1108 // initializers in the case of comdats, which we cannot check here.
1109 // The linker should give an error in this case.
1110 //
1111 // Also, handle the case of same-named local Vtables with the same path
1112 // and therefore the same GUID. This can happen if there isn't enough
1113 // distinguishing path when compiling the source file. In that case we
1114 // conservatively return false early.
1115 const GlobalVarSummary *VS = nullptr;
1116 bool LocalFound = false;
1117 for (const auto &S : P.VTableVI.getSummaryList()) {
1118 if (GlobalValue::isLocalLinkage(S->linkage())) {
1119 if (LocalFound)
1120 return false;
1121 LocalFound = true;
1122 }
1123 auto *CurVS = cast<GlobalVarSummary>(S->getBaseObject());
1124 if (!CurVS->vTableFuncs().empty() ||
1125 // Previously clang did not attach the necessary type metadata to
1126 // available_externally vtables, in which case there would not
1127 // be any vtable functions listed in the summary and we need
1128 // to treat this case conservatively (in case the bitcode is old).
1129 // However, we will also not have any vtable functions in the
1130 // case of a pure virtual base class. In that case we do want
1131 // to set VS to avoid treating it conservatively.
1133 VS = CurVS;
1134 // We cannot perform whole program devirtualization analysis on a vtable
1135 // with public LTO visibility.
1136 if (VS->getVCallVisibility() == GlobalObject::VCallVisibilityPublic)
1137 return false;
1138 }
1139 }
1140 // There will be no VS if all copies are available_externally having no
1141 // type metadata. In that case we can't safely perform WPD.
1142 if (!VS)
1143 return false;
1144 if (!VS->isLive())
1145 continue;
1146 for (auto VTP : VS->vTableFuncs()) {
1147 if (VTP.VTableOffset != P.AddressPointOffset + ByteOffset)
1148 continue;
1149
1150 if (mustBeUnreachableFunction(VTP.FuncVI))
1151 continue;
1152
1153 TargetsForSlot.push_back(VTP.FuncVI);
1154 }
1155 }
1156
1157 // Give up if we couldn't find any targets.
1158 return !TargetsForSlot.empty();
1159}
1160
1161void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
1162 Constant *TheFn, bool &IsExported) {
1163 // Don't devirtualize function if we're told to skip it
1164 // in -wholeprogramdevirt-skip.
1165 if (FunctionsToSkip.match(TheFn->stripPointerCasts()->getName()))
1166 return;
1167 auto Apply = [&](CallSiteInfo &CSInfo) {
1168 for (auto &&VCallSite : CSInfo.CallSites) {
1169 if (!OptimizedCalls.insert(&VCallSite.CB).second)
1170 continue;
1171
1172 if (RemarksEnabled)
1173 VCallSite.emitRemark("single-impl",
1174 TheFn->stripPointerCasts()->getName(), OREGetter);
1175 NumSingleImpl++;
1176 auto &CB = VCallSite.CB;
1177 assert(!CB.getCalledFunction() && "devirtualizing direct call?");
1178 IRBuilder<> Builder(&CB);
1179 Value *Callee =
1180 Builder.CreateBitCast(TheFn, CB.getCalledOperand()->getType());
1181
1182 // If trap checking is enabled, add support to compare the virtual
1183 // function pointer to the devirtualized target. In case of a mismatch,
1184 // perform a debug trap.
1185 if (DevirtCheckMode == WPDCheckMode::Trap) {
1186 auto *Cond = Builder.CreateICmpNE(CB.getCalledOperand(), Callee);
1187 Instruction *ThenTerm =
1188 SplitBlockAndInsertIfThen(Cond, &CB, /*Unreachable=*/false);
1189 Builder.SetInsertPoint(ThenTerm);
1190 Function *TrapFn = Intrinsic::getDeclaration(&M, Intrinsic::debugtrap);
1191 auto *CallTrap = Builder.CreateCall(TrapFn);
1192 CallTrap->setDebugLoc(CB.getDebugLoc());
1193 }
1194
1195 // If fallback checking is enabled, add support to compare the virtual
1196 // function pointer to the devirtualized target. In case of a mismatch,
1197 // fall back to indirect call.
1198 if (DevirtCheckMode == WPDCheckMode::Fallback) {
1199 MDNode *Weights = MDBuilder(M.getContext()).createLikelyBranchWeights();
1200 // Version the indirect call site. If the called value is equal to the
1201 // given callee, 'NewInst' will be executed, otherwise the original call
1202 // site will be executed.
1203 CallBase &NewInst = versionCallSite(CB, Callee, Weights);
1204 NewInst.setCalledOperand(Callee);
1205 // Since the new call site is direct, we must clear metadata that
1206 // is only appropriate for indirect calls. This includes !prof and
1207 // !callees metadata.
1208 NewInst.setMetadata(LLVMContext::MD_prof, nullptr);
1209 NewInst.setMetadata(LLVMContext::MD_callees, nullptr);
1210 // Additionally, we should remove them from the fallback indirect call,
1211 // so that we don't attempt to perform indirect call promotion later.
1212 CB.setMetadata(LLVMContext::MD_prof, nullptr);
1213 CB.setMetadata(LLVMContext::MD_callees, nullptr);
1214 }
1215
1216 // In either trapping or non-checking mode, devirtualize original call.
1217 else {
1218 // Devirtualize unconditionally.
1219 CB.setCalledOperand(Callee);
1220 // Since the call site is now direct, we must clear metadata that
1221 // is only appropriate for indirect calls. This includes !prof and
1222 // !callees metadata.
1223 CB.setMetadata(LLVMContext::MD_prof, nullptr);
1224 CB.setMetadata(LLVMContext::MD_callees, nullptr);
1225 if (CB.getCalledOperand() &&
1227 auto *NewCS = CallBase::removeOperandBundle(
1229 CB.replaceAllUsesWith(NewCS);
1230 // Schedule for deletion at the end of pass run.
1231 CallsWithPtrAuthBundleRemoved.push_back(&CB);
1232 }
1233 }
1234
1235 // This use is no longer unsafe.
1236 if (VCallSite.NumUnsafeUses)
1237 --*VCallSite.NumUnsafeUses;
1238 }
1239 if (CSInfo.isExported())
1240 IsExported = true;
1241 CSInfo.markDevirt();
1242 };
1243 Apply(SlotInfo.CSInfo);
1244 for (auto &P : SlotInfo.ConstCSInfo)
1245 Apply(P.second);
1246}
1247
1248static bool AddCalls(VTableSlotInfo &SlotInfo, const ValueInfo &Callee) {
1249 // We can't add calls if we haven't seen a definition
1250 if (Callee.getSummaryList().empty())
1251 return false;
1252
1253 // Insert calls into the summary index so that the devirtualized targets
1254 // are eligible for import.
1255 // FIXME: Annotate type tests with hotness. For now, mark these as hot
1256 // to better ensure we have the opportunity to inline them.
1257 bool IsExported = false;
1258 auto &S = Callee.getSummaryList()[0];
1259 CalleeInfo CI(CalleeInfo::HotnessType::Hot, /* HasTailCall = */ false,
1260 /* RelBF = */ 0);
1261 auto AddCalls = [&](CallSiteInfo &CSInfo) {
1262 for (auto *FS : CSInfo.SummaryTypeCheckedLoadUsers) {
1263 FS->addCall({Callee, CI});
1264 IsExported |= S->modulePath() != FS->modulePath();
1265 }
1266 for (auto *FS : CSInfo.SummaryTypeTestAssumeUsers) {
1267 FS->addCall({Callee, CI});
1268 IsExported |= S->modulePath() != FS->modulePath();
1269 }
1270 };
1271 AddCalls(SlotInfo.CSInfo);
1272 for (auto &P : SlotInfo.ConstCSInfo)
1273 AddCalls(P.second);
1274 return IsExported;
1275}
1276
1277bool DevirtModule::trySingleImplDevirt(
1278 ModuleSummaryIndex *ExportSummary,
1279 MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
1281 // See if the program contains a single implementation of this virtual
1282 // function.
1283 auto *TheFn = TargetsForSlot[0].Fn;
1284 for (auto &&Target : TargetsForSlot)
1285 if (TheFn != Target.Fn)
1286 return false;
1287
1288 // If so, update each call site to call that implementation directly.
1289 if (RemarksEnabled || AreStatisticsEnabled())
1290 TargetsForSlot[0].WasDevirt = true;
1291
1292 bool IsExported = false;
1293 applySingleImplDevirt(SlotInfo, TheFn, IsExported);
1294 if (!IsExported)
1295 return false;
1296
1297 // If the only implementation has local linkage, we must promote to external
1298 // to make it visible to thin LTO objects. We can only get here during the
1299 // ThinLTO export phase.
1300 if (TheFn->hasLocalLinkage()) {
1301 std::string NewName = (TheFn->getName() + ".llvm.merged").str();
1302
1303 // Since we are renaming the function, any comdats with the same name must
1304 // also be renamed. This is required when targeting COFF, as the comdat name
1305 // must match one of the names of the symbols in the comdat.
1306 if (Comdat *C = TheFn->getComdat()) {
1307 if (C->getName() == TheFn->getName()) {
1308 Comdat *NewC = M.getOrInsertComdat(NewName);
1309 NewC->setSelectionKind(C->getSelectionKind());
1310 for (GlobalObject &GO : M.global_objects())
1311 if (GO.getComdat() == C)
1312 GO.setComdat(NewC);
1313 }
1314 }
1315
1316 TheFn->setLinkage(GlobalValue::ExternalLinkage);
1317 TheFn->setVisibility(GlobalValue::HiddenVisibility);
1318 TheFn->setName(NewName);
1319 }
1320 if (ValueInfo TheFnVI = ExportSummary->getValueInfo(TheFn->getGUID()))
1321 // Any needed promotion of 'TheFn' has already been done during
1322 // LTO unit split, so we can ignore return value of AddCalls.
1323 AddCalls(SlotInfo, TheFnVI);
1324
1326 Res->SingleImplName = std::string(TheFn->getName());
1327
1328 return true;
1329}
1330
1331bool DevirtIndex::trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
1332 VTableSlotSummary &SlotSummary,
1333 VTableSlotInfo &SlotInfo,
1335 std::set<ValueInfo> &DevirtTargets) {
1336 // See if the program contains a single implementation of this virtual
1337 // function.
1338 auto TheFn = TargetsForSlot[0];
1339 for (auto &&Target : TargetsForSlot)
1340 if (TheFn != Target)
1341 return false;
1342
1343 // Don't devirtualize if we don't have target definition.
1344 auto Size = TheFn.getSummaryList().size();
1345 if (!Size)
1346 return false;
1347
1348 // Don't devirtualize function if we're told to skip it
1349 // in -wholeprogramdevirt-skip.
1350 if (FunctionsToSkip.match(TheFn.name()))
1351 return false;
1352
1353 // If the summary list contains multiple summaries where at least one is
1354 // a local, give up, as we won't know which (possibly promoted) name to use.
1355 for (const auto &S : TheFn.getSummaryList())
1356 if (GlobalValue::isLocalLinkage(S->linkage()) && Size > 1)
1357 return false;
1358
1359 // Collect functions devirtualized at least for one call site for stats.
1361 DevirtTargets.insert(TheFn);
1362
1363 auto &S = TheFn.getSummaryList()[0];
1364 bool IsExported = AddCalls(SlotInfo, TheFn);
1365 if (IsExported)
1366 ExportedGUIDs.insert(TheFn.getGUID());
1367
1368 // Record in summary for use in devirtualization during the ThinLTO import
1369 // step.
1371 if (GlobalValue::isLocalLinkage(S->linkage())) {
1372 if (IsExported)
1373 // If target is a local function and we are exporting it by
1374 // devirtualizing a call in another module, we need to record the
1375 // promoted name.
1377 TheFn.name(), ExportSummary.getModuleHash(S->modulePath()));
1378 else {
1379 LocalWPDTargetsMap[TheFn].push_back(SlotSummary);
1380 Res->SingleImplName = std::string(TheFn.name());
1381 }
1382 } else
1383 Res->SingleImplName = std::string(TheFn.name());
1384
1385 // Name will be empty if this thin link driven off of serialized combined
1386 // index (e.g. llvm-lto). However, WPD is not supported/invoked for the
1387 // legacy LTO API anyway.
1388 assert(!Res->SingleImplName.empty());
1389
1390 return true;
1391}
1392
1393void DevirtModule::tryICallBranchFunnel(
1394 MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
1395 WholeProgramDevirtResolution *Res, VTableSlot Slot) {
1396 Triple T(M.getTargetTriple());
1397 if (T.getArch() != Triple::x86_64)
1398 return;
1399
1400 if (TargetsForSlot.size() > ClThreshold)
1401 return;
1402
1403 bool HasNonDevirt = !SlotInfo.CSInfo.AllCallSitesDevirted;
1404 if (!HasNonDevirt)
1405 for (auto &P : SlotInfo.ConstCSInfo)
1406 if (!P.second.AllCallSitesDevirted) {
1407 HasNonDevirt = true;
1408 break;
1409 }
1410
1411 if (!HasNonDevirt)
1412 return;
1413
1414 FunctionType *FT =
1415 FunctionType::get(Type::getVoidTy(M.getContext()), {Int8PtrTy}, true);
1416 Function *JT;
1417 if (isa<MDString>(Slot.TypeID)) {
1419 M.getDataLayout().getProgramAddressSpace(),
1420 getGlobalName(Slot, {}, "branch_funnel"), &M);
1421 JT->setVisibility(GlobalValue::HiddenVisibility);
1422 } else {
1424 M.getDataLayout().getProgramAddressSpace(),
1425 "branch_funnel", &M);
1426 }
1427 JT->addParamAttr(0, Attribute::Nest);
1428
1429 std::vector<Value *> JTArgs;
1430 JTArgs.push_back(JT->arg_begin());
1431 for (auto &T : TargetsForSlot) {
1432 JTArgs.push_back(getMemberAddr(T.TM));
1433 JTArgs.push_back(T.Fn);
1434 }
1435
1436 BasicBlock *BB = BasicBlock::Create(M.getContext(), "", JT, nullptr);
1437 Function *Intr =
1438 Intrinsic::getDeclaration(&M, llvm::Intrinsic::icall_branch_funnel, {});
1439
1440 auto *CI = CallInst::Create(Intr, JTArgs, "", BB);
1441 CI->setTailCallKind(CallInst::TCK_MustTail);
1442 ReturnInst::Create(M.getContext(), nullptr, BB);
1443
1444 bool IsExported = false;
1445 applyICallBranchFunnel(SlotInfo, JT, IsExported);
1446 if (IsExported)
1448}
1449
1450void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
1451 Constant *JT, bool &IsExported) {
1452 auto Apply = [&](CallSiteInfo &CSInfo) {
1453 if (CSInfo.isExported())
1454 IsExported = true;
1455 if (CSInfo.AllCallSitesDevirted)
1456 return;
1457
1458 std::map<CallBase *, CallBase *> CallBases;
1459 for (auto &&VCallSite : CSInfo.CallSites) {
1460 CallBase &CB = VCallSite.CB;
1461
1462 if (CallBases.find(&CB) != CallBases.end()) {
1463 // When finding devirtualizable calls, it's possible to find the same
1464 // vtable passed to multiple llvm.type.test or llvm.type.checked.load
1465 // calls, which can cause duplicate call sites to be recorded in
1466 // [Const]CallSites. If we've already found one of these
1467 // call instances, just ignore it. It will be replaced later.
1468 continue;
1469 }
1470
1471 // Jump tables are only profitable if the retpoline mitigation is enabled.
1472 Attribute FSAttr = CB.getCaller()->getFnAttribute("target-features");
1473 if (!FSAttr.isValid() ||
1474 !FSAttr.getValueAsString().contains("+retpoline"))
1475 continue;
1476
1477 NumBranchFunnel++;
1478 if (RemarksEnabled)
1479 VCallSite.emitRemark("branch-funnel",
1480 JT->stripPointerCasts()->getName(), OREGetter);
1481
1482 // Pass the address of the vtable in the nest register, which is r10 on
1483 // x86_64.
1484 std::vector<Type *> NewArgs;
1485 NewArgs.push_back(Int8PtrTy);
1486 append_range(NewArgs, CB.getFunctionType()->params());
1487 FunctionType *NewFT =
1489 CB.getFunctionType()->isVarArg());
1490 PointerType *NewFTPtr = PointerType::getUnqual(NewFT);
1491
1492 IRBuilder<> IRB(&CB);
1493 std::vector<Value *> Args;
1494 Args.push_back(VCallSite.VTable);
1495 llvm::append_range(Args, CB.args());
1496
1497 CallBase *NewCS = nullptr;
1498 if (isa<CallInst>(CB))
1499 NewCS = IRB.CreateCall(NewFT, IRB.CreateBitCast(JT, NewFTPtr), Args);
1500 else
1501 NewCS = IRB.CreateInvoke(NewFT, IRB.CreateBitCast(JT, NewFTPtr),
1502 cast<InvokeInst>(CB).getNormalDest(),
1503 cast<InvokeInst>(CB).getUnwindDest(), Args);
1504 NewCS->setCallingConv(CB.getCallingConv());
1505
1507 std::vector<AttributeSet> NewArgAttrs;
1508 NewArgAttrs.push_back(AttributeSet::get(
1509 M.getContext(), ArrayRef<Attribute>{Attribute::get(
1510 M.getContext(), Attribute::Nest)}));
1511 for (unsigned I = 0; I + 2 < Attrs.getNumAttrSets(); ++I)
1512 NewArgAttrs.push_back(Attrs.getParamAttrs(I));
1513 NewCS->setAttributes(
1514 AttributeList::get(M.getContext(), Attrs.getFnAttrs(),
1515 Attrs.getRetAttrs(), NewArgAttrs));
1516
1517 CallBases[&CB] = NewCS;
1518
1519 // This use is no longer unsafe.
1520 if (VCallSite.NumUnsafeUses)
1521 --*VCallSite.NumUnsafeUses;
1522 }
1523 // Don't mark as devirtualized because there may be callers compiled without
1524 // retpoline mitigation, which would mean that they are lowered to
1525 // llvm.type.test and therefore require an llvm.type.test resolution for the
1526 // type identifier.
1527
1528 for (auto &[Old, New] : CallBases) {
1529 Old->replaceAllUsesWith(New);
1530 Old->eraseFromParent();
1531 }
1532 };
1533 Apply(SlotInfo.CSInfo);
1534 for (auto &P : SlotInfo.ConstCSInfo)
1535 Apply(P.second);
1536}
1537
1538bool DevirtModule::tryEvaluateFunctionsWithArgs(
1540 ArrayRef<uint64_t> Args) {
1541 // Evaluate each function and store the result in each target's RetVal
1542 // field.
1543 for (VirtualCallTarget &Target : TargetsForSlot) {
1544 // TODO: Skip for now if the vtable symbol was an alias to a function,
1545 // need to evaluate whether it would be correct to analyze the aliasee
1546 // function for this optimization.
1547 auto Fn = dyn_cast<Function>(Target.Fn);
1548 if (!Fn)
1549 return false;
1550
1551 if (Fn->arg_size() != Args.size() + 1)
1552 return false;
1553
1554 Evaluator Eval(M.getDataLayout(), nullptr);
1556 EvalArgs.push_back(
1558 for (unsigned I = 0; I != Args.size(); ++I) {
1559 auto *ArgTy =
1560 dyn_cast<IntegerType>(Fn->getFunctionType()->getParamType(I + 1));
1561 if (!ArgTy)
1562 return false;
1563 EvalArgs.push_back(ConstantInt::get(ArgTy, Args[I]));
1564 }
1565
1566 Constant *RetVal;
1567 if (!Eval.EvaluateFunction(Fn, RetVal, EvalArgs) ||
1568 !isa<ConstantInt>(RetVal))
1569 return false;
1570 Target.RetVal = cast<ConstantInt>(RetVal)->getZExtValue();
1571 }
1572 return true;
1573}
1574
1575void DevirtModule::applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
1576 uint64_t TheRetVal) {
1577 for (auto Call : CSInfo.CallSites) {
1578 if (!OptimizedCalls.insert(&Call.CB).second)
1579 continue;
1580 NumUniformRetVal++;
1581 Call.replaceAndErase(
1582 "uniform-ret-val", FnName, RemarksEnabled, OREGetter,
1583 ConstantInt::get(cast<IntegerType>(Call.CB.getType()), TheRetVal));
1584 }
1585 CSInfo.markDevirt();
1586}
1587
1588bool DevirtModule::tryUniformRetValOpt(
1589 MutableArrayRef<VirtualCallTarget> TargetsForSlot, CallSiteInfo &CSInfo,
1591 // Uniform return value optimization. If all functions return the same
1592 // constant, replace all calls with that constant.
1593 uint64_t TheRetVal = TargetsForSlot[0].RetVal;
1594 for (const VirtualCallTarget &Target : TargetsForSlot)
1595 if (Target.RetVal != TheRetVal)
1596 return false;
1597
1598 if (CSInfo.isExported()) {
1600 Res->Info = TheRetVal;
1601 }
1602
1603 applyUniformRetValOpt(CSInfo, TargetsForSlot[0].Fn->getName(), TheRetVal);
1604 if (RemarksEnabled || AreStatisticsEnabled())
1605 for (auto &&Target : TargetsForSlot)
1606 Target.WasDevirt = true;
1607 return true;
1608}
1609
1610std::string DevirtModule::getGlobalName(VTableSlot Slot,
1611 ArrayRef<uint64_t> Args,
1612 StringRef Name) {
1613 std::string FullName = "__typeid_";
1614 raw_string_ostream OS(FullName);
1615 OS << cast<MDString>(Slot.TypeID)->getString() << '_' << Slot.ByteOffset;
1616 for (uint64_t Arg : Args)
1617 OS << '_' << Arg;
1618 OS << '_' << Name;
1619 return FullName;
1620}
1621
1622bool DevirtModule::shouldExportConstantsAsAbsoluteSymbols() {
1623 Triple T(M.getTargetTriple());
1624 return T.isX86() && T.getObjectFormat() == Triple::ELF;
1625}
1626
1627void DevirtModule::exportGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
1630 getGlobalName(Slot, Args, Name), C, &M);
1632}
1633
1634void DevirtModule::exportConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
1635 StringRef Name, uint32_t Const,
1636 uint32_t &Storage) {
1637 if (shouldExportConstantsAsAbsoluteSymbols()) {
1638 exportGlobal(
1639 Slot, Args, Name,
1640 ConstantExpr::getIntToPtr(ConstantInt::get(Int32Ty, Const), Int8PtrTy));
1641 return;
1642 }
1643
1644 Storage = Const;
1645}
1646
1647Constant *DevirtModule::importGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
1648 StringRef Name) {
1649 Constant *C =
1650 M.getOrInsertGlobal(getGlobalName(Slot, Args, Name), Int8Arr0Ty);
1651 auto *GV = dyn_cast<GlobalVariable>(C);
1652 if (GV)
1654 return C;
1655}
1656
1657Constant *DevirtModule::importConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
1658 StringRef Name, IntegerType *IntTy,
1659 uint32_t Storage) {
1660 if (!shouldExportConstantsAsAbsoluteSymbols())
1661 return ConstantInt::get(IntTy, Storage);
1662
1663 Constant *C = importGlobal(Slot, Args, Name);
1664 auto *GV = cast<GlobalVariable>(C->stripPointerCasts());
1665 C = ConstantExpr::getPtrToInt(C, IntTy);
1666
1667 // We only need to set metadata if the global is newly created, in which
1668 // case it would not have hidden visibility.
1669 if (GV->hasMetadata(LLVMContext::MD_absolute_symbol))
1670 return C;
1671
1672 auto SetAbsRange = [&](uint64_t Min, uint64_t Max) {
1673 auto *MinC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Min));
1674 auto *MaxC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Max));
1675 GV->setMetadata(LLVMContext::MD_absolute_symbol,
1676 MDNode::get(M.getContext(), {MinC, MaxC}));
1677 };
1678 unsigned AbsWidth = IntTy->getBitWidth();
1679 if (AbsWidth == IntPtrTy->getBitWidth())
1680 SetAbsRange(~0ull, ~0ull); // Full set.
1681 else
1682 SetAbsRange(0, 1ull << AbsWidth);
1683 return C;
1684}
1685
1686void DevirtModule::applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
1687 bool IsOne,
1688 Constant *UniqueMemberAddr) {
1689 for (auto &&Call : CSInfo.CallSites) {
1690 if (!OptimizedCalls.insert(&Call.CB).second)
1691 continue;
1692 IRBuilder<> B(&Call.CB);
1693 Value *Cmp =
1694 B.CreateICmp(IsOne ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE, Call.VTable,
1695 B.CreateBitCast(UniqueMemberAddr, Call.VTable->getType()));
1696 Cmp = B.CreateZExt(Cmp, Call.CB.getType());
1697 NumUniqueRetVal++;
1698 Call.replaceAndErase("unique-ret-val", FnName, RemarksEnabled, OREGetter,
1699 Cmp);
1700 }
1701 CSInfo.markDevirt();
1702}
1703
1704Constant *DevirtModule::getMemberAddr(const TypeMemberInfo *M) {
1705 return ConstantExpr::getGetElementPtr(Int8Ty, M->Bits->GV,
1706 ConstantInt::get(Int64Ty, M->Offset));
1707}
1708
1709bool DevirtModule::tryUniqueRetValOpt(
1710 unsigned BitWidth, MutableArrayRef<VirtualCallTarget> TargetsForSlot,
1711 CallSiteInfo &CSInfo, WholeProgramDevirtResolution::ByArg *Res,
1712 VTableSlot Slot, ArrayRef<uint64_t> Args) {
1713 // IsOne controls whether we look for a 0 or a 1.
1714 auto tryUniqueRetValOptFor = [&](bool IsOne) {
1715 const TypeMemberInfo *UniqueMember = nullptr;
1716 for (const VirtualCallTarget &Target : TargetsForSlot) {
1717 if (Target.RetVal == (IsOne ? 1 : 0)) {
1718 if (UniqueMember)
1719 return false;
1720 UniqueMember = Target.TM;
1721 }
1722 }
1723
1724 // We should have found a unique member or bailed out by now. We already
1725 // checked for a uniform return value in tryUniformRetValOpt.
1726 assert(UniqueMember);
1727
1728 Constant *UniqueMemberAddr = getMemberAddr(UniqueMember);
1729 if (CSInfo.isExported()) {
1731 Res->Info = IsOne;
1732
1733 exportGlobal(Slot, Args, "unique_member", UniqueMemberAddr);
1734 }
1735
1736 // Replace each call with the comparison.
1737 applyUniqueRetValOpt(CSInfo, TargetsForSlot[0].Fn->getName(), IsOne,
1738 UniqueMemberAddr);
1739
1740 // Update devirtualization statistics for targets.
1741 if (RemarksEnabled || AreStatisticsEnabled())
1742 for (auto &&Target : TargetsForSlot)
1743 Target.WasDevirt = true;
1744
1745 return true;
1746 };
1747
1748 if (BitWidth == 1) {
1749 if (tryUniqueRetValOptFor(true))
1750 return true;
1751 if (tryUniqueRetValOptFor(false))
1752 return true;
1753 }
1754 return false;
1755}
1756
1757void DevirtModule::applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
1758 Constant *Byte, Constant *Bit) {
1759 for (auto Call : CSInfo.CallSites) {
1760 if (!OptimizedCalls.insert(&Call.CB).second)
1761 continue;
1762 auto *RetType = cast<IntegerType>(Call.CB.getType());
1763 IRBuilder<> B(&Call.CB);
1764 Value *Addr = B.CreatePtrAdd(Call.VTable, Byte);
1765 if (RetType->getBitWidth() == 1) {
1766 Value *Bits = B.CreateLoad(Int8Ty, Addr);
1767 Value *BitsAndBit = B.CreateAnd(Bits, Bit);
1768 auto IsBitSet = B.CreateICmpNE(BitsAndBit, ConstantInt::get(Int8Ty, 0));
1769 NumVirtConstProp1Bit++;
1770 Call.replaceAndErase("virtual-const-prop-1-bit", FnName, RemarksEnabled,
1771 OREGetter, IsBitSet);
1772 } else {
1773 Value *Val = B.CreateLoad(RetType, Addr);
1774 NumVirtConstProp++;
1775 Call.replaceAndErase("virtual-const-prop", FnName, RemarksEnabled,
1776 OREGetter, Val);
1777 }
1778 }
1779 CSInfo.markDevirt();
1780}
1781
1782bool DevirtModule::tryVirtualConstProp(
1783 MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
1784 WholeProgramDevirtResolution *Res, VTableSlot Slot) {
1785 // TODO: Skip for now if the vtable symbol was an alias to a function,
1786 // need to evaluate whether it would be correct to analyze the aliasee
1787 // function for this optimization.
1788 auto Fn = dyn_cast<Function>(TargetsForSlot[0].Fn);
1789 if (!Fn)
1790 return false;
1791 // This only works if the function returns an integer.
1792 auto RetType = dyn_cast<IntegerType>(Fn->getReturnType());
1793 if (!RetType)
1794 return false;
1795 unsigned BitWidth = RetType->getBitWidth();
1796 if (BitWidth > 64)
1797 return false;
1798
1799 // Make sure that each function is defined, does not access memory, takes at
1800 // least one argument, does not use its first argument (which we assume is
1801 // 'this'), and has the same return type.
1802 //
1803 // Note that we test whether this copy of the function is readnone, rather
1804 // than testing function attributes, which must hold for any copy of the
1805 // function, even a less optimized version substituted at link time. This is
1806 // sound because the virtual constant propagation optimizations effectively
1807 // inline all implementations of the virtual function into each call site,
1808 // rather than using function attributes to perform local optimization.
1809 for (VirtualCallTarget &Target : TargetsForSlot) {
1810 // TODO: Skip for now if the vtable symbol was an alias to a function,
1811 // need to evaluate whether it would be correct to analyze the aliasee
1812 // function for this optimization.
1813 auto Fn = dyn_cast<Function>(Target.Fn);
1814 if (!Fn)
1815 return false;
1816
1817 if (Fn->isDeclaration() ||
1818 !computeFunctionBodyMemoryAccess(*Fn, AARGetter(*Fn))
1819 .doesNotAccessMemory() ||
1820 Fn->arg_empty() || !Fn->arg_begin()->use_empty() ||
1821 Fn->getReturnType() != RetType)
1822 return false;
1823 }
1824
1825 for (auto &&CSByConstantArg : SlotInfo.ConstCSInfo) {
1826 if (!tryEvaluateFunctionsWithArgs(TargetsForSlot, CSByConstantArg.first))
1827 continue;
1828
1829 WholeProgramDevirtResolution::ByArg *ResByArg = nullptr;
1830 if (Res)
1831 ResByArg = &Res->ResByArg[CSByConstantArg.first];
1832
1833 if (tryUniformRetValOpt(TargetsForSlot, CSByConstantArg.second, ResByArg))
1834 continue;
1835
1836 if (tryUniqueRetValOpt(BitWidth, TargetsForSlot, CSByConstantArg.second,
1837 ResByArg, Slot, CSByConstantArg.first))
1838 continue;
1839
1840 // Find an allocation offset in bits in all vtables associated with the
1841 // type.
1842 uint64_t AllocBefore =
1843 findLowestOffset(TargetsForSlot, /*IsAfter=*/false, BitWidth);
1844 uint64_t AllocAfter =
1845 findLowestOffset(TargetsForSlot, /*IsAfter=*/true, BitWidth);
1846
1847 // Calculate the total amount of padding needed to store a value at both
1848 // ends of the object.
1849 uint64_t TotalPaddingBefore = 0, TotalPaddingAfter = 0;
1850 for (auto &&Target : TargetsForSlot) {
1851 TotalPaddingBefore += std::max<int64_t>(
1852 (AllocBefore + 7) / 8 - Target.allocatedBeforeBytes() - 1, 0);
1853 TotalPaddingAfter += std::max<int64_t>(
1854 (AllocAfter + 7) / 8 - Target.allocatedAfterBytes() - 1, 0);
1855 }
1856
1857 // If the amount of padding is too large, give up.
1858 // FIXME: do something smarter here.
1859 if (std::min(TotalPaddingBefore, TotalPaddingAfter) > 128)
1860 continue;
1861
1862 // Calculate the offset to the value as a (possibly negative) byte offset
1863 // and (if applicable) a bit offset, and store the values in the targets.
1864 int64_t OffsetByte;
1865 uint64_t OffsetBit;
1866 if (TotalPaddingBefore <= TotalPaddingAfter)
1867 setBeforeReturnValues(TargetsForSlot, AllocBefore, BitWidth, OffsetByte,
1868 OffsetBit);
1869 else
1870 setAfterReturnValues(TargetsForSlot, AllocAfter, BitWidth, OffsetByte,
1871 OffsetBit);
1872
1873 if (RemarksEnabled || AreStatisticsEnabled())
1874 for (auto &&Target : TargetsForSlot)
1875 Target.WasDevirt = true;
1876
1877
1878 if (CSByConstantArg.second.isExported()) {
1880 exportConstant(Slot, CSByConstantArg.first, "byte", OffsetByte,
1881 ResByArg->Byte);
1882 exportConstant(Slot, CSByConstantArg.first, "bit", 1ULL << OffsetBit,
1883 ResByArg->Bit);
1884 }
1885
1886 // Rewrite each call to a load from OffsetByte/OffsetBit.
1887 Constant *ByteConst = ConstantInt::get(Int32Ty, OffsetByte);
1888 Constant *BitConst = ConstantInt::get(Int8Ty, 1ULL << OffsetBit);
1889 applyVirtualConstProp(CSByConstantArg.second,
1890 TargetsForSlot[0].Fn->getName(), ByteConst, BitConst);
1891 }
1892 return true;
1893}
1894
1895void DevirtModule::rebuildGlobal(VTableBits &B) {
1896 if (B.Before.Bytes.empty() && B.After.Bytes.empty())
1897 return;
1898
1899 // Align the before byte array to the global's minimum alignment so that we
1900 // don't break any alignment requirements on the global.
1901 Align Alignment = M.getDataLayout().getValueOrABITypeAlignment(
1902 B.GV->getAlign(), B.GV->getValueType());
1903 B.Before.Bytes.resize(alignTo(B.Before.Bytes.size(), Alignment));
1904
1905 // Before was stored in reverse order; flip it now.
1906 for (size_t I = 0, Size = B.Before.Bytes.size(); I != Size / 2; ++I)
1907 std::swap(B.Before.Bytes[I], B.Before.Bytes[Size - 1 - I]);
1908
1909 // Build an anonymous global containing the before bytes, followed by the
1910 // original initializer, followed by the after bytes.
1911 auto NewInit = ConstantStruct::getAnon(
1912 {ConstantDataArray::get(M.getContext(), B.Before.Bytes),
1913 B.GV->getInitializer(),
1914 ConstantDataArray::get(M.getContext(), B.After.Bytes)});
1915 auto NewGV =
1916 new GlobalVariable(M, NewInit->getType(), B.GV->isConstant(),
1917 GlobalVariable::PrivateLinkage, NewInit, "", B.GV);
1918 NewGV->setSection(B.GV->getSection());
1919 NewGV->setComdat(B.GV->getComdat());
1920 NewGV->setAlignment(B.GV->getAlign());
1921
1922 // Copy the original vtable's metadata to the anonymous global, adjusting
1923 // offsets as required.
1924 NewGV->copyMetadata(B.GV, B.Before.Bytes.size());
1925
1926 // Build an alias named after the original global, pointing at the second
1927 // element (the original initializer).
1928 auto Alias = GlobalAlias::create(
1929 B.GV->getInitializer()->getType(), 0, B.GV->getLinkage(), "",
1931 NewInit->getType(), NewGV,
1932 ArrayRef<Constant *>{ConstantInt::get(Int32Ty, 0),
1933 ConstantInt::get(Int32Ty, 1)}),
1934 &M);
1935 Alias->setVisibility(B.GV->getVisibility());
1936 Alias->takeName(B.GV);
1937
1938 B.GV->replaceAllUsesWith(Alias);
1939 B.GV->eraseFromParent();
1940}
1941
1942bool DevirtModule::areRemarksEnabled() {
1943 const auto &FL = M.getFunctionList();
1944 for (const Function &Fn : FL) {
1945 if (Fn.empty())
1946 continue;
1947 auto DI = OptimizationRemark(DEBUG_TYPE, "", DebugLoc(), &Fn.front());
1948 return DI.isEnabled();
1949 }
1950 return false;
1951}
1952
1953void DevirtModule::scanTypeTestUsers(
1954 Function *TypeTestFunc,
1955 DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) {
1956 // Find all virtual calls via a virtual table pointer %p under an assumption
1957 // of the form llvm.assume(llvm.type.test(%p, %md)). This indicates that %p
1958 // points to a member of the type identifier %md. Group calls by (type ID,
1959 // offset) pair (effectively the identity of the virtual function) and store
1960 // to CallSlots.
1961 for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) {
1962 auto *CI = dyn_cast<CallInst>(U.getUser());
1963 if (!CI)
1964 continue;
1965
1966 // Search for virtual calls based on %p and add them to DevirtCalls.
1969 auto &DT = LookupDomTree(*CI->getFunction());
1970 findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT);
1971
1972 Metadata *TypeId =
1973 cast<MetadataAsValue>(CI->getArgOperand(1))->getMetadata();
1974 // If we found any, add them to CallSlots.
1975 if (!Assumes.empty()) {
1976 Value *Ptr = CI->getArgOperand(0)->stripPointerCasts();
1977 for (DevirtCallSite Call : DevirtCalls)
1978 CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CB, nullptr);
1979 }
1980
1981 auto RemoveTypeTestAssumes = [&]() {
1982 // We no longer need the assumes or the type test.
1983 for (auto *Assume : Assumes)
1984 Assume->eraseFromParent();
1985 // We can't use RecursivelyDeleteTriviallyDeadInstructions here because we
1986 // may use the vtable argument later.
1987 if (CI->use_empty())
1988 CI->eraseFromParent();
1989 };
1990
1991 // At this point we could remove all type test assume sequences, as they
1992 // were originally inserted for WPD. However, we can keep these in the
1993 // code stream for later analysis (e.g. to help drive more efficient ICP
1994 // sequences). They will eventually be removed by a second LowerTypeTests
1995 // invocation that cleans them up. In order to do this correctly, the first
1996 // LowerTypeTests invocation needs to know that they have "Unknown" type
1997 // test resolution, so that they aren't treated as Unsat and lowered to
1998 // False, which will break any uses on assumes. Below we remove any type
1999 // test assumes that will not be treated as Unknown by LTT.
2000
2001 // The type test assumes will be treated by LTT as Unsat if the type id is
2002 // not used on a global (in which case it has no entry in the TypeIdMap).
2003 if (!TypeIdMap.count(TypeId))
2004 RemoveTypeTestAssumes();
2005
2006 // For ThinLTO importing, we need to remove the type test assumes if this is
2007 // an MDString type id without a corresponding TypeIdSummary. Any
2008 // non-MDString type ids are ignored and treated as Unknown by LTT, so their
2009 // type test assumes can be kept. If the MDString type id is missing a
2010 // TypeIdSummary (e.g. because there was no use on a vcall, preventing the
2011 // exporting phase of WPD from analyzing it), then it would be treated as
2012 // Unsat by LTT and we need to remove its type test assumes here. If not
2013 // used on a vcall we don't need them for later optimization use in any
2014 // case.
2015 else if (ImportSummary && isa<MDString>(TypeId)) {
2016 const TypeIdSummary *TidSummary =
2017 ImportSummary->getTypeIdSummary(cast<MDString>(TypeId)->getString());
2018 if (!TidSummary)
2019 RemoveTypeTestAssumes();
2020 else
2021 // If one was created it should not be Unsat, because if we reached here
2022 // the type id was used on a global.
2024 }
2025 }
2026}
2027
2028void DevirtModule::scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc) {
2029 Function *TypeTestFunc = Intrinsic::getDeclaration(&M, Intrinsic::type_test);
2030
2031 for (Use &U : llvm::make_early_inc_range(TypeCheckedLoadFunc->uses())) {
2032 auto *CI = dyn_cast<CallInst>(U.getUser());
2033 if (!CI)
2034 continue;
2035
2036 Value *Ptr = CI->getArgOperand(0);
2037 Value *Offset = CI->getArgOperand(1);
2038 Value *TypeIdValue = CI->getArgOperand(2);
2039 Metadata *TypeId = cast<MetadataAsValue>(TypeIdValue)->getMetadata();
2040
2044 bool HasNonCallUses = false;
2045 auto &DT = LookupDomTree(*CI->getFunction());
2046 findDevirtualizableCallsForTypeCheckedLoad(DevirtCalls, LoadedPtrs, Preds,
2047 HasNonCallUses, CI, DT);
2048
2049 // Start by generating "pessimistic" code that explicitly loads the function
2050 // pointer from the vtable and performs the type check. If possible, we will
2051 // eliminate the load and the type check later.
2052
2053 // If possible, only generate the load at the point where it is used.
2054 // This helps avoid unnecessary spills.
2055 IRBuilder<> LoadB(
2056 (LoadedPtrs.size() == 1 && !HasNonCallUses) ? LoadedPtrs[0] : CI);
2057
2058 Value *LoadedValue = nullptr;
2059 if (TypeCheckedLoadFunc->getIntrinsicID() ==
2060 Intrinsic::type_checked_load_relative) {
2061 Value *GEP = LoadB.CreatePtrAdd(Ptr, Offset);
2062 LoadedValue = LoadB.CreateLoad(Int32Ty, GEP);
2063 LoadedValue = LoadB.CreateSExt(LoadedValue, IntPtrTy);
2064 GEP = LoadB.CreatePtrToInt(GEP, IntPtrTy);
2065 LoadedValue = LoadB.CreateAdd(GEP, LoadedValue);
2066 LoadedValue = LoadB.CreateIntToPtr(LoadedValue, Int8PtrTy);
2067 } else {
2068 Value *GEP = LoadB.CreatePtrAdd(Ptr, Offset);
2069 LoadedValue = LoadB.CreateLoad(Int8PtrTy, GEP);
2070 }
2071
2072 for (Instruction *LoadedPtr : LoadedPtrs) {
2073 LoadedPtr->replaceAllUsesWith(LoadedValue);
2074 LoadedPtr->eraseFromParent();
2075 }
2076
2077 // Likewise for the type test.
2078 IRBuilder<> CallB((Preds.size() == 1 && !HasNonCallUses) ? Preds[0] : CI);
2079 CallInst *TypeTestCall = CallB.CreateCall(TypeTestFunc, {Ptr, TypeIdValue});
2080
2081 for (Instruction *Pred : Preds) {
2082 Pred->replaceAllUsesWith(TypeTestCall);
2083 Pred->eraseFromParent();
2084 }
2085
2086 // We have already erased any extractvalue instructions that refer to the
2087 // intrinsic call, but the intrinsic may have other non-extractvalue uses
2088 // (although this is unlikely). In that case, explicitly build a pair and
2089 // RAUW it.
2090 if (!CI->use_empty()) {
2091 Value *Pair = PoisonValue::get(CI->getType());
2092 IRBuilder<> B(CI);
2093 Pair = B.CreateInsertValue(Pair, LoadedValue, {0});
2094 Pair = B.CreateInsertValue(Pair, TypeTestCall, {1});
2095 CI->replaceAllUsesWith(Pair);
2096 }
2097
2098 // The number of unsafe uses is initially the number of uses.
2099 auto &NumUnsafeUses = NumUnsafeUsesForTypeTest[TypeTestCall];
2100 NumUnsafeUses = DevirtCalls.size();
2101
2102 // If the function pointer has a non-call user, we cannot eliminate the type
2103 // check, as one of those users may eventually call the pointer. Increment
2104 // the unsafe use count to make sure it cannot reach zero.
2105 if (HasNonCallUses)
2106 ++NumUnsafeUses;
2107 for (DevirtCallSite Call : DevirtCalls) {
2108 CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CB,
2109 &NumUnsafeUses);
2110 }
2111
2112 CI->eraseFromParent();
2113 }
2114}
2115
2116void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) {
2117 auto *TypeId = dyn_cast<MDString>(Slot.TypeID);
2118 if (!TypeId)
2119 return;
2120 const TypeIdSummary *TidSummary =
2121 ImportSummary->getTypeIdSummary(TypeId->getString());
2122 if (!TidSummary)
2123 return;
2124 auto ResI = TidSummary->WPDRes.find(Slot.ByteOffset);
2125 if (ResI == TidSummary->WPDRes.end())
2126 return;
2127 const WholeProgramDevirtResolution &Res = ResI->second;
2128
2130 assert(!Res.SingleImplName.empty());
2131 // The type of the function in the declaration is irrelevant because every
2132 // call site will cast it to the correct type.
2133 Constant *SingleImpl =
2134 cast<Constant>(M.getOrInsertFunction(Res.SingleImplName,
2135 Type::getVoidTy(M.getContext()))
2136 .getCallee());
2137
2138 // This is the import phase so we should not be exporting anything.
2139 bool IsExported = false;
2140 applySingleImplDevirt(SlotInfo, SingleImpl, IsExported);
2141 assert(!IsExported);
2142 }
2143
2144 for (auto &CSByConstantArg : SlotInfo.ConstCSInfo) {
2145 auto I = Res.ResByArg.find(CSByConstantArg.first);
2146 if (I == Res.ResByArg.end())
2147 continue;
2148 auto &ResByArg = I->second;
2149 // FIXME: We should figure out what to do about the "function name" argument
2150 // to the apply* functions, as the function names are unavailable during the
2151 // importing phase. For now we just pass the empty string. This does not
2152 // impact correctness because the function names are just used for remarks.
2153 switch (ResByArg.TheKind) {
2155 applyUniformRetValOpt(CSByConstantArg.second, "", ResByArg.Info);
2156 break;
2158 Constant *UniqueMemberAddr =
2159 importGlobal(Slot, CSByConstantArg.first, "unique_member");
2160 applyUniqueRetValOpt(CSByConstantArg.second, "", ResByArg.Info,
2161 UniqueMemberAddr);
2162 break;
2163 }
2165 Constant *Byte = importConstant(Slot, CSByConstantArg.first, "byte",
2166 Int32Ty, ResByArg.Byte);
2167 Constant *Bit = importConstant(Slot, CSByConstantArg.first, "bit", Int8Ty,
2168 ResByArg.Bit);
2169 applyVirtualConstProp(CSByConstantArg.second, "", Byte, Bit);
2170 break;
2171 }
2172 default:
2173 break;
2174 }
2175 }
2176
2178 // The type of the function is irrelevant, because it's bitcast at calls
2179 // anyhow.
2180 Constant *JT = cast<Constant>(
2181 M.getOrInsertFunction(getGlobalName(Slot, {}, "branch_funnel"),
2182 Type::getVoidTy(M.getContext()))
2183 .getCallee());
2184 bool IsExported = false;
2185 applyICallBranchFunnel(SlotInfo, JT, IsExported);
2186 assert(!IsExported);
2187 }
2188}
2189
2190void DevirtModule::removeRedundantTypeTests() {
2191 auto True = ConstantInt::getTrue(M.getContext());
2192 for (auto &&U : NumUnsafeUsesForTypeTest) {
2193 if (U.second == 0) {
2194 U.first->replaceAllUsesWith(True);
2195 U.first->eraseFromParent();
2196 }
2197 }
2198}
2199
2201DevirtModule::lookUpFunctionValueInfo(Function *TheFn,
2202 ModuleSummaryIndex *ExportSummary) {
2203 assert((ExportSummary != nullptr) &&
2204 "Caller guarantees ExportSummary is not nullptr");
2205
2206 const auto TheFnGUID = TheFn->getGUID();
2207 const auto TheFnGUIDWithExportedName = GlobalValue::getGUID(TheFn->getName());
2208 // Look up ValueInfo with the GUID in the current linkage.
2209 ValueInfo TheFnVI = ExportSummary->getValueInfo(TheFnGUID);
2210 // If no entry is found and GUID is different from GUID computed using
2211 // exported name, look up ValueInfo with the exported name unconditionally.
2212 // This is a fallback.
2213 //
2214 // The reason to have a fallback:
2215 // 1. LTO could enable global value internalization via
2216 // `enable-lto-internalization`.
2217 // 2. The GUID in ExportedSummary is computed using exported name.
2218 if ((!TheFnVI) && (TheFnGUID != TheFnGUIDWithExportedName)) {
2219 TheFnVI = ExportSummary->getValueInfo(TheFnGUIDWithExportedName);
2220 }
2221 return TheFnVI;
2222}
2223
2224bool DevirtModule::mustBeUnreachableFunction(
2225 Function *const F, ModuleSummaryIndex *ExportSummary) {
2226 // First, learn unreachability by analyzing function IR.
2227 if (!F->isDeclaration()) {
2228 // A function must be unreachable if its entry block ends with an
2229 // 'unreachable'.
2230 return isa<UnreachableInst>(F->getEntryBlock().getTerminator());
2231 }
2232 // Learn unreachability from ExportSummary if ExportSummary is present.
2233 return ExportSummary &&
2235 DevirtModule::lookUpFunctionValueInfo(F, ExportSummary));
2236}
2237
2238bool DevirtModule::run() {
2239 // If only some of the modules were split, we cannot correctly perform
2240 // this transformation. We already checked for the presense of type tests
2241 // with partially split modules during the thin link, and would have emitted
2242 // an error if any were found, so here we can simply return.
2243 if ((ExportSummary && ExportSummary->partiallySplitLTOUnits()) ||
2244 (ImportSummary && ImportSummary->partiallySplitLTOUnits()))
2245 return false;
2246
2247 Function *TypeTestFunc =
2248 M.getFunction(Intrinsic::getName(Intrinsic::type_test));
2249 Function *TypeCheckedLoadFunc =
2250 M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load));
2251 Function *TypeCheckedLoadRelativeFunc =
2252 M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load_relative));
2253 Function *AssumeFunc = M.getFunction(Intrinsic::getName(Intrinsic::assume));
2254
2255 // Normally if there are no users of the devirtualization intrinsics in the
2256 // module, this pass has nothing to do. But if we are exporting, we also need
2257 // to handle any users that appear only in the function summaries.
2258 if (!ExportSummary &&
2259 (!TypeTestFunc || TypeTestFunc->use_empty() || !AssumeFunc ||
2260 AssumeFunc->use_empty()) &&
2261 (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()) &&
2262 (!TypeCheckedLoadRelativeFunc ||
2263 TypeCheckedLoadRelativeFunc->use_empty()))
2264 return false;
2265
2266 // Rebuild type metadata into a map for easy lookup.
2267 std::vector<VTableBits> Bits;
2269 buildTypeIdentifierMap(Bits, TypeIdMap);
2270
2271 if (TypeTestFunc && AssumeFunc)
2272 scanTypeTestUsers(TypeTestFunc, TypeIdMap);
2273
2274 if (TypeCheckedLoadFunc)
2275 scanTypeCheckedLoadUsers(TypeCheckedLoadFunc);
2276
2277 if (TypeCheckedLoadRelativeFunc)
2278 scanTypeCheckedLoadUsers(TypeCheckedLoadRelativeFunc);
2279
2280 if (ImportSummary) {
2281 for (auto &S : CallSlots)
2282 importResolution(S.first, S.second);
2283
2284 removeRedundantTypeTests();
2285
2286 // We have lowered or deleted the type intrinsics, so we will no longer have
2287 // enough information to reason about the liveness of virtual function
2288 // pointers in GlobalDCE.
2289 for (GlobalVariable &GV : M.globals())
2290 GV.eraseMetadata(LLVMContext::MD_vcall_visibility);
2291
2292 // The rest of the code is only necessary when exporting or during regular
2293 // LTO, so we are done.
2294 return true;
2295 }
2296
2297 if (TypeIdMap.empty())
2298 return true;
2299
2300 // Collect information from summary about which calls to try to devirtualize.
2301 if (ExportSummary) {
2303 for (auto &P : TypeIdMap) {
2304 if (auto *TypeId = dyn_cast<MDString>(P.first))
2305 MetadataByGUID[GlobalValue::getGUID(TypeId->getString())].push_back(
2306 TypeId);
2307 }
2308
2309 for (auto &P : *ExportSummary) {
2310 for (auto &S : P.second.SummaryList) {
2311 auto *FS = dyn_cast<FunctionSummary>(S.get());
2312 if (!FS)
2313 continue;
2314 // FIXME: Only add live functions.
2315 for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
2316 for (Metadata *MD : MetadataByGUID[VF.GUID]) {
2317 CallSlots[{MD, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS);
2318 }
2319 }
2320 for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
2321 for (Metadata *MD : MetadataByGUID[VF.GUID]) {
2322 CallSlots[{MD, VF.Offset}].CSInfo.addSummaryTypeCheckedLoadUser(FS);
2323 }
2324 }
2325 for (const FunctionSummary::ConstVCall &VC :
2326 FS->type_test_assume_const_vcalls()) {
2327 for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
2328 CallSlots[{MD, VC.VFunc.Offset}]
2329 .ConstCSInfo[VC.Args]
2330 .addSummaryTypeTestAssumeUser(FS);
2331 }
2332 }
2333 for (const FunctionSummary::ConstVCall &VC :
2334 FS->type_checked_load_const_vcalls()) {
2335 for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
2336 CallSlots[{MD, VC.VFunc.Offset}]
2337 .ConstCSInfo[VC.Args]
2338 .addSummaryTypeCheckedLoadUser(FS);
2339 }
2340 }
2341 }
2342 }
2343 }
2344
2345 // For each (type, offset) pair:
2346 bool DidVirtualConstProp = false;
2347 std::map<std::string, GlobalValue *> DevirtTargets;
2348 for (auto &S : CallSlots) {
2349 // Search each of the members of the type identifier for the virtual
2350 // function implementation at offset S.first.ByteOffset, and add to
2351 // TargetsForSlot.
2352 std::vector<VirtualCallTarget> TargetsForSlot;
2353 WholeProgramDevirtResolution *Res = nullptr;
2354 const std::set<TypeMemberInfo> &TypeMemberInfos = TypeIdMap[S.first.TypeID];
2355 if (ExportSummary && isa<MDString>(S.first.TypeID) &&
2356 TypeMemberInfos.size())
2357 // For any type id used on a global's type metadata, create the type id
2358 // summary resolution regardless of whether we can devirtualize, so that
2359 // lower type tests knows the type id is not Unsat. If it was not used on
2360 // a global's type metadata, the TypeIdMap entry set will be empty, and
2361 // we don't want to create an entry (with the default Unknown type
2362 // resolution), which can prevent detection of the Unsat.
2363 Res = &ExportSummary
2364 ->getOrInsertTypeIdSummary(
2365 cast<MDString>(S.first.TypeID)->getString())
2366 .WPDRes[S.first.ByteOffset];
2367 if (tryFindVirtualCallTargets(TargetsForSlot, TypeMemberInfos,
2368 S.first.ByteOffset, ExportSummary)) {
2369
2370 if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res)) {
2371 DidVirtualConstProp |=
2372 tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first);
2373
2374 tryICallBranchFunnel(TargetsForSlot, S.second, Res, S.first);
2375 }
2376
2377 // Collect functions devirtualized at least for one call site for stats.
2378 if (RemarksEnabled || AreStatisticsEnabled())
2379 for (const auto &T : TargetsForSlot)
2380 if (T.WasDevirt)
2381 DevirtTargets[std::string(T.Fn->getName())] = T.Fn;
2382 }
2383
2384 // CFI-specific: if we are exporting and any llvm.type.checked.load
2385 // intrinsics were *not* devirtualized, we need to add the resulting
2386 // llvm.type.test intrinsics to the function summaries so that the
2387 // LowerTypeTests pass will export them.
2388 if (ExportSummary && isa<MDString>(S.first.TypeID)) {
2389 auto GUID =
2390 GlobalValue::getGUID(cast<MDString>(S.first.TypeID)->getString());
2391 for (auto *FS : S.second.CSInfo.SummaryTypeCheckedLoadUsers)
2392 FS->addTypeTest(GUID);
2393 for (auto &CCS : S.second.ConstCSInfo)
2394 for (auto *FS : CCS.second.SummaryTypeCheckedLoadUsers)
2395 FS->addTypeTest(GUID);
2396 }
2397 }
2398
2399 if (RemarksEnabled) {
2400 // Generate remarks for each devirtualized function.
2401 for (const auto &DT : DevirtTargets) {
2402 GlobalValue *GV = DT.second;
2403 auto F = dyn_cast<Function>(GV);
2404 if (!F) {
2405 auto A = dyn_cast<GlobalAlias>(GV);
2406 assert(A && isa<Function>(A->getAliasee()));
2407 F = dyn_cast<Function>(A->getAliasee());
2408 assert(F);
2409 }
2410
2411 using namespace ore;
2412 OREGetter(F).emit(OptimizationRemark(DEBUG_TYPE, "Devirtualized", F)
2413 << "devirtualized "
2414 << NV("FunctionName", DT.first));
2415 }
2416 }
2417
2418 NumDevirtTargets += DevirtTargets.size();
2419
2420 removeRedundantTypeTests();
2421
2422 // Rebuild each global we touched as part of virtual constant propagation to
2423 // include the before and after bytes.
2424 if (DidVirtualConstProp)
2425 for (VTableBits &B : Bits)
2426 rebuildGlobal(B);
2427
2428 // We have lowered or deleted the type intrinsics, so we will no longer have
2429 // enough information to reason about the liveness of virtual function
2430 // pointers in GlobalDCE.
2431 for (GlobalVariable &GV : M.globals())
2432 GV.eraseMetadata(LLVMContext::MD_vcall_visibility);
2433
2434 for (auto *CI : CallsWithPtrAuthBundleRemoved)
2435 CI->eraseFromParent();
2436
2437 return true;
2438}
2439
2440void DevirtIndex::run() {
2441 if (ExportSummary.typeIdCompatibleVtableMap().empty())
2442 return;
2443
2445 for (const auto &P : ExportSummary.typeIdCompatibleVtableMap()) {
2446 NameByGUID[GlobalValue::getGUID(P.first)].push_back(P.first);
2447 // Create the type id summary resolution regardlness of whether we can
2448 // devirtualize, so that lower type tests knows the type id is used on
2449 // a global and not Unsat. We do this here rather than in the loop over the
2450 // CallSlots, since that handling will only see type tests that directly
2451 // feed assumes, and we would miss any that aren't currently handled by WPD
2452 // (such as type tests that feed assumes via phis).
2453 ExportSummary.getOrInsertTypeIdSummary(P.first);
2454 }
2455
2456 // Collect information from summary about which calls to try to devirtualize.
2457 for (auto &P : ExportSummary) {
2458 for (auto &S : P.second.SummaryList) {
2459 auto *FS = dyn_cast<FunctionSummary>(S.get());
2460 if (!FS)
2461 continue;
2462 // FIXME: Only add live functions.
2463 for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
2464 for (StringRef Name : NameByGUID[VF.GUID]) {
2465 CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS);
2466 }
2467 }
2468 for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
2469 for (StringRef Name : NameByGUID[VF.GUID]) {
2470 CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeCheckedLoadUser(FS);
2471 }
2472 }
2473 for (const FunctionSummary::ConstVCall &VC :
2474 FS->type_test_assume_const_vcalls()) {
2475 for (StringRef Name : NameByGUID[VC.VFunc.GUID]) {
2476 CallSlots[{Name, VC.VFunc.Offset}]
2477 .ConstCSInfo[VC.Args]
2478 .addSummaryTypeTestAssumeUser(FS);
2479 }
2480 }
2481 for (const FunctionSummary::ConstVCall &VC :
2482 FS->type_checked_load_const_vcalls()) {
2483 for (StringRef Name : NameByGUID[VC.VFunc.GUID]) {
2484 CallSlots[{Name, VC.VFunc.Offset}]
2485 .ConstCSInfo[VC.Args]
2486 .addSummaryTypeCheckedLoadUser(FS);
2487 }
2488 }
2489 }
2490 }
2491
2492 std::set<ValueInfo> DevirtTargets;
2493 // For each (type, offset) pair:
2494 for (auto &S : CallSlots) {
2495 // Search each of the members of the type identifier for the virtual
2496 // function implementation at offset S.first.ByteOffset, and add to
2497 // TargetsForSlot.
2498 std::vector<ValueInfo> TargetsForSlot;
2499 auto TidSummary = ExportSummary.getTypeIdCompatibleVtableSummary(S.first.TypeID);
2500 assert(TidSummary);
2501 // The type id summary would have been created while building the NameByGUID
2502 // map earlier.
2504 &ExportSummary.getTypeIdSummary(S.first.TypeID)
2505 ->WPDRes[S.first.ByteOffset];
2506 if (tryFindVirtualCallTargets(TargetsForSlot, *TidSummary,
2507 S.first.ByteOffset)) {
2508
2509 if (!trySingleImplDevirt(TargetsForSlot, S.first, S.second, Res,
2510 DevirtTargets))
2511 continue;
2512 }
2513 }
2514
2515 // Optionally have the thin link print message for each devirtualized
2516 // function.
2518 for (const auto &DT : DevirtTargets)
2519 errs() << "Devirtualized call to " << DT << "\n";
2520
2521 NumDevirtTargets += DevirtTargets.size();
2522}
unsigned Intr
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:686
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines DenseMapInfo traits for DenseMap.
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
uint64_t Addr
std::string Name
uint64_t Size
Provides passes for computing function attributes based on interprocedural analyses.
#define DEBUG_TYPE
static void emitRemark(const Function &F, OptimizationRemarkEmitter &ORE, bool Skip)
Hexagon Common GEP
static cl::opt< std::string > ClReadSummary("lowertypetests-read-summary", cl::desc("Read summary from given YAML file before running pass"), cl::Hidden)
static cl::opt< PassSummaryAction > ClSummaryAction("lowertypetests-summary-action", cl::desc("What to do with the summary when running this pass"), cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"), clEnumValN(PassSummaryAction::Import, "import", "Import typeid resolutions from summary and globals"), clEnumValN(PassSummaryAction::Export, "export", "Export typeid resolutions to summary and globals")), cl::Hidden)
static cl::opt< std::string > ClWriteSummary("lowertypetests-write-summary", cl::desc("Write summary to given YAML file after running pass"), cl::Hidden)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
This file contains the declarations for metadata subclasses.
Type::TypeID TypeID
static bool mustBeUnreachableFunction(const Function &F)
Module.h This file contains the declarations for the Module class.
uint64_t IntrinsicInst * II
#define P(N)
FunctionAnalysisManager FAM
const char LLVMTargetMachineRef TM
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static cl::opt< bool > DisableWholeProgramVisibility("disable-whole-program-visibility", cl::Hidden, cl::desc("Disable whole program visibility (overrides enabling options)"))
Provide a way to force disable whole program for debugging or workarounds, when enabled via the linke...
WPDCheckMode
Mechanism to add runtime checking of devirtualization decisions, optionally trapping or falling back ...
static cl::opt< PassSummaryAction > ClSummaryAction("wholeprogramdevirt-summary-action", cl::desc("What to do with the summary when running this pass"), cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"), clEnumValN(PassSummaryAction::Import, "import", "Import typeid resolutions from summary and globals"), clEnumValN(PassSummaryAction::Export, "export", "Export typeid resolutions to summary and globals")), cl::Hidden)
static cl::opt< bool > WholeProgramVisibility("whole-program-visibility", cl::Hidden, cl::desc("Enable whole program visibility"))
Provide a way to force enable whole program visibility in tests.
static bool typeIDVisibleToRegularObj(StringRef TypeID, function_ref< bool(StringRef)> IsVisibleToRegularObj)
static Error checkCombinedSummaryForTesting(ModuleSummaryIndex *Summary)
static cl::list< std::string > SkipFunctionNames("wholeprogramdevirt-skip", cl::desc("Prevent function(s) from being devirtualized"), cl::Hidden, cl::CommaSeparated)
Provide way to prevent certain function from being devirtualized.
static cl::opt< std::string > ClWriteSummary("wholeprogramdevirt-write-summary", cl::desc("Write summary to given bitcode or YAML file after running pass. " "Output file format is deduced from extension: *.bc means writing " "bitcode, otherwise YAML"), cl::Hidden)
static cl::opt< unsigned > ClThreshold("wholeprogramdevirt-branch-funnel-threshold", cl::Hidden, cl::init(10), cl::desc("Maximum number of call targets per " "call site to enable branch funnels"))
static cl::opt< WPDCheckMode > DevirtCheckMode("wholeprogramdevirt-check", cl::Hidden, cl::desc("Type of checking for incorrect devirtualizations"), cl::values(clEnumValN(WPDCheckMode::None, "none", "No checking"), clEnumValN(WPDCheckMode::Trap, "trap", "Trap when incorrect"), clEnumValN(WPDCheckMode::Fallback, "fallback", "Fallback to indirect when incorrect")))
static cl::opt< std::string > ClReadSummary("wholeprogramdevirt-read-summary", cl::desc("Read summary from given bitcode or YAML file before running pass"), cl::Hidden)
static bool skipUpdateDueToValidation(GlobalVariable &GV, function_ref< bool(StringRef)> IsVisibleToRegularObj)
static bool AddCalls(VTableSlotInfo &SlotInfo, const ValueInfo &Callee)
static cl::opt< bool > PrintSummaryDevirt("wholeprogramdevirt-print-index-based", cl::Hidden, cl::desc("Print index-based devirtualization messages"))
Value * RHS
Value * LHS
A manager for alias analyses.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:405
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:195
static AttributeList get(LLVMContext &C, ArrayRef< std::pair< unsigned, Attribute > > Attrs)
Create an AttributeList with the specified parameters in it.
static AttributeSet get(LLVMContext &C, const AttrBuilder &B)
Definition: Attributes.cpp:843
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:392
bool isValid() const
Return true if the attribute is any kind of attribute.
Definition: Attributes.h:203
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:212
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1236
void setCallingConv(CallingConv::ID CC)
Definition: InstrTypes.h:1527
bool arg_empty() const
Definition: InstrTypes.h:1407
std::optional< OperandBundleUse > getOperandBundle(StringRef Name) const
Return an operand bundle by name, if present.
Definition: InstrTypes.h:2143
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1465
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1523
Value * getCalledOperand() const
Definition: InstrTypes.h:1458
void setAttributes(AttributeList A)
Set the parameter attributes for this call.
Definition: InstrTypes.h:1546
FunctionType * getFunctionType() const
Definition: InstrTypes.h:1323
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
Definition: InstrTypes.h:1401
void setCalledOperand(Value *V)
Definition: InstrTypes.h:1501
static CallBase * removeOperandBundle(CallBase *CB, uint32_t ID, InsertPosition InsertPt=nullptr)
Create a clone of CB with operand bundle ID removed.
AttributeList getAttributes() const
Return the parameter attributes for this call.
Definition: InstrTypes.h:1542
Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
@ ICMP_EQ
equal
Definition: InstrTypes.h:778
@ ICMP_NE
not equal
Definition: InstrTypes.h:779
void setSelectionKind(SelectionKind Val)
Definition: Comdat.h:47
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:528
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition: Constants.h:706
static Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2281
static Constant * getInBoundsGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList)
Create an "inbounds" getelementptr.
Definition: Constants.h:1280
static Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2267
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, GEPNoWrapFlags NW=GEPNoWrapFlags::none(), std::optional< ConstantRange > InRange=std::nullopt, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
Definition: Constants.h:1253
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:850
static Constant * getAnon(ArrayRef< Constant * > V, bool Packed=false)
Return an anonymous struct that has the specified elements.
Definition: Constants.h:477
This is an important base class in LLVM.
Definition: Constant.h:42
const Constant * stripPointerCasts() const
Definition: Constant.h:218
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:370
A debug info location.
Definition: DebugLoc.h:33
bool empty() const
Definition: DenseMap.h:98
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
Subclass of Error for the sole purpose of identifying the success path in the type system.
Definition: Error.h:335
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
This class evaluates LLVM IR, producing the Constant representing each SSA instruction.
Definition: Evaluator.h:37
Helper for check-and-exit error handling.
Definition: Error.h:1408
Tagged union holding either a T or a Error.
Definition: Error.h:481
Function summary information to aid decisions and implementation of importing.
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:135
ArrayRef< Type * > params() const
Definition: DerivedTypes.h:130
bool isVarArg() const
Definition: DerivedTypes.h:123
Type * getReturnType() const
Definition: DerivedTypes.h:124
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition: Function.h:172
bool empty() const
Definition: Function.h:857
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:214
bool arg_empty() const
Definition: Function.h:900
const BasicBlock & front() const
Definition: Function.h:858
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:769
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:249
arg_iterator arg_begin()
Definition: Function.h:866
size_t arg_size() const
Definition: Function.h:899
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:219
This class implements a glob pattern matcher similar to the one found in bash, but with some key diff...
Definition: GlobPattern.h:51
static Expected< GlobPattern > create(StringRef Pat, std::optional< size_t > MaxSubPatterns={})
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition: Globals.cpp:550
bool hasMetadata() const
Return true if this value has any metadata attached to it.
Definition: Value.h:589
void setMetadata(unsigned KindID, MDNode *Node)
Set a particular kind of metadata attachment.
Definition: Metadata.cpp:1494
VCallVisibility getVCallVisibility() const
Definition: Metadata.cpp:1822
bool eraseMetadata(unsigned KindID)
Erase all metadata attachments with the given kind.
Definition: Metadata.cpp:1539
MDNode * getMetadata(unsigned KindID) const
Get the current metadata attachments for the given kind, if any.
Definition: Value.h:565
void setVCallVisibilityMetadata(VCallVisibility Visibility)
Definition: Metadata.cpp:1812
static bool isLocalLinkage(LinkageTypes Linkage)
Definition: GlobalValue.h:409
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:290
static bool isAvailableExternallyLinkage(LinkageTypes Linkage)
Definition: GlobalValue.h:379
static GUID getGUID(StringRef GlobalName)
Return a 64-bit global unique ID constructed from global value name (i.e.
Definition: Globals.cpp:75
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e.
Definition: GlobalValue.h:595
@ HiddenVisibility
The GV is hidden.
Definition: GlobalValue.h:68
void setVisibility(VisibilityTypes V)
Definition: GlobalValue.h:254
@ PrivateLinkage
Like Internal, but omit from symbol table.
Definition: GlobalValue.h:60
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:59
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:52
Global variable summary information to aid decisions and implementation of importing.
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2674
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:563
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:466
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:92
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1642
Class to represent integer types.
Definition: DerivedTypes.h:40
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:72
MDNode * createLikelyBranchWeights()
Return metadata containing two branch weights, with significant bias towards true destination.
Definition: MDBuilder.cpp:42
Metadata node.
Definition: Metadata.h:1069
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1542
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Root of the metadata hierarchy.
Definition: Metadata.h:62
Class to hold module path string table and global value map, and encapsulate methods for operating on...
const TypeIdSummary * getTypeIdSummary(StringRef TypeId) const
This returns either a pointer to the type id summary (if present in the summary map) or null (if not ...
ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const
Return a ValueInfo for the index value_type (convenient when iterating index).
const ModuleHash & getModuleHash(const StringRef ModPath) const
Get the module SHA1 hash recorded for the given module path.
static constexpr const char * getRegularLTOModuleName()
static std::string getGlobalNameForLocal(StringRef Name, ModuleHash ModHash)
Convenience method for creating a promoted global name for the given value name of a local,...
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
The optimization diagnostic interface.
Diagnostic information for applied optimization remarks.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:662
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1852
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:114
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
static ReturnInst * Create(LLVMContext &C, Value *retVal=nullptr, InsertPosition InsertBefore=nullptr)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:368
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:503
bool empty() const
Definition: SmallVector.h:95
size_t size() const
Definition: SmallVector.h:92
void push_back(const T &Elt)
Definition: SmallVector.h:427
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1210
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
bool contains(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
Definition: StringRef.h:409
Target - Wrapper for Target specific information.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
TypeID
Definitions of all of the base types for the Type system.
Definition: Type.h:54
static Type * getVoidTy(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
bool use_empty() const
Definition: Value.h:344
bool eraseMetadata(unsigned KindID)
Erase all metadata attachments with the given kind.
Definition: Metadata.cpp:1539
iterator_range< use_iterator > uses()
Definition: Value.h:376
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:97
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
A raw_ostream that writes to a file descriptor.
Definition: raw_ostream.h:460
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:1096
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1539
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
@ FS
Definition: X86.h:210
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:711
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
@ CommaSeparated
Definition: CommandLine.h:163
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
DiagnosticInfoOptimizationBase::Argument NV
@ OF_TextWithCRLF
The file should be opened in text mode and use a carriage linefeed '\r '.
Definition: FileSystem.h:767
uint64_t findLowestOffset(ArrayRef< VirtualCallTarget > Targets, bool IsAfter, uint64_t Size)
void setAfterReturnValues(MutableArrayRef< VirtualCallTarget > Targets, uint64_t AllocAfter, unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit)
void setBeforeReturnValues(MutableArrayRef< VirtualCallTarget > Targets, uint64_t AllocBefore, unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit)
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:480
MemoryEffects computeFunctionBodyMemoryAccess(Function &F, AAResults &AAR)
Returns the memory access properties of this copy of the function.
@ Export
Export information to summary.
@ Import
Import information from summary.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2073
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:656
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition: Error.h:1286
bool hasWholeProgramVisibility(bool WholeProgramVisibilityEnabledInLTO)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
void updatePublicTypeTestCalls(Module &M, bool WholeProgramVisibilityEnabledInLTO)
void getVisibleToRegularObjVtableGUIDs(ModuleSummaryIndex &Index, DenseSet< GlobalValue::GUID > &VisibleToRegularObjSymbols, function_ref< bool(StringRef)> IsVisibleToRegularObj)
Based on typeID string, get all associated vtable GUIDS that are visible to regular objects.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void writeIndexToFile(const ModuleSummaryIndex &Index, raw_ostream &Out, const std::map< std::string, GVSummaryMapTy > *ModuleToSummariesForIndex=nullptr, const GVSummaryPtrSet *DecSummaries=nullptr)
Write the specified module summary index to the given raw output stream, where it will be written in ...
void findDevirtualizableCallsForTypeCheckedLoad(SmallVectorImpl< DevirtCallSite > &DevirtCalls, SmallVectorImpl< Instruction * > &LoadedPtrs, SmallVectorImpl< Instruction * > &Preds, bool &HasNonCallUses, const CallInst *CI, DominatorTree &DT)
Given a call to the intrinsic @llvm.type.checked.load, find all devirtualizable call sites based on t...
CallBase & versionCallSite(CallBase &CB, Value *Callee, MDNode *BranchWeights)
Predicate and clone the given call site.
bool AreStatisticsEnabled()
Check if statistics are enabled.
Definition: Statistic.cpp:139
void updateIndexWPDForExports(ModuleSummaryIndex &Summary, function_ref< bool(StringRef, ValueInfo)> isExported, std::map< ValueInfo, std::vector< VTableSlotSummary > > &LocalWPDTargetsMap)
Call after cross-module importing to update the recorded single impl devirt target names for any loca...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
void runWholeProgramDevirtOnIndex(ModuleSummaryIndex &Summary, std::set< GlobalValue::GUID > &ExportedGUIDs, std::map< ValueInfo, std::vector< VTableSlotSummary > > &LocalWPDTargetsMap)
Perform index-based whole program devirtualization on the Summary index.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
Expected< T > errorOrToExpected(ErrorOr< T > &&EO)
Convert an ErrorOr<T> to an Expected<T>.
Definition: Error.h:1226
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
Error errorCodeToError(std::error_code EC)
Helper for converting an std::error_code to a Error.
Definition: Error.cpp:111
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
std::vector< TypeIdOffsetVtableInfo > TypeIdCompatibleVtableInfo
List of vtable definitions decorated by a particular type identifier, and their corresponding offsets...
void consumeError(Error Err)
Consume a Error without doing anything.
Definition: Error.h:1069
void findDevirtualizableCallsForTypeTest(SmallVectorImpl< DevirtCallSite > &DevirtCalls, SmallVectorImpl< CallInst * > &Assumes, const CallInst *CI, DominatorTree &DT)
Given a call to the intrinsic @llvm.type.test, find all devirtualizable call sites based on the call ...
void updateVCallVisibilityInModule(Module &M, bool WholeProgramVisibilityEnabledInLTO, const DenseSet< GlobalValue::GUID > &DynamicExportSymbols, bool ValidateAllVtablesHaveTypeInfos, function_ref< bool(StringRef)> IsVisibleToRegularObj)
If whole program visibility asserted, then upgrade all public vcall visibility metadata on vtable def...
std::pair< Function *, Constant * > getFunctionAtVTableOffset(GlobalVariable *GV, uint64_t Offset, Module &M)
Given a vtable and a specified offset, returns the function and the trivial pointer at the specified ...
void updateVCallVisibilityInIndex(ModuleSummaryIndex &Index, bool WholeProgramVisibilityEnabledInLTO, const DenseSet< GlobalValue::GUID > &DynamicExportSymbols, const DenseSet< GlobalValue::GUID > &VisibleToRegularObjSymbols)
If whole program visibility asserted, then upgrade all public vcall visibility metadata on vtable def...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Class to accumulate and hold information about a callee.
static unsigned getHashValue(const VTableSlotSummary &I)
static bool isEqual(const VTableSlotSummary &LHS, const VTableSlotSummary &RHS)
static bool isEqual(const VTableSlot &LHS, const VTableSlot &RHS)
static unsigned getHashValue(const VTableSlot &I)
An information struct used to provide DenseMap with the various necessary components for a given valu...
Definition: DenseMapInfo.h:52
A call site that could be devirtualized.
A specification for a virtual function call with all constant integer arguments.
An "identifier" for a virtual function.
The following data structures summarize type metadata information.
std::map< uint64_t, WholeProgramDevirtResolution > WPDRes
Mapping from byte offset to whole-program devirt resolution for that (typeid, byte offset) pair.
TypeTestResolution TTRes
@ Unsat
Unsatisfiable type (i.e. no global has this type metadata)
enum llvm::TypeTestResolution::Kind TheKind
Struct that holds a reference to a particular GUID in a global value summary.
ArrayRef< std::unique_ptr< GlobalValueSummary > > getSummaryList() const
PreservedAnalyses run(Module &M, ModuleAnalysisManager &)
@ UniformRetVal
Uniform return value optimization.
@ VirtualConstProp
Virtual constant propagation.
@ UniqueRetVal
Unique return value optimization.
uint64_t Info
Additional information for the resolution:
enum llvm::WholeProgramDevirtResolution::ByArg::Kind TheKind
enum llvm::WholeProgramDevirtResolution::Kind TheKind
std::map< std::vector< uint64_t >, ByArg > ResByArg
Resolutions for calls with all constant integer arguments (excluding the first argument,...
@ SingleImpl
Single implementation devirtualization.
@ BranchFunnel
When retpoline mitigation is enabled, use a branch funnel that is defined in the merged module.
VirtualCallTarget(GlobalValue *Fn, const TypeMemberInfo *TM)