LLVM 17.0.0git
WholeProgramDevirt.cpp
Go to the documentation of this file.
1//===- WholeProgramDevirt.cpp - Whole program virtual call optimization ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass implements whole program optimization of virtual calls in cases
10// where we know (via !type metadata) that the list of callees is fixed. This
11// includes the following:
12// - Single implementation devirtualization: if a virtual call has a single
13// possible callee, replace all calls with a direct call to that callee.
14// - Virtual constant propagation: if the virtual function's return type is an
15// integer <=64 bits and all possible callees are readnone, for each class and
16// each list of constant arguments: evaluate the function, store the return
17// value alongside the virtual table, and rewrite each virtual call as a load
18// from the virtual table.
19// - Uniform return value optimization: if the conditions for virtual constant
20// propagation hold and each function returns the same constant value, replace
21// each virtual call with that constant.
22// - Unique return value optimization for i1 return values: if the conditions
23// for virtual constant propagation hold and a single vtable's function
24// returns 0, or a single vtable's function returns 1, replace each virtual
25// call with a comparison of the vptr against that vtable's address.
26//
27// This pass is intended to be used during the regular and thin LTO pipelines:
28//
29// During regular LTO, the pass determines the best optimization for each
30// virtual call and applies the resolutions directly to virtual calls that are
31// eligible for virtual call optimization (i.e. calls that use either of the
32// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics).
33//
34// During hybrid Regular/ThinLTO, the pass operates in two phases:
35// - Export phase: this is run during the thin link over a single merged module
36// that contains all vtables with !type metadata that participate in the link.
37// The pass computes a resolution for each virtual call and stores it in the
38// type identifier summary.
39 // - Import phase: this is run during the thin backends over the individual
40 // modules. The pass applies the resolutions previously computed during the
41 // export phase to each eligible virtual call.
42//
43// During ThinLTO, the pass operates in two phases:
44// - Export phase: this is run during the thin link over the index which
45// contains a summary of all vtables with !type metadata that participate in
46// the link. It computes a resolution for each virtual call and stores it in
47// the type identifier summary. Only single implementation devirtualization
48// is supported.
49// - Import phase: (same as with hybrid case above).
50//
51//===----------------------------------------------------------------------===//
52
// NOTE(review): headers below restored from upstream LLVM 17; several include
// lines were dropped by the HTML extraction — verify against upstream.
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSummaryIndexYAML.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/GlobPattern.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
#include "llvm/Transforms/Utils/Evaluator.h"
#include <algorithm>
#include <cstddef>
#include <map>
#include <set>
#include <string>
107
108using namespace llvm;
109using namespace wholeprogramdevirt;
110
111#define DEBUG_TYPE "wholeprogramdevirt"
112
113STATISTIC(NumDevirtTargets, "Number of whole program devirtualization targets");
114STATISTIC(NumSingleImpl, "Number of single implementation devirtualizations");
115STATISTIC(NumBranchFunnel, "Number of branch funnels");
116STATISTIC(NumUniformRetVal, "Number of uniform return value optimizations");
117STATISTIC(NumUniqueRetVal, "Number of unique return value optimizations");
118STATISTIC(NumVirtConstProp1Bit,
119 "Number of 1 bit virtual constant propagations");
120STATISTIC(NumVirtConstProp, "Number of virtual constant propagations");
121
123 "wholeprogramdevirt-summary-action",
124 cl::desc("What to do with the summary when running this pass"),
125 cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"),
126 clEnumValN(PassSummaryAction::Import, "import",
127 "Import typeid resolutions from summary and globals"),
128 clEnumValN(PassSummaryAction::Export, "export",
129 "Export typeid resolutions to summary and globals")),
130 cl::Hidden);
131
133 "wholeprogramdevirt-read-summary",
134 cl::desc(
135 "Read summary from given bitcode or YAML file before running pass"),
136 cl::Hidden);
137
139 "wholeprogramdevirt-write-summary",
140 cl::desc("Write summary to given bitcode or YAML file after running pass. "
141 "Output file format is deduced from extension: *.bc means writing "
142 "bitcode, otherwise YAML"),
143 cl::Hidden);
144
146 ClThreshold("wholeprogramdevirt-branch-funnel-threshold", cl::Hidden,
147 cl::init(10),
148 cl::desc("Maximum number of call targets per "
149 "call site to enable branch funnels"));
150
151static cl::opt<bool>
152 PrintSummaryDevirt("wholeprogramdevirt-print-index-based", cl::Hidden,
153 cl::desc("Print index-based devirtualization messages"));
154
155/// Provide a way to force enable whole program visibility in tests.
156/// This is needed to support legacy tests that don't contain
157/// !vcall_visibility metadata (the mere presense of type tests
158/// previously implied hidden visibility).
159static cl::opt<bool>
160 WholeProgramVisibility("whole-program-visibility", cl::Hidden,
161 cl::desc("Enable whole program visibility"));
162
163/// Provide a way to force disable whole program for debugging or workarounds,
164/// when enabled via the linker.
166 "disable-whole-program-visibility", cl::Hidden,
167 cl::desc("Disable whole program visibility (overrides enabling options)"));
168
169/// Provide way to prevent certain function from being devirtualized
171 SkipFunctionNames("wholeprogramdevirt-skip",
172 cl::desc("Prevent function(s) from being devirtualized"),
174
175/// Mechanism to add runtime checking of devirtualization decisions, optionally
176/// trapping or falling back to indirect call on any that are not correct.
177/// Trapping mode is useful for debugging undefined behavior leading to failures
178/// with WPD. Fallback mode is useful for ensuring safety when whole program
179/// visibility may be compromised.
182 "wholeprogramdevirt-check", cl::Hidden,
183 cl::desc("Type of checking for incorrect devirtualizations"),
184 cl::values(clEnumValN(WPDCheckMode::None, "none", "No checking"),
185 clEnumValN(WPDCheckMode::Trap, "trap", "Trap when incorrect"),
187 "Fallback to indirect when incorrect")));
188
189namespace {
190struct PatternList {
191 std::vector<GlobPattern> Patterns;
192 template <class T> void init(const T &StringList) {
193 for (const auto &S : StringList)
195 Patterns.push_back(std::move(*Pat));
196 }
197 bool match(StringRef S) {
198 for (const GlobPattern &P : Patterns)
199 if (P.match(S))
200 return true;
201 return false;
202 }
203};
204} // namespace
205
206// Find the minimum offset that we may store a value of size Size bits at. If
207// IsAfter is set, look for an offset before the object, otherwise look for an
208// offset after the object.
211 bool IsAfter, uint64_t Size) {
212 // Find a minimum offset taking into account only vtable sizes.
213 uint64_t MinByte = 0;
214 for (const VirtualCallTarget &Target : Targets) {
215 if (IsAfter)
216 MinByte = std::max(MinByte, Target.minAfterBytes());
217 else
218 MinByte = std::max(MinByte, Target.minBeforeBytes());
219 }
220
221 // Build a vector of arrays of bytes covering, for each target, a slice of the
222 // used region (see AccumBitVector::BytesUsed in
223 // llvm/Transforms/IPO/WholeProgramDevirt.h) starting at MinByte. Effectively,
224 // this aligns the used regions to start at MinByte.
225 //
226 // In this example, A, B and C are vtables, # is a byte already allocated for
227 // a virtual function pointer, AAAA... (etc.) are the used regions for the
228 // vtables and Offset(X) is the value computed for the Offset variable below
229 // for X.
230 //
231 // Offset(A)
232 // | |
233 // |MinByte
234 // A: ################AAAAAAAA|AAAAAAAA
235 // B: ########BBBBBBBBBBBBBBBB|BBBB
236 // C: ########################|CCCCCCCCCCCCCCCC
237 // | Offset(B) |
238 //
239 // This code produces the slices of A, B and C that appear after the divider
240 // at MinByte.
241 std::vector<ArrayRef<uint8_t>> Used;
242 for (const VirtualCallTarget &Target : Targets) {
243 ArrayRef<uint8_t> VTUsed = IsAfter ? Target.TM->Bits->After.BytesUsed
244 : Target.TM->Bits->Before.BytesUsed;
245 uint64_t Offset = IsAfter ? MinByte - Target.minAfterBytes()
246 : MinByte - Target.minBeforeBytes();
247
248 // Disregard used regions that are smaller than Offset. These are
249 // effectively all-free regions that do not need to be checked.
250 if (VTUsed.size() > Offset)
251 Used.push_back(VTUsed.slice(Offset));
252 }
253
254 if (Size == 1) {
255 // Find a free bit in each member of Used.
256 for (unsigned I = 0;; ++I) {
257 uint8_t BitsUsed = 0;
258 for (auto &&B : Used)
259 if (I < B.size())
260 BitsUsed |= B[I];
261 if (BitsUsed != 0xff)
262 return (MinByte + I) * 8 + llvm::countr_zero(uint8_t(~BitsUsed));
263 }
264 } else {
265 // Find a free (Size/8) byte region in each member of Used.
266 // FIXME: see if alignment helps.
267 for (unsigned I = 0;; ++I) {
268 for (auto &&B : Used) {
269 unsigned Byte = 0;
270 while ((I + Byte) < B.size() && Byte < (Size / 8)) {
271 if (B[I + Byte])
272 goto NextI;
273 ++Byte;
274 }
275 }
276 return (MinByte + I) * 8;
277 NextI:;
278 }
279 }
280}
281
284 unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit) {
285 if (BitWidth == 1)
286 OffsetByte = -(AllocBefore / 8 + 1);
287 else
288 OffsetByte = -((AllocBefore + 7) / 8 + (BitWidth + 7) / 8);
289 OffsetBit = AllocBefore % 8;
290
292 if (BitWidth == 1)
293 Target.setBeforeBit(AllocBefore);
294 else
295 Target.setBeforeBytes(AllocBefore, (BitWidth + 7) / 8);
296 }
297}
298
301 unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit) {
302 if (BitWidth == 1)
303 OffsetByte = AllocAfter / 8;
304 else
305 OffsetByte = (AllocAfter + 7) / 8;
306 OffsetBit = AllocAfter % 8;
307
309 if (BitWidth == 1)
310 Target.setAfterBit(AllocAfter);
311 else
312 Target.setAfterBytes(AllocAfter, (BitWidth + 7) / 8);
313 }
314}
315
317 : Fn(Fn), TM(TM),
318 IsBigEndian(Fn->getParent()->getDataLayout().isBigEndian()),
319 WasDevirt(false) {}
320
321namespace {
322
323// A slot in a set of virtual tables. The TypeID identifies the set of virtual
324// tables, and the ByteOffset is the offset in bytes from the address point to
325// the virtual function pointer.
326struct VTableSlot {
328 uint64_t ByteOffset;
329};
330
331} // end anonymous namespace
332
333namespace llvm {
334
335template <> struct DenseMapInfo<VTableSlot> {
336 static VTableSlot getEmptyKey() {
339 }
340 static VTableSlot getTombstoneKey() {
343 }
344 static unsigned getHashValue(const VTableSlot &I) {
347 }
348 static bool isEqual(const VTableSlot &LHS,
349 const VTableSlot &RHS) {
350 return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset;
351 }
352};
353
354template <> struct DenseMapInfo<VTableSlotSummary> {
358 }
362 }
363 static unsigned getHashValue(const VTableSlotSummary &I) {
366 }
367 static bool isEqual(const VTableSlotSummary &LHS,
368 const VTableSlotSummary &RHS) {
369 return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset;
370 }
371};
372
373} // end namespace llvm
374
375namespace {
376
377// Returns true if the function must be unreachable based on ValueInfo.
378//
379// In particular, identifies a function as unreachable in the following
380// conditions
381// 1) All summaries are live.
382// 2) All function summaries indicate it's unreachable
383// 3) There is no non-function with the same GUID (which is rare)
385 if ((!TheFnVI) || TheFnVI.getSummaryList().empty()) {
386 // Returns false if ValueInfo is absent, or the summary list is empty
387 // (e.g., function declarations).
388 return false;
389 }
390
391 for (const auto &Summary : TheFnVI.getSummaryList()) {
392 // Conservatively returns false if any non-live functions are seen.
393 // In general either all summaries should be live or all should be dead.
394 if (!Summary->isLive())
395 return false;
396 if (auto *FS = dyn_cast<FunctionSummary>(Summary->getBaseObject())) {
397 if (!FS->fflags().MustBeUnreachable)
398 return false;
399 }
400 // Be conservative if a non-function has the same GUID (which is rare).
401 else
402 return false;
403 }
404 // All function summaries are live and all of them agree that the function is
405 // unreachble.
406 return true;
407}
408
409// A virtual call site. VTable is the loaded virtual table pointer, and CS is
410// the indirect virtual call.
411struct VirtualCallSite {
412 Value *VTable = nullptr;
413 CallBase &CB;
414
415 // If non-null, this field points to the associated unsafe use count stored in
416 // the DevirtModule::NumUnsafeUsesForTypeTest map below. See the description
417 // of that field for details.
418 unsigned *NumUnsafeUses = nullptr;
419
420 void
421 emitRemark(const StringRef OptName, const StringRef TargetName,
423 Function *F = CB.getCaller();
424 DebugLoc DLoc = CB.getDebugLoc();
425 BasicBlock *Block = CB.getParent();
426
427 using namespace ore;
428 OREGetter(F).emit(OptimizationRemark(DEBUG_TYPE, OptName, DLoc, Block)
429 << NV("Optimization", OptName)
430 << ": devirtualized a call to "
431 << NV("FunctionName", TargetName));
432 }
433
434 void replaceAndErase(
435 const StringRef OptName, const StringRef TargetName, bool RemarksEnabled,
437 Value *New) {
438 if (RemarksEnabled)
439 emitRemark(OptName, TargetName, OREGetter);
440 CB.replaceAllUsesWith(New);
441 if (auto *II = dyn_cast<InvokeInst>(&CB)) {
442 BranchInst::Create(II->getNormalDest(), &CB);
443 II->getUnwindDest()->removePredecessor(II->getParent());
444 }
445 CB.eraseFromParent();
446 // This use is no longer unsafe.
447 if (NumUnsafeUses)
448 --*NumUnsafeUses;
449 }
450};
451
452// Call site information collected for a specific VTableSlot and possibly a list
453// of constant integer arguments. The grouping by arguments is handled by the
454// VTableSlotInfo class.
455struct CallSiteInfo {
456 /// The set of call sites for this slot. Used during regular LTO and the
457 /// import phase of ThinLTO (as well as the export phase of ThinLTO for any
458 /// call sites that appear in the merged module itself); in each of these
459 /// cases we are directly operating on the call sites at the IR level.
460 std::vector<VirtualCallSite> CallSites;
461
462 /// Whether all call sites represented by this CallSiteInfo, including those
463 /// in summaries, have been devirtualized. This starts off as true because a
464 /// default constructed CallSiteInfo represents no call sites.
465 bool AllCallSitesDevirted = true;
466
467 // These fields are used during the export phase of ThinLTO and reflect
468 // information collected from function summaries.
469
470 /// Whether any function summary contains an llvm.assume(llvm.type.test) for
471 /// this slot.
472 bool SummaryHasTypeTestAssumeUsers = false;
473
474 /// CFI-specific: a vector containing the list of function summaries that use
475 /// the llvm.type.checked.load intrinsic and therefore will require
476 /// resolutions for llvm.type.test in order to implement CFI checks if
477 /// devirtualization was unsuccessful. If devirtualization was successful, the
478 /// pass will clear this vector by calling markDevirt(). If at the end of the
479 /// pass the vector is non-empty, we will need to add a use of llvm.type.test
480 /// to each of the function summaries in the vector.
481 std::vector<FunctionSummary *> SummaryTypeCheckedLoadUsers;
482 std::vector<FunctionSummary *> SummaryTypeTestAssumeUsers;
483
484 bool isExported() const {
485 return SummaryHasTypeTestAssumeUsers ||
486 !SummaryTypeCheckedLoadUsers.empty();
487 }
488
489 void addSummaryTypeCheckedLoadUser(FunctionSummary *FS) {
490 SummaryTypeCheckedLoadUsers.push_back(FS);
491 AllCallSitesDevirted = false;
492 }
493
494 void addSummaryTypeTestAssumeUser(FunctionSummary *FS) {
495 SummaryTypeTestAssumeUsers.push_back(FS);
496 SummaryHasTypeTestAssumeUsers = true;
497 AllCallSitesDevirted = false;
498 }
499
500 void markDevirt() {
501 AllCallSitesDevirted = true;
502
503 // As explained in the comment for SummaryTypeCheckedLoadUsers.
504 SummaryTypeCheckedLoadUsers.clear();
505 }
506};
507
508// Call site information collected for a specific VTableSlot.
509struct VTableSlotInfo {
510 // The set of call sites which do not have all constant integer arguments
511 // (excluding "this").
512 CallSiteInfo CSInfo;
513
514 // The set of call sites with all constant integer arguments (excluding
515 // "this"), grouped by argument list.
516 std::map<std::vector<uint64_t>, CallSiteInfo> ConstCSInfo;
517
518 void addCallSite(Value *VTable, CallBase &CB, unsigned *NumUnsafeUses);
519
520private:
521 CallSiteInfo &findCallSiteInfo(CallBase &CB);
522};
523
524CallSiteInfo &VTableSlotInfo::findCallSiteInfo(CallBase &CB) {
525 std::vector<uint64_t> Args;
526 auto *CBType = dyn_cast<IntegerType>(CB.getType());
527 if (!CBType || CBType->getBitWidth() > 64 || CB.arg_empty())
528 return CSInfo;
529 for (auto &&Arg : drop_begin(CB.args())) {
530 auto *CI = dyn_cast<ConstantInt>(Arg);
531 if (!CI || CI->getBitWidth() > 64)
532 return CSInfo;
533 Args.push_back(CI->getZExtValue());
534 }
535 return ConstCSInfo[Args];
536}
537
538void VTableSlotInfo::addCallSite(Value *VTable, CallBase &CB,
539 unsigned *NumUnsafeUses) {
540 auto &CSI = findCallSiteInfo(CB);
541 CSI.AllCallSitesDevirted = false;
542 CSI.CallSites.push_back({VTable, CB, NumUnsafeUses});
543}
544
545struct DevirtModule {
546 Module &M;
547 function_ref<AAResults &(Function &)> AARGetter;
548 function_ref<DominatorTree &(Function &)> LookupDomTree;
549
550 ModuleSummaryIndex *ExportSummary;
551 const ModuleSummaryIndex *ImportSummary;
552
553 IntegerType *Int8Ty;
554 PointerType *Int8PtrTy;
556 IntegerType *Int64Ty;
557 IntegerType *IntPtrTy;
558 /// Sizeless array type, used for imported vtables. This provides a signal
559 /// to analyzers that these imports may alias, as they do for example
560 /// when multiple unique return values occur in the same vtable.
561 ArrayType *Int8Arr0Ty;
562
563 bool RemarksEnabled;
565
567
568 // Calls that have already been optimized. We may add a call to multiple
569 // VTableSlotInfos if vtable loads are coalesced and need to make sure not to
570 // optimize a call more than once.
571 SmallPtrSet<CallBase *, 8> OptimizedCalls;
572
573 // This map keeps track of the number of "unsafe" uses of a loaded function
574 // pointer. The key is the associated llvm.type.test intrinsic call generated
575 // by this pass. An unsafe use is one that calls the loaded function pointer
576 // directly. Every time we eliminate an unsafe use (for example, by
577 // devirtualizing it or by applying virtual constant propagation), we
578 // decrement the value stored in this map. If a value reaches zero, we can
579 // eliminate the type check by RAUWing the associated llvm.type.test call with
580 // true.
581 std::map<CallInst *, unsigned> NumUnsafeUsesForTypeTest;
582 PatternList FunctionsToSkip;
583
584 DevirtModule(Module &M, function_ref<AAResults &(Function &)> AARGetter,
586 function_ref<DominatorTree &(Function &)> LookupDomTree,
587 ModuleSummaryIndex *ExportSummary,
588 const ModuleSummaryIndex *ImportSummary)
589 : M(M), AARGetter(AARGetter), LookupDomTree(LookupDomTree),
590 ExportSummary(ExportSummary), ImportSummary(ImportSummary),
591 Int8Ty(Type::getInt8Ty(M.getContext())),
592 Int8PtrTy(Type::getInt8PtrTy(M.getContext())),
593 Int32Ty(Type::getInt32Ty(M.getContext())),
594 Int64Ty(Type::getInt64Ty(M.getContext())),
595 IntPtrTy(M.getDataLayout().getIntPtrType(M.getContext(), 0)),
596 Int8Arr0Ty(ArrayType::get(Type::getInt8Ty(M.getContext()), 0)),
597 RemarksEnabled(areRemarksEnabled()), OREGetter(OREGetter) {
598 assert(!(ExportSummary && ImportSummary));
599 FunctionsToSkip.init(SkipFunctionNames);
600 }
601
602 bool areRemarksEnabled();
603
604 void
605 scanTypeTestUsers(Function *TypeTestFunc,
606 DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap);
607 void scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc);
608
609 void buildTypeIdentifierMap(
610 std::vector<VTableBits> &Bits,
611 DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap);
612
613 bool
614 tryFindVirtualCallTargets(std::vector<VirtualCallTarget> &TargetsForSlot,
615 const std::set<TypeMemberInfo> &TypeMemberInfos,
616 uint64_t ByteOffset,
617 ModuleSummaryIndex *ExportSummary);
618
619 void applySingleImplDevirt(VTableSlotInfo &SlotInfo, Constant *TheFn,
620 bool &IsExported);
621 bool trySingleImplDevirt(ModuleSummaryIndex *ExportSummary,
623 VTableSlotInfo &SlotInfo,
625
626 void applyICallBranchFunnel(VTableSlotInfo &SlotInfo, Constant *JT,
627 bool &IsExported);
628 void tryICallBranchFunnel(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
629 VTableSlotInfo &SlotInfo,
630 WholeProgramDevirtResolution *Res, VTableSlot Slot);
631
632 bool tryEvaluateFunctionsWithArgs(
634 ArrayRef<uint64_t> Args);
635
636 void applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
637 uint64_t TheRetVal);
638 bool tryUniformRetValOpt(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
639 CallSiteInfo &CSInfo,
641
642 // Returns the global symbol name that is used to export information about the
643 // given vtable slot and list of arguments.
644 std::string getGlobalName(VTableSlot Slot, ArrayRef<uint64_t> Args,
646
647 bool shouldExportConstantsAsAbsoluteSymbols();
648
649 // This function is called during the export phase to create a symbol
650 // definition containing information about the given vtable slot and list of
651 // arguments.
652 void exportGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args, StringRef Name,
653 Constant *C);
654 void exportConstant(VTableSlot Slot, ArrayRef<uint64_t> Args, StringRef Name,
655 uint32_t Const, uint32_t &Storage);
656
657 // This function is called during the import phase to create a reference to
658 // the symbol definition created during the export phase.
659 Constant *importGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
661 Constant *importConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
662 StringRef Name, IntegerType *IntTy,
663 uint32_t Storage);
664
665 Constant *getMemberAddr(const TypeMemberInfo *M);
666
667 void applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName, bool IsOne,
668 Constant *UniqueMemberAddr);
669 bool tryUniqueRetValOpt(unsigned BitWidth,
671 CallSiteInfo &CSInfo,
673 VTableSlot Slot, ArrayRef<uint64_t> Args);
674
675 void applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
676 Constant *Byte, Constant *Bit);
677 bool tryVirtualConstProp(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
678 VTableSlotInfo &SlotInfo,
679 WholeProgramDevirtResolution *Res, VTableSlot Slot);
680
681 void rebuildGlobal(VTableBits &B);
682
683 // Apply the summary resolution for Slot to all virtual calls in SlotInfo.
684 void importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo);
685
686 // If we were able to eliminate all unsafe uses for a type checked load,
687 // eliminate the associated type tests by replacing them with true.
688 void removeRedundantTypeTests();
689
690 bool run();
691
692 // Look up the corresponding ValueInfo entry of `TheFn` in `ExportSummary`.
693 //
694 // Caller guarantees that `ExportSummary` is not nullptr.
695 static ValueInfo lookUpFunctionValueInfo(Function *TheFn,
696 ModuleSummaryIndex *ExportSummary);
697
698 // Returns true if the function definition must be unreachable.
699 //
700 // Note if this helper function returns true, `F` is guaranteed
701 // to be unreachable; if it returns false, `F` might still
702 // be unreachable but not covered by this helper function.
703 //
704 // Implementation-wise, if function definition is present, IR is analyzed; if
705 // not, look up function flags from ExportSummary as a fallback.
706 static bool mustBeUnreachableFunction(Function *const F,
707 ModuleSummaryIndex *ExportSummary);
708
709 // Lower the module using the action and summary passed as command line
710 // arguments. For testing purposes only.
711 static bool
712 runForTesting(Module &M, function_ref<AAResults &(Function &)> AARGetter,
714 function_ref<DominatorTree &(Function &)> LookupDomTree);
715};
716
717struct DevirtIndex {
718 ModuleSummaryIndex &ExportSummary;
719 // The set in which to record GUIDs exported from their module by
720 // devirtualization, used by client to ensure they are not internalized.
721 std::set<GlobalValue::GUID> &ExportedGUIDs;
722 // A map in which to record the information necessary to locate the WPD
723 // resolution for local targets in case they are exported by cross module
724 // importing.
725 std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap;
726
728
729 PatternList FunctionsToSkip;
730
731 DevirtIndex(
732 ModuleSummaryIndex &ExportSummary,
733 std::set<GlobalValue::GUID> &ExportedGUIDs,
734 std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap)
735 : ExportSummary(ExportSummary), ExportedGUIDs(ExportedGUIDs),
736 LocalWPDTargetsMap(LocalWPDTargetsMap) {
737 FunctionsToSkip.init(SkipFunctionNames);
738 }
739
740 bool tryFindVirtualCallTargets(std::vector<ValueInfo> &TargetsForSlot,
741 const TypeIdCompatibleVtableInfo TIdInfo,
742 uint64_t ByteOffset);
743
744 bool trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
745 VTableSlotSummary &SlotSummary,
746 VTableSlotInfo &SlotInfo,
748 std::set<ValueInfo> &DevirtTargets);
749
750 void run();
751};
752} // end anonymous namespace
753
756 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
757 auto AARGetter = [&](Function &F) -> AAResults & {
758 return FAM.getResult<AAManager>(F);
759 };
760 auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & {
762 };
763 auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & {
765 };
766 if (UseCommandLine) {
767 if (!DevirtModule::runForTesting(M, AARGetter, OREGetter, LookupDomTree))
768 return PreservedAnalyses::all();
770 }
771 if (!DevirtModule(M, AARGetter, OREGetter, LookupDomTree, ExportSummary,
772 ImportSummary)
773 .run())
774 return PreservedAnalyses::all();
776}
777
778namespace llvm {
779// Enable whole program visibility if enabled by client (e.g. linker) or
780// internal option, and not force disabled.
781bool hasWholeProgramVisibility(bool WholeProgramVisibilityEnabledInLTO) {
782 return (WholeProgramVisibilityEnabledInLTO || WholeProgramVisibility) &&
784}
785
786/// If whole program visibility asserted, then upgrade all public vcall
787/// visibility metadata on vtable definitions to linkage unit visibility in
788/// Module IR (for regular or hybrid LTO).
790 Module &M, bool WholeProgramVisibilityEnabledInLTO,
791 const DenseSet<GlobalValue::GUID> &DynamicExportSymbols) {
792 if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO))
793 return;
794 for (GlobalVariable &GV : M.globals()) {
795 // Add linkage unit visibility to any variable with type metadata, which are
796 // the vtable definitions. We won't have an existing vcall_visibility
797 // metadata on vtable definitions with public visibility.
798 if (GV.hasMetadata(LLVMContext::MD_type) &&
799 GV.getVCallVisibility() == GlobalObject::VCallVisibilityPublic &&
800 // Don't upgrade the visibility for symbols exported to the dynamic
801 // linker, as we have no information on their eventual use.
802 !DynamicExportSymbols.count(GV.getGUID()))
803 GV.setVCallVisibilityMetadata(GlobalObject::VCallVisibilityLinkageUnit);
804 }
805}
806
808 bool WholeProgramVisibilityEnabledInLTO) {
809 Function *PublicTypeTestFunc =
810 M.getFunction(Intrinsic::getName(Intrinsic::public_type_test));
811 if (!PublicTypeTestFunc)
812 return;
813 if (hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO)) {
814 Function *TypeTestFunc =
815 Intrinsic::getDeclaration(&M, Intrinsic::type_test);
816 for (Use &U : make_early_inc_range(PublicTypeTestFunc->uses())) {
817 auto *CI = cast<CallInst>(U.getUser());
818 auto *NewCI = CallInst::Create(
819 TypeTestFunc, {CI->getArgOperand(0), CI->getArgOperand(1)},
820 std::nullopt, "", CI);
821 CI->replaceAllUsesWith(NewCI);
822 CI->eraseFromParent();
823 }
824 } else {
825 auto *True = ConstantInt::getTrue(M.getContext());
826 for (Use &U : make_early_inc_range(PublicTypeTestFunc->uses())) {
827 auto *CI = cast<CallInst>(U.getUser());
828 CI->replaceAllUsesWith(True);
829 CI->eraseFromParent();
830 }
831 }
832}
833
834/// If whole program visibility asserted, then upgrade all public vcall
835/// visibility metadata on vtable definition summaries to linkage unit
836/// visibility in Module summary index (for ThinLTO).
838 ModuleSummaryIndex &Index, bool WholeProgramVisibilityEnabledInLTO,
839 const DenseSet<GlobalValue::GUID> &DynamicExportSymbols) {
840 if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO))
841 return;
842 for (auto &P : Index) {
843 // Don't upgrade the visibility for symbols exported to the dynamic
844 // linker, as we have no information on their eventual use.
845 if (DynamicExportSymbols.count(P.first))
846 continue;
847 for (auto &S : P.second.SummaryList) {
848 auto *GVar = dyn_cast<GlobalVarSummary>(S.get());
849 if (!GVar ||
850 GVar->getVCallVisibility() != GlobalObject::VCallVisibilityPublic)
851 continue;
852 GVar->setVCallVisibility(GlobalObject::VCallVisibilityLinkageUnit);
853 }
854 }
855}
856
858 ModuleSummaryIndex &Summary, std::set<GlobalValue::GUID> &ExportedGUIDs,
859 std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
860 DevirtIndex(Summary, ExportedGUIDs, LocalWPDTargetsMap).run();
861}
862
864 ModuleSummaryIndex &Summary,
865 function_ref<bool(StringRef, ValueInfo)> isExported,
866 std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
867 for (auto &T : LocalWPDTargetsMap) {
868 auto &VI = T.first;
869 // This was enforced earlier during trySingleImplDevirt.
870 assert(VI.getSummaryList().size() == 1 &&
871 "Devirt of local target has more than one copy");
872 auto &S = VI.getSummaryList()[0];
873 if (!isExported(S->modulePath(), VI))
874 continue;
875
876 // It's been exported by a cross module import.
877 for (auto &SlotSummary : T.second) {
878 auto *TIdSum = Summary.getTypeIdSummary(SlotSummary.TypeID);
879 assert(TIdSum);
880 auto WPDRes = TIdSum->WPDRes.find(SlotSummary.ByteOffset);
881 assert(WPDRes != TIdSum->WPDRes.end());
882 WPDRes->second.SingleImplName = ModuleSummaryIndex::getGlobalNameForLocal(
883 WPDRes->second.SingleImplName,
884 Summary.getModuleHash(S->modulePath()));
885 }
886 }
887}
888
889} // end namespace llvm
890
892 // Check that summary index contains regular LTO module when performing
893 // export to prevent occasional use of index from pure ThinLTO compilation
894 // (-fno-split-lto-module). This kind of summary index is passed to
895 // DevirtIndex::run, not to DevirtModule::run used by opt/runForTesting.
896 const auto &ModPaths = Summary->modulePaths();
899 return createStringError(
901 "combined summary should contain Regular LTO module");
902 return ErrorSuccess();
903}
904
905bool DevirtModule::runForTesting(
906 Module &M, function_ref<AAResults &(Function &)> AARGetter,
908 function_ref<DominatorTree &(Function &)> LookupDomTree) {
909 std::unique_ptr<ModuleSummaryIndex> Summary =
910 std::make_unique<ModuleSummaryIndex>(/*HaveGVs=*/false);
911
912 // Handle the command-line summary arguments. This code is for testing
913 // purposes only, so we handle errors directly.
914 if (!ClReadSummary.empty()) {
915 ExitOnError ExitOnErr("-wholeprogramdevirt-read-summary: " + ClReadSummary +
916 ": ");
917 auto ReadSummaryFile =
919 if (Expected<std::unique_ptr<ModuleSummaryIndex>> SummaryOrErr =
920 getModuleSummaryIndex(*ReadSummaryFile)) {
921 Summary = std::move(*SummaryOrErr);
922 ExitOnErr(checkCombinedSummaryForTesting(Summary.get()));
923 } else {
924 // Try YAML if we've failed with bitcode.
925 consumeError(SummaryOrErr.takeError());
926 yaml::Input In(ReadSummaryFile->getBuffer());
927 In >> *Summary;
928 ExitOnErr(errorCodeToError(In.error()));
929 }
930 }
931
932 bool Changed =
933 DevirtModule(M, AARGetter, OREGetter, LookupDomTree,
935 : nullptr,
937 : nullptr)
938 .run();
939
940 if (!ClWriteSummary.empty()) {
941 ExitOnError ExitOnErr(
942 "-wholeprogramdevirt-write-summary: " + ClWriteSummary + ": ");
943 std::error_code EC;
944 if (StringRef(ClWriteSummary).endswith(".bc")) {
946 ExitOnErr(errorCodeToError(EC));
947 writeIndexToFile(*Summary, OS);
948 } else {
950 ExitOnErr(errorCodeToError(EC));
951 yaml::Output Out(OS);
952 Out << *Summary;
953 }
954 }
955
956 return Changed;
957}
958
959void DevirtModule::buildTypeIdentifierMap(
960 std::vector<VTableBits> &Bits,
961 DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) {
963 Bits.reserve(M.global_size());
965 for (GlobalVariable &GV : M.globals()) {
966 Types.clear();
967 GV.getMetadata(LLVMContext::MD_type, Types);
968 if (GV.isDeclaration() || Types.empty())
969 continue;
970
971 VTableBits *&BitsPtr = GVToBits[&GV];
972 if (!BitsPtr) {
973 Bits.emplace_back();
974 Bits.back().GV = &GV;
975 Bits.back().ObjectSize =
976 M.getDataLayout().getTypeAllocSize(GV.getInitializer()->getType());
977 BitsPtr = &Bits.back();
978 }
979
980 for (MDNode *Type : Types) {
981 auto TypeID = Type->getOperand(1).get();
982
984 cast<ConstantInt>(
985 cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
986 ->getZExtValue();
987
988 TypeIdMap[TypeID].insert({BitsPtr, Offset});
989 }
990 }
991}
992
993bool DevirtModule::tryFindVirtualCallTargets(
994 std::vector<VirtualCallTarget> &TargetsForSlot,
995 const std::set<TypeMemberInfo> &TypeMemberInfos, uint64_t ByteOffset,
996 ModuleSummaryIndex *ExportSummary) {
997 for (const TypeMemberInfo &TM : TypeMemberInfos) {
998 if (!TM.Bits->GV->isConstant())
999 return false;
1000
1001 // We cannot perform whole program devirtualization analysis on a vtable
1002 // with public LTO visibility.
1003 if (TM.Bits->GV->getVCallVisibility() ==
1005 return false;
1006
1007 Constant *Ptr = getPointerAtOffset(TM.Bits->GV->getInitializer(),
1008 TM.Offset + ByteOffset, M, TM.Bits->GV);
1009 if (!Ptr)
1010 return false;
1011
1012 auto C = Ptr->stripPointerCasts();
1013 // Make sure this is a function or alias to a function.
1014 auto Fn = dyn_cast<Function>(C);
1015 auto A = dyn_cast<GlobalAlias>(C);
1016 if (!Fn && A)
1017 Fn = dyn_cast<Function>(A->getAliasee());
1018
1019 if (!Fn)
1020 return false;
1021
1022 if (FunctionsToSkip.match(Fn->getName()))
1023 return false;
1024
1025 // We can disregard __cxa_pure_virtual as a possible call target, as
1026 // calls to pure virtuals are UB.
1027 if (Fn->getName() == "__cxa_pure_virtual")
1028 continue;
1029
1030 // We can disregard unreachable functions as possible call targets, as
1031 // unreachable functions shouldn't be called.
1032 if (mustBeUnreachableFunction(Fn, ExportSummary))
1033 continue;
1034
1035 // Save the symbol used in the vtable to use as the devirtualization
1036 // target.
1037 auto GV = dyn_cast<GlobalValue>(C);
1038 assert(GV);
1039 TargetsForSlot.push_back({GV, &TM});
1040 }
1041
1042 // Give up if we couldn't find any targets.
1043 return !TargetsForSlot.empty();
1044}
1045
1046bool DevirtIndex::tryFindVirtualCallTargets(
1047 std::vector<ValueInfo> &TargetsForSlot, const TypeIdCompatibleVtableInfo TIdInfo,
1048 uint64_t ByteOffset) {
1049 for (const TypeIdOffsetVtableInfo &P : TIdInfo) {
1050 // Find a representative copy of the vtable initializer.
1051 // We can have multiple available_externally, linkonce_odr and weak_odr
1052 // vtable initializers. We can also have multiple external vtable
1053 // initializers in the case of comdats, which we cannot check here.
1054 // The linker should give an error in this case.
1055 //
1056 // Also, handle the case of same-named local Vtables with the same path
1057 // and therefore the same GUID. This can happen if there isn't enough
1058 // distinguishing path when compiling the source file. In that case we
1059 // conservatively return false early.
1060 const GlobalVarSummary *VS = nullptr;
1061 bool LocalFound = false;
1062 for (const auto &S : P.VTableVI.getSummaryList()) {
1063 if (GlobalValue::isLocalLinkage(S->linkage())) {
1064 if (LocalFound)
1065 return false;
1066 LocalFound = true;
1067 }
1068 auto *CurVS = cast<GlobalVarSummary>(S->getBaseObject());
1069 if (!CurVS->vTableFuncs().empty() ||
1070 // Previously clang did not attach the necessary type metadata to
1071 // available_externally vtables, in which case there would not
1072 // be any vtable functions listed in the summary and we need
1073 // to treat this case conservatively (in case the bitcode is old).
1074 // However, we will also not have any vtable functions in the
1075 // case of a pure virtual base class. In that case we do want
1076 // to set VS to avoid treating it conservatively.
1078 VS = CurVS;
1079 // We cannot perform whole program devirtualization analysis on a vtable
1080 // with public LTO visibility.
1081 if (VS->getVCallVisibility() == GlobalObject::VCallVisibilityPublic)
1082 return false;
1083 }
1084 }
1085 // There will be no VS if all copies are available_externally having no
1086 // type metadata. In that case we can't safely perform WPD.
1087 if (!VS)
1088 return false;
1089 if (!VS->isLive())
1090 continue;
1091 for (auto VTP : VS->vTableFuncs()) {
1092 if (VTP.VTableOffset != P.AddressPointOffset + ByteOffset)
1093 continue;
1094
1095 if (mustBeUnreachableFunction(VTP.FuncVI))
1096 continue;
1097
1098 TargetsForSlot.push_back(VTP.FuncVI);
1099 }
1100 }
1101
1102 // Give up if we couldn't find any targets.
1103 return !TargetsForSlot.empty();
1104}
1105
1106void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
1107 Constant *TheFn, bool &IsExported) {
1108 // Don't devirtualize function if we're told to skip it
1109 // in -wholeprogramdevirt-skip.
1110 if (FunctionsToSkip.match(TheFn->stripPointerCasts()->getName()))
1111 return;
1112 auto Apply = [&](CallSiteInfo &CSInfo) {
1113 for (auto &&VCallSite : CSInfo.CallSites) {
1114 if (!OptimizedCalls.insert(&VCallSite.CB).second)
1115 continue;
1116
1117 if (RemarksEnabled)
1118 VCallSite.emitRemark("single-impl",
1119 TheFn->stripPointerCasts()->getName(), OREGetter);
1120 NumSingleImpl++;
1121 auto &CB = VCallSite.CB;
1122 assert(!CB.getCalledFunction() && "devirtualizing direct call?");
1123 IRBuilder<> Builder(&CB);
1124 Value *Callee =
1125 Builder.CreateBitCast(TheFn, CB.getCalledOperand()->getType());
1126
1127 // If trap checking is enabled, add support to compare the virtual
1128 // function pointer to the devirtualized target. In case of a mismatch,
1129 // perform a debug trap.
1130 if (DevirtCheckMode == WPDCheckMode::Trap) {
1131 auto *Cond = Builder.CreateICmpNE(CB.getCalledOperand(), Callee);
1132 Instruction *ThenTerm =
1133 SplitBlockAndInsertIfThen(Cond, &CB, /*Unreachable=*/false);
1134 Builder.SetInsertPoint(ThenTerm);
1135 Function *TrapFn = Intrinsic::getDeclaration(&M, Intrinsic::debugtrap);
1136 auto *CallTrap = Builder.CreateCall(TrapFn);
1137 CallTrap->setDebugLoc(CB.getDebugLoc());
1138 }
1139
1140 // If fallback checking is enabled, add support to compare the virtual
1141 // function pointer to the devirtualized target. In case of a mismatch,
1142 // fall back to indirect call.
1143 if (DevirtCheckMode == WPDCheckMode::Fallback) {
1144 MDNode *Weights =
1145 MDBuilder(M.getContext()).createBranchWeights((1U << 20) - 1, 1);
1146 // Version the indirect call site. If the called value is equal to the
1147 // given callee, 'NewInst' will be executed, otherwise the original call
1148 // site will be executed.
1149 CallBase &NewInst = versionCallSite(CB, Callee, Weights);
1150 NewInst.setCalledOperand(Callee);
1151 // Since the new call site is direct, we must clear metadata that
1152 // is only appropriate for indirect calls. This includes !prof and
1153 // !callees metadata.
1154 NewInst.setMetadata(LLVMContext::MD_prof, nullptr);
1155 NewInst.setMetadata(LLVMContext::MD_callees, nullptr);
1156 // Additionally, we should remove them from the fallback indirect call,
1157 // so that we don't attempt to perform indirect call promotion later.
1158 CB.setMetadata(LLVMContext::MD_prof, nullptr);
1159 CB.setMetadata(LLVMContext::MD_callees, nullptr);
1160 }
1161
1162 // In either trapping or non-checking mode, devirtualize original call.
1163 else {
1164 // Devirtualize unconditionally.
1166 // Since the call site is now direct, we must clear metadata that
1167 // is only appropriate for indirect calls. This includes !prof and
1168 // !callees metadata.
1169 CB.setMetadata(LLVMContext::MD_prof, nullptr);
1170 CB.setMetadata(LLVMContext::MD_callees, nullptr);
1171 }
1172
1173 // This use is no longer unsafe.
1174 if (VCallSite.NumUnsafeUses)
1175 --*VCallSite.NumUnsafeUses;
1176 }
1177 if (CSInfo.isExported())
1178 IsExported = true;
1179 CSInfo.markDevirt();
1180 };
1181 Apply(SlotInfo.CSInfo);
1182 for (auto &P : SlotInfo.ConstCSInfo)
1183 Apply(P.second);
1184}
1185
1186static bool AddCalls(VTableSlotInfo &SlotInfo, const ValueInfo &Callee) {
1187 // We can't add calls if we haven't seen a definition
1188 if (Callee.getSummaryList().empty())
1189 return false;
1190
1191 // Insert calls into the summary index so that the devirtualized targets
1192 // are eligible for import.
1193 // FIXME: Annotate type tests with hotness. For now, mark these as hot
1194 // to better ensure we have the opportunity to inline them.
1195 bool IsExported = false;
1196 auto &S = Callee.getSummaryList()[0];
1197 CalleeInfo CI(CalleeInfo::HotnessType::Hot, /* RelBF = */ 0);
1198 auto AddCalls = [&](CallSiteInfo &CSInfo) {
1199 for (auto *FS : CSInfo.SummaryTypeCheckedLoadUsers) {
1200 FS->addCall({Callee, CI});
1201 IsExported |= S->modulePath() != FS->modulePath();
1202 }
1203 for (auto *FS : CSInfo.SummaryTypeTestAssumeUsers) {
1204 FS->addCall({Callee, CI});
1205 IsExported |= S->modulePath() != FS->modulePath();
1206 }
1207 };
1208 AddCalls(SlotInfo.CSInfo);
1209 for (auto &P : SlotInfo.ConstCSInfo)
1210 AddCalls(P.second);
1211 return IsExported;
1212}
1213
1214bool DevirtModule::trySingleImplDevirt(
1215 ModuleSummaryIndex *ExportSummary,
1216 MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
1218 // See if the program contains a single implementation of this virtual
1219 // function.
1220 auto *TheFn = TargetsForSlot[0].Fn;
1221 for (auto &&Target : TargetsForSlot)
1222 if (TheFn != Target.Fn)
1223 return false;
1224
1225 // If so, update each call site to call that implementation directly.
1226 if (RemarksEnabled || AreStatisticsEnabled())
1227 TargetsForSlot[0].WasDevirt = true;
1228
1229 bool IsExported = false;
1230 applySingleImplDevirt(SlotInfo, TheFn, IsExported);
1231 if (!IsExported)
1232 return false;
1233
1234 // If the only implementation has local linkage, we must promote to external
1235 // to make it visible to thin LTO objects. We can only get here during the
1236 // ThinLTO export phase.
1237 if (TheFn->hasLocalLinkage()) {
1238 std::string NewName = (TheFn->getName() + ".llvm.merged").str();
1239
1240 // Since we are renaming the function, any comdats with the same name must
1241 // also be renamed. This is required when targeting COFF, as the comdat name
1242 // must match one of the names of the symbols in the comdat.
1243 if (Comdat *C = TheFn->getComdat()) {
1244 if (C->getName() == TheFn->getName()) {
1245 Comdat *NewC = M.getOrInsertComdat(NewName);
1246 NewC->setSelectionKind(C->getSelectionKind());
1247 for (GlobalObject &GO : M.global_objects())
1248 if (GO.getComdat() == C)
1249 GO.setComdat(NewC);
1250 }
1251 }
1252
1253 TheFn->setLinkage(GlobalValue::ExternalLinkage);
1254 TheFn->setVisibility(GlobalValue::HiddenVisibility);
1255 TheFn->setName(NewName);
1256 }
1257 if (ValueInfo TheFnVI = ExportSummary->getValueInfo(TheFn->getGUID()))
1258 // Any needed promotion of 'TheFn' has already been done during
1259 // LTO unit split, so we can ignore return value of AddCalls.
1260 AddCalls(SlotInfo, TheFnVI);
1261
1263 Res->SingleImplName = std::string(TheFn->getName());
1264
1265 return true;
1266}
1267
1268bool DevirtIndex::trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
1269 VTableSlotSummary &SlotSummary,
1270 VTableSlotInfo &SlotInfo,
1272 std::set<ValueInfo> &DevirtTargets) {
1273 // See if the program contains a single implementation of this virtual
1274 // function.
1275 auto TheFn = TargetsForSlot[0];
1276 for (auto &&Target : TargetsForSlot)
1277 if (TheFn != Target)
1278 return false;
1279
1280 // Don't devirtualize if we don't have target definition.
1281 auto Size = TheFn.getSummaryList().size();
1282 if (!Size)
1283 return false;
1284
1285 // Don't devirtualize function if we're told to skip it
1286 // in -wholeprogramdevirt-skip.
1287 if (FunctionsToSkip.match(TheFn.name()))
1288 return false;
1289
1290 // If the summary list contains multiple summaries where at least one is
1291 // a local, give up, as we won't know which (possibly promoted) name to use.
1292 for (const auto &S : TheFn.getSummaryList())
1293 if (GlobalValue::isLocalLinkage(S->linkage()) && Size > 1)
1294 return false;
1295
1296 // Collect functions devirtualized at least for one call site for stats.
1298 DevirtTargets.insert(TheFn);
1299
1300 auto &S = TheFn.getSummaryList()[0];
1301 bool IsExported = AddCalls(SlotInfo, TheFn);
1302 if (IsExported)
1303 ExportedGUIDs.insert(TheFn.getGUID());
1304
1305 // Record in summary for use in devirtualization during the ThinLTO import
1306 // step.
1308 if (GlobalValue::isLocalLinkage(S->linkage())) {
1309 if (IsExported)
1310 // If target is a local function and we are exporting it by
1311 // devirtualizing a call in another module, we need to record the
1312 // promoted name.
1314 TheFn.name(), ExportSummary.getModuleHash(S->modulePath()));
1315 else {
1316 LocalWPDTargetsMap[TheFn].push_back(SlotSummary);
1317 Res->SingleImplName = std::string(TheFn.name());
1318 }
1319 } else
1320 Res->SingleImplName = std::string(TheFn.name());
1321
1322 // Name will be empty if this thin link driven off of serialized combined
1323 // index (e.g. llvm-lto). However, WPD is not supported/invoked for the
1324 // legacy LTO API anyway.
1325 assert(!Res->SingleImplName.empty());
1326
1327 return true;
1328}
1329
1330void DevirtModule::tryICallBranchFunnel(
1331 MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
1332 WholeProgramDevirtResolution *Res, VTableSlot Slot) {
1333 Triple T(M.getTargetTriple());
1334 if (T.getArch() != Triple::x86_64)
1335 return;
1336
1337 if (TargetsForSlot.size() > ClThreshold)
1338 return;
1339
1340 bool HasNonDevirt = !SlotInfo.CSInfo.AllCallSitesDevirted;
1341 if (!HasNonDevirt)
1342 for (auto &P : SlotInfo.ConstCSInfo)
1343 if (!P.second.AllCallSitesDevirted) {
1344 HasNonDevirt = true;
1345 break;
1346 }
1347
1348 if (!HasNonDevirt)
1349 return;
1350
1351 FunctionType *FT =
1352 FunctionType::get(Type::getVoidTy(M.getContext()), {Int8PtrTy}, true);
1353 Function *JT;
1354 if (isa<MDString>(Slot.TypeID)) {
1356 M.getDataLayout().getProgramAddressSpace(),
1357 getGlobalName(Slot, {}, "branch_funnel"), &M);
1358 JT->setVisibility(GlobalValue::HiddenVisibility);
1359 } else {
1361 M.getDataLayout().getProgramAddressSpace(),
1362 "branch_funnel", &M);
1363 }
1364 JT->addParamAttr(0, Attribute::Nest);
1365
1366 std::vector<Value *> JTArgs;
1367 JTArgs.push_back(JT->arg_begin());
1368 for (auto &T : TargetsForSlot) {
1369 JTArgs.push_back(getMemberAddr(T.TM));
1370 JTArgs.push_back(T.Fn);
1371 }
1372
1373 BasicBlock *BB = BasicBlock::Create(M.getContext(), "", JT, nullptr);
1374 Function *Intr =
1375 Intrinsic::getDeclaration(&M, llvm::Intrinsic::icall_branch_funnel, {});
1376
1377 auto *CI = CallInst::Create(Intr, JTArgs, "", BB);
1378 CI->setTailCallKind(CallInst::TCK_MustTail);
1379 ReturnInst::Create(M.getContext(), nullptr, BB);
1380
1381 bool IsExported = false;
1382 applyICallBranchFunnel(SlotInfo, JT, IsExported);
1383 if (IsExported)
1385}
1386
1387void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
1388 Constant *JT, bool &IsExported) {
1389 auto Apply = [&](CallSiteInfo &CSInfo) {
1390 if (CSInfo.isExported())
1391 IsExported = true;
1392 if (CSInfo.AllCallSitesDevirted)
1393 return;
1394 for (auto &&VCallSite : CSInfo.CallSites) {
1395 CallBase &CB = VCallSite.CB;
1396
1397 // Jump tables are only profitable if the retpoline mitigation is enabled.
1398 Attribute FSAttr = CB.getCaller()->getFnAttribute("target-features");
1399 if (!FSAttr.isValid() ||
1400 !FSAttr.getValueAsString().contains("+retpoline"))
1401 continue;
1402
1403 NumBranchFunnel++;
1404 if (RemarksEnabled)
1405 VCallSite.emitRemark("branch-funnel",
1406 JT->stripPointerCasts()->getName(), OREGetter);
1407
1408 // Pass the address of the vtable in the nest register, which is r10 on
1409 // x86_64.
1410 std::vector<Type *> NewArgs;
1411 NewArgs.push_back(Int8PtrTy);
1412 append_range(NewArgs, CB.getFunctionType()->params());
1413 FunctionType *NewFT =
1415 CB.getFunctionType()->isVarArg());
1416 PointerType *NewFTPtr = PointerType::getUnqual(NewFT);
1417
1418 IRBuilder<> IRB(&CB);
1419 std::vector<Value *> Args;
1420 Args.push_back(IRB.CreateBitCast(VCallSite.VTable, Int8PtrTy));
1421 llvm::append_range(Args, CB.args());
1422
1423 CallBase *NewCS = nullptr;
1424 if (isa<CallInst>(CB))
1425 NewCS = IRB.CreateCall(NewFT, IRB.CreateBitCast(JT, NewFTPtr), Args);
1426 else
1427 NewCS = IRB.CreateInvoke(NewFT, IRB.CreateBitCast(JT, NewFTPtr),
1428 cast<InvokeInst>(CB).getNormalDest(),
1429 cast<InvokeInst>(CB).getUnwindDest(), Args);
1430 NewCS->setCallingConv(CB.getCallingConv());
1431
1433 std::vector<AttributeSet> NewArgAttrs;
1434 NewArgAttrs.push_back(AttributeSet::get(
1435 M.getContext(), ArrayRef<Attribute>{Attribute::get(
1436 M.getContext(), Attribute::Nest)}));
1437 for (unsigned I = 0; I + 2 < Attrs.getNumAttrSets(); ++I)
1438 NewArgAttrs.push_back(Attrs.getParamAttrs(I));
1439 NewCS->setAttributes(
1440 AttributeList::get(M.getContext(), Attrs.getFnAttrs(),
1441 Attrs.getRetAttrs(), NewArgAttrs));
1442
1443 CB.replaceAllUsesWith(NewCS);
1444 CB.eraseFromParent();
1445
1446 // This use is no longer unsafe.
1447 if (VCallSite.NumUnsafeUses)
1448 --*VCallSite.NumUnsafeUses;
1449 }
1450 // Don't mark as devirtualized because there may be callers compiled without
1451 // retpoline mitigation, which would mean that they are lowered to
1452 // llvm.type.test and therefore require an llvm.type.test resolution for the
1453 // type identifier.
1454 };
1455 Apply(SlotInfo.CSInfo);
1456 for (auto &P : SlotInfo.ConstCSInfo)
1457 Apply(P.second);
1458}
1459
1460bool DevirtModule::tryEvaluateFunctionsWithArgs(
1462 ArrayRef<uint64_t> Args) {
1463 // Evaluate each function and store the result in each target's RetVal
1464 // field.
1465 for (VirtualCallTarget &Target : TargetsForSlot) {
1466 // TODO: Skip for now if the vtable symbol was an alias to a function,
1467 // need to evaluate whether it would be correct to analyze the aliasee
1468 // function for this optimization.
1469 auto Fn = dyn_cast<Function>(Target.Fn);
1470 if (!Fn)
1471 return false;
1472
1473 if (Fn->arg_size() != Args.size() + 1)
1474 return false;
1475
1476 Evaluator Eval(M.getDataLayout(), nullptr);
1478 EvalArgs.push_back(
1479 Constant::getNullValue(Fn->getFunctionType()->getParamType(0)));
1480 for (unsigned I = 0; I != Args.size(); ++I) {
1481 auto *ArgTy =
1482 dyn_cast<IntegerType>(Fn->getFunctionType()->getParamType(I + 1));
1483 if (!ArgTy)
1484 return false;
1485 EvalArgs.push_back(ConstantInt::get(ArgTy, Args[I]));
1486 }
1487
1488 Constant *RetVal;
1489 if (!Eval.EvaluateFunction(Fn, RetVal, EvalArgs) ||
1490 !isa<ConstantInt>(RetVal))
1491 return false;
1492 Target.RetVal = cast<ConstantInt>(RetVal)->getZExtValue();
1493 }
1494 return true;
1495}
1496
1497void DevirtModule::applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
1498 uint64_t TheRetVal) {
1499 for (auto Call : CSInfo.CallSites) {
1500 if (!OptimizedCalls.insert(&Call.CB).second)
1501 continue;
1502 NumUniformRetVal++;
1503 Call.replaceAndErase(
1504 "uniform-ret-val", FnName, RemarksEnabled, OREGetter,
1505 ConstantInt::get(cast<IntegerType>(Call.CB.getType()), TheRetVal));
1506 }
1507 CSInfo.markDevirt();
1508}
1509
1510bool DevirtModule::tryUniformRetValOpt(
1511 MutableArrayRef<VirtualCallTarget> TargetsForSlot, CallSiteInfo &CSInfo,
1513 // Uniform return value optimization. If all functions return the same
1514 // constant, replace all calls with that constant.
1515 uint64_t TheRetVal = TargetsForSlot[0].RetVal;
1516 for (const VirtualCallTarget &Target : TargetsForSlot)
1517 if (Target.RetVal != TheRetVal)
1518 return false;
1519
1520 if (CSInfo.isExported()) {
1522 Res->Info = TheRetVal;
1523 }
1524
1525 applyUniformRetValOpt(CSInfo, TargetsForSlot[0].Fn->getName(), TheRetVal);
1526 if (RemarksEnabled || AreStatisticsEnabled())
1527 for (auto &&Target : TargetsForSlot)
1528 Target.WasDevirt = true;
1529 return true;
1530}
1531
1532std::string DevirtModule::getGlobalName(VTableSlot Slot,
1533 ArrayRef<uint64_t> Args,
1534 StringRef Name) {
1535 std::string FullName = "__typeid_";
1536 raw_string_ostream OS(FullName);
1537 OS << cast<MDString>(Slot.TypeID)->getString() << '_' << Slot.ByteOffset;
1538 for (uint64_t Arg : Args)
1539 OS << '_' << Arg;
1540 OS << '_' << Name;
1541 return OS.str();
1542}
1543
1544bool DevirtModule::shouldExportConstantsAsAbsoluteSymbols() {
1545 Triple T(M.getTargetTriple());
1546 return T.isX86() && T.getObjectFormat() == Triple::ELF;
1547}
1548
1549void DevirtModule::exportGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
1552 getGlobalName(Slot, Args, Name), C, &M);
1554}
1555
1556void DevirtModule::exportConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
1557 StringRef Name, uint32_t Const,
1558 uint32_t &Storage) {
1559 if (shouldExportConstantsAsAbsoluteSymbols()) {
1560 exportGlobal(
1561 Slot, Args, Name,
1563 return;
1564 }
1565
1566 Storage = Const;
1567}
1568
1569Constant *DevirtModule::importGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
1570 StringRef Name) {
1571 Constant *C =
1572 M.getOrInsertGlobal(getGlobalName(Slot, Args, Name), Int8Arr0Ty);
1573 auto *GV = dyn_cast<GlobalVariable>(C);
1574 if (GV)
1575 GV->setVisibility(GlobalValue::HiddenVisibility);
1576 return C;
1577}
1578
1579Constant *DevirtModule::importConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
1580 StringRef Name, IntegerType *IntTy,
1581 uint32_t Storage) {
1582 if (!shouldExportConstantsAsAbsoluteSymbols())
1583 return ConstantInt::get(IntTy, Storage);
1584
1585 Constant *C = importGlobal(Slot, Args, Name);
1586 auto *GV = cast<GlobalVariable>(C->stripPointerCasts());
1587 C = ConstantExpr::getPtrToInt(C, IntTy);
1588
1589 // We only need to set metadata if the global is newly created, in which
1590 // case it would not have hidden visibility.
1591 if (GV->hasMetadata(LLVMContext::MD_absolute_symbol))
1592 return C;
1593
1594 auto SetAbsRange = [&](uint64_t Min, uint64_t Max) {
1595 auto *MinC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Min));
1596 auto *MaxC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Max));
1597 GV->setMetadata(LLVMContext::MD_absolute_symbol,
1598 MDNode::get(M.getContext(), {MinC, MaxC}));
1599 };
1600 unsigned AbsWidth = IntTy->getBitWidth();
1601 if (AbsWidth == IntPtrTy->getBitWidth())
1602 SetAbsRange(~0ull, ~0ull); // Full set.
1603 else
1604 SetAbsRange(0, 1ull << AbsWidth);
1605 return C;
1606}
1607
1608void DevirtModule::applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
1609 bool IsOne,
1610 Constant *UniqueMemberAddr) {
1611 for (auto &&Call : CSInfo.CallSites) {
1612 if (!OptimizedCalls.insert(&Call.CB).second)
1613 continue;
1614 IRBuilder<> B(&Call.CB);
1615 Value *Cmp =
1616 B.CreateICmp(IsOne ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE, Call.VTable,
1617 B.CreateBitCast(UniqueMemberAddr, Call.VTable->getType()));
1618 Cmp = B.CreateZExt(Cmp, Call.CB.getType());
1619 NumUniqueRetVal++;
1620 Call.replaceAndErase("unique-ret-val", FnName, RemarksEnabled, OREGetter,
1621 Cmp);
1622 }
1623 CSInfo.markDevirt();
1624}
1625
1626Constant *DevirtModule::getMemberAddr(const TypeMemberInfo *M) {
1627 Constant *C = ConstantExpr::getBitCast(M->Bits->GV, Int8PtrTy);
1628 return ConstantExpr::getGetElementPtr(Int8Ty, C,
1629 ConstantInt::get(Int64Ty, M->Offset));
1630}
1631
1632bool DevirtModule::tryUniqueRetValOpt(
1633 unsigned BitWidth, MutableArrayRef<VirtualCallTarget> TargetsForSlot,
1634 CallSiteInfo &CSInfo, WholeProgramDevirtResolution::ByArg *Res,
1635 VTableSlot Slot, ArrayRef<uint64_t> Args) {
1636 // IsOne controls whether we look for a 0 or a 1.
1637 auto tryUniqueRetValOptFor = [&](bool IsOne) {
1638 const TypeMemberInfo *UniqueMember = nullptr;
1639 for (const VirtualCallTarget &Target : TargetsForSlot) {
1640 if (Target.RetVal == (IsOne ? 1 : 0)) {
1641 if (UniqueMember)
1642 return false;
1643 UniqueMember = Target.TM;
1644 }
1645 }
1646
1647 // We should have found a unique member or bailed out by now. We already
1648 // checked for a uniform return value in tryUniformRetValOpt.
1649 assert(UniqueMember);
1650
1651 Constant *UniqueMemberAddr = getMemberAddr(UniqueMember);
1652 if (CSInfo.isExported()) {
1654 Res->Info = IsOne;
1655
1656 exportGlobal(Slot, Args, "unique_member", UniqueMemberAddr);
1657 }
1658
1659 // Replace each call with the comparison.
1660 applyUniqueRetValOpt(CSInfo, TargetsForSlot[0].Fn->getName(), IsOne,
1661 UniqueMemberAddr);
1662
1663 // Update devirtualization statistics for targets.
1664 if (RemarksEnabled || AreStatisticsEnabled())
1665 for (auto &&Target : TargetsForSlot)
1666 Target.WasDevirt = true;
1667
1668 return true;
1669 };
1670
1671 if (BitWidth == 1) {
1672 if (tryUniqueRetValOptFor(true))
1673 return true;
1674 if (tryUniqueRetValOptFor(false))
1675 return true;
1676 }
1677 return false;
1678}
1679
1680void DevirtModule::applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
1681 Constant *Byte, Constant *Bit) {
1682 for (auto Call : CSInfo.CallSites) {
1683 if (!OptimizedCalls.insert(&Call.CB).second)
1684 continue;
1685 auto *RetType = cast<IntegerType>(Call.CB.getType());
1686 IRBuilder<> B(&Call.CB);
1687 Value *Addr =
1688 B.CreateGEP(Int8Ty, B.CreateBitCast(Call.VTable, Int8PtrTy), Byte);
1689 if (RetType->getBitWidth() == 1) {
1690 Value *Bits = B.CreateLoad(Int8Ty, Addr);
1691 Value *BitsAndBit = B.CreateAnd(Bits, Bit);
1692 auto IsBitSet = B.CreateICmpNE(BitsAndBit, ConstantInt::get(Int8Ty, 0));
1693 NumVirtConstProp1Bit++;
1694 Call.replaceAndErase("virtual-const-prop-1-bit", FnName, RemarksEnabled,
1695 OREGetter, IsBitSet);
1696 } else {
1697 Value *ValAddr = B.CreateBitCast(Addr, RetType->getPointerTo());
1698 Value *Val = B.CreateLoad(RetType, ValAddr);
1699 NumVirtConstProp++;
1700 Call.replaceAndErase("virtual-const-prop", FnName, RemarksEnabled,
1701 OREGetter, Val);
1702 }
1703 }
1704 CSInfo.markDevirt();
1705}
1706
1707bool DevirtModule::tryVirtualConstProp(
1708 MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
1709 WholeProgramDevirtResolution *Res, VTableSlot Slot) {
1710 // TODO: Skip for now if the vtable symbol was an alias to a function,
1711 // need to evaluate whether it would be correct to analyze the aliasee
1712 // function for this optimization.
1713 auto Fn = dyn_cast<Function>(TargetsForSlot[0].Fn);
1714 if (!Fn)
1715 return false;
1716 // This only works if the function returns an integer.
1717 auto RetType = dyn_cast<IntegerType>(Fn->getReturnType());
1718 if (!RetType)
1719 return false;
1720 unsigned BitWidth = RetType->getBitWidth();
1721 if (BitWidth > 64)
1722 return false;
1723
1724 // Make sure that each function is defined, does not access memory, takes at
1725 // least one argument, does not use its first argument (which we assume is
1726 // 'this'), and has the same return type.
1727 //
1728 // Note that we test whether this copy of the function is readnone, rather
1729 // than testing function attributes, which must hold for any copy of the
1730 // function, even a less optimized version substituted at link time. This is
1731 // sound because the virtual constant propagation optimizations effectively
1732 // inline all implementations of the virtual function into each call site,
1733 // rather than using function attributes to perform local optimization.
1734 for (VirtualCallTarget &Target : TargetsForSlot) {
1735 // TODO: Skip for now if the vtable symbol was an alias to a function,
1736 // need to evaluate whether it would be correct to analyze the aliasee
1737 // function for this optimization.
1738 auto Fn = dyn_cast<Function>(Target.Fn);
1739 if (!Fn)
1740 return false;
1741
1742 if (Fn->isDeclaration() ||
1743 !computeFunctionBodyMemoryAccess(*Fn, AARGetter(*Fn))
1744 .doesNotAccessMemory() ||
1745 Fn->arg_empty() || !Fn->arg_begin()->use_empty() ||
1746 Fn->getReturnType() != RetType)
1747 return false;
1748 }
1749
1750 for (auto &&CSByConstantArg : SlotInfo.ConstCSInfo) {
1751 if (!tryEvaluateFunctionsWithArgs(TargetsForSlot, CSByConstantArg.first))
1752 continue;
1753
1754 WholeProgramDevirtResolution::ByArg *ResByArg = nullptr;
1755 if (Res)
1756 ResByArg = &Res->ResByArg[CSByConstantArg.first];
1757
1758 if (tryUniformRetValOpt(TargetsForSlot, CSByConstantArg.second, ResByArg))
1759 continue;
1760
1761 if (tryUniqueRetValOpt(BitWidth, TargetsForSlot, CSByConstantArg.second,
1762 ResByArg, Slot, CSByConstantArg.first))
1763 continue;
1764
1765 // Find an allocation offset in bits in all vtables associated with the
1766 // type.
1767 uint64_t AllocBefore =
1768 findLowestOffset(TargetsForSlot, /*IsAfter=*/false, BitWidth);
1769 uint64_t AllocAfter =
1770 findLowestOffset(TargetsForSlot, /*IsAfter=*/true, BitWidth);
1771
1772 // Calculate the total amount of padding needed to store a value at both
1773 // ends of the object.
1774 uint64_t TotalPaddingBefore = 0, TotalPaddingAfter = 0;
1775 for (auto &&Target : TargetsForSlot) {
1776 TotalPaddingBefore += std::max<int64_t>(
1777 (AllocBefore + 7) / 8 - Target.allocatedBeforeBytes() - 1, 0);
1778 TotalPaddingAfter += std::max<int64_t>(
1779 (AllocAfter + 7) / 8 - Target.allocatedAfterBytes() - 1, 0);
1780 }
1781
1782 // If the amount of padding is too large, give up.
1783 // FIXME: do something smarter here.
1784 if (std::min(TotalPaddingBefore, TotalPaddingAfter) > 128)
1785 continue;
1786
1787 // Calculate the offset to the value as a (possibly negative) byte offset
1788 // and (if applicable) a bit offset, and store the values in the targets.
1789 int64_t OffsetByte;
1790 uint64_t OffsetBit;
1791 if (TotalPaddingBefore <= TotalPaddingAfter)
1792 setBeforeReturnValues(TargetsForSlot, AllocBefore, BitWidth, OffsetByte,
1793 OffsetBit);
1794 else
1795 setAfterReturnValues(TargetsForSlot, AllocAfter, BitWidth, OffsetByte,
1796 OffsetBit);
1797
1798 if (RemarksEnabled || AreStatisticsEnabled())
1799 for (auto &&Target : TargetsForSlot)
1800 Target.WasDevirt = true;
1801
1802
1803 if (CSByConstantArg.second.isExported()) {
1805 exportConstant(Slot, CSByConstantArg.first, "byte", OffsetByte,
1806 ResByArg->Byte);
1807 exportConstant(Slot, CSByConstantArg.first, "bit", 1ULL << OffsetBit,
1808 ResByArg->Bit);
1809 }
1810
1811 // Rewrite each call to a load from OffsetByte/OffsetBit.
1812 Constant *ByteConst = ConstantInt::get(Int32Ty, OffsetByte);
1813 Constant *BitConst = ConstantInt::get(Int8Ty, 1ULL << OffsetBit);
1814 applyVirtualConstProp(CSByConstantArg.second,
1815 TargetsForSlot[0].Fn->getName(), ByteConst, BitConst);
1816 }
1817 return true;
1818}
1819
// Rebuild the vtable global B.GV so that the bytes allocated by virtual
// constant propagation (B.Before / B.After) are embedded around the original
// initializer, then alias the original name to the embedded original so all
// existing references keep working.
void DevirtModule::rebuildGlobal(VTableBits &B) {
  // Nothing was allocated before or after this vtable; leave it untouched.
  if (B.Before.Bytes.empty() && B.After.Bytes.empty())
    return;

  // Align the before byte array to the global's minimum alignment so that we
  // don't break any alignment requirements on the global.
  Align Alignment = M.getDataLayout().getValueOrABITypeAlignment(
      B.GV->getAlign(), B.GV->getValueType());
  B.Before.Bytes.resize(alignTo(B.Before.Bytes.size(), Alignment));

  // Before was stored in reverse order; flip it now.
  for (size_t I = 0, Size = B.Before.Bytes.size(); I != Size / 2; ++I)
    std::swap(B.Before.Bytes[I], B.Before.Bytes[Size - 1 - I]);

  // Build an anonymous global containing the before bytes, followed by the
  // original initializer, followed by the after bytes.
  auto NewInit = ConstantStruct::getAnon(
      {ConstantDataArray::get(M.getContext(), B.Before.Bytes),
       B.GV->getInitializer(),
       ConstantDataArray::get(M.getContext(), B.After.Bytes)});
  auto NewGV =
      new GlobalVariable(M, NewInit->getType(), B.GV->isConstant(),
                         GlobalVariable::PrivateLinkage, NewInit, "", B.GV);
  NewGV->setSection(B.GV->getSection());
  NewGV->setComdat(B.GV->getComdat());
  NewGV->setAlignment(B.GV->getAlign());

  // Copy the original vtable's metadata to the anonymous global, adjusting
  // offsets as required.
  NewGV->copyMetadata(B.GV, B.Before.Bytes.size());

  // Build an alias named after the original global, pointing at the second
  // element (the original initializer).
  // NOTE(review): a line is missing from this extract between the linkage
  // argument and the GEP operand list (upstream line 1855, presumably the
  // `ConstantExpr::getGetElementPtr(` call head) — restore from upstream
  // before compiling.
  auto Alias = GlobalAlias::create(
      B.GV->getInitializer()->getType(), 0, B.GV->getLinkage(), "",
          NewInit->getType(), NewGV,
          ArrayRef<Constant *>{ConstantInt::get(Int32Ty, 0),
                               ConstantInt::get(Int32Ty, 1)}),
      &M);
  Alias->setVisibility(B.GV->getVisibility());
  Alias->takeName(B.GV);

  // Route every remaining use of the old global through the alias, then drop
  // the old global entirely.
  B.GV->replaceAllUsesWith(Alias);
  B.GV->eraseFromParent();
}
1866
1867bool DevirtModule::areRemarksEnabled() {
1868 const auto &FL = M.getFunctionList();
1869 for (const Function &Fn : FL) {
1870 if (Fn.empty())
1871 continue;
1872 auto DI = OptimizationRemark(DEBUG_TYPE, "", DebugLoc(), &Fn.front());
1873 return DI.isEnabled();
1874 }
1875 return false;
1876}
1877
// Scan users of llvm.type.test that feed llvm.assume, recording the virtual
// call sites they guard into CallSlots, and remove any type test assume
// sequences that a later LowerTypeTests invocation would otherwise lower to
// false (breaking the assumes).
void DevirtModule::scanTypeTestUsers(
    Function *TypeTestFunc,
    DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) {
  // Find all virtual calls via a virtual table pointer %p under an assumption
  // of the form llvm.assume(llvm.type.test(%p, %md)). This indicates that %p
  // points to a member of the type identifier %md. Group calls by (type ID,
  // offset) pair (effectively the identity of the virtual function) and store
  // to CallSlots.
  for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) {
    auto *CI = dyn_cast<CallInst>(U.getUser());
    if (!CI)
      continue;

    // Search for virtual calls based on %p and add them to DevirtCalls.
    // NOTE(review): the local declarations of DevirtCalls and Assumes
    // (upstream lines 1892-1893) are missing from this extract; restore them
    // before compiling.
    auto &DT = LookupDomTree(*CI->getFunction());
    findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT);

    Metadata *TypeId =
        cast<MetadataAsValue>(CI->getArgOperand(1))->getMetadata();
    // If we found any, add them to CallSlots.
    if (!Assumes.empty()) {
      Value *Ptr = CI->getArgOperand(0)->stripPointerCasts();
      for (DevirtCallSite Call : DevirtCalls)
        CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CB, nullptr);
    }

    auto RemoveTypeTestAssumes = [&]() {
      // We no longer need the assumes or the type test.
      for (auto *Assume : Assumes)
        Assume->eraseFromParent();
      // We can't use RecursivelyDeleteTriviallyDeadInstructions here because we
      // may use the vtable argument later.
      if (CI->use_empty())
        CI->eraseFromParent();
    };

    // At this point we could remove all type test assume sequences, as they
    // were originally inserted for WPD. However, we can keep these in the
    // code stream for later analysis (e.g. to help drive more efficient ICP
    // sequences). They will eventually be removed by a second LowerTypeTests
    // invocation that cleans them up. In order to do this correctly, the first
    // LowerTypeTests invocation needs to know that they have "Unknown" type
    // test resolution, so that they aren't treated as Unsat and lowered to
    // False, which will break any uses on assumes. Below we remove any type
    // test assumes that will not be treated as Unknown by LTT.

    // The type test assumes will be treated by LTT as Unsat if the type id is
    // not used on a global (in which case it has no entry in the TypeIdMap).
    if (!TypeIdMap.count(TypeId))
      RemoveTypeTestAssumes();

    // For ThinLTO importing, we need to remove the type test assumes if this is
    // an MDString type id without a corresponding TypeIdSummary. Any
    // non-MDString type ids are ignored and treated as Unknown by LTT, so their
    // type test assumes can be kept. If the MDString type id is missing a
    // TypeIdSummary (e.g. because there was no use on a vcall, preventing the
    // exporting phase of WPD from analyzing it), then it would be treated as
    // Unsat by LTT and we need to remove its type test assumes here. If not
    // used on a vcall we don't need them for later optimization use in any
    // case.
    else if (ImportSummary && isa<MDString>(TypeId)) {
      const TypeIdSummary *TidSummary =
          ImportSummary->getTypeIdSummary(cast<MDString>(TypeId)->getString());
      if (!TidSummary)
        RemoveTypeTestAssumes();
      else
        // If one was created it should not be Unsat, because if we reached here
        // the type id was used on a global.
        // NOTE(review): the assert statement that followed this comment
        // (upstream line 1948) is missing from this extract; restore it
        // before compiling — the dangling `else` has no statement as written.
    }
  }
}
1952
// Lower each llvm.type.checked.load call into an explicit vtable load plus an
// llvm.type.test call, recording the devirtualizable call sites it guards
// into CallSlots and tracking how many "unsafe" (not-yet-devirtualized) uses
// each generated type test still protects.
void DevirtModule::scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc) {
  Function *TypeTestFunc = Intrinsic::getDeclaration(&M, Intrinsic::type_test);

  for (Use &U : llvm::make_early_inc_range(TypeCheckedLoadFunc->uses())) {
    auto *CI = dyn_cast<CallInst>(U.getUser());
    if (!CI)
      continue;

    // Operands: vtable pointer, byte offset of the slot, and the type id.
    Value *Ptr = CI->getArgOperand(0);
    Value *Offset = CI->getArgOperand(1);
    Value *TypeIdValue = CI->getArgOperand(2);
    Metadata *TypeId = cast<MetadataAsValue>(TypeIdValue)->getMetadata();

    // NOTE(review): the local declarations of DevirtCalls, LoadedPtrs and
    // Preds (upstream lines 1965-1968) are missing from this extract; restore
    // them before compiling.
    bool HasNonCallUses = false;
    auto &DT = LookupDomTree(*CI->getFunction());
    findDevirtualizableCallsForTypeCheckedLoad(DevirtCalls, LoadedPtrs, Preds,
                                               HasNonCallUses, CI, DT);

    // Start by generating "pessimistic" code that explicitly loads the function
    // pointer from the vtable and performs the type check. If possible, we will
    // eliminate the load and the type check later.

    // If possible, only generate the load at the point where it is used.
    // This helps avoid unnecessary spills.
    IRBuilder<> LoadB(
        (LoadedPtrs.size() == 1 && !HasNonCallUses) ? LoadedPtrs[0] : CI);
    Value *GEP = LoadB.CreateGEP(Int8Ty, Ptr, Offset);
    Value *GEPPtr = LoadB.CreateBitCast(GEP, PointerType::getUnqual(Int8PtrTy));
    Value *LoadedValue = LoadB.CreateLoad(Int8PtrTy, GEPPtr);

    // Replace each extracted function pointer with the explicit load.
    for (Instruction *LoadedPtr : LoadedPtrs) {
      LoadedPtr->replaceAllUsesWith(LoadedValue);
      LoadedPtr->eraseFromParent();
    }

    // Likewise for the type test.
    IRBuilder<> CallB((Preds.size() == 1 && !HasNonCallUses) ? Preds[0] : CI);
    CallInst *TypeTestCall = CallB.CreateCall(TypeTestFunc, {Ptr, TypeIdValue});

    for (Instruction *Pred : Preds) {
      Pred->replaceAllUsesWith(TypeTestCall);
      Pred->eraseFromParent();
    }

    // We have already erased any extractvalue instructions that refer to the
    // intrinsic call, but the intrinsic may have other non-extractvalue uses
    // (although this is unlikely). In that case, explicitly build a pair and
    // RAUW it.
    if (!CI->use_empty()) {
      Value *Pair = PoisonValue::get(CI->getType());
      IRBuilder<> B(CI);
      Pair = B.CreateInsertValue(Pair, LoadedValue, {0});
      Pair = B.CreateInsertValue(Pair, TypeTestCall, {1});
      CI->replaceAllUsesWith(Pair);
    }

    // The number of unsafe uses is initially the number of uses.
    auto &NumUnsafeUses = NumUnsafeUsesForTypeTest[TypeTestCall];
    NumUnsafeUses = DevirtCalls.size();

    // If the function pointer has a non-call user, we cannot eliminate the type
    // check, as one of those users may eventually call the pointer. Increment
    // the unsafe use count to make sure it cannot reach zero.
    if (HasNonCallUses)
      ++NumUnsafeUses;
    for (DevirtCallSite Call : DevirtCalls) {
      CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CB,
                                                   &NumUnsafeUses);
    }

    // The intrinsic call is fully lowered; remove it.
    CI->eraseFromParent();
  }
}
2029
// ThinLTO import phase: look up the resolution computed during the export
// phase for this (type id, offset) slot in the import summary and apply it to
// the call sites recorded in SlotInfo.
void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) {
  // Only MDString type ids can have summary entries.
  auto *TypeId = dyn_cast<MDString>(Slot.TypeID);
  if (!TypeId)
    return;
  const TypeIdSummary *TidSummary =
      ImportSummary->getTypeIdSummary(TypeId->getString());
  if (!TidSummary)
    return;
  auto ResI = TidSummary->WPDRes.find(Slot.ByteOffset);
  if (ResI == TidSummary->WPDRes.end())
    return;
  const WholeProgramDevirtResolution &Res = ResI->second;

  // NOTE(review): the guard for the SingleImpl resolution kind (upstream line
  // 2043, `if (Res.TheKind == WholeProgramDevirtResolution::SingleImpl) {`)
  // is missing from this extract; restore it before compiling.
    assert(!Res.SingleImplName.empty());
    // The type of the function in the declaration is irrelevant because every
    // call site will cast it to the correct type.
    Constant *SingleImpl =
        cast<Constant>(M.getOrInsertFunction(Res.SingleImplName,
                                             Type::getVoidTy(M.getContext()))
                           .getCallee());

    // This is the import phase so we should not be exporting anything.
    bool IsExported = false;
    applySingleImplDevirt(SlotInfo, SingleImpl, IsExported);
    assert(!IsExported);
  }

  // Apply per-constant-argument resolutions.
  for (auto &CSByConstantArg : SlotInfo.ConstCSInfo) {
    auto I = Res.ResByArg.find(CSByConstantArg.first);
    if (I == Res.ResByArg.end())
      continue;
    auto &ResByArg = I->second;
    // FIXME: We should figure out what to do about the "function name" argument
    // to the apply* functions, as the function names are unavailable during the
    // importing phase. For now we just pass the empty string. This does not
    // impact correctness because the function names are just used for remarks.
    switch (ResByArg.TheKind) {
    // NOTE(review): the `case` labels for UniformRetVal, UniqueRetVal and
    // VirtualConstProp (upstream lines 2068, 2071 and 2078) are missing from
    // this extract; restore them before compiling.
      applyUniformRetValOpt(CSByConstantArg.second, "", ResByArg.Info);
      break;
      Constant *UniqueMemberAddr =
          importGlobal(Slot, CSByConstantArg.first, "unique_member");
      applyUniqueRetValOpt(CSByConstantArg.second, "", ResByArg.Info,
                           UniqueMemberAddr);
      break;
    }
      Constant *Byte = importConstant(Slot, CSByConstantArg.first, "byte",
                                      Int32Ty, ResByArg.Byte);
      Constant *Bit = importConstant(Slot, CSByConstantArg.first, "bit", Int8Ty,
                                     ResByArg.Bit);
      applyVirtualConstProp(CSByConstantArg.second, "", Byte, Bit);
      break;
    }
    default:
      break;
    }
  }

  // NOTE(review): the guard for the BranchFunnel resolution kind (upstream
  // line 2091, `if (Res.TheKind == ...BranchFunnel) {`) is missing from this
  // extract; restore it before compiling.
    // The type of the function is irrelevant, because it's bitcast at calls
    // anyhow.
    Constant *JT = cast<Constant>(
        M.getOrInsertFunction(getGlobalName(Slot, {}, "branch_funnel"),
                              Type::getVoidTy(M.getContext()))
            .getCallee());
    bool IsExported = false;
    applyICallBranchFunnel(SlotInfo, JT, IsExported);
    assert(!IsExported);
  }
}
2103
2104void DevirtModule::removeRedundantTypeTests() {
2105 auto True = ConstantInt::getTrue(M.getContext());
2106 for (auto &&U : NumUnsafeUsesForTypeTest) {
2107 if (U.second == 0) {
2108 U.first->replaceAllUsesWith(True);
2109 U.first->eraseFromParent();
2110 }
2111 }
2112}
2113
2115DevirtModule::lookUpFunctionValueInfo(Function *TheFn,
2116 ModuleSummaryIndex *ExportSummary) {
2117 assert((ExportSummary != nullptr) &&
2118 "Caller guarantees ExportSummary is not nullptr");
2119
2120 const auto TheFnGUID = TheFn->getGUID();
2121 const auto TheFnGUIDWithExportedName = GlobalValue::getGUID(TheFn->getName());
2122 // Look up ValueInfo with the GUID in the current linkage.
2123 ValueInfo TheFnVI = ExportSummary->getValueInfo(TheFnGUID);
2124 // If no entry is found and GUID is different from GUID computed using
2125 // exported name, look up ValueInfo with the exported name unconditionally.
2126 // This is a fallback.
2127 //
2128 // The reason to have a fallback:
2129 // 1. LTO could enable global value internalization via
2130 // `enable-lto-internalization`.
2131 // 2. The GUID in ExportedSummary is computed using exported name.
2132 if ((!TheFnVI) && (TheFnGUID != TheFnGUIDWithExportedName)) {
2133 TheFnVI = ExportSummary->getValueInfo(TheFnGUIDWithExportedName);
2134 }
2135 return TheFnVI;
2136}
2137
// Return true if F can be proven to immediately reach `unreachable`, either
// from its IR (for definitions) or from the export summary (for
// declarations). Used to exclude dead virtual implementations from
// devirtualization candidate sets.
bool DevirtModule::mustBeUnreachableFunction(
    Function *const F, ModuleSummaryIndex *ExportSummary) {
  // First, learn unreachability by analyzing function IR.
  if (!F->isDeclaration()) {
    // A function must be unreachable if its entry block ends with an
    // 'unreachable'.
    return isa<UnreachableInst>(F->getEntryBlock().getTerminator());
  }
  // Learn unreachability from ExportSummary if ExportSummary is present.
  // NOTE(review): a line is missing from this extract (upstream line 2148,
  // presumably the call to the file-local summary-based
  // mustBeUnreachableFunction helper wrapping the lookup below); restore it
  // before compiling.
  return ExportSummary &&
             DevirtModule::lookUpFunctionValueInfo(F, ExportSummary));
}
2151
// Main driver for module-level whole program devirtualization. Scans the
// type-test / type-checked-load intrinsics (and, when exporting, the function
// summaries), attempts each devirtualization strategy per vtable slot, emits
// remarks/stats, and cleans up redundant type tests. Returns true if the
// module was modified.
bool DevirtModule::run() {
  // If only some of the modules were split, we cannot correctly perform
  // this transformation. We already checked for the presence of type tests
  // with partially split modules during the thin link, and would have emitted
  // an error if any were found, so here we can simply return.
  if ((ExportSummary && ExportSummary->partiallySplitLTOUnits()) ||
      (ImportSummary && ImportSummary->partiallySplitLTOUnits()))
    return false;

  Function *TypeTestFunc =
      M.getFunction(Intrinsic::getName(Intrinsic::type_test));
  Function *TypeCheckedLoadFunc =
      M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load));
  Function *AssumeFunc = M.getFunction(Intrinsic::getName(Intrinsic::assume));

  // Normally if there are no users of the devirtualization intrinsics in the
  // module, this pass has nothing to do. But if we are exporting, we also need
  // to handle any users that appear only in the function summaries.
  if (!ExportSummary &&
      (!TypeTestFunc || TypeTestFunc->use_empty() || !AssumeFunc ||
       AssumeFunc->use_empty()) &&
      (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()))
    return false;

  // Rebuild type metadata into a map for easy lookup.
  std::vector<VTableBits> Bits;
  // NOTE(review): the declaration of TypeIdMap (upstream line 2178) is
  // missing from this extract; restore it before compiling.
  buildTypeIdentifierMap(Bits, TypeIdMap);

  if (TypeTestFunc && AssumeFunc)
    scanTypeTestUsers(TypeTestFunc, TypeIdMap);

  if (TypeCheckedLoadFunc)
    scanTypeCheckedLoadUsers(TypeCheckedLoadFunc);

  if (ImportSummary) {
    // Import phase: apply previously computed resolutions and stop.
    for (auto &S : CallSlots)
      importResolution(S.first, S.second);

    removeRedundantTypeTests();

    // We have lowered or deleted the type intrinsics, so we will no longer have
    // enough information to reason about the liveness of virtual function
    // pointers in GlobalDCE.
    for (GlobalVariable &GV : M.globals())
      GV.eraseMetadata(LLVMContext::MD_vcall_visibility);

    // The rest of the code is only necessary when exporting or during regular
    // LTO, so we are done.
    return true;
  }

  if (TypeIdMap.empty())
    return true;

  // Collect information from summary about which calls to try to devirtualize.
  if (ExportSummary) {
    // NOTE(review): the declaration of MetadataByGUID (upstream line 2209) is
    // missing from this extract; restore it before compiling.
    for (auto &P : TypeIdMap) {
      if (auto *TypeId = dyn_cast<MDString>(P.first))
        MetadataByGUID[GlobalValue::getGUID(TypeId->getString())].push_back(
            TypeId);
    }

    for (auto &P : *ExportSummary) {
      for (auto &S : P.second.SummaryList) {
        auto *FS = dyn_cast<FunctionSummary>(S.get());
        if (!FS)
          continue;
        // FIXME: Only add live functions.
        for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
          for (Metadata *MD : MetadataByGUID[VF.GUID]) {
            CallSlots[{MD, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS);
          }
        }
        for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
          for (Metadata *MD : MetadataByGUID[VF.GUID]) {
            CallSlots[{MD, VF.Offset}].CSInfo.addSummaryTypeCheckedLoadUser(FS);
          }
        }
        for (const FunctionSummary::ConstVCall &VC :
             FS->type_test_assume_const_vcalls()) {
          for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
            CallSlots[{MD, VC.VFunc.Offset}]
                .ConstCSInfo[VC.Args]
                .addSummaryTypeTestAssumeUser(FS);
          }
        }
        for (const FunctionSummary::ConstVCall &VC :
             FS->type_checked_load_const_vcalls()) {
          for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
            CallSlots[{MD, VC.VFunc.Offset}]
                .ConstCSInfo[VC.Args]
                .addSummaryTypeCheckedLoadUser(FS);
          }
        }
      }
    }
  }

  // For each (type, offset) pair:
  bool DidVirtualConstProp = false;
  std::map<std::string, GlobalValue *> DevirtTargets;
  for (auto &S : CallSlots) {
    // Search each of the members of the type identifier for the virtual
    // function implementation at offset S.first.ByteOffset, and add to
    // TargetsForSlot.
    std::vector<VirtualCallTarget> TargetsForSlot;
    WholeProgramDevirtResolution *Res = nullptr;
    const std::set<TypeMemberInfo> &TypeMemberInfos = TypeIdMap[S.first.TypeID];
    if (ExportSummary && isa<MDString>(S.first.TypeID) &&
        TypeMemberInfos.size())
      // For any type id used on a global's type metadata, create the type id
      // summary resolution regardless of whether we can devirtualize, so that
      // lower type tests knows the type id is not Unsat. If it was not used on
      // a global's type metadata, the TypeIdMap entry set will be empty, and
      // we don't want to create an entry (with the default Unknown type
      // resolution), which can prevent detection of the Unsat.
      Res = &ExportSummary
                 ->getOrInsertTypeIdSummary(
                     cast<MDString>(S.first.TypeID)->getString())
                 .WPDRes[S.first.ByteOffset];
    if (tryFindVirtualCallTargets(TargetsForSlot, TypeMemberInfos,
                                  S.first.ByteOffset, ExportSummary)) {

      // Try the strongest optimization first; fall back to constant
      // propagation and branch funnels when single-impl is not possible.
      if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res)) {
        DidVirtualConstProp |=
            tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first);

        tryICallBranchFunnel(TargetsForSlot, S.second, Res, S.first);
      }

      // Collect functions devirtualized at least for one call site for stats.
      if (RemarksEnabled || AreStatisticsEnabled())
        for (const auto &T : TargetsForSlot)
          if (T.WasDevirt)
            DevirtTargets[std::string(T.Fn->getName())] = T.Fn;
    }

    // CFI-specific: if we are exporting and any llvm.type.checked.load
    // intrinsics were *not* devirtualized, we need to add the resulting
    // llvm.type.test intrinsics to the function summaries so that the
    // LowerTypeTests pass will export them.
    if (ExportSummary && isa<MDString>(S.first.TypeID)) {
      auto GUID =
          GlobalValue::getGUID(cast<MDString>(S.first.TypeID)->getString());
      for (auto *FS : S.second.CSInfo.SummaryTypeCheckedLoadUsers)
        FS->addTypeTest(GUID);
      for (auto &CCS : S.second.ConstCSInfo)
        for (auto *FS : CCS.second.SummaryTypeCheckedLoadUsers)
          FS->addTypeTest(GUID);
    }
  }

  if (RemarksEnabled) {
    // Generate remarks for each devirtualized function.
    for (const auto &DT : DevirtTargets) {
      GlobalValue *GV = DT.second;
      auto F = dyn_cast<Function>(GV);
      if (!F) {
        // The target may be an alias to a function; remark on the aliasee.
        auto A = dyn_cast<GlobalAlias>(GV);
        assert(A && isa<Function>(A->getAliasee()));
        F = dyn_cast<Function>(A->getAliasee());
        assert(F);
      }

      using namespace ore;
      OREGetter(F).emit(OptimizationRemark(DEBUG_TYPE, "Devirtualized", F)
                        << "devirtualized "
                        << NV("FunctionName", DT.first));
    }
  }

  NumDevirtTargets += DevirtTargets.size();

  removeRedundantTypeTests();

  // Rebuild each global we touched as part of virtual constant propagation to
  // include the before and after bytes.
  if (DidVirtualConstProp)
    for (VTableBits &B : Bits)
      rebuildGlobal(B);

  // We have lowered or deleted the type intrinsics, so we will no longer have
  // enough information to reason about the liveness of virtual function
  // pointers in GlobalDCE.
  for (GlobalVariable &GV : M.globals())
    GV.eraseMetadata(LLVMContext::MD_vcall_visibility);

  return true;
}
2343
// Index-based (thin link) whole program devirtualization driver: collects
// virtual call sites from the combined summary, attempts single
// implementation devirtualization per slot, and records resolutions in the
// export summary.
void DevirtIndex::run() {
  // No type id metadata anywhere in the index: nothing to do.
  if (ExportSummary.typeIdCompatibleVtableMap().empty())
    return;

  // NOTE(review): the declaration of NameByGUID (upstream line 2348) is
  // missing from this extract; restore it before compiling.
  for (const auto &P : ExportSummary.typeIdCompatibleVtableMap()) {
    NameByGUID[GlobalValue::getGUID(P.first)].push_back(P.first);
    // Create the type id summary resolution regardlness of whether we can
    // devirtualize, so that lower type tests knows the type id is used on
    // a global and not Unsat. We do this here rather than in the loop over the
    // CallSlots, since that handling will only see type tests that directly
    // feed assumes, and we would miss any that aren't currently handled by WPD
    // (such as type tests that feed assumes via phis).
    ExportSummary.getOrInsertTypeIdSummary(P.first);
  }

  // Collect information from summary about which calls to try to devirtualize.
  for (auto &P : ExportSummary) {
    for (auto &S : P.second.SummaryList) {
      auto *FS = dyn_cast<FunctionSummary>(S.get());
      if (!FS)
        continue;
      // FIXME: Only add live functions.
      for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
        for (StringRef Name : NameByGUID[VF.GUID]) {
          CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS);
        }
      }
      for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
        for (StringRef Name : NameByGUID[VF.GUID]) {
          CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeCheckedLoadUser(FS);
        }
      }
      for (const FunctionSummary::ConstVCall &VC :
           FS->type_test_assume_const_vcalls()) {
        for (StringRef Name : NameByGUID[VC.VFunc.GUID]) {
          CallSlots[{Name, VC.VFunc.Offset}]
              .ConstCSInfo[VC.Args]
              .addSummaryTypeTestAssumeUser(FS);
        }
      }
      for (const FunctionSummary::ConstVCall &VC :
           FS->type_checked_load_const_vcalls()) {
        for (StringRef Name : NameByGUID[VC.VFunc.GUID]) {
          CallSlots[{Name, VC.VFunc.Offset}]
              .ConstCSInfo[VC.Args]
              .addSummaryTypeCheckedLoadUser(FS);
        }
      }
    }
  }

  std::set<ValueInfo> DevirtTargets;
  // For each (type, offset) pair:
  for (auto &S : CallSlots) {
    // Search each of the members of the type identifier for the virtual
    // function implementation at offset S.first.ByteOffset, and add to
    // TargetsForSlot.
    std::vector<ValueInfo> TargetsForSlot;
    auto TidSummary = ExportSummary.getTypeIdCompatibleVtableSummary(S.first.TypeID);
    assert(TidSummary);
    // The type id summary would have been created while building the NameByGUID
    // map earlier.
    // NOTE(review): the start of the Res declaration (upstream line 2407,
    // `WholeProgramDevirtResolution *Res =`) is missing from this extract;
    // restore it before compiling.
        &ExportSummary.getTypeIdSummary(S.first.TypeID)
             ->WPDRes[S.first.ByteOffset];
    if (tryFindVirtualCallTargets(TargetsForSlot, *TidSummary,
                                  S.first.ByteOffset)) {

      if (!trySingleImplDevirt(TargetsForSlot, S.first, S.second, Res,
                               DevirtTargets))
        continue;
    }
  }

  // Optionally have the thin link print message for each devirtualized
  // function.
  // NOTE(review): the `if (PrintSummaryDevirt)` guard (upstream line 2421)
  // is missing from this extract; restore it before compiling, otherwise the
  // messages print unconditionally.
  for (const auto &DT : DevirtTargets)
    errs() << "Devirtualized call to " << DT << "\n";

  NumDevirtTargets += DevirtTargets.size();
}
unsigned Intr
amdgpu Simplify well known AMD library false FunctionCallee Callee
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
assume Assume Builder
static const Function * getParent(const Value *V)
This is the interface for LLVM's primary stateless and local alias analysis.
SmallVector< MachineOperand, 4 > Cond
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:678
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines DenseMapInfo traits for DenseMap.
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
uint64_t Addr
std::string Name
uint64_t Size
Provides passes for computing function attributes based on interprocedural analyses.
#define DEBUG_TYPE
Hexagon Common GEP
static cl::opt< std::string > ClReadSummary("lowertypetests-read-summary", cl::desc("Read summary from given YAML file before running pass"), cl::Hidden)
static cl::opt< PassSummaryAction > ClSummaryAction("lowertypetests-summary-action", cl::desc("What to do with the summary when running this pass"), cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"), clEnumValN(PassSummaryAction::Import, "import", "Import typeid resolutions from summary and globals"), clEnumValN(PassSummaryAction::Export, "export", "Export typeid resolutions to summary and globals")), cl::Hidden)
static cl::opt< std::string > ClWriteSummary("lowertypetests-write-summary", cl::desc("Write summary to given YAML file after running pass"), cl::Hidden)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
This file contains the declarations for metadata subclasses.
Type::TypeID TypeID
static bool mustBeUnreachableFunction(const Function &F)
Module.h This file contains the declarations for the Module class.
IntegerType * Int32Ty
#define P(N)
FunctionAnalysisManager FAM
const char LLVMTargetMachineRef TM
@ VI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
@ Targets
Definition: TextStubV5.cpp:90
static cl::opt< bool > DisableWholeProgramVisibility("disable-whole-program-visibility", cl::Hidden, cl::desc("Disable whole program visibility (overrides enabling options)"))
Provide a way to force disable whole program for debugging or workarounds, when enabled via the linke...
WPDCheckMode
Mechanism to add runtime checking of devirtualization decisions, optionally trapping or falling back ...
static cl::opt< PassSummaryAction > ClSummaryAction("wholeprogramdevirt-summary-action", cl::desc("What to do with the summary when running this pass"), cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"), clEnumValN(PassSummaryAction::Import, "import", "Import typeid resolutions from summary and globals"), clEnumValN(PassSummaryAction::Export, "export", "Export typeid resolutions to summary and globals")), cl::Hidden)
static cl::opt< bool > WholeProgramVisibility("whole-program-visibility", cl::Hidden, cl::desc("Enable whole program visibility"))
Provide a way to force enable whole program visibility in tests.
static Error checkCombinedSummaryForTesting(ModuleSummaryIndex *Summary)
static cl::list< std::string > SkipFunctionNames("wholeprogramdevirt-skip", cl::desc("Prevent function(s) from being devirtualized"), cl::Hidden, cl::CommaSeparated)
Provide way to prevent certain function from being devirtualized.
static cl::opt< std::string > ClWriteSummary("wholeprogramdevirt-write-summary", cl::desc("Write summary to given bitcode or YAML file after running pass. " "Output file format is deduced from extension: *.bc means writing " "bitcode, otherwise YAML"), cl::Hidden)
static cl::opt< unsigned > ClThreshold("wholeprogramdevirt-branch-funnel-threshold", cl::Hidden, cl::init(10), cl::desc("Maximum number of call targets per " "call site to enable branch funnels"))
static cl::opt< WPDCheckMode > DevirtCheckMode("wholeprogramdevirt-check", cl::Hidden, cl::desc("Type of checking for incorrect devirtualizations"), cl::values(clEnumValN(WPDCheckMode::None, "none", "No checking"), clEnumValN(WPDCheckMode::Trap, "trap", "Trap when incorrect"), clEnumValN(WPDCheckMode::Fallback, "fallback", "Fallback to indirect when incorrect")))
static cl::opt< std::string > ClReadSummary("wholeprogramdevirt-read-summary", cl::desc("Read summary from given bitcode or YAML file before running pass"), cl::Hidden)
static bool AddCalls(VTableSlotInfo &SlotInfo, const ValueInfo &Callee)
static cl::opt< bool > PrintSummaryDevirt("wholeprogramdevirt-print-index-based", cl::Hidden, cl::desc("Print index-based devirtualization messages"))
Value * RHS
Value * LHS
A manager for alias analyses.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:620
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:774
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:193
static AttributeList get(LLVMContext &C, ArrayRef< std::pair< unsigned, Attribute > > Attrs)
Create an AttributeList with the specified parameters in it.
static AttributeSet get(LLVMContext &C, const AttrBuilder &B)
Definition: Attributes.cpp:711
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:317
bool isValid() const
Return true if the attribute is any kind of attribute.
Definition: Attributes.h:187
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:105
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1186
void setCallingConv(CallingConv::ID CC)
Definition: InstrTypes.h:1471
bool arg_empty() const
Definition: InstrTypes.h:1350
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1408
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1467
Value * getCalledOperand() const
Definition: InstrTypes.h:1401
void setAttributes(AttributeList A)
Set the parameter attributes for this call.
Definition: InstrTypes.h:1490
FunctionType * getFunctionType() const
Definition: InstrTypes.h:1266
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
Definition: InstrTypes.h:1344
void setCalledOperand(Value *V)
Definition: InstrTypes.h:1444
AttributeList getAttributes() const
Return the parameter attributes for this call.
Definition: InstrTypes.h:1486
Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
@ ICMP_EQ
equal
Definition: InstrTypes.h:739
@ ICMP_NE
not equal
Definition: InstrTypes.h:740
void setSelectionKind(SelectionKind Val)
Definition: Comdat.h:47
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:419
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition: Constants.h:691
static Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2214
static Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2200
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, bool InBounds=false, std::optional< unsigned > InRangeIndex=std::nullopt, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
Definition: Constants.h:1228
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2228
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:833
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:888
static Constant * getAnon(ArrayRef< Constant * > V, bool Packed=false)
Return an anonymous struct that has the specified elements.
Definition: Constants.h:462
This is an important base class in LLVM.
Definition: Constant.h:41
const Constant * stripPointerCasts() const
Definition: Constant.h:213
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:356
A debug info location.
Definition: DebugLoc.h:33
bool empty() const
Definition: DenseMap.h:98
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
Subclass of Error for the sole purpose of identifying the success path in the type system.
Definition: Error.h:328
Lightweight error class with error context and mandatory checking.
Definition: Error.h:156
This class evaluates LLVM IR, producing the Constant representing each SSA instruction.
Definition: Evaluator.h:37
Helper for check-and-exit error handling.
Definition: Error.h:1355
Tagged union holding either a T or a Error.
Definition: Error.h:470
Function summary information to aid decisions and implementation of importing.
ArrayRef< Type * > params() const
Definition: DerivedTypes.h:130
bool isVarArg() const
Definition: DerivedTypes.h:123
Type * getReturnType() const
Definition: DerivedTypes.h:124
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition: Function.h:136
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:670
static Expected< GlobPattern > create(StringRef Pat)
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition: Globals.cpp:520
static bool isLocalLinkage(LinkageTypes Linkage)
Definition: GlobalValue.h:404
static bool isAvailableExternallyLinkage(LinkageTypes Linkage)
Definition: GlobalValue.h:374
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e.
Definition: GlobalValue.h:591
@ HiddenVisibility
The GV is hidden.
Definition: GlobalValue.h:64
static GUID getGUID(StringRef GlobalName)
Return a 64-bit global unique ID constructed from global value name (i.e.
Definition: GlobalValue.h:587
void setVisibility(VisibilityTypes V)
Definition: GlobalValue.h:250
@ PrivateLinkage
Like Internal, but omit from symbol table.
Definition: GlobalValue.h:56
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:48
Global variable summary information to aid decisions and implementation of importing.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2558
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:933
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:358
const BasicBlock * getParent() const
Definition: Instruction.h:90
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1455
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:82
Class to represent integer types.
Definition: DerivedTypes.h:40
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:72
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
Metadata node.
Definition: Metadata.h:943
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1399
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:37
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Root of the metadata hierarchy.
Definition: Metadata.h:61
Class to hold module path string table and global value map, and encapsulate methods for operating on...
const TypeIdSummary * getTypeIdSummary(StringRef TypeId) const
This returns either a pointer to the type id summary (if present in the summary map) or null (if not ...
ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const
Return a ValueInfo for the index value_type (convenient when iterating index).
const ModuleHash & getModuleHash(const StringRef ModPath) const
Get the module SHA1 hash recorded for the given module path.
static constexpr const char * getRegularLTOModuleName()
static std::string getGlobalNameForLocal(StringRef Name, ModuleHash ModHash)
Convenience method for creating a promoted global name for the given value name of a local,...
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:305
The optimization diagnostic interface.
Diagnostic information for applied optimization remarks.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:651
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1758
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:155
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
static ReturnInst * Create(LLVMContext &C, Value *retVal=nullptr, Instruction *InsertBefore=nullptr)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
bool contains(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
Definition: StringRef.h:422
Target - Wrapper for Target specific information.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
TypeID
Definitions of all of the base types for the Type system.
Definition: Type.h:54
static Type * getVoidTy(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:375
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
bool use_empty() const
Definition: Value.h:344
bool eraseMetadata(unsigned KindID)
Erase all metadata attachments with the given kind.
Definition: Metadata.cpp:1370
iterator_range< use_iterator > uses()
Definition: Value.h:376
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:97
An efficient, type-erasing, non-owning reference to a callable.
A raw_ostream that writes to a file descriptor.
Definition: raw_ostream.h:454
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:642
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:979
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1506
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
@ FS
Definition: X86.h:208
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:703
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
@ CommaSeparated
Definition: CommandLine.h:164
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
DiagnosticInfoOptimizationBase::Argument NV
@ OF_TextWithCRLF
The file should be opened in text mode and use a carriage linefeed '\r '.
Definition: FileSystem.h:770
uint64_t findLowestOffset(ArrayRef< VirtualCallTarget > Targets, bool IsAfter, uint64_t Size)
void setAfterReturnValues(MutableArrayRef< VirtualCallTarget > Targets, uint64_t AllocAfter, unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit)
void setBeforeReturnValues(MutableArrayRef< VirtualCallTarget > Targets, uint64_t AllocBefore, unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit)
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:397
@ Offset
Definition: DWP.cpp:406
MemoryEffects computeFunctionBodyMemoryAccess(Function &F, AAResults &AAR)
Returns the memory access properties of this copy of the function.
void updateVCallVisibilityInModule(Module &M, bool WholeProgramVisibilityEnabledInLTO, const DenseSet< GlobalValue::GUID > &DynamicExportSymbols)
If whole program visibility asserted, then upgrade all public vcall visibility metadata on vtable def...
void writeIndexToFile(const ModuleSummaryIndex &Index, raw_ostream &Out, const std::map< std::string, GVSummaryMapTy > *ModuleToSummariesForIndex=nullptr)
Write the specified module summary index to the given raw output stream, where it will be written in ...
@ Export
Export information to summary.
@ Import
Import information from summary.
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:2092
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:732
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition: Error.h:1246
bool hasWholeProgramVisibility(bool WholeProgramVisibilityEnabledInLTO)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:179
Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
void updatePublicTypeTestCalls(Module &M, bool WholeProgramVisibilityEnabledInLTO)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void findDevirtualizableCallsForTypeCheckedLoad(SmallVectorImpl< DevirtCallSite > &DevirtCalls, SmallVectorImpl< Instruction * > &LoadedPtrs, SmallVectorImpl< Instruction * > &Preds, bool &HasNonCallUses, const CallInst *CI, DominatorTree &DT)
Given a call to the intrinsic @llvm.type.checked.load, find all devirtualizable call sites based on t...
CallBase & versionCallSite(CallBase &CB, Value *Callee, MDNode *BranchWeights)
Predicate and clone the given call site.
bool AreStatisticsEnabled()
Check if statistics are enabled.
Definition: Statistic.cpp:139
void updateIndexWPDForExports(ModuleSummaryIndex &Summary, function_ref< bool(StringRef, ValueInfo)> isExported, std::map< ValueInfo, std::vector< VTableSlotSummary > > &LocalWPDTargetsMap)
Call after cross-module importing to update the recorded single impl devirt target names for any loca...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
void updateVCallVisibilityInIndex(ModuleSummaryIndex &Index, bool WholeProgramVisibilityEnabledInLTO, const DenseSet< GlobalValue::GUID > &DynamicExportSymbols)
If whole program visibility asserted, then upgrade all public vcall visibility metadata on vtable def...
void runWholeProgramDevirtOnIndex(ModuleSummaryIndex &Summary, std::set< GlobalValue::GUID > &ExportedGUIDs, std::map< ValueInfo, std::vector< VTableSlotSummary > > &LocalWPDTargetsMap)
Perform index-based whole program devirtualization on the Summary index.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
Expected< T > errorOrToExpected(ErrorOr< T > &&EO)
Convert an ErrorOr<T> to an Expected<T>.
Definition: Error.h:1186
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:184
Error errorCodeToError(std::error_code EC)
Helper for converting an std::error_code to a Error.
Definition: Error.cpp:92
Instruction * SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore, bool Unreachable, MDNode *BranchWeights, DominatorTree *DT, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
std::vector< TypeIdOffsetVtableInfo > TypeIdCompatibleVtableInfo
List of vtable definitions decorated by a particular type identifier, and their corresponding offsets...
void consumeError(Error Err)
Consume a Error without doing anything.
Definition: Error.h:1043
void findDevirtualizableCallsForTypeTest(SmallVectorImpl< DevirtCallSite > &DevirtCalls, SmallVectorImpl< CallInst * > &Assumes, const CallInst *CI, DominatorTree &DT)
Given a call to the intrinsic @llvm.type.test, find all devirtualizable call sites based on the call ...
Constant * getPointerAtOffset(Constant *I, uint64_t Offset, Module &M, Constant *TopLevelGlobal=nullptr)
Processes a Constant recursively looking into elements of arrays, structs and expressions to find a t...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Class to accumulate and hold information about a callee.
static unsigned getHashValue(const VTableSlotSummary &I)
static bool isEqual(const VTableSlotSummary &LHS, const VTableSlotSummary &RHS)
static bool isEqual(const VTableSlot &LHS, const VTableSlot &RHS)
static unsigned getHashValue(const VTableSlot &I)
An information struct used to provide DenseMap with the various necessary components for a given valu...
Definition: DenseMapInfo.h:51
A call site that could be devirtualized.
A specification for a virtual function call with all constant integer arguments.
An "identifier" for a virtual function.
The following data structures summarize type metadata information.
std::map< uint64_t, WholeProgramDevirtResolution > WPDRes
Mapping from byte offset to whole-program devirt resolution for that (typeid, byte offset) pair.
TypeTestResolution TTRes
@ Unsat
Unsatisfiable type (i.e. no global has this type metadata)
enum llvm::TypeTestResolution::Kind TheKind
Struct that holds a reference to a particular GUID in a global value summary.
ArrayRef< std::unique_ptr< GlobalValueSummary > > getSummaryList() const
PreservedAnalyses run(Module &M, ModuleAnalysisManager &)
@ UniformRetVal
Uniform return value optimization.
@ VirtualConstProp
Virtual constant propagation.
@ UniqueRetVal
Unique return value optimization.
uint64_t Info
Additional information for the resolution:
enum llvm::WholeProgramDevirtResolution::ByArg::Kind TheKind
enum llvm::WholeProgramDevirtResolution::Kind TheKind
std::map< std::vector< uint64_t >, ByArg > ResByArg
Resolutions for calls with all constant integer arguments (excluding the first argument,...
@ SingleImpl
Single implementation devirtualization.
@ BranchFunnel
When retpoline mitigation is enabled, use a branch funnel that is defined in the merged module.
VirtualCallTarget(GlobalValue *Fn, const TypeMemberInfo *TM)