LLVM 23.0.0git
RewriteStatepointsForGC.cpp
//===- RewriteStatepointsForGC.cpp - Make GC relocations explicit ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Rewrite call/invoke instructions so as to make potential relocations
// performed by the garbage collector explicit in the IR.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/RewriteStatepointsForGC.h"

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GCStrategy.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <optional>
#include <set>
#include <string>
#include <utility>
#include <vector>

#define DEBUG_TYPE "rewrite-statepoints-for-gc"

using namespace llvm;

// Print the live set found at the insert location.
static cl::opt<bool> PrintLiveSet("spp-print-liveset", cl::Hidden,
                                  cl::init(false));
static cl::opt<bool> PrintLiveSetSize("spp-print-liveset-size", cl::Hidden,
                                      cl::init(false));

// Print out the base pointers for debugging.
static cl::opt<bool> PrintBasePointers("spp-print-base-pointers", cl::Hidden,
                                       cl::init(false));

// Cost threshold measuring when it is profitable to rematerialize a value
// instead of relocating it.
static cl::opt<unsigned>
    RematerializationThreshold("spp-rematerialization-threshold", cl::Hidden,
                               cl::init(6));

#ifdef EXPENSIVE_CHECKS
static bool ClobberNonLive = true;
#else
static bool ClobberNonLive = false;
#endif

static cl::opt<bool, true> ClobberNonLiveOverride("rs4gc-clobber-non-live",
                                                  cl::location(ClobberNonLive),
                                                  cl::Hidden);

static cl::opt<bool>
    AllowStatepointWithNoDeoptInfo("rs4gc-allow-statepoint-with-no-deopt-info",
                                   cl::Hidden, cl::init(true));

static cl::opt<bool> RematDerivedAtUses("rs4gc-remat-derived-at-uses",
                                        cl::Hidden, cl::init(true));

/// The IR fed into RewriteStatepointsForGC may have had attributes and
/// metadata implying dereferenceability that are no longer valid/correct after
/// RewriteStatepointsForGC has run. This is because, semantically, after
/// RewriteStatepointsForGC runs, all calls to gc.statepoint "free" the entire
/// heap. stripNonValidData (conservatively) restores correctness by erasing
/// all attributes in the module that externally imply dereferenceability.
/// Similar reasoning also applies to the noalias attributes and metadata:
/// gc.statepoint can touch the entire heap, including noalias objects.
/// Apart from attributes and metadata, we also remove instructions that imply
/// constant physical memory: llvm.invariant.start.
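/// For illustration (example IR, not from this file): an argument attribute
/// such as
///   define void @f(ptr addrspace(1) dereferenceable(16) %p)
/// is no longer sound once a statepoint between the definition and a use of
/// %p may move the object it points to, so it is stripped.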
static void stripNonValidData(Module &M);

// Find the GC strategy for a function, or null if it doesn't have one.
static std::unique_ptr<GCStrategy> findGCStrategy(Function &F);

PreservedAnalyses RewriteStatepointsForGC::run(Module &M,
                                               ModuleAnalysisManager &AM) {
  bool Changed = false;
  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  for (Function &F : M) {
    // Nothing to do for declarations.
    if (F.isDeclaration() || F.empty())
      continue;

    // Policy choice says not to rewrite - the most common reason is that we're
    // compiling code without a GCStrategy.
    if (!shouldRewriteStatepointsIn(F))
      continue;

    auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
    auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
    auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
    Changed |= runOnFunction(F, DT, TTI, TLI);
  }
  if (!Changed)
    return PreservedAnalyses::all();

  // stripNonValidData asserts that shouldRewriteStatepointsIn
  // returns true for at least one function in the module. Since at least
  // one function changed, we know that the precondition is satisfied.
  stripNonValidData(M);

  PreservedAnalyses PA;
  PA.preserve<TargetIRAnalysis>();
  PA.preserve<TargetLibraryAnalysis>();
  return PA;
}

namespace {

struct GCPtrLivenessData {
  /// Values defined in this block.
  MapVector<BasicBlock *, SetVector<Value *>> KillSet;

  /// Values used in this block (and thus live); does not include values
  /// killed within this block.
  MapVector<BasicBlock *, SetVector<Value *>> LiveSet;

  /// Values live into this basic block (i.e. used by any
  /// instruction in this basic block or ones reachable from here).
  MapVector<BasicBlock *, SetVector<Value *>> LiveIn;

  /// Values live out of this basic block (i.e. live into
  /// any successor block).
  MapVector<BasicBlock *, SetVector<Value *>> LiveOut;
};

// The type of the internal cache used inside the findBasePointers family
// of functions. From the caller's perspective, this is an opaque type and
// should not be inspected.
//
// In the actual implementation this caches two relations:
// - The base relation itself (i.e. this pointer is based on that one)
// - The base defining value relation (i.e. before base_phi insertion)
// Generally, after the execution of a full findBasePointer call, only the
// base relation will remain. Internally, we add a mixture of the two
// types, then update all of the second type to the first type.
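//
// For illustration (example IR, not from this file): for
//   %d = getelementptr i8, ptr addrspace(1) %b, i64 16
// the cache records %d -> %b (a base relation), while for
//   %p = phi ptr addrspace(1) [ %a, %left ], [ %d, %right ]
// it initially records %p -> %p (a base *defining* value) until a base phi
// is materialized for it.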
using DefiningValueMapTy = MapVector<Value *, Value *>;
using IsKnownBaseMapTy = MapVector<Value *, bool>;
using PointerToBaseTy = MapVector<Value *, Value *>;
using StatepointLiveSetTy = SetVector<Value *>;
using RematerializedValueMapTy =
    MapVector<AssertingVH<Instruction>, AssertingVH<Value>>;

struct PartiallyConstructedSafepointRecord {
  /// The set of values known to be live across this safepoint.
  StatepointLiveSetTy LiveSet;

  /// The *new* gc.statepoint instruction itself. This produces the token
  /// that normal-path gc.relocates and the gc.result are tied to.
  GCStatepointInst *StatepointToken;

  /// Instruction to which exceptional gc relocates are attached.
  /// Makes it easier to iterate through them during relocationViaAlloca.
  Instruction *UnwindToken;

  /// Record live values that we rematerialized instead of relocating.
  /// They are not included in the 'LiveSet' field.
  /// Maps each rematerialized copy to its original value.
  RematerializedValueMapTy RematerializedValues;
};

struct RematerizlizationCandidateRecord {
  // Chain from derived pointer to base.
  SmallVector<Instruction *, 3> ChainToBase;
  // Original base.
  Value *RootOfChain;
  // Cost of chain.
  InstructionCost Cost;
};
using RematCandTy = MapVector<Value *, RematerizlizationCandidateRecord>;

} // end anonymous namespace

static ArrayRef<Value *> GetDeoptBundleOperands(const CallBase *Call) {
  std::optional<OperandBundleUse> DeoptBundle =
      Call->getOperandBundle(LLVMContext::OB_deopt);

  if (!DeoptBundle) {
    assert(AllowStatepointWithNoDeoptInfo &&
           "Found non-leaf call without deopt info!");
    return {};
  }

  return DeoptBundle->Inputs;
}
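
// For illustration (example IR, not from this file): for
//   call void @foo() [ "deopt"(i32 7, ptr addrspace(1) %obj) ]
// this returns the bundle operands (i32 7, %obj); a call with no "deopt"
// bundle is only accepted when -rs4gc-allow-statepoint-with-no-deopt-info
// is set (the default).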

/// Compute the live-in set for every basic block in the function.
static void computeLiveInValues(DominatorTree &DT, Function &F,
                                GCPtrLivenessData &Data, GCStrategy *GC);

/// Given results from the dataflow liveness computation, find the set of live
/// values at a particular instruction.
static void findLiveSetAtInst(Instruction *inst, GCPtrLivenessData &Data,
                              StatepointLiveSetTy &out, GCStrategy *GC);

static bool isGCPointerType(Type *T, GCStrategy *GC) {
  assert(GC && "GC Strategy for isGCPointerType cannot be null");

  if (!isa<PointerType>(T))
    return false;

  // Conservative - same as StatepointLowering.
  return GC->isGCManagedPointer(T).value_or(true);
}

// Return true if this type is one which a) is a gc pointer or contains a GC
// pointer and b) is of a type this code expects to encounter as a live value.
// (The insertion code will assert that a type which matches (a) but not (b)
// is not encountered.)
static bool isHandledGCPointerType(Type *T, GCStrategy *GC) {
  // We fully support gc pointers.
  if (isGCPointerType(T, GC))
    return true;
  // We partially support vectors of gc pointers. The code will assert if it
  // can't handle something.
  if (auto VT = dyn_cast<VectorType>(T))
    if (isGCPointerType(VT->getElementType(), GC))
      return true;
  return false;
}
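
// For illustration (assuming addrspace(1) is the collected address space, as
// in this pass's tests): ptr addrspace(1) and <4 x ptr addrspace(1)> are
// handled; an aggregate such as {ptr addrspace(1), i64} contains a GC pointer
// but is not handled, and trips the assertions mentioned above.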

#ifndef NDEBUG
/// Returns true if this type contains a gc pointer, whether we know how to
/// handle that type or not.
static bool containsGCPtrType(Type *Ty, GCStrategy *GC) {
  if (isGCPointerType(Ty, GC))
    return true;
  if (VectorType *VT = dyn_cast<VectorType>(Ty))
    return isGCPointerType(VT->getScalarType(), GC);
  if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
    return containsGCPtrType(AT->getElementType(), GC);
  if (StructType *ST = dyn_cast<StructType>(Ty))
    return llvm::any_of(ST->elements(),
                        [GC](Type *Ty) { return containsGCPtrType(Ty, GC); });
  return false;
}

// Returns true if this is a type which a) is a gc pointer or contains a GC
// pointer and b) is of a type which the code doesn't expect (i.e. first class
// aggregates). Used to trip assertions.
static bool isUnhandledGCPointerType(Type *Ty, GCStrategy *GC) {
  return containsGCPtrType(Ty, GC) && !isHandledGCPointerType(Ty, GC);
}
#endif

// Return the name of the value suffixed with the provided suffix, or, if the
// value didn't have a name, the specified default name.
static std::string suffixed_name_or(Value *V, StringRef Suffix,
                                    StringRef DefaultName) {
  return V->hasName() ? (V->getName() + Suffix).str() : DefaultName.str();
}

// Conservatively identifies any definitions which might be live at the
// given instruction. The analysis is performed immediately before the
// given instruction. Values defined by that instruction are not considered
// live. Values used by that instruction are considered live.
static void analyzeParsePointLiveness(
    DominatorTree &DT, GCPtrLivenessData &OriginalLivenessData, CallBase *Call,
    PartiallyConstructedSafepointRecord &Result, GCStrategy *GC) {
  StatepointLiveSetTy LiveSet;
  findLiveSetAtInst(Call, OriginalLivenessData, LiveSet, GC);

  if (PrintLiveSet) {
    dbgs() << "Live Variables:\n";
    for (Value *V : LiveSet)
      dbgs() << " " << V->getName() << " " << *V << "\n";
  }
  if (PrintLiveSetSize) {
    dbgs() << "Safepoint For: " << Call->getCalledOperand()->getName() << "\n";
    dbgs() << "Number live values: " << LiveSet.size() << "\n";
  }
  Result.LiveSet = LiveSet;
}

/// Returns true if V is a known base.
static bool isKnownBase(Value *V, const IsKnownBaseMapTy &KnownBases);

/// Caches the IsKnownBase flag for a value and asserts that it wasn't present
/// in the cache before.
static void setKnownBase(Value *V, bool IsKnownBase,
                         IsKnownBaseMapTy &KnownBases);

static Value *findBaseDefiningValue(Value *I, DefiningValueMapTy &Cache,
                                    IsKnownBaseMapTy &KnownBases);

/// Return a base defining value for the 'Index' element of the given vector
/// instruction 'I'. If Index is null, returns a BDV for the entire vector
/// 'I'. As an optimization, this method will try to determine when the
/// element is known to already be a base pointer. If this can be established,
/// the second value in the returned pair will be true. Note that either a
/// vector or a pointer typed value can be returned. For the former, the
/// vector returned is a BDV (and possibly a base) of the entire vector 'I'.
/// If the latter, the returned pointer is a BDV (or possibly a base) for the
/// particular element in 'I'.
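///
/// For illustration (example IR, not from this file): for
///   %vec.1 = insertelement <2 x ptr addrspace(1)> %vec.0,
///                          ptr addrspace(1) %p, i32 1
/// the returned BDV is %vec.1 itself (not known to be a base), and the outer
/// algorithm will construct a parallel vector of bases for it.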
static Value *findBaseDefiningValueOfVector(Value *I, DefiningValueMapTy &Cache,
                                            IsKnownBaseMapTy &KnownBases) {
  // Each case parallels findBaseDefiningValue below, see that code for
  // detailed motivation.

  auto Cached = Cache.find(I);
  if (Cached != Cache.end())
    return Cached->second;

  if (isa<Argument>(I)) {
    // An incoming argument to the function is a base pointer.
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */ true, KnownBases);
    return I;
  }

  if (isa<Constant>(I)) {
    // The base of a constant vector consists only of constant null pointers.
    // For reasoning see the similar case inside 'findBaseDefiningValue'.
    auto *CAZ = ConstantAggregateZero::get(I->getType());
    Cache[I] = CAZ;
    setKnownBase(CAZ, /* IsKnownBase */ true, KnownBases);
    return CAZ;
  }

  if (isa<LoadInst>(I)) {
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */ true, KnownBases);
    return I;
  }

  if (isa<InsertElementInst>(I)) {
    // We don't know whether this vector contains entirely base pointers or
    // not. To be conservatively correct, we treat it as a BDV and will
    // duplicate code as needed to construct a parallel vector of bases.
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */ false, KnownBases);
    return I;
  }

  if (isa<ShuffleVectorInst>(I)) {
    // We don't know whether this vector contains entirely base pointers or
    // not. To be conservatively correct, we treat it as a BDV and will
    // duplicate code as needed to construct a parallel vector of bases.
    // TODO: There are a number of local optimizations which could be applied
    // here for particular shufflevector patterns.
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */ false, KnownBases);
    return I;
  }

  // The behavior of getelementptr instructions is the same for vector and
  // non-vector data types.
  if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
    auto *BDV =
        findBaseDefiningValue(GEP->getPointerOperand(), Cache, KnownBases);
    Cache[GEP] = BDV;
    return BDV;
  }

  // The behavior of freeze instructions is the same for vector and
  // non-vector data types.
  if (auto *Freeze = dyn_cast<FreezeInst>(I)) {
    auto *BDV = findBaseDefiningValue(Freeze->getOperand(0), Cache, KnownBases);
    Cache[Freeze] = BDV;
    return BDV;
  }

  // If the pointer comes through a bitcast of a vector of pointers to
  // a vector of another type of pointer, then look through the bitcast.
  if (auto *BC = dyn_cast<BitCastInst>(I)) {
    auto *BDV = findBaseDefiningValue(BC->getOperand(0), Cache, KnownBases);
    Cache[BC] = BDV;
    return BDV;
  }

  // We assume that functions in the source language only return base
  // pointers. This should probably be generalized via attributes to support
  // both source language and internal functions.
  if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */ true, KnownBases);
    return I;
  }

  // A PHI or Select is a base defining value. The outer findBasePointer
  // algorithm is responsible for constructing a base value for this BDV.
  assert((isa<PHINode>(I) || isa<SelectInst>(I)) &&
         "unknown vector instruction - no base found for vector element");
  Cache[I] = I;
  setKnownBase(I, /* IsKnownBase */ false, KnownBases);
  return I;
}

/// Helper function for findBasePointer - Will return a value which either a)
/// defines the base pointer for the input, b) blocks the simple search
/// (i.e. a PHI or Select of two derived pointers), or c) involves a change
/// from pointer to vector type or back.
static Value *findBaseDefiningValue(Value *I, DefiningValueMapTy &Cache,
                                    IsKnownBaseMapTy &KnownBases) {
  assert(I->getType()->isPtrOrPtrVectorTy() &&
         "Illegal to ask for the base pointer of a non-pointer type");
  auto Cached = Cache.find(I);
  if (Cached != Cache.end())
    return Cached->second;

  if (I->getType()->isVectorTy())
    return findBaseDefiningValueOfVector(I, Cache, KnownBases);

  if (isa<Argument>(I)) {
    // An incoming argument to the function is a base pointer.
    // We should never have reached here if this argument isn't a gc value.
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */ true, KnownBases);
    return I;
  }

  if (isa<Constant>(I)) {
    // We assume that objects with a constant base (e.g. a global) can't move
    // and don't need to be reported to the collector because they are always
    // live. Besides global references, all kinds of constants (e.g. undef,
    // constant expressions, null pointers) can be introduced by the inliner or
    // the optimizer, especially on dynamically dead paths.
    // Here we treat all of them as having a single null base. By doing this we
    // try to avoid problems reporting various conflicts in the form of
    // "phi (const1, const2)" or "phi (const, regular gc ptr)".
    // See the constant.ll file for relevant test cases.
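    //
    // For illustration (example IR, not from this file):
    //   %p = phi ptr addrspace(1) [ null, %a ], [ %obj, %b ]
    // treating null as having a null base keeps %p's base a simple phi over
    // (null, base-of-%obj) instead of a conflict between distinct constants.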

    auto *CPN = ConstantPointerNull::get(cast<PointerType>(I->getType()));
    Cache[I] = CPN;
    setKnownBase(CPN, /* IsKnownBase */ true, KnownBases);
    return CPN;
  }

  // inttoptrs in an integral address space are currently ill-defined. We
  // treat them as defining base pointers here for consistency with the
  // constant rule above and because we don't really have a better semantic
  // to give them. Note that the optimizer is always free to insert undefined
  // behavior on dynamically dead paths as well.
  if (isa<IntToPtrInst>(I)) {
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */ true, KnownBases);
    return I;
  }

  if (CastInst *CI = dyn_cast<CastInst>(I)) {
    Value *Def = CI->stripPointerCasts();
    // If stripping pointer casts changes the address space there is an
    // addrspacecast in between.
    assert(cast<PointerType>(Def->getType())->getAddressSpace() ==
               cast<PointerType>(CI->getType())->getAddressSpace() &&
           "unsupported addrspacecast");
    // If we find a cast instruction here, it means we've found a cast which is
    // not simply a pointer cast (i.e. an inttoptr). We don't know how to
    // handle int->ptr conversion.
    assert(!isa<CastInst>(Def) && "shouldn't find another cast here");
    auto *BDV = findBaseDefiningValue(Def, Cache, KnownBases);
    Cache[CI] = BDV;
    return BDV;
  }

  if (isa<LoadInst>(I)) {
    // The value loaded is a gc base itself.
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */ true, KnownBases);
    return I;
  }

  if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
    // The base of this GEP is the base.
    auto *BDV =
        findBaseDefiningValue(GEP->getPointerOperand(), Cache, KnownBases);
    Cache[GEP] = BDV;
    return BDV;
  }

  if (auto *Freeze = dyn_cast<FreezeInst>(I)) {
    auto *BDV = findBaseDefiningValue(Freeze->getOperand(0), Cache, KnownBases);
    Cache[Freeze] = BDV;
    return BDV;
  }

  if (auto *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    default:
      // fall through to general call handling
      break;
    case Intrinsic::experimental_gc_statepoint:
      llvm_unreachable("statepoints don't produce pointers");
    case Intrinsic::experimental_gc_relocate:
      // Rerunning safepoint insertion after safepoints are already
      // inserted is not supported. It could probably be made to work,
      // but why are you doing this? There's no good reason.
      llvm_unreachable("repeat safepoint insertion is not supported");
    case Intrinsic::gcroot:
      // Currently, this mechanism hasn't been extended to work with gcroot.
      // There's no reason it couldn't be, but I haven't thought about the
      // implications much.
      llvm_unreachable(
          "interaction with the gcroot mechanism is not supported");
    case Intrinsic::experimental_gc_get_pointer_base:
      auto *BDV = findBaseDefiningValue(II->getOperand(0), Cache, KnownBases);
      Cache[II] = BDV;
      return BDV;
    }
  }
  // We assume that functions in the source language only return base
  // pointers. This should probably be generalized via attributes to support
  // both source language and internal functions.
  if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */ true, KnownBases);
    return I;
  }

  // TODO: I have absolutely no idea how to implement this part yet. It's not
  // necessarily hard, I just haven't really looked at it yet.
  assert(!isa<LandingPadInst>(I) && "Landing Pad is unimplemented");

  if (isa<AtomicCmpXchgInst>(I)) {
    // A CAS is effectively an atomic store and load combined under a
    // predicate. From the perspective of base pointers, we just treat it
    // like a load.
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */ true, KnownBases);
    return I;
  }

  if (isa<AtomicRMWInst>(I)) {
    assert(cast<AtomicRMWInst>(I)->getOperation() == AtomicRMWInst::Xchg &&
           "Only Xchg is allowed for pointer values");
    // A RMW Xchg is a combined atomic load and store, so we can treat the
    // loaded value as a base pointer.
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */ true, KnownBases);
    return I;
  }

  // The aggregate ops. Aggregates can either be in the heap or on the
  // stack, but in either case, this is simply a field load. As a result,
  // this is a defining definition of the base just like a load is.
  if (isa<ExtractValueInst>(I)) {
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */ true, KnownBases);
    return I;
  }

  // We should never see an insertvalue since that would require we be
  // tracing back a struct value not a pointer value.
  assert(!isa<InsertValueInst>(I) &&
         "Base pointer for a struct is meaningless");

  // This value might have been generated by findBasePointer() called when
  // substituting the gc.get.pointer.base() intrinsic.
  bool IsKnownBase =
      isa<Instruction>(I) && cast<Instruction>(I)->getMetadata("is_base_value");
  setKnownBase(I, /* IsKnownBase */ IsKnownBase, KnownBases);
  Cache[I] = I;

  // An extractelement produces a base result exactly when its input does.
  // We may need to insert a parallel instruction to extract the appropriate
  // element out of the base vector corresponding to the input. Given this,
  // it's analogous to the phi and select case even though it's not a merge.
  if (isa<ExtractElementInst>(I))
    // Note: There are a lot of obvious peephole cases here. These are
    // deliberately handled after the main base pointer inference algorithm to
    // make writing test cases to exercise that code easier.
    return I;

  // The last two cases here don't return a base pointer. Instead, they
  // return a value which dynamically selects from among several base
  // derived pointers (each with its own base potentially). It's the job of
  // the caller to resolve these.
  assert((isa<PHINode>(I) || isa<SelectInst>(I)) &&
         "missing instruction case in findBaseDefiningValue");
  return I;
}

/// Returns the base defining value for this value.
static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &Cache,
                                          IsKnownBaseMapTy &KnownBases) {
  if (!Cache.contains(I)) {
    auto *BDV = findBaseDefiningValue(I, Cache, KnownBases);
    Cache[I] = BDV;
    LLVM_DEBUG(dbgs() << "fBDV-cached: " << I->getName() << " -> "
                      << Cache[I]->getName() << ", is known base = "
                      << KnownBases[I] << "\n");
  }
  assert(Cache[I] != nullptr);
  assert(KnownBases.contains(Cache[I]) &&
         "Cached value must be present in known bases map");
  return Cache[I];
}

/// Return a base pointer for this value if known. Otherwise, return its
/// base defining value.
static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache,
                            IsKnownBaseMapTy &KnownBases) {
  Value *Def = findBaseDefiningValueCached(I, Cache, KnownBases);
  auto Found = Cache.find(Def);
  if (Found != Cache.end()) {
    // Either a base-of relation, or a self reference. Caller must check.
    return Found->second;
  }
  // Only a BDV available.
  return Def;
}

#ifndef NDEBUG
/// This value is a base pointer that is not generated by RS4GC, i.e. it
/// already exists in the code.
static bool isOriginalBaseResult(Value *V) {
  // No recursion possible.
  return !isa<PHINode>(V) && !isa<SelectInst>(V) &&
         !isa<ExtractElementInst>(V) && !isa<InsertElementInst>(V) &&
         !isa<ShuffleVectorInst>(V);
}
#endif

static bool isKnownBase(Value *V, const IsKnownBaseMapTy &KnownBases) {
  auto It = KnownBases.find(V);
  assert(It != KnownBases.end() && "Value not present in the map");
  return It->second;
}

static void setKnownBase(Value *V, bool IsKnownBase,
                         IsKnownBaseMapTy &KnownBases) {
#ifndef NDEBUG
  auto It = KnownBases.find(V);
  if (It != KnownBases.end())
    assert(It->second == IsKnownBase && "Changing already present value");
#endif
  KnownBases[V] = IsKnownBase;
}

// Returns true if First and Second values are both scalar or both vector.
static bool areBothVectorOrScalar(Value *First, Value *Second) {
  return isa<VectorType>(First->getType()) ==
         isa<VectorType>(Second->getType());
}

namespace {

/// Models the state of a single base defining value in the findBasePointer
/// algorithm for determining where a new instruction is needed to propagate
/// the base of this BDV.
class BDVState {
public:
  enum StatusTy {
    // Starting state of lattice
    Unknown,
    // Some specific base value -- does *not* mean that instruction
    // propagates the base of the object
    // ex: gep %arg, 16 -> %arg is the base value
    Base,
    // Need to insert a node to represent a merge.
    Conflict
  };

  BDVState() {
    llvm_unreachable("missing state in map");
  }

  explicit BDVState(Value *OriginalValue)
      : OriginalValue(OriginalValue) {}
  explicit BDVState(Value *OriginalValue, StatusTy Status,
                    Value *BaseValue = nullptr)
      : OriginalValue(OriginalValue), Status(Status), BaseValue(BaseValue) {
    assert(Status != Base || BaseValue);
  }

  StatusTy getStatus() const { return Status; }
  Value *getOriginalValue() const { return OriginalValue; }
  Value *getBaseValue() const { return BaseValue; }

  bool isBase() const { return getStatus() == Base; }
  bool isUnknown() const { return getStatus() == Unknown; }
  bool isConflict() const { return getStatus() == Conflict; }

  // Values of type BDVState form a lattice, and this function implements the
  // meet operation.
  void meet(const BDVState &Other) {
    auto markConflict = [&]() {
      Status = BDVState::Conflict;
      BaseValue = nullptr;
    };
    // Conflict is a final state.
    if (isConflict())
      return;
    // If we are not known - just take the other state.
    if (isUnknown()) {
      Status = Other.getStatus();
      BaseValue = Other.getBaseValue();
      return;
    }
    // We are base.
    assert(isBase() && "Unknown state");
    // If other is unknown - just keep our state.
    if (Other.isUnknown())
      return;
    // If other is conflict - it is a final state.
    if (Other.isConflict())
      return markConflict();
    // Other is base as well.
    assert(Other.isBase() && "Unknown state");
    // If bases are different - Conflict.
    if (getBaseValue() != Other.getBaseValue())
      return markConflict();
    // We are identical, do nothing.
  }
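
  // For illustration (a summary of the code above, not in the original
  // source), the meet table is:
  //   Unknown  meet X        -> X
  //   Base(b)  meet Unknown  -> Base(b)
  //   Base(b)  meet Base(b)  -> Base(b)
  //   Base(b1) meet Base(b2) -> Conflict   (b1 != b2)
  //   Conflict meet X        -> Conflict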

  bool operator==(const BDVState &Other) const {
    return OriginalValue == Other.OriginalValue &&
           BaseValue == Other.BaseValue && Status == Other.Status;
  }

  bool operator!=(const BDVState &other) const { return !(*this == other); }

  LLVM_DUMP_METHOD
  void dump() const {
    print(dbgs());
    dbgs() << '\n';
  }

  void print(raw_ostream &OS) const {
    switch (getStatus()) {
    case Unknown:
      OS << "U";
      break;
    case Base:
      OS << "B";
      break;
    case Conflict:
      OS << "C";
      break;
    }
    OS << " (base " << getBaseValue() << " - "
       << (getBaseValue() ? getBaseValue()->getName() : "nullptr") << ")"
       << " for " << OriginalValue->getName() << ":";
  }

private:
  AssertingVH<Value> OriginalValue; // instruction this state corresponds to
  StatusTy Status = Unknown;
  AssertingVH<Value> BaseValue = nullptr; // Non-null only if Status == Base.
};

} // end anonymous namespace

#ifndef NDEBUG
static raw_ostream &operator<<(raw_ostream &OS, const BDVState &State) {
  State.print(OS);
  return OS;
}
#endif

/// For a given value or instruction, figure out what base pointer it's derived
/// from. For gc objects, this is simply itself. On success, returns a value
/// which is the base pointer. (This is reliable and can be used for
/// relocation.) On failure, returns nullptr.
static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache,
                              IsKnownBaseMapTy &KnownBases) {
  Value *Def = findBaseOrBDV(I, Cache, KnownBases);

  if (isKnownBase(Def, KnownBases) && areBothVectorOrScalar(Def, I))
    return Def;

  // Here's the rough algorithm:
  // - For every SSA value, construct a mapping to either an actual base
  //   pointer or a PHI which obscures the base pointer.
  // - Construct a mapping from PHI to unknown TOP state. Use an
  //   optimistic algorithm to propagate base pointer information. Lattice
  //   looks like:
  //   UNKNOWN
  //   b1 b2 b3 b4
  //   CONFLICT
  //   When the algorithm terminates, all PHIs will either have a single
  //   concrete base or be in a conflict state.
  // - For every conflict, insert a dummy PHI node without arguments. Add
  //   these to the base[Instruction] = BasePtr mapping. For every
  //   non-conflict, add the actual base.
  // - For every conflict, add arguments for the base[a] of each input
  //   arguments.
  //
  // Note: A simpler form of this would be to add the conflict form of all
  // PHIs without running the optimistic algorithm. This would be
  // analogous to pessimistic data flow and would likely lead to an
  // overall worse solution.
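  //
  // For illustration (example IR, not from this file): for a pointer advanced
  // around a loop,
  //   %iter = phi ptr addrspace(1) [ %obj, %entry ], [ %iter.next, %loop ]
  //   %iter.next = getelementptr i8, ptr addrspace(1) %iter, i64 16
  // the fixed point assigns %iter the single base %obj and no new phi is
  // needed; a base_phi is materialized only when the incoming bases conflict
  // (e.g. a phi over two distinct objects).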

#ifndef NDEBUG
  auto isExpectedBDVType = [](Value *BDV) {
    return isa<PHINode>(BDV) || isa<SelectInst>(BDV) ||
           isa<ExtractElementInst>(BDV) || isa<InsertElementInst>(BDV) ||
           isa<ShuffleVectorInst>(BDV);
  };
#endif

  // Once populated, will contain a mapping from each potentially non-base BDV
  // to a lattice value (described above) which corresponds to that BDV.
  // We use the order of insertion (DFS over the def/use graph) to provide a
  // stable deterministic ordering for visiting DenseMaps (which are unordered)
  // below. This is important for deterministic compilation.
  MapVector<Value *, BDVState> States;

#ifndef NDEBUG
  auto VerifyStates = [&]() {
    for (auto &Entry : States) {
      assert(Entry.first == Entry.second.getOriginalValue());
    }
  };
#endif

  auto visitBDVOperands = [](Value *BDV, std::function<void (Value*)> F) {
    if (PHINode *PN = dyn_cast<PHINode>(BDV)) {
      for (Value *InVal : PN->incoming_values())
        F(InVal);
    } else if (SelectInst *SI = dyn_cast<SelectInst>(BDV)) {
      F(SI->getTrueValue());
      F(SI->getFalseValue());
    } else if (auto *EE = dyn_cast<ExtractElementInst>(BDV)) {
      F(EE->getVectorOperand());
    } else if (auto *IE = dyn_cast<InsertElementInst>(BDV)) {
      F(IE->getOperand(0));
      F(IE->getOperand(1));
    } else if (auto *SV = dyn_cast<ShuffleVectorInst>(BDV)) {
      // For a canonical broadcast, ignore the undef argument
      // (without this, we insert a parallel base shuffle for every broadcast)
      F(SV->getOperand(0));
      if (!SV->isZeroEltSplat())
        F(SV->getOperand(1));
    } else {
      llvm_unreachable("unexpected BDV type");
    }
  };

  // Recursively fill in all base defining values reachable from the initial
  // one for which we don't already know a definite base value.
  /* scope */ {
    SmallVector<Value *, 16> Worklist;
    Worklist.push_back(Def);
    States.insert({Def, BDVState(Def)});
    while (!Worklist.empty()) {
      Value *Current = Worklist.pop_back_val();
      assert(!isOriginalBaseResult(Current) && "why did it get added?");

      auto visitIncomingValue = [&](Value *InVal) {
        Value *Base = findBaseOrBDV(InVal, Cache, KnownBases);
        if (isKnownBase(Base, KnownBases) && areBothVectorOrScalar(Base, InVal))
          // Known bases won't need new instructions introduced and can be
          // ignored safely. However, this can only be done when InVal and Base
          // are both scalar or both vector. Otherwise, we need to find a
          // correct BDV for InVal, by creating an entry in the lattice
          // (States).
          return;
        assert(isExpectedBDVType(Base) && "the only non-base values "
               "we see should be base defining values");
        if (States.insert(std::make_pair(Base, BDVState(Base))).second)
          Worklist.push_back(Base);
      };

      visitBDVOperands(Current, visitIncomingValue);
    }
  }

#ifndef NDEBUG
  VerifyStates();
  LLVM_DEBUG(dbgs() << "States after initialization:\n");
  for (const auto &Pair : States) {
    LLVM_DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n");
  }
#endif

  // Iterate forward through the value graph pruning any node from the state
  // list where all of the inputs are base pointers. The purpose of this is to
  // reuse existing values when the derived pointer we were asked to
  // materialize a base pointer for happens to be a base pointer itself. (Or a
  // sub-graph feeding it does.)
  SmallVector<Value *> ToRemove;
  do {
    ToRemove.clear();
    for (auto Pair : States) {
      Value *BDV = Pair.first;
      auto canPruneInput = [&](Value *V) {
        // If the input of the BDV is the BDV itself we can prune it. This is
        // only possible if the BDV is a PHI node.
        if (V->stripPointerCasts() == BDV)
          return true;
        Value *VBDV = findBaseOrBDV(V, Cache, KnownBases);
        if (V->stripPointerCasts() != VBDV)
          return false;
        // The assumption is that anything not in the state list
        // propagates a base pointer.
        return States.count(VBDV) == 0;
      };

      bool CanPrune = true;
      visitBDVOperands(BDV, [&](Value *Op) {
        CanPrune = CanPrune && canPruneInput(Op);
      });
      if (CanPrune)
        ToRemove.push_back(BDV);
    }
    for (Value *V : ToRemove) {
      States.erase(V);
      // Cache the fact V is its own base for later usage.
      Cache[V] = V;
    }
  } while (!ToRemove.empty());

  // Did we manage to prove that Def itself must be a base pointer?
  if (!States.count(Def))
    return Def;

  // Return a phi state for a base defining value. We'll generate a new
  // base state for known bases and expect to find a cached state otherwise.
  auto GetStateForBDV = [&](Value *BaseValue, Value *Input) {
    auto I = States.find(BaseValue);
    if (I != States.end())
      return I->second;
    assert(areBothVectorOrScalar(BaseValue, Input));
    return BDVState(BaseValue, BDVState::Base, BaseValue);
  };

  // Even though we have identified a concrete base (or a conflict) for all
  // live pointers at this point, there are cases where the base is of an
  // incompatible type compared to the original instruction. We conservatively
  // mark those as conflicts to ensure that corresponding BDVs will be
  // generated in the next steps.

  // This is a rather explicit check for all cases where we should mark the
  // state as a conflict to force the latter stages of the algorithm to emit
  // the BDVs.
  // TODO: in many cases the instructions emitted for the conflicting states
  // will be identical to I itself (if the I's operate on their BDVs
  // themselves). We should exploit this, but can't do it here since it would
  // break the invariant about the BDVs not being known to be a base.
  // TODO: the code also does not handle constants at all - the algorithm
  // relies on all constants having the same BDV and therefore constant-only
  // insns will never be in conflict, but this check is ignored here. If the
  // constant conflicts will be to BDVs themselves, they will be identical
  // instructions and will get optimized away (as in the above TODO).
  auto MarkConflict = [&](Instruction *I, Value *BaseValue) {
    // IE and EE mix vector & scalar, so they are always a conflict.
    if (isa<InsertElementInst>(I) || isa<ExtractElementInst>(I))
      return true;
    // Shuffle vector is always a conflict as it creates a new vector from
    // existing ones.
    if (isa<ShuffleVectorInst>(I))
      return true;
    // Any instructions where the computed base type differs from the
    // instruction type. An example is where an extract instruction is used by
    // a select. Here the select's BDV is a vector (because of extract's BDV),
    // while the select itself is a scalar type. Note that the IE and EE
    // instruction check is not fully subsumed by the vector<->scalar check at
    // the end, this is due to the BDV algorithm being ignorant of BDV types at
    // this junction.
    if (!areBothVectorOrScalar(BaseValue, I))
      return true;
    return false;
  };

  bool Progress = true;
  while (Progress) {
#ifndef NDEBUG
    const size_t OldSize = States.size();
#endif
    Progress = false;
    // We're only changing values in this loop, thus safe to keep iterators.
    // Since this is computing a fixed point, the order of visit does not
    // affect the result. TODO: We could use a worklist here and make this run
    // much faster.
    for (auto Pair : States) {
      Value *BDV = Pair.first;
      // Only values that do not have known bases or those that have differing
      // type (scalar versus vector) from a possible known base should be in
      // the lattice.
      assert((!isKnownBase(BDV, KnownBases) ||
              !areBothVectorOrScalar(BDV, Pair.second.getBaseValue())) &&
             "why did it get added?");

      BDVState NewState(BDV);
      visitBDVOperands(BDV, [&](Value *Op) {
        Value *BDV = findBaseOrBDV(Op, Cache, KnownBases);
        auto OpState = GetStateForBDV(BDV, Op);
        NewState.meet(OpState);
      });

      // If the instruction has a known base, but should in fact be marked as
      // a conflict because of incompatible in/out types, we mark it as such,
      // ensuring that it will propagate through the fixpoint iteration.
      auto I = cast<Instruction>(BDV);
      auto BV = NewState.getBaseValue();
      if (BV && MarkConflict(I, BV))
        NewState = BDVState(I, BDVState::Conflict);

      BDVState OldState = Pair.second;
      if (OldState != NewState) {
        Progress = true;
        States[BDV] = NewState;
      }
    }

    assert(OldSize == States.size() &&
           "fixed point shouldn't be adding any new nodes to state");
  }

#ifndef NDEBUG
  VerifyStates();
  LLVM_DEBUG(dbgs() << "States after meet iteration:\n");
  for (const auto &Pair : States) {
    LLVM_DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n");
  }

  // Since we do the conflict marking as part of the fixpoint iteration, this
  // loop only asserts that invariants are met.
  for (auto Pair : States) {
    Instruction *I = cast<Instruction>(Pair.first);
    BDVState State = Pair.second;
    auto *BaseValue = State.getBaseValue();
    // Only values that do not have known bases or those that have differing
    // type (scalar versus vector) from a possible known base should be in the
    // lattice.
    assert(
        (!isKnownBase(I, KnownBases) || !areBothVectorOrScalar(I, BaseValue)) &&
        "why did it get added?");
    assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");
  }
#endif

  // Insert Phis for all conflicts.
  // TODO: adjust naming patterns to avoid this order of iteration dependency.
  for (auto Pair : States) {
    Instruction *I = cast<Instruction>(Pair.first);
    BDVState State = Pair.second;
    // Only values that do not have known bases or those that have differing
    // type (scalar versus vector) from a possible known base should be in the
    // lattice.
    assert((!isKnownBase(I, KnownBases) ||
            !areBothVectorOrScalar(I, State.getBaseValue())) &&
           "why did it get added?");
    assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");

    // Since we're joining a vector and scalar base, they can never be the
    // same. As a result, we should always see insert element having reached
    // the conflict state.
    assert(!isa<InsertElementInst>(I) || State.isConflict());

    if (!State.isConflict())
      continue;

    auto getMangledName = [](Instruction *I) -> std::string {
      if (isa<PHINode>(I)) {
        return suffixed_name_or(I, ".base", "base_phi");
      } else if (isa<SelectInst>(I)) {
        return suffixed_name_or(I, ".base", "base_select");
      } else if (isa<ExtractElementInst>(I)) {
        return suffixed_name_or(I, ".base", "base_ee");
      } else if (isa<InsertElementInst>(I)) {
        return suffixed_name_or(I, ".base", "base_ie");
      } else {
        return suffixed_name_or(I, ".base", "base_sv");
      }
    };

    Instruction *BaseInst = I->clone();
    BaseInst->insertBefore(I->getIterator());
    BaseInst->setName(getMangledName(I));
    // Add metadata marking this as a base value.
    BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {}));
    States[I] = BDVState(I, BDVState::Conflict, BaseInst);
    setKnownBase(BaseInst, /* IsKnownBase */ true, KnownBases);
  }

#ifndef NDEBUG
  VerifyStates();
#endif

  // Returns an instruction which produces the base pointer for a given
  // instruction. The instruction is assumed to be an input to one of the BDVs
  // seen in the inference algorithm above. As such, we must either already
  // know its base defining value is a base, or have inserted a new
  // instruction to propagate the base of its BDV and have entered that newly
  // introduced instruction into the state table. In either case, we are
  // assured to be able to determine an instruction which produces its base
  // pointer.
  auto getBaseForInput = [&](Value *Input, Instruction *InsertPt) {
    Value *BDV = findBaseOrBDV(Input, Cache, KnownBases);
    Value *Base = nullptr;
    if (auto It = States.find(BDV); It == States.end()) {
      assert(areBothVectorOrScalar(BDV, Input));
      Base = BDV;
    } else {
      // Either conflict or base.
      Base = It->second.getBaseValue();
    }
    assert(Base && "Can't be null");
    // The cast is needed since base traversal may strip away bitcasts.
    if (Base->getType() != Input->getType() && InsertPt)
      Base = new BitCastInst(Base, Input->getType(), "cast",
                             InsertPt->getIterator());
    return Base;
  };

  // Fixup all the inputs of the new PHIs. Visit order needs to be
  // deterministic and predictable because we're naming newly created
  // instructions.
  for (auto Pair : States) {
    Instruction *BDV = cast<Instruction>(Pair.first);
    BDVState State = Pair.second;

    // Only values that do not have known bases or those that have differing
    // type (scalar versus vector) from a possible known base should be in the
    // lattice.
    assert((!isKnownBase(BDV, KnownBases) ||
            !areBothVectorOrScalar(BDV, State.getBaseValue())) &&
           "why did it get added?");
    assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");
    if (!State.isConflict())
      continue;

    if (PHINode *BasePHI = dyn_cast<PHINode>(State.getBaseValue())) {
      PHINode *PN = cast<PHINode>(BDV);
      const unsigned NumPHIValues = PN->getNumIncomingValues();

      // The IR verifier requires phi nodes with multiple entries from the
      // same basic block to have the same incoming value for each of those
      // entries. Since we're inserting bitcasts in the loop, make sure we
      // do so at least once per incoming block.
      DenseMap<BasicBlock *, Value *> BlockToValue;
      for (unsigned i = 0; i < NumPHIValues; i++) {
        Value *InVal = PN->getIncomingValue(i);
        BasicBlock *InBB = PN->getIncomingBlock(i);
        auto [It, Inserted] = BlockToValue.try_emplace(InBB);
        if (Inserted)
          It->second = getBaseForInput(InVal, InBB->getTerminator());
        else {
#ifndef NDEBUG
          Value *OldBase = It->second;
          Value *Base = getBaseForInput(InVal, nullptr);

          // We can't use `stripPointerCasts` instead of this function because
          // `stripPointerCasts` doesn't handle vectors of pointers.
          auto StripBitCasts = [](Value *V) -> Value * {
            while (auto *BC = dyn_cast<BitCastInst>(V))
              V = BC->getOperand(0);
            return V;
          };
          // In essence this assert states: the only way two values
          // incoming from the same basic block may be different is by
          // being different bitcasts of the same value. A cleanup
          // that remains TODO is changing findBaseOrBDV to return an
          // llvm::Value of the correct type (and still remain pure).
          // This will remove the need to add bitcasts.
          assert(StripBitCasts(Base) == StripBitCasts(OldBase) &&
                 "findBaseOrBDV should be pure!");
#endif
        }
        Value *Base = It->second;
        BasePHI->setIncomingValue(i, Base);
      }
    } else if (SelectInst *BaseSI =
                   dyn_cast<SelectInst>(State.getBaseValue())) {
      SelectInst *SI = cast<SelectInst>(BDV);

      // Find the instruction which produces the base for each input.
      // We may need to insert a bitcast.
      BaseSI->setTrueValue(getBaseForInput(SI->getTrueValue(), BaseSI));
      BaseSI->setFalseValue(getBaseForInput(SI->getFalseValue(), BaseSI));
    } else if (auto *BaseEE =
                   dyn_cast<ExtractElementInst>(State.getBaseValue())) {
      Value *InVal = cast<ExtractElementInst>(BDV)->getVectorOperand();
      // Find the instruction which produces the base for each input. We may
      // need to insert a bitcast.
      BaseEE->setOperand(0, getBaseForInput(InVal, BaseEE));
    } else if (auto *BaseIE =
                   dyn_cast<InsertElementInst>(State.getBaseValue())) {
      auto *BdvIE = cast<InsertElementInst>(BDV);
      auto UpdateOperand = [&](int OperandIdx) {
        Value *InVal = BdvIE->getOperand(OperandIdx);
        Value *Base = getBaseForInput(InVal, BaseIE);
        BaseIE->setOperand(OperandIdx, Base);
      };
      UpdateOperand(0); // vector operand
      UpdateOperand(1); // scalar operand
    } else {
      auto *BaseSV = cast<ShuffleVectorInst>(State.getBaseValue());
      auto *BdvSV = cast<ShuffleVectorInst>(BDV);
      auto UpdateOperand = [&](int OperandIdx) {
        Value *InVal = BdvSV->getOperand(OperandIdx);
        Value *Base = getBaseForInput(InVal, BaseSV);
        BaseSV->setOperand(OperandIdx, Base);
      };
      UpdateOperand(0); // vector operand
      if (!BdvSV->isZeroEltSplat())
        UpdateOperand(1); // vector operand
      else {
        // Never read, so just use poison.
        Value *InVal = BdvSV->getOperand(1);
        BaseSV->setOperand(1, PoisonValue::get(InVal->getType()));
      }
    }
  }

#ifndef NDEBUG
  VerifyStates();
#endif

  // Get the data layout to compare the sizes of base/derived pointer values.
  [[maybe_unused]] auto &DL =
      cast<llvm::Instruction>(Def)->getDataLayout();
  // Cache all of our results so we can cheaply reuse them.
  // NOTE: This is actually two caches: one of the base defining value
  // relation and one of the base pointer relation! FIXME
  for (auto Pair : States) {
    auto *BDV = Pair.first;
    Value *Base = Pair.second.getBaseValue();
    assert(BDV && Base);
    // Whenever we have a derived ptr(s), their base
    // ptr(s) must be of the same size, not necessarily the same type.
    assert(DL.getTypeAllocSize(BDV->getType()) ==
               DL.getTypeAllocSize(Base->getType()) &&
           "Derived and base values should have same size");
    // Only values that do not have known bases or those that have differing
    // type (scalar versus vector) from a possible known base should be in the
    // lattice.
    assert(
        (!isKnownBase(BDV, KnownBases) || !areBothVectorOrScalar(BDV, Base)) &&
        "why did it get added?");

    LLVM_DEBUG(
        dbgs() << "Updating base value cache"
               << " for: " << BDV->getName() << " from: "
               << (Cache.count(BDV) ? Cache[BDV]->getName().str() : "none")
               << " to: " << Base->getName() << "\n");

    Cache[BDV] = Base;
  }
  assert(Cache.count(Def));
  return Cache[Def];
}

// For a set of live pointers (base and/or derived), identify the base
// pointer of the object which they are derived from. This routine will
// mutate the IR graph as needed to make the 'base' pointer live at the
// definition site of 'derived'. This ensures that any use of 'derived' can
// also use 'base'. This may involve the insertion of a number of
// additional PHI nodes.
//
// preconditions: live is a set of pointer type Values
//
// side effects: may insert PHI nodes into the existing CFG, will preserve
// CFG, will not remove or mutate any existing nodes
//
// post condition: PointerToBase contains one (derived, base) pair for every
// pointer in live. Note that derived can be equal to base if the original
// pointer was a base pointer.
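//
// For illustration (example mapping, not from this file): given live values
// %obj (an allocation) and %obj.f = getelementptr i8, ptr addrspace(1) %obj,
// i64 8, the post condition is PointerToBase = { %obj -> %obj,
// %obj.f -> %obj }.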
static void findBasePointers(const StatepointLiveSetTy &live,
                             PointerToBaseTy &PointerToBase, DominatorTree *DT,
                             DefiningValueMapTy &DVCache,
                             IsKnownBaseMapTy &KnownBases) {
  for (Value *ptr : live) {
    Value *base = findBasePointer(ptr, DVCache, KnownBases);
    assert(base && "failed to find base pointer");
    PointerToBase[ptr] = base;
    assert((!isa<Instruction>(base) || !isa<Instruction>(ptr) ||
            DT->dominates(cast<Instruction>(base)->getParent(),
                          cast<Instruction>(ptr)->getParent())) &&
           "The base we found better dominate the derived pointer");
  }
}

/// Find the required base pointers (and adjust the live set) for the given
/// parse point.
static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
                             CallBase *Call,
                             PartiallyConstructedSafepointRecord &result,
                             PointerToBaseTy &PointerToBase,
                             IsKnownBaseMapTy &KnownBases) {
  StatepointLiveSetTy PotentiallyDerivedPointers = result.LiveSet;
  // We assume that all pointers passed to deopt are base pointers; as an
  // optimization, we can use this to avoid separately materializing the base
  // pointer graph. This is only relevant since we're very conservative about
  // generating new conflict nodes during base pointer insertion. If we were
  // smarter there, this would be irrelevant.
  if (auto Opt = Call->getOperandBundle(LLVMContext::OB_deopt))
    for (Value *V : Opt->Inputs) {
      if (!PotentiallyDerivedPointers.count(V))
        continue;
      PotentiallyDerivedPointers.remove(V);
      PointerToBase[V] = V;
    }
  findBasePointers(PotentiallyDerivedPointers, PointerToBase, &DT, DVCache,
                   KnownBases);
}

/// Given an updated version of the dataflow liveness results, update the
/// liveset and base pointer maps for the call site CS.
static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
                                  CallBase *Call,
                                  PartiallyConstructedSafepointRecord &result,
                                  PointerToBaseTy &PointerToBase,
                                  GCStrategy *GC);

static void recomputeLiveInValues(
    Function &F, DominatorTree &DT, ArrayRef<CallBase *> toUpdate,
    MutableArrayRef<struct PartiallyConstructedSafepointRecord> records,
    PointerToBaseTy &PointerToBase, GCStrategy *GC) {
  // TODO-PERF: reuse the original liveness, then simply run the dataflow
  // again. The old values are still live and will help it stabilize quickly.
  GCPtrLivenessData RevisedLivenessData;
  computeLiveInValues(DT, F, RevisedLivenessData, GC);
  for (size_t i = 0; i < records.size(); i++) {
    struct PartiallyConstructedSafepointRecord &info = records[i];
    recomputeLiveInValues(RevisedLivenessData, toUpdate[i], info, PointerToBase,
                          GC);
  }
}

// Utility function which clones all instructions from "ChainToBase"
// and inserts them before "InsertBefore". Returns the rematerialized value
// which should be used after the statepoint.
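//
// For illustration (example IR, not from this file): if the live derived
// pointer was computed as
//   %f = getelementptr i8, ptr addrspace(1) %obj, i64 24
// then instead of relocating %f, the chain [%f] is cloned after the
// statepoint against the *relocated* base:
//   %f.remat = getelementptr i8, ptr addrspace(1) %obj.relocated, i64 24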
static Instruction *rematerializeChain(ArrayRef<Instruction *> ChainToBase,
                                       BasicBlock::iterator InsertBefore,
                                       Value *RootOfChain,
                                       Value *AlternateLiveBase) {
  Instruction *LastClonedValue = nullptr;
  Instruction *LastValue = nullptr;
  // Walk backwards to visit top-most instructions first.
  for (Instruction *Instr : reverse(ChainToBase)) {
    // Only GEPs and casts are supported as we need to be careful to not
    // introduce any new uses of pointers not in the liveset.
    // Note that it's fine to introduce new uses of pointers which were
    // otherwise not used after this statepoint.
    assert(isa<GetElementPtrInst>(Instr) || isa<CastInst>(Instr));

    Instruction *ClonedValue = Instr->clone();
    ClonedValue->insertBefore(InsertBefore);
    ClonedValue->setName(Instr->getName() + ".remat");

    // If it is not the first instruction in the chain then it uses the
    // previously cloned value. We should update it to use the cloned value.
    if (LastClonedValue) {
      assert(LastValue);
      ClonedValue->replaceUsesOfWith(LastValue, LastClonedValue);
#ifndef NDEBUG
      for (auto *OpValue : ClonedValue->operand_values()) {
        // Assert that the cloned instruction does not use any instructions
        // from this chain other than LastClonedValue.
        assert(!is_contained(ChainToBase, OpValue) &&
               "incorrect use in rematerialization chain");
        // Assert that the cloned instruction does not use the RootOfChain
        // or the AlternateLiveBase.
        assert(OpValue != RootOfChain && OpValue != AlternateLiveBase);
      }
#endif
    } else {
      // For the first instruction, replace the use of the unrelocated base,
      // i.e. RootOfChain/OrigRootPhi, with the corresponding PHI present in
      // the live set. They have been proved to be the same PHI nodes. Note
      // that the *only* use of the RootOfChain in the ChainToBase list is
      // the first Value in the list.
      if (RootOfChain != AlternateLiveBase)
        ClonedValue->replaceUsesOfWith(RootOfChain, AlternateLiveBase);
    }

    LastClonedValue = ClonedValue;
    LastValue = Instr;
  }
  assert(LastClonedValue);
  return LastClonedValue;
}

// When inserting gc.relocate and gc.result calls, we need to ensure there are
// no uses of the original value / return value between the gc.statepoint and
// the gc.relocate / gc.result call. One case which can arise is a phi node
// at the start of one of the successor blocks. We also need to be able to
// insert the gc.relocates only on the path which goes through the statepoint.
// We might need to split an edge to make this possible.
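//
// For illustration (assumed CFG, not from this file): for
//   invoke void @f() to label %normal unwind label %lpad
// if %normal has a second predecessor, it is split so that the invoke's
// normal destination has a unique predecessor, and gc.relocates can be placed
// there without being visible on the other incoming path.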
static BasicBlock *
normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent,
                            DominatorTree &DT) {
  BasicBlock *Ret = BB;
  if (!BB->getUniquePredecessor())
    Ret = SplitBlockPredecessors(BB, InvokeParent, "", &DT);

  // Now that 'Ret' has a unique predecessor we can safely remove all phi
  // nodes from it.
  FoldSingleEntryPHINodes(Ret);
  assert(!isa<PHINode>(Ret->begin()) &&
         "All PHI nodes should have been removed!");

  // At this point, we can safely insert a gc.relocate or gc.result as the
  // first instruction in Ret if needed.
  return Ret;
}

// List of all function attributes which must be stripped when lowering from
// the abstract machine model to the physical machine model. Essentially,
// these are all the effects a safepoint might have which we ignored in the
// abstract machine model for purposes of optimization. We have to strip
// these on both function declarations and call sites.
static constexpr Attribute::AttrKind FnAttrsToStrip[] =
    {Attribute::Memory, Attribute::NoSync, Attribute::NoFree};

// Create a new attribute set containing only attributes which can be
// transferred from the original call to the safepoint.
static AttributeList legalizeCallAttributes(CallBase *Call, bool IsMemIntrinsic,
                                            AttributeList StatepointAL) {
  AttributeList OrigAL = Call->getAttributes();
  if (OrigAL.isEmpty())
    return StatepointAL;

  // Remove the readonly, readnone, and statepoint function attributes.
  LLVMContext &Ctx = Call->getContext();
  AttrBuilder FnAttrs(Ctx, OrigAL.getFnAttrs());
  for (auto Attr : FnAttrsToStrip)
    FnAttrs.removeAttribute(Attr);

  for (Attribute A : OrigAL.getFnAttrs()) {
    if (isStatepointDirectiveAttr(A))
      FnAttrs.removeAttribute(A);
  }

  StatepointAL = StatepointAL.addFnAttributes(Ctx, FnAttrs);

  // The memory intrinsics do not have a 1:1 correspondence of the original
  // call arguments to the produced statepoint. Do not transfer the argument
  // attributes to avoid putting them on incorrect arguments.
  if (IsMemIntrinsic)
    return StatepointAL;

  // Attach the argument attributes from the original call at the corresponding
  // arguments in the statepoint. Note that any argument attributes that are
  // invalid after lowering are stripped in stripNonValidDataFromBody.
  for (unsigned I : llvm::seq(Call->arg_size()))
    StatepointAL = StatepointAL.addParamAttributes(
        Ctx, I + GCStatepointInst::CallArgsBeginPos,
        AttrBuilder(Ctx, OrigAL.getParamAttrs(I)));

  // Return attributes are later attached to the gc.result intrinsic.
  return StatepointAL;
}

/// Helper function to place all gc relocates necessary for the given
/// statepoint.
/// Inputs:
///   liveVariables - list of variables to be relocated.
///   basePtrs - base pointers.
///   statepointToken - statepoint instruction to which relocates should be
///     bound.
///   Builder - LLVM IR builder to be used to construct new calls.
static void CreateGCRelocates(ArrayRef<Value *> LiveVariables,
                              ArrayRef<Value *> BasePtrs,
                              Instruction *StatepointToken,
                              IRBuilder<> &Builder, GCStrategy *GC) {
  if (LiveVariables.empty())
    return;

  auto FindIndex = [](ArrayRef<Value *> LiveVec, Value *Val) {
    auto ValIt = llvm::find(LiveVec, Val);
    assert(ValIt != LiveVec.end() && "Val not found in LiveVec!");
    size_t Index = std::distance(LiveVec.begin(), ValIt);
    assert(Index < LiveVec.size() && "Bug in std::find?");
    return Index;
  };
  Module *M = StatepointToken->getModule();

  // All gc_relocate are generated as i8 addrspace(1)* (or a vector type whose
  // element type is i8 addrspace(1)*). We originally generated unique
  // declarations for each pointer type, but this proved problematic because
  // the intrinsic mangling code is incomplete and fragile. Since we're moving
  // towards a single unified pointer type anyways, we can just cast everything
  // to an i8* of the right address space. A bitcast is added later to convert
  // gc_relocate to the actual value's type.
1525 auto getGCRelocateDecl = [&](Type *Ty) {
1526 assert(isHandledGCPointerType(Ty, GC));
1527 auto AS = Ty->getScalarType()->getPointerAddressSpace();
1528 Type *NewTy = PointerType::get(M->getContext(), AS);
1529 if (auto *VT = dyn_cast<VectorType>(Ty))
1530 NewTy = FixedVectorType::get(NewTy,
1531 cast<FixedVectorType>(VT)->getNumElements());
1532 return Intrinsic::getOrInsertDeclaration(
1533 M, Intrinsic::experimental_gc_relocate, {NewTy});
1534 };
1535
1536 // Lazily populated map from input types to the canonicalized form mentioned
1537 // in the comment above. This should probably be cached somewhere more
1538 // broadly.
1539 DenseMap<Type *, Function *> TypeToDeclMap;
1540
1541 for (unsigned i = 0; i < LiveVariables.size(); i++) {
1542 // Generate the gc.relocate call and save the result
1543 Value *BaseIdx = Builder.getInt32(FindIndex(LiveVariables, BasePtrs[i]));
1544 Value *LiveIdx = Builder.getInt32(i);
1545
1546 Type *Ty = LiveVariables[i]->getType();
1547 auto [It, Inserted] = TypeToDeclMap.try_emplace(Ty);
1548 if (Inserted)
1549 It->second = getGCRelocateDecl(Ty);
1550 Function *GCRelocateDecl = It->second;
1551
1552 // only specify a debug name if we can give a useful one
1553 CallInst *Reloc = Builder.CreateCall(
1554 GCRelocateDecl, {StatepointToken, BaseIdx, LiveIdx},
1555 suffixed_name_or(LiveVariables[i], ".relocated", ""));
1556 // Trick CodeGen into thinking there are lots of free registers at this
1557 // fake call.
1558 Reloc->setCallingConv(CallingConv::Cold);
1559 }
1560}
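// Illustrative example of the output (hand-written, simplified; exact
// intrinsic suffixes vary): for a live base %b (gc-live index 0) and derived
// pointer %d (index 1), the loop above emits
//   %d.relocated = call coldcc ptr addrspace(1)
//       @llvm.experimental.gc.relocate(token %safepoint_token, i32 0, i32 1)
// where the two i32 operands index the base and derived pointer within the
// statepoint's gc-live operand list.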
1561
1562namespace {
1563
1564 /// This struct is used to defer RAUWs and `eraseFromParent`s. Using this
1565/// avoids having to worry about keeping around dangling pointers to Values.
1566class DeferredReplacement {
1567 AssertingVH<Instruction> Old;
1568 AssertingVH<Instruction> New;
1569 bool IsDeoptimize = false;
1570
1571 DeferredReplacement() = default;
1572
1573public:
1574 static DeferredReplacement createRAUW(Instruction *Old, Instruction *New) {
1575 assert(Old != New && Old && New &&
1576 "Cannot RAUW equal values or to / from null!");
1577
1578 DeferredReplacement D;
1579 D.Old = Old;
1580 D.New = New;
1581 return D;
1582 }
1583
1584 static DeferredReplacement createDelete(Instruction *ToErase) {
1585 DeferredReplacement D;
1586 D.Old = ToErase;
1587 return D;
1588 }
1589
1590 static DeferredReplacement createDeoptimizeReplacement(Instruction *Old) {
1591#ifndef NDEBUG
1592 auto *F = cast<CallInst>(Old)->getCalledFunction();
1593 assert(F && F->getIntrinsicID() == Intrinsic::experimental_deoptimize &&
1594 "Only way to construct a deoptimize deferred replacement");
1595#endif
1596 DeferredReplacement D;
1597 D.Old = Old;
1598 D.IsDeoptimize = true;
1599 return D;
1600 }
1601
1602 /// Does the task represented by this instance.
1603 void doReplacement() {
1604 Instruction *OldI = Old;
1605 Instruction *NewI = New;
1606
1607 assert(OldI != NewI && "Disallowed at construction?!");
1608 assert((!IsDeoptimize || !New) &&
1609 "Deoptimize intrinsics are not replaced!");
1610
1611 Old = nullptr;
1612 New = nullptr;
1613
1614 if (NewI)
1615 OldI->replaceAllUsesWith(NewI);
1616
1617 if (IsDeoptimize) {
1618 // Note: we've inserted instructions, so the call to llvm.deoptimize may
1619 // not necessarily be followed by the matching return.
1620 auto *RI = cast<ReturnInst>(OldI->getParent()->getTerminator());
1621 new UnreachableInst(RI->getContext(), RI->getIterator());
1622 RI->eraseFromParent();
1623 }
1624
1625 OldI->eraseFromParent();
1626 }
1627};
1628
1629} // end anonymous namespace
1630
1631 static StringRef getDeoptLowering(CallBase *Call) {
1632 const char *DeoptLowering = "deopt-lowering";
1633 if (Call->hasFnAttr(DeoptLowering)) {
1634 // FIXME: Calls have a *really* confusing interface around attributes
1635 // with values.
1636 const AttributeList &CSAS = Call->getAttributes();
1637 if (CSAS.hasFnAttr(DeoptLowering))
1638 return CSAS.getFnAttr(DeoptLowering).getValueAsString();
1639 Function *F = Call->getCalledFunction();
1640 assert(F && F->hasFnAttribute(DeoptLowering));
1641 return F->getFnAttribute(DeoptLowering).getValueAsString();
1642 }
1643 return "live-through";
1644}
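// For illustration, a call site requesting "live-in" lowering carries the
// request as a string function attribute, roughly:
//   call void @foo() [ "deopt"(i32 %state) ] #0
//   attributes #0 = { "deopt-lowering"="live-in" }
// With no attribute present, getDeoptLowering() returns "live-through".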
1645
1646static void
1647 makeStatepointExplicitImpl(CallBase *Call, /* to replace */
1648 const SmallVectorImpl<Value *> &BasePtrs,
1649 const SmallVectorImpl<Value *> &LiveVariables,
1650 PartiallyConstructedSafepointRecord &Result,
1651 std::vector<DeferredReplacement> &Replacements,
1652 const PointerToBaseTy &PointerToBase,
1653 GCStrategy *GC) {
1654 assert(BasePtrs.size() == LiveVariables.size());
1655
1656 // Then go ahead and use the builder to actually do the inserts. We insert
1657 // immediately before the previous instruction under the assumption that all
1658 // arguments will be available here. We can't insert afterwards since we may
1659 // be replacing a terminator.
1660 IRBuilder<> Builder(Call);
1661
1662 ArrayRef<Value *> GCLive(LiveVariables);
1663 uint64_t StatepointID = StatepointDirectives::DefaultStatepointID;
1664 uint32_t NumPatchBytes = 0;
1665 uint32_t Flags = uint32_t(StatepointFlags::None);
1666
1667 SmallVector<Value *, 8> CallArgs(Call->args());
1668 std::optional<ArrayRef<Use>> DeoptArgs;
1669 if (auto Bundle = Call->getOperandBundle(LLVMContext::OB_deopt))
1670 DeoptArgs = Bundle->Inputs;
1671 std::optional<ArrayRef<Use>> TransitionArgs;
1672 if (auto Bundle = Call->getOperandBundle(LLVMContext::OB_gc_transition)) {
1673 TransitionArgs = Bundle->Inputs;
1674 // TODO: This flag no longer serves a purpose and can be removed later
1675 Flags |= uint32_t(StatepointFlags::GCTransition);
1676 }
1677
1678 // Instead of lowering calls to @llvm.experimental.deoptimize as normal calls
1679 // with a return value, we lower them as never-returning calls to
1680 // __llvm_deoptimize that are followed by unreachable to get better codegen.
1681 bool IsDeoptimize = false;
1682 bool IsMemIntrinsic = false;
1683
1684 StatepointDirectives SD =
1685 parseStatepointDirectivesFromAttrs(Call->getAttributes());
1686 if (SD.NumPatchBytes)
1687 NumPatchBytes = *SD.NumPatchBytes;
1688 if (SD.StatepointID)
1689 StatepointID = *SD.StatepointID;
1690
1691 // Pass through the requested lowering if any. The default is live-through.
1692 StringRef DeoptLowering = getDeoptLowering(Call);
1693 if (DeoptLowering == "live-in")
1694 Flags |= uint32_t(StatepointFlags::DeoptLiveIn);
1695 else {
1696 assert(DeoptLowering == "live-through" && "Unsupported value!");
1697 }
1698
1699 FunctionCallee CallTarget(Call->getFunctionType(), Call->getCalledOperand());
1700 if (Function *F = dyn_cast<Function>(CallTarget.getCallee())) {
1701 auto IID = F->getIntrinsicID();
1702 if (IID == Intrinsic::experimental_deoptimize) {
1703 // Calls to llvm.experimental.deoptimize are lowered to calls to the
1704 // __llvm_deoptimize symbol. We want to resolve this now, since the
1705 // verifier does not allow taking the address of an intrinsic function.
1706
1707 SmallVector<Type *, 8> DomainTy;
1708 for (Value *Arg : CallArgs)
1709 DomainTy.push_back(Arg->getType());
1710 auto *FTy = FunctionType::get(Type::getVoidTy(F->getContext()), DomainTy,
1711 /* isVarArg = */ false);
1712
1713 // Note: CallTarget can be a bitcast instruction of a symbol if there are
1714 // calls to @llvm.experimental.deoptimize with different argument types in
1715 // the same module. This is fine -- we assume the frontend knew what it
1716 // was doing when generating this kind of IR.
1717 CallTarget = F->getParent()
1718 ->getOrInsertFunction("__llvm_deoptimize", FTy);
1719
1720 IsDeoptimize = true;
1721 } else if (IID == Intrinsic::memcpy_element_unordered_atomic ||
1722 IID == Intrinsic::memmove_element_unordered_atomic) {
1723 IsMemIntrinsic = true;
1724
1725 // Unordered atomic memcpy and memmove intrinsics which are not explicitly
1726 // marked as "gc-leaf-function" should be lowered in a GC parseable way.
1727 // Specifically, these calls should be lowered to the
1728 // __llvm_{memcpy|memmove}_element_unordered_atomic_safepoint symbols.
1729 // Similarly to __llvm_deoptimize we want to resolve this now, since the
1730 // verifier does not allow taking the address of an intrinsic function.
1731 //
1732 // Moreover we need to shuffle the arguments for the call in order to
1733 // accommodate GC. The underlying source and destination objects might be
1734 // relocated during the copy operation should a GC occur. To relocate the
1735 // derived source and destination pointers the implementation of the
1736 // intrinsic should know the corresponding base pointers.
1737 //
1738 // To make the base pointers available pass them explicitly as arguments:
1739 // memcpy(dest_derived, source_derived, ...) =>
1740 // memcpy(dest_base, dest_offset, source_base, source_offset, ...)
1741 auto &Context = Call->getContext();
1742 auto &DL = Call->getDataLayout();
1743 auto GetBaseAndOffset = [&](Value *Derived) {
1744 Value *Base = nullptr;
1745 // Optimizations in unreachable code might substitute the real pointer
1746 // with undef, poison or null-derived constant. Return null base for
1747 // them to be consistent with the handling in the main algorithm in
1748 // findBaseDefiningValue.
1749 if (isa<Constant>(Derived))
1750 Base =
1751 ConstantPointerNull::get(cast<PointerType>(Derived->getType()));
1752 else {
1753 assert(PointerToBase.count(Derived));
1754 Base = PointerToBase.find(Derived)->second;
1755 }
1756 unsigned AddressSpace = Derived->getType()->getPointerAddressSpace();
1757 unsigned IntPtrSize = DL.getPointerSizeInBits(AddressSpace);
1758 Value *Base_int = Builder.CreatePtrToInt(
1759 Base, Type::getIntNTy(Context, IntPtrSize));
1760 Value *Derived_int = Builder.CreatePtrToInt(
1761 Derived, Type::getIntNTy(Context, IntPtrSize));
1762 return std::make_pair(Base, Builder.CreateSub(Derived_int, Base_int));
1763 };
1764
1765 auto *Dest = CallArgs[0];
1766 Value *DestBase, *DestOffset;
1767 std::tie(DestBase, DestOffset) = GetBaseAndOffset(Dest);
1768
1769 auto *Source = CallArgs[1];
1770 Value *SourceBase, *SourceOffset;
1771 std::tie(SourceBase, SourceOffset) = GetBaseAndOffset(Source);
1772
1773 auto *LengthInBytes = CallArgs[2];
1774 auto *ElementSizeCI = cast<ConstantInt>(CallArgs[3]);
1775
1776 CallArgs.clear();
1777 CallArgs.push_back(DestBase);
1778 CallArgs.push_back(DestOffset);
1779 CallArgs.push_back(SourceBase);
1780 CallArgs.push_back(SourceOffset);
1781 CallArgs.push_back(LengthInBytes);
1782
1783 SmallVector<Type *, 8> DomainTy;
1784 for (Value *Arg : CallArgs)
1785 DomainTy.push_back(Arg->getType());
1786 auto *FTy = FunctionType::get(Type::getVoidTy(F->getContext()), DomainTy,
1787 /* isVarArg = */ false);
1788
1789 auto GetFunctionName = [](Intrinsic::ID IID, ConstantInt *ElementSizeCI) {
1790 uint64_t ElementSize = ElementSizeCI->getZExtValue();
1791 if (IID == Intrinsic::memcpy_element_unordered_atomic) {
1792 switch (ElementSize) {
1793 case 1:
1794 return "__llvm_memcpy_element_unordered_atomic_safepoint_1";
1795 case 2:
1796 return "__llvm_memcpy_element_unordered_atomic_safepoint_2";
1797 case 4:
1798 return "__llvm_memcpy_element_unordered_atomic_safepoint_4";
1799 case 8:
1800 return "__llvm_memcpy_element_unordered_atomic_safepoint_8";
1801 case 16:
1802 return "__llvm_memcpy_element_unordered_atomic_safepoint_16";
1803 default:
1804 llvm_unreachable("unexpected element size!");
1805 }
1806 }
1807 assert(IID == Intrinsic::memmove_element_unordered_atomic);
1808 switch (ElementSize) {
1809 case 1:
1810 return "__llvm_memmove_element_unordered_atomic_safepoint_1";
1811 case 2:
1812 return "__llvm_memmove_element_unordered_atomic_safepoint_2";
1813 case 4:
1814 return "__llvm_memmove_element_unordered_atomic_safepoint_4";
1815 case 8:
1816 return "__llvm_memmove_element_unordered_atomic_safepoint_8";
1817 case 16:
1818 return "__llvm_memmove_element_unordered_atomic_safepoint_16";
1819 default:
1820 llvm_unreachable("unexpected element size!");
1821 }
1822 };
1823
1824 CallTarget =
1825 F->getParent()
1826 ->getOrInsertFunction(GetFunctionName(IID, ElementSizeCI), FTy);
1827 }
1828 }
1829
1830 // Create the statepoint given all the arguments
1831 GCStatepointInst *Token = nullptr;
1832 if (auto *CI = dyn_cast<CallInst>(Call)) {
1833 CallInst *SPCall = Builder.CreateGCStatepointCall(
1834 StatepointID, NumPatchBytes, CallTarget, Flags, CallArgs,
1835 TransitionArgs, DeoptArgs, GCLive, "safepoint_token");
1836
1837 SPCall->setTailCallKind(CI->getTailCallKind());
1838 SPCall->setCallingConv(CI->getCallingConv());
1839
1840 // Set up function attrs directly on statepoint and return attrs later for
1841 // gc_result intrinsic.
1842 SPCall->setAttributes(
1843 legalizeCallAttributes(CI, IsMemIntrinsic, SPCall->getAttributes()));
1844
1845 Token = cast<GCStatepointInst>(SPCall);
1846
1847 // Put the following gc_result and gc_relocate calls immediately after
1848 // the old call (which we're about to delete).
1849 assert(CI->getNextNode() && "Not a terminator, must have next!");
1850 Builder.SetInsertPoint(CI->getNextNode());
1851 Builder.SetCurrentDebugLocation(CI->getNextNode()->getDebugLoc());
1852 } else {
1853 auto *II = cast<InvokeInst>(Call);
1854
1855 // Insert the new invoke into the old block. We'll remove the old one in a
1856 // moment at which point this will become the new terminator for the
1857 // original block.
1858 InvokeInst *SPInvoke = Builder.CreateGCStatepointInvoke(
1859 StatepointID, NumPatchBytes, CallTarget, II->getNormalDest(),
1860 II->getUnwindDest(), Flags, CallArgs, TransitionArgs, DeoptArgs,
1861 GCLive, "statepoint_token");
1862
1863 SPInvoke->setCallingConv(II->getCallingConv());
1864
1865 // Set up function attrs directly on statepoint and return attrs later for
1866 // gc_result intrinsic.
1867 SPInvoke->setAttributes(
1868 legalizeCallAttributes(II, IsMemIntrinsic, SPInvoke->getAttributes()));
1869
1870 Token = cast<GCStatepointInst>(SPInvoke);
1871
1872 // Generate gc relocates in exceptional path
1873 BasicBlock *UnwindBlock = II->getUnwindDest();
1874 assert(!isa<PHINode>(UnwindBlock->begin()) &&
1875 UnwindBlock->getUniquePredecessor() &&
1876 "can't safely insert in this block!");
1877
1878 Builder.SetInsertPoint(UnwindBlock, UnwindBlock->getFirstInsertionPt());
1879 Builder.SetCurrentDebugLocation(II->getDebugLoc());
1880
1881 // Attach exceptional gc relocates to the landingpad.
1882 Instruction *ExceptionalToken = UnwindBlock->getLandingPadInst();
1883 Result.UnwindToken = ExceptionalToken;
1884
1885 CreateGCRelocates(LiveVariables, BasePtrs, ExceptionalToken, Builder, GC);
1886
1887 // Generate gc relocates and returns for normal block
1888 BasicBlock *NormalDest = II->getNormalDest();
1889 assert(!isa<PHINode>(NormalDest->begin()) &&
1890 NormalDest->getUniquePredecessor() &&
1891 "can't safely insert in this block!");
1892
1893 Builder.SetInsertPoint(NormalDest, NormalDest->getFirstInsertionPt());
1894
1895 // gc relocates will be generated later as if it were a regular call
1896 // statepoint
1897 }
1898 assert(Token && "Should be set in one of the above branches!");
1899
1900 if (IsDeoptimize) {
1901 // If we're wrapping an @llvm.experimental.deoptimize in a statepoint, we
1902 // transform the tail-call like structure to a call to a void function
1903 // followed by unreachable to get better codegen.
1904 Replacements.push_back(
1905 DeferredReplacement::createDeoptimizeReplacement(Call));
1906 } else {
1907 Token->setName("statepoint_token");
1908 if (!Call->getType()->isVoidTy() && !Call->use_empty()) {
1909 StringRef Name = Call->hasName() ? Call->getName() : "";
1910 CallInst *GCResult = Builder.CreateGCResult(Token, Call->getType(), Name);
1911 GCResult->setAttributes(
1912 AttributeList::get(GCResult->getContext(), AttributeList::ReturnIndex,
1913 Call->getAttributes().getRetAttrs()));
1914
1915 // We cannot RAUW or delete CS.getInstruction() because it could be in the
1916 // live set of some other safepoint, in which case that safepoint's
1917 // PartiallyConstructedSafepointRecord will hold a raw pointer to this
1918 // llvm::Instruction. Instead, we defer the replacement and deletion to
1919 // after the live sets have been made explicit in the IR, and we no longer
1920 // have raw pointers to worry about.
1921 Replacements.emplace_back(
1922 DeferredReplacement::createRAUW(Call, GCResult));
1923 } else {
1924 Replacements.emplace_back(DeferredReplacement::createDelete(Call));
1925 }
1926 }
1927
1928 Result.StatepointToken = Token;
1929
1930 // Second, create a gc.relocate for every live variable
1931 CreateGCRelocates(LiveVariables, BasePtrs, Token, Builder, GC);
1932}
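// End-to-end sketch of the rewrite performed above (illustrative IR,
// simplified; exact intrinsic suffixes and operands vary):
//   %ret = call ptr addrspace(1) @f(ptr addrspace(1) %p)
// becomes roughly
//   %safepoint_token = call token @llvm.experimental.gc.statepoint(
//       i64 2882400000, i32 0, ptr @f, i32 1, i32 0, ptr addrspace(1) %p)
//       [ "gc-live"(ptr addrspace(1) %p) ]
//   %ret = call ptr addrspace(1)
//       @llvm.experimental.gc.result(token %safepoint_token)
//   %p.relocated = call ptr addrspace(1)
//       @llvm.experimental.gc.relocate(token %safepoint_token, i32 0, i32 0)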
1933
1934// Replace an existing gc.statepoint with a new one and a set of gc.relocates
1935// which make the relocations happening at this safepoint explicit.
1936//
1937// WARNING: Does not do any fixup to adjust users of the original live
1938 // values. That's the caller's responsibility.
1939static void
1940 makeStatepointExplicit(DominatorTree &DT, CallBase *Call,
1941 PartiallyConstructedSafepointRecord &Result,
1942 std::vector<DeferredReplacement> &Replacements,
1943 const PointerToBaseTy &PointerToBase, GCStrategy *GC) {
1944 const auto &LiveSet = Result.LiveSet;
1945
1946 // Convert to vector for efficient cross referencing.
1947 SmallVector<Value *, 64> BaseVec, LiveVec;
1948 LiveVec.reserve(LiveSet.size());
1949 BaseVec.reserve(LiveSet.size());
1950 for (Value *L : LiveSet) {
1951 LiveVec.push_back(L);
1952 assert(PointerToBase.count(L));
1953 Value *Base = PointerToBase.find(L)->second;
1954 BaseVec.push_back(Base);
1955 }
1956 assert(LiveVec.size() == BaseVec.size());
1957
1958 // Do the actual rewriting and delete the old statepoint
1959 makeStatepointExplicitImpl(Call, BaseVec, LiveVec, Result, Replacements,
1960 PointerToBase, GC);
1961}
1962
1963// Helper function for the relocationViaAlloca.
1964//
1965 // It receives an iterator over the statepoint's gc relocates and emits a
1966 // store to the assigned location (via AllocaMap) for each one of them. It
1967 // adds the visited values into the VisitedLiveValues set, which we later
1968 // use for validation checking.
1969static void
1970 insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
1971 DenseMap<Value *, AllocaInst *> &AllocaMap,
1972 DenseSet<Value *> &VisitedLiveValues) {
1973 for (User *U : GCRelocs) {
1974 GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U);
1975 if (!Relocate)
1976 continue;
1977
1978 Value *OriginalValue = Relocate->getDerivedPtr();
1979 assert(AllocaMap.count(OriginalValue));
1980 Value *Alloca = AllocaMap[OriginalValue];
1981
1982 // Emit store into the related alloca.
1983 assert(Relocate->getNextNode() &&
1984 "Should always have one since it's not a terminator");
1985 new StoreInst(Relocate, Alloca, std::next(Relocate->getIterator()));
1986
1987#ifndef NDEBUG
1988 VisitedLiveValues.insert(OriginalValue);
1989#endif
1990 }
1991}
1992
1993// Helper function for the "relocationViaAlloca". Similar to the
1994// "insertRelocationStores" but works for rematerialized values.
1995 static void insertRematerializationStores(
1996 const RematerializedValueMapTy &RematerializedValues,
1997 DenseMap<Value *, AllocaInst *> &AllocaMap,
1998 DenseSet<Value *> &VisitedLiveValues) {
1999 for (auto RematerializedValuePair: RematerializedValues) {
2000 Instruction *RematerializedValue = RematerializedValuePair.first;
2001 Value *OriginalValue = RematerializedValuePair.second;
2002
2003 assert(AllocaMap.count(OriginalValue) &&
2004 "Can not find alloca for rematerialized value");
2005 Value *Alloca = AllocaMap[OriginalValue];
2006
2007 new StoreInst(RematerializedValue, Alloca,
2008 std::next(RematerializedValue->getIterator()));
2009
2010#ifndef NDEBUG
2011 VisitedLiveValues.insert(OriginalValue);
2012#endif
2013 }
2014}
2015
2016/// Do all the relocation update via allocas and mem2reg
2017 static void relocationViaAlloca(
2018 Function &F, DominatorTree &DT, ArrayRef<Value *> Live,
2019 ArrayRef<PartiallyConstructedSafepointRecord> Records) {
2020#ifndef NDEBUG
2021 // record initial number of (static) allocas; we'll check we have the same
2022 // number when we get done.
2023 int InitialAllocaNum = 0;
2024 for (Instruction &I : F.getEntryBlock())
2025 if (isa<AllocaInst>(I))
2026 InitialAllocaNum++;
2027#endif
2028
2029 // TODO-PERF: change data structures, reserve
2030 DenseMap<Value *, AllocaInst *> AllocaMap;
2031 SmallVector<AllocaInst *, 200> PromotableAllocas;
2032 // Used later to check that we have enough allocas to store all values
2033 std::size_t NumRematerializedValues = 0;
2034 PromotableAllocas.reserve(Live.size());
2035
2036 // Emit alloca for "LiveValue" and record it in "allocaMap" and
2037 // "PromotableAllocas"
2038 const DataLayout &DL = F.getDataLayout();
2039 auto emitAllocaFor = [&](Value *LiveValue) {
2040 AllocaInst *Alloca =
2041 new AllocaInst(LiveValue->getType(), DL.getAllocaAddrSpace(), "",
2042 F.getEntryBlock().getFirstNonPHIIt());
2043 AllocaMap[LiveValue] = Alloca;
2044 PromotableAllocas.push_back(Alloca);
2045 };
2046
2047 // Emit alloca for each live gc pointer
2048 for (Value *V : Live)
2049 emitAllocaFor(V);
2050
2051 // Emit allocas for rematerialized values
2052 for (const auto &Info : Records)
2053 for (auto RematerializedValuePair : Info.RematerializedValues) {
2054 Value *OriginalValue = RematerializedValuePair.second;
2055 if (AllocaMap.contains(OriginalValue))
2056 continue;
2057
2058 emitAllocaFor(OriginalValue);
2059 ++NumRematerializedValues;
2060 }
2061
2062 // The next two loops are part of the same conceptual operation. We need to
2063 // insert a store to the alloca after the original def and at each
2064 // redefinition. We need to insert a load before each use. These are split
2065 // into distinct loops for performance reasons.
2066
2067 // Update gc pointer after each statepoint: either store a relocated value or
2068 // null (if no relocated value was found for this gc pointer and it is not a
2069 // gc_result). This must happen before we update the statepoint with a load of
2070 // the alloca, otherwise we lose the link between the statepoint and the old def.
2071 for (const auto &Info : Records) {
2072 Value *Statepoint = Info.StatepointToken;
2073
2074 // This will be used for consistency check
2075 DenseSet<Value *> VisitedLiveValues;
2076
2077 // Insert stores for normal statepoint gc relocates
2078 insertRelocationStores(Statepoint->users(), AllocaMap, VisitedLiveValues);
2079
2080 // If it was an invoke statepoint,
2081 // we will insert stores for the exceptional-path gc relocates.
2082 if (isa<InvokeInst>(Statepoint)) {
2083 insertRelocationStores(Info.UnwindToken->users(), AllocaMap,
2084 VisitedLiveValues);
2085 }
2086
2087 // Do the same for rematerialized values
2088 insertRematerializationStores(Info.RematerializedValues, AllocaMap,
2089 VisitedLiveValues);
2090
2091 if (ClobberNonLive) {
2092 // As a debugging aid, pretend that an unrelocated pointer becomes null at
2093 // the gc.statepoint. This will turn some subtle GC problems into
2094 // slightly easier to debug SEGVs. Note that on large IR files with
2095 // lots of gc.statepoints this is extremely costly in both memory and
2096 // time.
2097 SmallVector<std::pair<Type *, AllocaInst *>, 64> ToClobber;
2098 for (auto Pair : AllocaMap) {
2099 Value *Def = Pair.first;
2100 AllocaInst *Alloca = Pair.second;
2101
2102 // This value was relocated
2103 if (VisitedLiveValues.count(Def)) {
2104 continue;
2105 }
2106 // Track Def's type since the alloca was created with that type.
2107 ToClobber.push_back({Def->getType(), Alloca});
2108 }
2109
2110 auto InsertClobbersAt = [&](BasicBlock::iterator IP) {
2111 for (auto &[Ty, AI] : ToClobber) {
2112 Constant *CPN;
2113 if (Ty->isVectorTy())
2114 CPN = ConstantAggregateZero::get(Ty);
2115 else
2116 CPN = ConstantPointerNull::get(cast<PointerType>(Ty));
2117 new StoreInst(CPN, AI, IP);
2118 }
2119 };
2120
2121 // Insert the clobbering stores. These may get intermixed with the
2122 // gc.results and gc.relocates, but that's fine.
2123 if (auto II = dyn_cast<InvokeInst>(Statepoint)) {
2124 InsertClobbersAt(II->getNormalDest()->getFirstInsertionPt());
2125 InsertClobbersAt(II->getUnwindDest()->getFirstInsertionPt());
2126 } else {
2127 InsertClobbersAt(
2128 std::next(cast<Instruction>(Statepoint)->getIterator()));
2129 }
2130 }
2131 }
2132
2133 // Update use with load allocas and add store for gc_relocated.
2134 for (auto Pair : AllocaMap) {
2135 Value *Def = Pair.first;
2136 AllocaInst *Alloca = Pair.second;
2137
2138 // We pre-record the uses of allocas so that we don't have to worry about
2139 // a later update that changes the user information.
2140
2141 SmallVector<Instruction *, 20> Uses;
2142 // PERF: trade a linear scan for repeated reallocation
2143 Uses.reserve(Def->getNumUses());
2144 for (User *U : Def->users()) {
2145 if (!isa<ConstantExpr>(U)) {
2146 // If the def has a ConstantExpr use, then the def is either a
2147 // ConstantExpr use itself or null. In either case
2148 // (recursively in the first, directly in the second), the oop
2149 // it is ultimately dependent on is null and this particular
2150 // use does not need to be fixed up.
2151 Uses.push_back(cast<Instruction>(U));
2152 }
2153 }
2154
2155 llvm::sort(Uses);
2156 auto Last = llvm::unique(Uses);
2157 Uses.erase(Last, Uses.end());
2158
2159 for (Instruction *Use : Uses) {
2160 if (isa<PHINode>(Use)) {
2161 PHINode *Phi = cast<PHINode>(Use);
2162 for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++) {
2163 if (Def == Phi->getIncomingValue(i)) {
2164 // Use Def's type since the alloca was created with that type.
2165 LoadInst *Load = new LoadInst(
2166 Def->getType(), Alloca, "",
2167 Phi->getIncomingBlock(i)->getTerminator()->getIterator());
2168 Phi->setIncomingValue(i, Load);
2169 }
2170 }
2171 } else {
2172 // Use Def's type since the alloca was created with that type.
2173 LoadInst *Load =
2174 new LoadInst(Def->getType(), Alloca, "", Use->getIterator());
2175 Use->replaceUsesOfWith(Def, Load);
2176 }
2177 }
2178
2179 // Emit store for the initial gc value. Store must be inserted after load,
2180 // otherwise the store will be in the alloca's use list and an extra load will be
2181 // inserted before it.
2182 StoreInst *Store = new StoreInst(Def, Alloca, /*volatile*/ false,
2183 DL.getABITypeAlign(Def->getType()));
2184 if (Instruction *Inst = dyn_cast<Instruction>(Def)) {
2185 if (InvokeInst *Invoke = dyn_cast<InvokeInst>(Inst)) {
2186 // InvokeInst is a terminator so the store need to be inserted into its
2187 // normal destination block.
2188 BasicBlock *NormalDest = Invoke->getNormalDest();
2189 Store->insertBefore(NormalDest->getFirstNonPHIIt());
2190 } else {
2191 assert(!Inst->isTerminator() &&
2192 "The only terminator that can produce a value is "
2193 "InvokeInst which is handled above.");
2194 Store->insertAfter(Inst->getIterator());
2195 }
2196 } else {
2197 assert(isa<Argument>(Def));
2198 Store->insertAfter(cast<Instruction>(Alloca)->getIterator());
2199 }
2200 }
2201
2202 assert(PromotableAllocas.size() == Live.size() + NumRematerializedValues &&
2203 "we must have the same allocas with lives");
2204 (void) NumRematerializedValues;
2205 if (!PromotableAllocas.empty()) {
2206 // Apply mem2reg to promote alloca to SSA
2207 PromoteMemToReg(PromotableAllocas, DT);
2208 }
2209
2210#ifndef NDEBUG
2211 for (auto &I : F.getEntryBlock())
2212 if (isa<AllocaInst>(I))
2213 InitialAllocaNum--;
2214 assert(InitialAllocaNum == 0 && "We must not introduce any extra allocas");
2215#endif
2216}
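// Shape of the rewrite above, per live value (illustrative, before mem2reg;
// names are hypothetical):
//   %p.alloca = alloca ptr addrspace(1)               ; in the entry block
//   store ptr addrspace(1) %p, ptr %p.alloca          ; after the original def
//   ...                                               ; statepoint
//   store ptr addrspace(1) %p.relocated, ptr %p.alloca
//   %p.reload = load ptr addrspace(1), ptr %p.alloca  ; before each use
// PromoteMemToReg then folds the allocas back into SSA form, introducing
// whatever phi nodes the relocations require.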
2217
2218/// Insert holders so that each Value is obviously live through the entire
2219/// lifetime of the call.
2220 static void insertUseHolderAfter(CallBase *Call, const ArrayRef<Value *> Values,
2221 SmallVectorImpl<CallInst *> &Holders) {
2222 if (Values.empty())
2223 // No values to hold live, might as well not insert the empty holder
2224 return;
2225
2226 Module *M = Call->getModule();
2227 // Use a dummy vararg function to actually hold the values live
2228 FunctionCallee Func = M->getOrInsertFunction(
2229 "__tmp_use", FunctionType::get(Type::getVoidTy(M->getContext()), true));
2230 if (isa<CallInst>(Call)) {
2231 // For call safepoints insert dummy calls right after safepoint
2232 Holders.push_back(
2233 CallInst::Create(Func, Values, "", std::next(Call->getIterator())));
2234 return;
2235 }
2236 // For invoke safepoints insert dummy calls both in normal and
2237 // exceptional destination blocks
2238 auto *II = cast<InvokeInst>(Call);
2239 Holders.push_back(CallInst::Create(
2240 Func, Values, "", II->getNormalDest()->getFirstInsertionPt()));
2241 Holders.push_back(CallInst::Create(
2242 Func, Values, "", II->getUnwindDest()->getFirstInsertionPt()));
2243}
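// The holder is just a vararg call that keeps its operands trivially live,
// e.g. (illustrative):
//   call void (...) @__tmp_use(ptr addrspace(1) %p, ptr addrspace(1) %q)
// All holders are erased again once liveness has been recomputed.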
2244
2245 static void findLiveReferences(Function &F, DominatorTree &DT,
2246 ArrayRef<CallBase *> toUpdate,
2247 MutableArrayRef<struct PartiallyConstructedSafepointRecord> records,
2248 GCStrategy *GC) {
2249 GCPtrLivenessData OriginalLivenessData;
2250 computeLiveInValues(DT, F, OriginalLivenessData, GC);
2251 for (size_t i = 0; i < records.size(); i++) {
2252 struct PartiallyConstructedSafepointRecord &info = records[i];
2253 analyzeParsePointLiveness(DT, OriginalLivenessData, toUpdate[i], info, GC);
2254 }
2255}
2256
2257 // Helper function for the "rematerializeLiveValues". It walks the use chain
2258 // starting from the "CurrentValue" until it reaches the root of the chain,
2259 // i.e. the base or a value it cannot process. Only "simple" values are
2260 // processed (currently GEPs and no-op casts). The returned root is examined by the
2261// callers of findRematerializableChainToBasePointer. Fills "ChainToBase" array
2262// with all visited values.
2263 static Value* findRematerializableChainToBasePointer(
2264 SmallVectorImpl<Instruction*> &ChainToBase,
2265 Value *CurrentValue) {
2266 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurrentValue)) {
2267 ChainToBase.push_back(GEP);
2268 return findRematerializableChainToBasePointer(ChainToBase,
2269 GEP->getPointerOperand());
2270 }
2271
2272 if (CastInst *CI = dyn_cast<CastInst>(CurrentValue)) {
2273 if (!CI->isNoopCast(CI->getDataLayout()))
2274 return CI;
2275
2276 ChainToBase.push_back(CI);
2277 return findRematerializableChainToBasePointer(ChainToBase,
2278 CI->getOperand(0));
2279 }
2280
2281 // We have reached the root of the chain, which is either equal to the base or
2282 // is the first unsupported value along the use chain.
2283 return CurrentValue;
2284}
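// Example (illustrative): walking %d2 in
//   %d1 = getelementptr i8, ptr addrspace(1) %base, i64 16
//   %d2 = getelementptr i8, ptr addrspace(1) %d1, i64 8
// pushes {%d2, %d1} onto ChainToBase and returns %base as the root.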
2285
2286// Helper function for the "rematerializeLiveValues". Compute cost of the use
2287// chain we are going to rematerialize.
2288static InstructionCost
2289 chainToBasePointerCost(SmallVectorImpl<Instruction *> &Chain,
2290 TargetTransformInfo &TTI) {
2291 InstructionCost Cost = 0;
2292
2293 for (Instruction *Instr : Chain) {
2294 if (CastInst *CI = dyn_cast<CastInst>(Instr)) {
2295 assert(CI->isNoopCast(CI->getDataLayout()) &&
2296 "non noop cast is found during rematerialization");
2297
2298 Type *SrcTy = CI->getOperand(0)->getType();
2299 Cost += TTI.getCastInstrCost(CI->getOpcode(), CI->getType(), SrcTy,
2300 TargetTransformInfo::CastContextHint::None,
2301 TargetTransformInfo::TCK_SizeAndLatency);
2302
2303 } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Instr)) {
2304 // Cost of the address calculation
2305 Cost += TTI.getAddressComputationCost(
2306 GEP->getType(), nullptr, nullptr,
2307 TargetTransformInfo::TCK_SizeAndLatency);
2308
2309 // And cost of the GEP itself
2310 // TODO: Use TTI->getGEPCost here (it exists, but appears not to be
2311 // allowed for external usage)
2312 if (!GEP->hasAllConstantIndices())
2313 Cost += 2;
2314
2315 } else {
2316 llvm_unreachable("unsupported instruction type during rematerialization");
2317 }
2318 }
2319
2320 return Cost;
2321}
2322
2323static bool AreEquivalentPhiNodes(PHINode &OrigRootPhi, PHINode &AlternateRootPhi) {
2324 unsigned PhiNum = OrigRootPhi.getNumIncomingValues();
2325 if (PhiNum != AlternateRootPhi.getNumIncomingValues() ||
2326 OrigRootPhi.getParent() != AlternateRootPhi.getParent())
2327 return false;
2328 // Map of incoming values and their corresponding basic blocks of
2329 // OrigRootPhi.
2330 SmallDenseMap<Value *, BasicBlock *, 8> CurrentIncomingValues;
2331 for (unsigned i = 0; i < PhiNum; i++)
2332 CurrentIncomingValues[OrigRootPhi.getIncomingValue(i)] =
2333 OrigRootPhi.getIncomingBlock(i);
2334
2335 // Both current and base PHIs should have same incoming values and
2336 // the same basic blocks corresponding to the incoming values.
2337 for (unsigned i = 0; i < PhiNum; i++) {
2338 auto CIVI =
2339 CurrentIncomingValues.find(AlternateRootPhi.getIncomingValue(i));
2340 if (CIVI == CurrentIncomingValues.end())
2341 return false;
2342 BasicBlock *CurrentIncomingBB = CIVI->second;
2343 if (CurrentIncomingBB != AlternateRootPhi.getIncomingBlock(i))
2344 return false;
2345 }
2346 return true;
2347}
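// Example (illustrative): the check above treats these two phis as equivalent
// because they agree on every (incoming value, incoming block) pair:
//   %phi      = phi ptr addrspace(1) [ %a, %bb1 ], [ %b, %bb2 ]
//   %phi.base = phi ptr addrspace(1) [ %a, %bb1 ], [ %b, %bb2 ]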
2348
2349 // Find derived pointers that can be recomputed cheaply enough and fill
2350// RematerizationCandidates with such candidates.
2351static void
2352findRematerializationCandidates(PointerToBaseTy PointerToBase,
2353 RematCandTy &RematerizationCandidates,
2354 TargetTransformInfo &TTI) {
2355 const unsigned int ChainLengthThreshold = 10;
2356
2357 for (auto P2B : PointerToBase) {
2358 auto *Derived = P2B.first;
2359 auto *Base = P2B.second;
2360 // Consider only derived pointers.
2361 if (Derived == Base)
2362 continue;
2363
2364 // For each live pointer find its defining chain.
2365 SmallVector<Instruction *, 3> ChainToBase;
2366 Value *RootOfChain =
2367 findRematerializableChainToBasePointer(ChainToBase, Derived);
2368
2369 // Nothing to do, or chain is too long
2370 if (ChainToBase.size() == 0 ||
2371 ChainToBase.size() > ChainLengthThreshold)
2372 continue;
2373
2374 // Handle the scenario where the RootOfChain is not equal to the
2375 // Base Value, but they are essentially the same phi values.
2376 if (Value *BaseVal = PointerToBase[Derived]; RootOfChain != BaseVal) {
2377 PHINode *OrigRootPhi = dyn_cast<PHINode>(RootOfChain);
2378 PHINode *AlternateRootPhi = dyn_cast<PHINode>(BaseVal);
2379 if (!OrigRootPhi || !AlternateRootPhi)
2380 continue;
2381 // PHI nodes that have the same incoming values and belong to the same
2382 // basic block are essentially the same SSA value. When the original phi
2383 // has incoming values with different base pointers, the original phi is
2384 // marked as conflict, and an additional `AlternateRootPhi` with the same
2385 // incoming values gets generated by the findBasePointer function. We need
2386 // to identify that the newly generated AlternateRootPhi (the .base version
2387 // of the phi) and RootOfChain (the original phi node itself) are the same,
2388 // so that we can rematerialize the gep and casts. This is a workaround for
2389 // the deficiency in the findBasePointer algorithm.
2390 if (!AreEquivalentPhiNodes(*OrigRootPhi, *AlternateRootPhi))
2391 continue;
2392 }
2393 // Compute cost of this chain.
2394 InstructionCost Cost = chainToBasePointerCost(ChainToBase, TTI);
2395 // TODO: We can also account for cases when we will be able to remove some
2396 // of the rematerialized values by later optimization passes. I.e if
2397 // we rematerialized several intersecting chains. Or if original values
2398 // don't have any uses besides this statepoint.
2399
2400 // Ok, there is a candidate.
2401 RematerizlizationCandidateRecord Record;
2402 Record.ChainToBase = ChainToBase;
2403 Record.RootOfChain = RootOfChain;
2404 Record.Cost = Cost;
2405 RematerizationCandidates.insert({ Derived, Record });
2406 }
2407}
2408
2409// Try to rematerialize derived pointers immediately before their uses
2410// (instead of rematerializing after every statepoint it is live through).
2411 // This can be beneficial when a derived pointer is live across many
2412 // statepoints but its uses are rare.
2413 static void rematerializeLiveValuesAtUses(
2414 RematCandTy &RematerizationCandidates,
2415 MutableArrayRef<PartiallyConstructedSafepointRecord> Records,
2416 PointerToBaseTy &PointerToBase) {
2417 if (!RematDerivedAtUses)
2418 return;
2419
2420 SmallVector<Instruction *, 32> LiveValuesToBeDeleted;
2421
2422 LLVM_DEBUG(dbgs() << "Rematerialize derived pointers at uses, "
2423 << "Num statepoints: " << Records.size() << '\n');
2424
2425 for (auto &It : RematerizationCandidates) {
2426 Instruction *Cand = cast<Instruction>(It.first);
2427 auto &Record = It.second;
2428
2429 if (Record.Cost >= RematerializationThreshold)
2430 continue;
2431
2432 if (Cand->user_empty())
2433 continue;
2434
2435 if (Cand->hasOneUse())
2436 if (auto *U = dyn_cast<Instruction>(Cand->getUniqueUndroppableUser()))
2437 if (U->getParent() == Cand->getParent())
2438 continue;
2439
2440 // Rematerialization before PHI nodes is not implemented.
2441 if (llvm::any_of(Cand->users(),
2442 [](const auto *U) { return isa<PHINode>(U); }))
2443 continue;
2444
2445 LLVM_DEBUG(dbgs() << "Trying cand " << *Cand << " ... ");
2446
2447 // Count of rematerialization instructions we introduce is equal to number
2448 // of candidate uses.
2449 // Count of rematerialization instructions we eliminate is equal to number
2450 // of statepoints it is live through.
2451 // Consider transformation profitable if latter is greater than former
2452 // (in other words, we create less than eliminate).
2453 unsigned NumLiveStatepoints = llvm::count_if(
2454 Records, [Cand](const auto &R) { return R.LiveSet.contains(Cand); });
2455 unsigned NumUses = Cand->getNumUses();
2456
2457 LLVM_DEBUG(dbgs() << "Num uses: " << NumUses << " Num live statepoints: "
2458 << NumLiveStatepoints << " ");
2459
2460 if (NumLiveStatepoints < NumUses) {
2461 LLVM_DEBUG(dbgs() << "not profitable\n");
2462 continue;
2463 }
2464
2465 // If rematerialization is 'free', then favor rematerialization at
2466 // uses as it generally shortens live ranges.
2467 // TODO: Short (size ==1) chains only?
2468 if (NumLiveStatepoints == NumUses && Record.Cost > 0) {
2469 LLVM_DEBUG(dbgs() << "not profitable\n");
2470 continue;
2471 }
2472
2473 LLVM_DEBUG(dbgs() << "looks profitable\n");
2474
2475 // ChainToBase may contain another remat candidate (as a sub chain) which
2476 // has been rewritten by now. We need to recollect the chain to have an
2477 // up-to-date value.
2478 // TODO: sort records in findRematerializationCandidates() in
2479 // decreasing chain size order?
2480 if (Record.ChainToBase.size() > 1) {
2481 Record.ChainToBase.clear();
2482 findRematerializableChainToBasePointer(Record.ChainToBase, Cand);
2483 }
2484
2485 // The current rematerialization algorithm is very simple: we rematerialize
2486 // immediately before EVERY use, even if there are several uses in the same
2487 // block or if the use is local to the Cand def. The reason is that this allows
2488 // us to avoid recomputing liveness without complicated analysis:
2489 // - If we did not eliminate all uses of the original Candidate, we do not
2490 // know exactly in what BBs it is still live.
2491 // - If we rematerialize once per BB, we need to find proper insertion
2492 // place (first use in block, but after Def) and analyze if there is
2493 // statepoint between uses in the block.
2494 while (!Cand->user_empty()) {
2495 Instruction *UserI = cast<Instruction>(*Cand->user_begin());
2496 Instruction *RematChain =
2497 rematerializeChain(Record.ChainToBase, UserI->getIterator(),
2498 Record.RootOfChain, PointerToBase[Cand]);
2499 UserI->replaceUsesOfWith(Cand, RematChain);
2500 PointerToBase[RematChain] = PointerToBase[Cand];
2501 }
2502 LiveValuesToBeDeleted.push_back(Cand);
2503 }
2504
2505 LLVM_DEBUG(dbgs() << "Rematerialized " << LiveValuesToBeDeleted.size()
2506 << " derived pointers\n");
2507 for (auto *Cand : LiveValuesToBeDeleted) {
2508 assert(Cand->use_empty() && "Unexpected user remain");
2509 RematerizationCandidates.erase(Cand);
2510 for (auto &R : Records) {
2511 assert(!R.LiveSet.contains(Cand) ||
2512 R.LiveSet.contains(PointerToBase[Cand]));
2513 R.LiveSet.remove(Cand);
2514 }
2515 }
2516
2517 // Recollect not rematerialized chains - we might have rewritten
2518 // their sub-chains.
2519 if (!LiveValuesToBeDeleted.empty()) {
2520 for (auto &P : RematerizationCandidates) {
2521 auto &R = P.second;
2522 if (R.ChainToBase.size() > 1) {
2523 R.ChainToBase.clear();
2524 findRematerializableChainToBasePointer(R.ChainToBase, P.first);
2525 }
2526 }
2527 }
2528}
2529
2530 // From the statepoint live set pick values that are cheaper to recompute than
2531 // to relocate. Remove these values from the live set, rematerialize them after
2532 // the statepoint and record them in the "Info" structure. Note that similar to
2533 // relocated values we don't do any user adjustments here.
2534 static void rematerializeLiveValues(CallBase *Call,
2535 PartiallyConstructedSafepointRecord &Info,
2536 PointerToBaseTy &PointerToBase,
2537 RematCandTy &RematerizationCandidates,
2538 TargetTransformInfo &TTI) {
2539 // Record values we are going to delete from this statepoint live set.
2540 // We cannot do this in the following loop due to iterator invalidation.
2541 SmallVector<Value *, 32> LiveValuesToBeDeleted;
2542
2543 for (Value *LiveValue : Info.LiveSet) {
2544 auto It = RematerizationCandidates.find(LiveValue);
2545 if (It == RematerizationCandidates.end())
2546 continue;
2547
2548 RematerizlizationCandidateRecord &Record = It->second;
2549
2550 InstructionCost Cost = Record.Cost;
2551 // For invokes we need to rematerialize each chain twice - for normal and
2552 // for unwind basic blocks. Model this by multiplying cost by two.
2553 if (isa<InvokeInst>(Call))
2554 Cost *= 2;
2555
2556 // If it's too expensive - skip it.
2557 if (Cost >= RematerializationThreshold)
2558 continue;
2559
2560 // Remove value from the live set
2561 LiveValuesToBeDeleted.push_back(LiveValue);
2562
2563 // Clone instructions and record them inside "Info" structure.
2564
2565 // Different cases for calls and invokes. For invokes we need to clone
2566 // instructions both on normal and unwind path.
2567 if (isa<CallInst>(Call)) {
2568 Instruction *InsertBefore = Call->getNextNode();
2569 assert(InsertBefore);
2570 Instruction *RematerializedValue =
2571 rematerializeChain(Record.ChainToBase, InsertBefore->getIterator(),
2572 Record.RootOfChain, PointerToBase[LiveValue]);
2573 Info.RematerializedValues[RematerializedValue] = LiveValue;
2574 } else {
2575 auto *Invoke = cast<InvokeInst>(Call);
2576
2577 BasicBlock::iterator NormalInsertBefore =
2578 Invoke->getNormalDest()->getFirstInsertionPt();
2579 BasicBlock::iterator UnwindInsertBefore =
2580 Invoke->getUnwindDest()->getFirstInsertionPt();
2581
2582 Instruction *NormalRematerializedValue =
2583 rematerializeChain(Record.ChainToBase, NormalInsertBefore,
2584 Record.RootOfChain, PointerToBase[LiveValue]);
2585 Instruction *UnwindRematerializedValue =
2586 rematerializeChain(Record.ChainToBase, UnwindInsertBefore,
2587 Record.RootOfChain, PointerToBase[LiveValue]);
2588
2589 Info.RematerializedValues[NormalRematerializedValue] = LiveValue;
2590 Info.RematerializedValues[UnwindRematerializedValue] = LiveValue;
2591 }
2592 }
2593
2594 // Remove rematerialized values from the live set.
2595 for (auto *LiveValue: LiveValuesToBeDeleted) {
2596 Info.LiveSet.remove(LiveValue);
2597 }
2598}
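// Net effect, sketched on IR (illustrative; names are hypothetical): instead
// of relocating a cheap derived pointer, only its base is relocated and the
// chain is replayed after the statepoint:
//   %base.relocated = call ptr addrspace(1)
//       @llvm.experimental.gc.relocate(token %tok, i32 0, i32 0)
//   %gep.remat = getelementptr i8, ptr addrspace(1) %base.relocated, i64 16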
2599
2600 static bool inlineGetBaseAndOffset(Function &F,
2601 SmallVectorImpl<CallInst *> &Intrinsics,
2602 DefiningValueMapTy &DVCache,
2603 IsKnownBaseMapTy &KnownBases) {
2604 auto &Context = F.getContext();
2605 auto &DL = F.getDataLayout();
2606 bool Changed = false;
2607
2608 for (auto *Callsite : Intrinsics)
2609 switch (Callsite->getIntrinsicID()) {
2610 case Intrinsic::experimental_gc_get_pointer_base: {
2611 Changed = true;
2612 Value *Base =
2613 findBasePointer(Callsite->getOperand(0), DVCache, KnownBases);
2614 assert(!DVCache.count(Callsite));
2615 Callsite->replaceAllUsesWith(Base);
2616 if (!Base->hasName())
2617 Base->takeName(Callsite);
2618 Callsite->eraseFromParent();
2619 break;
2620 }
2621 case Intrinsic::experimental_gc_get_pointer_offset: {
2622 Changed = true;
2623 Value *Derived = Callsite->getOperand(0);
2624 Value *Base = findBasePointer(Derived, DVCache, KnownBases);
2625 assert(!DVCache.count(Callsite));
2626 unsigned AddressSpace = Derived->getType()->getPointerAddressSpace();
2627 unsigned IntPtrSize = DL.getPointerSizeInBits(AddressSpace);
2628 IRBuilder<> Builder(Callsite);
2629 Value *BaseInt =
2630 Builder.CreatePtrToInt(Base, Type::getIntNTy(Context, IntPtrSize),
2631 suffixed_name_or(Base, ".int", ""));
2632 Value *DerivedInt =
2633 Builder.CreatePtrToInt(Derived, Type::getIntNTy(Context, IntPtrSize),
2634 suffixed_name_or(Derived, ".int", ""));
2635 Value *Offset = Builder.CreateSub(DerivedInt, BaseInt);
2636 Callsite->replaceAllUsesWith(Offset);
2637 Offset->takeName(Callsite);
2638 Callsite->eraseFromParent();
2639 break;
2640 }
2641 default:
2642 llvm_unreachable("Unknown intrinsic");
2643 }
2644
2645 return Changed;
2646}
2647
2648 static bool insertParsePoints(Function &F, DominatorTree &DT,
2649 TargetTransformInfo &TTI,
2650 SmallVectorImpl<CallBase *> &ToUpdate,
2651 DefiningValueMapTy &DVCache,
2652 IsKnownBaseMapTy &KnownBases) {
2653 std::unique_ptr<GCStrategy> GC = findGCStrategy(F);
2654
2655#ifndef NDEBUG
2656 // Validate the input
2657 std::set<CallBase *> Uniqued;
2658 Uniqued.insert(ToUpdate.begin(), ToUpdate.end());
2659 assert(Uniqued.size() == ToUpdate.size() && "no duplicates please!");
2660
2661 for (CallBase *Call : ToUpdate)
2662 assert(Call->getFunction() == &F);
2663#endif
2664
2665 // When inserting gc.relocates for invokes, we need to be able to insert at
2666 // the top of the successor blocks. See the comment on
2667 // normalForInvokeSafepoint on exactly what is needed. Note that this step
2668 // may restructure the CFG.
2669 for (CallBase *Call : ToUpdate) {
2670 auto *II = dyn_cast<InvokeInst>(Call);
2671 if (!II)
2672 continue;
2673 normalizeForInvokeSafepoint(II->getNormalDest(), II->getParent(), DT);
2674 normalizeForInvokeSafepoint(II->getUnwindDest(), II->getParent(), DT);
2675 }
2676
2677 // A list of dummy calls added to the IR to keep various values obviously
2678 // live in the IR. We'll remove all of these when done.
2679 SmallVector<CallInst *, 16> Holders;
2680
2681 // Insert a dummy call with all of the deopt operands we'll need for the
2682 // actual safepoint insertion as arguments. This ensures reference operands
2683 // in the deopt argument list are considered live through the safepoint (and
2684 // thus makes sure they get relocated.)
2685 for (CallBase *Call : ToUpdate) {
2686 SmallVector<Value *, 64> DeoptValues;
2687
2688 for (Value *Arg : GetDeoptBundleOperands(Call)) {
2689 assert(!isUnhandledGCPointerType(Arg->getType(), GC.get()) &&
2690 "support for FCA unimplemented");
2691 if (isHandledGCPointerType(Arg->getType(), GC.get()))
2692 DeoptValues.push_back(Arg);
2693 }
2694
2695 insertUseHolderAfter(Call, DeoptValues, Holders);
2696 }
2697
2698 SmallVector<PartiallyConstructedSafepointRecord, 64> Records(ToUpdate.size());
2699
2700 // A) Identify all gc pointers which are statically live at the given call
2701 // site.
2702 findLiveReferences(F, DT, ToUpdate, Records, GC.get());
2703
2704 /// Global mapping from live pointers to a base-defining-value.
2705 PointerToBaseTy PointerToBase;
2706
2707 // B) Find the base pointers for each live pointer
2708 for (size_t i = 0; i < Records.size(); i++) {
2709 PartiallyConstructedSafepointRecord &info = Records[i];
2710 findBasePointers(DT, DVCache, ToUpdate[i], info, PointerToBase, KnownBases);
2711 }
2712 if (PrintBasePointers) {
2713 errs() << "Base Pairs (w/o Relocation):\n";
2714 for (auto &Pair : PointerToBase) {
2715 errs() << " derived ";
2716 Pair.first->printAsOperand(errs(), false);
2717 errs() << " base ";
2718 Pair.second->printAsOperand(errs(), false);
2719 errs() << "\n";
2720
2721 }
2722 }
2723
2724 // The base phi insertion logic (for any safepoint) may have inserted new
2725 // instructions which are now live at some safepoint. The simplest such
2726 // example is:
2727 // loop:
2728 // phi a <-- will be a new base_phi here
2729 // safepoint 1 <-- that needs to be live here
2730 // gep a + 1
2731 // safepoint 2
2732 // br loop
2733 // We insert some dummy calls after each safepoint to definitely hold live
2734 // the base pointers which were identified for that safepoint. We'll then
2735 // ask liveness for _every_ base inserted to see what is now live. Then we
2736 // remove the dummy calls.
2737 Holders.reserve(Holders.size() + Records.size());
2738 for (size_t i = 0; i < Records.size(); i++) {
2739 PartiallyConstructedSafepointRecord &Info = Records[i];
2740
2741 SmallVector<Value *, 128> Bases;
2742 for (auto *Derived : Info.LiveSet) {
2743 assert(PointerToBase.count(Derived) && "Missed base for derived pointer");
2744 Bases.push_back(PointerToBase[Derived]);
2745 }
2746
2747 insertUseHolderAfter(ToUpdate[i], Bases, Holders);
2748 }
2749
2750 // By selecting base pointers, we've effectively inserted new uses. Thus, we
2751 // need to rerun liveness. We may *also* have inserted new defs, but that's
2752 // not the key issue.
2753 recomputeLiveInValues(F, DT, ToUpdate, Records, PointerToBase, GC.get());
2754
2755 if (PrintBasePointers) {
2756 errs() << "Base Pairs: (w/Relocation)\n";
2757 for (auto Pair : PointerToBase) {
2758 errs() << " derived ";
2759 Pair.first->printAsOperand(errs(), false);
2760 errs() << " base ";
2761 Pair.second->printAsOperand(errs(), false);
2762 errs() << "\n";
2763 }
2764 }
2765
2766 // It is possible that non-constant live variables have a constant base. For
2767 // example, a GEP with a variable offset from a global. In this case we can
2768 // remove it from the liveset. We already don't add constants to the liveset
2769 // because we assume they won't move at runtime and the GC doesn't need to be
2770 // informed about them. The same reasoning applies if the base is constant.
2771 // Note that the relocation placement code relies on this filtering for
2772 // correctness as it expects the base to be in the liveset, which isn't true
2773 // if the base is constant.
2774 for (auto &Info : Records) {
2775 Info.LiveSet.remove_if([&](Value *LiveV) {
2776 assert(PointerToBase.count(LiveV) && "Missed base for derived pointer");
2777 return isa<Constant>(PointerToBase[LiveV]);
2778 });
2779 }
2780
2781 for (CallInst *CI : Holders)
2782 CI->eraseFromParent();
2783
2784 Holders.clear();
2785
2786 // Compute the cost of possible re-materialization of derived pointers.
2787 RematCandTy RematerizationCandidates;
2788 findRematerializationCandidates(PointerToBase, RematerizationCandidates, TTI);
2789
2790 // In order to reduce live set of statepoint we might choose to rematerialize
2791 // some values instead of relocating them. This is purely an optimization and
2792 // does not influence correctness.
2793 // First try rematerialization at uses, then after statepoints.
2794 rematerializeLiveValuesAtUses(RematerizationCandidates, Records,
2795 PointerToBase);
2796 for (size_t i = 0; i < Records.size(); i++)
2797 rematerializeLiveValues(ToUpdate[i], Records[i], PointerToBase,
2798 RematerizationCandidates, TTI);
2799
2800 // We need this to safely RAUW and delete call or invoke return values that
2801 // may themselves be live over a statepoint. For details, please see usage in
2802 // makeStatepointExplicitImpl.
2803 std::vector<DeferredReplacement> Replacements;
2804
2805 // Now run through and replace the existing statepoints with new ones with
2806 // the live variables listed. We do not yet update uses of the values being
2807 // relocated. We have references to live variables that need to
2808 // survive to the last iteration of this loop. (By construction, the
2809 // previous statepoint cannot be a live variable, thus we can and do remove
2810 // the old statepoint calls as we go.)
2811 for (size_t i = 0; i < Records.size(); i++)
2812 makeStatepointExplicit(DT, ToUpdate[i], Records[i], Replacements,
2813 PointerToBase, GC.get());
2814
2815 ToUpdate.clear(); // prevent accidental use of invalid calls.
2816
2817 for (auto &PR : Replacements)
2818 PR.doReplacement();
2819
2820 Replacements.clear();
2821
2822 for (auto &Info : Records) {
2823 // These live sets may contain stale Value pointers, since we replaced calls
2824 // with operand bundles with calls wrapped in gc.statepoint, and some of
2825 // those calls may have been def'ing live gc pointers. Clear these out to
2826 // avoid accidentally using them.
2827 //
2828 // TODO: We should create a separate data structure that does not contain
2829 // these live sets, and migrate to using that data structure from this point
2830 // onward.
2831 Info.LiveSet.clear();
2832 }
2833 PointerToBase.clear();
2834
2835 // Do all the fixups of the original live variables to their relocated selves.
2836 // A SmallSetVector is used to collect live variables while retaining the
2837 // order in which we add them, which is important for reproducible tests.
2838 SmallSetVector<Value *, 16> Live;
2839 for (const PartiallyConstructedSafepointRecord &Info : Records) {
2840 // We can't simply save the live set from the original insertion. One of
2841 // the live values might be the result of a call which needs a safepoint.
2842 // That Value* no longer exists and we need to use the new gc_result.
2843 // Thankfully, the live set is embedded in the statepoint (and updated), so
2844 // we just grab that.
2845 Live.insert_range(Info.StatepointToken->gc_live());
2846#ifndef NDEBUG
2847 // Do some basic validation checking on our liveness results before
2848 // performing relocation. Relocation can and will turn mistakes in liveness
2849 // results into nonsensical code which is much harder to debug.
2850 // TODO: It would be nice to test consistency as well
2851 assert(DT.isReachableFromEntry(Info.StatepointToken->getParent()) &&
2852 "statepoint must be reachable or liveness is meaningless");
2853 for (Value *V : Info.StatepointToken->gc_live()) {
2854 if (!isa<Instruction>(V))
2855 // Non-instruction values trivially dominate all possible uses
2856 continue;
2857 auto *LiveInst = cast<Instruction>(V);
2858 assert(DT.isReachableFromEntry(LiveInst->getParent()) &&
2859 "unreachable values should never be live");
2860 assert(DT.dominates(LiveInst, Info.StatepointToken) &&
2861 "basic SSA liveness expectation violated by liveness analysis");
2862 }
2863#endif
2864 }
2865
2866#ifndef NDEBUG
2867 // Validation check
2868 for (auto *Ptr : Live)
2869 assert(isHandledGCPointerType(Ptr->getType(), GC.get()) &&
2870 "must be a gc pointer type");
2871#endif
2872
2873 relocationViaAlloca(F, DT, Live.getArrayRef(), Records);
2874 return !Records.empty();
2875}
2876
2877// List of all parameter and return attributes which must be stripped when
2878// lowering from the abstract machine model. Note that we list attributes
2879 // here which aren't valid as return attributes; that is okay.
2880 static AttributeMask getParamAndReturnAttributesToRemove() {
2881 AttributeMask R;
2882 R.addAttribute(Attribute::Dereferenceable);
2883 R.addAttribute(Attribute::DereferenceableOrNull);
2884 R.addAttribute(Attribute::ReadNone);
2885 R.addAttribute(Attribute::ReadOnly);
2886 R.addAttribute(Attribute::WriteOnly);
2887 R.addAttribute(Attribute::NoAlias);
2888 R.addAttribute(Attribute::NoFree);
2889 return R;
2890}
2891
2892 static void stripNonValidAttributesFromPrototype(Function &F) {
2893 LLVMContext &Ctx = F.getContext();
2894
2895 // Intrinsics are very delicate. Lowering sometimes depends on the presence
2896 // of certain attributes for correctness, but we may have also inferred
2897 // additional ones in the abstract machine model which need to be stripped. This
2898 // assumes that the attributes defined in Intrinsic.td are conservatively
2899 // correct for both physical and abstract model.
2900 if (Intrinsic::ID id = F.getIntrinsicID()) {
2901 F.setAttributes(Intrinsic::getAttributes(Ctx, id, F.getFunctionType()));
2902 return;
2903 }
2904
2905 AttributeMask R = getParamAndReturnAttributesToRemove();
2906 for (Argument &A : F.args())
2907 if (isa<PointerType>(A.getType()))
2908 F.removeParamAttrs(A.getArgNo(), R);
2909
2910 if (isa<PointerType>(F.getReturnType()))
2911 F.removeRetAttrs(R);
2912
2913 for (auto Attr : FnAttrsToStrip)
2914 F.removeFnAttr(Attr);
2915}
2916
2917/// Certain metadata on instructions are invalid after running RS4GC.
2918/// Optimizations that run after RS4GC can incorrectly use this metadata to
2919/// optimize functions. We drop such metadata on the instruction.
2920 static void stripInvalidMetadataFromInstruction(Instruction &I) {
2921 if (!isa<LoadInst>(I) && !isa<StoreInst>(I))
2922 return;
2923 // These are the metadata kinds that are still valid on loads and stores
2924 // after RS4GC.
2925 // The metadata implying dereferenceability and noalias are (conservatively)
2926 // dropped. This is because semantically, after RewriteStatepointsForGC runs,
2927 // all calls to gc.statepoint "free" the entire heap. Also, gc.statepoint can
2928 // touch the entire heap including noalias objects. Note: The reasoning is
2929 // same as stripping the dereferenceability and noalias attributes that are
2930 // analogous to the metadata counterparts.
2931 // We also drop the invariant.load metadata on the load because that metadata
2932 // implies the address operand to the load points to memory that is never
2933 // changed once it became dereferenceable. This is no longer true after RS4GC.
2934 // Similar reasoning applies to invariant.group metadata, which applies to
2935 // loads within a group.
2936 unsigned ValidMetadataAfterRS4GC[] = {LLVMContext::MD_tbaa,
2937 LLVMContext::MD_range,
2938 LLVMContext::MD_alias_scope,
2939 LLVMContext::MD_nontemporal,
2940 LLVMContext::MD_nonnull,
2941 LLVMContext::MD_align,
2942 LLVMContext::MD_type};
2943
2944 // Drops all metadata on the instruction other than ValidMetadataAfterRS4GC.
2945 I.dropUnknownNonDebugMetadata(ValidMetadataAfterRS4GC);
2946}
2947
2947
2948 static void stripNonValidDataFromBody(Function &F) {
2949 if (F.empty())
2950 return;
2951
2952 LLVMContext &Ctx = F.getContext();
2953 MDBuilder Builder(Ctx);
2954
2955 // Set of invariant.start instructions that we need to remove.
2956 // Use this to avoid invalidating the instruction iterator.
2957 SmallVector<IntrinsicInst*, 12> InvariantStartInstructions;
2958
2959 for (Instruction &I : instructions(F)) {
2960 // invariant.start on memory location implies that the referenced memory
2961 // location is constant and unchanging. This is no longer true after
2962 // RewriteStatepointsForGC runs because there can be calls to gc.statepoint
2963 // which frees the entire heap and the presence of invariant.start allows
2964 // the optimizer to sink the load of a memory location past a statepoint,
2965 // which is incorrect.
2966 if (auto *II = dyn_cast<IntrinsicInst>(&I))
2967 if (II->getIntrinsicID() == Intrinsic::invariant_start) {
2968 InvariantStartInstructions.push_back(II);
2969 continue;
2970 }
2971
2972 if (MDNode *Tag = I.getMetadata(LLVMContext::MD_tbaa)) {
2973 MDNode *MutableTBAA = Builder.createMutableTBAAAccessTag(Tag);
2974 I.setMetadata(LLVMContext::MD_tbaa, MutableTBAA);
2975 }
2976
2977 stripInvalidMetadataFromInstruction(I);
2978
2979 AttributeMask R = getParamAndReturnAttributesToRemove();
2980 if (auto *Call = dyn_cast<CallBase>(&I)) {
2981 for (int i = 0, e = Call->arg_size(); i != e; i++)
2982 if (isa<PointerType>(Call->getArgOperand(i)->getType()))
2983 Call->removeParamAttrs(i, R);
2984 if (isa<PointerType>(Call->getType()))
2985 Call->removeRetAttrs(R);
2986 }
2987 }
2988
2989 // Delete the invariant.start instructions and RAUW poison.
2990 for (auto *II : InvariantStartInstructions) {
2991 II->replaceAllUsesWith(PoisonValue::get(II->getType()));
2992 II->eraseFromParent();
2993 }
2994}
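// Illustrative sketch (hypothetical IR): a call such as
//   %inv = call ptr @llvm.invariant.start.p0(i64 8, ptr %slot)
// is queued above, then deleted with its uses replaced by poison, so nothing
// downstream can rely on %slot staying constant across a statepoint.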
2995
2996/// Looks up the GC strategy for a given function, returning null if the
2997/// function doesn't have a GC tag.
2998static std::unique_ptr<GCStrategy> findGCStrategy(Function &F) {
2999 if (!F.hasGC())
3000 return nullptr;
3001
3002 return getGCStrategy(F.getGC());
3003}
3004
3005/// Returns true if this function should be rewritten by this pass. The main
3006/// point of this function is as an extension point for custom logic.
3007 static bool shouldRewriteStatepointsIn(Function &F) {
3008 if (!F.hasGC())
3009 return false;
3010
3011 std::unique_ptr<GCStrategy> Strategy = findGCStrategy(F);
3012
3013 assert(Strategy && "GC strategy is required by function, but was not found");
3014
3015 return Strategy->useRS4GC();
3016}
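// A minimal sketch of how a collector opts in; the flag names follow the
// in-tree "statepoint-example" strategy and GCStrategy.h, so treat this as
// an assumption to verify there rather than a definitive recipe:
//   class MyGC : public GCStrategy {
//   public:
//     MyGC() {
//       UseStatepoints = true;
//       UseRS4GC = true; // useRS4GC() now returns true, enabling this pass
//     }
//   };
//   static GCRegistry::Add<MyGC> X("my-gc", "example statepoint collector");
// A function participates by carrying `gc "my-gc"` on its definition.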
3017
3018static void stripNonValidData(Module &M) {
3019#ifndef NDEBUG
3020 assert(llvm::any_of(M, shouldRewriteStatepointsIn) && "precondition!");
3021#endif
3022
3023 for (Function &F : M)
3024 stripNonValidAttributesFromPrototype(F);
3025
3026 for (Function &F : M)
3027 stripNonValidDataFromBody(F);
3028}
3029
3030 bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
3031 TargetTransformInfo &TTI,
3032 const TargetLibraryInfo &TLI) {
3033 assert(!F.isDeclaration() && !F.empty() &&
3034 "need function body to rewrite statepoints in");
3035 assert(shouldRewriteStatepointsIn(F) && "mismatch in rewrite decision");
3036
3037 auto NeedsRewrite = [&TLI](Instruction &I) {
3038 if (const auto *Call = dyn_cast<CallBase>(&I)) {
3039 if (isa<GCStatepointInst>(Call))
3040 return false;
3041 if (callsGCLeafFunction(Call, TLI))
3042 return false;
3043
3044 // Normally it's up to the frontend to make sure that non-leaf calls also
3045 // have proper deopt state if it is required. We make an exception for
3046 // element atomic memcpy/memmove intrinsics here. Unlike other intrinsics,
3047 // these are non-leaf by default. They might be generated by the optimizer,
3048 // which doesn't know how to produce a proper deopt state. So if we see a
3049 // non-leaf memcpy/memmove without deopt state just treat it as a leaf
3050 // copy and don't produce a statepoint.
3051 if (!AllowStatepointWithNoDeoptInfo && !Call->hasDeoptState()) {
3052 assert(isa<AnyMemTransferInst>(Call) &&
3053 cast<AnyMemTransferInst>(Call)->isAtomic() &&
3054 "Don't expect any other calls here!");
3055 return false;
3056 }
3057 return true;
3058 }
3059 return false;
3060 };
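// Illustrative classification (hypothetical callees) under the predicate
// above:
//   call void @may_collect() [ "deopt"(i32 0) ] ; needs a statepoint
//   %r = call double @llvm.sqrt.f64(double %x)  ; GC leaf, skipped
// Existing gc.statepoint calls are skipped as well, so rerunning the pass
// does not wrap them twice.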
3061
3062 // Delete any unreachable statepoints so that we don't have unrewritten
3063 // statepoints surviving this pass. This makes testing easier and the
3064 // resulting IR less confusing to human readers.
3065 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
3066 bool MadeChange = removeUnreachableBlocks(F, &DTU);
3067 // Flush the Dominator Tree.
3068 DTU.getDomTree();
3069
3070 // Gather all the statepoints which need rewriting. Be careful to only
3071 // consider those in reachable code since we need to ask dominance queries
3072 // when rewriting. We'll delete the unreachable ones in a moment.
3073 SmallVector<CallBase *, 64> ParsePointNeeded;
3074 SmallVector<CallInst *, 64> Intrinsics;
3075 for (Instruction &I : instructions(F)) {
3076 // TODO: only the ones with the flag set!
3077 if (NeedsRewrite(I)) {
3078 // NOTE removeUnreachableBlocks() is stronger than
3079 // DominatorTree::isReachableFromEntry(). In other words
3080 // removeUnreachableBlocks can remove some blocks for which
3081 // isReachableFromEntry() returns true.
3082 assert(DT.isReachableFromEntry(I.getParent()) &&
3083 "no unreachable blocks expected");
3084 ParsePointNeeded.push_back(cast<CallBase>(&I));
3085 }
3086 if (auto *CI = dyn_cast<CallInst>(&I))
3087 if (CI->getIntrinsicID() == Intrinsic::experimental_gc_get_pointer_base ||
3088 CI->getIntrinsicID() == Intrinsic::experimental_gc_get_pointer_offset)
3089 Intrinsics.emplace_back(CI);
3090 }
3091
3092 // Return early if no work to do.
3093 if (ParsePointNeeded.empty() && Intrinsics.empty())
3094 return MadeChange;
3095
3096 // As a prepass, go ahead and aggressively destroy single entry phi nodes.
3097 // These are created by LCSSA. They have the effect of increasing the size
3098 // of liveness sets for no good reason. It may be harder to do this post
3099 // insertion since relocations and base phis can confuse things.
3100 for (BasicBlock &BB : F)
3101 if (BB.getUniquePredecessor())
3102 MadeChange |= FoldSingleEntryPHINodes(&BB);
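// Illustrative sketch (hypothetical IR) of the degenerate LCSSA phi being
// folded: in a block whose unique predecessor is %loop.exit,
//   %p.lcssa = phi ptr addrspace(1) [ %p, %loop.exit ]
// is replaced by a direct use of %p, so it no longer inflates the live set
// of any statepoint inserted later in this function.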
3103
3104 // Before we start introducing relocations, we want to tweak the IR a bit to
3105 // avoid unfortunate code generation effects. The main example is that we
3106 // want to try to make sure the comparison feeding a branch is after any
3107 // safepoints. Otherwise, we end up with a comparison of pre-relocation
3108 // values feeding a branch after relocation. This is semantically correct,
3109 // but results in extra register pressure since both the pre-relocation and
3110 // post-relocation copies must be available in registers. For code without
3111 // relocations this is handled elsewhere, but teaching the scheduler to
3112 // reverse the transform we're about to do would be slightly complex.
3113 // Note: This may extend the live range of the inputs to the icmp and thus
3114 // increase the liveset of any statepoint we move over. This is profitable
3115 // as long as all statepoints are in rare blocks. If we had in-register
3116 // lowering for live values this would be a much safer transform.
3117 auto getConditionInst = [](Instruction *TI) -> Instruction * {
3118 if (auto *BI = dyn_cast<BranchInst>(TI))
3119 if (BI->isConditional())
3120 return dyn_cast<Instruction>(BI->getCondition());
3121 // TODO: Extend this to handle switches
3122 return nullptr;
3123 };
3124 for (BasicBlock &BB : F) {
3125 Instruction *TI = BB.getTerminator();
3126 if (auto *Cond = getConditionInst(TI))
3127 // TODO: Handle more than just ICmps here. We should be able to move
3128 // most instructions without side effects or memory access.
3129 if (isa<ICmpInst>(Cond) && Cond->hasOneUse()) {
3130 MadeChange = true;
3131 Cond->moveBefore(TI->getIterator());
3132 }
3133 }
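// Illustrative before/after (hypothetical IR) for the move above:
//   %cmp = icmp eq ptr addrspace(1) %a, %b
//   call void @foo() [ "deopt"() ]            ; safepoint; %a/%b relocate
//   br i1 %cmp, label %t, label %f
// becomes
//   call void @foo() [ "deopt"() ]
//   %cmp = icmp eq ptr addrspace(1) %a, %b    ; consumes the relocated copies
//   br i1 %cmp, label %t, label %f
// Once relocation is made explicit, only one copy of each pointer stays live
// across the call.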
3134
3135 // Nasty workaround - The base computation code in the main algorithm doesn't
3136 // consider the fact that a GEP can be used to convert a scalar to a vector.
3137 // The right fix for this is to integrate GEPs into the base rewriting
3138 // algorithm properly; this is just a short-term workaround to prevent
3139 // crashes by canonicalizing such GEPs into fully vector GEPs.
3140 for (Instruction &I : instructions(F)) {
3141 if (!isa<GetElementPtrInst>(I))
3142 continue;
3143
3144 unsigned VF = 0;
3145 for (unsigned i = 0; i < I.getNumOperands(); i++)
3146 if (auto *OpndVTy = dyn_cast<VectorType>(I.getOperand(i)->getType())) {
3147 assert(VF == 0 ||
3148 VF == cast<FixedVectorType>(OpndVTy)->getNumElements());
3149 VF = cast<FixedVectorType>(OpndVTy)->getNumElements();
3150 }
3151
3152 // It's the vector to scalar traversal through the pointer operand which
3153 // confuses base pointer rewriting, so limit ourselves to that case.
3154 if (!I.getOperand(0)->getType()->isVectorTy() && VF != 0) {
3155 IRBuilder<> B(&I);
3156 auto *Splat = B.CreateVectorSplat(VF, I.getOperand(0));
3157 I.setOperand(0, Splat);
3158 MadeChange = true;
3159 }
3160 }
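// Illustrative before/after (hypothetical IR) for the canonicalization above:
//   %g = getelementptr i64, ptr addrspace(1) %base, <2 x i64> %idx
// has a scalar pointer operand but a vector result; it becomes, in sketch,
//   %base.splat = <2 x ptr addrspace(1)> splat of %base
//   %g = getelementptr i64, <2 x ptr addrspace(1)> %base.splat, <2 x i64> %idx
// so the base-pointer walk only ever sees a vector-typed pointer operand.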
3161
3162 // Cache the 'defining value' relation used in the computation and
3163 // insertion of base phis and selects. This ensures that we don't insert
3164 // large numbers of duplicate base_phis. Use one cache for both
3165 // inlineGetBaseAndOffset() and insertParsePoints().
3166 DefiningValueMapTy DVCache;
3167
3168 // Mapping between base values and a flag indicating whether each is a
3169 // known base or not.
3170 IsKnownBaseMapTy KnownBases;
3171
3172 if (!Intrinsics.empty())
3173 // Inline @gc.get.pointer.base() and @gc.get.pointer.offset() before finding
3174 // live references.
3175 MadeChange |= inlineGetBaseAndOffset(F, Intrinsics, DVCache, KnownBases);
3176
3177 if (!ParsePointNeeded.empty())
3178 MadeChange |=
3179 insertParsePoints(F, DT, TTI, ParsePointNeeded, DVCache, KnownBases);
3180
3181 return MadeChange;
3182}
3183
3184// liveness computation via standard dataflow
3185// -------------------------------------------------------------------
3186
3187// TODO: Consider using bitvectors for liveness, the set of potentially
3188// interesting values should be small and easy to pre-compute.
3189
3190/// Compute the live-in set for the range [Begin, End), starting from
3191/// the live-out set of the basic block.
3192 static void computeLiveInValues(BasicBlock::reverse_iterator Begin,
3193 BasicBlock::reverse_iterator End,
3194 SetVector<Value *> &LiveTmp, GCStrategy *GC) {
3195 for (auto &I : make_range(Begin, End)) {
3196 // KILL/Def - Remove this definition from LiveIn
3197 LiveTmp.remove(&I);
3198
3199 // Don't consider *uses* in PHI nodes; we handle their contribution to
3200 // predecessor blocks when we seed the LiveOut sets.
3201 if (isa<PHINode>(I))
3202 continue;
3203
3204 // USE - Add to the LiveIn set for this instruction
3205 for (Value *V : I.operands()) {
3206 assert(!isUnhandledGCPointerType(V->getType(), GC) &&
3207 "support for FCA unimplemented");
3208 if (isHandledGCPointerType(V->getType(), GC) && !isa<Constant>(V)) {
3209 // The choice to exclude all things constant here is slightly subtle.
3210 // There are two independent reasons:
3211 // - We assume that things which are constant (from LLVM's definition)
3212 // do not move at runtime. For example, the address of a global
3213 // variable is fixed, even though its contents may not be.
3214 // - Second, we can't disallow arbitrary inttoptr constants even
3215 // if the language frontend does. Optimization passes are free to
3216 // locally exploit facts without respect to global reachability. This
3217 // can create sections of code which are dynamically unreachable and
3218 // contain just about anything. (see constants.ll in tests)
3219 LiveTmp.insert(V);
3220 }
3221 }
3222 }
3223}
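// Worked micro-example (hypothetical block, scanned bottom-up by the loop
// above), starting from LiveTmp = { %c }:
//   %a = load ptr addrspace(1), ptr addrspace(1) %p
//   %c = call ptr addrspace(1) @f(ptr addrspace(1) %a)
// Visiting %c removes %c and adds its operand %a; visiting %a then removes
// %a and adds %p, leaving LiveTmp = { %p } as the block's live-in set.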
3224
3225 static void computeLiveOutSeed(BasicBlock *BB, SetVector<Value *> &LiveTmp,
3226 GCStrategy *GC) {
3227 for (BasicBlock *Succ : successors(BB)) {
3228 for (auto &I : *Succ) {
3229 PHINode *PN = dyn_cast<PHINode>(&I);
3230 if (!PN)
3231 break;
3232
3233 Value *V = PN->getIncomingValueForBlock(BB);
3234 assert(!isUnhandledGCPointerType(V->getType(), GC) &&
3235 "support for FCA unimplemented");
3236 if (isHandledGCPointerType(V->getType(), GC) && !isa<Constant>(V))
3237 LiveTmp.insert(V);
3238 }
3239 }
3240}
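// Illustrative sketch (hypothetical IR): if a successor block starts with
//   %m = phi ptr addrspace(1) [ %x, %this ], [ %y, %other ]
// then seeding from block %this adds only %x to LiveTmp, matching the rule
// above that a phi's *uses* are charged to the corresponding predecessor.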
3241
3242 static SetVector<Value *> computeKillSet(BasicBlock *BB, GCStrategy *GC) {
3243 SetVector<Value *> KillSet;
3244 for (Instruction &I : *BB)
3245 if (isHandledGCPointerType(I.getType(), GC))
3246 KillSet.insert(&I);
3247 return KillSet;
3248}
3249
3250#ifndef NDEBUG
3251/// Check that the items in 'Live' dominate 'TI'. This is used as a basic
3252/// validation check for the liveness computation.
3253 static void checkBasicSSA(DominatorTree &DT, SetVector<Value *> &Live,
3254 Instruction *TI, bool TermOkay = false) {
3255 for (Value *V : Live) {
3256 if (auto *I = dyn_cast<Instruction>(V)) {
3257 // The terminator can be a member of the LiveOut set. LLVM's definition
3258 // of instruction dominance states that V does not dominate itself. As
3259 // such, we need to special case this to allow it.
3260 if (TermOkay && TI == I)
3261 continue;
3262 assert(DT.dominates(I, TI) &&
3263 "basic SSA liveness expectation violated by liveness analysis");
3264 }
3265 }
3266}
3267
3268/// Check that all the liveness sets used during the computation of liveness
3269/// obey basic SSA properties. This is useful for finding cases where we miss
3270/// a def.
3271static void checkBasicSSA(DominatorTree &DT, GCPtrLivenessData &Data,
3272 BasicBlock &BB) {
3273 checkBasicSSA(DT, Data.LiveSet[&BB], BB.getTerminator());
3274 checkBasicSSA(DT, Data.LiveOut[&BB], BB.getTerminator(), true);
3275 checkBasicSSA(DT, Data.LiveIn[&BB], BB.getTerminator());
3276}
3277#endif
3278
3279 static void computeLiveInValues(DominatorTree &DT, Function &F,
3280 GCPtrLivenessData &Data, GCStrategy *GC) {
3281 SmallSetVector<BasicBlock *, 32> Worklist;
3282
3283 // Seed the liveness for each individual block
3284 for (BasicBlock &BB : F) {
3285 Data.KillSet[&BB] = computeKillSet(&BB, GC);
3286 auto &LiveSet = Data.LiveSet[&BB];
3287 LiveSet.clear();
3288 computeLiveInValues(BB.rbegin(), BB.rend(), LiveSet, GC);
3289
3290#ifndef NDEBUG
3291 for (Value *Kill : Data.KillSet[&BB])
3292 assert(!Data.LiveSet[&BB].count(Kill) && "live set contains kill");
3293#endif
3294
3295 auto &Out = Data.LiveOut[&BB] = SetVector<Value *>();
3296 computeLiveOutSeed(&BB, Out, GC);
3297 auto &In = Data.LiveIn[&BB] = Data.LiveSet[&BB];
3298 In.set_union(Out);
3299 In.set_subtract(Data.KillSet[&BB]);
3300 if (!In.empty())
3301 Worklist.insert_range(predecessors(&BB));
3302 }
3303
3304 // Propagate that liveness until stable
3305 while (!Worklist.empty()) {
3306 BasicBlock *BB = Worklist.pop_back_val();
3307
3308 // Compute our new liveout set, then exit early if it hasn't changed despite
3309 // the contribution of our successors.
3310 SetVector<Value *> &LiveOut = Data.LiveOut[BB];
3311 const auto OldLiveOutSize = LiveOut.size();
3312 for (BasicBlock *Succ : successors(BB)) {
3313 assert(Data.LiveIn.count(Succ));
3314 LiveOut.set_union(Data.LiveIn[Succ]);
3315 }
3316 // assert: OldLiveOut is a subset of LiveOut
3317 if (OldLiveOutSize == LiveOut.size()) {
3318 // If the sets are the same size, then we didn't actually add anything
3319 // when unioning our successors' LiveIn. Thus, the LiveIn of this block
3320 // hasn't changed.
3321 continue;
3322 }
3323
3324 // Apply the effects of this basic block
3325 SetVector<Value *> LiveTmp = LiveOut;
3326 LiveTmp.set_union(Data.LiveSet[BB]);
3327 LiveTmp.set_subtract(Data.KillSet[BB]);
3328
3329 assert(Data.LiveIn.count(BB));
3330 SetVector<Value *> &LiveIn = Data.LiveIn[BB];
3331 // assert: LiveIn is a subset of LiveTmp
3332 if (LiveIn.size() != LiveTmp.size()) {
3333 LiveIn = std::move(LiveTmp);
3334 Worklist.insert_range(predecessors(BB));
3335 }
3336 } // while (!Worklist.empty())
3337
3338#ifndef NDEBUG
3339 // Verify our output against SSA properties. This helps catch any
3340 // missing kills during the above iteration.
3341 for (BasicBlock &BB : F)
3342 checkBasicSSA(DT, Data, BB);
3343#endif
3344}
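// Restated as the standard backwards dataflow equations the loop above
// iterates to a fixed point (a summary of the code, not extra behavior):
//   LiveOut(BB) = union over successors S of LiveIn(S), plus phi seeds
//   LiveIn(BB)  = (LiveOut(BB) u LiveSet(BB)) \ KillSet(BB)
// Set union only ever grows the sets, so the iteration terminates.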
3345
3346static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
3347 StatepointLiveSetTy &Out, GCStrategy *GC) {
3348 BasicBlock *BB = Inst->getParent();
3349
3350 // Note: The copy is intentional and required
3351 assert(Data.LiveOut.count(BB));
3352 SetVector<Value *> LiveOut = Data.LiveOut[BB];
3353
3354 // We want to handle the statepoint itself oddly. Its
3355 // call result is not live (normal), nor are its arguments
3356 // (unless they're used again later). This adjustment is
3357 // specifically what we need to relocate.
3358 computeLiveInValues(BB->rbegin(), ++Inst->getIterator().getReverse(), LiveOut,
3359 GC);
3360 LiveOut.remove(Inst);
3361 Out.insert_range(LiveOut);
3362}
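// Example of the adjustment above (hypothetical IR): for
//   %res = call ptr addrspace(1) @f(ptr addrspace(1) %o)   ; <- Inst
// %o lands in Out only if it has uses after Inst, and %res itself is
// removed, matching the "handle the statepoint oddly" note above.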
3363
3364static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
3365 CallBase *Call,
3366 PartiallyConstructedSafepointRecord &Info,
3367 PointerToBaseTy &PointerToBase,
3368 GCStrategy *GC) {
3369 StatepointLiveSetTy Updated;
3370 findLiveSetAtInst(Call, RevisedLivenessData, Updated, GC);
3371
3372 // We may have base pointers which are now live that weren't before. We need
3373 // to update the PointerToBase structure to reflect this.
3374 for (auto *V : Updated)
3375 PointerToBase.insert({ V, V });
3376
3377 Info.LiveSet = Updated;
3378}