LLVM 18.0.0git
RewriteStatepointsForGC.cpp
Go to the documentation of this file.
1//===- RewriteStatepointsForGC.cpp - Make GC relocations explicit ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Rewrite call/invoke instructions so as to make potential relocations
10// performed by the garbage collector explicit in the IR.
11//
12//===----------------------------------------------------------------------===//
13
15
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/DenseSet.h"
19#include "llvm/ADT/MapVector.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/SetVector.h"
22#include "llvm/ADT/SmallSet.h"
24#include "llvm/ADT/StringRef.h"
29#include "llvm/IR/Argument.h"
31#include "llvm/IR/Attributes.h"
32#include "llvm/IR/BasicBlock.h"
33#include "llvm/IR/CallingConv.h"
34#include "llvm/IR/Constant.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/DataLayout.h"
38#include "llvm/IR/Dominators.h"
39#include "llvm/IR/Function.h"
40#include "llvm/IR/GCStrategy.h"
41#include "llvm/IR/IRBuilder.h"
43#include "llvm/IR/InstrTypes.h"
44#include "llvm/IR/Instruction.h"
47#include "llvm/IR/Intrinsics.h"
48#include "llvm/IR/LLVMContext.h"
49#include "llvm/IR/MDBuilder.h"
50#include "llvm/IR/Metadata.h"
51#include "llvm/IR/Module.h"
52#include "llvm/IR/Statepoint.h"
53#include "llvm/IR/Type.h"
54#include "llvm/IR/User.h"
55#include "llvm/IR/Value.h"
56#include "llvm/IR/ValueHandle.h"
60#include "llvm/Support/Debug.h"
66#include <algorithm>
67#include <cassert>
68#include <cstddef>
69#include <cstdint>
70#include <iterator>
71#include <optional>
72#include <set>
73#include <string>
74#include <utility>
75#include <vector>
76
77#define DEBUG_TYPE "rewrite-statepoints-for-gc"
78
79using namespace llvm;
80
81// Print the liveset found at the insert location
82static cl::opt<bool> PrintLiveSet("spp-print-liveset", cl::Hidden,
83 cl::init(false));
84static cl::opt<bool> PrintLiveSetSize("spp-print-liveset-size", cl::Hidden,
85 cl::init(false));
86
87// Print out the base pointers for debugging
88static cl::opt<bool> PrintBasePointers("spp-print-base-pointers", cl::Hidden,
89 cl::init(false));
90
91// Cost threshold measuring when it is profitable to rematerialize value instead
92// of relocating it
94RematerializationThreshold("spp-rematerialization-threshold", cl::Hidden,
95 cl::init(6));
96
97#ifdef EXPENSIVE_CHECKS
98static bool ClobberNonLive = true;
99#else
100static bool ClobberNonLive = false;
101#endif
102
103static cl::opt<bool, true> ClobberNonLiveOverride("rs4gc-clobber-non-live",
105 cl::Hidden);
106
107static cl::opt<bool>
108 AllowStatepointWithNoDeoptInfo("rs4gc-allow-statepoint-with-no-deopt-info",
109 cl::Hidden, cl::init(true));
110
111static cl::opt<bool> RematDerivedAtUses("rs4gc-remat-derived-at-uses",
112 cl::Hidden, cl::init(true));
113
114/// The IR fed into RewriteStatepointsForGC may have had attributes and
115/// metadata implying dereferenceability that are no longer valid/correct after
116/// RewriteStatepointsForGC has run. This is because semantically, after
117/// RewriteStatepointsForGC runs, all calls to gc.statepoint "free" the entire
118/// heap. stripNonValidData (conservatively) restores
119/// correctness by erasing all attributes in the module that externally imply
120/// dereferenceability. Similar reasoning also applies to the noalias
121/// attributes and metadata. gc.statepoint can touch the entire heap including
122/// noalias objects.
123/// Apart from attributes and metadata, we also remove instructions that imply
124/// constant physical memory: llvm.invariant.start.
125static void stripNonValidData(Module &M);
126
127// Find the GC strategy for a function, or null if it doesn't have one.
128static std::unique_ptr<GCStrategy> findGCStrategy(Function &F);
129
131
134 bool Changed = false;
135 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
136 for (Function &F : M) {
137 // Nothing to do for declarations.
138 if (F.isDeclaration() || F.empty())
139 continue;
140
141 // Policy choice says not to rewrite - the most common reason is that we're
142 // compiling code without a GCStrategy.
144 continue;
145
148 auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
149 Changed |= runOnFunction(F, DT, TTI, TLI);
150 }
151 if (!Changed)
152 return PreservedAnalyses::all();
153
154 // stripNonValidData asserts that shouldRewriteStatepointsIn
155 // returns true for at least one function in the module. Since at least
156 // one function changed, we know that the precondition is satisfied.
158
162 return PA;
163}
164
165namespace {
166
167struct GCPtrLivenessData {
168 /// Values defined in this block.
170
171 /// Values used in this block (and thus live); does not included values
172 /// killed within this block.
174
175 /// Values live into this basic block (i.e. used by any
176 /// instruction in this basic block or ones reachable from here)
178
179 /// Values live out of this basic block (i.e. live into
180 /// any successor block)
182};
183
184// The type of the internal cache used inside the findBasePointers family
185// of functions. From the callers perspective, this is an opaque type and
186// should not be inspected.
187//
188// In the actual implementation this caches two relations:
189// - The base relation itself (i.e. this pointer is based on that one)
190// - The base defining value relation (i.e. before base_phi insertion)
191// Generally, after the execution of a full findBasePointer call, only the
192// base relation will remain. Internally, we add a mixture of the two
193// types, then update all the second type to the first type
194using DefiningValueMapTy = MapVector<Value *, Value *>;
195using IsKnownBaseMapTy = MapVector<Value *, bool>;
196using PointerToBaseTy = MapVector<Value *, Value *>;
197using StatepointLiveSetTy = SetVector<Value *>;
198using RematerializedValueMapTy =
200
/// Book-keeping gathered for a single safepoint while it is being rewritten;
/// filled in incrementally across the phases of the pass.
struct PartiallyConstructedSafepointRecord {
  /// The set of values known to be live across this safepoint
  StatepointLiveSetTy LiveSet;

  /// The *new* gc.statepoint instruction itself. This produces the token
  /// that normal path gc.relocates and the gc.result are tied to.
  GCStatepointInst *StatepointToken;

  /// Instruction to which exceptional gc relocates are attached
  /// Makes it easier to iterate through them during relocationViaAlloca.
  Instruction *UnwindToken;

  /// Record live values we are rematerialized instead of relocating.
  /// They are not included into 'LiveSet' field.
  /// Maps rematerialized copy to it's original value.
  RematerializedValueMapTy RematerializedValues;
};
218
219struct RematerizlizationCandidateRecord {
220 // Chain from derived pointer to base.
222 // Original base.
223 Value *RootOfChain;
224 // Cost of chain.
226};
228
229} // end anonymous namespace
230
232 std::optional<OperandBundleUse> DeoptBundle =
233 Call->getOperandBundle(LLVMContext::OB_deopt);
234
235 if (!DeoptBundle) {
237 "Found non-leaf call without deopt info!");
238 return std::nullopt;
239 }
240
241 return DeoptBundle->Inputs;
242}
243
244/// Compute the live-in set for every basic block in the function
246 GCPtrLivenessData &Data, GCStrategy *GC);
247
248/// Given results from the dataflow liveness computation, find the set of live
249/// Values at a particular instruction.
250static void findLiveSetAtInst(Instruction *inst, GCPtrLivenessData &Data,
251 StatepointLiveSetTy &out, GCStrategy *GC);
252
253static bool isGCPointerType(Type *T, GCStrategy *GC) {
254 assert(GC && "GC Strategy for isGCPointerType cannot be null");
255
256 if (!isa<PointerType>(T))
257 return false;
258
259 // conservative - same as StatepointLowering
260 return GC->isGCManagedPointer(T).value_or(true);
261}
262
263// Return true if this type is one which a) is a gc pointer or contains a GC
264// pointer and b) is of a type this code expects to encounter as a live value.
265// (The insertion code will assert that a type which matches (a) and not (b)
266// is not encountered.)
268 // We fully support gc pointers
269 if (isGCPointerType(T, GC))
270 return true;
271 // We partially support vectors of gc pointers. The code will assert if it
272 // can't handle something.
273 if (auto VT = dyn_cast<VectorType>(T))
274 if (isGCPointerType(VT->getElementType(), GC))
275 return true;
276 return false;
277}
278
279#ifndef NDEBUG
280/// Returns true if this type contains a gc pointer whether we know how to
281/// handle that type or not.
282static bool containsGCPtrType(Type *Ty, GCStrategy *GC) {
283 if (isGCPointerType(Ty, GC))
284 return true;
285 if (VectorType *VT = dyn_cast<VectorType>(Ty))
286 return isGCPointerType(VT->getScalarType(), GC);
287 if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
288 return containsGCPtrType(AT->getElementType(), GC);
289 if (StructType *ST = dyn_cast<StructType>(Ty))
290 return llvm::any_of(ST->elements(),
291 [GC](Type *Ty) { return containsGCPtrType(Ty, GC); });
292 return false;
293}
294
295// Returns true if this is a type which a) is a gc pointer or contains a GC
296// pointer and b) is of a type which the code doesn't expect (i.e. first class
297// aggregates). Used to trip assertions.
299 return containsGCPtrType(Ty, GC) && !isHandledGCPointerType(Ty, GC);
300}
301#endif
302
303// Return the name of the value suffixed with the provided value, or if the
304// value didn't have a name, the default value specified.
305static std::string suffixed_name_or(Value *V, StringRef Suffix,
306 StringRef DefaultName) {
307 return V->hasName() ? (V->getName() + Suffix).str() : DefaultName.str();
308}
309
310// Conservatively identifies any definitions which might be live at the
311// given instruction. The analysis is performed immediately before the
312// given instruction. Values defined by that instruction are not considered
313// live. Values used by that instruction are considered live.
315 DominatorTree &DT, GCPtrLivenessData &OriginalLivenessData, CallBase *Call,
316 PartiallyConstructedSafepointRecord &Result, GCStrategy *GC) {
317 StatepointLiveSetTy LiveSet;
318 findLiveSetAtInst(Call, OriginalLivenessData, LiveSet, GC);
319
320 if (PrintLiveSet) {
321 dbgs() << "Live Variables:\n";
322 for (Value *V : LiveSet)
323 dbgs() << " " << V->getName() << " " << *V << "\n";
324 }
325 if (PrintLiveSetSize) {
326 dbgs() << "Safepoint For: " << Call->getCalledOperand()->getName() << "\n";
327 dbgs() << "Number live values: " << LiveSet.size() << "\n";
328 }
329 Result.LiveSet = LiveSet;
330}
331
332/// Returns true if V is a known base.
333static bool isKnownBase(Value *V, const IsKnownBaseMapTy &KnownBases);
334
335/// Caches the IsKnownBase flag for a value and asserts that it wasn't present
336/// in the cache before.
337static void setKnownBase(Value *V, bool IsKnownBase,
338 IsKnownBaseMapTy &KnownBases);
339
340static Value *findBaseDefiningValue(Value *I, DefiningValueMapTy &Cache,
341 IsKnownBaseMapTy &KnownBases);
342
343/// Return a base defining value for the 'Index' element of the given vector
344/// instruction 'I'. If Index is null, returns a BDV for the entire vector
345/// 'I'. As an optimization, this method will try to determine when the
346/// element is known to already be a base pointer. If this can be established,
347/// the second value in the returned pair will be true. Note that either a
348/// vector or a pointer typed value can be returned. For the former, the
349/// vector returned is a BDV (and possibly a base) of the entire vector 'I'.
350/// If the later, the return pointer is a BDV (or possibly a base) for the
351/// particular element in 'I'.
static Value *findBaseDefiningValueOfVector(Value *I, DefiningValueMapTy &Cache,
                                            IsKnownBaseMapTy &KnownBases) {
  // Each case parallels findBaseDefiningValue below, see that code for
  // detailed motivation.

  // Return the memoized answer if this value was classified before.
  auto Cached = Cache.find(I);
  if (Cached != Cache.end())
    return Cached->second;

  if (isa<Argument>(I)) {
    // An incoming argument to the function is a base pointer
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */true, KnownBases);
    return I;
  }

  if (isa<Constant>(I)) {
    // Base of constant vector consists only of constant null pointers.
    // For reasoning see similar case inside 'findBaseDefiningValue' function.
    auto *CAZ = ConstantAggregateZero::get(I->getType());
    Cache[I] = CAZ;
    setKnownBase(CAZ, /* IsKnownBase */true, KnownBases);
    return CAZ;
  }

  if (isa<LoadInst>(I)) {
    // A loaded vector of pointers is assumed to contain only bases.
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */true, KnownBases);
    return I;
  }

  if (isa<InsertElementInst>(I)) {
    // We don't know whether this vector contains entirely base pointers or
    // not. To be conservatively correct, we treat it as a BDV and will
    // duplicate code as needed to construct a parallel vector of bases.
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */false, KnownBases);
    return I;
  }

  if (isa<ShuffleVectorInst>(I)) {
    // We don't know whether this vector contains entirely base pointers or
    // not. To be conservatively correct, we treat it as a BDV and will
    // duplicate code as needed to construct a parallel vector of bases.
    // TODO: There a number of local optimizations which could be applied here
    // for particular sufflevector patterns.
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */false, KnownBases);
    return I;
  }

  // The behavior of getelementptr instructions is the same for vector and
  // non-vector data types.
  if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
    auto *BDV =
        findBaseDefiningValue(GEP->getPointerOperand(), Cache, KnownBases);
    Cache[GEP] = BDV;
    return BDV;
  }

  // The behavior of freeze instructions is the same for vector and
  // non-vector data types.
  if (auto *Freeze = dyn_cast<FreezeInst>(I)) {
    auto *BDV = findBaseDefiningValue(Freeze->getOperand(0), Cache, KnownBases);
    Cache[Freeze] = BDV;
    return BDV;
  }

  // If the pointer comes through a bitcast of a vector of pointers to
  // a vector of another type of pointer, then look through the bitcast
  if (auto *BC = dyn_cast<BitCastInst>(I)) {
    auto *BDV = findBaseDefiningValue(BC->getOperand(0), Cache, KnownBases);
    Cache[BC] = BDV;
    return BDV;
  }

  // We assume that functions in the source language only return base
  // pointers. This should probably be generalized via attributes to support
  // both source language and internal functions.
  if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */true, KnownBases);
    return I;
  }

  // A PHI or Select is a base defining value. The outer findBasePointer
  // algorithm is responsible for constructing a base value for this BDV.
  assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
         "unknown vector instruction - no base found for vector element");
  Cache[I] = I;
  setKnownBase(I, /* IsKnownBase */false, KnownBases);
  return I;
}
445
446/// Helper function for findBasePointer - Will return a value which either a)
447/// defines the base pointer for the input, b) blocks the simple search
448/// (i.e. a PHI or Select of two derived pointers), or c) involves a change
449/// from pointer to vector type or back.
450static Value *findBaseDefiningValue(Value *I, DefiningValueMapTy &Cache,
451 IsKnownBaseMapTy &KnownBases) {
452 assert(I->getType()->isPtrOrPtrVectorTy() &&
453 "Illegal to ask for the base pointer of a non-pointer type");
454 auto Cached = Cache.find(I);
455 if (Cached != Cache.end())
456 return Cached->second;
457
458 if (I->getType()->isVectorTy())
459 return findBaseDefiningValueOfVector(I, Cache, KnownBases);
460
461 if (isa<Argument>(I)) {
462 // An incoming argument to the function is a base pointer
463 // We should have never reached here if this argument isn't an gc value
464 Cache[I] = I;
465 setKnownBase(I, /* IsKnownBase */true, KnownBases);
466 return I;
467 }
468
469 if (isa<Constant>(I)) {
470 // We assume that objects with a constant base (e.g. a global) can't move
471 // and don't need to be reported to the collector because they are always
472 // live. Besides global references, all kinds of constants (e.g. undef,
473 // constant expressions, null pointers) can be introduced by the inliner or
474 // the optimizer, especially on dynamically dead paths.
475 // Here we treat all of them as having single null base. By doing this we
476 // trying to avoid problems reporting various conflicts in a form of
477 // "phi (const1, const2)" or "phi (const, regular gc ptr)".
478 // See constant.ll file for relevant test cases.
479
480 auto *CPN = ConstantPointerNull::get(cast<PointerType>(I->getType()));
481 Cache[I] = CPN;
482 setKnownBase(CPN, /* IsKnownBase */true, KnownBases);
483 return CPN;
484 }
485
486 // inttoptrs in an integral address space are currently ill-defined. We
487 // treat them as defining base pointers here for consistency with the
488 // constant rule above and because we don't really have a better semantic
489 // to give them. Note that the optimizer is always free to insert undefined
490 // behavior on dynamically dead paths as well.
491 if (isa<IntToPtrInst>(I)) {
492 Cache[I] = I;
493 setKnownBase(I, /* IsKnownBase */true, KnownBases);
494 return I;
495 }
496
497 if (CastInst *CI = dyn_cast<CastInst>(I)) {
498 Value *Def = CI->stripPointerCasts();
499 // If stripping pointer casts changes the address space there is an
500 // addrspacecast in between.
501 assert(cast<PointerType>(Def->getType())->getAddressSpace() ==
502 cast<PointerType>(CI->getType())->getAddressSpace() &&
503 "unsupported addrspacecast");
504 // If we find a cast instruction here, it means we've found a cast which is
505 // not simply a pointer cast (i.e. an inttoptr). We don't know how to
506 // handle int->ptr conversion.
507 assert(!isa<CastInst>(Def) && "shouldn't find another cast here");
508 auto *BDV = findBaseDefiningValue(Def, Cache, KnownBases);
509 Cache[CI] = BDV;
510 return BDV;
511 }
512
513 if (isa<LoadInst>(I)) {
514 // The value loaded is an gc base itself
515 Cache[I] = I;
516 setKnownBase(I, /* IsKnownBase */true, KnownBases);
517 return I;
518 }
519
520 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
521 // The base of this GEP is the base
522 auto *BDV =
523 findBaseDefiningValue(GEP->getPointerOperand(), Cache, KnownBases);
524 Cache[GEP] = BDV;
525 return BDV;
526 }
527
528 if (auto *Freeze = dyn_cast<FreezeInst>(I)) {
529 auto *BDV = findBaseDefiningValue(Freeze->getOperand(0), Cache, KnownBases);
530 Cache[Freeze] = BDV;
531 return BDV;
532 }
533
534 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
535 switch (II->getIntrinsicID()) {
536 default:
537 // fall through to general call handling
538 break;
539 case Intrinsic::experimental_gc_statepoint:
540 llvm_unreachable("statepoints don't produce pointers");
541 case Intrinsic::experimental_gc_relocate:
542 // Rerunning safepoint insertion after safepoints are already
543 // inserted is not supported. It could probably be made to work,
544 // but why are you doing this? There's no good reason.
545 llvm_unreachable("repeat safepoint insertion is not supported");
546 case Intrinsic::gcroot:
547 // Currently, this mechanism hasn't been extended to work with gcroot.
548 // There's no reason it couldn't be, but I haven't thought about the
549 // implications much.
551 "interaction with the gcroot mechanism is not supported");
552 case Intrinsic::experimental_gc_get_pointer_base:
553 auto *BDV = findBaseDefiningValue(II->getOperand(0), Cache, KnownBases);
554 Cache[II] = BDV;
555 return BDV;
556 }
557 }
558 // We assume that functions in the source language only return base
559 // pointers. This should probably be generalized via attributes to support
560 // both source language and internal functions.
561 if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
562 Cache[I] = I;
563 setKnownBase(I, /* IsKnownBase */true, KnownBases);
564 return I;
565 }
566
567 // TODO: I have absolutely no idea how to implement this part yet. It's not
568 // necessarily hard, I just haven't really looked at it yet.
569 assert(!isa<LandingPadInst>(I) && "Landing Pad is unimplemented");
570
571 if (isa<AtomicCmpXchgInst>(I)) {
572 // A CAS is effectively a atomic store and load combined under a
573 // predicate. From the perspective of base pointers, we just treat it
574 // like a load.
575 Cache[I] = I;
576 setKnownBase(I, /* IsKnownBase */true, KnownBases);
577 return I;
578 }
579
580 assert(!isa<AtomicRMWInst>(I) && "Xchg handled above, all others are "
581 "binary ops which don't apply to pointers");
582
583 // The aggregate ops. Aggregates can either be in the heap or on the
584 // stack, but in either case, this is simply a field load. As a result,
585 // this is a defining definition of the base just like a load is.
586 if (isa<ExtractValueInst>(I)) {
587 Cache[I] = I;
588 setKnownBase(I, /* IsKnownBase */true, KnownBases);
589 return I;
590 }
591
592 // We should never see an insert vector since that would require we be
593 // tracing back a struct value not a pointer value.
594 assert(!isa<InsertValueInst>(I) &&
595 "Base pointer for a struct is meaningless");
596
597 // This value might have been generated by findBasePointer() called when
598 // substituting gc.get.pointer.base() intrinsic.
599 bool IsKnownBase =
600 isa<Instruction>(I) && cast<Instruction>(I)->getMetadata("is_base_value");
601 setKnownBase(I, /* IsKnownBase */IsKnownBase, KnownBases);
602 Cache[I] = I;
603
604 // An extractelement produces a base result exactly when it's input does.
605 // We may need to insert a parallel instruction to extract the appropriate
606 // element out of the base vector corresponding to the input. Given this,
607 // it's analogous to the phi and select case even though it's not a merge.
608 if (isa<ExtractElementInst>(I))
609 // Note: There a lot of obvious peephole cases here. This are deliberately
610 // handled after the main base pointer inference algorithm to make writing
611 // test cases to exercise that code easier.
612 return I;
613
614 // The last two cases here don't return a base pointer. Instead, they
615 // return a value which dynamically selects from among several base
616 // derived pointers (each with it's own base potentially). It's the job of
617 // the caller to resolve these.
618 assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
619 "missing instruction case in findBaseDefiningValue");
620 return I;
621}
622
623/// Returns the base defining value for this value.
624static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &Cache,
625 IsKnownBaseMapTy &KnownBases) {
626 if (!Cache.contains(I)) {
627 auto *BDV = findBaseDefiningValue(I, Cache, KnownBases);
628 Cache[I] = BDV;
629 LLVM_DEBUG(dbgs() << "fBDV-cached: " << I->getName() << " -> "
630 << Cache[I]->getName() << ", is known base = "
631 << KnownBases[I] << "\n");
632 }
633 assert(Cache[I] != nullptr);
634 assert(KnownBases.contains(Cache[I]) &&
635 "Cached value must be present in known bases map");
636 return Cache[I];
637}
638
639/// Return a base pointer for this value if known. Otherwise, return it's
640/// base defining value.
641static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache,
642 IsKnownBaseMapTy &KnownBases) {
643 Value *Def = findBaseDefiningValueCached(I, Cache, KnownBases);
644 auto Found = Cache.find(Def);
645 if (Found != Cache.end()) {
646 // Either a base-of relation, or a self reference. Caller must check.
647 return Found->second;
648 }
649 // Only a BDV available
650 return Def;
651}
652
653#ifndef NDEBUG
654/// This value is a base pointer that is not generated by RS4GC, i.e. it already
655/// exists in the code.
657 // no recursion possible
658 return !isa<PHINode>(V) && !isa<SelectInst>(V) &&
659 !isa<ExtractElementInst>(V) && !isa<InsertElementInst>(V) &&
660 !isa<ShuffleVectorInst>(V);
661}
662#endif
663
664static bool isKnownBase(Value *V, const IsKnownBaseMapTy &KnownBases) {
665 auto It = KnownBases.find(V);
666 assert(It != KnownBases.end() && "Value not present in the map");
667 return It->second;
668}
669
670static void setKnownBase(Value *V, bool IsKnownBase,
671 IsKnownBaseMapTy &KnownBases) {
672#ifndef NDEBUG
673 auto It = KnownBases.find(V);
674 if (It != KnownBases.end())
675 assert(It->second == IsKnownBase && "Changing already present value");
676#endif
677 KnownBases[V] = IsKnownBase;
678}
679
680// Returns true if First and Second values are both scalar or both vector.
681static bool areBothVectorOrScalar(Value *First, Value *Second) {
682 return isa<VectorType>(First->getType()) ==
683 isa<VectorType>(Second->getType());
684}
685
686namespace {
687
688/// Models the state of a single base defining value in the findBasePointer
689/// algorithm for determining where a new instruction is needed to propagate
690/// the base of this BDV.
691class BDVState {
692public:
693 enum StatusTy {
694 // Starting state of lattice
695 Unknown,
696 // Some specific base value -- does *not* mean that instruction
697 // propagates the base of the object
698 // ex: gep %arg, 16 -> %arg is the base value
699 Base,
700 // Need to insert a node to represent a merge.
701 Conflict
702 };
703
704 BDVState() {
705 llvm_unreachable("missing state in map");
706 }
707
708 explicit BDVState(Value *OriginalValue)
709 : OriginalValue(OriginalValue) {}
710 explicit BDVState(Value *OriginalValue, StatusTy Status, Value *BaseValue = nullptr)
711 : OriginalValue(OriginalValue), Status(Status), BaseValue(BaseValue) {
712 assert(Status != Base || BaseValue);
713 }
714
715 StatusTy getStatus() const { return Status; }
716 Value *getOriginalValue() const { return OriginalValue; }
717 Value *getBaseValue() const { return BaseValue; }
718
719 bool isBase() const { return getStatus() == Base; }
720 bool isUnknown() const { return getStatus() == Unknown; }
721 bool isConflict() const { return getStatus() == Conflict; }
722
723 // Values of type BDVState form a lattice, and this function implements the
724 // meet
725 // operation.
726 void meet(const BDVState &Other) {
727 auto markConflict = [&]() {
728 Status = BDVState::Conflict;
729 BaseValue = nullptr;
730 };
731 // Conflict is a final state.
732 if (isConflict())
733 return;
734 // if we are not known - just take other state.
735 if (isUnknown()) {
736 Status = Other.getStatus();
737 BaseValue = Other.getBaseValue();
738 return;
739 }
740 // We are base.
741 assert(isBase() && "Unknown state");
742 // If other is unknown - just keep our state.
743 if (Other.isUnknown())
744 return;
745 // If other is conflict - it is a final state.
746 if (Other.isConflict())
747 return markConflict();
748 // Other is base as well.
749 assert(Other.isBase() && "Unknown state");
750 // If bases are different - Conflict.
751 if (getBaseValue() != Other.getBaseValue())
752 return markConflict();
753 // We are identical, do nothing.
754 }
755
756 bool operator==(const BDVState &Other) const {
757 return OriginalValue == Other.OriginalValue && BaseValue == Other.BaseValue &&
758 Status == Other.Status;
759 }
760
761 bool operator!=(const BDVState &other) const { return !(*this == other); }
762
764 void dump() const {
765 print(dbgs());
766 dbgs() << '\n';
767 }
768
769 void print(raw_ostream &OS) const {
770 switch (getStatus()) {
771 case Unknown:
772 OS << "U";
773 break;
774 case Base:
775 OS << "B";
776 break;
777 case Conflict:
778 OS << "C";
779 break;
780 }
781 OS << " (base " << getBaseValue() << " - "
782 << (getBaseValue() ? getBaseValue()->getName() : "nullptr") << ")"
783 << " for " << OriginalValue->getName() << ":";
784 }
785
786private:
787 AssertingVH<Value> OriginalValue; // instruction this state corresponds to
788 StatusTy Status = Unknown;
789 AssertingVH<Value> BaseValue = nullptr; // Non-null only if Status == Base.
790};
791
792} // end anonymous namespace
793
794#ifndef NDEBUG
/// Debug-printing support: stream a BDVState via its print() method.
static raw_ostream &operator<<(raw_ostream &OS, const BDVState &State) {
  State.print(OS);
  return OS;
}
799#endif
800
801/// For a given value or instruction, figure out what base ptr its derived from.
802/// For gc objects, this is simply itself. On success, returns a value which is
803/// the base pointer. (This is reliable and can be used for relocation.) On
804/// failure, returns nullptr.
805static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache,
806 IsKnownBaseMapTy &KnownBases) {
807 Value *Def = findBaseOrBDV(I, Cache, KnownBases);
808
809 if (isKnownBase(Def, KnownBases) && areBothVectorOrScalar(Def, I))
810 return Def;
811
812 // Here's the rough algorithm:
813 // - For every SSA value, construct a mapping to either an actual base
814 // pointer or a PHI which obscures the base pointer.
815 // - Construct a mapping from PHI to unknown TOP state. Use an
816 // optimistic algorithm to propagate base pointer information. Lattice
817 // looks like:
818 // UNKNOWN
819 // b1 b2 b3 b4
820 // CONFLICT
821 // When algorithm terminates, all PHIs will either have a single concrete
822 // base or be in a conflict state.
823 // - For every conflict, insert a dummy PHI node without arguments. Add
824 // these to the base[Instruction] = BasePtr mapping. For every
825 // non-conflict, add the actual base.
826 // - For every conflict, add arguments for the base[a] of each input
827 // arguments.
828 //
829 // Note: A simpler form of this would be to add the conflict form of all
830 // PHIs without running the optimistic algorithm. This would be
831 // analogous to pessimistic data flow and would likely lead to an
832 // overall worse solution.
833
834#ifndef NDEBUG
835 auto isExpectedBDVType = [](Value *BDV) {
836 return isa<PHINode>(BDV) || isa<SelectInst>(BDV) ||
837 isa<ExtractElementInst>(BDV) || isa<InsertElementInst>(BDV) ||
838 isa<ShuffleVectorInst>(BDV);
839 };
840#endif
841
842 // Once populated, will contain a mapping from each potentially non-base BDV
843 // to a lattice value (described above) which corresponds to that BDV.
844 // We use the order of insertion (DFS over the def/use graph) to provide a
845 // stable deterministic ordering for visiting DenseMaps (which are unordered)
846 // below. This is important for deterministic compilation.
848
849#ifndef NDEBUG
850 auto VerifyStates = [&]() {
851 for (auto &Entry : States) {
852 assert(Entry.first == Entry.second.getOriginalValue());
853 }
854 };
855#endif
856
857 auto visitBDVOperands = [](Value *BDV, std::function<void (Value*)> F) {
858 if (PHINode *PN = dyn_cast<PHINode>(BDV)) {
859 for (Value *InVal : PN->incoming_values())
860 F(InVal);
861 } else if (SelectInst *SI = dyn_cast<SelectInst>(BDV)) {
862 F(SI->getTrueValue());
863 F(SI->getFalseValue());
864 } else if (auto *EE = dyn_cast<ExtractElementInst>(BDV)) {
865 F(EE->getVectorOperand());
866 } else if (auto *IE = dyn_cast<InsertElementInst>(BDV)) {
867 F(IE->getOperand(0));
868 F(IE->getOperand(1));
869 } else if (auto *SV = dyn_cast<ShuffleVectorInst>(BDV)) {
870 // For a canonical broadcast, ignore the undef argument
871 // (without this, we insert a parallel base shuffle for every broadcast)
872 F(SV->getOperand(0));
873 if (!SV->isZeroEltSplat())
874 F(SV->getOperand(1));
875 } else {
876 llvm_unreachable("unexpected BDV type");
877 }
878 };
879
880
881 // Recursively fill in all base defining values reachable from the initial
882 // one for which we don't already know a definite base value for
883 /* scope */ {
885 Worklist.push_back(Def);
886 States.insert({Def, BDVState(Def)});
887 while (!Worklist.empty()) {
888 Value *Current = Worklist.pop_back_val();
889 assert(!isOriginalBaseResult(Current) && "why did it get added?");
890
891 auto visitIncomingValue = [&](Value *InVal) {
892 Value *Base = findBaseOrBDV(InVal, Cache, KnownBases);
893 if (isKnownBase(Base, KnownBases) && areBothVectorOrScalar(Base, InVal))
894 // Known bases won't need new instructions introduced and can be
895 // ignored safely. However, this can only be done when InVal and Base
896 // are both scalar or both vector. Otherwise, we need to find a
897 // correct BDV for InVal, by creating an entry in the lattice
898 // (States).
899 return;
900 assert(isExpectedBDVType(Base) && "the only non-base values "
901 "we see should be base defining values");
902 if (States.insert(std::make_pair(Base, BDVState(Base))).second)
903 Worklist.push_back(Base);
904 };
905
906 visitBDVOperands(Current, visitIncomingValue);
907 }
908 }
909
910#ifndef NDEBUG
911 VerifyStates();
912 LLVM_DEBUG(dbgs() << "States after initialization:\n");
913 for (const auto &Pair : States) {
914 LLVM_DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n");
915 }
916#endif
917
918 // Iterate forward through the value graph pruning any node from the state
919 // list where all of the inputs are base pointers. The purpose of this is to
920 // reuse existing values when the derived pointer we were asked to materialize
921 // a base pointer for happens to be a base pointer itself. (Or a sub-graph
922 // feeding it does.)
924 do {
925 ToRemove.clear();
926 for (auto Pair : States) {
927 Value *BDV = Pair.first;
928 auto canPruneInput = [&](Value *V) {
929 // If the input of the BDV is the BDV itself we can prune it. This is
930 // only possible if the BDV is a PHI node.
931 if (V->stripPointerCasts() == BDV)
932 return true;
933 Value *VBDV = findBaseOrBDV(V, Cache, KnownBases);
934 if (V->stripPointerCasts() != VBDV)
935 return false;
936       // The assumption is that anything not in the state list
937       // propagates a base pointer.
938 return States.count(VBDV) == 0;
939 };
940
941 bool CanPrune = true;
942 visitBDVOperands(BDV, [&](Value *Op) {
943 CanPrune = CanPrune && canPruneInput(Op);
944 });
945 if (CanPrune)
946 ToRemove.push_back(BDV);
947 }
948 for (Value *V : ToRemove) {
949 States.erase(V);
950     // Cache the fact V is its own base for later usage.
951 Cache[V] = V;
952 }
953 } while (!ToRemove.empty());
954
955 // Did we manage to prove that Def itself must be a base pointer?
956 if (!States.count(Def))
957 return Def;
958
959 // Return a phi state for a base defining value. We'll generate a new
960 // base state for known bases and expect to find a cached state otherwise.
961 auto GetStateForBDV = [&](Value *BaseValue, Value *Input) {
962 auto I = States.find(BaseValue);
963 if (I != States.end())
964 return I->second;
965 assert(areBothVectorOrScalar(BaseValue, Input));
966 return BDVState(BaseValue, BDVState::Base, BaseValue);
967 };
968
969 bool Progress = true;
970 while (Progress) {
971#ifndef NDEBUG
972 const size_t OldSize = States.size();
973#endif
974 Progress = false;
975 // We're only changing values in this loop, thus safe to keep iterators.
976 // Since this is computing a fixed point, the order of visit does not
977 // effect the result. TODO: We could use a worklist here and make this run
978 // much faster.
979 for (auto Pair : States) {
980 Value *BDV = Pair.first;
981 // Only values that do not have known bases or those that have differing
982 // type (scalar versus vector) from a possible known base should be in the
983 // lattice.
984 assert((!isKnownBase(BDV, KnownBases) ||
985 !areBothVectorOrScalar(BDV, Pair.second.getBaseValue())) &&
986 "why did it get added?");
987
988 BDVState NewState(BDV);
989 visitBDVOperands(BDV, [&](Value *Op) {
990 Value *BDV = findBaseOrBDV(Op, Cache, KnownBases);
991 auto OpState = GetStateForBDV(BDV, Op);
992 NewState.meet(OpState);
993 });
994
995 BDVState OldState = States[BDV];
996 if (OldState != NewState) {
997 Progress = true;
998 States[BDV] = NewState;
999 }
1000 }
1001
1002 assert(OldSize == States.size() &&
1003 "fixed point shouldn't be adding any new nodes to state");
1004 }
1005
1006#ifndef NDEBUG
1007 VerifyStates();
1008 LLVM_DEBUG(dbgs() << "States after meet iteration:\n");
1009 for (const auto &Pair : States) {
1010 LLVM_DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n");
1011 }
1012#endif
1013
1014 // Handle all instructions that have a vector BDV, but the instruction itself
1015 // is of scalar type.
1016 for (auto Pair : States) {
1017 Instruction *I = cast<Instruction>(Pair.first);
1018 BDVState State = Pair.second;
1019 auto *BaseValue = State.getBaseValue();
1020 // Only values that do not have known bases or those that have differing
1021 // type (scalar versus vector) from a possible known base should be in the
1022 // lattice.
1023 assert(
1024 (!isKnownBase(I, KnownBases) || !areBothVectorOrScalar(I, BaseValue)) &&
1025 "why did it get added?");
1026 assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");
1027
1028 if (!State.isBase() || !isa<VectorType>(BaseValue->getType()))
1029 continue;
1030 // extractelement instructions are a bit special in that we may need to
1031 // insert an extract even when we know an exact base for the instruction.
1032 // The problem is that we need to convert from a vector base to a scalar
1033    // base for the particular index we're interested in.
1034 if (isa<ExtractElementInst>(I)) {
1035 auto *EE = cast<ExtractElementInst>(I);
1036 // TODO: In many cases, the new instruction is just EE itself. We should
1037 // exploit this, but can't do it here since it would break the invariant
1038 // about the BDV not being known to be a base.
1039 auto *BaseInst = ExtractElementInst::Create(
1040 State.getBaseValue(), EE->getIndexOperand(), "base_ee", EE);
1041 BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {}));
1042 States[I] = BDVState(I, BDVState::Base, BaseInst);
1043 setKnownBase(BaseInst, /* IsKnownBase */true, KnownBases);
1044 } else if (!isa<VectorType>(I->getType())) {
1045 // We need to handle cases that have a vector base but the instruction is
1046 // a scalar type (these could be phis or selects or any instruction that
1047 // are of scalar type, but the base can be a vector type). We
1048 // conservatively set this as conflict. Setting the base value for these
1049 // conflicts is handled in the next loop which traverses States.
1050 States[I] = BDVState(I, BDVState::Conflict);
1051 }
1052 }
1053
1054#ifndef NDEBUG
1055 VerifyStates();
1056#endif
1057
1058 // Insert Phis for all conflicts
1059 // TODO: adjust naming patterns to avoid this order of iteration dependency
1060 for (auto Pair : States) {
1061 Instruction *I = cast<Instruction>(Pair.first);
1062 BDVState State = Pair.second;
1063 // Only values that do not have known bases or those that have differing
1064 // type (scalar versus vector) from a possible known base should be in the
1065 // lattice.
1066 assert((!isKnownBase(I, KnownBases) ||
1067 !areBothVectorOrScalar(I, State.getBaseValue())) &&
1068 "why did it get added?");
1069 assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");
1070
1071 // Since we're joining a vector and scalar base, they can never be the
1072 // same. As a result, we should always see insert element having reached
1073 // the conflict state.
1074 assert(!isa<InsertElementInst>(I) || State.isConflict());
1075
1076 if (!State.isConflict())
1077 continue;
1078
1079 auto getMangledName = [](Instruction *I) -> std::string {
1080 if (isa<PHINode>(I)) {
1081 return suffixed_name_or(I, ".base", "base_phi");
1082 } else if (isa<SelectInst>(I)) {
1083 return suffixed_name_or(I, ".base", "base_select");
1084 } else if (isa<ExtractElementInst>(I)) {
1085 return suffixed_name_or(I, ".base", "base_ee");
1086 } else if (isa<InsertElementInst>(I)) {
1087 return suffixed_name_or(I, ".base", "base_ie");
1088 } else {
1089 return suffixed_name_or(I, ".base", "base_sv");
1090 }
1091 };
1092
1093 Instruction *BaseInst = I->clone();
1094 BaseInst->insertBefore(I);
1095 BaseInst->setName(getMangledName(I));
1096 // Add metadata marking this as a base value
1097 BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {}));
1098 States[I] = BDVState(I, BDVState::Conflict, BaseInst);
1099 setKnownBase(BaseInst, /* IsKnownBase */true, KnownBases);
1100 }
1101
1102#ifndef NDEBUG
1103 VerifyStates();
1104#endif
1105
1106  // Returns an instruction which produces the base pointer for a given
1107  // instruction. The instruction is assumed to be an input to one of the BDVs
1108  // seen in the inference algorithm above. As such, we must either already
1109  // know its base defining value is a base, or have inserted a new
1110  // instruction to propagate the base of its BDV and have entered that newly
1111  // introduced instruction into the state table. In either case, we are
1112  // assured to be able to determine an instruction which produces its base
1113  // pointer.
1114 auto getBaseForInput = [&](Value *Input, Instruction *InsertPt) {
1115 Value *BDV = findBaseOrBDV(Input, Cache, KnownBases);
1116 Value *Base = nullptr;
1117 if (!States.count(BDV)) {
1118 assert(areBothVectorOrScalar(BDV, Input));
1119 Base = BDV;
1120 } else {
1121 // Either conflict or base.
1122 assert(States.count(BDV));
1123 Base = States[BDV].getBaseValue();
1124 }
1125 assert(Base && "Can't be null");
1126 // The cast is needed since base traversal may strip away bitcasts
1127 if (Base->getType() != Input->getType() && InsertPt)
1128 Base = new BitCastInst(Base, Input->getType(), "cast", InsertPt);
1129 return Base;
1130 };
1131
1132 // Fixup all the inputs of the new PHIs. Visit order needs to be
1133 // deterministic and predictable because we're naming newly created
1134 // instructions.
1135 for (auto Pair : States) {
1136 Instruction *BDV = cast<Instruction>(Pair.first);
1137 BDVState State = Pair.second;
1138
1139 // Only values that do not have known bases or those that have differing
1140 // type (scalar versus vector) from a possible known base should be in the
1141 // lattice.
1142 assert((!isKnownBase(BDV, KnownBases) ||
1143 !areBothVectorOrScalar(BDV, State.getBaseValue())) &&
1144 "why did it get added?");
1145 assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");
1146 if (!State.isConflict())
1147 continue;
1148
1149 if (PHINode *BasePHI = dyn_cast<PHINode>(State.getBaseValue())) {
1150 PHINode *PN = cast<PHINode>(BDV);
1151 const unsigned NumPHIValues = PN->getNumIncomingValues();
1152
1153 // The IR verifier requires phi nodes with multiple entries from the
1154 // same basic block to have the same incoming value for each of those
1155 // entries. Since we're inserting bitcasts in the loop, make sure we
1156 // do so at least once per incoming block.
1157 DenseMap<BasicBlock *, Value*> BlockToValue;
1158 for (unsigned i = 0; i < NumPHIValues; i++) {
1159 Value *InVal = PN->getIncomingValue(i);
1160 BasicBlock *InBB = PN->getIncomingBlock(i);
1161 if (!BlockToValue.count(InBB))
1162 BlockToValue[InBB] = getBaseForInput(InVal, InBB->getTerminator());
1163 else {
1164#ifndef NDEBUG
1165 Value *OldBase = BlockToValue[InBB];
1166 Value *Base = getBaseForInput(InVal, nullptr);
1167
1168 // We can't use `stripPointerCasts` instead of this function because
1169 // `stripPointerCasts` doesn't handle vectors of pointers.
1170 auto StripBitCasts = [](Value *V) -> Value * {
1171 while (auto *BC = dyn_cast<BitCastInst>(V))
1172 V = BC->getOperand(0);
1173 return V;
1174 };
1175 // In essence this assert states: the only way two values
1176 // incoming from the same basic block may be different is by
1177 // being different bitcasts of the same value. A cleanup
1178 // that remains TODO is changing findBaseOrBDV to return an
1179 // llvm::Value of the correct type (and still remain pure).
1180 // This will remove the need to add bitcasts.
1181 assert(StripBitCasts(Base) == StripBitCasts(OldBase) &&
1182 "findBaseOrBDV should be pure!");
1183#endif
1184 }
1185 Value *Base = BlockToValue[InBB];
1186 BasePHI->setIncomingValue(i, Base);
1187 }
1188 } else if (SelectInst *BaseSI =
1189 dyn_cast<SelectInst>(State.getBaseValue())) {
1190 SelectInst *SI = cast<SelectInst>(BDV);
1191
1192 // Find the instruction which produces the base for each input.
1193 // We may need to insert a bitcast.
1194 BaseSI->setTrueValue(getBaseForInput(SI->getTrueValue(), BaseSI));
1195 BaseSI->setFalseValue(getBaseForInput(SI->getFalseValue(), BaseSI));
1196 } else if (auto *BaseEE =
1197 dyn_cast<ExtractElementInst>(State.getBaseValue())) {
1198 Value *InVal = cast<ExtractElementInst>(BDV)->getVectorOperand();
1199 // Find the instruction which produces the base for each input. We may
1200 // need to insert a bitcast.
1201 BaseEE->setOperand(0, getBaseForInput(InVal, BaseEE));
1202 } else if (auto *BaseIE = dyn_cast<InsertElementInst>(State.getBaseValue())){
1203 auto *BdvIE = cast<InsertElementInst>(BDV);
1204 auto UpdateOperand = [&](int OperandIdx) {
1205 Value *InVal = BdvIE->getOperand(OperandIdx);
1206 Value *Base = getBaseForInput(InVal, BaseIE);
1207 BaseIE->setOperand(OperandIdx, Base);
1208 };
1209 UpdateOperand(0); // vector operand
1210 UpdateOperand(1); // scalar operand
1211 } else {
1212 auto *BaseSV = cast<ShuffleVectorInst>(State.getBaseValue());
1213 auto *BdvSV = cast<ShuffleVectorInst>(BDV);
1214 auto UpdateOperand = [&](int OperandIdx) {
1215 Value *InVal = BdvSV->getOperand(OperandIdx);
1216 Value *Base = getBaseForInput(InVal, BaseSV);
1217 BaseSV->setOperand(OperandIdx, Base);
1218 };
1219 UpdateOperand(0); // vector operand
1220 if (!BdvSV->isZeroEltSplat())
1221 UpdateOperand(1); // vector operand
1222 else {
1223 // Never read, so just use poison
1224 Value *InVal = BdvSV->getOperand(1);
1225 BaseSV->setOperand(1, PoisonValue::get(InVal->getType()));
1226 }
1227 }
1228 }
1229
1230#ifndef NDEBUG
1231 VerifyStates();
1232#endif
1233
1234 // Cache all of our results so we can cheaply reuse them
1235 // NOTE: This is actually two caches: one of the base defining value
1236 // relation and one of the base pointer relation! FIXME
1237 for (auto Pair : States) {
1238 auto *BDV = Pair.first;
1239 Value *Base = Pair.second.getBaseValue();
1240 assert(BDV && Base);
1241 // Only values that do not have known bases or those that have differing
1242 // type (scalar versus vector) from a possible known base should be in the
1243 // lattice.
1244 assert(
1245 (!isKnownBase(BDV, KnownBases) || !areBothVectorOrScalar(BDV, Base)) &&
1246 "why did it get added?");
1247
1248 LLVM_DEBUG(
1249 dbgs() << "Updating base value cache"
1250 << " for: " << BDV->getName() << " from: "
1251 << (Cache.count(BDV) ? Cache[BDV]->getName().str() : "none")
1252 << " to: " << Base->getName() << "\n");
1253
1254 Cache[BDV] = Base;
1255 }
1256 assert(Cache.count(Def));
1257 return Cache[Def];
1258}
1259
1260// For a set of live pointers (base and/or derived), identify the base
1261// pointer of the object which they are derived from. This routine will
1262// mutate the IR graph as needed to make the 'base' pointer live at the
1263// definition site of 'derived'. This ensures that any use of 'derived' can
1264// also use 'base'. This may involve the insertion of a number of
1265// additional PHI nodes.
1266//
1267// preconditions: live is a set of pointer type Values
1268//
1269// side effects: may insert PHI nodes into the existing CFG, will preserve
1270// CFG, will not remove or mutate any existing nodes
1271//
1272// post condition: PointerToBase contains one (derived, base) pair for every
1273// pointer in live. Note that derived can be equal to base if the original
1274// pointer was a base pointer.
1275static void findBasePointers(const StatepointLiveSetTy &live,
1276 PointerToBaseTy &PointerToBase, DominatorTree *DT,
1277 DefiningValueMapTy &DVCache,
1278 IsKnownBaseMapTy &KnownBases) {
1279 for (Value *ptr : live) {
1280 Value *base = findBasePointer(ptr, DVCache, KnownBases);
1281 assert(base && "failed to find base pointer");
1282 PointerToBase[ptr] = base;
1283 assert((!isa<Instruction>(base) || !isa<Instruction>(ptr) ||
1284 DT->dominates(cast<Instruction>(base)->getParent(),
1285 cast<Instruction>(ptr)->getParent())) &&
1286 "The base we found better dominate the derived pointer");
1287 }
1288}
1289
1290/// Find the required base pointers (and adjust the live set) for the given
1291/// parse point.
1292static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
1293 CallBase *Call,
1294 PartiallyConstructedSafepointRecord &result,
1295 PointerToBaseTy &PointerToBase,
1296 IsKnownBaseMapTy &KnownBases) {
1297 StatepointLiveSetTy PotentiallyDerivedPointers = result.LiveSet;
1298 // We assume that all pointers passed to deopt are base pointers; as an
1299 // optimization, we can use this to avoid seperately materializing the base
1300 // pointer graph. This is only relevant since we're very conservative about
1301 // generating new conflict nodes during base pointer insertion. If we were
1302 // smarter there, this would be irrelevant.
1303 if (auto Opt = Call->getOperandBundle(LLVMContext::OB_deopt))
1304 for (Value *V : Opt->Inputs) {
1305 if (!PotentiallyDerivedPointers.count(V))
1306 continue;
1307 PotentiallyDerivedPointers.remove(V);
1308 PointerToBase[V] = V;
1309 }
1310 findBasePointers(PotentiallyDerivedPointers, PointerToBase, &DT, DVCache,
1311 KnownBases);
1312}
1313
1314/// Given an updated version of the dataflow liveness results, update the
1315/// liveset and base pointer maps for the call site CS.
1316static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
1317 CallBase *Call,
1318 PartiallyConstructedSafepointRecord &result,
1319 PointerToBaseTy &PointerToBase,
1320 GCStrategy *GC);
1321
1325 PointerToBaseTy &PointerToBase, GCStrategy *GC) {
1326 // TODO-PERF: reuse the original liveness, then simply run the dataflow
1327 // again. The old values are still live and will help it stabilize quickly.
1328 GCPtrLivenessData RevisedLivenessData;
1329 computeLiveInValues(DT, F, RevisedLivenessData, GC);
1330 for (size_t i = 0; i < records.size(); i++) {
1331 struct PartiallyConstructedSafepointRecord &info = records[i];
1332 recomputeLiveInValues(RevisedLivenessData, toUpdate[i], info, PointerToBase,
1333 GC);
1334 }
1335}
1336
1337// Utility function which clones all instructions from "ChainToBase"
1338// and inserts them before "InsertBefore". Returns rematerialized value
1339// which should be used after statepoint.
1341 Instruction *InsertBefore,
1342 Value *RootOfChain,
1343 Value *AlternateLiveBase) {
1344 Instruction *LastClonedValue = nullptr;
1345 Instruction *LastValue = nullptr;
1346 // Walk backwards to visit top-most instructions first.
1347 for (Instruction *Instr :
1348 make_range(ChainToBase.rbegin(), ChainToBase.rend())) {
1349 // Only GEP's and casts are supported as we need to be careful to not
1350 // introduce any new uses of pointers not in the liveset.
1351 // Note that it's fine to introduce new uses of pointers which were
1352 // otherwise not used after this statepoint.
1353 assert(isa<GetElementPtrInst>(Instr) || isa<CastInst>(Instr));
1354
1355 Instruction *ClonedValue = Instr->clone();
1356 ClonedValue->insertBefore(InsertBefore);
1357 ClonedValue->setName(Instr->getName() + ".remat");
1358
1359 // If it is not first instruction in the chain then it uses previously
1360 // cloned value. We should update it to use cloned value.
1361 if (LastClonedValue) {
1362 assert(LastValue);
1363 ClonedValue->replaceUsesOfWith(LastValue, LastClonedValue);
1364#ifndef NDEBUG
1365 for (auto *OpValue : ClonedValue->operand_values()) {
1366 // Assert that cloned instruction does not use any instructions from
1367 // this chain other than LastClonedValue
1368 assert(!is_contained(ChainToBase, OpValue) &&
1369 "incorrect use in rematerialization chain");
1370 // Assert that the cloned instruction does not use the RootOfChain
1371 // or the AlternateLiveBase.
1372 assert(OpValue != RootOfChain && OpValue != AlternateLiveBase);
1373 }
1374#endif
1375 } else {
1376 // For the first instruction, replace the use of unrelocated base i.e.
1377 // RootOfChain/OrigRootPhi, with the corresponding PHI present in the
1378 // live set. They have been proved to be the same PHI nodes. Note
1379 // that the *only* use of the RootOfChain in the ChainToBase list is
1380 // the first Value in the list.
1381 if (RootOfChain != AlternateLiveBase)
1382 ClonedValue->replaceUsesOfWith(RootOfChain, AlternateLiveBase);
1383 }
1384
1385 LastClonedValue = ClonedValue;
1386 LastValue = Instr;
1387 }
1388 assert(LastClonedValue);
1389 return LastClonedValue;
1390}
1391
1392// When inserting gc.relocate and gc.result calls, we need to ensure there are
1393// no uses of the original value / return value between the gc.statepoint and
1394// the gc.relocate / gc.result call. One case which can arise is a phi node
1395// starting one of the successor blocks. We also need to be able to insert the
1396// gc.relocates only on the path which goes through the statepoint. We might
1397// need to split an edge to make this possible.
1398static BasicBlock *
1400 DominatorTree &DT) {
1401 BasicBlock *Ret = BB;
1402 if (!BB->getUniquePredecessor())
1403 Ret = SplitBlockPredecessors(BB, InvokeParent, "", &DT);
1404
1405 // Now that 'Ret' has unique predecessor we can safely remove all phi nodes
1406 // from it
1408 assert(!isa<PHINode>(Ret->begin()) &&
1409 "All PHI nodes should have been removed!");
1410
1411 // At this point, we can safely insert a gc.relocate or gc.result as the first
1412 // instruction in Ret if needed.
1413 return Ret;
1414}
1415
1416// List of all function attributes which must be stripped when lowering from
1417// abstract machine model to physical machine model. Essentially, these are
1418// all the effects a safepoint might have which we ignored in the abstract
1419// machine model for purposes of optimization. We have to strip these on
1420// both function declarations and call sites.
1422 {Attribute::Memory, Attribute::NoSync, Attribute::NoFree};
1423
1424// Create new attribute set containing only attributes which can be transferred
1425// from original call to the safepoint.
1427 AttributeList OrigAL,
1428 AttributeList StatepointAL) {
1429 if (OrigAL.isEmpty())
1430 return StatepointAL;
1431
1432 // Remove the readonly, readnone, and statepoint function attributes.
1433 AttrBuilder FnAttrs(Ctx, OrigAL.getFnAttrs());
1434 for (auto Attr : FnAttrsToStrip)
1435 FnAttrs.removeAttribute(Attr);
1436
1437 for (Attribute A : OrigAL.getFnAttrs()) {
1439 FnAttrs.removeAttribute(A);
1440 }
1441
1442 // Just skip parameter and return attributes for now
1443 return StatepointAL.addFnAttributes(Ctx, FnAttrs);
1444}
1445
1446/// Helper function to place all gc relocates necessary for the given
1447/// statepoint.
1448/// Inputs:
1449/// liveVariables - list of variables to be relocated.
1450/// basePtrs - base pointers.
1451/// statepointToken - statepoint instruction to which relocates should be
1452/// bound.
1453/// Builder - Llvm IR builder to be used to construct new calls.
1455 ArrayRef<Value *> BasePtrs,
1456 Instruction *StatepointToken,
1457 IRBuilder<> &Builder, GCStrategy *GC) {
1458 if (LiveVariables.empty())
1459 return;
1460
1461 auto FindIndex = [](ArrayRef<Value *> LiveVec, Value *Val) {
1462 auto ValIt = llvm::find(LiveVec, Val);
1463 assert(ValIt != LiveVec.end() && "Val not found in LiveVec!");
1464 size_t Index = std::distance(LiveVec.begin(), ValIt);
1465 assert(Index < LiveVec.size() && "Bug in std::find?");
1466 return Index;
1467 };
1468 Module *M = StatepointToken->getModule();
1469
1470 // All gc_relocate are generated as i8 addrspace(1)* (or a vector type whose
1471 // element type is i8 addrspace(1)*). We originally generated unique
1472 // declarations for each pointer type, but this proved problematic because
1473 // the intrinsic mangling code is incomplete and fragile. Since we're moving
1474 // towards a single unified pointer type anyways, we can just cast everything
1475 // to an i8* of the right address space. A bitcast is added later to convert
1476 // gc_relocate to the actual value's type.
1477 auto getGCRelocateDecl = [&](Type *Ty) {
1479 auto AS = Ty->getScalarType()->getPointerAddressSpace();
1480 Type *NewTy = Type::getInt8PtrTy(M->getContext(), AS);
1481 if (auto *VT = dyn_cast<VectorType>(Ty))
1482 NewTy = FixedVectorType::get(NewTy,
1483 cast<FixedVectorType>(VT)->getNumElements());
1484 return Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_relocate,
1485 {NewTy});
1486 };
1487
1488 // Lazily populated map from input types to the canonicalized form mentioned
1489 // in the comment above. This should probably be cached somewhere more
1490 // broadly.
1491 DenseMap<Type *, Function *> TypeToDeclMap;
1492
1493 for (unsigned i = 0; i < LiveVariables.size(); i++) {
1494 // Generate the gc.relocate call and save the result
1495 Value *BaseIdx = Builder.getInt32(FindIndex(LiveVariables, BasePtrs[i]));
1496 Value *LiveIdx = Builder.getInt32(i);
1497
1498 Type *Ty = LiveVariables[i]->getType();
1499 if (!TypeToDeclMap.count(Ty))
1500 TypeToDeclMap[Ty] = getGCRelocateDecl(Ty);
1501 Function *GCRelocateDecl = TypeToDeclMap[Ty];
1502
1503 // only specify a debug name if we can give a useful one
1504 CallInst *Reloc = Builder.CreateCall(
1505 GCRelocateDecl, {StatepointToken, BaseIdx, LiveIdx},
1506 suffixed_name_or(LiveVariables[i], ".relocated", ""));
1507 // Trick CodeGen into thinking there are lots of free registers at this
1508 // fake call.
1510 }
1511}
1512
1513namespace {
1514
1515/// This struct is used to defer RAUWs and `eraseFromParent` s. Using this
1516/// avoids having to worry about keeping around dangling pointers to Values.
1517class DeferredReplacement {
1520 bool IsDeoptimize = false;
1521
1522 DeferredReplacement() = default;
1523
1524public:
1525 static DeferredReplacement createRAUW(Instruction *Old, Instruction *New) {
1526 assert(Old != New && Old && New &&
1527 "Cannot RAUW equal values or to / from null!");
1528
1529 DeferredReplacement D;
1530 D.Old = Old;
1531 D.New = New;
1532 return D;
1533 }
1534
1535 static DeferredReplacement createDelete(Instruction *ToErase) {
1536 DeferredReplacement D;
1537 D.Old = ToErase;
1538 return D;
1539 }
1540
1541 static DeferredReplacement createDeoptimizeReplacement(Instruction *Old) {
1542#ifndef NDEBUG
1543 auto *F = cast<CallInst>(Old)->getCalledFunction();
1544 assert(F && F->getIntrinsicID() == Intrinsic::experimental_deoptimize &&
1545 "Only way to construct a deoptimize deferred replacement");
1546#endif
1547 DeferredReplacement D;
1548 D.Old = Old;
1549 D.IsDeoptimize = true;
1550 return D;
1551 }
1552
1553 /// Does the task represented by this instance.
1554 void doReplacement() {
1555 Instruction *OldI = Old;
1556 Instruction *NewI = New;
1557
1558 assert(OldI != NewI && "Disallowed at construction?!");
1559 assert((!IsDeoptimize || !New) &&
1560 "Deoptimize intrinsics are not replaced!");
1561
1562 Old = nullptr;
1563 New = nullptr;
1564
1565 if (NewI)
1566 OldI->replaceAllUsesWith(NewI);
1567
1568 if (IsDeoptimize) {
1569 // Note: we've inserted instructions, so the call to llvm.deoptimize may
1570 // not necessarily be followed by the matching return.
1571 auto *RI = cast<ReturnInst>(OldI->getParent()->getTerminator());
1572 new UnreachableInst(RI->getContext(), RI);
1573 RI->eraseFromParent();
1574 }
1575
1576 OldI->eraseFromParent();
1577 }
1578};
1579
1580} // end anonymous namespace
1581
1583 const char *DeoptLowering = "deopt-lowering";
1584 if (Call->hasFnAttr(DeoptLowering)) {
1585 // FIXME: Calls have a *really* confusing interface around attributes
1586 // with values.
1587 const AttributeList &CSAS = Call->getAttributes();
1588 if (CSAS.hasFnAttr(DeoptLowering))
1589 return CSAS.getFnAttr(DeoptLowering).getValueAsString();
1590 Function *F = Call->getCalledFunction();
1591 assert(F && F->hasFnAttribute(DeoptLowering));
1592 return F->getFnAttribute(DeoptLowering).getValueAsString();
1593 }
1594 return "live-through";
1595}
1596
1597static void
1599 const SmallVectorImpl<Value *> &BasePtrs,
1601 PartiallyConstructedSafepointRecord &Result,
1602 std::vector<DeferredReplacement> &Replacements,
1603 const PointerToBaseTy &PointerToBase,
1604 GCStrategy *GC) {
1605 assert(BasePtrs.size() == LiveVariables.size());
1606
1607 // Then go ahead and use the builder do actually do the inserts. We insert
1608 // immediately before the previous instruction under the assumption that all
1609 // arguments will be available here. We can't insert afterwards since we may
1610 // be replacing a terminator.
1611 IRBuilder<> Builder(Call);
1612
1615 uint32_t NumPatchBytes = 0;
1617
1618 SmallVector<Value *, 8> CallArgs(Call->args());
1619 std::optional<ArrayRef<Use>> DeoptArgs;
1620 if (auto Bundle = Call->getOperandBundle(LLVMContext::OB_deopt))
1621 DeoptArgs = Bundle->Inputs;
1622 std::optional<ArrayRef<Use>> TransitionArgs;
1623 if (auto Bundle = Call->getOperandBundle(LLVMContext::OB_gc_transition)) {
1624 TransitionArgs = Bundle->Inputs;
1625 // TODO: This flag no longer serves a purpose and can be removed later
1627 }
1628
1629 // Instead of lowering calls to @llvm.experimental.deoptimize as normal calls
1630 // with a return value, we lower then as never returning calls to
1631 // __llvm_deoptimize that are followed by unreachable to get better codegen.
1632 bool IsDeoptimize = false;
1633
1635 parseStatepointDirectivesFromAttrs(Call->getAttributes());
1636 if (SD.NumPatchBytes)
1637 NumPatchBytes = *SD.NumPatchBytes;
1638 if (SD.StatepointID)
1639 StatepointID = *SD.StatepointID;
1640
1641 // Pass through the requested lowering if any. The default is live-through.
1642 StringRef DeoptLowering = getDeoptLowering(Call);
1643 if (DeoptLowering.equals("live-in"))
1645 else {
1646 assert(DeoptLowering.equals("live-through") && "Unsupported value!");
1647 }
1648
1649 FunctionCallee CallTarget(Call->getFunctionType(), Call->getCalledOperand());
1650 if (Function *F = dyn_cast<Function>(CallTarget.getCallee())) {
1651 auto IID = F->getIntrinsicID();
1652 if (IID == Intrinsic::experimental_deoptimize) {
1653 // Calls to llvm.experimental.deoptimize are lowered to calls to the
1654 // __llvm_deoptimize symbol. We want to resolve this now, since the
1655 // verifier does not allow taking the address of an intrinsic function.
1656
1657 SmallVector<Type *, 8> DomainTy;
1658 for (Value *Arg : CallArgs)
1659 DomainTy.push_back(Arg->getType());
1660 auto *FTy = FunctionType::get(Type::getVoidTy(F->getContext()), DomainTy,
1661 /* isVarArg = */ false);
1662
1663 // Note: CallTarget can be a bitcast instruction of a symbol if there are
1664 // calls to @llvm.experimental.deoptimize with different argument types in
1665 // the same module. This is fine -- we assume the frontend knew what it
1666 // was doing when generating this kind of IR.
1667 CallTarget = F->getParent()
1668 ->getOrInsertFunction("__llvm_deoptimize", FTy);
1669
1670 IsDeoptimize = true;
1671 } else if (IID == Intrinsic::memcpy_element_unordered_atomic ||
1672 IID == Intrinsic::memmove_element_unordered_atomic) {
1673 // Unordered atomic memcpy and memmove intrinsics which are not explicitly
1674 // marked as "gc-leaf-function" should be lowered in a GC parseable way.
1675 // Specifically, these calls should be lowered to the
1676 // __llvm_{memcpy|memmove}_element_unordered_atomic_safepoint symbols.
1677 // Similarly to __llvm_deoptimize we want to resolve this now, since the
1678 // verifier does not allow taking the address of an intrinsic function.
1679 //
1680 // Moreover we need to shuffle the arguments for the call in order to
1681 // accommodate GC. The underlying source and destination objects might be
1682 // relocated during copy operation should the GC occur. To relocate the
1683 // derived source and destination pointers the implementation of the
1684 // intrinsic should know the corresponding base pointers.
1685 //
1686 // To make the base pointers available pass them explicitly as arguments:
1687 // memcpy(dest_derived, source_derived, ...) =>
1688 // memcpy(dest_base, dest_offset, source_base, source_offset, ...)
1689 auto &Context = Call->getContext();
1690 auto &DL = Call->getModule()->getDataLayout();
1691 auto GetBaseAndOffset = [&](Value *Derived) {
1692 Value *Base = nullptr;
1693 // Optimizations in unreachable code might substitute the real pointer
1694 // with undef, poison or null-derived constant. Return null base for
1695 // them to be consistent with the handling in the main algorithm in
1696 // findBaseDefiningValue.
1697 if (isa<Constant>(Derived))
1698 Base =
1699 ConstantPointerNull::get(cast<PointerType>(Derived->getType()));
1700 else {
1701 assert(PointerToBase.count(Derived));
1702 Base = PointerToBase.find(Derived)->second;
1703 }
1704 unsigned AddressSpace = Derived->getType()->getPointerAddressSpace();
1705 unsigned IntPtrSize = DL.getPointerSizeInBits(AddressSpace);
1706 Value *Base_int = Builder.CreatePtrToInt(
1707 Base, Type::getIntNTy(Context, IntPtrSize));
1708 Value *Derived_int = Builder.CreatePtrToInt(
1709 Derived, Type::getIntNTy(Context, IntPtrSize));
1710 return std::make_pair(Base, Builder.CreateSub(Derived_int, Base_int));
1711 };
1712
1713 auto *Dest = CallArgs[0];
1714 Value *DestBase, *DestOffset;
1715 std::tie(DestBase, DestOffset) = GetBaseAndOffset(Dest);
1716
1717 auto *Source = CallArgs[1];
1718 Value *SourceBase, *SourceOffset;
1719 std::tie(SourceBase, SourceOffset) = GetBaseAndOffset(Source);
1720
1721 auto *LengthInBytes = CallArgs[2];
1722 auto *ElementSizeCI = cast<ConstantInt>(CallArgs[3]);
1723
1724 CallArgs.clear();
1725 CallArgs.push_back(DestBase);
1726 CallArgs.push_back(DestOffset);
1727 CallArgs.push_back(SourceBase);
1728 CallArgs.push_back(SourceOffset);
1729 CallArgs.push_back(LengthInBytes);
1730
1731 SmallVector<Type *, 8> DomainTy;
1732 for (Value *Arg : CallArgs)
1733 DomainTy.push_back(Arg->getType());
1734 auto *FTy = FunctionType::get(Type::getVoidTy(F->getContext()), DomainTy,
1735 /* isVarArg = */ false);
1736
1737 auto GetFunctionName = [](Intrinsic::ID IID, ConstantInt *ElementSizeCI) {
1738 uint64_t ElementSize = ElementSizeCI->getZExtValue();
1739 if (IID == Intrinsic::memcpy_element_unordered_atomic) {
1740 switch (ElementSize) {
1741 case 1:
1742 return "__llvm_memcpy_element_unordered_atomic_safepoint_1";
1743 case 2:
1744 return "__llvm_memcpy_element_unordered_atomic_safepoint_2";
1745 case 4:
1746 return "__llvm_memcpy_element_unordered_atomic_safepoint_4";
1747 case 8:
1748 return "__llvm_memcpy_element_unordered_atomic_safepoint_8";
1749 case 16:
1750 return "__llvm_memcpy_element_unordered_atomic_safepoint_16";
1751 default:
1752 llvm_unreachable("unexpected element size!");
1753 }
1754 }
1755 assert(IID == Intrinsic::memmove_element_unordered_atomic);
1756 switch (ElementSize) {
1757 case 1:
1758 return "__llvm_memmove_element_unordered_atomic_safepoint_1";
1759 case 2:
1760 return "__llvm_memmove_element_unordered_atomic_safepoint_2";
1761 case 4:
1762 return "__llvm_memmove_element_unordered_atomic_safepoint_4";
1763 case 8:
1764 return "__llvm_memmove_element_unordered_atomic_safepoint_8";
1765 case 16:
1766 return "__llvm_memmove_element_unordered_atomic_safepoint_16";
1767 default:
1768 llvm_unreachable("unexpected element size!");
1769 }
1770 };
1771
1772 CallTarget =
1773 F->getParent()
1774 ->getOrInsertFunction(GetFunctionName(IID, ElementSizeCI), FTy);
1775 }
1776 }
1777
1778 // Create the statepoint given all the arguments
1779 GCStatepointInst *Token = nullptr;
1780 if (auto *CI = dyn_cast<CallInst>(Call)) {
1781 CallInst *SPCall = Builder.CreateGCStatepointCall(
1782 StatepointID, NumPatchBytes, CallTarget, Flags, CallArgs,
1783 TransitionArgs, DeoptArgs, GCArgs, "safepoint_token");
1784
1785 SPCall->setTailCallKind(CI->getTailCallKind());
1786 SPCall->setCallingConv(CI->getCallingConv());
1787
1788 // Currently we will fail on parameter attributes and on certain
1789 // function attributes. In case if we can handle this set of attributes -
1790 // set up function attrs directly on statepoint and return attrs later for
1791 // gc_result intrinsic.
1793 CI->getContext(), CI->getAttributes(), SPCall->getAttributes()));
1794
1795 Token = cast<GCStatepointInst>(SPCall);
1796
1797 // Put the following gc_result and gc_relocate calls immediately after the
1798 // the old call (which we're about to delete)
1799 assert(CI->getNextNode() && "Not a terminator, must have next!");
1800 Builder.SetInsertPoint(CI->getNextNode());
1801 Builder.SetCurrentDebugLocation(CI->getNextNode()->getDebugLoc());
1802 } else {
1803 auto *II = cast<InvokeInst>(Call);
1804
1805 // Insert the new invoke into the old block. We'll remove the old one in a
1806 // moment at which point this will become the new terminator for the
1807 // original block.
1808 InvokeInst *SPInvoke = Builder.CreateGCStatepointInvoke(
1809 StatepointID, NumPatchBytes, CallTarget, II->getNormalDest(),
1810 II->getUnwindDest(), Flags, CallArgs, TransitionArgs, DeoptArgs, GCArgs,
1811 "statepoint_token");
1812
1813 SPInvoke->setCallingConv(II->getCallingConv());
1814
1815 // Currently we will fail on parameter attributes and on certain
1816 // function attributes. In case if we can handle this set of attributes -
1817 // set up function attrs directly on statepoint and return attrs later for
1818 // gc_result intrinsic.
1820 II->getContext(), II->getAttributes(), SPInvoke->getAttributes()));
1821
1822 Token = cast<GCStatepointInst>(SPInvoke);
1823
1824 // Generate gc relocates in exceptional path
1825 BasicBlock *UnwindBlock = II->getUnwindDest();
1826 assert(!isa<PHINode>(UnwindBlock->begin()) &&
1827 UnwindBlock->getUniquePredecessor() &&
1828 "can't safely insert in this block!");
1829
1830 Builder.SetInsertPoint(UnwindBlock, UnwindBlock->getFirstInsertionPt());
1831 Builder.SetCurrentDebugLocation(II->getDebugLoc());
1832
1833 // Attach exceptional gc relocates to the landingpad.
1834 Instruction *ExceptionalToken = UnwindBlock->getLandingPadInst();
1835 Result.UnwindToken = ExceptionalToken;
1836
1837 CreateGCRelocates(LiveVariables, BasePtrs, ExceptionalToken, Builder, GC);
1838
1839 // Generate gc relocates and returns for normal block
1840 BasicBlock *NormalDest = II->getNormalDest();
1841 assert(!isa<PHINode>(NormalDest->begin()) &&
1842 NormalDest->getUniquePredecessor() &&
1843 "can't safely insert in this block!");
1844
1845 Builder.SetInsertPoint(NormalDest, NormalDest->getFirstInsertionPt());
1846
1847 // gc relocates will be generated later as if it were regular call
1848 // statepoint
1849 }
1850 assert(Token && "Should be set in one of the above branches!");
1851
1852 if (IsDeoptimize) {
1853 // If we're wrapping an @llvm.experimental.deoptimize in a statepoint, we
1854 // transform the tail-call like structure to a call to a void function
1855 // followed by unreachable to get better codegen.
1856 Replacements.push_back(
1857 DeferredReplacement::createDeoptimizeReplacement(Call));
1858 } else {
1859 Token->setName("statepoint_token");
1860 if (!Call->getType()->isVoidTy() && !Call->use_empty()) {
1861 StringRef Name = Call->hasName() ? Call->getName() : "";
1862 CallInst *GCResult = Builder.CreateGCResult(Token, Call->getType(), Name);
1863 GCResult->setAttributes(
1865 Call->getAttributes().getRetAttrs()));
1866
1867 // We cannot RAUW or delete CS.getInstruction() because it could be in the
1868 // live set of some other safepoint, in which case that safepoint's
1869 // PartiallyConstructedSafepointRecord will hold a raw pointer to this
1870 // llvm::Instruction. Instead, we defer the replacement and deletion to
1871 // after the live sets have been made explicit in the IR, and we no longer
1872 // have raw pointers to worry about.
1873 Replacements.emplace_back(
1874 DeferredReplacement::createRAUW(Call, GCResult));
1875 } else {
1876 Replacements.emplace_back(DeferredReplacement::createDelete(Call));
1877 }
1878 }
1879
1880 Result.StatepointToken = Token;
1881
1882 // Second, create a gc.relocate for every live variable
1883 CreateGCRelocates(LiveVariables, BasePtrs, Token, Builder, GC);
1884}
1885
1886 // Replace an existing gc.statepoint with a new one and a set of gc.relocates
1887 // which make the relocations happening at this safepoint explicit.
1888 //
1889 // WARNING: Does not do any fixup to adjust users of the original live
1890 // values. That's the callers responsibility.
//
// This is a thin wrapper: it flattens the live set recorded in Result into
// two parallel vectors (live pointer at index i, its base pointer at index i)
// and hands them to makeStatepointExplicitImpl, which performs the rewrite.
1891 static void
1893 PartiallyConstructedSafepointRecord &Result,
1894 std::vector<DeferredReplacement> &Replacements,
1895 const PointerToBaseTy &PointerToBase, GCStrategy *GC) {
1896 const auto &LiveSet = Result.LiveSet;
1897 
1898 // Convert to vector for efficient cross referencing.
// LiveVec[i] and BaseVec[i] must remain index-aligned; the impl relies on it.
1899 SmallVector<Value *, 64> BaseVec, LiveVec;
1900 LiveVec.reserve(LiveSet.size());
1901 BaseVec.reserve(LiveSet.size());
1902 for (Value *L : LiveSet) {
1903 LiveVec.push_back(L);
// Base pointer selection must already have run for every live pointer.
1904 assert(PointerToBase.count(L));
1905 Value *Base = PointerToBase.find(L)->second;
1906 BaseVec.push_back(Base);
1907 }
1908 assert(LiveVec.size() == BaseVec.size());
1909 
1910 // Do the actual rewriting and delete the old statepoint
1911 makeStatepointExplicitImpl(Call, BaseVec, LiveVec, Result, Replacements,
1912 PointerToBase, GC);
1913 }
1914
1915 // Helper function for the relocationViaAlloca.
1916 //
1917 // It receives iterator to the statepoint gc relocates and emits a store to the
1918 // assigned location (via allocaMap) for the each one of them. It adds the
1919 // visited values into the visitedLiveValues set, which we will later use them
1920 // for validation checking.
//
// GCRelocs is expected to be the user list of a statepoint token; users that
// are not gc.relocate calls (e.g. the gc.result) are simply skipped.
1921 static void
1924 DenseSet<Value *> &VisitedLiveValues) {
1925 for (User *U : GCRelocs) {
1926 GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U);
1927 if (!Relocate)
1928 continue;
1929 
1930 Value *OriginalValue = Relocate->getDerivedPtr();
// relocationViaAlloca must have created a slot for every relocated value.
1931 assert(AllocaMap.count(OriginalValue));
1932 Value *Alloca = AllocaMap[OriginalValue];
1933 
1934 // Emit store into the related alloca
1935 // All gc_relocates are i8 addrspace(1)* typed, and it must be bitcasted to
1936 // the correct type according to alloca.
1937 assert(Relocate->getNextNode() &&
1938 "Should always have one since it's not a terminator");
1939 IRBuilder<> Builder(Relocate->getNextNode());
1940 Value *CastedRelocatedValue =
1941 Builder.CreateBitCast(Relocate,
1942 cast<AllocaInst>(Alloca)->getAllocatedType(),
1943 suffixed_name_or(Relocate, ".casted", ""));
1944 
1945 new StoreInst(CastedRelocatedValue, Alloca,
1946 cast<Instruction>(CastedRelocatedValue)->getNextNode());
1947 
// VisitedLiveValues is only consumed by debug-build consistency checking.
1948#ifndef NDEBUG
1949 VisitedLiveValues.insert(OriginalValue);
1950#endif
1951 }
1952 }
1953
1954 // Helper function for the "relocationViaAlloca". Similar to the
1955 // "insertRelocationStores" but works for rematerialized values.
//
// For each (rematerialized clone -> original live value) pair, store the
// clone into the stack slot that relocationViaAlloca assigned to the
// original value, so mem2reg later merges it with the other definitions.
1957 const RematerializedValueMapTy &RematerializedValues,
1959 DenseSet<Value *> &VisitedLiveValues) {
1960 for (auto RematerializedValuePair: RematerializedValues) {
1961 Instruction *RematerializedValue = RematerializedValuePair.first;
1962 Value *OriginalValue = RematerializedValuePair.second;
1963 
1964 assert(AllocaMap.count(OriginalValue) &&
1965 "Can not find alloca for rematerialized value");
1966 Value *Alloca = AllocaMap[OriginalValue];
1967 
// The clone is a non-terminator, so inserting right after it is always legal.
1968 new StoreInst(RematerializedValue, Alloca,
1969 RematerializedValue->getNextNode());
1970 
1971#ifndef NDEBUG
1972 VisitedLiveValues.insert(OriginalValue);
1973#endif
1974 }
1975 }
1976
1977 /// Do all the relocation update via allocas and mem2reg
///
/// Strategy: give every live (and rematerialized) GC value a stack slot in
/// the entry block, store the (relocated) value back into its slot after
/// each statepoint, reload before every use, then run mem2reg to rebuild
/// SSA form. This avoids having to rewrite uses of relocated values by hand.
1981#ifndef NDEBUG
1982 // record initial number of (static) allocas; we'll check we have the same
1983 // number when we get done.
1984 int InitialAllocaNum = 0;
1985 for (Instruction &I : F.getEntryBlock())
1986 if (isa<AllocaInst>(I))
1987 InitialAllocaNum++;
1988#endif
1989 
1990 // TODO-PERF: change data structures, reserve
1992 SmallVector<AllocaInst *, 200> PromotableAllocas;
1993 // Used later to check that we have enough allocas to store all values
1994 std::size_t NumRematerializedValues = 0;
1995 PromotableAllocas.reserve(Live.size());
1996 
1997 // Emit alloca for "LiveValue" and record it in "allocaMap" and
1998 // "PromotableAllocas"
1999 const DataLayout &DL = F.getParent()->getDataLayout();
2000 auto emitAllocaFor = [&](Value *LiveValue) {
2001 AllocaInst *Alloca = new AllocaInst(LiveValue->getType(),
2002 DL.getAllocaAddrSpace(), "",
2003 F.getEntryBlock().getFirstNonPHI());
2004 AllocaMap[LiveValue] = Alloca;
2005 PromotableAllocas.push_back(Alloca);
2006 };
2007 
2008 // Emit alloca for each live gc pointer
2009 for (Value *V : Live)
2010 emitAllocaFor(V);
2011 
2012 // Emit allocas for rematerialized values
// An original value may be rematerialized at several statepoints; all clones
// share the single slot keyed by the original value.
2013 for (const auto &Info : Records)
2014 for (auto RematerializedValuePair : Info.RematerializedValues) {
2015 Value *OriginalValue = RematerializedValuePair.second;
2016 if (AllocaMap.count(OriginalValue) != 0)
2017 continue;
2018 
2019 emitAllocaFor(OriginalValue);
2020 ++NumRematerializedValues;
2021 }
2022 
2023 // The next two loops are part of the same conceptual operation. We need to
2024 // insert a store to the alloca after the original def and at each
2025 // redefinition. We need to insert a load before each use. These are split
2026 // into distinct loops for performance reasons.
2027 
2028 // Update gc pointer after each statepoint: either store a relocated value or
2029 // null (if no relocated value was found for this gc pointer and it is not a
2030 // gc_result). This must happen before we update the statepoint with load of
2031 // alloca otherwise we lose the link between statepoint and old def.
2032 for (const auto &Info : Records) {
2033 Value *Statepoint = Info.StatepointToken;
2034 
2035 // This will be used for consistency check
2036 DenseSet<Value *> VisitedLiveValues;
2037 
2038 // Insert stores for normal statepoint gc relocates
2039 insertRelocationStores(Statepoint->users(), AllocaMap, VisitedLiveValues);
2040 
2041 // In case if it was invoke statepoint
2042 // we will insert stores for exceptional path gc relocates.
2043 if (isa<InvokeInst>(Statepoint)) {
2044 insertRelocationStores(Info.UnwindToken->users(), AllocaMap,
2045 VisitedLiveValues);
2046 }
2047 
2048 // Do similar thing with rematerialized values
2049 insertRematerializationStores(Info.RematerializedValues, AllocaMap,
2050 VisitedLiveValues);
2051 
2052 if (ClobberNonLive) {
2053 // As a debugging aid, pretend that an unrelocated pointer becomes null at
2054 // the gc.statepoint. This will turn some subtle GC problems into
2055 // slightly easier to debug SEGVs. Note that on large IR files with
2056 // lots of gc.statepoints this is extremely costly both memory and time
2057 // wise.
2059 for (auto Pair : AllocaMap) {
2060 Value *Def = Pair.first;
2061 AllocaInst *Alloca = Pair.second;
2062 
2063 // This value was relocated
2064 if (VisitedLiveValues.count(Def)) {
2065 continue;
2066 }
2067 ToClobber.push_back(Alloca);
2068 }
2069 
// Store a null of the slot's type into every not-relocated slot at IP.
2070 auto InsertClobbersAt = [&](Instruction *IP) {
2071 for (auto *AI : ToClobber) {
2072 auto AT = AI->getAllocatedType();
2073 Constant *CPN;
2074 if (AT->isVectorTy())
2076 else
2077 CPN = ConstantPointerNull::get(cast<PointerType>(AT));
2078 new StoreInst(CPN, AI, IP);
2079 }
2080 };
2081 
2082 // Insert the clobbering stores. These may get intermixed with the
2083 // gc.results and gc.relocates, but that's fine.
2084 if (auto II = dyn_cast<InvokeInst>(Statepoint)) {
2085 InsertClobbersAt(&*II->getNormalDest()->getFirstInsertionPt());
2086 InsertClobbersAt(&*II->getUnwindDest()->getFirstInsertionPt());
2087 } else {
2088 InsertClobbersAt(cast<Instruction>(Statepoint)->getNextNode());
2089 }
2090 }
2091 }
2092 
2093 // Update use with load allocas and add store for gc_relocated.
2094 for (auto Pair : AllocaMap) {
2095 Value *Def = Pair.first;
2096 AllocaInst *Alloca = Pair.second;
2097 
2098 // We pre-record the uses of allocas so that we don't have to worry about
2099 // later update that changes the user information..
2100 
2102 // PERF: trade a linear scan for repeated reallocation
2103 Uses.reserve(Def->getNumUses());
2104 for (User *U : Def->users()) {
2105 if (!isa<ConstantExpr>(U)) {
2106 // If the def has a ConstantExpr use, then the def is either a
2107 // ConstantExpr use itself or null. In either case
2108 // (recursively in the first, directly in the second), the oop
2109 // it is ultimately dependent on is null and this particular
2110 // use does not need to be fixed up.
2111 Uses.push_back(cast<Instruction>(U));
2112 }
2113 }
2114 
// A user appearing several times in the user list must be rewritten once.
2116 auto Last = std::unique(Uses.begin(), Uses.end());
2117 Uses.erase(Last, Uses.end());
2118 
2119 for (Instruction *Use : Uses) {
2120 if (isa<PHINode>(Use)) {
// Loads feeding a phi must sit in the corresponding predecessor block.
2121 PHINode *Phi = cast<PHINode>(Use);
2122 for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++) {
2123 if (Def == Phi->getIncomingValue(i)) {
2124 LoadInst *Load =
2125 new LoadInst(Alloca->getAllocatedType(), Alloca, "",
2126 Phi->getIncomingBlock(i)->getTerminator());
2127 Phi->setIncomingValue(i, Load);
2128 }
2129 }
2130 } else {
2131 LoadInst *Load =
2132 new LoadInst(Alloca->getAllocatedType(), Alloca, "", Use);
2133 Use->replaceUsesOfWith(Def, Load);
2134 }
2135 }
2136 
2137 // Emit store for the initial gc value. Store must be inserted after load,
2138 // otherwise store will be in alloca's use list and an extra load will be
2139 // inserted before it.
2140 StoreInst *Store = new StoreInst(Def, Alloca, /*volatile*/ false,
2141 DL.getABITypeAlign(Def->getType()));
2142 if (Instruction *Inst = dyn_cast<Instruction>(Def)) {
2143 if (InvokeInst *Invoke = dyn_cast<InvokeInst>(Inst)) {
2144 // InvokeInst is a terminator so the store need to be inserted into its
2145 // normal destination block.
2146 BasicBlock *NormalDest = Invoke->getNormalDest();
2147 Store->insertBefore(NormalDest->getFirstNonPHI());
2148 } else {
2149 assert(!Inst->isTerminator() &&
2150 "The only terminator that can produce a value is "
2151 "InvokeInst which is handled above.");
2152 Store->insertAfter(Inst);
2153 }
2154 } else {
// Function arguments are defined on entry; store right after the alloca.
2155 assert(isa<Argument>(Def));
2156 Store->insertAfter(cast<Instruction>(Alloca));
2157 }
2158 }
2159 
2160 assert(PromotableAllocas.size() == Live.size() + NumRematerializedValues &&
2161 "we must have the same allocas with lives");
2162 (void) NumRematerializedValues;
2163 if (!PromotableAllocas.empty()) {
2164 // Apply mem2reg to promote alloca to SSA
2165 PromoteMemToReg(PromotableAllocas, DT);
2166 }
2167 
2168#ifndef NDEBUG
// All temporary allocas must have been promoted away by mem2reg.
2169 for (auto &I : F.getEntryBlock())
2170 if (isa<AllocaInst>(I))
2171 InitialAllocaNum--;
2172 assert(InitialAllocaNum == 0 && "We must not introduce any extra allocas");
2173#endif
2174}
2175
2176/// Implement a unique function which doesn't require we sort the input
2177/// vector. Doing so has the effect of changing the output of a couple of
2178/// tests in ways which make them less useful in testing fused safepoints.
2179template <typename T> static void unique_unsorted(SmallVectorImpl<T> &Vec) {
2180 SmallSet<T, 8> Seen;
2181 erase_if(Vec, [&](const T &V) { return !Seen.insert(V).second; });
2182}
2183
2184 /// Insert holders so that each Value is obviously live through the entire
2185 /// lifetime of the call.
///
/// The holders are calls to a dummy external vararg function ("__tmp_use")
/// taking the values as arguments. They exist only to keep the values live
/// across intermediate rewriting; the caller removes them afterwards.
2186 static void insertUseHolderAfter(CallBase *Call, const ArrayRef<Value *> Values,
2187 SmallVectorImpl<CallInst *> &Holders) {
2188 if (Values.empty())
2189 // No values to hold live, might as well not insert the empty holder
2190 return;
2191 
2192 Module *M = Call->getModule();
2193 // Use a dummy vararg function to actually hold the values live
2194 FunctionCallee Func = M->getOrInsertFunction(
2195 "__tmp_use", FunctionType::get(Type::getVoidTy(M->getContext()), true));
2196 if (isa<CallInst>(Call)) {
2197 // For call safepoints insert dummy calls right after safepoint
2198 Holders.push_back(
2199 CallInst::Create(Func, Values, "", &*++Call->getIterator()));
2200 return;
2201 }
2202 // For invoke safepoints insert dummy calls both in normal and
2203 // exceptional destination blocks
2204 auto *II = cast<InvokeInst>(Call);
2206 Func, Values, "", &*II->getNormalDest()->getFirstInsertionPt()));
2208 Func, Values, "", &*II->getUnwindDest()->getFirstInsertionPt()));
2209 }
2210
// Compute liveness once for the whole function, then extract the live set
// for each individual parse point into its safepoint record.
2214 GCStrategy *GC) {
2215 GCPtrLivenessData OriginalLivenessData;
2216 computeLiveInValues(DT, F, OriginalLivenessData, GC);
// records[i] corresponds to toUpdate[i]; keep the two arrays index-aligned.
2217 for (size_t i = 0; i < records.size(); i++) {
2218 struct PartiallyConstructedSafepointRecord &info = records[i];
2219 analyzeParsePointLiveness(DT, OriginalLivenessData, toUpdate[i], info, GC);
2220 }
2221 }
2222
2223 // Helper function for the "rematerializeLiveValues". It walks use chain
2224 // starting from the "CurrentValue" until it reaches the root of the chain, i.e.
2225 // the base or a value it cannot process. Only "simple" values are processed
2226 // (currently it is GEP's and casts). The returned root is examined by the
2227 // callers of findRematerializableChainToBasePointer. Fills "ChainToBase" array
2228 // with all visited values.
//
// The chain is recorded derived-first: ChainToBase[0] is the derived pointer
// itself and the last entry is the instruction closest to the root.
2230 SmallVectorImpl<Instruction*> &ChainToBase,
2231 Value *CurrentValue) {
2232 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurrentValue)) {
2233 ChainToBase.push_back(GEP);
2234 return findRematerializableChainToBasePointer(ChainToBase,
2235 GEP->getPointerOperand());
2236 }
2237 
2238 if (CastInst *CI = dyn_cast<CastInst>(CurrentValue)) {
// A non-noop cast changes the representation and cannot be cheaply redone;
// stop here and report the cast itself as the (unsupported) root.
2239 if (!CI->isNoopCast(CI->getModule()->getDataLayout()))
2240 return CI;
2241 
2242 ChainToBase.push_back(CI);
2243 return findRematerializableChainToBasePointer(ChainToBase,
2244 CI->getOperand(0));
2245 }
2246 
2247 // We have reached the root of the chain, which is either equal to the base or
2248 // is the first unsupported value along the use chain.
2249 return CurrentValue;
2250 }
2251
2252 // Helper function for the "rematerializeLiveValues". Compute cost of the use
2253 // chain we are going to rematerialize.
//
// The chain may only contain noop casts and GEPs (as produced by
// findRematerializableChainToBasePointer); anything else is a logic error.
2254 static InstructionCost
2258 
2259 for (Instruction *Instr : Chain) {
2260 if (CastInst *CI = dyn_cast<CastInst>(Instr)) {
2261 assert(CI->isNoopCast(CI->getModule()->getDataLayout()) &&
2262 "non noop cast is found during rematerialization");
2263 
2264 Type *SrcTy = CI->getOperand(0)->getType();
2265 Cost += TTI.getCastInstrCost(CI->getOpcode(), CI->getType(), SrcTy,
2268 
2269 } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Instr)) {
2270 // Cost of the address calculation
2271 Type *ValTy = GEP->getSourceElementType();
2273 
2274 // And cost of the GEP itself
2275 // TODO: Use TTI->getGEPCost here (it exists, but appears to be not
2276 // allowed for the external usage)
2277 if (!GEP->hasAllConstantIndices())
2278 Cost += 2;
2279 
2280 } else {
2281 llvm_unreachable("unsupported instruction type during rematerialization");
2282 }
2283 }
2284 
2285 return Cost;
2286 }
2287
2288static bool AreEquivalentPhiNodes(PHINode &OrigRootPhi, PHINode &AlternateRootPhi) {
2289 unsigned PhiNum = OrigRootPhi.getNumIncomingValues();
2290 if (PhiNum != AlternateRootPhi.getNumIncomingValues() ||
2291 OrigRootPhi.getParent() != AlternateRootPhi.getParent())
2292 return false;
2293 // Map of incoming values and their corresponding basic blocks of
2294 // OrigRootPhi.
2295 SmallDenseMap<Value *, BasicBlock *, 8> CurrentIncomingValues;
2296 for (unsigned i = 0; i < PhiNum; i++)
2297 CurrentIncomingValues[OrigRootPhi.getIncomingValue(i)] =
2298 OrigRootPhi.getIncomingBlock(i);
2299
2300 // Both current and base PHIs should have same incoming values and
2301 // the same basic blocks corresponding to the incoming values.
2302 for (unsigned i = 0; i < PhiNum; i++) {
2303 auto CIVI =
2304 CurrentIncomingValues.find(AlternateRootPhi.getIncomingValue(i));
2305 if (CIVI == CurrentIncomingValues.end())
2306 return false;
2307 BasicBlock *CurrentIncomingBB = CIVI->second;
2308 if (CurrentIncomingBB != AlternateRootPhi.getIncomingBlock(i))
2309 return false;
2310 }
2311 return true;
2312}
2313
2314 // Find derived pointers that can be recomputed cheap enough and fill
2315 // RematerizationCandidates with such candidates.
//
// A candidate is a derived pointer whose defining chain down to its base (or
// an equivalent phi of the base) consists solely of GEPs and noop casts and
// is at most ChainLengthThreshold instructions long.
2316 static void
2317 findRematerializationCandidates(PointerToBaseTy PointerToBase,
2318 RematCandTy &RematerizationCandidates,
2320 const unsigned int ChainLengthThreshold = 10;
2321 
2322 for (auto P2B : PointerToBase) {
2323 auto *Derived = P2B.first;
2324 auto *Base = P2B.second;
2325 // Consider only derived pointers.
2326 if (Derived == Base)
2327 continue;
2328 
2329 // For each live pointer find its defining chain.
2331 Value *RootOfChain =
2332 findRematerializableChainToBasePointer(ChainToBase, Derived);
2333 
2334 // Nothing to do, or chain is too long
2335 if ( ChainToBase.size() == 0 ||
2336 ChainToBase.size() > ChainLengthThreshold)
2337 continue;
2338 
2339 // Handle the scenario where the RootOfChain is not equal to the
2340 // Base Value, but they are essentially the same phi values.
2341 if (RootOfChain != PointerToBase[Derived]) {
2342 PHINode *OrigRootPhi = dyn_cast<PHINode>(RootOfChain);
2343 PHINode *AlternateRootPhi = dyn_cast<PHINode>(PointerToBase[Derived]);
2344 if (!OrigRootPhi || !AlternateRootPhi)
2345 continue;
2346 // PHI nodes that have the same incoming values, and belonging to the same
2347 // basic blocks are essentially the same SSA value. When the original phi
2348 // has incoming values with different base pointers, the original phi is
2349 // marked as conflict, and an additional `AlternateRootPhi` with the same
2350 // incoming values get generated by the findBasePointer function. We need
2351 // to identify the newly generated AlternateRootPhi (.base version of phi)
2352 // and RootOfChain (the original phi node itself) are the same, so that we
2353 // can rematerialize the gep and casts. This is a workaround for the
2354 // deficiency in the findBasePointer algorithm.
2355 if (!AreEquivalentPhiNodes(*OrigRootPhi, *AlternateRootPhi))
2356 continue;
2357 }
2358 // Compute cost of this chain.
2360 // TODO: We can also account for cases when we will be able to remove some
2361 // of the rematerialized values by later optimization passes. I.e if
2362 // we rematerialized several intersecting chains. Or if original values
2363 // don't have any uses besides this statepoint.
2364 
2365 // Ok, there is a candidate.
2366 RematerizlizationCandidateRecord Record;
2367 Record.ChainToBase = ChainToBase;
2368 Record.RootOfChain = RootOfChain;
2369 Record.Cost = Cost;
2370 RematerizationCandidates.insert({ Derived, Record });
2371 }
2372 }
2373
2374 // Try to rematerialize derived pointers immediately before their uses
2375 // (instead of rematerializing after every statepoint it is live through).
2376 // This can be beneficial when derived pointer is live across many
2377 // statepoints, but uses are rare.
2379 RematCandTy &RematerizationCandidates,
2381 PointerToBaseTy &PointerToBase) {
// Guarded by the RematDerivedAtUses command line option.
2382 if (!RematDerivedAtUses)
2383 return;
2384 
2385 SmallVector<Instruction *, 32> LiveValuesToBeDeleted;
2386 
2387 LLVM_DEBUG(dbgs() << "Rematerialize derived pointers at uses, "
2388 << "Num statepoints: " << Records.size() << '\n');
2389 
2390 for (auto &It : RematerizationCandidates) {
2391 Instruction *Cand = cast<Instruction>(It.first);
2392 auto &Record = It.second;
2393 
2395 continue;
2396 
// Dead candidates have nothing to rewrite.
2397 if (Cand->user_empty())
2398 continue;
2399 
// A single use in the candidate's own block gains nothing from moving the
// computation next to the use.
2400 if (Cand->hasOneUse())
2401 if (auto *U = dyn_cast<Instruction>(Cand->getUniqueUndroppableUser()))
2402 if (U->getParent() == Cand->getParent())
2403 continue;
2404 
2405 // Rematerialization before PHI nodes is not implemented.
2406 if (llvm::any_of(Cand->users(),
2407 [](const auto *U) { return isa<PHINode>(U); }))
2408 continue;
2409 
2410 LLVM_DEBUG(dbgs() << "Trying cand " << *Cand << " ... ");
2411 
2412 // Count of rematerialization instructions we introduce is equal to number
2413 // of candidate uses.
2414 // Count of rematerialization instructions we eliminate is equal to number
2415 // of statepoints it is live through.
2416 // Consider transformation profitable if latter is greater than former
2417 // (in other words, we create less than eliminate).
2418 unsigned NumLiveStatepoints = llvm::count_if(
2419 Records, [Cand](const auto &R) { return R.LiveSet.contains(Cand); });
2420 unsigned NumUses = Cand->getNumUses();
2421 
2422 LLVM_DEBUG(dbgs() << "Num uses: " << NumUses << " Num live statepoints: "
2423 << NumLiveStatepoints << " ");
2424 
2425 if (NumLiveStatepoints < NumUses) {
2426 LLVM_DEBUG(dbgs() << "not profitable\n");
2427 continue;
2428 }
2429 
2430 // If rematerialization is 'free', then favor rematerialization at
2431 // uses as it generally shortens live ranges.
2432 // TODO: Short (size ==1) chains only?
2433 if (NumLiveStatepoints == NumUses && Record.Cost > 0) {
2434 LLVM_DEBUG(dbgs() << "not profitable\n");
2435 continue;
2436 }
2437 
2438 LLVM_DEBUG(dbgs() << "looks profitable\n");
2439 
2440 // ChainToBase may contain another remat candidate (as a sub chain) which
2441 // has been rewritten by now. Need to recollect chain to have up to date
2442 // value.
2443 // TODO: sort records in findRematerializationCandidates() in
2444 // decreasing chain size order?
2445 if (Record.ChainToBase.size() > 1) {
2446 Record.ChainToBase.clear();
2448 }
2449 
2450 // Current rematerialization algorithm is very simple: we rematerialize
2451 // immediately before EVERY use, even if there are several uses in same
2452 // block or if use is local to Cand Def. The reason is that this allows
2453 // us to avoid recomputing liveness without complicated analysis:
2454 // - If we did not eliminate all uses of original Candidate, we do not
2455 // know exactly in what BBs it is still live.
2456 // - If we rematerialize once per BB, we need to find proper insertion
2457 // place (first use in block, but after Def) and analyze if there is
2458 // statepoint between uses in the block.
2459 while (!Cand->user_empty()) {
2460 Instruction *UserI = cast<Instruction>(*Cand->user_begin());
2461 Instruction *RematChain = rematerializeChain(
2462 Record.ChainToBase, UserI, Record.RootOfChain, PointerToBase[Cand]);
2463 UserI->replaceUsesOfWith(Cand, RematChain);
// The clone needs a base pointer entry too so later phases can relocate it.
2464 PointerToBase[RematChain] = PointerToBase[Cand];
2465 }
2466 LiveValuesToBeDeleted.push_back(Cand);
2467 }
2468 
2469 LLVM_DEBUG(dbgs() << "Rematerialized " << LiveValuesToBeDeleted.size()
2470 << " derived pointers\n");
// Now that every use has been rewritten, drop the fully-replaced candidates
// from the candidate map and from each statepoint's live set.
2471 for (auto *Cand : LiveValuesToBeDeleted) {
2472 assert(Cand->use_empty() && "Unexpected user remain");
2473 RematerizationCandidates.erase(Cand);
2474 for (auto &R : Records) {
2475 assert(!R.LiveSet.contains(Cand) ||
2476 R.LiveSet.contains(PointerToBase[Cand]));
2477 R.LiveSet.remove(Cand);
2478 }
2479 }
2480 
2481 // Recollect not rematerialized chains - we might have rewritten
2482 // their sub-chains.
2483 if (!LiveValuesToBeDeleted.empty()) {
2484 for (auto &P : RematerizationCandidates) {
2485 auto &R = P.second;
2486 if (R.ChainToBase.size() > 1) {
2487 R.ChainToBase.clear();
2488 findRematerializableChainToBasePointer(R.ChainToBase, P.first);
2489 }
2490 }
2491 }
2492 }
2493
2494 // From the statepoint live set pick values that are cheaper to recompute than
2495 // to relocate. Remove these values from the live set, rematerialize them after
2496 // statepoint and record them in "Info" structure. Note that similar to
2497 // relocated values we don't do any user adjustments here.
2499 PartiallyConstructedSafepointRecord &Info,
2500 PointerToBaseTy &PointerToBase,
2501 RematCandTy &RematerizationCandidates,
2503 // Record values we are going to delete from this statepoint live set.
2504 // We can not do this in following loop due to iterator invalidation.
2505 SmallVector<Value *, 32> LiveValuesToBeDeleted;
2506 
2507 for (Value *LiveValue : Info.LiveSet) {
// Only values previously vetted by findRematerializationCandidates qualify.
2508 auto It = RematerizationCandidates.find(LiveValue);
2509 if (It == RematerizationCandidates.end())
2510 continue;
2511 
2512 RematerizlizationCandidateRecord &Record = It->second;
2513 
2515 // For invokes we need to rematerialize each chain twice - for normal and
2516 // for unwind basic blocks. Model this by multiplying cost by two.
2517 if (isa<InvokeInst>(Call))
2518 Cost *= 2;
2519 
2520 // If it's too expensive - skip it.
2522 continue;
2523 
2524 // Remove value from the live set
2525 LiveValuesToBeDeleted.push_back(LiveValue);
2526 
2527 // Clone instructions and record them inside "Info" structure.
2528 
2529 // Different cases for calls and invokes. For invokes we need to clone
2530 // instructions both on normal and unwind path.
2531 if (isa<CallInst>(Call)) {
2532 Instruction *InsertBefore = Call->getNextNode();
2533 assert(InsertBefore);
2534 Instruction *RematerializedValue =
2535 rematerializeChain(Record.ChainToBase, InsertBefore,
2536 Record.RootOfChain, PointerToBase[LiveValue]);
2537 Info.RematerializedValues[RematerializedValue] = LiveValue;
2538 } else {
2539 auto *Invoke = cast<InvokeInst>(Call);
2540 
2541 Instruction *NormalInsertBefore =
2542 &*Invoke->getNormalDest()->getFirstInsertionPt();
2543 Instruction *UnwindInsertBefore =
2544 &*Invoke->getUnwindDest()->getFirstInsertionPt();
2545 
2546 Instruction *NormalRematerializedValue =
2547 rematerializeChain(Record.ChainToBase, NormalInsertBefore,
2548 Record.RootOfChain, PointerToBase[LiveValue]);
2549 Instruction *UnwindRematerializedValue =
2550 rematerializeChain(Record.ChainToBase, UnwindInsertBefore,
2551 Record.RootOfChain, PointerToBase[LiveValue]);
2552 
2553 Info.RematerializedValues[NormalRematerializedValue] = LiveValue;
2554 Info.RematerializedValues[UnwindRematerializedValue] = LiveValue;
2555 }
2556 }
2557 
2558 // Remove rematerialized values from the live set.
2559 for (auto *LiveValue: LiveValuesToBeDeleted) {
2560 Info.LiveSet.remove(LiveValue);
2561 }
2562 }
2563
2565 SmallVectorImpl<CallInst *> &Intrinsics,
2566 DefiningValueMapTy &DVCache,
2567 IsKnownBaseMapTy &KnownBases) {
2568 auto &Context = F.getContext();
2569 auto &DL = F.getParent()->getDataLayout();
2570 bool Changed = false;
2571
2572 for (auto *Callsite : Intrinsics)
2573 switch (Callsite->getIntrinsicID()) {
2574 case Intrinsic::experimental_gc_get_pointer_base: {
2575 Changed = true;
2576 Value *Base =
2577 findBasePointer(Callsite->getOperand(0), DVCache, KnownBases);
2578 assert(!DVCache.count(Callsite));
2579 auto *BaseBC = IRBuilder<>(Callsite).CreateBitCast(
2580 Base, Callsite->getType(), suffixed_name_or(Base, ".cast", ""));
2581 if (BaseBC != Base)
2582 DVCache[BaseBC] = Base;
2583 Callsite->replaceAllUsesWith(BaseBC);
2584 if (!BaseBC->hasName())
2585 BaseBC->takeName(Callsite);
2586 Callsite->eraseFromParent();
2587 break;
2588 }
2589 case Intrinsic::experimental_gc_get_pointer_offset: {
2590 Changed = true;
2591 Value *Derived = Callsite->getOperand(0);
2592 Value *Base = findBasePointer(Derived, DVCache, KnownBases);
2593 assert(!DVCache.count(Callsite));
2594 unsigned AddressSpace = Derived->getType()->getPointerAddressSpace();
2595 unsigned IntPtrSize = DL.getPointerSizeInBits(AddressSpace);
2596 IRBuilder<> Builder(Callsite);
2597 Value *BaseInt =
2598 Builder.CreatePtrToInt(Base, Type::getIntNTy(Context, IntPtrSize),
2599 suffixed_name_or(Base, ".int", ""));
2600 Value *DerivedInt =
2601 Builder.CreatePtrToInt(Derived, Type::getIntNTy(Context, IntPtrSize),
2602 suffixed_name_or(Derived, ".int", ""));
2603 Value *Offset = Builder.CreateSub(DerivedInt, BaseInt);
2604 Callsite->replaceAllUsesWith(Offset);
2605 Offset->takeName(Callsite);
2606 Callsite->eraseFromParent();
2607 break;
2608 }
2609 default:
2610 llvm_unreachable("Unknown intrinsic");
2611 }
2612
2613 return Changed;
2614}
2615
2619 DefiningValueMapTy &DVCache,
2620 IsKnownBaseMapTy &KnownBases) {
2621 std::unique_ptr<GCStrategy> GC = findGCStrategy(F);
2622
2623#ifndef NDEBUG
2624 // Validate the input
2625 std::set<CallBase *> Uniqued;
2626 Uniqued.insert(ToUpdate.begin(), ToUpdate.end());
2627 assert(Uniqued.size() == ToUpdate.size() && "no duplicates please!");
2628
2629 for (CallBase *Call : ToUpdate)
2630 assert(Call->getFunction() == &F);
2631#endif
2632
2633 // When inserting gc.relocates for invokes, we need to be able to insert at
2634 // the top of the successor blocks. See the comment on
2635 // normalForInvokeSafepoint on exactly what is needed. Note that this step
2636 // may restructure the CFG.
2637 for (CallBase *Call : ToUpdate) {
2638 auto *II = dyn_cast<InvokeInst>(Call);
2639 if (!II)
2640 continue;
2641 normalizeForInvokeSafepoint(II->getNormalDest(), II->getParent(), DT);
2642 normalizeForInvokeSafepoint(II->getUnwindDest(), II->getParent(), DT);
2643 }
2644
2645 // A list of dummy calls added to the IR to keep various values obviously
2646 // live in the IR. We'll remove all of these when done.
2648
2649 // Insert a dummy call with all of the deopt operands we'll need for the
2650 // actual safepoint insertion as arguments. This ensures reference operands
2651 // in the deopt argument list are considered live through the safepoint (and
2652 // thus makes sure they get relocated.)
2653 for (CallBase *Call : ToUpdate) {
2654 SmallVector<Value *, 64> DeoptValues;
2655
2656 for (Value *Arg : GetDeoptBundleOperands(Call)) {
2657 assert(!isUnhandledGCPointerType(Arg->getType(), GC.get()) &&
2658 "support for FCA unimplemented");
2659 if (isHandledGCPointerType(Arg->getType(), GC.get()))
2660 DeoptValues.push_back(Arg);
2661 }
2662
2663 insertUseHolderAfter(Call, DeoptValues, Holders);
2664 }
2665
2667
2668 // A) Identify all gc pointers which are statically live at the given call
2669 // site.
2670 findLiveReferences(F, DT, ToUpdate, Records, GC.get());
2671
2672 /// Global mapping from live pointers to a base-defining-value.
2673 PointerToBaseTy PointerToBase;
2674
2675 // B) Find the base pointers for each live pointer
2676 for (size_t i = 0; i < Records.size(); i++) {
2677 PartiallyConstructedSafepointRecord &info = Records[i];
2678 findBasePointers(DT, DVCache, ToUpdate[i], info, PointerToBase, KnownBases);
2679 }
2680 if (PrintBasePointers) {
2681 errs() << "Base Pairs (w/o Relocation):\n";
2682 for (auto &Pair : PointerToBase) {
2683 errs() << " derived ";
2684 Pair.first->printAsOperand(errs(), false);
2685 errs() << " base ";
2686 Pair.second->printAsOperand(errs(), false);
2687 errs() << "\n";
2688 ;
2689 }
2690 }
2691
2692 // The base phi insertion logic (for any safepoint) may have inserted new
2693 // instructions which are now live at some safepoint. The simplest such
2694 // example is:
2695 // loop:
2696 // phi a <-- will be a new base_phi here
2697 // safepoint 1 <-- that needs to be live here
2698 // gep a + 1
2699 // safepoint 2
2700 // br loop
2701 // We insert some dummy calls after each safepoint to definitely hold live
2702 // the base pointers which were identified for that safepoint. We'll then
2703 // ask liveness for _every_ base inserted to see what is now live. Then we
2704 // remove the dummy calls.
2705 Holders.reserve(Holders.size() + Records.size());
2706 for (size_t i = 0; i < Records.size(); i++) {
2707 PartiallyConstructedSafepointRecord &Info = Records[i];
2708
2710 for (auto *Derived : Info.LiveSet) {
2711 assert(PointerToBase.count(Derived) && "Missed base for derived pointer");
2712 Bases.push_back(PointerToBase[Derived]);
2713 }
2714
2715 insertUseHolderAfter(ToUpdate[i], Bases, Holders);
2716 }
2717
2718 // By selecting base pointers, we've effectively inserted new uses. Thus, we
2719 // need to rerun liveness. We may *also* have inserted new defs, but that's
2720 // not the key issue.
2721 recomputeLiveInValues(F, DT, ToUpdate, Records, PointerToBase, GC.get());
2722
2723 if (PrintBasePointers) {
2724 errs() << "Base Pairs: (w/Relocation)\n";
2725 for (auto Pair : PointerToBase) {
2726 errs() << " derived ";
2727 Pair.first->printAsOperand(errs(), false);
2728 errs() << " base ";
2729 Pair.second->printAsOperand(errs(), false);
2730 errs() << "\n";
2731 }
2732 }
2733
2734 // It is possible that non-constant live variables have a constant base. For
2735 // example, a GEP with a variable offset from a global. In this case we can
2736 // remove it from the liveset. We already don't add constants to the liveset
2737 // because we assume they won't move at runtime and the GC doesn't need to be
2738 // informed about them. The same reasoning applies if the base is constant.
2739 // Note that the relocation placement code relies on this filtering for
2740 // correctness as it expects the base to be in the liveset, which isn't true
2741 // if the base is constant.
2742 for (auto &Info : Records) {
2743 Info.LiveSet.remove_if([&](Value *LiveV) {
2744 assert(PointerToBase.count(LiveV) && "Missed base for derived pointer");
2745 return isa<Constant>(PointerToBase[LiveV]);
2746 });
2747 }
2748
2749 for (CallInst *CI : Holders)
2750 CI->eraseFromParent();
2751
2752 Holders.clear();
2753
2754 // Compute the cost of possible re-materialization of derived pointers.
2755 RematCandTy RematerizationCandidates;
2756 findRematerializationCandidates(PointerToBase, RematerizationCandidates, TTI);
2757
2758 // In order to reduce live set of statepoint we might choose to rematerialize
2759 // some values instead of relocating them. This is purely an optimization and
2760 // does not influence correctness.
2761 // First try rematerialization at uses, then after statepoints.
2762 rematerializeLiveValuesAtUses(RematerizationCandidates, Records,
2763 PointerToBase);
2764 for (size_t i = 0; i < Records.size(); i++)
2765 rematerializeLiveValues(ToUpdate[i], Records[i], PointerToBase,
2766 RematerizationCandidates, TTI);
2767
2768 // We need this to safely RAUW and delete call or invoke return values that
2769 // may themselves be live over a statepoint. For details, please see usage in
2770 // makeStatepointExplicitImpl.
2771 std::vector<DeferredReplacement> Replacements;
2772
2773 // Now run through and replace the existing statepoints with new ones with
2774 // the live variables listed. We do not yet update uses of the values being
2775 // relocated. We have references to live variables that need to
2776 // survive to the last iteration of this loop. (By construction, the
2777 // previous statepoint can not be a live variable, thus we can and remove
2778 // the old statepoint calls as we go.)
2779 for (size_t i = 0; i < Records.size(); i++)
2780 makeStatepointExplicit(DT, ToUpdate[i], Records[i], Replacements,
2781 PointerToBase, GC.get());
2782
2783 ToUpdate.clear(); // prevent accident use of invalid calls.
2784
2785 for (auto &PR : Replacements)
2786 PR.doReplacement();
2787
2788 Replacements.clear();
2789
2790 for (auto &Info : Records) {
2791 // These live sets may contain state Value pointers, since we replaced calls
2792 // with operand bundles with calls wrapped in gc.statepoint, and some of
2793 // those calls may have been def'ing live gc pointers. Clear these out to
2794 // avoid accidentally using them.
2795 //
2796 // TODO: We should create a separate data structure that does not contain
2797 // these live sets, and migrate to using that data structure from this point
2798 // onward.
2799 Info.LiveSet.clear();
2800 }
2801 PointerToBase.clear();
2802
2803 // Do all the fixups of the original live variables to their relocated selves
2805 for (const PartiallyConstructedSafepointRecord &Info : Records) {
2806 // We can't simply save the live set from the original insertion. One of
2807 // the live values might be the result of a call which needs a safepoint.
2808 // That Value* no longer exists and we need to use the new gc_result.
2809 // Thankfully, the live set is embedded in the statepoint (and updated), so
2810 // we just grab that.
2811 llvm::append_range(Live, Info.StatepointToken->gc_args());
2812#ifndef NDEBUG
2813 // Do some basic validation checking on our liveness results before
2814 // performing relocation. Relocation can and will turn mistakes in liveness
2815 // results into non-sensical code which is must harder to debug.
2816 // TODO: It would be nice to test consistency as well
2817 assert(DT.isReachableFromEntry(Info.StatepointToken->getParent()) &&
2818 "statepoint must be reachable or liveness is meaningless");
2819 for (Value *V : Info.StatepointToken->gc_args()) {
2820 if (!isa<Instruction>(V))
2821 // Non-instruction values trivial dominate all possible uses
2822 continue;
2823 auto *LiveInst = cast<Instruction>(V);
2824 assert(DT.isReachableFromEntry(LiveInst->getParent()) &&
2825 "unreachable values should never be live");
2826 assert(DT.dominates(LiveInst, Info.StatepointToken) &&
2827 "basic SSA liveness expectation violated by liveness analysis");
2828 }
2829#endif
2830 }
2832
2833#ifndef NDEBUG
2834 // Validation check
2835 for (auto *Ptr : Live)
2836 assert(isHandledGCPointerType(Ptr->getType(), GC.get()) &&
2837 "must be a gc pointer type");
2838#endif
2839
2840 relocationViaAlloca(F, DT, Live, Records);
2841 return !Records.empty();
2842}
2843
2844// List of all parameter and return attributes which must be stripped when
2845// lowering from the abstract machine model. Note that we list attributes
2846// here which aren't valid as return attributes, that is okay.
2848 AttributeMask R;
2849 R.addAttribute(Attribute::Dereferenceable);
2850 R.addAttribute(Attribute::DereferenceableOrNull);
2851 R.addAttribute(Attribute::ReadNone);
2852 R.addAttribute(Attribute::ReadOnly);
2853 R.addAttribute(Attribute::WriteOnly);
2854 R.addAttribute(Attribute::NoAlias);
2855 R.addAttribute(Attribute::NoFree);
2856 return R;
2857}
2858
2860 LLVMContext &Ctx = F.getContext();
2861
2862 // Intrinsics are very delicate. Lowering sometimes depends the presence
2863 // of certain attributes for correctness, but we may have also inferred
2864 // additional ones in the abstract machine model which need stripped. This
2865 // assumes that the attributes defined in Intrinsic.td are conservatively
2866 // correct for both physical and abstract model.
2867 if (Intrinsic::ID id = F.getIntrinsicID()) {
2868 F.setAttributes(Intrinsic::getAttributes(Ctx, id));
2869 return;
2870 }
2871
2873 for (Argument &A : F.args())
2874 if (isa<PointerType>(A.getType()))
2875 F.removeParamAttrs(A.getArgNo(), R);
2876
2877 if (isa<PointerType>(F.getReturnType()))
2878 F.removeRetAttrs(R);
2879
2880 for (auto Attr : FnAttrsToStrip)
2881 F.removeFnAttr(Attr);
2882}
2883
2884/// Certain metadata on instructions are invalid after running RS4GC.
2885/// Optimizations that run after RS4GC can incorrectly use this metadata to
2886/// optimize functions. We drop such metadata on the instruction.
2888 if (!isa<LoadInst>(I) && !isa<StoreInst>(I))
2889 return;
2890 // These are the attributes that are still valid on loads and stores after
2891 // RS4GC.
2892 // The metadata implying dereferenceability and noalias are (conservatively)
2893 // dropped. This is because semantically, after RewriteStatepointsForGC runs,
2894 // all calls to gc.statepoint "free" the entire heap. Also, gc.statepoint can
2895 // touch the entire heap including noalias objects. Note: The reasoning is
2896 // same as stripping the dereferenceability and noalias attributes that are
2897 // analogous to the metadata counterparts.
2898 // We also drop the invariant.load metadata on the load because that metadata
2899 // implies the address operand to the load points to memory that is never
2900 // changed once it became dereferenceable. This is no longer true after RS4GC.
2901 // Similar reasoning applies to invariant.group metadata, which applies to
2902 // loads within a group.
2903 unsigned ValidMetadataAfterRS4GC[] = {LLVMContext::MD_tbaa,
2904 LLVMContext::MD_range,
2905 LLVMContext::MD_alias_scope,
2906 LLVMContext::MD_nontemporal,
2907 LLVMContext::MD_nonnull,
2908 LLVMContext::MD_align,
2909 LLVMContext::MD_type};
2910
2911 // Drops all metadata on the instruction other than ValidMetadataAfterRS4GC.
2912 I.dropUnknownNonDebugMetadata(ValidMetadataAfterRS4GC);
2913}
2914
2916 if (F.empty())
2917 return;
2918
2919 LLVMContext &Ctx = F.getContext();
2920 MDBuilder Builder(Ctx);
2921
2922 // Set of invariantstart instructions that we need to remove.
2923 // Use this to avoid invalidating the instruction iterator.
2924 SmallVector<IntrinsicInst*, 12> InvariantStartInstructions;
2925
2926 for (Instruction &I : instructions(F)) {
2927 // invariant.start on memory location implies that the referenced memory
2928 // location is constant and unchanging. This is no longer true after
2929 // RewriteStatepointsForGC runs because there can be calls to gc.statepoint
2930 // which frees the entire heap and the presence of invariant.start allows
2931 // the optimizer to sink the load of a memory location past a statepoint,
2932 // which is incorrect.
2933 if (auto *II = dyn_cast<IntrinsicInst>(&I))
2934 if (II->getIntrinsicID() == Intrinsic::invariant_start) {
2935 InvariantStartInstructions.push_back(II);
2936 continue;
2937 }
2938
2939 if (MDNode *Tag = I.getMetadata(LLVMContext::MD_tbaa)) {
2940 MDNode *MutableTBAA = Builder.createMutableTBAAAccessTag(Tag);
2941 I.setMetadata(LLVMContext::MD_tbaa, MutableTBAA);
2942 }
2943
2945
2947 if (auto *Call = dyn_cast<CallBase>(&I)) {
2948 for (int i = 0, e = Call->arg_size(); i != e; i++)
2949 if (isa<PointerType>(Call->getArgOperand(i)->getType()))
2950 Call->removeParamAttrs(i, R);
2951 if (isa<PointerType>(Call->getType()))
2952 Call->removeRetAttrs(R);
2953 }
2954 }
2955
2956 // Delete the invariant.start instructions and RAUW poison.
2957 for (auto *II : InvariantStartInstructions) {
2958 II->replaceAllUsesWith(PoisonValue::get(II->getType()));
2959 II->eraseFromParent();
2960 }
2961}
2962
2963/// Looks up the GC strategy for a given function, returning null if the
2964/// function doesn't have a GC tag. The strategy is stored in the cache.
2965static std::unique_ptr<GCStrategy> findGCStrategy(Function &F) {
2966 if (!F.hasGC())
2967 return nullptr;
2968
2969 return getGCStrategy(F.getGC());
2970}
2971
2972/// Returns true if this function should be rewritten by this pass. The main
2973/// point of this function is as an extension point for custom logic.
2975 if (!F.hasGC())
2976 return false;
2977
2978 std::unique_ptr<GCStrategy> Strategy = findGCStrategy(F);
2979
2980 assert(Strategy && "GC strategy is required by function, but was not found");
2981
2982 return Strategy->useRS4GC();
2983}
2984
2985static void stripNonValidData(Module &M) {
2986#ifndef NDEBUG
2987 assert(llvm::any_of(M, shouldRewriteStatepointsIn) && "precondition!");
2988#endif
2989
2990 for (Function &F : M)
2992
2993 for (Function &F : M)
2995}
2996
2999 const TargetLibraryInfo &TLI) {
3000 assert(!F.isDeclaration() && !F.empty() &&
3001 "need function body to rewrite statepoints in");
3002 assert(shouldRewriteStatepointsIn(F) && "mismatch in rewrite decision");
3003
3004 auto NeedsRewrite = [&TLI](Instruction &I) {
3005 if (const auto *Call = dyn_cast<CallBase>(&I)) {
3006 if (isa<GCStatepointInst>(Call))
3007 return false;
3008 if (callsGCLeafFunction(Call, TLI))
3009 return false;
3010
3011 // Normally it's up to the frontend to make sure that non-leaf calls also
3012 // have proper deopt state if it is required. We make an exception for
3013 // element atomic memcpy/memmove intrinsics here. Unlike other intrinsics
3014 // these are non-leaf by default. They might be generated by the optimizer
3015 // which doesn't know how to produce a proper deopt state. So if we see a
3016 // non-leaf memcpy/memmove without deopt state just treat it as a leaf
3017 // copy and don't produce a statepoint.
3019 !Call->getOperandBundle(LLVMContext::OB_deopt)) {
3020 assert((isa<AtomicMemCpyInst>(Call) || isa<AtomicMemMoveInst>(Call)) &&
3021 "Don't expect any other calls here!");
3022 return false;
3023 }
3024 return true;
3025 }
3026 return false;
3027 };
3028
3029 // Delete any unreachable statepoints so that we don't have unrewritten
3030 // statepoints surviving this pass. This makes testing easier and the
3031 // resulting IR less confusing to human readers.
3033 bool MadeChange = removeUnreachableBlocks(F, &DTU);
3034 // Flush the Dominator Tree.
3035 DTU.getDomTree();
3036
3037 // Gather all the statepoints which need rewritten. Be careful to only
3038 // consider those in reachable code since we need to ask dominance queries
3039 // when rewriting. We'll delete the unreachable ones in a moment.
3040 SmallVector<CallBase *, 64> ParsePointNeeded;
3041 SmallVector<CallInst *, 64> Intrinsics;
3042 for (Instruction &I : instructions(F)) {
3043 // TODO: only the ones with the flag set!
3044 if (NeedsRewrite(I)) {
3045 // NOTE removeUnreachableBlocks() is stronger than
3046 // DominatorTree::isReachableFromEntry(). In other words
3047 // removeUnreachableBlocks can remove some blocks for which
3048 // isReachableFromEntry() returns true.
3049 assert(DT.isReachableFromEntry(I.getParent()) &&
3050 "no unreachable blocks expected");
3051 ParsePointNeeded.push_back(cast<CallBase>(&I));
3052 }
3053 if (auto *CI = dyn_cast<CallInst>(&I))
3054 if (CI->getIntrinsicID() == Intrinsic::experimental_gc_get_pointer_base ||
3055 CI->getIntrinsicID() == Intrinsic::experimental_gc_get_pointer_offset)
3056 Intrinsics.emplace_back(CI);
3057 }
3058
3059 // Return early if no work to do.
3060 if (ParsePointNeeded.empty() && Intrinsics.empty())
3061 return MadeChange;
3062
3063 // As a prepass, go ahead and aggressively destroy single entry phi nodes.
3064 // These are created by LCSSA. They have the effect of increasing the size
3065 // of liveness sets for no good reason. It may be harder to do this post
3066 // insertion since relocations and base phis can confuse things.
3067 for (BasicBlock &BB : F)
3068 if (BB.getUniquePredecessor())
3069 MadeChange |= FoldSingleEntryPHINodes(&BB);
3070
3071 // Before we start introducing relocations, we want to tweak the IR a bit to
3072 // avoid unfortunate code generation effects. The main example is that we
3073 // want to try to make sure the comparison feeding a branch is after any
3074 // safepoints. Otherwise, we end up with a comparison of pre-relocation
3075 // values feeding a branch after relocation. This is semantically correct,
3076 // but results in extra register pressure since both the pre-relocation and
3077 // post-relocation copies must be available in registers. For code without
3078 // relocations this is handled elsewhere, but teaching the scheduler to
3079 // reverse the transform we're about to do would be slightly complex.
3080 // Note: This may extend the live range of the inputs to the icmp and thus
3081 // increase the liveset of any statepoint we move over. This is profitable
3082 // as long as all statepoints are in rare blocks. If we had in-register
3083 // lowering for live values this would be a much safer transform.
3084 auto getConditionInst = [](Instruction *TI) -> Instruction * {
3085 if (auto *BI = dyn_cast<BranchInst>(TI))
3086 if (BI->isConditional())
3087 return dyn_cast<Instruction>(BI->getCondition());
3088 // TODO: Extend this to handle switches
3089 return nullptr;
3090 };
3091 for (BasicBlock &BB : F) {
3092 Instruction *TI = BB.getTerminator();
3093 if (auto *Cond = getConditionInst(TI))
3094 // TODO: Handle more than just ICmps here. We should be able to move
3095 // most instructions without side effects or memory access.
3096 if (isa<ICmpInst>(Cond) && Cond->hasOneUse()) {
3097 MadeChange = true;
3098 Cond->moveBefore(TI);
3099 }
3100 }
3101
3102 // Nasty workaround - The base computation code in the main algorithm doesn't
3103 // consider the fact that a GEP can be used to convert a scalar to a vector.
3104 // The right fix for this is to integrate GEPs into the base rewriting
3105 // algorithm properly, this is just a short term workaround to prevent
3106 // crashes by canonicalizing such GEPs into fully vector GEPs.
3107 for (Instruction &I : instructions(F)) {
3108 if (!isa<GetElementPtrInst>(I))
3109 continue;
3110
3111 unsigned VF = 0;
3112 for (unsigned i = 0; i < I.getNumOperands(); i++)
3113 if (auto *OpndVTy = dyn_cast<VectorType>(I.getOperand(i)->getType())) {
3114 assert(VF == 0 ||
3115 VF == cast<FixedVectorType>(OpndVTy)->getNumElements());
3116 VF = cast<FixedVectorType>(OpndVTy)->getNumElements();
3117 }
3118
3119 // It's the vector to scalar traversal through the pointer operand which
3120 // confuses base pointer rewriting, so limit ourselves to that case.
3121 if (!I.getOperand(0)->getType()->isVectorTy() && VF != 0) {
3122 IRBuilder<> B(&I);
3123 auto *Splat = B.CreateVectorSplat(VF, I.getOperand(0));
3124 I.setOperand(0, Splat);
3125 MadeChange = true;
3126 }
3127 }
3128
3129 // Cache the 'defining value' relation used in the computation and
3130 // insertion of base phis and selects. This ensures that we don't insert
3131 // large numbers of duplicate base_phis. Use one cache for both
3132 // inlineGetBaseAndOffset() and insertParsePoints().
3133 DefiningValueMapTy DVCache;
3134
3135 // Mapping between a base values and a flag indicating whether it's a known
3136 // base or not.
3137 IsKnownBaseMapTy KnownBases;
3138
3139 if (!Intrinsics.empty())
3140 // Inline @gc.get.pointer.base() and @gc.get.pointer.offset() before finding
3141 // live references.
3142 MadeChange |= inlineGetBaseAndOffset(F, Intrinsics, DVCache, KnownBases);
3143
3144 if (!ParsePointNeeded.empty())
3145 MadeChange |=
3146 insertParsePoints(F, DT, TTI, ParsePointNeeded, DVCache, KnownBases);
3147
3148 return MadeChange;
3149}
3150
3151// liveness computation via standard dataflow
3152// -------------------------------------------------------------------
3153
3154// TODO: Consider using bitvectors for liveness, the set of potentially
3155// interesting values should be small and easy to pre-compute.
3156
3157/// Compute the live-in set for the location rbegin starting from
3158/// the live-out set of the basic block
3161 SetVector<Value *> &LiveTmp, GCStrategy *GC) {
3162 for (auto &I : make_range(Begin, End)) {
3163 // KILL/Def - Remove this definition from LiveIn
3164 LiveTmp.remove(&I);
3165
3166 // Don't consider *uses* in PHI nodes, we handle their contribution to
3167 // predecessor blocks when we seed the LiveOut sets
3168 if (isa<PHINode>(I))
3169 continue;
3170
3171 // USE - Add to the LiveIn set for this instruction
3172 for (Value *V : I.operands()) {
3173 assert(!isUnhandledGCPointerType(V->getType(), GC) &&
3174 "support for FCA unimplemented");
3175 if (isHandledGCPointerType(V->getType(), GC) && !isa<Constant>(V)) {
3176 // The choice to exclude all things constant here is slightly subtle.
3177 // There are two independent reasons:
3178 // - We assume that things which are constant (from LLVM's definition)
3179 // do not move at runtime. For example, the address of a global
3180 // variable is fixed, even though it's contents may not be.
3181 // - Second, we can't disallow arbitrary inttoptr constants even
3182 // if the language frontend does. Optimization passes are free to
3183 // locally exploit facts without respect to global reachability. This
3184 // can create sections of code which are dynamically unreachable and
3185 // contain just about anything. (see constants.ll in tests)
3186 LiveTmp.insert(V);
3187 }
3188 }
3189 }
3190}
3191
3193 GCStrategy *GC) {
3194 for (BasicBlock *Succ : successors(BB)) {
3195 for (auto &I : *Succ) {
3196 PHINode *PN = dyn_cast<PHINode>(&I);
3197 if (!PN)
3198 break;
3199
3200 Value *V = PN->getIncomingValueForBlock(BB);
3201 assert(!isUnhandledGCPointerType(V->getType(), GC) &&
3202 "support for FCA unimplemented");
3203 if (isHandledGCPointerType(V->getType(), GC) && !isa<Constant>(V))
3204 LiveTmp.insert(V);
3205 }
3206 }
3207}
3208
3210 SetVector<Value *> KillSet;
3211 for (Instruction &I : *BB)
3212 if (isHandledGCPointerType(I.getType(), GC))
3213 KillSet.insert(&I);
3214 return KillSet;
3215}
3216
3217#ifndef NDEBUG
3218/// Check that the items in 'Live' dominate 'TI'. This is used as a basic
3219/// validation check for the liveness computation.
3221 Instruction *TI, bool TermOkay = false) {
3222 for (Value *V : Live) {
3223 if (auto *I = dyn_cast<Instruction>(V)) {
3224 // The terminator can be a member of the LiveOut set. LLVM's definition
3225 // of instruction dominance states that V does not dominate itself. As
3226 // such, we need to special case this to allow it.
3227 if (TermOkay && TI == I)
3228 continue;
3229 assert(DT.dominates(I, TI) &&
3230 "basic SSA liveness expectation violated by liveness analysis");
3231 }
3232 }
3233}
3234
3235/// Check that all the liveness sets used during the computation of liveness
3236/// obey basic SSA properties. This is useful for finding cases where we miss
3237/// a def.
3238static void checkBasicSSA(DominatorTree &DT, GCPtrLivenessData &Data,
3239 BasicBlock &BB) {
3240 checkBasicSSA(DT, Data.LiveSet[&BB], BB.getTerminator());
3241 checkBasicSSA(DT, Data.LiveOut[&BB], BB.getTerminator(), true);
3242 checkBasicSSA(DT, Data.LiveIn[&BB], BB.getTerminator());
3243}
3244#endif
3245
3247 GCPtrLivenessData &Data, GCStrategy *GC) {
3249
3250 // Seed the liveness for each individual block
3251 for (BasicBlock &BB : F) {
3252 Data.KillSet[&BB] = computeKillSet(&BB, GC);
3253 Data.LiveSet[&BB].clear();
3254 computeLiveInValues(BB.rbegin(), BB.rend(), Data.LiveSet[&BB], GC);
3255
3256#ifndef NDEBUG
3257 for (Value *Kill : Data.KillSet[&BB])
3258 assert(!Data.LiveSet[&BB].count(Kill) && "live set contains kill");
3259#endif
3260
3261 Data.LiveOut[&BB] = SetVector<Value *>();
3262 computeLiveOutSeed(&BB, Data.LiveOut[&BB], GC);
3263 Data.LiveIn[&BB] = Data.LiveSet[&BB];
3264 Data.LiveIn[&BB].set_union(Data.LiveOut[&BB]);
3265 Data.LiveIn[&BB].set_subtract(Data.KillSet[&BB]);
3266 if (!Data.LiveIn[&BB].empty())
3267 Worklist.insert(pred_begin(&BB), pred_end(&BB));
3268 }
3269
3270 // Propagate that liveness until stable
3271 while (!Worklist.empty()) {
3272 BasicBlock *BB = Worklist.pop_back_val();
3273
3274 // Compute our new liveout set, then exit early if it hasn't changed despite
3275 // the contribution of our successor.
3276 SetVector<Value *> LiveOut = Data.LiveOut[BB];
3277 const auto OldLiveOutSize = LiveOut.size();
3278 for (BasicBlock *Succ : successors(BB)) {
3279 assert(Data.LiveIn.count(Succ));
3280 LiveOut.set_union(Data.LiveIn[Succ]);
3281 }
3282 // assert OutLiveOut is a subset of LiveOut
3283 if (OldLiveOutSize == LiveOut.size()) {
3284 // If the sets are the same size, then we didn't actually add anything
3285 // when unioning our successors LiveIn. Thus, the LiveIn of this block
3286 // hasn't changed.
3287 continue;
3288 }
3289 Data.LiveOut[BB] = LiveOut;
3290
3291 // Apply the effects of this basic block
3292 SetVector<Value *> LiveTmp = LiveOut;
3293 LiveTmp.set_union(Data.LiveSet[BB]);
3294 LiveTmp.set_subtract(Data.KillSet[BB]);
3295
3296 assert(Data.LiveIn.count(BB));
3297 const SetVector<Value *> &OldLiveIn = Data.LiveIn[BB];
3298 // assert: OldLiveIn is a subset of LiveTmp
3299 if (OldLiveIn.size() != LiveTmp.size()) {
3300 Data.LiveIn[BB] = LiveTmp;
3301 Worklist.insert(pred_begin(BB), pred_end(BB));
3302 }
3303 } // while (!Worklist.empty())
3304
3305#ifndef NDEBUG
3306 // Verify our output against SSA properties. This helps catch any
3307 // missing kills during the above iteration.
3308 for (BasicBlock &BB : F)
3309 checkBasicSSA(DT, Data, BB);
3310#endif
3311}
3312
3313static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
3314 StatepointLiveSetTy &Out, GCStrategy *GC) {
3315 BasicBlock *BB = Inst->getParent();
3316
3317 // Note: The copy is intentional and required
3318 assert(Data.LiveOut.count(BB));
3319 SetVector<Value *> LiveOut = Data.LiveOut[BB];
3320
3321 // We want to handle the statepoint itself oddly. It's
3322 // call result is not live (normal), nor are it's arguments
3323 // (unless they're used again later). This adjustment is
3324 // specifically what we need to relocate
3325 computeLiveInValues(BB->rbegin(), ++Inst->getIterator().getReverse(), LiveOut,
3326 GC);
3327 LiveOut.remove(Inst);
3328 Out.insert(LiveOut.begin(), LiveOut.end());
3329}
3330
3331static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
3332 CallBase *Call,
3333 PartiallyConstructedSafepointRecord &Info,
3334 PointerToBaseTy &PointerToBase,
3335 GCStrategy *GC) {
3336 StatepointLiveSetTy Updated;
3337 findLiveSetAtInst(Call, RevisedLivenessData, Updated, GC);
3338
3339 // We may have base pointers which are now live that weren't before. We need
3340 // to update the PointerToBase structure to reflect this.
3341 for (auto *V : Updated)
3342 PointerToBase.insert({ V, V });
3343
3344 Info.LiveSet = Updated;
3345}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
ReachingDefAnalysis InstSet & ToRemove
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
assume Assume Builder
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:510
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Checks Use for liveness in LiveValues If Use is not live
Looks at all the uses of the given value Returns the Liveness deduced from the uses of this value Adds all uses that cause the result to be MaybeLive to MaybeLiveRetUses If the result is Live
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
std::string Name
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1272
bool End
Definition: ELF_riscv.cpp:469
Rewrite Partial Register Uses
Hexagon Common GEP
Select target instructions out of generic instructions
lazy value info
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
This file contains the declarations for metadata subclasses.
Module.h This file contains the declarations for the Module class.
LLVMContext & Context
#define P(N)
FunctionAnalysisManager FAM
const SmallVectorImpl< MachineOperand > & Cond
static void makeStatepointExplicitImpl(CallBase *Call, const SmallVectorImpl< Value * > &BasePtrs, const SmallVectorImpl< Value * > &LiveVariables, PartiallyConstructedSafepointRecord &Result, std::vector< DeferredReplacement > &Replacements, const PointerToBaseTy &PointerToBase, GCStrategy *GC)
static void rematerializeLiveValues(CallBase *Call, PartiallyConstructedSafepointRecord &Info, PointerToBaseTy &PointerToBase, RematCandTy &RematerizationCandidates, TargetTransformInfo &TTI)
static void findRematerializationCandidates(PointerToBaseTy PointerToBase, RematCandTy &RematerizationCandidates, TargetTransformInfo &TTI)
static std::unique_ptr< GCStrategy > findGCStrategy(Function &F)
Looks up the GC strategy for a given function, returning null if the function doesn't have a GC tag.
static Instruction * rematerializeChain(ArrayRef< Instruction * > ChainToBase, Instruction *InsertBefore, Value *RootOfChain, Value *AlternateLiveBase)
static void unique_unsorted(SmallVectorImpl< T > &Vec)
Implement a unique function which doesn't require we sort the input vector.
static void stripNonValidDataFromBody(Function &F)
static bool isKnownBase(Value *V, const IsKnownBaseMapTy &KnownBases)
Returns true if V is a known base.
static Value * findBasePointer(Value *I, DefiningValueMapTy &Cache, IsKnownBaseMapTy &KnownBases)
For a given value or instruction, figure out what base ptr its derived from.
static cl::opt< bool, true > ClobberNonLiveOverride("rs4gc-clobber-non-live", cl::location(ClobberNonLive), cl::Hidden)
static void insertRelocationStores(iterator_range< Value::user_iterator > GCRelocs, DenseMap< Value *, AllocaInst * > &AllocaMap, DenseSet< Value * > &VisitedLiveValues)
static BasicBlock * normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent, DominatorTree &DT)
static void analyzeParsePointLiveness(DominatorTree &DT, GCPtrLivenessData &OriginalLivenessData, CallBase *Call, PartiallyConstructedSafepointRecord &Result, GCStrategy *GC)
static AttributeList legalizeCallAttributes(LLVMContext &Ctx, AttributeList OrigAL, AttributeList StatepointAL)
static void computeLiveOutSeed(BasicBlock *BB, SetVector< Value * > &LiveTmp, GCStrategy *GC)
static void relocationViaAlloca(Function &F, DominatorTree &DT, ArrayRef< Value * > Live, ArrayRef< PartiallyConstructedSafepointRecord > Records)
Do all the relocation update via allocas and mem2reg.
static bool AreEquivalentPhiNodes(PHINode &OrigRootPhi, PHINode &AlternateRootPhi)
static cl::opt< unsigned > RematerializationThreshold("spp-rematerialization-threshold", cl::Hidden, cl::init(6))
static Value * findBaseOrBDV(Value *I, DefiningValueMapTy &Cache, IsKnownBaseMapTy &KnownBases)
Return a base pointer for this value if known.
static Value * findBaseDefiningValueCached(Value *I, DefiningValueMapTy &Cache, IsKnownBaseMapTy &KnownBases)
Returns the base defining value for this value.
static void insertUseHolderAfter(CallBase *Call, const ArrayRef< Value * > Values, SmallVectorImpl< CallInst * > &Holders)
Insert holders so that each Value is obviously live through the entire lifetime of the call.
static void insertRematerializationStores(const RematerializedValueMapTy &RematerializedValues, DenseMap< Value *, AllocaInst * > &AllocaMap, DenseSet< Value * > &VisitedLiveValues)
static bool insertParsePoints(Function &F, DominatorTree &DT, TargetTransformInfo &TTI, SmallVectorImpl< CallBase * > &ToUpdate, DefiningValueMapTy &DVCache, IsKnownBaseMapTy &KnownBases)
static void findBasePointers(const StatepointLiveSetTy &live, PointerToBaseTy &PointerToBase, DominatorTree *DT, DefiningValueMapTy &DVCache, IsKnownBaseMapTy &KnownBases)
static bool shouldRewriteStatepointsIn(Function &F)
Returns true if this function should be rewritten by this pass.
static cl::opt< bool > RematDerivedAtUses("rs4gc-remat-derived-at-uses", cl::Hidden, cl::init(true))
static ArrayRef< Use > GetDeoptBundleOperands(const CallBase *Call)
static void stripNonValidAttributesFromPrototype(Function &F)
static void findLiveSetAtInst(Instruction *inst, GCPtrLivenessData &Data, StatepointLiveSetTy &out, GCStrategy *GC)
Given results from the dataflow liveness computation, find the set of live Values at a particular ins...
static void computeLiveInValues(DominatorTree &DT, Function &F, GCPtrLivenessData &Data, GCStrategy *GC)
Compute the live-in set for every basic block in the function.
static void stripInvalidMetadataFromInstruction(Instruction &I)
Certain metadata on instructions are invalid after running RS4GC.
static constexpr Attribute::AttrKind FnAttrsToStrip[]
static bool areBothVectorOrScalar(Value *First, Value *Second)
static void rematerializeLiveValuesAtUses(RematCandTy &RematerizationCandidates, MutableArrayRef< PartiallyConstructedSafepointRecord > Records, PointerToBaseTy &PointerToBase)
static bool isHandledGCPointerType(Type *T, GCStrategy *GC)
static Value * findRematerializableChainToBasePointer(SmallVectorImpl< Instruction * > &ChainToBase, Value *CurrentValue)
static cl::opt< bool > PrintLiveSetSize("spp-print-liveset-size", cl::Hidden, cl::init(false))
static Value * findBaseDefiningValueOfVector(Value *I, DefiningValueMapTy &Cache, IsKnownBaseMapTy &KnownBases)
Return a base defining value for the 'Index' element of the given vector instruction 'I'.
static void stripNonValidData(Module &M)
The IR fed into RewriteStatepointsForGC may have had attributes and metadata implying dereferenceabil...
static InstructionCost chainToBasePointerCost(SmallVectorImpl< Instruction * > &Chain, TargetTransformInfo &TTI)
static bool isUnhandledGCPointerType(Type *Ty, GCStrategy *GC)
static SetVector< Value * > computeKillSet(BasicBlock *BB, GCStrategy *GC)
static bool ClobberNonLive
static cl::opt< bool > PrintBasePointers("spp-print-base-pointers", cl::Hidden, cl::init(false))
static bool isOriginalBaseResult(Value *V)
This value is a base pointer that is not generated by RS4GC, i.e.
static cl::opt< bool > PrintLiveSet("spp-print-liveset", cl::Hidden, cl::init(false))
static void setKnownBase(Value *V, bool IsKnownBase, IsKnownBaseMapTy &KnownBases)
Caches the IsKnownBase flag for a value and asserts that it wasn't present in the cache before.
static cl::opt< bool > AllowStatepointWithNoDeoptInfo("rs4gc-allow-statepoint-with-no-deopt-info", cl::Hidden, cl::init(true))
static void makeStatepointExplicit(DominatorTree &DT, CallBase *Call, PartiallyConstructedSafepointRecord &Result, std::vector< DeferredReplacement > &Replacements, const PointerToBaseTy &PointerToBase, GCStrategy *GC)
static std::string suffixed_name_or(Value *V, StringRef Suffix, StringRef DefaultName)
static void CreateGCRelocates(ArrayRef< Value * > LiveVariables, ArrayRef< Value * > BasePtrs, Instruction *StatepointToken, IRBuilder<> &Builder, GCStrategy *GC)
Helper function to place all gc relocates necessary for the given statepoint.
static void checkBasicSSA(DominatorTree &DT, SetVector< Value * > &Live, Instruction *TI, bool TermOkay=false)
Check that the items in 'Live' dominate 'TI'.
static StringRef getDeoptLowering(CallBase *Call)
static void findLiveReferences(Function &F, DominatorTree &DT, ArrayRef< CallBase * > toUpdate, MutableArrayRef< struct PartiallyConstructedSafepointRecord > records, GCStrategy *GC)
static AttributeMask getParamAndReturnAttributesToRemove()
static bool inlineGetBaseAndOffset(Function &F, SmallVectorImpl< CallInst * > &Intrinsics, DefiningValueMapTy &DVCache, IsKnownBaseMapTy &KnownBases)
static Value * findBaseDefiningValue(Value *I, DefiningValueMapTy &Cache, IsKnownBaseMapTy &KnownBases)
Helper function for findBasePointer - Will return a value which either a) defines the base pointer fo...
static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData, CallBase *Call, PartiallyConstructedSafepointRecord &result, PointerToBaseTy &PointerToBase, GCStrategy *GC)
Given an updated version of the dataflow liveness results, update the liveset and base pointer maps f...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
verify safepoint Safepoint IR static false bool isGCPointerType(Type *T)
static bool containsGCPtrType(Type *Ty)
raw_pwrite_stream & OS
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallSet class.
This file defines the SmallVector class.
This pass exposes codegen information to IR-level passes.
an instruction to allocate memory on the stack
Definition: Instructions.h:58
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
Definition: Instructions.h:118
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:620
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:774
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
reverse_iterator rend() const
Definition: ArrayRef.h:157
iterator end() const
Definition: ArrayRef.h:154
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
iterator begin() const
Definition: ArrayRef.h:153
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
reverse_iterator rbegin() const
Definition: ArrayRef.h:156
Value handle that asserts if the Value is deleted.
Definition: ValueHandle.h:264
AttrBuilder & removeAttribute(Attribute::AttrKind Val)
Remove an attribute from the builder.
AttributeSet getFnAttrs() const
The function attributes are returned.
static AttributeList get(LLVMContext &C, ArrayRef< std::pair< unsigned, Attribute > > Attrs)
Create an AttributeList with the specified parameters in it.
bool isEmpty() const
Return true if there are no attributes.
Definition: Attributes.h:951
AttributeList addFnAttributes(LLVMContext &C, const AttrBuilder &B) const
Add function attribute to the list.
Definition: Attributes.h:544
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
Attribute getFnAttr(Attribute::AttrKind Kind) const
Return the attribute object that exists for the function.
Definition: Attributes.h:826
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:318
AttrKind
This enumeration lists the attributes that can be associated with parameters, function results,...
Definition: Attributes.h:84
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:335
const LandingPadInst * getLandingPadInst() const
Return the landingpad instruction associated with the landing pad.
Definition: BasicBlock.cpp:530
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:257
reverse_iterator rbegin()
Definition: BasicBlock.h:340
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:216
InstListType::reverse_iterator reverse_iterator
Definition: BasicBlock.h:89
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:304
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:127
This class represents a no-op cast from one type to another.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1190
void setCallingConv(CallingConv::ID CC)
Definition: InstrTypes.h:1474
void setAttributes(AttributeList A)
Set the parameter attributes for this call.
Definition: InstrTypes.h:1493
AttributeList getAttributes() const
Return the parameter attributes for this call.
Definition: InstrTypes.h:1489
This class represents a function call, abstracting a target machine's calling convention.
void setTailCallKind(TailCallKind TCK)
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:428
static ConstantAggregateZero * get(Type *Ty)
Definition: Constants.cpp:1579
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
Definition: Constants.cpp:1691
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
iterator end()
Definition: DenseMap.h:84
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
DominatorTree & getDomTree()
Flush DomTree updates and return DomTree.
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
static ExtractElementInst * Create(Value *Vec, Value *Idx, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:693
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:165
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
Represents calls to the gc.relocate intrinsic.
Value * getDerivedPtr() const
Represents a gc.statepoint intrinsic call.
Definition: Statepoint.h:61
GCStrategy describes a garbage collector algorithm's code generation requirements,...
Definition: GCStrategy.h:63
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:940
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2089
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2625
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:933
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:89
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:71
const BasicBlock * getParent() const
Definition: Instruction.h:90
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1521
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:83
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
Invoke instruction.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:177
Metadata node.
Definition: Metadata.h:950
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1416
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
size_type count(const KeyT &Key) const
Definition: MapVector.h:144
iterator end()
Definition: MapVector.h:71
iterator find(const KeyT &Key)
Definition: MapVector.h:146
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: MapVector.h:117
size_type size() const
Definition: MapVector.h:60
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1743
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:173
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
Definition: SetVector.h:57
bool remove(const value_type &X)
Remove an item from the set vector.
Definition: SetVector.h:188
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
bool set_union(const STy &S)
Compute This := This u S, return whether 'This' changed.
Definition: SetVector.h:303
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:113
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:103
void set_subtract(const STy &S)
Compute This := This - B TODO: We should be able to use set_subtract from SetOperations....
Definition: SetVector.h:318
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
value_type pop_back_val()
Definition: SetVector.h:285
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941
void reserve(size_type N)
Definition: SmallVector.h:667
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
An instruction for storing to memory.
Definition: Instructions.h:301
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:222
bool equals(StringRef RHS) const
equals - Check for string equality, this is more efficient than compare() when the relative ordering ...
Definition: StringRef.h:164
Class to represent struct types.
Definition: DerivedTypes.h:213
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
static CastContextHint getCastContextHint(const Instruction *I)
Calculates a CastContextHint from I.
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE=nullptr, const SCEV *Ptr=nullptr) const
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
@ TCK_SizeAndLatency
The weighted sum of size and latency.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static Type * getVoidTy(LLVMContext &C)
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
iterator_range< value_op_iterator > operand_values()
Definition: User.h:266
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
user_iterator user_begin()
Definition: Value.h:397
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:378
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:535
iterator_range< user_iterator > users()
Definition: Value.h:421
User * getUniqueUndroppableUser()
Return true if there is exactly one unique user of this value that cannot be dropped (that user can h...
Definition: Value.cpp:179
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1069
unsigned getNumUses() const
This method computes the number of uses of this Value.
Definition: Value.cpp:255
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
bool user_empty() const
Definition: Value.h:385
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:97
ilist_iterator< OptionsT, !IsReverse, IsConst > getReverse() const
Get a reverse iterator to the same node.
self_iterator getIterator()
Definition: ilist_node.h:82
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:289
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition: CallingConv.h:47
AttributeList getAttributes(LLVMContext &C, ID id)
Return the attributes for an intrinsic.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1422
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
LocationClass< Ty > location(Ty &L)
Definition: CommandLine.h:465
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition: DWP.cpp:440
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1747
void PromoteMemToReg(ArrayRef< AllocaInst * > Allocas, DominatorTree &DT, AssumptionCache *AC=nullptr)
Promote the specified list of alloca instructions into scalar registers, inserting PHI nodes as appro...
auto successors(const MachineBasicBlock *BB)
AddressSpace
Definition: NVPTXBaseInfo.h:21
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:2036
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:2037
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
Interval::pred_iterator pred_end(Interval *I)
Definition: Interval.h:112
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1734
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1652
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:109
StatepointDirectives parseStatepointDirectivesFromAttrs(AttributeList AS)
Parse out statepoint directives from the function attributes present in AS.
Definition: Statepoint.cpp:24
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
bool FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep=nullptr)
We know that BB has one predecessor.
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:292
@ DeoptLiveIn
Mark the deopt arguments associated with the statepoint as only being "live-in".
@ GCTransition
Indicates that this statepoint is a transition from GC-aware code to code that is not GC-aware.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1926
std::unique_ptr< GCStrategy > getGCStrategy(const StringRef Name)
Lookup the GCStrategy object associated with the given gc name.
Definition: GCStrategy.cpp:24
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2021
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1884
InstructionCost Cost
bool isStatepointDirectiveAttr(Attribute Attr)
Return true if the Attr is an attribute that is a statepoint directive.
Definition: Statepoint.cpp:18
bool removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Remove all blocks that can not be reached from the function's entry.
Definition: Local.cpp:2670
bool callsGCLeafFunction(const CallBase *Call, const TargetLibraryInfo &TLI)
Return true if this call calls a gc leaf function.
Definition: Local.cpp:2951
bool runOnFunction(Function &F, DominatorTree &, TargetTransformInfo &, const TargetLibraryInfo &)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Call sites that get wrapped by a gc.statepoint (currently only in RewriteStatepointsForGC and potenti...
Definition: Statepoint.h:235
std::optional< uint32_t > NumPatchBytes
Definition: Statepoint.h:236
std::optional< uint64_t > StatepointID
Definition: Statepoint.h:237
static const uint64_t DefaultStatepointID
Definition: Statepoint.h:239