LLVM  10.0.0svn
ShadowStackGCLowering.cpp
Go to the documentation of this file.
1 //===- ShadowStackGCLowering.cpp - Custom lowering for shadow-stack gc ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the custom lowering code required by the shadow-stack GC
10 // strategy.
11 //
12 // This pass implements the code transformation described in this paper:
13 // "Accurate Garbage Collection in an Uncooperative Environment"
14 // Fergus Henderson, ISMM, 2002
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/CodeGen/Passes.h"
21 #include "llvm/IR/BasicBlock.h"
22 #include "llvm/IR/Constant.h"
23 #include "llvm/IR/Constants.h"
24 #include "llvm/IR/DerivedTypes.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/IR/GlobalValue.h"
27 #include "llvm/IR/GlobalVariable.h"
28 #include "llvm/IR/IRBuilder.h"
29 #include "llvm/IR/Instructions.h"
30 #include "llvm/IR/IntrinsicInst.h"
31 #include "llvm/IR/Intrinsics.h"
32 #include "llvm/IR/Module.h"
33 #include "llvm/IR/Type.h"
34 #include "llvm/IR/Value.h"
35 #include "llvm/Pass.h"
36 #include "llvm/Support/Casting.h"
38 #include <cassert>
39 #include <cstddef>
40 #include <string>
41 #include <utility>
42 #include <vector>
43 
44 using namespace llvm;
45 
46 #define DEBUG_TYPE "shadow-stack-gc-lowering"
47 
48 namespace {
49 
50 class ShadowStackGCLowering : public FunctionPass {
51  /// RootChain - This is the global linked-list that contains the chain of GC
52  /// roots.
53  GlobalVariable *Head = nullptr;
54 
55  /// StackEntryTy - Abstract type of a link in the shadow stack.
56  StructType *StackEntryTy = nullptr;
57  StructType *FrameMapTy = nullptr;
58 
59  /// Roots - GC roots in the current function. Each is a pair of the
60  /// intrinsic call and its corresponding alloca.
61  std::vector<std::pair<CallInst *, AllocaInst *>> Roots;
62 
63 public:
64  static char ID;
65 
66  ShadowStackGCLowering();
67 
68  bool doInitialization(Module &M) override;
69  bool runOnFunction(Function &F) override;
70 
71 private:
72  bool IsNullValue(Value *V);
73  Constant *GetFrameMap(Function &F);
74  Type *GetConcreteStackEntryType(Function &F);
75  void CollectRoots(Function &F);
76 
77  static GetElementPtrInst *CreateGEP(LLVMContext &Context, IRBuilder<> &B,
78  Type *Ty, Value *BasePtr, int Idx1,
79  const char *Name);
80  static GetElementPtrInst *CreateGEP(LLVMContext &Context, IRBuilder<> &B,
81  Type *Ty, Value *BasePtr, int Idx1, int Idx2,
82  const char *Name);
83 };
84 
85 } // end anonymous namespace
86 
88 
89 INITIALIZE_PASS_BEGIN(ShadowStackGCLowering, DEBUG_TYPE,
90  "Shadow Stack GC Lowering", false, false)
92 INITIALIZE_PASS_END(ShadowStackGCLowering, DEBUG_TYPE,
93  "Shadow Stack GC Lowering", false, false)
94 
95 FunctionPass *llvm::createShadowStackGCLoweringPass() { return new ShadowStackGCLowering(); }
96 
97 ShadowStackGCLowering::ShadowStackGCLowering() : FunctionPass(ID) {
99 }
100 
101 Constant *ShadowStackGCLowering::GetFrameMap(Function &F) {
102  // doInitialization creates the abstract type of this value.
103  Type *VoidPtr = Type::getInt8PtrTy(F.getContext());
104 
105  // Truncate the ShadowStackDescriptor if some metadata is null.
106  unsigned NumMeta = 0;
108  for (unsigned I = 0; I != Roots.size(); ++I) {
109  Constant *C = cast<Constant>(Roots[I].first->getArgOperand(1));
110  if (!C->isNullValue())
111  NumMeta = I + 1;
112  Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr));
113  }
114  Metadata.resize(NumMeta);
115 
117 
118  Constant *BaseElts[] = {
119  ConstantInt::get(Int32Ty, Roots.size(), false),
120  ConstantInt::get(Int32Ty, NumMeta, false),
121  };
122 
123  Constant *DescriptorElts[] = {
124  ConstantStruct::get(FrameMapTy, BaseElts),
125  ConstantArray::get(ArrayType::get(VoidPtr, NumMeta), Metadata)};
126 
127  Type *EltTys[] = {DescriptorElts[0]->getType(), DescriptorElts[1]->getType()};
128  StructType *STy = StructType::create(EltTys, "gc_map." + utostr(NumMeta));
129 
130  Constant *FrameMap = ConstantStruct::get(STy, DescriptorElts);
131 
132  // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? Seems
133  // that, short of multithreaded LLVM, it should be safe; all that is
134  // necessary is that a simple Module::iterator loop not be invalidated.
135  // Appending to the GlobalVariable list is safe in that sense.
136  //
137  // All of the output passes emit globals last. The ExecutionEngine
138  // explicitly supports adding globals to the module after
139  // initialization.
140  //
141  // Still, if it isn't deemed acceptable, then this transformation needs
142  // to be a ModulePass (which means it cannot be in the 'llc' pipeline
143  // (which uses a FunctionPassManager (which segfaults (not asserts) if
144  // provided a ModulePass))).
145  Constant *GV = new GlobalVariable(*F.getParent(), FrameMap->getType(), true,
147  "__gc_" + F.getName());
148 
149  Constant *GEPIndices[2] = {
152  return ConstantExpr::getGetElementPtr(FrameMap->getType(), GV, GEPIndices);
153 }
154 
155 Type *ShadowStackGCLowering::GetConcreteStackEntryType(Function &F) {
156  // doInitialization creates the generic version of this type.
157  std::vector<Type *> EltTys;
158  EltTys.push_back(StackEntryTy);
159  for (size_t I = 0; I != Roots.size(); I++)
160  EltTys.push_back(Roots[I].second->getAllocatedType());
161 
162  return StructType::create(EltTys, ("gc_stackentry." + F.getName()).str());
163 }
164 
165 /// doInitialization - If this module uses the GC intrinsics, find them now. If
166 /// not, exit fast.
167 bool ShadowStackGCLowering::doInitialization(Module &M) {
168  bool Active = false;
169  for (Function &F : M) {
170  if (F.hasGC() && F.getGC() == std::string("shadow-stack")) {
171  Active = true;
172  break;
173  }
174  }
175  if (!Active)
176  return false;
177 
178  // struct FrameMap {
179  // int32_t NumRoots; // Number of roots in stack frame.
180  // int32_t NumMeta; // Number of metadata descriptors. May be < NumRoots.
181  // void *Meta[]; // May be absent for roots without metadata.
182  // };
183  std::vector<Type *> EltTys;
184  // 32 bits is ok up to a 32GB stack frame. :)
185  EltTys.push_back(Type::getInt32Ty(M.getContext()));
186  // Specifies length of variable length array.
187  EltTys.push_back(Type::getInt32Ty(M.getContext()));
188  FrameMapTy = StructType::create(EltTys, "gc_map");
189  PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy);
190 
191  // struct StackEntry {
192  // ShadowStackEntry *Next; // Caller's stack entry.
193  // FrameMap *Map; // Pointer to constant FrameMap.
194  // void *Roots[]; // Stack roots (in-place array, so we pretend).
195  // };
196 
197  StackEntryTy = StructType::create(M.getContext(), "gc_stackentry");
198 
199  EltTys.clear();
200  EltTys.push_back(PointerType::getUnqual(StackEntryTy));
201  EltTys.push_back(FrameMapPtrTy);
202  StackEntryTy->setBody(EltTys);
203  PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy);
204 
205  // Get the root chain if it already exists.
206  Head = M.getGlobalVariable("llvm_gc_root_chain");
207  if (!Head) {
208  // If the root chain does not exist, insert a new one with linkonce
209  // linkage!
210  Head = new GlobalVariable(
211  M, StackEntryPtrTy, false, GlobalValue::LinkOnceAnyLinkage,
212  Constant::getNullValue(StackEntryPtrTy), "llvm_gc_root_chain");
213  } else if (Head->hasExternalLinkage() && Head->isDeclaration()) {
214  Head->setInitializer(Constant::getNullValue(StackEntryPtrTy));
215  Head->setLinkage(GlobalValue::LinkOnceAnyLinkage);
216  }
217 
218  return true;
219 }
220 
221 bool ShadowStackGCLowering::IsNullValue(Value *V) {
222  if (Constant *C = dyn_cast<Constant>(V))
223  return C->isNullValue();
224  return false;
225 }
226 
227 void ShadowStackGCLowering::CollectRoots(Function &F) {
228  // FIXME: Account for original alignment. Could fragment the root array.
229  // Approach 1: Null initialize empty slots at runtime. Yuck.
230  // Approach 2: Emit a map of the array instead of just a count.
231 
232  assert(Roots.empty() && "Not cleaned up?");
233 
235 
236  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
237  for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;)
238  if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++))
239  if (Function *F = CI->getCalledFunction())
240  if (F->getIntrinsicID() == Intrinsic::gcroot) {
241  std::pair<CallInst *, AllocaInst *> Pair = std::make_pair(
242  CI,
243  cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts()));
244  if (IsNullValue(CI->getArgOperand(1)))
245  Roots.push_back(Pair);
246  else
247  MetaRoots.push_back(Pair);
248  }
249 
250  // Number roots with metadata (usually empty) at the beginning, so that the
251  // FrameMap::Meta array can be elided.
252  Roots.insert(Roots.begin(), MetaRoots.begin(), MetaRoots.end());
253 }
254 
255 GetElementPtrInst *ShadowStackGCLowering::CreateGEP(LLVMContext &Context,
256  IRBuilder<> &B, Type *Ty,
257  Value *BasePtr, int Idx,
258  int Idx2,
259  const char *Name) {
260  Value *Indices[] = {ConstantInt::get(Type::getInt32Ty(Context), 0),
261  ConstantInt::get(Type::getInt32Ty(Context), Idx),
262  ConstantInt::get(Type::getInt32Ty(Context), Idx2)};
263  Value *Val = B.CreateGEP(Ty, BasePtr, Indices, Name);
264 
265  assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant");
266 
267  return dyn_cast<GetElementPtrInst>(Val);
268 }
269 
270 GetElementPtrInst *ShadowStackGCLowering::CreateGEP(LLVMContext &Context,
271  IRBuilder<> &B, Type *Ty, Value *BasePtr,
272  int Idx, const char *Name) {
273  Value *Indices[] = {ConstantInt::get(Type::getInt32Ty(Context), 0),
274  ConstantInt::get(Type::getInt32Ty(Context), Idx)};
275  Value *Val = B.CreateGEP(Ty, BasePtr, Indices, Name);
276 
277  assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant");
278 
279  return dyn_cast<GetElementPtrInst>(Val);
280 }
281 
282 /// runOnFunction - Insert code to maintain the shadow stack.
284  // Quick exit for functions that do not use the shadow stack GC.
285  if (!F.hasGC() ||
286  F.getGC() != std::string("shadow-stack"))
287  return false;
288 
289  LLVMContext &Context = F.getContext();
290 
291  // Find calls to llvm.gcroot.
292  CollectRoots(F);
293 
294  // If there are no roots in this function, then there is no need to add a
295  // stack map entry for it.
296  if (Roots.empty())
297  return false;
298 
299  // Build the constant map and figure the type of the shadow stack entry.
300  Value *FrameMap = GetFrameMap(F);
301  Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F);
302 
303  // Build the shadow stack entry at the very start of the function.
305  IRBuilder<> AtEntry(IP->getParent(), IP);
306 
307  Instruction *StackEntry =
308  AtEntry.CreateAlloca(ConcreteStackEntryTy, nullptr, "gc_frame");
309 
310  while (isa<AllocaInst>(IP))
311  ++IP;
312  AtEntry.SetInsertPoint(IP->getParent(), IP);
313 
314  // Initialize the map pointer and load the current head of the shadow stack.
315  Instruction *CurrentHead =
316  AtEntry.CreateLoad(StackEntryTy->getPointerTo(), Head, "gc_currhead");
317  Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy,
318  StackEntry, 0, 1, "gc_frame.map");
319  AtEntry.CreateStore(FrameMap, EntryMapPtr);
320 
321  // After all the allocas...
322  for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
323  // For each root, find the corresponding slot in the aggregate...
324  Value *SlotPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy,
325  StackEntry, 1 + I, "gc_root");
326 
327  // And use it in lieu of the alloca.
328  AllocaInst *OriginalAlloca = Roots[I].second;
329  SlotPtr->takeName(OriginalAlloca);
330  OriginalAlloca->replaceAllUsesWith(SlotPtr);
331  }
332 
333  // Move past the original stores inserted by GCStrategy::InitRoots. This isn't
334  // really necessary (the collector would never see the intermediate state at
335  // runtime), but it's nicer not to push the half-initialized entry onto the
336  // shadow stack.
337  while (isa<StoreInst>(IP))
338  ++IP;
339  AtEntry.SetInsertPoint(IP->getParent(), IP);
340 
341  // Push the entry onto the shadow stack.
342  Instruction *EntryNextPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy,
343  StackEntry, 0, 0, "gc_frame.next");
344  Instruction *NewHeadVal = CreateGEP(Context, AtEntry, ConcreteStackEntryTy,
345  StackEntry, 0, "gc_newhead");
346  AtEntry.CreateStore(CurrentHead, EntryNextPtr);
347  AtEntry.CreateStore(NewHeadVal, Head);
348 
349  // For each instruction that escapes...
350  EscapeEnumerator EE(F, "gc_cleanup");
351  while (IRBuilder<> *AtExit = EE.Next()) {
352  // Pop the entry from the shadow stack. Don't reuse CurrentHead from
353  // AtEntry, since that would make the value live for the entire function.
354  Instruction *EntryNextPtr2 =
355  CreateGEP(Context, *AtExit, ConcreteStackEntryTy, StackEntry, 0, 0,
356  "gc_frame.next");
357  Value *SavedHead = AtExit->CreateLoad(StackEntryTy->getPointerTo(),
358  EntryNextPtr2, "gc_savedhead");
359  AtExit->CreateStore(SavedHead, Head);
360  }
361 
362  // Delete the original allocas (which are no longer used) and the intrinsic
363  // calls (which are no longer valid). Doing this last avoids invalidating
364  // iterators.
365  for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
366  Roots[I].first->eraseFromParent();
367  Roots[I].second->eraseFromParent();
368  }
369 
370  Roots.clear();
371  return true;
372 }
uint64_t CallInst * C
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
LLVMContext & Context
This class represents lattice values for constants.
Definition: AllocatorList.h:23
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:66
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant *> IdxList, bool InBounds=false, Optional< unsigned > InRangeIndex=None, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
Definition: Constants.h:1153
Shadow Stack GC Lowering
iterator end()
Definition: Function.h:687
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
void initializeShadowStackGCLoweringPass(PassRegistry &)
F(f)
FunctionPass * createShadowStackGCLoweringPass()
ShadowStackGCLowering - Implements the custom lowering mechanism used by the shadow stack GC...
static Constant * get(ArrayType *T, ArrayRef< Constant *> V)
Definition: Constants.cpp:1014
static Constant * getNullValue(Type *Ty)
Constructor to create a &#39;0&#39; constant of arbitrary type.
Definition: Constants.cpp:289
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:273
EscapeEnumerator - This is a little algorithm to find all escape points from a function so that "fina...
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
Class to represent struct types.
Definition: DerivedTypes.h:238
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:779
An analysis pass which caches information about the entire Module.
Definition: GCMetadata.h:152
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:85
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:246
const std::string & getGC() const
Definition: Function.cpp:478
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:429
INITIALIZE_PASS_BEGIN(ShadowStackGCLowering, DEBUG_TYPE, "Shadow Stack GC Lowering", false, false) INITIALIZE_PASS_END(ShadowStackGCLowering
void setBody(ArrayRef< Type *> Elements, bool isPacked=false)
Specify a body for an opaque identified type.
Definition: Type.cpp:373
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:291
iterator begin()
Definition: Function.h:685
Class to represent pointers.
Definition: DerivedTypes.h:579
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:1804
const BasicBlock & getEntryBlock() const
Definition: Function.h:669
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:881
static bool runOnFunction(Function &F, bool PostInlining)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:41
This file contains the declarations for the subclasses of Constant, which represent the different fla...
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
static Constant * get(StructType *T, ArrayRef< Constant *> V)
Definition: Constants.cpp:1075
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:205
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:224
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
Value * CreateGEP(Value *Ptr, ArrayRef< Value *> IdxList, const Twine &Name="")
Definition: IRBuilder.h:1676
Iterator for intrusive lists based on ilist_node.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the generic address space (address sp...
Definition: DerivedTypes.h:594
#define DEBUG_TYPE
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
Keep one copy of function when linking (inline)
Definition: GlobalValue.h:50
Module.h This file contains the declarations for the Module class.
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:653
std::string utostr(uint64_t X, bool isNeg=false)
Definition: StringExtras.h:223
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:193
bool hasGC() const
hasGC/getGC/setGC/clearGC - The name of the garbage collection algorithm to use during code generatio...
Definition: Function.h:354
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:180
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
#define I(x, y, z)
Definition: MD5.cpp:58
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:587
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:575
LLVM Value Representation.
Definition: Value.h:74
static StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Definition: Type.cpp:441
Root of the metadata hierarchy.
Definition: Metadata.h:57
IntegerType * Int32Ty
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:43
an instruction to allocate memory on the stack
Definition: Instructions.h:59
void resize(size_type N)
Definition: SmallVector.h:344