LLVM  16.0.0git
JMCInstrumenter.cpp
Go to the documentation of this file.
1 //===- JMCInstrumenter.cpp - JMC Instrumentation --------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // JMCInstrumenter pass:
10 // - instrument each function with a call to __CheckForDebuggerJustMyCode. The
11 // sole argument should be defined in .msvcjmc. Each flag is 1 byte initialized
12 // to 1.
13 // - create the dummy COMDAT function __JustMyCode_Default to prevent linking
14 // error if __CheckForDebuggerJustMyCode is not available.
15 // - For MSVC:
16 // add "/alternatename:__CheckForDebuggerJustMyCode=__JustMyCode_Default" to
17 // "llvm.linker.options"
18 // For ELF:
19 // Rename __JustMyCode_Default to __CheckForDebuggerJustMyCode and mark it as
20 // weak symbol.
21 //===----------------------------------------------------------------------===//
22 
23 #include "llvm/ADT/SmallString.h"
24 #include "llvm/ADT/StringExtras.h"
25 #include "llvm/CodeGen/Passes.h"
26 #include "llvm/IR/DIBuilder.h"
28 #include "llvm/IR/DerivedTypes.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/Instructions.h"
31 #include "llvm/IR/LLVMContext.h"
32 #include "llvm/IR/Module.h"
33 #include "llvm/IR/Type.h"
34 #include "llvm/InitializePasses.h"
35 #include "llvm/Pass.h"
36 #include "llvm/Support/DJB.h"
37 #include "llvm/Support/Path.h"
39 
40 using namespace llvm;
41 
42 #define DEBUG_TYPE "jmc-instrument"
43 
44 namespace {
// Module pass that performs the JMC instrumentation described in the file
// header. The transformation itself is implemented in runOnModule().
45 struct JMCInstrumenter : public ModulePass {
46  static char ID;
    // ID is forwarded to the ModulePass base-class constructor.
    // NOTE(review): listing line 48 is absent from this extract, so the
    // constructor body shown here may be incomplete -- confirm upstream.
47  JMCInstrumenter() : ModulePass(ID) {
49  }
50  bool runOnModule(Module &M) override;
51 };
52 char JMCInstrumenter::ID = 0;
53 } // namespace
54 
// NOTE(review): the opening `INITIALIZE_PASS(` of this macro invocation
// (listing line 55) is missing from this extract. The lines below are its
// arguments: the pass class, DEBUG_TYPE, a description string, and two
// `false` flags.
56  JMCInstrumenter, DEBUG_TYPE,
57  "Instrument function entry with call to __CheckForDebuggerJustMyCode",
58  false, false)
59 
// Factory for the pass: returns a newly heap-allocated JMCInstrumenter as a
// ModulePass pointer.
60 ModulePass *llvm::createJMCInstrumenterPass() { return new JMCInstrumenter(); }
61 
62 namespace {
// Symbol name of the check function that runOnModule() inserts calls to.
63 const char CheckFunctionName[] = "__CheckForDebuggerJustMyCode";
64 
// Builds the name of the JMC flag symbol for the source file of SP.
//
// The result is "<prefix><hash>_<file>", where
//   - <prefix> is "_" when UseX86FastCall is true and "__" otherwise,
//   - <hash> is the djbHash of the normalized path with the file name
//     removed, printed as upper-case hex with a width of 8,
//   - <file> is the file name with every '.' replaced by '@'.
65 std::string getFlagName(DISubprogram &SP, bool UseX86FastCall) {
66  // absolute windows path: windows_backslash
67  // relative windows backslash path: windows_backslash
68  // relative windows slash path: posix
69  // absolute posix path: posix
70  // relative posix path: posix
    // NOTE(review): listing lines 72, 75 and 76 are absent from this extract,
    // so the initializer of PathStyle below is incomplete. Only the
    // backslash checks on the directory and file name are visible; the rest
    // of the condition and the selected styles must be confirmed upstream.
71  sys::path::Style PathStyle =
73  SP.getDirectory().contains("\\") ||
74  SP.getFilename().contains("\\")
77  // Best effort path normalization. This is to guarantee a unique flag symbol
78  // is produced for the same directory. Some builds may want to use relative
79  // paths, or paths with a specific prefix (see the -fdebug-compilation-dir
80  // flag), so only hash paths in debuginfo. Don't expand them to absolute
81  // paths.
82  SmallString<256> FilePath(SP.getDirectory());
83  sys::path::append(FilePath, PathStyle, SP.getFilename());
84  sys::path::native(FilePath, PathStyle);
85  sys::path::remove_dots(FilePath, /*remove_dot_dot=*/true, PathStyle);
86 
87  // The naming convention for the flag name is __<hash>_<file name> with '.' in
88  // <file name> replaced with '@'. For example C:\file.any.c would have a flag
89  // __D032E919_file@any@c. The naming convention matches MSVC's format however
90  // the match is not required to make JMC work. The hashing function used here
91  // is different from MSVC's.
92 
    // Copy the file-name component, substituting '@' for each '.'.
93  std::string Suffix;
94  for (auto C : sys::path::filename(FilePath, PathStyle))
95  Suffix.push_back(C == '.' ? '@' : C);
96 
    // Drop the file name so that only the remaining path is hashed.
97  sys::path::remove_filename(FilePath, PathStyle);
98  return (UseX86FastCall ? "_" : "__") +
99  utohexstr(djbHash(FilePath), /*LowerCase=*/false,
100  /*Width=*/8) +
101  "_" + Suffix;
102 }
103 
// Creates a debug-info description for the flag variable GV and attaches it to
// GV as MD_dbg metadata.
//
// GV is described as an artificial 8-bit "unsigned char" global, local to the
// unit, at line 0 of SP's file, within SP's compile unit.
104 void attachDebugInfo(GlobalVariable &GV, DISubprogram &SP) {
105  Module &M = *GV.getParent();
106  DICompileUnit *CU = SP.getUnit();
    // The compile unit is required below; SP must belong to one.
107  assert(CU);
108  DIBuilder DB(M, false, CU);
109 
110  auto *DType =
111  DB.createBasicType("unsigned char", 8, dwarf::DW_ATE_unsigned_char,
112  llvm::DINode::FlagArtificial);
113 
    // No linkage name is supplied; the variable name is GV's own name.
114  auto *DGVE = DB.createGlobalVariableExpression(
115  CU, GV.getName(), /*LinkageName=*/StringRef(), SP.getFile(),
116  /*LineNo=*/0, DType, /*IsLocalToUnit=*/true, /*IsDefined=*/true);
117  GV.addMetadata(LLVMContext::MD_dbg, *DGVE);
118  DB.finalize();
119 }
120 
// Returns the function type shared by the check function, its default
// implementation and the inserted calls: a non-variadic function taking one
// i8 pointer and returning void.
121 FunctionType *getCheckFunctionType(LLVMContext &Ctx) {
122  Type *VoidTy = Type::getVoidTy(Ctx);
123  PointerType *VoidPtrTy = Type::getInt8PtrTy(Ctx);
124  return FunctionType::get(VoidTy, VoidPtrTy, false);
125 }
126 
// Creates the default (do-nothing) check function in M and returns it.
//
// The function is named "_JustMyCode_Default" when UseX86FastCall is true and
// "__JustMyCode_Default" otherwise, has external linkage and the type from
// getCheckFunctionType(), and its body is a single block that only returns.
127 Function *createDefaultCheckFunction(Module &M, bool UseX86FastCall) {
128  LLVMContext &Ctx = M.getContext();
129  const char *DefaultCheckFunctionName =
130  UseX86FastCall ? "_JustMyCode_Default" : "__JustMyCode_Default";
131  // Create the function.
132  Function *DefaultCheckFunc =
133  Function::Create(getCheckFunctionType(Ctx), GlobalValue::ExternalLinkage,
134  DefaultCheckFunctionName, &M);
    // NOTE(review): listing line 135 is absent from this extract; one
    // statement may be missing here -- confirm upstream.
136  DefaultCheckFunc->addParamAttr(0, Attribute::NoUndef);
137  if (UseX86FastCall)
138  DefaultCheckFunc->addParamAttr(0, Attribute::InReg);
139 
    // Body: one unnamed basic block containing a bare return.
140  BasicBlock *EntryBB = BasicBlock::Create(Ctx, "", DefaultCheckFunc);
141  ReturnInst::Create(Ctx, EntryBB);
142  return DefaultCheckFunc;
143 }
144 } // namespace
145 
// Pass entry point: inserts a call to the check function at the start of every
// function in M that has a body and a DISubprogram.
//
// Each call passes a 1-byte flag global whose name comes from getFlagName().
// The check function and its default implementation are created lazily, the
// first time a function is actually instrumented.
//
// Returns true if at least one call was inserted, false otherwise.
146 bool JMCInstrumenter::runOnModule(Module &M) {
147  bool Changed = false;
148  LLVMContext &Ctx = M.getContext();
149  Triple ModuleTriple(M.getTargetTriple());
150  bool IsMSVC = ModuleTriple.isKnownWindowsMSVCEnvironment();
151  bool IsELF = ModuleTriple.isOSBinFormatELF();
152  assert((IsELF || IsMSVC) && "Unsupported triple for JMC");
    // Fastcall is used only for the MSVC environment on 32-bit x86.
153  bool UseX86FastCall = IsMSVC && ModuleTriple.getArch() == Triple::x86;
154  const char *const FlagSymbolSection = IsELF ? ".just.my.code" : ".msvcjmc";
155 
156  GlobalValue *CheckFunction = nullptr;
    // NOTE(review): listing line 157 is absent from this extract. SavedFlags,
    // used below, has no visible declaration; presumably it is declared on
    // that line -- confirm upstream.
158  for (auto &F : M) {
    // Skip declarations and functions without debug info.
159  if (F.isDeclaration())
160  continue;
161  auto *SP = F.getSubprogram();
162  if (!SP)
163  continue;
164 
    // Look up the flag cached for this subprogram; create it on first use.
165  Constant *&Flag = SavedFlags[SP];
166  if (!Flag) {
167  std::string FlagName = getFlagName(*SP, UseX86FastCall);
168  IntegerType *FlagTy = Type::getInt8Ty(Ctx);
    // The callback builds the global when getOrInsertGlobal needs a new one:
    // an internal, non-constant i8 initialized to 1, placed in the flag
    // section with alignment 1.
169  Flag = M.getOrInsertGlobal(FlagName, FlagTy, [&] {
170  // FIXME: Put the GV in comdat and have linkonce_odr linkage to save
171  // .msvcjmc section space? maybe not worth it.
172  GlobalVariable *GV = new GlobalVariable(
173  M, FlagTy, /*isConstant=*/false, GlobalValue::InternalLinkage,
174  ConstantInt::get(FlagTy, 1), FlagName);
175  GV->setSection(FlagSymbolSection);
176  GV->setAlignment(Align(1));
    // NOTE(review): listing line 177 is absent from this extract -- confirm
    // upstream whether a statement belongs here.
178  attachDebugInfo(*GV, *SP);
179  return GV;
180  });
181  }
182 
    // First instrumented function: set up the check function.
183  if (!CheckFunction) {
184  Function *DefaultCheckFunc =
185  createDefaultCheckFunction(M, UseX86FastCall);
186  if (IsELF) {
    // ELF: the default implementation itself is renamed to the check
    // function name and given weak linkage.
187  DefaultCheckFunc->setName(CheckFunctionName);
188  DefaultCheckFunc->setLinkage(GlobalValue::WeakAnyLinkage);
189  CheckFunction = DefaultCheckFunc;
190  } else {
    // Non-ELF (MSVC): get or insert the check function under its real
    // name, and keep the default implementation as a separate COMDAT
    // function.
191  assert(!M.getFunction(CheckFunctionName) &&
192  "JMC instrument more than once?");
193  auto *CheckFunc = cast<Function>(
194  M.getOrInsertFunction(CheckFunctionName, getCheckFunctionType(Ctx))
195  .getCallee());
196  CheckFunc->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
197  CheckFunc->addParamAttr(0, Attribute::NoUndef);
198  if (UseX86FastCall) {
199  CheckFunc->setCallingConv(CallingConv::X86_FastCall);
200  CheckFunc->addParamAttr(0, Attribute::InReg);
201  }
202  CheckFunction = CheckFunc;
203 
    // Add the default implementation to llvm.used and put it in an "any"
    // COMDAT named after itself.
204  StringRef DefaultCheckFunctionName = DefaultCheckFunc->getName();
205  appendToUsed(M, {DefaultCheckFunc});
206  Comdat *C = M.getOrInsertComdat(DefaultCheckFunctionName);
207  C->setSelectionKind(Comdat::Any);
208  DefaultCheckFunc->setComdat(C);
209  // Add a linker option /alternatename to set the default implementation
210  // for the check function.
211  // https://devblogs.microsoft.com/oldnewthing/20200731-00/?p=104024
212  std::string AltOption = std::string("/alternatename:") +
213  CheckFunctionName + "=" +
214  DefaultCheckFunctionName.str();
215  llvm::Metadata *Ops[] = {llvm::MDString::get(Ctx, AltOption)};
216  MDTuple *N = MDNode::get(Ctx, Ops);
217  M.getOrInsertNamedMetadata("llvm.linker.options")->addOperand(N);
218  }
219  }
220  // FIXME: it would be nice to make CI scheduling boundary, although in
221  // practice it does not matter much.
    // Insert the call at the first insertion point of F's first block, with
    // attributes and calling convention matching the check function.
222  auto *CI = CallInst::Create(getCheckFunctionType(Ctx), CheckFunction,
223  {Flag}, "", &*F.begin()->getFirstInsertionPt());
224  CI->addParamAttr(0, Attribute::NoUndef);
225  if (UseX86FastCall) {
226  CI->setCallingConv(CallingConv::X86_FastCall);
227  CI->addParamAttr(0, Attribute::InReg);
228  }
229 
230  Changed = true;
231  }
232  return Changed;
233 }
llvm::DIScope::getFilename
StringRef getFilename() const
Definition: DebugInfoMetadata.h:635
llvm::createJMCInstrumenterPass
ModulePass * createJMCInstrumenterPass()
JMC instrument pass.
llvm::GlobalObject::setComdat
void setComdat(Comdat *C)
Definition: Globals.cpp:194
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::Type::getInt8PtrTy
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:291
DJB.h
llvm::ModulePass
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:248
llvm::DIBuilder
Definition: DIBuilder.h:41
llvm::sys::path::Style::posix
@ posix
DebugInfoMetadata.h
llvm::Function
Definition: Function.h:60
Pass.h
llvm::Triple::x86
@ x86
Definition: Triple.h:85
Path.h
llvm::GlobalVariable
Definition: GlobalVariable.h:39
llvm::FunctionType::get
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
Definition: Type.cpp:361
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
Module.h
llvm::sys::path::native
void native(const Twine &path, SmallVectorImpl< char > &result, Style style=Style::native)
Convert path to the native form.
Definition: Path.cpp:540
llvm::GlobalValue::UnnamedAddr::Global
@ Global
llvm::sys::path::Style::windows_backslash
@ windows_backslash
llvm::CallingConv::X86_FastCall
@ X86_FastCall
'fast' analog of X86_StdCall.
Definition: CallingConv.h:100
llvm::GlobalValue::setUnnamedAddr
void setUnnamedAddr(UnnamedAddr Val)
Definition: GlobalValue.h:217
INITIALIZE_PASS
INITIALIZE_PASS(JMCInstrumenter, DEBUG_TYPE, "Instrument function entry with call to __CheckForDebuggerJustMyCode", false, false) ModulePass *llvm
Definition: JMCInstrumenter.cpp:55
llvm::Type::getInt8Ty
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:237
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1400
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::sys::path::append
void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")
Append to path.
Definition: Path.cpp:456
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24
SmallString.h
llvm::GlobalObject::setSection
void setSection(StringRef S)
Change the section for this global.
Definition: Globals.cpp:248
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::CallInst::Create
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:1517
llvm::MDTuple
Tuple of metadata.
Definition: Metadata.h:1329
llvm::Comdat::Any
@ Any
The linker may choose any COMDAT.
Definition: Comdat.h:36
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::initializeJMCInstrumenterPass
void initializeJMCInstrumenterPass(PassRegistry &)
llvm::MCID::Flag
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:147
llvm::GlobalObject::addMetadata
void addMetadata(unsigned KindID, MDNode &MD)
Add a metadata attachment.
Definition: Metadata.cpp:1360
llvm::Value::setName
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:375
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:879
llvm::sys::path::Style
Style
Definition: Path.h:27
Align
uint64_t Align
Definition: ELFObjHandler.cpp:81
llvm::GlobalValue::InternalLinkage
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
llvm::Comdat
Definition: Comdat.h:33
llvm::Metadata
Root of the metadata hierarchy.
Definition: Metadata.h:62
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Type.h
llvm::SmallString< 256 >
Passes.h
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::sys::path::remove_dots
bool remove_dots(SmallVectorImpl< char > &path, bool remove_dot_dot=false, Style style=Style::native)
In-place remove any '.
Definition: Path.cpp:715
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:636
llvm::GlobalValue::WeakAnyLinkage
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
Definition: GlobalValue.h:52
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
llvm::DenseMap
Definition: DenseMap.h:714
StringExtras.h
llvm::appendToUsed
void appendToUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.used list.
Definition: ModuleUtils.cpp:105
DIBuilder.h
llvm::DICompileUnit
Compile unit.
Definition: DebugInfoMetadata.h:1322
llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:632
llvm::GlobalValue::setLinkage
void setLinkage(LinkageTypes LT)
Definition: GlobalValue.h:518
llvm::MDString::get
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:498
llvm::Function::Create
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition: Function.h:137
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
llvm::BasicBlock::Create
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:97
llvm::DIScope::getDirectory
StringRef getDirectory() const
Definition: DebugInfoMetadata.h:641
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
llvm::Function::addParamAttr
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
adds the attribute to the list of attributes for the given arg.
Definition: Function.cpp:566
Function.h
llvm::sys::path::has_root_name
bool has_root_name(const Twine &path, Style style=Style::native)
Has root name?
Definition: Path.cpp:615
llvm::ReturnInst::Create
static ReturnInst * Create(LLVMContext &C, Value *retVal=nullptr, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3044
llvm::StringRef::contains
bool contains(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
Definition: StringRef.h:407
llvm::GlobalValue::ExternalLinkage
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:48
llvm::Type::getVoidTy
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:222
Instructions.h
llvm::sys::path::filename
StringRef filename(StringRef path, Style style=Style::native)
Get filename.
Definition: Path.cpp:577
ModuleUtils.h
N
#define N
llvm::sys::path::remove_filename
void remove_filename(SmallVectorImpl< char > &path, Style style=Style::native)
Remove the last component from path unless it is the root dir.
Definition: Path.cpp:474
llvm::DISubprogram
Subprogram description.
Definition: DebugInfoMetadata.h:1803
DerivedTypes.h
llvm::StringRef::str
std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:213
LLVMContext.h
llvm::djbHash
uint32_t djbHash(StringRef Buffer, uint32_t H=5381)
The Bernstein hash function used by the DWARF accelerator tables.
Definition: DJB.h:21
llvm::GlobalObject::setAlignment
void setAlignment(MaybeAlign Align)
Definition: Globals.cpp:126
DEBUG_TYPE
#define DEBUG_TYPE
Definition: JMCInstrumenter.cpp:42
CU
Definition: AArch64AsmBackend.cpp:504
InitializePasses.h
llvm::DIScope::getFile
DIFile * getFile() const
Definition: DebugInfoMetadata.h:479
llvm::FunctionType
Class to represent function types.
Definition: DerivedTypes.h:103
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38