LLVM  16.0.0git
SanitizerBinaryMetadata.cpp
Go to the documentation of this file.
1 //===- SanitizerBinaryMetadata.cpp - binary analysis sanitizers metadata --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file is a part of SanitizerBinaryMetadata.
10 //
11 //===----------------------------------------------------------------------===//
12 
#include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"

#include <array>
#include <cstdint>
43 
44 using namespace llvm;
45 
46 #define DEBUG_TYPE "sanmd"
47 
48 namespace {
49 
50 //===--- Constants --------------------------------------------------------===//
51 
// Base metadata format version; it occupies the lower 16 bits.
constexpr uint32_t kVersionBase = 1;
// Version flag (bit 16): offsets are pointer-sized.
constexpr uint32_t kVersionPtrSizeRel = uint32_t{1} << 16;
// Priority passed when registering the module constructors/destructors.
constexpr int kCtorDtorPriority = 2;
55 
56 // Pairs of names of initialization callback functions and which section
57 // contains the relevant metadata.
58 class MetadataInfo {
59 public:
60  const StringRef FunctionPrefix;
61  const StringRef SectionSuffix;
62  const uint32_t FeatureMask;
63 
64  static const MetadataInfo Covered;
65  static const MetadataInfo Atomics;
66 
67 private:
68  // Forbid construction elsewhere.
69  explicit constexpr MetadataInfo(StringRef FunctionPrefix,
70  StringRef SectionSuffix, int Feature)
71  : FunctionPrefix(FunctionPrefix), SectionSuffix(SectionSuffix),
72  FeatureMask(Feature != -1 ? (1u << Feature) : 0) {}
73 };
74 const MetadataInfo MetadataInfo::Covered{"__sanitizer_metadata_covered",
75  "sanmd_covered", -1};
76 const MetadataInfo MetadataInfo::Atomics{"__sanitizer_metadata_atomics",
77  "sanmd_atomics", 0};
78 
79 // The only instances of MetadataInfo are the constants above, so a set of
80 // them may simply store pointers to them. To deterministically generate code,
81 // we need to use a set with stable iteration order, such as SetVector.
82 using MetadataInfoSet = SetVector<const MetadataInfo *>;
83 
84 //===--- Command-line options ---------------------------------------------===//
85 
86 cl::opt<bool> ClEmitCovered("sanitizer-metadata-covered",
87  cl::desc("Emit PCs for covered functions."),
88  cl::Hidden, cl::init(false));
89 cl::opt<bool> ClEmitAtomics("sanitizer-metadata-atomics",
90  cl::desc("Emit PCs for atomic operations."),
91  cl::Hidden, cl::init(false));
92 
93 //===--- Statistics -------------------------------------------------------===//
94 
95 STATISTIC(NumMetadataCovered, "Metadata attached to covered functions");
96 STATISTIC(NumMetadataAtomics, "Metadata attached to atomics");
97 
98 //===----------------------------------------------------------------------===//
99 
100 // Apply opt overrides.
102 transformOptionsFromCl(SanitizerBinaryMetadataOptions &&Opts) {
103  Opts.Covered |= ClEmitCovered;
104  Opts.Atomics |= ClEmitAtomics;
105  return std::move(Opts);
106 }
107 
108 class SanitizerBinaryMetadata {
109 public:
110  SanitizerBinaryMetadata(Module &M, SanitizerBinaryMetadataOptions Opts)
111  : Mod(M), Options(transformOptionsFromCl(std::move(Opts))),
112  TargetTriple(M.getTargetTriple()), IRB(M.getContext()) {
113  // FIXME: Make it work with other formats.
114  assert(TargetTriple.isOSBinFormatELF() && "ELF only");
115  }
116 
117  bool run();
118 
119 private:
120  // Return enabled feature mask of per-instruction metadata.
121  uint32_t getEnabledPerInstructionFeature() const {
122  uint32_t FeatureMask = 0;
123  if (Options.Atomics)
124  FeatureMask |= MetadataInfo::Atomics.FeatureMask;
125  return FeatureMask;
126  }
127 
128  uint32_t getVersion() const {
129  uint32_t Version = kVersionBase;
130  const auto CM = Mod.getCodeModel();
131  if (CM.has_value() && (*CM == CodeModel::Medium || *CM == CodeModel::Large))
132  Version |= kVersionPtrSizeRel;
133  return Version;
134  }
135 
136  void runOn(Function &F, MetadataInfoSet &MIS);
137 
138  // Determines which set of metadata to collect for this instruction.
139  //
140  // Returns true if covered metadata is required to unambiguously interpret
141  // other metadata. For example, if we are interested in atomics metadata, any
142  // function with memory operations (atomic or not) requires covered metadata
143  // to determine if a memory operation is atomic or not in modules compiled
144  // with SanitizerBinaryMetadata.
145  bool runOn(Instruction &I, MetadataInfoSet &MIS, MDBuilder &MDB);
146 
147  // Get start/end section marker pointer.
148  GlobalVariable *getSectionMarker(const Twine &MarkerName, Type *Ty);
149 
150  // Create a 0-sized object in a section, so that the section is not discarded
151  // if all inputs have been discarded.
152  void createZeroSizedObjectInSection(Type *Ty, StringRef SectionSuffix);
153 
154  // Returns the target-dependent section name.
155  StringRef getSectionName(StringRef SectionSuffix);
156 
157  // Returns the section start marker name.
158  Twine getSectionStart(StringRef SectionSuffix);
159 
160  // Returns the section end marker name.
161  Twine getSectionEnd(StringRef SectionSuffix);
162 
163  Module &Mod;
165  const Triple TargetTriple;
166  IRBuilder<> IRB;
167 };
168 
170  MetadataInfoSet MIS;
171 
172  for (Function &F : Mod)
173  runOn(F, MIS);
174 
175  if (MIS.empty())
176  return false;
177 
178  //
179  // Setup constructors and call all initialization functions for requested
180  // metadata features.
181  //
182 
183  auto *Int8PtrTy = IRB.getInt8PtrTy();
184  auto *Int8PtrPtrTy = PointerType::getUnqual(Int8PtrTy);
185  auto *Int32Ty = IRB.getInt32Ty();
186  const std::array<Type *, 3> InitTypes = {Int32Ty, Int8PtrPtrTy, Int8PtrPtrTy};
187  auto *Version = ConstantInt::get(Int32Ty, getVersion());
188 
189  for (const MetadataInfo *MI : MIS) {
190  const std::array<Value *, InitTypes.size()> InitArgs = {
191  Version,
192  getSectionMarker(getSectionStart(MI->SectionSuffix), Int8PtrTy),
193  getSectionMarker(getSectionEnd(MI->SectionSuffix), Int8PtrTy),
194  };
195  Function *Ctor =
197  Mod, (MI->FunctionPrefix + ".module_ctor").str(),
198  (MI->FunctionPrefix + "_add").str(), InitTypes, InitArgs)
199  .first;
200  Function *Dtor =
202  Mod, (MI->FunctionPrefix + ".module_dtor").str(),
203  (MI->FunctionPrefix + "_del").str(), InitTypes, InitArgs)
204  .first;
205  Constant *CtorData = nullptr;
206  Constant *DtorData = nullptr;
207  if (TargetTriple.supportsCOMDAT()) {
208  // Use COMDAT to deduplicate constructor/destructor function.
209  Ctor->setComdat(Mod.getOrInsertComdat(Ctor->getName()));
210  Dtor->setComdat(Mod.getOrInsertComdat(Dtor->getName()));
211  CtorData = Ctor;
212  DtorData = Dtor;
213  }
214  appendToGlobalCtors(Mod, Ctor, kCtorDtorPriority, CtorData);
215  appendToGlobalDtors(Mod, Dtor, kCtorDtorPriority, DtorData);
216  createZeroSizedObjectInSection(Int8PtrTy, MI->SectionSuffix);
217  }
218 
219  return true;
220 }
221 
222 void SanitizerBinaryMetadata::runOn(Function &F, MetadataInfoSet &MIS) {
223  if (F.empty())
224  return;
225  if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation))
226  return;
227  // Don't touch available_externally functions, their actual body is elsewhere.
228  if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
229  return;
230 
231  MDBuilder MDB(F.getContext());
232 
233  // The metadata features enabled for this function, stored along covered
234  // metadata (if enabled).
235  uint32_t PerInstrFeatureMask = getEnabledPerInstructionFeature();
236  // Don't emit unnecessary covered metadata for all functions to save space.
237  bool RequiresCovered = false;
238  if (PerInstrFeatureMask) {
239  for (BasicBlock &BB : F)
240  for (Instruction &I : BB)
241  RequiresCovered |= runOn(I, MIS, MDB);
242  }
243 
244  // Covered metadata is always emitted if explicitly requested, otherwise only
245  // if some other metadata requires it to unambiguously interpret it for
246  // modules compiled with SanitizerBinaryMetadata.
247  if (Options.Covered || RequiresCovered) {
248  NumMetadataCovered++;
249  const auto *MI = &MetadataInfo::Covered;
250  MIS.insert(MI);
251  const StringRef Section = getSectionName(MI->SectionSuffix);
252  // The feature mask will be placed after the size (32 bit) of the function,
253  // so in total one covered entry will use `sizeof(void*) + 4 + 4`.
254  Constant *CFM = IRB.getInt32(PerInstrFeatureMask);
255  F.setMetadata(LLVMContext::MD_pcsections,
256  MDB.createPCSections({{Section, {CFM}}}));
257  }
258 }
259 
260 bool SanitizerBinaryMetadata::runOn(Instruction &I, MetadataInfoSet &MIS,
261  MDBuilder &MDB) {
263  bool RequiresCovered = false;
264 
265  if (Options.Atomics && I.mayReadOrWriteMemory()) {
266  auto SSID = getAtomicSyncScopeID(&I);
267  if (SSID.has_value() && SSID.value() != SyncScope::SingleThread) {
268  NumMetadataAtomics++;
269  InstMetadata.push_back(&MetadataInfo::Atomics);
270  }
271  RequiresCovered = true;
272  }
273 
274  // Attach MD_pcsections to instruction.
275  if (!InstMetadata.empty()) {
276  MIS.insert(InstMetadata.begin(), InstMetadata.end());
278  for (const auto &MI : InstMetadata)
279  Sections.push_back({getSectionName(MI->SectionSuffix), {}});
280  I.setMetadata(LLVMContext::MD_pcsections, MDB.createPCSections(Sections));
281  }
282 
283  return RequiresCovered;
284 }
285 
287 SanitizerBinaryMetadata::getSectionMarker(const Twine &MarkerName, Type *Ty) {
288  auto *Marker = new GlobalVariable(Mod, Ty, /*isConstant=*/false,
290  /*Initializer=*/nullptr, MarkerName);
291  Marker->setVisibility(GlobalValue::HiddenVisibility);
292  return Marker;
293 }
294 
295 void SanitizerBinaryMetadata::createZeroSizedObjectInSection(
296  Type *Ty, StringRef SectionSuffix) {
297  auto *DummyInit = ConstantAggregateZero::get(ArrayType::get(Ty, 0));
298  auto *DummyEntry = new GlobalVariable(Mod, DummyInit->getType(), true,
300  DummyInit, "__dummy_" + SectionSuffix);
301  DummyEntry->setSection(getSectionName(SectionSuffix));
302  DummyEntry->setVisibility(GlobalValue::HiddenVisibility);
303  if (TargetTriple.supportsCOMDAT())
304  DummyEntry->setComdat(Mod.getOrInsertComdat(DummyEntry->getName()));
305  // Make sure the section isn't discarded by gc-sections.
306  appendToUsed(Mod, DummyEntry);
307 }
308 
309 StringRef SanitizerBinaryMetadata::getSectionName(StringRef SectionSuffix) {
310  // FIXME: Other TargetTriple (req. string pool)
311  return SectionSuffix;
312 }
313 
314 Twine SanitizerBinaryMetadata::getSectionStart(StringRef SectionSuffix) {
315  return "__start_" + SectionSuffix;
316 }
317 
318 Twine SanitizerBinaryMetadata::getSectionEnd(StringRef SectionSuffix) {
319  return "__stop_" + SectionSuffix;
320 }
321 
322 } // namespace
323 
324 SanitizerBinaryMetadataPass::SanitizerBinaryMetadataPass(
326  : Options(std::move(Opts)) {}
327 
330  SanitizerBinaryMetadata Pass(M, Options);
331  if (Pass.run())
332  return PreservedAnalyses::none();
333  return PreservedAnalyses::all();
334 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
Instrumentation.h
Int32Ty
IntegerType * Int32Ty
Definition: NVVMIntrRange.cpp:67
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:108
llvm::GlobalObject::setComdat
void setComdat(Comdat *C)
Definition: Globals.cpp:189
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
Pass
print lazy value Lazy Value Info Printer Pass
Definition: LazyValueInfo.cpp:1999
Metadata.h
llvm::GlobalValue::HiddenVisibility
@ HiddenVisibility
The GV is hidden.
Definition: GlobalValue.h:64
llvm::Function
Definition: Function.h:60
StringRef.h
Pass.h
llvm::CodeModel::Medium
@ Medium
Definition: CodeGen.h:28
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
Statistic.h
llvm::IRBuilder<>
llvm::GlobalVariable
Definition: GlobalVariable.h:39
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:155
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::Module::getCodeModel
Optional< CodeModel::Model > getCodeModel() const
Returns the code model (tiny, small, kernel, medium or large model)
Definition: Module.cpp:619
Module.h
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:239
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::appendToGlobalDtors
void appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *Data=nullptr)
Same as appendToGlobalCtors(), but for global dtors.
Definition: ModuleUtils.cpp:71
Instruction.h
CommandLine.h
GlobalValue.h
Twine.h
llvm::Instruction
Definition: Instruction.h:42
MDBuilder.h
Options
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
Definition: PassBuilderBindings.cpp:48
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:879
llvm::IndexedInstrProf::Version
const uint64_t Version
Definition: InstrProf.h:1056
Type.h
llvm::SanitizerBinaryMetadataPass::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: SanitizerBinaryMetadata.cpp:329
llvm::dxil::PointerTypeAnalysis::run
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
Definition: PointerTypeAnalysis.cpp:189
llvm::getAtomicSyncScopeID
std::optional< SyncScope::ID > getAtomicSyncScopeID(const Instruction *I)
A helper function that returns an atomic operation's sync scope; returns None if it is not an atomic ...
Definition: Instructions.h:5416
llvm::MDBuilder::createPCSections
MDNode * createPCSections(ArrayRef< PCSection > Sections)
Return metadata for PC sections.
Definition: MDBuilder.cpp:161
llvm::cl::opt< bool >
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::PointerType::getUnqual
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:651
llvm::createSanitizerCtorAndInitFunctions
std::pair< Function *, FunctionCallee > createSanitizerCtorAndInitFunctions(Module &M, StringRef CtorName, StringRef InitName, ArrayRef< Type * > InitArgTypes, ArrayRef< Value * > InitArgs, StringRef VersionCheckName=StringRef())
Creates sanitizer constructor function, and calls sanitizer's init function from it.
Definition: ModuleUtils.cpp:138
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::SanitizerBinaryMetadataOptions::Covered
bool Covered
Definition: Instrumentation.h:155
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:447
llvm::appendToUsed
void appendToUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.used list.
Definition: ModuleUtils.cpp:107
llvm::Module::getOrInsertComdat
Comdat * getOrInsertComdat(StringRef Name)
Return the Comdat in the module with the specified name.
Definition: Module.cpp:583
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::ArrayType::get
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:638
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
Triple.h
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
uint32_t
llvm::GlobalValue::AvailableExternallyLinkage
@ AvailableExternallyLinkage
Available for inspection, not emission.
Definition: GlobalValue.h:49
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
Constant.h
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
std
Definition: BitVector.h:851
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::SanitizerBinaryMetadataOptions
Options for SanitizerBinaryMetadata.
Definition: Instrumentation.h:154
llvm::SyncScope::SingleThread
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:54
GlobalVariable.h
Function.h
llvm::MDBuilder
Definition: MDBuilder.h:36
llvm::GlobalValue::ExternalLinkage
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:48
llvm::CodeModel::Large
@ Large
Definition: CodeGen.h:28
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
Instructions.h
SmallVector.h
ModuleUtils.h
SanitizerBinaryMetadata.h
DerivedTypes.h
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
LLVMContext.h
llvm::appendToGlobalCtors
void appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data=nullptr)
Append F to the list of global ctors of module M with the given Priority.
Definition: ModuleUtils.cpp:67
llvm::cl::desc
Definition: CommandLine.h:413
Mod
Module * Mod
Definition: PassBuilderBindings.cpp:54
llvm::SetVector
A vector that has set insertion semantics.
Definition: SetVector.h:40
llvm::ConstantAggregateZero::get
static ConstantAggregateZero * get(Type *Ty)
Definition: Constants.cpp:1587
Value.h
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
SetVector.h
llvm::SmallVectorImpl::insert
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:809