1 //===- DataFlowSanitizer.cpp - dynamic data flow analysis -----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file is a part of DataFlowSanitizer, a generalised dynamic data flow
11 /// analysis.
12 ///
13 /// Unlike other Sanitizer tools, this tool is not designed to detect a specific
14 /// class of bugs on its own. Instead, it provides a generic dynamic data flow
15 /// analysis framework to be used by clients to help detect application-specific
16 /// issues within their own code.
17 ///
18 /// The analysis is based on automatic propagation of data flow labels (also
19 /// known as taint labels) through a program as it performs computation.
20 ///
21 /// Argument and return value labels are passed through TLS variables
22 /// __dfsan_arg_tls and __dfsan_retval_tls.
23 ///
24 /// Each byte of application memory is backed by a shadow memory byte. The
25 /// shadow byte can represent up to 8 labels. On Linux/x86_64, memory is then
26 /// laid out as follows:
27 ///
28 /// +--------------------+ 0x800000000000 (top of memory)
29 /// | application 3 |
30 /// +--------------------+ 0x700000000000
31 /// | invalid |
32 /// +--------------------+ 0x610000000000
33 /// | origin 1 |
34 /// +--------------------+ 0x600000000000
35 /// | application 2 |
36 /// +--------------------+ 0x510000000000
37 /// | shadow 1 |
38 /// +--------------------+ 0x500000000000
39 /// | invalid |
40 /// +--------------------+ 0x400000000000
41 /// | origin 3 |
42 /// +--------------------+ 0x300000000000
43 /// | shadow 3 |
44 /// +--------------------+ 0x200000000000
45 /// | origin 2 |
46 /// +--------------------+ 0x110000000000
47 /// | invalid |
48 /// +--------------------+ 0x100000000000
49 /// | shadow 2 |
50 /// +--------------------+ 0x010000000000
51 /// | application 1 |
52 /// +--------------------+ 0x000000000000
53 ///
54 /// MEM_TO_SHADOW(mem) = mem ^ 0x500000000000
55 /// SHADOW_TO_ORIGIN(shadow) = shadow + 0x100000000000
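/// As a worked example of the two formulas (illustrative, not taken from the
/// design document): an "application 1" address such as 0x000000001000 maps
/// to shadow 0x500000001000 (in shadow 1) and origin 0x600000001000 (in
/// origin 1), while an "application 3" address such as 0x700000000000 maps to
/// shadow 0x200000000000 (in shadow 3) and origin 0x300000000000 (in origin 3).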
56 ///
57 /// For more information, please refer to the design document:
58 /// http://clang.llvm.org/docs/DataFlowSanitizerDesign.html
59 //
60 //===----------------------------------------------------------------------===//
61 
62 #include "llvm/Transforms/Instrumentation/DataFlowSanitizer.h"
63 #include "llvm/ADT/DenseMap.h"
64 #include "llvm/ADT/DenseSet.h"
65 #include "llvm/ADT/DepthFirstIterator.h"
66 #include "llvm/ADT/None.h"
67 #include "llvm/ADT/SmallPtrSet.h"
68 #include "llvm/ADT/SmallVector.h"
69 #include "llvm/ADT/StringExtras.h"
70 #include "llvm/ADT/StringRef.h"
71 #include "llvm/ADT/Triple.h"
72 #include "llvm/ADT/iterator.h"
73 #include "llvm/Analysis/ValueTracking.h"
74 #include "llvm/IR/Argument.h"
75 #include "llvm/IR/Attributes.h"
76 #include "llvm/IR/BasicBlock.h"
77 #include "llvm/IR/Constant.h"
78 #include "llvm/IR/Constants.h"
79 #include "llvm/IR/DataLayout.h"
80 #include "llvm/IR/DerivedTypes.h"
81 #include "llvm/IR/Dominators.h"
82 #include "llvm/IR/Function.h"
83 #include "llvm/IR/GlobalAlias.h"
84 #include "llvm/IR/GlobalValue.h"
85 #include "llvm/IR/GlobalVariable.h"
86 #include "llvm/IR/IRBuilder.h"
87 #include "llvm/IR/InlineAsm.h"
88 #include "llvm/IR/InstVisitor.h"
89 #include "llvm/IR/InstrTypes.h"
90 #include "llvm/IR/Instruction.h"
91 #include "llvm/IR/Instructions.h"
92 #include "llvm/IR/IntrinsicInst.h"
93 #include "llvm/IR/LLVMContext.h"
94 #include "llvm/IR/MDBuilder.h"
95 #include "llvm/IR/Module.h"
96 #include "llvm/IR/PassManager.h"
97 #include "llvm/IR/Type.h"
98 #include "llvm/IR/User.h"
99 #include "llvm/IR/Value.h"
100 #include "llvm/InitializePasses.h"
101 #include "llvm/Pass.h"
102 #include "llvm/Support/Alignment.h"
103 #include "llvm/Support/Casting.h"
104 #include "llvm/Support/CommandLine.h"
105 #include "llvm/Support/ErrorHandling.h"
106 #include "llvm/Support/SpecialCaseList.h"
107 #include "llvm/Support/VirtualFileSystem.h"
108 #include "llvm/Transforms/Instrumentation.h"
109 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
110 #include "llvm/Transforms/Utils/Local.h"
111 #include <algorithm>
112 #include <cassert>
113 #include <cstddef>
114 #include <cstdint>
115 #include <iterator>
116 #include <memory>
117 #include <set>
118 #include <string>
119 #include <utility>
120 #include <vector>
121 
122 using namespace llvm;
123 
124 // This must be consistent with ShadowWidthBits.
125 static const Align ShadowTLSAlignment = Align(2);
126 
127 static const Align MinOriginAlignment = Align(4);
128 
129 // The size of TLS variables. These constants must be kept in sync with the ones
130 // in dfsan.cpp.
131 static const unsigned ArgTLSSize = 800;
132 static const unsigned RetvalTLSSize = 800;
133 
134 // The -dfsan-preserve-alignment flag controls whether this pass assumes that
135 // alignment requirements provided by the input IR are correct. For example,
136 // if the input IR contains a load with alignment 8, this flag will cause
137 // the shadow load to have alignment 16. This flag is disabled by default as
138 // we have unfortunately encountered too much code (including Clang itself;
139 // see PR14291) which performs misaligned access.
140 static cl::opt<bool> ClPreserveAlignment(
141     "dfsan-preserve-alignment",
142  cl::desc("respect alignment requirements provided by input IR"), cl::Hidden,
143  cl::init(false));
144 
145 // The ABI list files control how shadow parameters are passed. The pass treats
146 // every function labelled "uninstrumented" in the ABI list file as conforming
147 // to the "native" (i.e. unsanitized) ABI. Unless the ABI list contains
148 // additional annotations for those functions, a call to one of those functions
149 // will produce a warning message, as the labelling behaviour of the function is
150 // unknown. The other supported annotations for uninstrumented functions are
151 // "functional" and "discard", which are described below under
152 // DataFlowSanitizer::WrapperKind.
153 // Functions will often be labelled with both "uninstrumented" and one of
154 // "functional" or "discard". This will leave the function unchanged by this
155 // pass, and create a wrapper function that will call the original.
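// For illustration, entries in such a file use the SpecialCaseList format
// described in the design document; a hypothetical list might contain:
//
//   fun:malloc=uninstrumented
//   fun:malloc=discard
//   fun:tolower=uninstrumented
//   fun:tolower=functional
//   fun:memcmp=uninstrumented
//   fun:memcmp=custom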
156 //
157 // Instrumented functions can also be annotated as "force_zero_labels", which
158 // will make all of their shadow and return values carry zero labels.
159 // Functions should never be labelled with both "force_zero_labels" and
160 // "uninstrumented" or any of the uninstrumented wrapper kinds.
161 static cl::list<std::string> ClABIListFiles(
162     "dfsan-abilist",
163  cl::desc("File listing native ABI functions and how the pass treats them"),
164  cl::Hidden);
165 
166 // Controls whether the pass includes or ignores the labels of pointers in load
167 // instructions.
168 static cl::opt<bool> ClCombinePointerLabelsOnLoad(
169     "dfsan-combine-pointer-labels-on-load",
170  cl::desc("Combine the label of the pointer with the label of the data when "
171  "loading from memory."),
172  cl::Hidden, cl::init(true));
173 
174 // Controls whether the pass includes or ignores the labels of pointers in
175 // stores instructions.
176 static cl::opt<bool> ClCombinePointerLabelsOnStore(
177     "dfsan-combine-pointer-labels-on-store",
178  cl::desc("Combine the label of the pointer with the label of the data when "
179  "storing in memory."),
180  cl::Hidden, cl::init(false));
181 
182 // Controls whether the pass propagates labels of offsets in GEP instructions.
183 static cl::opt<bool> ClCombineOffsetLabelsOnGEP(
184     "dfsan-combine-offset-labels-on-gep",
185  cl::desc(
186  "Combine the label of the offset with the label of the pointer when "
187  "doing pointer arithmetic."),
188  cl::Hidden, cl::init(true));
189 
190 static cl::opt<bool> ClDebugNonzeroLabels(
191     "dfsan-debug-nonzero-labels",
192  cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, "
193  "load or return with a nonzero label"),
194  cl::Hidden);
195 
196 // Experimental feature that inserts callbacks for certain data events.
197 // Currently callbacks are only inserted for loads, stores, memory transfers
198 // (i.e. memcpy and memmove), and comparisons.
199 //
200 // If this flag is set to true, the user must provide definitions for the
201 // following callback functions:
202 // void __dfsan_load_callback(dfsan_label Label, void* addr);
203 // void __dfsan_store_callback(dfsan_label Label, void* addr);
204 // void __dfsan_mem_transfer_callback(dfsan_label *Start, size_t Len);
205 // void __dfsan_cmp_callback(dfsan_label CombinedLabel);
206 static cl::opt<bool> ClEventCallbacks(
207     "dfsan-event-callbacks",
208  cl::desc("Insert calls to __dfsan_*_callback functions on data events."),
209  cl::Hidden, cl::init(false));
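// For illustration only (not part of this pass), user-supplied definitions
// matching the signatures above might look like the following, with
// dfsan_label taken from <sanitizer/dfsan_interface.h>:
//
//   extern "C" void __dfsan_load_callback(dfsan_label Label, void *Addr) {}
//   extern "C" void __dfsan_store_callback(dfsan_label Label, void *Addr) {}
//   extern "C" void __dfsan_mem_transfer_callback(dfsan_label *Start,
//                                                 size_t Len) {}
//   extern "C" void __dfsan_cmp_callback(dfsan_label CombinedLabel) {}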
210 
211 // Controls whether the pass tracks the control flow of select instructions.
212 static cl::opt<bool> ClTrackSelectControlFlow(
213     "dfsan-track-select-control-flow",
214  cl::desc("Propagate labels from condition values of select instructions "
215  "to results."),
216  cl::Hidden, cl::init(true));
217 
218 // TODO: This default value follows MSan. DFSan may use a different value.
219 static cl::opt<int> ClInstrumentWithCallThreshold(
220     "dfsan-instrument-with-call-threshold",
221  cl::desc("If the function being instrumented requires more than "
222  "this number of origin stores, use callbacks instead of "
223  "inline checks (-1 means never use callbacks)."),
224  cl::Hidden, cl::init(3500));
225 
226 // Controls how to track origins.
227 // * 0: do not track origins.
228 // * 1: track origins at memory store operations.
229 // * 2: track origins at memory load and store operations.
230 // TODO: track callsites.
231 static cl::opt<int> ClTrackOrigins("dfsan-track-origins",
232  cl::desc("Track origins of labels"),
233  cl::Hidden, cl::init(0));
234 
235 static StringRef getGlobalTypeString(const GlobalValue &G) {
236   // Types of GlobalVariables are always pointer types.
237  Type *GType = G.getValueType();
238  // For now we support excluding struct types only.
239  if (StructType *SGType = dyn_cast<StructType>(GType)) {
240  if (!SGType->isLiteral())
241  return SGType->getName();
242  }
243  return "<unknown type>";
244 }
245 
246 namespace {
247 
248 // Memory map parameters used in application-to-shadow address calculation.
249 // Offset = (Addr & ~AndMask) ^ XorMask
250 // Shadow = ShadowBase + Offset
251 // Origin = (OriginBase + Offset) & ~3ULL
252 struct MemoryMapParams {
253  uint64_t AndMask;
254  uint64_t XorMask;
255  uint64_t ShadowBase;
256  uint64_t OriginBase;
257 };
258 
259 } // end anonymous namespace
260 
261 // x86_64 Linux
262 // NOLINTNEXTLINE(readability-identifier-naming)
263 static const MemoryMapParams Linux_X86_64_MemoryMapParams = {
264  0, // AndMask (not used)
265  0x500000000000, // XorMask
266  0, // ShadowBase (not used)
267  0x100000000000, // OriginBase
268 };
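// As a sketch of how these parameters combine with the formulas above: with
// AndMask == 0 and ShadowBase == 0, the calculation reduces to
//   Offset = Addr ^ 0x500000000000
//   Shadow = Offset
//   Origin = (Offset + 0x100000000000) & ~3ULL
// so, for example, Addr == 0x555500001234 yields Shadow == 0x055500001234 and
// Origin == 0x155500001234.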
269 
270 namespace {
271 
272 class DFSanABIList {
273  std::unique_ptr<SpecialCaseList> SCL;
274 
275 public:
276  DFSanABIList() = default;
277 
278  void set(std::unique_ptr<SpecialCaseList> List) { SCL = std::move(List); }
279 
280  /// Returns whether either this function or its source file are listed in the
281  /// given category.
282  bool isIn(const Function &F, StringRef Category) const {
283  return isIn(*F.getParent(), Category) ||
284  SCL->inSection("dataflow", "fun", F.getName(), Category);
285  }
286 
287  /// Returns whether this global alias is listed in the given category.
288  ///
289  /// If GA aliases a function, the alias's name is matched as a function name
290  /// would be. Similarly, aliases of globals are matched like globals.
291  bool isIn(const GlobalAlias &GA, StringRef Category) const {
292  if (isIn(*GA.getParent(), Category))
293  return true;
294 
295  if (isa<FunctionType>(GA.getValueType()))
296  return SCL->inSection("dataflow", "fun", GA.getName(), Category);
297 
298  return SCL->inSection("dataflow", "global", GA.getName(), Category) ||
299  SCL->inSection("dataflow", "type", getGlobalTypeString(GA),
300  Category);
301  }
302 
303  /// Returns whether this module is listed in the given category.
304  bool isIn(const Module &M, StringRef Category) const {
305  return SCL->inSection("dataflow", "src", M.getModuleIdentifier(), Category);
306  }
307 };
308 
309 /// TransformedFunction is used to express the result of transforming one
310 /// function type into another. This struct is immutable. It holds metadata
311 /// useful for updating calls of the old function to the new type.
312 struct TransformedFunction {
313  TransformedFunction(FunctionType *OriginalType, FunctionType *TransformedType,
314  std::vector<unsigned> ArgumentIndexMapping)
315  : OriginalType(OriginalType), TransformedType(TransformedType),
316  ArgumentIndexMapping(ArgumentIndexMapping) {}
317 
318  // Disallow copies.
319  TransformedFunction(const TransformedFunction &) = delete;
320  TransformedFunction &operator=(const TransformedFunction &) = delete;
321 
322  // Allow moves.
323  TransformedFunction(TransformedFunction &&) = default;
324  TransformedFunction &operator=(TransformedFunction &&) = default;
325 
326  /// Type of the function before the transformation.
327  FunctionType *OriginalType;
328 
329  /// Type of the function after the transformation.
330  FunctionType *TransformedType;
331 
332  /// Transforming a function may change the position of arguments. This
333  /// member records the mapping from each argument's old position to its new
334  /// position. Argument positions are zero-indexed. If the transformation
335  /// from F to F' made the first argument of F into the third argument of F',
336  /// then ArgumentIndexMapping[0] will equal 2.
337  std::vector<unsigned> ArgumentIndexMapping;
338 };
339 
340 /// Given function attributes from a call site for the original function,
341 /// return function attributes appropriate for a call to the transformed
342 /// function.
343 static AttributeList
344 transformFunctionAttributes(const TransformedFunction &TransformedFunction,
345  LLVMContext &Ctx, AttributeList CallSiteAttrs) {
346 
347  // Construct a vector of AttributeSet for each function argument.
348  std::vector<llvm::AttributeSet> ArgumentAttributes(
349  TransformedFunction.TransformedType->getNumParams());
350 
351  // Copy attributes from the parameter of the original function to the
352  // transformed version. 'ArgumentIndexMapping' holds the mapping from
353  // old argument position to new.
354  for (unsigned I = 0, IE = TransformedFunction.ArgumentIndexMapping.size();
355  I < IE; ++I) {
356  unsigned TransformedIndex = TransformedFunction.ArgumentIndexMapping[I];
357  ArgumentAttributes[TransformedIndex] = CallSiteAttrs.getParamAttrs(I);
358  }
359 
360  // Copy annotations on varargs arguments.
361  for (unsigned I = TransformedFunction.OriginalType->getNumParams(),
362  IE = CallSiteAttrs.getNumAttrSets();
363  I < IE; ++I) {
364  ArgumentAttributes.push_back(CallSiteAttrs.getParamAttrs(I));
365  }
366 
367  return AttributeList::get(Ctx, CallSiteAttrs.getFnAttrs(),
368  CallSiteAttrs.getRetAttrs(),
369  llvm::makeArrayRef(ArgumentAttributes));
370 }
371 
372 class DataFlowSanitizer {
373  friend struct DFSanFunction;
374  friend class DFSanVisitor;
375 
376  enum { ShadowWidthBits = 8, ShadowWidthBytes = ShadowWidthBits / 8 };
377 
378  enum { OriginWidthBits = 32, OriginWidthBytes = OriginWidthBits / 8 };
379 
380  /// How should calls to uninstrumented functions be handled?
381  enum WrapperKind {
382  /// This function is present in an uninstrumented form but we don't know
383  /// how it should be handled. Print a warning and call the function anyway.
384  /// Don't label the return value.
385  WK_Warning,
386 
387  /// This function does not write to (user-accessible) memory, and its return
388  /// value is unlabelled.
389  WK_Discard,
390 
391  /// This function does not write to (user-accessible) memory, and the label
392  /// of its return value is the union of the label of its arguments.
393  WK_Functional,
394 
395  /// Instead of calling the function, a custom wrapper __dfsw_F is called,
396  /// where F is the name of the function. This function may wrap the
397  /// original function or provide its own implementation. WK_Custom uses an
398  /// extra pointer argument to return the shadow. This allows the wrapped
399  /// form of the function type to be expressed in C.
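  /// For example (simplified from the design document, ignoring origin
  /// tracking), a custom wrapper for
  ///   int f(int x);
  /// is declared in C as
  ///   int __dfsw_f(int x, dfsan_label x_label, dfsan_label *ret_label);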
400  WK_Custom
401  };
402 
403  Module *Mod;
404  LLVMContext *Ctx;
405  Type *Int8Ptr;
406  IntegerType *OriginTy;
407  PointerType *OriginPtrTy;
408  ConstantInt *ZeroOrigin;
409  /// The shadow type for all primitive types and vector types.
410  IntegerType *PrimitiveShadowTy;
411  PointerType *PrimitiveShadowPtrTy;
412  IntegerType *IntptrTy;
413  ConstantInt *ZeroPrimitiveShadow;
414  Constant *ArgTLS;
415  ArrayType *ArgOriginTLSTy;
416  Constant *ArgOriginTLS;
417  Constant *RetvalTLS;
418  Constant *RetvalOriginTLS;
419  FunctionType *DFSanUnionLoadFnTy;
420  FunctionType *DFSanLoadLabelAndOriginFnTy;
421  FunctionType *DFSanUnimplementedFnTy;
422  FunctionType *DFSanSetLabelFnTy;
423  FunctionType *DFSanNonzeroLabelFnTy;
424  FunctionType *DFSanVarargWrapperFnTy;
425  FunctionType *DFSanCmpCallbackFnTy;
426  FunctionType *DFSanLoadStoreCallbackFnTy;
427  FunctionType *DFSanMemTransferCallbackFnTy;
428  FunctionType *DFSanChainOriginFnTy;
429  FunctionType *DFSanChainOriginIfTaintedFnTy;
430  FunctionType *DFSanMemOriginTransferFnTy;
431  FunctionType *DFSanMaybeStoreOriginFnTy;
432  FunctionCallee DFSanUnionLoadFn;
433  FunctionCallee DFSanLoadLabelAndOriginFn;
434  FunctionCallee DFSanUnimplementedFn;
435  FunctionCallee DFSanSetLabelFn;
436  FunctionCallee DFSanNonzeroLabelFn;
437  FunctionCallee DFSanVarargWrapperFn;
438  FunctionCallee DFSanLoadCallbackFn;
439  FunctionCallee DFSanStoreCallbackFn;
440  FunctionCallee DFSanMemTransferCallbackFn;
441  FunctionCallee DFSanCmpCallbackFn;
442  FunctionCallee DFSanChainOriginFn;
443  FunctionCallee DFSanChainOriginIfTaintedFn;
444  FunctionCallee DFSanMemOriginTransferFn;
445  FunctionCallee DFSanMaybeStoreOriginFn;
446  SmallPtrSet<Value *, 16> DFSanRuntimeFunctions;
447  MDNode *ColdCallWeights;
448  MDNode *OriginStoreWeights;
449  DFSanABIList ABIList;
450  DenseMap<Value *, Function *> UnwrappedFnMap;
451  AttrBuilder ReadOnlyNoneAttrs;
452 
453  /// Memory map parameters used in calculation mapping application addresses
454  /// to shadow addresses and origin addresses.
455  const MemoryMapParams *MapParams;
456 
457  Value *getShadowOffset(Value *Addr, IRBuilder<> &IRB);
458  Value *getShadowAddress(Value *Addr, Instruction *Pos);
459  Value *getShadowAddress(Value *Addr, Instruction *Pos, Value *ShadowOffset);
460  std::pair<Value *, Value *>
461  getShadowOriginAddress(Value *Addr, Align InstAlignment, Instruction *Pos);
462  bool isInstrumented(const Function *F);
463  bool isInstrumented(const GlobalAlias *GA);
464  bool isForceZeroLabels(const Function *F);
465  FunctionType *getTrampolineFunctionType(FunctionType *T);
466  TransformedFunction getCustomFunctionType(FunctionType *T);
467  WrapperKind getWrapperKind(Function *F);
468  void addGlobalNameSuffix(GlobalValue *GV);
469  Function *buildWrapperFunction(Function *F, StringRef NewFName,
470  GlobalValue::LinkageTypes NewFLink,
471  FunctionType *NewFT);
472  Constant *getOrBuildTrampolineFunction(FunctionType *FT, StringRef FName);
473  void initializeCallbackFunctions(Module &M);
474  void initializeRuntimeFunctions(Module &M);
475  void injectMetadataGlobals(Module &M);
476  bool initializeModule(Module &M);
477 
478  /// Advances \p OriginAddr to point to the next 32-bit origin and then loads
479  /// from it. Returns the origin's loaded value.
480  Value *loadNextOrigin(Instruction *Pos, Align OriginAlign,
481  Value **OriginAddr);
482 
483  /// Returns whether the given load byte size is amenable to inlined
484  /// optimization patterns.
485  bool hasLoadSizeForFastPath(uint64_t Size);
486 
487  /// Returns whether the pass tracks origins. Supports only TLS ABI mode.
488  bool shouldTrackOrigins();
489 
490  /// Returns a zero constant with the shadow type of OrigTy.
491  ///
492  /// getZeroShadow({T1,T2,...}) = {getZeroShadow(T1),getZeroShadow(T2),...}
493  /// getZeroShadow([n x T]) = [n x getZeroShadow(T)]
494  /// getZeroShadow(other type) = i8(0)
495  Constant *getZeroShadow(Type *OrigTy);
496  /// Returns a zero constant with the shadow type of V's type.
497  Constant *getZeroShadow(Value *V);
498 
499  /// Checks if V is a zero shadow.
500  bool isZeroShadow(Value *V);
501 
502  /// Returns the shadow type of OrigTy.
503  ///
504  /// getShadowTy({T1,T2,...}) = {getShadowTy(T1),getShadowTy(T2),...}
505  /// getShadowTy([n x T]) = [n x getShadowTy(T)]
506  /// getShadowTy(other type) = i8
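  /// For example (illustrative; with ShadowWidthBits == 8 the primitive shadow
  /// type is i8): getShadowTy({i64, [4 x i8*]}) = {i8, [4 x i8]}.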
507  Type *getShadowTy(Type *OrigTy);
508  /// Returns the shadow type of V's type.
509  Type *getShadowTy(Value *V);
510 
511  const uint64_t NumOfElementsInArgOrgTLS = ArgTLSSize / OriginWidthBytes;
512 
513 public:
514  DataFlowSanitizer(const std::vector<std::string> &ABIListFiles);
515 
516  bool runImpl(Module &M);
517 };
518 
519 struct DFSanFunction {
520  DataFlowSanitizer &DFS;
521  Function *F;
522  DominatorTree DT;
523  bool IsNativeABI;
524  bool IsForceZeroLabels;
525  AllocaInst *LabelReturnAlloca = nullptr;
526  AllocaInst *OriginReturnAlloca = nullptr;
527  DenseMap<Value *, Value *> ValShadowMap;
528  DenseMap<Value *, Value *> ValOriginMap;
529  DenseMap<AllocaInst *, AllocaInst *> AllocaShadowMap;
530  DenseMap<AllocaInst *, AllocaInst *> AllocaOriginMap;
531 
532  struct PHIFixupElement {
533  PHINode *Phi;
534  PHINode *ShadowPhi;
535  PHINode *OriginPhi;
536  };
537  std::vector<PHIFixupElement> PHIFixups;
538 
539  DenseSet<Instruction *> SkipInsts;
540  std::vector<Value *> NonZeroChecks;
541 
542  struct CachedShadow {
543  BasicBlock *Block; // The block where Shadow is defined.
544  Value *Shadow;
545  };
546  /// Maps a value to its latest shadow value in terms of the dominator tree.
547  DenseMap<std::pair<Value *, Value *>, CachedShadow> CachedShadows;
548  /// Maps a value to the latest collapsed shadow value it was converted to,
549  /// in terms of the dominator tree. When ClDebugNonzeroLabels is on, this
550  /// cache is used in a post-processing step where CFG blocks are split, so
551  /// it uses dominance between values rather than caching BasicBlocks.
552  DenseMap<Value *, Value *> CachedCollapsedShadows;
553  DenseMap<Value *, std::set<Value *>> ShadowElements;
554 
555  DFSanFunction(DataFlowSanitizer &DFS, Function *F, bool IsNativeABI,
556  bool IsForceZeroLabels)
557  : DFS(DFS), F(F), IsNativeABI(IsNativeABI),
558  IsForceZeroLabels(IsForceZeroLabels) {
559  DT.recalculate(*F);
560  }
561 
562  /// Computes the shadow address for a given function argument.
563  ///
564  /// Shadow = ArgTLS+ArgOffset.
565  Value *getArgTLS(Type *T, unsigned ArgOffset, IRBuilder<> &IRB);
566 
567  /// Computes the shadow address for a return value.
568  Value *getRetvalTLS(Type *T, IRBuilder<> &IRB);
569 
570  /// Computes the origin address for a given function argument.
571  ///
572  /// Origin = ArgOriginTLS[ArgNo].
573  Value *getArgOriginTLS(unsigned ArgNo, IRBuilder<> &IRB);
574 
575  /// Computes the origin address for a return value.
576  Value *getRetvalOriginTLS();
577 
578  Value *getOrigin(Value *V);
579  void setOrigin(Instruction *I, Value *Origin);
580  /// Generates IR to compute the origin of the last operand with a taint label.
581  Value *combineOperandOrigins(Instruction *Inst);
582  /// Before the instruction Pos, generates IR to compute the last origin with a
583  /// taint label. Labels and origins are from vectors Shadows and Origins
584  /// correspondingly. The generated IR is like
585  /// Sn-1 != Zero ? On-1: ... S2 != Zero ? O2: S1 != Zero ? O1: O0
586  /// When Zero is nullptr, ZeroPrimitiveShadow is used. Otherwise Zero may be
587  /// a zero constant with a different bit width.
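  /// For example (an illustrative sketch for three shadow/origin pairs
  /// (%s0,%o0), (%s1,%o1), (%s2,%o2) with i8 shadows and i32 origins):
  ///   %c1  = icmp ne i8 %s1, 0
  ///   %o01 = select i1 %c1, i32 %o1, i32 %o0
  ///   %c2  = icmp ne i8 %s2, 0
  ///   %res = select i1 %c2, i32 %o2, i32 %o01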
588  Value *combineOrigins(const std::vector<Value *> &Shadows,
589  const std::vector<Value *> &Origins, Instruction *Pos,
590  ConstantInt *Zero = nullptr);
591 
592  Value *getShadow(Value *V);
593  void setShadow(Instruction *I, Value *Shadow);
594  /// Generates IR to compute the union of the two given shadows, inserting it
595  /// before Pos. The combined value has primitive type.
596  Value *combineShadows(Value *V1, Value *V2, Instruction *Pos);
597  /// Combines the shadow values of V1 and V2, then converts the combined value
598  /// with primitive type into a shadow value with the original type T.
599  Value *combineShadowsThenConvert(Type *T, Value *V1, Value *V2,
600  Instruction *Pos);
601  Value *combineOperandShadows(Instruction *Inst);
602 
603  /// Generates IR to load shadow and origin corresponding to bytes [\p
604  /// Addr, \p Addr + \p Size), where Addr has alignment \p
605  /// InstAlignment, and takes the union of those shadows. The returned
606  /// shadow always has primitive type.
607  ///
608  /// When tracking loads is enabled, the returned origin is a chain at the
609  /// current stack if the returned shadow is tainted.
610  std::pair<Value *, Value *> loadShadowOrigin(Value *Addr, uint64_t Size,
611  Align InstAlignment,
612  Instruction *Pos);
613 
614  void storePrimitiveShadowOrigin(Value *Addr, uint64_t Size,
615  Align InstAlignment, Value *PrimitiveShadow,
616  Value *Origin, Instruction *Pos);
617  /// Applies PrimitiveShadow to all primitive subtypes of T, returning
618  /// the expanded shadow value.
619  ///
620  /// EFP({T1,T2, ...}, PS) = {EFP(T1,PS),EFP(T2,PS),...}
621  /// EFP([n x T], PS) = [n x EFP(T,PS)]
622  /// EFP(other types, PS) = PS
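  /// For example (illustrative): EFP({i64, [2 x float]}, PS) = {PS, [PS, PS]}.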
623  Value *expandFromPrimitiveShadow(Type *T, Value *PrimitiveShadow,
624  Instruction *Pos);
625  /// Collapses Shadow into a single primitive shadow value, unioning all
626  /// primitive shadow values in the process. Returns the final primitive
627  /// shadow value.
628  ///
629  /// CTP({V1,V2,...}) = UNION(CTP(V1),CTP(V2),...)
630  /// CTP([V1,V2,...]) = UNION(CTP(V1),CTP(V2),...)
631  /// CTP(V of other type) = V
632  Value *collapseToPrimitiveShadow(Value *Shadow, Instruction *Pos);
633 
634  void storeZeroPrimitiveShadow(Value *Addr, uint64_t Size, Align ShadowAlign,
635  Instruction *Pos);
636 
637  Align getShadowAlign(Align InstAlignment);
638 
639 private:
640  /// Collapses the shadow with aggregate type into a single primitive shadow
641  /// value.
642  template <class AggregateType>
643  Value *collapseAggregateShadow(AggregateType *AT, Value *Shadow,
644  IRBuilder<> &IRB);
645 
646  Value *collapseToPrimitiveShadow(Value *Shadow, IRBuilder<> &IRB);
647 
648  /// Returns the shadow value of an argument A.
649  Value *getShadowForTLSArgument(Argument *A);
650 
651  /// The fast path of loading shadows.
652  std::pair<Value *, Value *>
653  loadShadowFast(Value *ShadowAddr, Value *OriginAddr, uint64_t Size,
654  Align ShadowAlign, Align OriginAlign, Value *FirstOrigin,
655  Instruction *Pos);
656 
657  Align getOriginAlign(Align InstAlignment);
658 
659  /// Because 4 contiguous bytes share one 4-byte origin, the most accurate load
660  /// is __dfsan_load_label_and_origin. This function returns the union of all
661  /// labels and the origin of the first taint label. However, it is an
662  /// additional call with many instructions. To keep the common cases fast,
663  /// this checks whether labels and origins can be loaded without using the
664  /// callback function.
665  ///
666  /// When load tracking is enabled, we always use
667  /// __dfsan_load_label_and_origin to reduce code size.
668  bool useCallbackLoadLabelAndOrigin(uint64_t Size, Align InstAlignment);
669 
670  /// Returns a chain at the current stack with previous origin V.
671  Value *updateOrigin(Value *V, IRBuilder<> &IRB);
672 
673  /// Returns a chain at the current stack with previous origin V if Shadow is
674  /// tainted.
675  Value *updateOriginIfTainted(Value *Shadow, Value *Origin, IRBuilder<> &IRB);
676 
677  /// Creates an Intptr = Origin | Origin << 32 if Intptr's size is 64. Returns
678  /// Origin otherwise.
679  Value *originToIntptr(IRBuilder<> &IRB, Value *Origin);
680 
681  /// Stores Origin into the address range [StoreOriginAddr, StoreOriginAddr +
682  /// Size).
683  void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *StoreOriginAddr,
684  uint64_t StoreOriginSize, Align Alignment);
685 
686  /// Stores Origin in terms of its Shadow value.
687  /// * Do not write origins for zero shadows because we do not trace origins
688  /// for untainted sinks.
689  /// * Use __dfsan_maybe_store_origin if there are too many origin store
690  /// instrumentations.
691  void storeOrigin(Instruction *Pos, Value *Addr, uint64_t Size, Value *Shadow,
692  Value *Origin, Value *StoreOriginAddr, Align InstAlignment);
693 
694  /// Convert a scalar value to an i1 by comparing with 0.
695  Value *convertToBool(Value *V, IRBuilder<> &IRB, const Twine &Name = "");
696 
697  bool shouldInstrumentWithCall();
698 
699  /// Generates IR to load shadow and origin corresponding to bytes [\p
700  /// Addr, \p Addr + \p Size), where Addr has alignment \p
701  /// InstAlignment, and takes the union of those shadows. The returned
702  /// shadow always has primitive type.
703  std::pair<Value *, Value *>
704  loadShadowOriginSansLoadTracking(Value *Addr, uint64_t Size,
705  Align InstAlignment, Instruction *Pos);
706  int NumOriginStores = 0;
707 };
708 
709 class DFSanVisitor : public InstVisitor<DFSanVisitor> {
710 public:
711  DFSanFunction &DFSF;
712 
713  DFSanVisitor(DFSanFunction &DFSF) : DFSF(DFSF) {}
714 
715  const DataLayout &getDataLayout() const {
716  return DFSF.F->getParent()->getDataLayout();
717  }
718 
719  // Combines shadow values and origins for all of I's operands.
720  void visitInstOperands(Instruction &I);
721 
722  void visitUnaryOperator(UnaryOperator &UO);
723  void visitBinaryOperator(BinaryOperator &BO);
724  void visitBitCastInst(BitCastInst &BCI);
725  void visitCastInst(CastInst &CI);
726  void visitCmpInst(CmpInst &CI);
727  void visitLandingPadInst(LandingPadInst &LPI);
728  void visitGetElementPtrInst(GetElementPtrInst &GEPI);
729  void visitLoadInst(LoadInst &LI);
730  void visitStoreInst(StoreInst &SI);
731  void visitAtomicRMWInst(AtomicRMWInst &I);
732  void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I);
733  void visitReturnInst(ReturnInst &RI);
734  void visitCallBase(CallBase &CB);
735  void visitPHINode(PHINode &PN);
736  void visitExtractElementInst(ExtractElementInst &I);
737  void visitInsertElementInst(InsertElementInst &I);
738  void visitShuffleVectorInst(ShuffleVectorInst &I);
739  void visitExtractValueInst(ExtractValueInst &I);
740  void visitInsertValueInst(InsertValueInst &I);
741  void visitAllocaInst(AllocaInst &I);
742  void visitSelectInst(SelectInst &I);
743  void visitMemSetInst(MemSetInst &I);
744  void visitMemTransferInst(MemTransferInst &I);
745 
746 private:
747  void visitCASOrRMW(Align InstAlignment, Instruction &I);
748 
749  // Returns false when this is an invoke of a custom function.
750  bool visitWrappedCallBase(Function &F, CallBase &CB);
751 
752  // Combines origins for all of I's operands.
753  void visitInstOperandOrigins(Instruction &I);
754 
755  void addShadowArguments(Function &F, CallBase &CB, std::vector<Value *> &Args,
756  IRBuilder<> &IRB);
757 
758  void addOriginArguments(Function &F, CallBase &CB, std::vector<Value *> &Args,
759  IRBuilder<> &IRB);
760 };
761 
762 } // end anonymous namespace
763 
764 DataFlowSanitizer::DataFlowSanitizer(
765  const std::vector<std::string> &ABIListFiles) {
766  std::vector<std::string> AllABIListFiles(std::move(ABIListFiles));
767  llvm::append_range(AllABIListFiles, ClABIListFiles);
768  // FIXME: should we propagate vfs::FileSystem to this constructor?
769   ABIList.set(
770       SpecialCaseList::createOrDie(AllABIListFiles, *vfs::getRealFileSystem()));
771 }
772 
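// For illustration (an informal reading of the code below, not a normative ABI
// statement): without origin tracking, the trampoline type for i32 (i32, float)
// is i32 (i32 (i32, float)*, i32, float, i8, i8, i8*), i.e. a pointer to the
// original function, the original parameters, one primitive shadow per
// parameter, and a shadow return slot because the return type is non-void.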
773 FunctionType *DataFlowSanitizer::getTrampolineFunctionType(FunctionType *T) {
774  assert(!T->isVarArg());
775  SmallVector<Type *, 4> ArgTypes;
776  ArgTypes.push_back(T->getPointerTo());
777  ArgTypes.append(T->param_begin(), T->param_end());
778  ArgTypes.append(T->getNumParams(), PrimitiveShadowTy);
779  Type *RetType = T->getReturnType();
780  if (!RetType->isVoidTy())
781  ArgTypes.push_back(PrimitiveShadowPtrTy);
782 
783  if (shouldTrackOrigins()) {
784  ArgTypes.append(T->getNumParams(), OriginTy);
785  if (!RetType->isVoidTy())
786  ArgTypes.push_back(OriginPtrTy);
787  }
788 
789  return FunctionType::get(T->getReturnType(), ArgTypes, false);
790 }
791 
792 TransformedFunction DataFlowSanitizer::getCustomFunctionType(FunctionType *T) {
793  SmallVector<Type *, 4> ArgTypes;
794 
795  // Some parameters of the custom function being constructed are
796  // parameters of T. Record the mapping from parameters of T to
797  // parameters of the custom function, so that parameter attributes
798  // at call sites can be updated.
799  std::vector<unsigned> ArgumentIndexMapping;
800  for (unsigned I = 0, E = T->getNumParams(); I != E; ++I) {
801  Type *ParamType = T->getParamType(I);
802  FunctionType *FT;
803  if (isa<PointerType>(ParamType) &&
804  (FT = dyn_cast<FunctionType>(ParamType->getPointerElementType()))) {
805  ArgumentIndexMapping.push_back(ArgTypes.size());
806  ArgTypes.push_back(getTrampolineFunctionType(FT)->getPointerTo());
807  ArgTypes.push_back(Type::getInt8PtrTy(*Ctx));
808  } else {
809  ArgumentIndexMapping.push_back(ArgTypes.size());
810  ArgTypes.push_back(ParamType);
811  }
812  }
813  for (unsigned I = 0, E = T->getNumParams(); I != E; ++I)
814  ArgTypes.push_back(PrimitiveShadowTy);
815  if (T->isVarArg())
816  ArgTypes.push_back(PrimitiveShadowPtrTy);
817  Type *RetType = T->getReturnType();
818  if (!RetType->isVoidTy())
819  ArgTypes.push_back(PrimitiveShadowPtrTy);
820 
821  if (shouldTrackOrigins()) {
822  for (unsigned I = 0, E = T->getNumParams(); I != E; ++I)
823  ArgTypes.push_back(OriginTy);
824  if (T->isVarArg())
825  ArgTypes.push_back(OriginPtrTy);
826  if (!RetType->isVoidTy())
827  ArgTypes.push_back(OriginPtrTy);
828  }
829 
830  return TransformedFunction(
831  T, FunctionType::get(T->getReturnType(), ArgTypes, T->isVarArg()),
832  ArgumentIndexMapping);
833 }
834 
835 bool DataFlowSanitizer::isZeroShadow(Value *V) {
836  Type *T = V->getType();
837  if (!isa<ArrayType>(T) && !isa<StructType>(T)) {
838  if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
839  return CI->isZero();
840  return false;
841  }
842 
843  return isa<ConstantAggregateZero>(V);
844 }
845 
846 bool DataFlowSanitizer::hasLoadSizeForFastPath(uint64_t Size) {
847  uint64_t ShadowSize = Size * ShadowWidthBytes;
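  // With ShadowWidthBytes == 1, ShadowSize equals Size, so the fast path
  // covers 4-byte loads and loads whose size is a multiple of 8 bytes.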
848  return ShadowSize % 8 == 0 || ShadowSize == 4;
849 }
850 
851 bool DataFlowSanitizer::shouldTrackOrigins() {
852  static const bool ShouldTrackOrigins = ClTrackOrigins;
853  return ShouldTrackOrigins;
854 }
855 
856 Constant *DataFlowSanitizer::getZeroShadow(Type *OrigTy) {
857  if (!isa<ArrayType>(OrigTy) && !isa<StructType>(OrigTy))
858  return ZeroPrimitiveShadow;
859  Type *ShadowTy = getShadowTy(OrigTy);
860  return ConstantAggregateZero::get(ShadowTy);
861 }
862 
863 Constant *DataFlowSanitizer::getZeroShadow(Value *V) {
864  return getZeroShadow(V->getType());
865 }
866 
867 static Value *expandFromPrimitiveShadowRecursive(
868     Value *Shadow, SmallVector<unsigned, 4> &Indices, Type *SubShadowTy,
869  Value *PrimitiveShadow, IRBuilder<> &IRB) {
870  if (!isa<ArrayType>(SubShadowTy) && !isa<StructType>(SubShadowTy))
871  return IRB.CreateInsertValue(Shadow, PrimitiveShadow, Indices);
872 
873  if (ArrayType *AT = dyn_cast<ArrayType>(SubShadowTy)) {
874  for (unsigned Idx = 0; Idx < AT->getNumElements(); Idx++) {
875  Indices.push_back(Idx);
876       Shadow = expandFromPrimitiveShadowRecursive(
877           Shadow, Indices, AT->getElementType(), PrimitiveShadow, IRB);
878  Indices.pop_back();
879  }
880  return Shadow;
881  }
882 
883  if (StructType *ST = dyn_cast<StructType>(SubShadowTy)) {
884  for (unsigned Idx = 0; Idx < ST->getNumElements(); Idx++) {
885  Indices.push_back(Idx);
886       Shadow = expandFromPrimitiveShadowRecursive(
887           Shadow, Indices, ST->getElementType(Idx), PrimitiveShadow, IRB);
888  Indices.pop_back();
889  }
890  return Shadow;
891  }
892  llvm_unreachable("Unexpected shadow type");
893 }
894 
895 bool DFSanFunction::shouldInstrumentWithCall() {
896  return ClInstrumentWithCallThreshold >= 0 &&
897  NumOriginStores >= ClInstrumentWithCallThreshold;
898 }
899 
900 Value *DFSanFunction::expandFromPrimitiveShadow(Type *T, Value *PrimitiveShadow,
901  Instruction *Pos) {
902  Type *ShadowTy = DFS.getShadowTy(T);
903 
904  if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))
905  return PrimitiveShadow;
906 
907  if (DFS.isZeroShadow(PrimitiveShadow))
908  return DFS.getZeroShadow(ShadowTy);
909 
910  IRBuilder<> IRB(Pos);
911  SmallVector<unsigned, 4> Indices;
912  Value *Shadow = UndefValue::get(ShadowTy);
913  Shadow = expandFromPrimitiveShadowRecursive(Shadow, Indices, ShadowTy,
914  PrimitiveShadow, IRB);
915 
916  // Caches the primitive shadow value that built the shadow value.
917  CachedCollapsedShadows[Shadow] = PrimitiveShadow;
918  return Shadow;
919 }
920 
921 template <class AggregateType>
922 Value *DFSanFunction::collapseAggregateShadow(AggregateType *AT, Value *Shadow,
923  IRBuilder<> &IRB) {
924  if (!AT->getNumElements())
925  return DFS.ZeroPrimitiveShadow;
926 
927  Value *FirstItem = IRB.CreateExtractValue(Shadow, 0);
928  Value *Aggregator = collapseToPrimitiveShadow(FirstItem, IRB);
929 
930  for (unsigned Idx = 1; Idx < AT->getNumElements(); Idx++) {
931  Value *ShadowItem = IRB.CreateExtractValue(Shadow, Idx);
932  Value *ShadowInner = collapseToPrimitiveShadow(ShadowItem, IRB);
933  Aggregator = IRB.CreateOr(Aggregator, ShadowInner);
934  }
935  return Aggregator;
936 }
937 
938 Value *DFSanFunction::collapseToPrimitiveShadow(Value *Shadow,
939  IRBuilder<> &IRB) {
940  Type *ShadowTy = Shadow->getType();
941  if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))
942  return Shadow;
943  if (ArrayType *AT = dyn_cast<ArrayType>(ShadowTy))
944  return collapseAggregateShadow<>(AT, Shadow, IRB);
945  if (StructType *ST = dyn_cast<StructType>(ShadowTy))
946  return collapseAggregateShadow<>(ST, Shadow, IRB);
947  llvm_unreachable("Unexpected shadow type");
948 }
949 
950 Value *DFSanFunction::collapseToPrimitiveShadow(Value *Shadow,
951  Instruction *Pos) {
952  Type *ShadowTy = Shadow->getType();
953  if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))
954  return Shadow;
955 
956  // Checks if the cached collapsed shadow value dominates Pos.
957  Value *&CS = CachedCollapsedShadows[Shadow];
958  if (CS && DT.dominates(CS, Pos))
959  return CS;
960 
961  IRBuilder<> IRB(Pos);
962  Value *PrimitiveShadow = collapseToPrimitiveShadow(Shadow, IRB);
963  // Caches the converted primitive shadow value.
964  CS = PrimitiveShadow;
965  return PrimitiveShadow;
966 }
967 
968 Type *DataFlowSanitizer::getShadowTy(Type *OrigTy) {
969  if (!OrigTy->isSized())
970  return PrimitiveShadowTy;
971  if (isa<IntegerType>(OrigTy))
972  return PrimitiveShadowTy;
973  if (isa<VectorType>(OrigTy))
974  return PrimitiveShadowTy;
975  if (ArrayType *AT = dyn_cast<ArrayType>(OrigTy))
976  return ArrayType::get(getShadowTy(AT->getElementType()),
977  AT->getNumElements());
978  if (StructType *ST = dyn_cast<StructType>(OrigTy)) {
979  SmallVector<Type *, 4> Elements;
980  for (unsigned I = 0, N = ST->getNumElements(); I < N; ++I)
981  Elements.push_back(getShadowTy(ST->getElementType(I)));
982  return StructType::get(*Ctx, Elements);
983  }
984  return PrimitiveShadowTy;
985 }
986 
987 Type *DataFlowSanitizer::getShadowTy(Value *V) {
988  return getShadowTy(V->getType());
989 }
990 
991 bool DataFlowSanitizer::initializeModule(Module &M) {
992  Triple TargetTriple(M.getTargetTriple());
993  const DataLayout &DL = M.getDataLayout();
994 
995  if (TargetTriple.getOS() != Triple::Linux)
996  report_fatal_error("unsupported operating system");
997  if (TargetTriple.getArch() != Triple::x86_64)
998  report_fatal_error("unsupported architecture");
999  MapParams = &Linux_X86_64_MemoryMapParams;
1000 
1001  Mod = &M;
1002  Ctx = &M.getContext();
1003  Int8Ptr = Type::getInt8PtrTy(*Ctx);
1004  OriginTy = IntegerType::get(*Ctx, OriginWidthBits);
1005  OriginPtrTy = PointerType::getUnqual(OriginTy);
1006  PrimitiveShadowTy = IntegerType::get(*Ctx, ShadowWidthBits);
1007  PrimitiveShadowPtrTy = PointerType::getUnqual(PrimitiveShadowTy);
1008  IntptrTy = DL.getIntPtrType(*Ctx);
1009  ZeroPrimitiveShadow = ConstantInt::getSigned(PrimitiveShadowTy, 0);
1010  ZeroOrigin = ConstantInt::getSigned(OriginTy, 0);
1011 
1012  Type *DFSanUnionLoadArgs[2] = {PrimitiveShadowPtrTy, IntptrTy};
1013  DFSanUnionLoadFnTy = FunctionType::get(PrimitiveShadowTy, DFSanUnionLoadArgs,
1014  /*isVarArg=*/false);
1015  Type *DFSanLoadLabelAndOriginArgs[2] = {Int8Ptr, IntptrTy};
1016  DFSanLoadLabelAndOriginFnTy =
1017  FunctionType::get(IntegerType::get(*Ctx, 64), DFSanLoadLabelAndOriginArgs,
1018  /*isVarArg=*/false);
1019  DFSanUnimplementedFnTy = FunctionType::get(
1020  Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false);
1021  Type *DFSanSetLabelArgs[4] = {PrimitiveShadowTy, OriginTy,
1022  Type::getInt8PtrTy(*Ctx), IntptrTy};
1023  DFSanSetLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx),
1024  DFSanSetLabelArgs, /*isVarArg=*/false);
1025  DFSanNonzeroLabelFnTy =
1026  FunctionType::get(Type::getVoidTy(*Ctx), None, /*isVarArg=*/false);
1027  DFSanVarargWrapperFnTy = FunctionType::get(
1028  Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false);
1029  DFSanCmpCallbackFnTy =
1030  FunctionType::get(Type::getVoidTy(*Ctx), PrimitiveShadowTy,
1031  /*isVarArg=*/false);
1032  DFSanChainOriginFnTy =
1033  FunctionType::get(OriginTy, OriginTy, /*isVarArg=*/false);
1034  Type *DFSanChainOriginIfTaintedArgs[2] = {PrimitiveShadowTy, OriginTy};
1035  DFSanChainOriginIfTaintedFnTy = FunctionType::get(
1036  OriginTy, DFSanChainOriginIfTaintedArgs, /*isVarArg=*/false);
1037  Type *DFSanMaybeStoreOriginArgs[4] = {IntegerType::get(*Ctx, ShadowWidthBits),
1038  Int8Ptr, IntptrTy, OriginTy};
1039  DFSanMaybeStoreOriginFnTy = FunctionType::get(
1040  Type::getVoidTy(*Ctx), DFSanMaybeStoreOriginArgs, /*isVarArg=*/false);
1041  Type *DFSanMemOriginTransferArgs[3] = {Int8Ptr, Int8Ptr, IntptrTy};
1042  DFSanMemOriginTransferFnTy = FunctionType::get(
1043  Type::getVoidTy(*Ctx), DFSanMemOriginTransferArgs, /*isVarArg=*/false);
1044  Type *DFSanLoadStoreCallbackArgs[2] = {PrimitiveShadowTy, Int8Ptr};
1045  DFSanLoadStoreCallbackFnTy =
1046  FunctionType::get(Type::getVoidTy(*Ctx), DFSanLoadStoreCallbackArgs,
1047  /*isVarArg=*/false);
1048  Type *DFSanMemTransferCallbackArgs[2] = {PrimitiveShadowPtrTy, IntptrTy};
1049  DFSanMemTransferCallbackFnTy =
1050  FunctionType::get(Type::getVoidTy(*Ctx), DFSanMemTransferCallbackArgs,
1051  /*isVarArg=*/false);
1052 
1053  ColdCallWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000);
1054  OriginStoreWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000);
1055  return true;
1056 }
1057 
1058 bool DataFlowSanitizer::isInstrumented(const Function *F) {
1059  return !ABIList.isIn(*F, "uninstrumented");
1060 }
1061 
1062 bool DataFlowSanitizer::isInstrumented(const GlobalAlias *GA) {
1063  return !ABIList.isIn(*GA, "uninstrumented");
1064 }
1065 
1066 bool DataFlowSanitizer::isForceZeroLabels(const Function *F) {
1067  return ABIList.isIn(*F, "force_zero_labels");
1068 }
1069 
1070 DataFlowSanitizer::WrapperKind DataFlowSanitizer::getWrapperKind(Function *F) {
1071  if (ABIList.isIn(*F, "functional"))
1072  return WK_Functional;
1073  if (ABIList.isIn(*F, "discard"))
1074  return WK_Discard;
1075  if (ABIList.isIn(*F, "custom"))
1076  return WK_Custom;
1077 
1078  return WK_Warning;
1079 }
1080 
1081 void DataFlowSanitizer::addGlobalNameSuffix(GlobalValue *GV) {
1082  std::string GVName = std::string(GV->getName()), Suffix = ".dfsan";
1083  GV->setName(GVName + Suffix);
1084 
1085  // Try to change the name of the function in module inline asm. We only do
1086  // this for specific asm directives, currently only ".symver", to try to avoid
1087  // corrupting asm which happens to contain the symbol name as a substring.
1088  // Note that the substitution for .symver assumes that the versioned symbol
1089  // also has an instrumented name.
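  // For example, module inline asm containing ".symver f,f@@VER_1" is
  // rewritten, once f has been renamed to f.dfsan, to
  // ".symver f.dfsan,f.dfsan@@VER_1".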
1090  std::string Asm = GV->getParent()->getModuleInlineAsm();
1091  std::string SearchStr = ".symver " + GVName + ",";
1092  size_t Pos = Asm.find(SearchStr);
1093  if (Pos != std::string::npos) {
1094  Asm.replace(Pos, SearchStr.size(), ".symver " + GVName + Suffix + ",");
1095  Pos = Asm.find("@");
1096 
1097  if (Pos == std::string::npos)
1098  report_fatal_error(Twine("unsupported .symver: ", Asm));
1099 
1100     Asm.replace(Pos, 1, Suffix + "@");
1101     GV->getParent()->setModuleInlineAsm(Asm);
1102  }
1103 }
1104 
1105 Function *
1106 DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName,
1107  GlobalValue::LinkageTypes NewFLink,
1108  FunctionType *NewFT) {
1109  FunctionType *FT = F->getFunctionType();
1110  Function *NewF = Function::Create(NewFT, NewFLink, F->getAddressSpace(),
1111  NewFName, F->getParent());
1112  NewF->copyAttributesFrom(F);
1113   NewF->removeRetAttrs(
1114       AttributeFuncs::typeIncompatible(NewFT->getReturnType()));
1115 
1116  BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF);
1117  if (F->isVarArg()) {
1118  NewF->removeFnAttrs(AttrBuilder().addAttribute("split-stack"));
1119  CallInst::Create(DFSanVarargWrapperFn,
1120  IRBuilder<>(BB).CreateGlobalStringPtr(F->getName()), "",
1121  BB);
1122  new UnreachableInst(*Ctx, BB);
1123  } else {
1124  auto ArgIt = pointer_iterator<Argument *>(NewF->arg_begin());
1125  std::vector<Value *> Args(ArgIt, ArgIt + FT->getNumParams());
1126 
1127  CallInst *CI = CallInst::Create(F, Args, "", BB);
1128  if (FT->getReturnType()->isVoidTy())
1129  ReturnInst::Create(*Ctx, BB);
1130  else
1131  ReturnInst::Create(*Ctx, CI, BB);
1132  }
1133 
1134  return NewF;
1135 }
1136 
1137 Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT,
1138  StringRef FName) {
1139  FunctionType *FTT = getTrampolineFunctionType(FT);
1140  FunctionCallee C = Mod->getOrInsertFunction(FName, FTT);
1141  Function *F = dyn_cast<Function>(C.getCallee());
1142  if (F && F->isDeclaration()) {
1143  F->setLinkage(GlobalValue::LinkOnceODRLinkage);
1144  BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F);
1145  std::vector<Value *> Args;
1146  Function::arg_iterator AI = F->arg_begin() + 1;
1147  for (unsigned N = FT->getNumParams(); N != 0; ++AI, --N)
1148  Args.push_back(&*AI);
1149  CallInst *CI = CallInst::Create(FT, &*F->arg_begin(), Args, "", BB);
1150  Type *RetType = FT->getReturnType();
1151  ReturnInst *RI = RetType->isVoidTy() ? ReturnInst::Create(*Ctx, BB)
1152  : ReturnInst::Create(*Ctx, CI, BB);
1153 
1154  // F is called by a wrapped custom function with primitive shadows. So
1155  // its arguments and return value need conversion.
1156  DFSanFunction DFSF(*this, F, /*IsNativeABI=*/true,
1157  /*ForceZeroLabels=*/false);
1158  Function::arg_iterator ValAI = F->arg_begin(), ShadowAI = AI;
1159  ++ValAI;
1160  for (unsigned N = FT->getNumParams(); N != 0; ++ValAI, ++ShadowAI, --N) {
1161  Value *Shadow =
1162  DFSF.expandFromPrimitiveShadow(ValAI->getType(), &*ShadowAI, CI);
1163  DFSF.ValShadowMap[&*ValAI] = Shadow;
1164  }
1165  Function::arg_iterator RetShadowAI = ShadowAI;
1166  const bool ShouldTrackOrigins = shouldTrackOrigins();
1167  if (ShouldTrackOrigins) {
1168  ValAI = F->arg_begin();
1169  ++ValAI;
1170  Function::arg_iterator OriginAI = ShadowAI;
1171  if (!RetType->isVoidTy())
1172  ++OriginAI;
1173  for (unsigned N = FT->getNumParams(); N != 0; ++ValAI, ++OriginAI, --N) {
1174  DFSF.ValOriginMap[&*ValAI] = &*OriginAI;
1175  }
1176  }
1177  DFSanVisitor(DFSF).visitCallInst(*CI);
1178  if (!RetType->isVoidTy()) {
1179  Value *PrimitiveShadow = DFSF.collapseToPrimitiveShadow(
1180  DFSF.getShadow(RI->getReturnValue()), RI);
1181  new StoreInst(PrimitiveShadow, &*RetShadowAI, RI);
1182  if (ShouldTrackOrigins) {
1183  Value *Origin = DFSF.getOrigin(RI->getReturnValue());
1184  new StoreInst(Origin, &*std::prev(F->arg_end()), RI);
1185  }
1186  }
1187  }
1188 
1189  return cast<Constant>(C.getCallee());
1190 }
1191 
1192 // Initializes DataFlowSanitizer runtime functions and declares them in the
1193 // module.
1193 void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) {
1194  {
1195  AttributeList AL;
1196  AL = AL.addFnAttribute(M.getContext(), Attribute::NoUnwind);
1197  AL = AL.addFnAttribute(M.getContext(), Attribute::ReadOnly);
1198  AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt);
1199  DFSanUnionLoadFn =
1200  Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy, AL);
1201  }
1202  {
1203  AttributeList AL;
1204  AL = AL.addFnAttribute(M.getContext(), Attribute::NoUnwind);
1205  AL = AL.addFnAttribute(M.getContext(), Attribute::ReadOnly);
1206  AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt);
1207  DFSanLoadLabelAndOriginFn = Mod->getOrInsertFunction(
1208  "__dfsan_load_label_and_origin", DFSanLoadLabelAndOriginFnTy, AL);
1209  }
1210  DFSanUnimplementedFn =
1211  Mod->getOrInsertFunction("__dfsan_unimplemented", DFSanUnimplementedFnTy);
1212  {
1213  AttributeList AL;
1214  AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1215  AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
1216  DFSanSetLabelFn =
1217  Mod->getOrInsertFunction("__dfsan_set_label", DFSanSetLabelFnTy, AL);
1218  }
1219  DFSanNonzeroLabelFn =
1220  Mod->getOrInsertFunction("__dfsan_nonzero_label", DFSanNonzeroLabelFnTy);
1221  DFSanVarargWrapperFn = Mod->getOrInsertFunction("__dfsan_vararg_wrapper",
1222  DFSanVarargWrapperFnTy);
1223  {
1224  AttributeList AL;
1225  AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1226  AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt);
1227  DFSanChainOriginFn = Mod->getOrInsertFunction("__dfsan_chain_origin",
1228  DFSanChainOriginFnTy, AL);
1229  }
1230  {
1231  AttributeList AL;
1232  AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1233  AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
1234  AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt);
1235  DFSanChainOriginIfTaintedFn = Mod->getOrInsertFunction(
1236  "__dfsan_chain_origin_if_tainted", DFSanChainOriginIfTaintedFnTy, AL);
1237  }
1238  DFSanMemOriginTransferFn = Mod->getOrInsertFunction(
1239  "__dfsan_mem_origin_transfer", DFSanMemOriginTransferFnTy);
1240 
1241  {
1242  AttributeList AL;
1243  AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1244  AL = AL.addParamAttribute(M.getContext(), 3, Attribute::ZExt);
1245  DFSanMaybeStoreOriginFn = Mod->getOrInsertFunction(
1246  "__dfsan_maybe_store_origin", DFSanMaybeStoreOriginFnTy, AL);
1247  }
1248 
1249  DFSanRuntimeFunctions.insert(
1250  DFSanUnionLoadFn.getCallee()->stripPointerCasts());
1251  DFSanRuntimeFunctions.insert(
1252  DFSanLoadLabelAndOriginFn.getCallee()->stripPointerCasts());
1253  DFSanRuntimeFunctions.insert(
1254  DFSanUnimplementedFn.getCallee()->stripPointerCasts());
1255  DFSanRuntimeFunctions.insert(
1256  DFSanSetLabelFn.getCallee()->stripPointerCasts());
1257  DFSanRuntimeFunctions.insert(
1258  DFSanNonzeroLabelFn.getCallee()->stripPointerCasts());
1259  DFSanRuntimeFunctions.insert(
1260  DFSanVarargWrapperFn.getCallee()->stripPointerCasts());
1261  DFSanRuntimeFunctions.insert(
1262  DFSanLoadCallbackFn.getCallee()->stripPointerCasts());
1263  DFSanRuntimeFunctions.insert(
1264  DFSanStoreCallbackFn.getCallee()->stripPointerCasts());
1265  DFSanRuntimeFunctions.insert(
1266  DFSanMemTransferCallbackFn.getCallee()->stripPointerCasts());
1267  DFSanRuntimeFunctions.insert(
1268  DFSanCmpCallbackFn.getCallee()->stripPointerCasts());
1269  DFSanRuntimeFunctions.insert(
1270  DFSanChainOriginFn.getCallee()->stripPointerCasts());
1271  DFSanRuntimeFunctions.insert(
1272  DFSanChainOriginIfTaintedFn.getCallee()->stripPointerCasts());
1273  DFSanRuntimeFunctions.insert(
1274  DFSanMemOriginTransferFn.getCallee()->stripPointerCasts());
1275  DFSanRuntimeFunctions.insert(
1276  DFSanMaybeStoreOriginFn.getCallee()->stripPointerCasts());
1277 }
1278 
1279 // Initializes event callback functions and declares them in the module.
1280 void DataFlowSanitizer::initializeCallbackFunctions(Module &M) {
1281  DFSanLoadCallbackFn = Mod->getOrInsertFunction("__dfsan_load_callback",
1282  DFSanLoadStoreCallbackFnTy);
1283  DFSanStoreCallbackFn = Mod->getOrInsertFunction("__dfsan_store_callback",
1284  DFSanLoadStoreCallbackFnTy);
1285  DFSanMemTransferCallbackFn = Mod->getOrInsertFunction(
1286  "__dfsan_mem_transfer_callback", DFSanMemTransferCallbackFnTy);
1287  DFSanCmpCallbackFn =
1288  Mod->getOrInsertFunction("__dfsan_cmp_callback", DFSanCmpCallbackFnTy);
1289 }
1290 
1291 void DataFlowSanitizer::injectMetadataGlobals(Module &M) {
1292  // These variables can be used:
1293  // - by the runtime (to discover what the shadow width was, during
1294  // compilation)
1295  // - in testing (to avoid hardcoding the shadow width and type but instead
1296  // extract them by pattern matching)
1297  Type *IntTy = Type::getInt32Ty(*Ctx);
1298  (void)Mod->getOrInsertGlobal("__dfsan_shadow_width_bits", IntTy, [&] {
1299  return new GlobalVariable(
1300  M, IntTy, /*isConstant=*/true, GlobalValue::WeakODRLinkage,
1301  ConstantInt::get(IntTy, ShadowWidthBits), "__dfsan_shadow_width_bits");
1302  });
1303  (void)Mod->getOrInsertGlobal("__dfsan_shadow_width_bytes", IntTy, [&] {
1304     return new GlobalVariable(M, IntTy, /*isConstant=*/true,
1305                               GlobalValue::WeakODRLinkage,
1306  ConstantInt::get(IntTy, ShadowWidthBytes),
1307  "__dfsan_shadow_width_bytes");
1308  });
1309 }
1310 
1311 bool DataFlowSanitizer::runImpl(Module &M) {
1312   initializeModule(M);
1313 
1314  if (ABIList.isIn(M, "skip"))
1315  return false;
1316 
1317  const unsigned InitialGlobalSize = M.global_size();
1318  const unsigned InitialModuleSize = M.size();
1319 
1320  bool Changed = false;
1321 
1322  auto GetOrInsertGlobal = [this, &Changed](StringRef Name,
1323  Type *Ty) -> Constant * {
1324  Constant *C = Mod->getOrInsertGlobal(Name, Ty);
1325  if (GlobalVariable *G = dyn_cast<GlobalVariable>(C)) {
1326  Changed |= G->getThreadLocalMode() != GlobalVariable::InitialExecTLSModel;
1327  G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel);
1328  }
1329  return C;
1330  };
1331 
1332  // These globals must be kept in sync with the ones in dfsan.cpp.
1333  ArgTLS =
1334  GetOrInsertGlobal("__dfsan_arg_tls",
1335  ArrayType::get(Type::getInt64Ty(*Ctx), ArgTLSSize / 8));
1336  RetvalTLS = GetOrInsertGlobal(
1337  "__dfsan_retval_tls",
1338  ArrayType::get(Type::getInt64Ty(*Ctx), RetvalTLSSize / 8));
1339  ArgOriginTLSTy = ArrayType::get(OriginTy, NumOfElementsInArgOrgTLS);
1340  ArgOriginTLS = GetOrInsertGlobal("__dfsan_arg_origin_tls", ArgOriginTLSTy);
1341  RetvalOriginTLS = GetOrInsertGlobal("__dfsan_retval_origin_tls", OriginTy);
1342 
1343  (void)Mod->getOrInsertGlobal("__dfsan_track_origins", OriginTy, [&] {
1344  Changed = true;
1345  return new GlobalVariable(
1346  M, OriginTy, true, GlobalValue::WeakODRLinkage,
1347  ConstantInt::getSigned(OriginTy,
1348  shouldTrackOrigins() ? ClTrackOrigins : 0),
1349  "__dfsan_track_origins");
1350  });
1351 
1352  injectMetadataGlobals(M);
1353 
1354  initializeCallbackFunctions(M);
1355  initializeRuntimeFunctions(M);
1356 
1357  std::vector<Function *> FnsToInstrument;
1358  SmallPtrSet<Function *, 2> FnsWithNativeABI;
1359  SmallPtrSet<Function *, 2> FnsWithForceZeroLabel;
1360  for (Function &F : M)
1361  if (!F.isIntrinsic() && !DFSanRuntimeFunctions.contains(&F))
1362  FnsToInstrument.push_back(&F);
1363 
1364  // Give function aliases suffixes when necessary, and build wrappers where the
1365  // instrumentedness is inconsistent.
1366  for (GlobalAlias &GA : llvm::make_early_inc_range(M.aliases())) {
1367  // Don't stop on weak. We assume people aren't playing games with the
1368  // instrumentedness of overridden weak aliases.
1369  auto *F = dyn_cast<Function>(GA.getAliaseeObject());
1370  if (!F)
1371  continue;
1372 
1373  bool GAInst = isInstrumented(&GA), FInst = isInstrumented(F);
1374  if (GAInst && FInst) {
1375  addGlobalNameSuffix(&GA);
1376  } else if (GAInst != FInst) {
1377  // Non-instrumented alias of an instrumented function, or vice versa.
1378  // Replace the alias with a native-ABI wrapper of the aliasee. The pass
1379  // below will take care of instrumenting it.
1380  Function *NewF =
1381  buildWrapperFunction(F, "", GA.getLinkage(), F->getFunctionType());
1382  GA.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, GA.getType()));
1383  NewF->takeName(&GA);
1384  GA.eraseFromParent();
1385  FnsToInstrument.push_back(NewF);
1386  }
1387  }
1388 
1389  ReadOnlyNoneAttrs.addAttribute(Attribute::ReadOnly)
1390  .addAttribute(Attribute::ReadNone);
1391 
1392  // First, change the ABI of every function in the module. ABI-listed
1393  // functions keep their original ABI and get a wrapper function.
1394  for (std::vector<Function *>::iterator FI = FnsToInstrument.begin(),
1395  FE = FnsToInstrument.end();
1396  FI != FE; ++FI) {
1397  Function &F = **FI;
1398  FunctionType *FT = F.getFunctionType();
1399 
1400  bool IsZeroArgsVoidRet = (FT->getNumParams() == 0 && !FT->isVarArg() &&
1401  FT->getReturnType()->isVoidTy());
1402 
1403  if (isInstrumented(&F)) {
1404  if (isForceZeroLabels(&F))
1405  FnsWithForceZeroLabel.insert(&F);
1406 
1407  // Instrumented functions get a '.dfsan' suffix. This allows us to more
1408  // easily identify cases of mismatching ABIs. This naming scheme is
1409  // mangling-compatible (see Itanium ABI), using a vendor-specific suffix.
1410  addGlobalNameSuffix(&F);
1411  } else if (!IsZeroArgsVoidRet || getWrapperKind(&F) == WK_Custom) {
1412  // Build a wrapper function for F. The wrapper simply calls F, and is
1413  // added to FnsToInstrument so that any instrumentation according to its
1414  // WrapperKind is done in the second pass below.
1415 
1416  // If the function being wrapped has local linkage, then preserve the
1417  // function's linkage in the wrapper function.
1418  GlobalValue::LinkageTypes WrapperLinkage =
1419  F.hasLocalLinkage() ? F.getLinkage()
1420  : GlobalValue::LinkOnceODRLinkage;
1421 
1422  Function *NewF = buildWrapperFunction(
1423  &F,
1424  (shouldTrackOrigins() ? std::string("dfso$") : std::string("dfsw$")) +
1425  std::string(F.getName()),
1426  WrapperLinkage, FT);
1427  NewF->removeFnAttrs(ReadOnlyNoneAttrs);
1428 
1429  Value *WrappedFnCst =
1430  ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT));
1431  F.replaceAllUsesWith(WrappedFnCst);
1432 
1433  UnwrappedFnMap[WrappedFnCst] = &F;
1434  *FI = NewF;
1435 
1436  if (!F.isDeclaration()) {
1437  // This function is probably defining an interposition of an
1438  // uninstrumented function and hence needs to keep the original ABI.
1439  // But any functions it may call need to use the instrumented ABI, so
1440  // we instrument it in a mode which preserves the original ABI.
1441  FnsWithNativeABI.insert(&F);
1442 
1443  // This code needs to rebuild the iterators, as they may be invalidated
1444  // by the push_back, taking care that the new range does not include
1445  // any functions added by this code.
1446  size_t N = FI - FnsToInstrument.begin(),
1447  Count = FE - FnsToInstrument.begin();
1448  FnsToInstrument.push_back(&F);
1449  FI = FnsToInstrument.begin() + N;
1450  FE = FnsToInstrument.begin() + Count;
1451  }
1452  // Hopefully, nobody will try to indirectly call a vararg
1453  // function... yet.
1454  } else if (FT->isVarArg()) {
1455  UnwrappedFnMap[&F] = &F;
1456  *FI = nullptr;
1457  }
1458  }
1459 
1460  for (Function *F : FnsToInstrument) {
1461  if (!F || F->isDeclaration())
1462  continue;
1463 
1464  removeUnreachableBlocks(*F);
1465 
1466  DFSanFunction DFSF(*this, F, FnsWithNativeABI.count(F),
1467  FnsWithForceZeroLabel.count(F));
1468 
1469  // DFSanVisitor may create new basic blocks, which confuses df_iterator.
1470  // Build a copy of the list before iterating over it.
1471  SmallVector<BasicBlock *, 4> BBList(depth_first(&F->getEntryBlock()));
1472 
1473  for (BasicBlock *BB : BBList) {
1474  Instruction *Inst = &BB->front();
1475  while (true) {
1476  // DFSanVisitor may split the current basic block, changing the current
1477  // instruction's next pointer and moving the next instruction to the
1478  // tail block from which we should continue.
1479  Instruction *Next = Inst->getNextNode();
1480  // DFSanVisitor may delete Inst, so keep track of whether it was a
1481  // terminator.
1482  bool IsTerminator = Inst->isTerminator();
1483  if (!DFSF.SkipInsts.count(Inst))
1484  DFSanVisitor(DFSF).visit(Inst);
1485  if (IsTerminator)
1486  break;
1487  Inst = Next;
1488  }
1489  }
1490 
1491  // We will not necessarily be able to compute the shadow for every phi node
1492  // until we have visited every block. Therefore, the code that handles phi
1493  // nodes adds them to the PHIFixups list so that they can be properly
1494  // handled here.
1495  for (DFSanFunction::PHIFixupElement &P : DFSF.PHIFixups) {
1496  for (unsigned Val = 0, N = P.Phi->getNumIncomingValues(); Val != N;
1497  ++Val) {
1498  P.ShadowPhi->setIncomingValue(
1499  Val, DFSF.getShadow(P.Phi->getIncomingValue(Val)));
1500  if (P.OriginPhi)
1501  P.OriginPhi->setIncomingValue(
1502  Val, DFSF.getOrigin(P.Phi->getIncomingValue(Val)));
1503  }
1504  }
1505 
1506  // -dfsan-debug-nonzero-labels will split the CFG in all kinds of crazy
1507  // places (i.e. instructions in basic blocks we haven't even begun visiting
1508  // yet). To make our life easier, do this work in a pass after the main
1509  // instrumentation.
1510  if (ClDebugNonzeroLabels) {
1511  for (Value *V : DFSF.NonZeroChecks) {
1512  Instruction *Pos;
1513  if (Instruction *I = dyn_cast<Instruction>(V))
1514  Pos = I->getNextNode();
1515  else
1516  Pos = &DFSF.F->getEntryBlock().front();
1517  while (isa<PHINode>(Pos) || isa<AllocaInst>(Pos))
1518  Pos = Pos->getNextNode();
1519  IRBuilder<> IRB(Pos);
1520  Value *PrimitiveShadow = DFSF.collapseToPrimitiveShadow(V, Pos);
1521  Value *Ne =
1522  IRB.CreateICmpNE(PrimitiveShadow, DFSF.DFS.ZeroPrimitiveShadow);
1523  BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
1524  Ne, Pos, /*Unreachable=*/false, ColdCallWeights));
1525  IRBuilder<> ThenIRB(BI);
1526  ThenIRB.CreateCall(DFSF.DFS.DFSanNonzeroLabelFn, {});
1527  }
1528  }
1529  }
1530 
1531  return Changed || !FnsToInstrument.empty() ||
1532  M.global_size() != InitialGlobalSize || M.size() != InitialModuleSize;
1533 }
1534 
1535 Value *DFSanFunction::getArgTLS(Type *T, unsigned ArgOffset, IRBuilder<> &IRB) {
1536  Value *Base = IRB.CreatePointerCast(DFS.ArgTLS, DFS.IntptrTy);
1537  if (ArgOffset)
1538  Base = IRB.CreateAdd(Base, ConstantInt::get(DFS.IntptrTy, ArgOffset));
1539  return IRB.CreateIntToPtr(Base, PointerType::get(DFS.getShadowTy(T), 0),
1540  "_dfsarg");
1541 }
1542 
1543 Value *DFSanFunction::getRetvalTLS(Type *T, IRBuilder<> &IRB) {
1544  return IRB.CreatePointerCast(
1545  DFS.RetvalTLS, PointerType::get(DFS.getShadowTy(T), 0), "_dfsret");
1546 }
1547 
1548 Value *DFSanFunction::getRetvalOriginTLS() { return DFS.RetvalOriginTLS; }
1549 
1550 Value *DFSanFunction::getArgOriginTLS(unsigned ArgNo, IRBuilder<> &IRB) {
1551  return IRB.CreateConstGEP2_64(DFS.ArgOriginTLSTy, DFS.ArgOriginTLS, 0, ArgNo,
1552  "_dfsarg_o");
1553 }
1554 
1555 Value *DFSanFunction::getOrigin(Value *V) {
1556  assert(DFS.shouldTrackOrigins());
1557  if (!isa<Argument>(V) && !isa<Instruction>(V))
1558  return DFS.ZeroOrigin;
1559  Value *&Origin = ValOriginMap[V];
1560  if (!Origin) {
1561  if (Argument *A = dyn_cast<Argument>(V)) {
1562  if (IsNativeABI)
1563  return DFS.ZeroOrigin;
1564  if (A->getArgNo() < DFS.NumOfElementsInArgOrgTLS) {
1565  Instruction *ArgOriginTLSPos = &*F->getEntryBlock().begin();
1566  IRBuilder<> IRB(ArgOriginTLSPos);
1567  Value *ArgOriginPtr = getArgOriginTLS(A->getArgNo(), IRB);
1568  Origin = IRB.CreateLoad(DFS.OriginTy, ArgOriginPtr);
1569  } else {
1570  // Overflow
1571  Origin = DFS.ZeroOrigin;
1572  }
1573  } else {
1574  Origin = DFS.ZeroOrigin;
1575  }
1576  }
1577  return Origin;
1578 }
1579 
1580 void DFSanFunction::setOrigin(Instruction *I, Value *Origin) {
1581  if (!DFS.shouldTrackOrigins())
1582  return;
1583  assert(!ValOriginMap.count(I));
1584  assert(Origin->getType() == DFS.OriginTy);
1585  ValOriginMap[I] = Origin;
1586 }
1587 
1588 Value *DFSanFunction::getShadowForTLSArgument(Argument *A) {
1589  unsigned ArgOffset = 0;
1590  const DataLayout &DL = F->getParent()->getDataLayout();
1591  for (auto &FArg : F->args()) {
1592  if (!FArg.getType()->isSized()) {
1593  if (A == &FArg)
1594  break;
1595  continue;
1596  }
1597 
1598  unsigned Size = DL.getTypeAllocSize(DFS.getShadowTy(&FArg));
1599  if (A != &FArg) {
1600  ArgOffset += alignTo(Size, ShadowTLSAlignment);
1601  if (ArgOffset > ArgTLSSize)
1602  break; // ArgTLS overflows, uses a zero shadow.
1603  continue;
1604  }
1605 
1606  if (ArgOffset + Size > ArgTLSSize)
1607  break; // ArgTLS overflows, uses a zero shadow.
1608 
1609  Instruction *ArgTLSPos = &*F->getEntryBlock().begin();
1610  IRBuilder<> IRB(ArgTLSPos);
1611  Value *ArgShadowPtr = getArgTLS(FArg.getType(), ArgOffset, IRB);
1612  return IRB.CreateAlignedLoad(DFS.getShadowTy(&FArg), ArgShadowPtr,
1613  ShadowTLSAlignment);
1614  }
1615 
1616  return DFS.getZeroShadow(A);
1617 }
1618 
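// A minimal standalone model of the offset computation above (illustrative
// only): each preceding sized argument advances the offset by its shadow
// size rounded up to the TLS slot alignment, and any overflow of ArgTLS
// falls back to a zero shadow. ArgTLSSize and SlotAlign are parameters here
// rather than the real constants, and unsized parameters are ignored.
//
//   #include <cstdint>
//   #include <vector>
//
//   // ShadowSizes[i] is the alloc size of argument i's shadow type.
//   // Returns the byte offset into __dfsan_arg_tls, or -1 for "zero shadow".
//   int64_t argShadowOffset(const std::vector<uint64_t> &ShadowSizes,
//                           unsigned ArgNo, uint64_t ArgTLSSize,
//                           uint64_t SlotAlign) {
//     uint64_t Offset = 0;
//     for (unsigned I = 0; I < ArgNo; ++I) {
//       Offset += (ShadowSizes[I] + SlotAlign - 1) / SlotAlign * SlotAlign;
//       if (Offset > ArgTLSSize)
//         return -1;
//     }
//     if (Offset + ShadowSizes[ArgNo] > ArgTLSSize)
//       return -1;
//     return static_cast<int64_t>(Offset);
//   }
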
1619 Value *DFSanFunction::getShadow(Value *V) {
1620  if (!isa<Argument>(V) && !isa<Instruction>(V))
1621  return DFS.getZeroShadow(V);
1622  if (IsForceZeroLabels)
1623  return DFS.getZeroShadow(V);
1624  Value *&Shadow = ValShadowMap[V];
1625  if (!Shadow) {
1626  if (Argument *A = dyn_cast<Argument>(V)) {
1627  if (IsNativeABI)
1628  return DFS.getZeroShadow(V);
1629  Shadow = getShadowForTLSArgument(A);
1630  NonZeroChecks.push_back(Shadow);
1631  } else {
1632  Shadow = DFS.getZeroShadow(V);
1633  }
1634  }
1635  return Shadow;
1636 }
1637 
1638 void DFSanFunction::setShadow(Instruction *I, Value *Shadow) {
1639  assert(!ValShadowMap.count(I));
1640  ValShadowMap[I] = Shadow;
1641 }
1642 
1643 /// Compute the integer shadow offset that corresponds to a given
1644 /// application address.
1645 ///
1646 /// Offset = (Addr & ~AndMask) ^ XorMask
1647 Value *DataFlowSanitizer::getShadowOffset(Value *Addr, IRBuilder<> &IRB) {
1648  assert(Addr != RetvalTLS && "Reinstrumenting?");
1649  Value *OffsetLong = IRB.CreatePointerCast(Addr, IntptrTy);
1650 
1651  uint64_t AndMask = MapParams->AndMask;
1652  if (AndMask)
1653  OffsetLong =
1654  IRB.CreateAnd(OffsetLong, ConstantInt::get(IntptrTy, ~AndMask));
1655 
1656  uint64_t XorMask = MapParams->XorMask;
1657  if (XorMask)
1658  OffsetLong = IRB.CreateXor(OffsetLong, ConstantInt::get(IntptrTy, XorMask));
1659  return OffsetLong;
1660 }
1661 
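// A standalone model of the mapping above (illustrative only; the mask
// values are assumptions matching the Linux/x86_64 layout rather than being
// read from MapParams):
//
//   #include <cstdint>
//
//   uint64_t shadowOffset(uint64_t Addr) {
//     const uint64_t AndMask = 0;                 // assumed: unused here
//     const uint64_t XorMask = 0x500000000000ULL; // assumed app<->shadow xor
//     if (AndMask)
//       Addr &= ~AndMask;
//     if (XorMask)
//       Addr ^= XorMask;
//     return Addr; // e.g. 0x000010000000 -> 0x500010000000
//   }
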
1662 std::pair<Value *, Value *>
1663 DataFlowSanitizer::getShadowOriginAddress(Value *Addr, Align InstAlignment,
1664  Instruction *Pos) {
1665  // Returns ((Addr & shadow_mask) + origin_base - shadow_base) & ~4UL
1666  IRBuilder<> IRB(Pos);
1667  Value *ShadowOffset = getShadowOffset(Addr, IRB);
1668  Value *ShadowLong = ShadowOffset;
1669  uint64_t ShadowBase = MapParams->ShadowBase;
1670  if (ShadowBase != 0) {
1671  ShadowLong =
1672  IRB.CreateAdd(ShadowLong, ConstantInt::get(IntptrTy, ShadowBase));
1673  }
1674  IntegerType *ShadowTy = IntegerType::get(*Ctx, ShadowWidthBits);
1675  Value *ShadowPtr =
1676  IRB.CreateIntToPtr(ShadowLong, PointerType::get(ShadowTy, 0));
1677  Value *OriginPtr = nullptr;
1678  if (shouldTrackOrigins()) {
1679  Value *OriginLong = ShadowOffset;
1680  uint64_t OriginBase = MapParams->OriginBase;
1681  if (OriginBase != 0)
1682  OriginLong =
1683  IRB.CreateAdd(OriginLong, ConstantInt::get(IntptrTy, OriginBase));
1684  const Align Alignment = llvm::assumeAligned(InstAlignment.value());
1685  // When alignment is >= 4, Addr must be aligned to 4, otherwise it is UB.
1686  // So Mask is unnecessary.
1687  if (Alignment < MinOriginAlignment) {
1688  uint64_t Mask = MinOriginAlignment.value() - 1;
1689  OriginLong = IRB.CreateAnd(OriginLong, ConstantInt::get(IntptrTy, ~Mask));
1690  }
1691  OriginPtr = IRB.CreateIntToPtr(OriginLong, OriginPtrTy);
1692  }
1693  return std::make_pair(ShadowPtr, OriginPtr);
1694 }
1695 
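// A minimal model of the address arithmetic above (illustrative only;
// ShadowBase and OriginBase stand in for the MapParams fields, and the final
// mask mirrors the 4-byte rounding applied when the access itself may be
// under-aligned):
//
//   #include <cstdint>
//   #include <utility>
//
//   std::pair<uint64_t, uint64_t>
//   shadowOriginAddrs(uint64_t ShadowOffset, uint64_t ShadowBase,
//                     uint64_t OriginBase, bool UnderAligned) {
//     uint64_t Shadow = ShadowOffset + ShadowBase;
//     uint64_t Origin = ShadowOffset + OriginBase;
//     if (UnderAligned)
//       Origin &= ~uint64_t(3); // round down to the 4-byte origin granule
//     return {Shadow, Origin};
//   }
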
1696 Value *DataFlowSanitizer::getShadowAddress(Value *Addr, Instruction *Pos,
1697  Value *ShadowOffset) {
1698  IRBuilder<> IRB(Pos);
1699  return IRB.CreateIntToPtr(ShadowOffset, PrimitiveShadowPtrTy);
1700 }
1701 
1702 Value *DataFlowSanitizer::getShadowAddress(Value *Addr, Instruction *Pos) {
1703  IRBuilder<> IRB(Pos);
1704  Value *ShadowOffset = getShadowOffset(Addr, IRB);
1705  return getShadowAddress(Addr, Pos, ShadowOffset);
1706 }
1707 
1708 Value *DFSanFunction::combineShadowsThenConvert(Type *T, Value *V1, Value *V2,
1709  Instruction *Pos) {
1710  Value *PrimitiveValue = combineShadows(V1, V2, Pos);
1711  return expandFromPrimitiveShadow(T, PrimitiveValue, Pos);
1712 }
1713 
1714 // Generates IR to compute the union of the two given shadows, inserting it
1715  // before Pos. The combined value has primitive type.
1716 Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) {
1717  if (DFS.isZeroShadow(V1))
1718  return collapseToPrimitiveShadow(V2, Pos);
1719  if (DFS.isZeroShadow(V2))
1720  return collapseToPrimitiveShadow(V1, Pos);
1721  if (V1 == V2)
1722  return collapseToPrimitiveShadow(V1, Pos);
1723 
1724  auto V1Elems = ShadowElements.find(V1);
1725  auto V2Elems = ShadowElements.find(V2);
1726  if (V1Elems != ShadowElements.end() && V2Elems != ShadowElements.end()) {
1727  if (std::includes(V1Elems->second.begin(), V1Elems->second.end(),
1728  V2Elems->second.begin(), V2Elems->second.end())) {
1729  return collapseToPrimitiveShadow(V1, Pos);
1730  }
1731  if (std::includes(V2Elems->second.begin(), V2Elems->second.end(),
1732  V1Elems->second.begin(), V1Elems->second.end())) {
1733  return collapseToPrimitiveShadow(V2, Pos);
1734  }
1735  } else if (V1Elems != ShadowElements.end()) {
1736  if (V1Elems->second.count(V2))
1737  return collapseToPrimitiveShadow(V1, Pos);
1738  } else if (V2Elems != ShadowElements.end()) {
1739  if (V2Elems->second.count(V1))
1740  return collapseToPrimitiveShadow(V2, Pos);
1741  }
1742 
1743  auto Key = std::make_pair(V1, V2);
1744  if (V1 > V2)
1745  std::swap(Key.first, Key.second);
1746  CachedShadow &CCS = CachedShadows[Key];
1747  if (CCS.Block && DT.dominates(CCS.Block, Pos->getParent()))
1748  return CCS.Shadow;
1749 
1750  // Converts input shadows to shadows with primitive types.
1751  Value *PV1 = collapseToPrimitiveShadow(V1, Pos);
1752  Value *PV2 = collapseToPrimitiveShadow(V2, Pos);
1753 
1754  IRBuilder<> IRB(Pos);
1755  CCS.Block = Pos->getParent();
1756  CCS.Shadow = IRB.CreateOr(PV1, PV2);
1757 
1758  std::set<Value *> UnionElems;
1759  if (V1Elems != ShadowElements.end()) {
1760  UnionElems = V1Elems->second;
1761  } else {
1762  UnionElems.insert(V1);
1763  }
1764  if (V2Elems != ShadowElements.end()) {
1765  UnionElems.insert(V2Elems->second.begin(), V2Elems->second.end());
1766  } else {
1767  UnionElems.insert(V2);
1768  }
1769  ShadowElements[CCS.Shadow] = std::move(UnionElems);
1770 
1771  return CCS.Shadow;
1772 }
1773 
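// Because a primitive shadow is a byte whose bits act as a small label set,
// the union emitted above is just a bitwise OR. A worked example
// (illustrative only):
//
//   #include <cstdint>
//
//   uint8_t unionLabels(uint8_t A, uint8_t B) {
//     return A | B; // e.g. 0b0011 | 0b0101 == 0b0111: all labels survive
//   }
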
1774 // A convenience function which folds the shadows of each of the operands
1775 // of the provided instruction Inst, inserting the IR before Inst. Returns
1776 // the computed union Value.
1777 Value *DFSanFunction::combineOperandShadows(Instruction *Inst) {
1778  if (Inst->getNumOperands() == 0)
1779  return DFS.getZeroShadow(Inst);
1780 
1781  Value *Shadow = getShadow(Inst->getOperand(0));
1782  for (unsigned I = 1, N = Inst->getNumOperands(); I < N; ++I)
1783  Shadow = combineShadows(Shadow, getShadow(Inst->getOperand(I)), Inst);
1784 
1785  return expandFromPrimitiveShadow(Inst->getType(), Shadow, Inst);
1786 }
1787 
1788 void DFSanVisitor::visitInstOperands(Instruction &I) {
1789  Value *CombinedShadow = DFSF.combineOperandShadows(&I);
1790  DFSF.setShadow(&I, CombinedShadow);
1791  visitInstOperandOrigins(I);
1792 }
1793 
1794 Value *DFSanFunction::combineOrigins(const std::vector<Value *> &Shadows,
1795  const std::vector<Value *> &Origins,
1796  Instruction *Pos, ConstantInt *Zero) {
1797  assert(Shadows.size() == Origins.size());
1798  size_t Size = Origins.size();
1799  if (Size == 0)
1800  return DFS.ZeroOrigin;
1801  Value *Origin = nullptr;
1802  if (!Zero)
1803  Zero = DFS.ZeroPrimitiveShadow;
1804  for (size_t I = 0; I != Size; ++I) {
1805  Value *OpOrigin = Origins[I];
1806  Constant *ConstOpOrigin = dyn_cast<Constant>(OpOrigin);
1807  if (ConstOpOrigin && ConstOpOrigin->isNullValue())
1808  continue;
1809  if (!Origin) {
1810  Origin = OpOrigin;
1811  continue;
1812  }
1813  Value *OpShadow = Shadows[I];
1814  Value *PrimitiveShadow = collapseToPrimitiveShadow(OpShadow, Pos);
1815  IRBuilder<> IRB(Pos);
1816  Value *Cond = IRB.CreateICmpNE(PrimitiveShadow, Zero);
1817  Origin = IRB.CreateSelect(Cond, OpOrigin, Origin);
1818  }
1819  return Origin ? Origin : DFS.ZeroOrigin;
1820 }
1821 
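// A scalar model of the select chain built above (illustrative only): the
// first operand with a usable origin seeds the result, and each later
// operand overrides it only when that operand's shadow is non-zero. The real
// code additionally drops operands whose origin is a compile-time constant
// zero before emitting any selects.
//
//   #include <cstdint>
//   #include <vector>
//
//   uint32_t combineOriginsModel(const std::vector<uint8_t> &Shadows,
//                                const std::vector<uint32_t> &Origins) {
//     uint32_t Origin = 0; // 0 plays the role of ZeroOrigin
//     bool HaveOrigin = false;
//     for (size_t I = 0; I != Origins.size(); ++I) {
//       if (!HaveOrigin) {
//         Origin = Origins[I];
//         HaveOrigin = true;
//       } else if (Shadows[I] != 0) { // select(shadow != 0, op, prev)
//         Origin = Origins[I];
//       }
//     }
//     return Origin;
//   }
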
1822 Value *DFSanFunction::combineOperandOrigins(Instruction *Inst) {
1823  size_t Size = Inst->getNumOperands();
1824  std::vector<Value *> Shadows(Size);
1825  std::vector<Value *> Origins(Size);
1826  for (unsigned I = 0; I != Size; ++I) {
1827  Shadows[I] = getShadow(Inst->getOperand(I));
1828  Origins[I] = getOrigin(Inst->getOperand(I));
1829  }
1830  return combineOrigins(Shadows, Origins, Inst);
1831 }
1832 
1833 void DFSanVisitor::visitInstOperandOrigins(Instruction &I) {
1834  if (!DFSF.DFS.shouldTrackOrigins())
1835  return;
1836  Value *CombinedOrigin = DFSF.combineOperandOrigins(&I);
1837  DFSF.setOrigin(&I, CombinedOrigin);
1838 }
1839 
1840 Align DFSanFunction::getShadowAlign(Align InstAlignment) {
1841  const Align Alignment = ClPreserveAlignment ? InstAlignment : Align(1);
1842  return Align(Alignment.value() * DFS.ShadowWidthBytes);
1843 }
1844 
1845 Align DFSanFunction::getOriginAlign(Align InstAlignment) {
1846  const Align Alignment = llvm::assumeAligned(InstAlignment.value());
1847  return Align(std::max(MinOriginAlignment, Alignment));
1848 }
1849 
1850 bool DFSanFunction::useCallbackLoadLabelAndOrigin(uint64_t Size,
1851  Align InstAlignment) {
1852  // When tracking of load instructions is enabled, we always use
1853  // __dfsan_load_label_and_origin to reduce code size.
1854  if (ClTrackOrigins == 2)
1855  return true;
1856 
1857  assert(Size != 0);
1858  // * if Size == 1, it is sufficient to load its origin aligned at 4.
1859  // * if Size == 2, we assume that in most cases Addr % 2 == 0, so it is sufficient to
1860  // load its origin aligned at 4. If not, although origins may be lost, it
1861  // should not happen very often.
1862  // * if align >= 4, Addr must be aligned to 4, otherwise it is UB. When
1863  // Size % 4 == 0, it is more efficient to load origins without callbacks.
1864  // * Otherwise we use __dfsan_load_label_and_origin.
1865  // This should ensure that common cases run efficiently.
1866  if (Size <= 2)
1867  return false;
1868 
1869  const Align Alignment = llvm::assumeAligned(InstAlignment.value());
1870  return Alignment < MinOriginAlignment || !DFS.hasLoadSizeForFastPath(Size);
1871 }
1872 
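// A compact model of the decision above (illustrative only;
// hasLoadSizeForFastPath is modeled as "shadow size is 4 bytes or a multiple
// of 8 bytes", which is what the fast path below relies on, and the origin
// alignment threshold is taken to be 4):
//
//   #include <cstdint>
//
//   bool useCallbackModel(uint64_t Size, uint64_t Align, int TrackOriginsLevel) {
//     if (TrackOriginsLevel == 2)
//       return true;  // always use the runtime callback at level 2
//     if (Size <= 2)
//       return false; // one 4-byte-aligned origin load is enough
//     bool FastPathSize = (Size == 4) || (Size % 8 == 0);
//     return Align < 4 || !FastPathSize;
//   }
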
1873 Value *DataFlowSanitizer::loadNextOrigin(Instruction *Pos, Align OriginAlign,
1874  Value **OriginAddr) {
1875  IRBuilder<> IRB(Pos);
1876  *OriginAddr =
1877  IRB.CreateGEP(OriginTy, *OriginAddr, ConstantInt::get(IntptrTy, 1));
1878  return IRB.CreateAlignedLoad(OriginTy, *OriginAddr, OriginAlign);
1879 }
1880 
1881 std::pair<Value *, Value *> DFSanFunction::loadShadowFast(
1882  Value *ShadowAddr, Value *OriginAddr, uint64_t Size, Align ShadowAlign,
1883  Align OriginAlign, Value *FirstOrigin, Instruction *Pos) {
1884  const bool ShouldTrackOrigins = DFS.shouldTrackOrigins();
1885  const uint64_t ShadowSize = Size * DFS.ShadowWidthBytes;
1886 
1887  assert(Size >= 4 && "Not large enough load size for fast path!");
1888 
1889  // Used for origin tracking.
1890  std::vector<Value *> Shadows;
1891  std::vector<Value *> Origins;
1892 
1893  // Load instructions in LLVM can have arbitrary byte sizes (e.g., 3, 12, 20)
1894  // but this function is only used in a subset of cases that make it possible
1895  // to optimize the instrumentation.
1896  //
1897  // Specifically, when the shadow size in bytes (i.e., loaded bytes x shadow
1898  // per byte) is either:
1899  // - a multiple of 8 (common)
1900  // - equal to 4 (only for load32)
1901  //
1902  // For the second case, we can fit the wide shadow in a 32-bit integer. In all
1903  // other cases, we use a 64-bit integer to hold the wide shadow.
1904  Type *WideShadowTy =
1905  ShadowSize == 4 ? Type::getInt32Ty(*DFS.Ctx) : Type::getInt64Ty(*DFS.Ctx);
1906 
1907  IRBuilder<> IRB(Pos);
1908  Value *WideAddr = IRB.CreateBitCast(ShadowAddr, WideShadowTy->getPointerTo());
1909  Value *CombinedWideShadow =
1910  IRB.CreateAlignedLoad(WideShadowTy, WideAddr, ShadowAlign);
1911 
1912  unsigned WideShadowBitWidth = WideShadowTy->getIntegerBitWidth();
1913  const uint64_t BytesPerWideShadow = WideShadowBitWidth / DFS.ShadowWidthBits;
1914 
1915  auto AppendWideShadowAndOrigin = [&](Value *WideShadow, Value *Origin) {
1916  if (BytesPerWideShadow > 4) {
1917  assert(BytesPerWideShadow == 8);
1918  // The wide shadow relates to two origin pointers: one for the first four
1919  // application bytes, and one for the last four. We use a left shift to
1920  // get just the shadow bytes that correspond to the first origin pointer,
1921  // and then the entire shadow for the second origin pointer (which will be
1922  // chosen by combineOrigins() iff the least-significant half of the wide
1923  // shadow was empty but the other half was not).
1924  Value *WideShadowLo = IRB.CreateShl(
1925  WideShadow, ConstantInt::get(WideShadowTy, WideShadowBitWidth / 2));
1926  Shadows.push_back(WideShadow);
1927  Origins.push_back(DFS.loadNextOrigin(Pos, OriginAlign, &OriginAddr));
1928 
1929  Shadows.push_back(WideShadowLo);
1930  Origins.push_back(Origin);
1931  } else {
1932  Shadows.push_back(WideShadow);
1933  Origins.push_back(Origin);
1934  }
1935  };
1936 
1937  if (ShouldTrackOrigins)
1938  AppendWideShadowAndOrigin(CombinedWideShadow, FirstOrigin);
1939 
1940  // First OR all the WideShadows (i.e., 64bit or 32bit shadow chunks) linearly;
1941  // then OR individual shadows within the combined WideShadow by binary ORing.
1942  // This is fewer instructions than ORing shadows individually, since it
1943  // needs logN shift/or instructions (N being the bytes of the combined wide
1944  // shadow).
1945  for (uint64_t ByteOfs = BytesPerWideShadow; ByteOfs < Size;
1946  ByteOfs += BytesPerWideShadow) {
1947  WideAddr = IRB.CreateGEP(WideShadowTy, WideAddr,
1948  ConstantInt::get(DFS.IntptrTy, 1));
1949  Value *NextWideShadow =
1950  IRB.CreateAlignedLoad(WideShadowTy, WideAddr, ShadowAlign);
1951  CombinedWideShadow = IRB.CreateOr(CombinedWideShadow, NextWideShadow);
1952  if (ShouldTrackOrigins) {
1953  Value *NextOrigin = DFS.loadNextOrigin(Pos, OriginAlign, &OriginAddr);
1954  AppendWideShadowAndOrigin(NextWideShadow, NextOrigin);
1955  }
1956  }
1957  for (unsigned Width = WideShadowBitWidth / 2; Width >= DFS.ShadowWidthBits;
1958  Width >>= 1) {
1959  Value *ShrShadow = IRB.CreateLShr(CombinedWideShadow, Width);
1960  CombinedWideShadow = IRB.CreateOr(CombinedWideShadow, ShrShadow);
1961  }
1962  return {IRB.CreateTrunc(CombinedWideShadow, DFS.PrimitiveShadowTy),
1963  ShouldTrackOrigins
1964  ? combineOrigins(Shadows, Origins, Pos,
1965  ConstantInt::getSigned(IRB.getInt64Ty(), 0))
1966  : DFS.ZeroOrigin};
1967 }
1968 
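// The tail of the function above folds the combined wide shadow down to one
// 8-bit label with a logarithmic number of shift+or steps. A standalone
// model of that reduction for a 64-bit wide shadow (illustrative only):
//
//   #include <cstdint>
//
//   uint8_t foldWideShadow(uint64_t Wide) {
//     for (unsigned Width = 32; Width >= 8; Width >>= 1)
//       Wide |= Wide >> Width;
//     return static_cast<uint8_t>(Wide); // OR of all eight shadow bytes
//   }
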
1969 std::pair<Value *, Value *> DFSanFunction::loadShadowOriginSansLoadTracking(
1970  Value *Addr, uint64_t Size, Align InstAlignment, Instruction *Pos) {
1971  const bool ShouldTrackOrigins = DFS.shouldTrackOrigins();
1972 
1973  // Non-escaped loads.
1974  if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
1975  const auto SI = AllocaShadowMap.find(AI);
1976  if (SI != AllocaShadowMap.end()) {
1977  IRBuilder<> IRB(Pos);
1978  Value *ShadowLI = IRB.CreateLoad(DFS.PrimitiveShadowTy, SI->second);
1979  const auto OI = AllocaOriginMap.find(AI);
1980  assert(!ShouldTrackOrigins || OI != AllocaOriginMap.end());
1981  return {ShadowLI, ShouldTrackOrigins
1982  ? IRB.CreateLoad(DFS.OriginTy, OI->second)
1983  : nullptr};
1984  }
1985  }
1986 
1987  // Load from constant addresses.
1988  SmallVector<const Value *, 2> Objs;
1989  getUnderlyingObjects(Addr, Objs);
1990  bool AllConstants = true;
1991  for (const Value *Obj : Objs) {
1992  if (isa<Function>(Obj) || isa<BlockAddress>(Obj))
1993  continue;
1994  if (isa<GlobalVariable>(Obj) && cast<GlobalVariable>(Obj)->isConstant())
1995  continue;
1996 
1997  AllConstants = false;
1998  break;
1999  }
2000  if (AllConstants)
2001  return {DFS.ZeroPrimitiveShadow,
2002  ShouldTrackOrigins ? DFS.ZeroOrigin : nullptr};
2003 
2004  if (Size == 0)
2005  return {DFS.ZeroPrimitiveShadow,
2006  ShouldTrackOrigins ? DFS.ZeroOrigin : nullptr};
2007 
2008  // Use callback to load if this is not an optimizable case for origin
2009  // tracking.
2010  if (ShouldTrackOrigins &&
2011  useCallbackLoadLabelAndOrigin(Size, InstAlignment)) {
2012  IRBuilder<> IRB(Pos);
2013  CallInst *Call =
2014  IRB.CreateCall(DFS.DFSanLoadLabelAndOriginFn,
2015  {IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
2016  ConstantInt::get(DFS.IntptrTy, Size)});
2017  Call->addRetAttr(Attribute::ZExt);
2018  return {IRB.CreateTrunc(IRB.CreateLShr(Call, DFS.OriginWidthBits),
2019  DFS.PrimitiveShadowTy),
2020  IRB.CreateTrunc(Call, DFS.OriginTy)};
2021  }
2022 
2023  // Other cases that support loading shadows or origins in a fast way.
2024  Value *ShadowAddr, *OriginAddr;
2025  std::tie(ShadowAddr, OriginAddr) =
2026  DFS.getShadowOriginAddress(Addr, InstAlignment, Pos);
2027 
2028  const Align ShadowAlign = getShadowAlign(InstAlignment);
2029  const Align OriginAlign = getOriginAlign(InstAlignment);
2030  Value *Origin = nullptr;
2031  if (ShouldTrackOrigins) {
2032  IRBuilder<> IRB(Pos);
2033  Origin = IRB.CreateAlignedLoad(DFS.OriginTy, OriginAddr, OriginAlign);
2034  }
2035 
2036  // When the byte size is small enough, we can load the shadow directly with
2037  // just a few instructions.
2038  switch (Size) {
2039  case 1: {
2040  LoadInst *LI = new LoadInst(DFS.PrimitiveShadowTy, ShadowAddr, "", Pos);
2041  LI->setAlignment(ShadowAlign);
2042  return {LI, Origin};
2043  }
2044  case 2: {
2045  IRBuilder<> IRB(Pos);
2046  Value *ShadowAddr1 = IRB.CreateGEP(DFS.PrimitiveShadowTy, ShadowAddr,
2047  ConstantInt::get(DFS.IntptrTy, 1));
2048  Value *Load =
2049  IRB.CreateAlignedLoad(DFS.PrimitiveShadowTy, ShadowAddr, ShadowAlign);
2050  Value *Load1 =
2051  IRB.CreateAlignedLoad(DFS.PrimitiveShadowTy, ShadowAddr1, ShadowAlign);
2052  return {combineShadows(Load, Load1, Pos), Origin};
2053  }
2054  }
2055  bool HasSizeForFastPath = DFS.hasLoadSizeForFastPath(Size);
2056 
2057  if (HasSizeForFastPath)
2058  return loadShadowFast(ShadowAddr, OriginAddr, Size, ShadowAlign,
2059  OriginAlign, Origin, Pos);
2060 
2061  IRBuilder<> IRB(Pos);
2062  CallInst *FallbackCall = IRB.CreateCall(
2063  DFS.DFSanUnionLoadFn, {ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)});
2064  FallbackCall->addRetAttr(Attribute::ZExt);
2065  return {FallbackCall, Origin};
2066 }
2067 
2068 std::pair<Value *, Value *> DFSanFunction::loadShadowOrigin(Value *Addr,
2069  uint64_t Size,
2070  Align InstAlignment,
2071  Instruction *Pos) {
2072  Value *PrimitiveShadow, *Origin;
2073  std::tie(PrimitiveShadow, Origin) =
2074  loadShadowOriginSansLoadTracking(Addr, Size, InstAlignment, Pos);
2075  if (DFS.shouldTrackOrigins()) {
2076  if (ClTrackOrigins == 2) {
2077  IRBuilder<> IRB(Pos);
2078  auto *ConstantShadow = dyn_cast<Constant>(PrimitiveShadow);
2079  if (!ConstantShadow || !ConstantShadow->isZeroValue())
2080  Origin = updateOriginIfTainted(PrimitiveShadow, Origin, IRB);
2081  }
2082  }
2083  return {PrimitiveShadow, Origin};
2084 }
2085 
2086 static AtomicOrdering addAcquireOrdering(AtomicOrdering AO) {
2087  switch (AO) {
2088  case AtomicOrdering::NotAtomic:
2089  return AtomicOrdering::NotAtomic;
2090  case AtomicOrdering::Unordered:
2091  case AtomicOrdering::Monotonic:
2092  case AtomicOrdering::Acquire:
2093  return AtomicOrdering::Acquire;
2094  case AtomicOrdering::Release:
2095  case AtomicOrdering::AcquireRelease:
2096  return AtomicOrdering::AcquireRelease;
2097  case AtomicOrdering::SequentiallyConsistent:
2098  return AtomicOrdering::SequentiallyConsistent;
2099  }
2100  llvm_unreachable("Unknown ordering");
2101 }
2102 
2103 void DFSanVisitor::visitLoadInst(LoadInst &LI) {
2104  auto &DL = LI.getModule()->getDataLayout();
2105  uint64_t Size = DL.getTypeStoreSize(LI.getType());
2106  if (Size == 0) {
2107  DFSF.setShadow(&LI, DFSF.DFS.getZeroShadow(&LI));
2108  DFSF.setOrigin(&LI, DFSF.DFS.ZeroOrigin);
2109  return;
2110  }
2111 
2112  // When an application load is atomic, increase atomic ordering between
2113  // atomic application loads and stores to ensure happens-before order; load
2114  // shadow data after application data; store zero shadow data before
2115  // application data. This ensures shadow loads return either labels of the
2116  // initial application data or zeros.
2117  if (LI.isAtomic())
2118  LI.setOrdering(addAcquireOrdering(LI.getOrdering()));
2119 
2120  Instruction *Pos = LI.isAtomic() ? LI.getNextNode() : &LI;
2121  std::vector<Value *> Shadows;
2122  std::vector<Value *> Origins;
2123  Value *PrimitiveShadow, *Origin;
2124  std::tie(PrimitiveShadow, Origin) =
2125  DFSF.loadShadowOrigin(LI.getPointerOperand(), Size, LI.getAlign(), Pos);
2126  const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
2127  if (ShouldTrackOrigins) {
2128  Shadows.push_back(PrimitiveShadow);
2129  Origins.push_back(Origin);
2130  }
2131  if (ClCombinePointerLabelsOnLoad) {
2132  Value *PtrShadow = DFSF.getShadow(LI.getPointerOperand());
2133  PrimitiveShadow = DFSF.combineShadows(PrimitiveShadow, PtrShadow, Pos);
2134  if (ShouldTrackOrigins) {
2135  Shadows.push_back(PtrShadow);
2136  Origins.push_back(DFSF.getOrigin(LI.getPointerOperand()));
2137  }
2138  }
2139  if (!DFSF.DFS.isZeroShadow(PrimitiveShadow))
2140  DFSF.NonZeroChecks.push_back(PrimitiveShadow);
2141 
2142  Value *Shadow =
2143  DFSF.expandFromPrimitiveShadow(LI.getType(), PrimitiveShadow, Pos);
2144  DFSF.setShadow(&LI, Shadow);
2145 
2146  if (ShouldTrackOrigins) {
2147  DFSF.setOrigin(&LI, DFSF.combineOrigins(Shadows, Origins, Pos));
2148  }
2149 
2150  if (ClEventCallbacks) {
2151  IRBuilder<> IRB(Pos);
2152  Value *Addr8 = IRB.CreateBitCast(LI.getPointerOperand(), DFSF.DFS.Int8Ptr);
2153  IRB.CreateCall(DFSF.DFS.DFSanLoadCallbackFn, {PrimitiveShadow, Addr8});
2154  }
2155 }
2156 
2157 Value *DFSanFunction::updateOriginIfTainted(Value *Shadow, Value *Origin,
2158  IRBuilder<> &IRB) {
2159  assert(DFS.shouldTrackOrigins());
2160  return IRB.CreateCall(DFS.DFSanChainOriginIfTaintedFn, {Shadow, Origin});
2161 }
2162 
2163 Value *DFSanFunction::updateOrigin(Value *V, IRBuilder<> &IRB) {
2164  if (!DFS.shouldTrackOrigins())
2165  return V;
2166  return IRB.CreateCall(DFS.DFSanChainOriginFn, V);
2167 }
2168 
2169 Value *DFSanFunction::originToIntptr(IRBuilder<> &IRB, Value *Origin) {
2170  const unsigned OriginSize = DataFlowSanitizer::OriginWidthBytes;
2171  const DataLayout &DL = F->getParent()->getDataLayout();
2172  unsigned IntptrSize = DL.getTypeStoreSize(DFS.IntptrTy);
2173  if (IntptrSize == OriginSize)
2174  return Origin;
2175  assert(IntptrSize == OriginSize * 2);
2176  Origin = IRB.CreateIntCast(Origin, DFS.IntptrTy, /* isSigned */ false);
2177  return IRB.CreateOr(Origin, IRB.CreateShl(Origin, OriginSize * 8));
2178 }
2179 
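// When IntptrTy is twice as wide as an origin, the 32-bit origin is
// replicated into both halves so one pointer-sized store paints two origin
// slots at once. A worked example (illustrative only):
//
//   #include <cstdint>
//
//   uint64_t originToIntptrModel(uint32_t Origin) {
//     return uint64_t(Origin) | (uint64_t(Origin) << 32);
//     // e.g. 0x11223344 -> 0x1122334411223344
//   }
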
2180 void DFSanFunction::paintOrigin(IRBuilder<> &IRB, Value *Origin,
2181  Value *StoreOriginAddr,
2182  uint64_t StoreOriginSize, Align Alignment) {
2183  const unsigned OriginSize = DataFlowSanitizer::OriginWidthBytes;
2184  const DataLayout &DL = F->getParent()->getDataLayout();
2185  const Align IntptrAlignment = DL.getABITypeAlign(DFS.IntptrTy);
2186  unsigned IntptrSize = DL.getTypeStoreSize(DFS.IntptrTy);
2187  assert(IntptrAlignment >= MinOriginAlignment);
2188  assert(IntptrSize >= OriginSize);
2189 
2190  unsigned Ofs = 0;
2191  Align CurrentAlignment = Alignment;
2192  if (Alignment >= IntptrAlignment && IntptrSize > OriginSize) {
2193  Value *IntptrOrigin = originToIntptr(IRB, Origin);
2194  Value *IntptrStoreOriginPtr = IRB.CreatePointerCast(
2195  StoreOriginAddr, PointerType::get(DFS.IntptrTy, 0));
2196  for (unsigned I = 0; I < StoreOriginSize / IntptrSize; ++I) {
2197  Value *Ptr =
2198  I ? IRB.CreateConstGEP1_32(DFS.IntptrTy, IntptrStoreOriginPtr, I)
2199  : IntptrStoreOriginPtr;
2200  IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment);
2201  Ofs += IntptrSize / OriginSize;
2202  CurrentAlignment = IntptrAlignment;
2203  }
2204  }
2205 
2206  for (unsigned I = Ofs; I < (StoreOriginSize + OriginSize - 1) / OriginSize;
2207  ++I) {
2208  Value *GEP = I ? IRB.CreateConstGEP1_32(DFS.OriginTy, StoreOriginAddr, I)
2209  : StoreOriginAddr;
2210  IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment);
2211  CurrentAlignment = MinOriginAlignment;
2212  }
2213 }
2214 
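// paintOrigin above records origins at a granularity of one 4-byte slot per
// four application bytes, using pointer-sized stores where possible and
// 4-byte stores for the remainder. A small model of how many slots a store
// of N bytes touches (illustrative only):
//
//   #include <cstdint>
//
//   uint64_t originSlotsTouched(uint64_t StoreOriginSize) {
//     const uint64_t OriginSize = 4; // bytes covered per origin slot
//     return (StoreOriginSize + OriginSize - 1) / OriginSize;
//     // e.g. a 10-byte store touches 3 origin slots
//   }
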
2215 Value *DFSanFunction::convertToBool(Value *V, IRBuilder<> &IRB,
2216  const Twine &Name) {
2217  Type *VTy = V->getType();
2218  assert(VTy->isIntegerTy());
2219  if (VTy->getIntegerBitWidth() == 1)
2220  // Just converting a bool to a bool, so do nothing.
2221  return V;
2222  return IRB.CreateICmpNE(V, ConstantInt::get(VTy, 0), Name);
2223 }
2224 
2225 void DFSanFunction::storeOrigin(Instruction *Pos, Value *Addr, uint64_t Size,
2226  Value *Shadow, Value *Origin,
2227  Value *StoreOriginAddr, Align InstAlignment) {
2228  // Do not write origins for zero shadows because we do not trace origins for
2229  // untainted sinks.
2230  const Align OriginAlignment = getOriginAlign(InstAlignment);
2231  Value *CollapsedShadow = collapseToPrimitiveShadow(Shadow, Pos);
2232  IRBuilder<> IRB(Pos);
2233  if (auto *ConstantShadow = dyn_cast<Constant>(CollapsedShadow)) {
2234  if (!ConstantShadow->isZeroValue())
2235  paintOrigin(IRB, updateOrigin(Origin, IRB), StoreOriginAddr, Size,
2236  OriginAlignment);
2237  return;
2238  }
2239 
2240  if (shouldInstrumentWithCall()) {
2241  IRB.CreateCall(DFS.DFSanMaybeStoreOriginFn,
2242  {CollapsedShadow,
2243  IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
2244  ConstantInt::get(DFS.IntptrTy, Size), Origin});
2245  } else {
2246  Value *Cmp = convertToBool(CollapsedShadow, IRB, "_dfscmp");
2247  Instruction *CheckTerm = SplitBlockAndInsertIfThen(
2248  Cmp, &*IRB.GetInsertPoint(), false, DFS.OriginStoreWeights, &DT);
2249  IRBuilder<> IRBNew(CheckTerm);
2250  paintOrigin(IRBNew, updateOrigin(Origin, IRBNew), StoreOriginAddr, Size,
2251  OriginAlignment);
2252  ++NumOriginStores;
2253  }
2254 }
2255 
2256 void DFSanFunction::storeZeroPrimitiveShadow(Value *Addr, uint64_t Size,
2257  Align ShadowAlign,
2258  Instruction *Pos) {
2259  IRBuilder<> IRB(Pos);
2260  IntegerType *ShadowTy =
2261  IntegerType::get(*DFS.Ctx, Size * DFS.ShadowWidthBits);
2262  Value *ExtZeroShadow = ConstantInt::get(ShadowTy, 0);
2263  Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos);
2264  Value *ExtShadowAddr =
2265  IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowTy));
2266  IRB.CreateAlignedStore(ExtZeroShadow, ExtShadowAddr, ShadowAlign);
2267  // Do not write origins for 0 shadows because we do not trace origins for
2268  // untainted sinks.
2269 }
2270 
2271 void DFSanFunction::storePrimitiveShadowOrigin(Value *Addr, uint64_t Size,
2272  Align InstAlignment,
2273  Value *PrimitiveShadow,
2274  Value *Origin,
2275  Instruction *Pos) {
2276  const bool ShouldTrackOrigins = DFS.shouldTrackOrigins() && Origin;
2277 
2278  if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
2279  const auto SI = AllocaShadowMap.find(AI);
2280  if (SI != AllocaShadowMap.end()) {
2281  IRBuilder<> IRB(Pos);
2282  IRB.CreateStore(PrimitiveShadow, SI->second);
2283 
2284  // Do not write origins for 0 shadows because we do not trace origins for
2285  // untainted sinks.
2286  if (ShouldTrackOrigins && !DFS.isZeroShadow(PrimitiveShadow)) {
2287  const auto OI = AllocaOriginMap.find(AI);
2288  assert(OI != AllocaOriginMap.end() && Origin);
2289  IRB.CreateStore(Origin, OI->second);
2290  }
2291  return;
2292  }
2293  }
2294 
2295  const Align ShadowAlign = getShadowAlign(InstAlignment);
2296  if (DFS.isZeroShadow(PrimitiveShadow)) {
2297  storeZeroPrimitiveShadow(Addr, Size, ShadowAlign, Pos);
2298  return;
2299  }
2300 
2301  IRBuilder<> IRB(Pos);
2302  Value *ShadowAddr, *OriginAddr;
2303  std::tie(ShadowAddr, OriginAddr) =
2304  DFS.getShadowOriginAddress(Addr, InstAlignment, Pos);
2305 
2306  const unsigned ShadowVecSize = 8;
2307  assert(ShadowVecSize * DFS.ShadowWidthBits <= 128 &&
2308  "Shadow vector is too large!");
2309 
2310  uint64_t Offset = 0;
2311  uint64_t LeftSize = Size;
2312  if (LeftSize >= ShadowVecSize) {
2313  auto *ShadowVecTy =
2314  FixedVectorType::get(DFS.PrimitiveShadowTy, ShadowVecSize);
2315  Value *ShadowVec = UndefValue::get(ShadowVecTy);
2316  for (unsigned I = 0; I != ShadowVecSize; ++I) {
2317  ShadowVec = IRB.CreateInsertElement(
2318  ShadowVec, PrimitiveShadow,
2319  ConstantInt::get(Type::getInt32Ty(*DFS.Ctx), I));
2320  }
2321  Value *ShadowVecAddr =
2322  IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowVecTy));
2323  do {
2324  Value *CurShadowVecAddr =
2325  IRB.CreateConstGEP1_32(ShadowVecTy, ShadowVecAddr, Offset);
2326  IRB.CreateAlignedStore(ShadowVec, CurShadowVecAddr, ShadowAlign);
2327  LeftSize -= ShadowVecSize;
2328  ++Offset;
2329  } while (LeftSize >= ShadowVecSize);
2330  Offset *= ShadowVecSize;
2331  }
2332  while (LeftSize > 0) {
2333  Value *CurShadowAddr =
2334  IRB.CreateConstGEP1_32(DFS.PrimitiveShadowTy, ShadowAddr, Offset);
2335  IRB.CreateAlignedStore(PrimitiveShadow, CurShadowAddr, ShadowAlign);
2336  --LeftSize;
2337  ++Offset;
2338  }
2339 
2340  if (ShouldTrackOrigins) {
2341  storeOrigin(Pos, Addr, Size, PrimitiveShadow, Origin, OriginAddr,
2342  InstAlignment);
2343  }
2344 }
2345 
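// For a non-zero shadow the store above is widened: the 1-byte label is
// splatted into an 8-lane vector, stored in 8-byte chunks, and any tail is
// written byte by byte. A model of that chunking (illustrative only):
//
//   #include <cstdint>
//   #include <utility>
//
//   // Returns {vector stores, scalar stores} for Size shadow bytes.
//   std::pair<uint64_t, uint64_t> shadowStoreCounts(uint64_t Size) {
//     const uint64_t ShadowVecSize = 8;
//     return {Size / ShadowVecSize, Size % ShadowVecSize};
//     // e.g. Size == 19 -> 2 vector stores + 3 scalar stores
//   }
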
2346 static AtomicOrdering addReleaseOrdering(AtomicOrdering AO) {
2347  switch (AO) {
2348  case AtomicOrdering::NotAtomic:
2349  return AtomicOrdering::NotAtomic;
2350  case AtomicOrdering::Unordered:
2351  case AtomicOrdering::Monotonic:
2352  case AtomicOrdering::Release:
2353  return AtomicOrdering::Release;
2354  case AtomicOrdering::Acquire:
2355  case AtomicOrdering::AcquireRelease:
2356  return AtomicOrdering::AcquireRelease;
2357  case AtomicOrdering::SequentiallyConsistent:
2358  return AtomicOrdering::SequentiallyConsistent;
2359  }
2360  llvm_unreachable("Unknown ordering");
2361 }
2362 
2363 void DFSanVisitor::visitStoreInst(StoreInst &SI) {
2364  auto &DL = SI.getModule()->getDataLayout();
2365  Value *Val = SI.getValueOperand();
2366  uint64_t Size = DL.getTypeStoreSize(Val->getType());
2367  if (Size == 0)
2368  return;
2369 
2370  // When an application store is atomic, increase atomic ordering between
2371  // atomic application loads and stores to ensure happens-before order; load
2372  // shadow data after application data; store zero shadow data before
2373  // application data. This ensures shadow loads return either labels of the
2374  // initial application data or zeros.
2375  if (SI.isAtomic())
2376  SI.setOrdering(addReleaseOrdering(SI.getOrdering()));
2377 
2378  const bool ShouldTrackOrigins =
2379  DFSF.DFS.shouldTrackOrigins() && !SI.isAtomic();
2380  std::vector<Value *> Shadows;
2381  std::vector<Value *> Origins;
2382 
2383  Value *Shadow =
2384  SI.isAtomic() ? DFSF.DFS.getZeroShadow(Val) : DFSF.getShadow(Val);
2385 
2386  if (ShouldTrackOrigins) {
2387  Shadows.push_back(Shadow);
2388  Origins.push_back(DFSF.getOrigin(Val));
2389  }
2390 
2391  Value *PrimitiveShadow;
2392  if (ClCombinePointerLabelsOnStore) {
2393  Value *PtrShadow = DFSF.getShadow(SI.getPointerOperand());
2394  if (ShouldTrackOrigins) {
2395  Shadows.push_back(PtrShadow);
2396  Origins.push_back(DFSF.getOrigin(SI.getPointerOperand()));
2397  }
2398  PrimitiveShadow = DFSF.combineShadows(Shadow, PtrShadow, &SI);
2399  } else {
2400  PrimitiveShadow = DFSF.collapseToPrimitiveShadow(Shadow, &SI);
2401  }
2402  Value *Origin = nullptr;
2403  if (ShouldTrackOrigins)
2404  Origin = DFSF.combineOrigins(Shadows, Origins, &SI);
2405  DFSF.storePrimitiveShadowOrigin(SI.getPointerOperand(), Size, SI.getAlign(),
2406  PrimitiveShadow, Origin, &SI);
2407  if (ClEventCallbacks) {
2408  IRBuilder<> IRB(&SI);
2409  Value *Addr8 = IRB.CreateBitCast(SI.getPointerOperand(), DFSF.DFS.Int8Ptr);
2410  IRB.CreateCall(DFSF.DFS.DFSanStoreCallbackFn, {PrimitiveShadow, Addr8});
2411  }
2412 }
2413 
2414 void DFSanVisitor::visitCASOrRMW(Align InstAlignment, Instruction &I) {
2415  assert(isa<AtomicRMWInst>(I) || isa<AtomicCmpXchgInst>(I));
2416 
2417  Value *Val = I.getOperand(1);
2418  const auto &DL = I.getModule()->getDataLayout();
2419  uint64_t Size = DL.getTypeStoreSize(Val->getType());
2420  if (Size == 0)
2421  return;
2422 
2423  // Conservatively set data at stored addresses and return with zero shadow to
2424  // prevent shadow data races.
2425  IRBuilder<> IRB(&I);
2426  Value *Addr = I.getOperand(0);
2427  const Align ShadowAlign = DFSF.getShadowAlign(InstAlignment);
2428  DFSF.storeZeroPrimitiveShadow(Addr, Size, ShadowAlign, &I);
2429  DFSF.setShadow(&I, DFSF.DFS.getZeroShadow(&I));
2430  DFSF.setOrigin(&I, DFSF.DFS.ZeroOrigin);
2431 }
2432 
2433 void DFSanVisitor::visitAtomicRMWInst(AtomicRMWInst &I) {
2434  visitCASOrRMW(I.getAlign(), I);
2435  // TODO: The ordering change follows MSan. It is possible not to change
2436  // ordering because we always set and use 0 shadows.
2437  I.setOrdering(addReleaseOrdering(I.getOrdering()));
2438 }
2439 
2440 void DFSanVisitor::visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
2441  visitCASOrRMW(I.getAlign(), I);
2442  // TODO: The ordering change follows MSan. It is possible not to change
2443  // ordering because we always set and use 0 shadows.
2444  I.setSuccessOrdering(addReleaseOrdering(I.getSuccessOrdering()));
2445 }
2446 
2447 void DFSanVisitor::visitUnaryOperator(UnaryOperator &UO) {
2448  visitInstOperands(UO);
2449 }
2450 
2451 void DFSanVisitor::visitBinaryOperator(BinaryOperator &BO) {
2452  visitInstOperands(BO);
2453 }
2454 
2455 void DFSanVisitor::visitBitCastInst(BitCastInst &BCI) {
2456  // Special case: if this is the bitcast (there is exactly 1 allowed) between
2457  // a musttail call and a ret, don't instrument. New instructions are not
2458  // allowed after a musttail call.
2459  if (auto *CI = dyn_cast<CallInst>(BCI.getOperand(0)))
2460  if (CI->isMustTailCall())
2461  return;
2462  visitInstOperands(BCI);
2463 }
2464 
2465 void DFSanVisitor::visitCastInst(CastInst &CI) { visitInstOperands(CI); }
2466 
2467 void DFSanVisitor::visitCmpInst(CmpInst &CI) {
2468  visitInstOperands(CI);
2469  if (ClEventCallbacks) {
2470  IRBuilder<> IRB(&CI);
2471  Value *CombinedShadow = DFSF.getShadow(&CI);
2472  IRB.CreateCall(DFSF.DFS.DFSanCmpCallbackFn, CombinedShadow);
2473  }
2474 }
2475 
2476 void DFSanVisitor::visitLandingPadInst(LandingPadInst &LPI) {
2477  // We do not need to track data through LandingPadInst.
2478  //
2479  // For C++ exceptions, if a value is thrown, this value will be stored
2480  // in a memory location provided by __cxa_allocate_exception(...) (on the
2481  // throw side) or __cxa_begin_catch(...) (on the catch side).
2482  // This memory will have a shadow, so with the loads and stores we will be
2483  // able to propagate labels on data thrown through exceptions, without any
2484  // special handling of the LandingPadInst.
2485  //
2486  // The second element in the pair result of the LandingPadInst is a
2487  // register value, but it is for a type ID and should never be tainted.
2488  DFSF.setShadow(&LPI, DFSF.DFS.getZeroShadow(&LPI));
2489  DFSF.setOrigin(&LPI, DFSF.DFS.ZeroOrigin);
2490 }
2491 
2492 void DFSanVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
2493  if (ClCombineOffsetLabelsOnGEP) {
2494  visitInstOperands(GEPI);
2495  return;
2496  }
2497 
2498  // Only propagate shadow/origin of base pointer value but ignore those of
2499  // offset operands.
2500  Value *BasePointer = GEPI.getPointerOperand();
2501  DFSF.setShadow(&GEPI, DFSF.getShadow(BasePointer));
2502  if (DFSF.DFS.shouldTrackOrigins())
2503  DFSF.setOrigin(&GEPI, DFSF.getOrigin(BasePointer));
2504 }
2505 
2506 void DFSanVisitor::visitExtractElementInst(ExtractElementInst &I) {
2507  visitInstOperands(I);
2508 }
2509 
2510 void DFSanVisitor::visitInsertElementInst(InsertElementInst &I) {
2511  visitInstOperands(I);
2512 }
2513 
2514 void DFSanVisitor::visitShuffleVectorInst(ShuffleVectorInst &I) {
2515  visitInstOperands(I);
2516 }
2517 
2518 void DFSanVisitor::visitExtractValueInst(ExtractValueInst &I) {
2519  IRBuilder<> IRB(&I);
2520  Value *Agg = I.getAggregateOperand();
2521  Value *AggShadow = DFSF.getShadow(Agg);
2522  Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices());
2523  DFSF.setShadow(&I, ResShadow);
2524  visitInstOperandOrigins(I);
2525 }
2526 
2527 void DFSanVisitor::visitInsertValueInst(InsertValueInst &I) {
2528  IRBuilder<> IRB(&I);
2529  Value *AggShadow = DFSF.getShadow(I.getAggregateOperand());
2530  Value *InsShadow = DFSF.getShadow(I.getInsertedValueOperand());
2531  Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices());
2532  DFSF.setShadow(&I, Res);
2533  visitInstOperandOrigins(I);
2534 }
2535 
2536 void DFSanVisitor::visitAllocaInst(AllocaInst &I) {
2537  bool AllLoadsStores = true;
2538  for (User *U : I.users()) {
2539  if (isa<LoadInst>(U))
2540  continue;
2541 
2542  if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
2543  if (SI->getPointerOperand() == &I)
2544  continue;
2545  }
2546 
2547  AllLoadsStores = false;
2548  break;
2549  }
2550  if (AllLoadsStores) {
2551  IRBuilder<> IRB(&I);
2552  DFSF.AllocaShadowMap[&I] = IRB.CreateAlloca(DFSF.DFS.PrimitiveShadowTy);
2553  if (DFSF.DFS.shouldTrackOrigins()) {
2554  DFSF.AllocaOriginMap[&I] =
2555  IRB.CreateAlloca(DFSF.DFS.OriginTy, nullptr, "_dfsa");
2556  }
2557  }
2558  DFSF.setShadow(&I, DFSF.DFS.ZeroPrimitiveShadow);
2559  DFSF.setOrigin(&I, DFSF.DFS.ZeroOrigin);
2560 }
2561 
2562 void DFSanVisitor::visitSelectInst(SelectInst &I) {
2563  Value *CondShadow = DFSF.getShadow(I.getCondition());
2564  Value *TrueShadow = DFSF.getShadow(I.getTrueValue());
2565  Value *FalseShadow = DFSF.getShadow(I.getFalseValue());
2566  Value *ShadowSel = nullptr;
2567  const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
2568  std::vector<Value *> Shadows;
2569  std::vector<Value *> Origins;
2570  Value *TrueOrigin =
2571  ShouldTrackOrigins ? DFSF.getOrigin(I.getTrueValue()) : nullptr;
2572  Value *FalseOrigin =
2573  ShouldTrackOrigins ? DFSF.getOrigin(I.getFalseValue()) : nullptr;
2574 
2575  if (isa<VectorType>(I.getCondition()->getType())) {
2576  ShadowSel = DFSF.combineShadowsThenConvert(I.getType(), TrueShadow,
2577  FalseShadow, &I);
2578  if (ShouldTrackOrigins) {
2579  Shadows.push_back(TrueShadow);
2580  Shadows.push_back(FalseShadow);
2581  Origins.push_back(TrueOrigin);
2582  Origins.push_back(FalseOrigin);
2583  }
2584  } else {
2585  if (TrueShadow == FalseShadow) {
2586  ShadowSel = TrueShadow;
2587  if (ShouldTrackOrigins) {
2588  Shadows.push_back(TrueShadow);
2589  Origins.push_back(TrueOrigin);
2590  }
2591  } else {
2592  ShadowSel =
2593  SelectInst::Create(I.getCondition(), TrueShadow, FalseShadow, "", &I);
2594  if (ShouldTrackOrigins) {
2595  Shadows.push_back(ShadowSel);
2596  Origins.push_back(SelectInst::Create(I.getCondition(), TrueOrigin,
2597  FalseOrigin, "", &I));
2598  }
2599  }
2600  }
2601  DFSF.setShadow(&I, ClTrackSelectControlFlow
2602  ? DFSF.combineShadowsThenConvert(
2603  I.getType(), CondShadow, ShadowSel, &I)
2604  : ShadowSel);
2605  if (ShouldTrackOrigins) {
2606  if (ClTrackSelectControlFlow) {
2607  Shadows.push_back(CondShadow);
2608  Origins.push_back(DFSF.getOrigin(I.getCondition()));
2609  }
2610  DFSF.setOrigin(&I, DFSF.combineOrigins(Shadows, Origins, &I));
2611  }
2612 }
2613 
2614 void DFSanVisitor::visitMemSetInst(MemSetInst &I) {
2615  IRBuilder<> IRB(&I);
2616  Value *ValShadow = DFSF.getShadow(I.getValue());
2617  Value *ValOrigin = DFSF.DFS.shouldTrackOrigins()
2618  ? DFSF.getOrigin(I.getValue())
2619  : DFSF.DFS.ZeroOrigin;
2620  IRB.CreateCall(
2621  DFSF.DFS.DFSanSetLabelFn,
2622  {ValShadow, ValOrigin,
2623  IRB.CreateBitCast(I.getDest(), Type::getInt8PtrTy(*DFSF.DFS.Ctx)),
2624  IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)});
2625 }
2626 
2627 void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) {
2628  IRBuilder<> IRB(&I);
2629 
2630  // CopyOrMoveOrigin transfers origins by referring to their shadows. So we
2631  // need to move origins before moving shadows.
2632  if (DFSF.DFS.shouldTrackOrigins()) {
2633  IRB.CreateCall(
2634  DFSF.DFS.DFSanMemOriginTransferFn,
2635  {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
2636  IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
2637  IRB.CreateIntCast(I.getArgOperand(2), DFSF.DFS.IntptrTy, false)});
2638  }
2639 
2640  Value *RawDestShadow = DFSF.DFS.getShadowAddress(I.getDest(), &I);
2641  Value *SrcShadow = DFSF.DFS.getShadowAddress(I.getSource(), &I);
2642  Value *LenShadow =
2643  IRB.CreateMul(I.getLength(), ConstantInt::get(I.getLength()->getType(),
2644  DFSF.DFS.ShadowWidthBytes));
2645  Type *Int8Ptr = Type::getInt8PtrTy(*DFSF.DFS.Ctx);
2646  Value *DestShadow = IRB.CreateBitCast(RawDestShadow, Int8Ptr);
2647  SrcShadow = IRB.CreateBitCast(SrcShadow, Int8Ptr);
2648  auto *MTI = cast<MemTransferInst>(
2649  IRB.CreateCall(I.getFunctionType(), I.getCalledOperand(),
2650  {DestShadow, SrcShadow, LenShadow, I.getVolatileCst()}));
2651  if (ClPreserveAlignment) {
2652  MTI->setDestAlignment(I.getDestAlign() * DFSF.DFS.ShadowWidthBytes);
2653  MTI->setSourceAlignment(I.getSourceAlign() * DFSF.DFS.ShadowWidthBytes);
2654  } else {
2655  MTI->setDestAlignment(Align(DFSF.DFS.ShadowWidthBytes));
2656  MTI->setSourceAlignment(Align(DFSF.DFS.ShadowWidthBytes));
2657  }
2658  if (ClEventCallbacks) {
2659  IRB.CreateCall(DFSF.DFS.DFSanMemTransferCallbackFn,
2660  {RawDestShadow,
2661  IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)});
2662  }
2663 }
2664 
2665 static bool isAMustTailRetVal(Value *RetVal) {
2666  // A musttail call may have a bitcast between the call and the return.
2667  if (auto *I = dyn_cast<BitCastInst>(RetVal)) {
2668  RetVal = I->getOperand(0);
2669  }
2670  if (auto *I = dyn_cast<CallInst>(RetVal)) {
2671  return I->isMustTailCall();
2672  }
2673  return false;
2674 }
2675 
2676 void DFSanVisitor::visitReturnInst(ReturnInst &RI) {
2677  if (!DFSF.IsNativeABI && RI.getReturnValue()) {
2678  // Don't emit the instrumentation for musttail call returns.
2679  if (isAMustTailRetVal(RI.getReturnValue()))
2680  return;
2681 
2682  Value *S = DFSF.getShadow(RI.getReturnValue());
2683  IRBuilder<> IRB(&RI);
2684  Type *RT = DFSF.F->getFunctionType()->getReturnType();
2685  unsigned Size = getDataLayout().getTypeAllocSize(DFSF.DFS.getShadowTy(RT));
2686  if (Size <= RetvalTLSSize) {
2687  // If the size overflows, nothing is stored. At the call site, oversized return
2688  // shadows are set to zero.
2689  IRB.CreateAlignedStore(S, DFSF.getRetvalTLS(RT, IRB), ShadowTLSAlignment);
2690  }
2691  if (DFSF.DFS.shouldTrackOrigins()) {
2692  Value *O = DFSF.getOrigin(RI.getReturnValue());
2693  IRB.CreateStore(O, DFSF.getRetvalOriginTLS());
2694  }
2695  }
2696 }
2697 
2698 void DFSanVisitor::addShadowArguments(Function &F, CallBase &CB,
2699  std::vector<Value *> &Args,
2700  IRBuilder<> &IRB) {
2701  FunctionType *FT = F.getFunctionType();
2702 
2703  auto *I = CB.arg_begin();
2704 
2705  // Adds non-variable argument shadows.
2706  for (unsigned N = FT->getNumParams(); N != 0; ++I, --N)
2707  Args.push_back(DFSF.collapseToPrimitiveShadow(DFSF.getShadow(*I), &CB));
2708 
2709  // Adds variable argument shadows.
2710  if (FT->isVarArg()) {
2711  auto *LabelVATy = ArrayType::get(DFSF.DFS.PrimitiveShadowTy,
2712  CB.arg_size() - FT->getNumParams());
2713  auto *LabelVAAlloca =
2714  new AllocaInst(LabelVATy, getDataLayout().getAllocaAddrSpace(),
2715  "labelva", &DFSF.F->getEntryBlock().front());
2716 
2717  for (unsigned N = 0; I != CB.arg_end(); ++I, ++N) {
2718  auto *LabelVAPtr = IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, N);
2719  IRB.CreateStore(DFSF.collapseToPrimitiveShadow(DFSF.getShadow(*I), &CB),
2720  LabelVAPtr);
2721  }
2722 
2723  Args.push_back(IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, 0));
2724  }
2725 
2726  // Adds the return value shadow.
2727  if (!FT->getReturnType()->isVoidTy()) {
2728  if (!DFSF.LabelReturnAlloca) {
2729  DFSF.LabelReturnAlloca = new AllocaInst(
2730  DFSF.DFS.PrimitiveShadowTy, getDataLayout().getAllocaAddrSpace(),
2731  "labelreturn", &DFSF.F->getEntryBlock().front());
2732  }
2733  Args.push_back(DFSF.LabelReturnAlloca);
2734  }
2735 }
2736 
2737 void DFSanVisitor::addOriginArguments(Function &F, CallBase &CB,
2738  std::vector<Value *> &Args,
2739  IRBuilder<> &IRB) {
2740  FunctionType *FT = F.getFunctionType();
2741 
2742  auto *I = CB.arg_begin();
2743 
2744  // Add non-variable argument origins.
2745  for (unsigned N = FT->getNumParams(); N != 0; ++I, --N)
2746  Args.push_back(DFSF.getOrigin(*I));
2747 
2748  // Add variable argument origins.
2749  if (FT->isVarArg()) {
2750  auto *OriginVATy =
2751  ArrayType::get(DFSF.DFS.OriginTy, CB.arg_size() - FT->getNumParams());
2752  auto *OriginVAAlloca =
2753  new AllocaInst(OriginVATy, getDataLayout().getAllocaAddrSpace(),
2754  "originva", &DFSF.F->getEntryBlock().front());
2755 
2756  for (unsigned N = 0; I != CB.arg_end(); ++I, ++N) {
2757  auto *OriginVAPtr = IRB.CreateStructGEP(OriginVATy, OriginVAAlloca, N);
2758  IRB.CreateStore(DFSF.getOrigin(*I), OriginVAPtr);
2759  }
2760 
2761  Args.push_back(IRB.CreateStructGEP(OriginVATy, OriginVAAlloca, 0));
2762  }
2763 
2764  // Add the return value origin.
2765  if (!FT->getReturnType()->isVoidTy()) {
2766  if (!DFSF.OriginReturnAlloca) {
2767  DFSF.OriginReturnAlloca = new AllocaInst(
2768  DFSF.DFS.OriginTy, getDataLayout().getAllocaAddrSpace(),
2769  "originreturn", &DFSF.F->getEntryBlock().front());
2770  }
2771  Args.push_back(DFSF.OriginReturnAlloca);
2772  }
2773 }
2774 
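// Taken together, the two helpers above give custom wrappers their extended
// signature: original arguments, then argument labels, then a pointer for
// the return label, then (with origin tracking) argument origins and a
// pointer for the return origin. A sketch for a callee `int f(int, int)`
// (illustrative only; the typedefs are assumptions standing in for the 8-bit
// shadow and 32-bit origin types):
//
//   #include <cstdint>
//   typedef uint8_t dfsan_label;   // assumed shadow type
//   typedef uint32_t dfsan_origin; // assumed origin type
//
//   int __dfsw_f(int a, int b,
//                dfsan_label a_label, dfsan_label b_label,
//                dfsan_label *ret_label);
//
//   int __dfso_f(int a, int b,
//                dfsan_label a_label, dfsan_label b_label,
//                dfsan_label *ret_label,
//                dfsan_origin a_origin, dfsan_origin b_origin,
//                dfsan_origin *ret_origin);
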
2775 bool DFSanVisitor::visitWrappedCallBase(Function &F, CallBase &CB) {
2776  IRBuilder<> IRB(&CB);
2777  switch (DFSF.DFS.getWrapperKind(&F)) {
2778  case DataFlowSanitizer::WK_Warning:
2779  CB.setCalledFunction(&F);
2780  IRB.CreateCall(DFSF.DFS.DFSanUnimplementedFn,
2781  IRB.CreateGlobalStringPtr(F.getName()));
2782  DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
2783  DFSF.setOrigin(&CB, DFSF.DFS.ZeroOrigin);
2784  return true;
2785  case DataFlowSanitizer::WK_Discard:
2786  CB.setCalledFunction(&F);
2787  DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
2788  DFSF.setOrigin(&CB, DFSF.DFS.ZeroOrigin);
2789  return true;
2790  case DataFlowSanitizer::WK_Functional:
2791  CB.setCalledFunction(&F);
2792  visitInstOperands(CB);
2793  return true;
2794  case DataFlowSanitizer::WK_Custom:
2795  // Don't try to handle invokes of custom functions, it's too complicated.
2796  // Instead, invoke the dfsw$ wrapper, which will in turn call the __dfsw_
2797  // wrapper.
2798  CallInst *CI = dyn_cast<CallInst>(&CB);
2799  if (!CI)
2800  return false;
2801 
2802  const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
2803  FunctionType *FT = F.getFunctionType();
2804  TransformedFunction CustomFn = DFSF.DFS.getCustomFunctionType(FT);
2805  std::string CustomFName = ShouldTrackOrigins ? "__dfso_" : "__dfsw_";
2806  CustomFName += F.getName();
2807  FunctionCallee CustomF = DFSF.DFS.Mod->getOrInsertFunction(
2808  CustomFName, CustomFn.TransformedType);
2809  if (Function *CustomFn = dyn_cast<Function>(CustomF.getCallee())) {
2810  CustomFn->copyAttributesFrom(&F);
2811 
2812  // Custom functions returning non-void will write to the return label.
2813  if (!FT->getReturnType()->isVoidTy()) {
2814  CustomFn->removeFnAttrs(DFSF.DFS.ReadOnlyNoneAttrs);
2815  }
2816  }
2817 
2818  std::vector<Value *> Args;
2819 
2820  // Adds non-variable arguments.
2821  auto *I = CB.arg_begin();
2822  for (unsigned N = FT->getNumParams(); N != 0; ++I, --N) {
2823  Type *T = (*I)->getType();
2824  FunctionType *ParamFT;
2825  if (isa<PointerType>(T) &&
2826  (ParamFT = dyn_cast<FunctionType>(T->getPointerElementType()))) {
2827  std::string TName = "dfst";
2828  TName += utostr(FT->getNumParams() - N);
2829  TName += "$";
2830  TName += F.getName();
2831  Constant *Trampoline =
2832  DFSF.DFS.getOrBuildTrampolineFunction(ParamFT, TName);
2833  Args.push_back(Trampoline);
2834  Args.push_back(
2835  IRB.CreateBitCast(*I, Type::getInt8PtrTy(*DFSF.DFS.Ctx)));
2836  } else {
2837  Args.push_back(*I);
2838  }
2839  }
2840 
2841  // Adds shadow arguments.
2842  const unsigned ShadowArgStart = Args.size();
2843  addShadowArguments(F, CB, Args, IRB);
2844 
2845  // Adds origin arguments.
2846  const unsigned OriginArgStart = Args.size();
2847  if (ShouldTrackOrigins)
2848  addOriginArguments(F, CB, Args, IRB);
2849 
2850  // Adds variable arguments.
2851  append_range(Args, drop_begin(CB.args(), FT->getNumParams()));
2852 
2853  CallInst *CustomCI = IRB.CreateCall(CustomF, Args);
2854  CustomCI->setCallingConv(CI->getCallingConv());
2855  CustomCI->setAttributes(transformFunctionAttributes(
2856  CustomFn, CI->getContext(), CI->getAttributes()));
2857 
2858  // Update the parameter attributes of the custom call instruction to
2859  // zero extend the shadow parameters. This is required for targets
2860  // which consider PrimitiveShadowTy an illegal type.
2861  for (unsigned N = 0; N < FT->getNumParams(); N++) {
2862  const unsigned ArgNo = ShadowArgStart + N;
2863  if (CustomCI->getArgOperand(ArgNo)->getType() ==
2864  DFSF.DFS.PrimitiveShadowTy)
2865  CustomCI->addParamAttr(ArgNo, Attribute::ZExt);
2866  if (ShouldTrackOrigins) {
2867  const unsigned OriginArgNo = OriginArgStart + N;
2868  if (CustomCI->getArgOperand(OriginArgNo)->getType() ==
2869  DFSF.DFS.OriginTy)
2870  CustomCI->addParamAttr(OriginArgNo, Attribute::ZExt);
2871  }
2872  }
2873 
2874  // Loads the return value shadow and origin.
2875  if (!FT->getReturnType()->isVoidTy()) {
2876  LoadInst *LabelLoad =
2877  IRB.CreateLoad(DFSF.DFS.PrimitiveShadowTy, DFSF.LabelReturnAlloca);
2878  DFSF.setShadow(CustomCI, DFSF.expandFromPrimitiveShadow(
2879  FT->getReturnType(), LabelLoad, &CB));
2880  if (ShouldTrackOrigins) {
2881  LoadInst *OriginLoad =
2882  IRB.CreateLoad(DFSF.DFS.OriginTy, DFSF.OriginReturnAlloca);
2883  DFSF.setOrigin(CustomCI, OriginLoad);
2884  }
2885  }
2886 
2887  CI->replaceAllUsesWith(CustomCI);
2888  CI->eraseFromParent();
2889  return true;
2890  }
2891  return false;
2892 }
2893 
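// For illustration only (hypothetical names, not part of the source): a call
// to an ABI-list "custom" function declared as
//   int f(int a, int b);
// is redirected by the code above to a wrapper of the form
//   int __dfsw_f(int a, int b,
//                dfsan_label a_label, dfsan_label b_label,
//                dfsan_label *ret_label);
// (or a __dfso_f variant with additional origin parameters when origin
// tracking is enabled); a function-pointer argument is passed as a generated
// dfst<argno>$f trampoline together with the original pointer cast to i8*.
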
2894 void DFSanVisitor::visitCallBase(CallBase &CB) {
2895  Function *F = CB.getCalledFunction();
2896  if ((F && F->isIntrinsic()) || CB.isInlineAsm()) {
2897  visitInstOperands(CB);
2898  return;
2899  }
2900 
2901  // Calls to this function are synthesized in wrappers, and we shouldn't
2902  // instrument them.
2903  if (F == DFSF.DFS.DFSanVarargWrapperFn.getCallee()->stripPointerCasts())
2904  return;
2905 
 2906  DenseMap<Value *, Function *>::iterator UnwrappedFnIt =
 2907  DFSF.DFS.UnwrappedFnMap.find(CB.getCalledOperand());
2908  if (UnwrappedFnIt != DFSF.DFS.UnwrappedFnMap.end())
2909  if (visitWrappedCallBase(*UnwrappedFnIt->second, CB))
2910  return;
2911 
2912  IRBuilder<> IRB(&CB);
2913 
2914  const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
2915  FunctionType *FT = CB.getFunctionType();
2916  const DataLayout &DL = getDataLayout();
2917 
2918  // Stores argument shadows.
2919  unsigned ArgOffset = 0;
2920  for (unsigned I = 0, N = FT->getNumParams(); I != N; ++I) {
2921  if (ShouldTrackOrigins) {
2922  // Ignore overflowed origins
2923  Value *ArgShadow = DFSF.getShadow(CB.getArgOperand(I));
2924  if (I < DFSF.DFS.NumOfElementsInArgOrgTLS &&
2925  !DFSF.DFS.isZeroShadow(ArgShadow))
2926  IRB.CreateStore(DFSF.getOrigin(CB.getArgOperand(I)),
2927  DFSF.getArgOriginTLS(I, IRB));
2928  }
2929 
2930  unsigned Size =
2931  DL.getTypeAllocSize(DFSF.DFS.getShadowTy(FT->getParamType(I)));
2932  // Stop storing if arguments' size overflows. Inside a function, arguments
2933  // after overflow have zero shadow values.
2934  if (ArgOffset + Size > ArgTLSSize)
2935  break;
2936  IRB.CreateAlignedStore(DFSF.getShadow(CB.getArgOperand(I)),
 2937  DFSF.getArgTLS(FT->getParamType(I), ArgOffset, IRB),
 2938  ShadowTLSAlignment);
2939  ArgOffset += alignTo(Size, ShadowTLSAlignment);
2940  }
2941 
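  // The code below chooses where the return shadow will be read back:
  // immediately after a plain call, or at the start of an invoke's normal
  // destination (splitting the edge when that block has other predecessors,
  // so the load only runs when the call returns normally).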
2942  Instruction *Next = nullptr;
2943  if (!CB.getType()->isVoidTy()) {
2944  if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
2945  if (II->getNormalDest()->getSinglePredecessor()) {
2946  Next = &II->getNormalDest()->front();
2947  } else {
2948  BasicBlock *NewBB =
2949  SplitEdge(II->getParent(), II->getNormalDest(), &DFSF.DT);
2950  Next = &NewBB->front();
2951  }
2952  } else {
2953  assert(CB.getIterator() != CB.getParent()->end());
2954  Next = CB.getNextNode();
2955  }
2956 
2957  // Don't emit the epilogue for musttail call returns.
2958  if (isa<CallInst>(CB) && cast<CallInst>(CB).isMustTailCall())
2959  return;
2960 
2961  // Loads the return value shadow.
2962  IRBuilder<> NextIRB(Next);
2963  unsigned Size = DL.getTypeAllocSize(DFSF.DFS.getShadowTy(&CB));
2964  if (Size > RetvalTLSSize) {
2965  // Set overflowed return shadow to be zero.
2966  DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
2967  } else {
2968  LoadInst *LI = NextIRB.CreateAlignedLoad(
2969  DFSF.DFS.getShadowTy(&CB), DFSF.getRetvalTLS(CB.getType(), NextIRB),
2970  ShadowTLSAlignment, "_dfsret");
2971  DFSF.SkipInsts.insert(LI);
2972  DFSF.setShadow(&CB, LI);
2973  DFSF.NonZeroChecks.push_back(LI);
2974  }
2975 
2976  if (ShouldTrackOrigins) {
2977  LoadInst *LI = NextIRB.CreateLoad(DFSF.DFS.OriginTy,
2978  DFSF.getRetvalOriginTLS(), "_dfsret_o");
2979  DFSF.SkipInsts.insert(LI);
2980  DFSF.setOrigin(&CB, LI);
2981  }
2982  }
2983 }
2984 
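// For illustration only (hypothetical example, not part of the source): for a
// non-custom call such as "r = f(a, b)", the loop above stores the shadows of
// a and b into the argument TLS buffer at offsets 0 and
// alignTo(<shadow size of a>, ShadowTLSAlignment), stopping at the first
// argument whose shadow would overflow ArgTLSSize (those arguments get zero
// shadow in the callee). The epilogue then reloads r's shadow, and its origin
// when enabled, from the return-value TLS slot, or uses zero shadow if the
// return shadow would overflow RetvalTLSSize.
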
2985 void DFSanVisitor::visitPHINode(PHINode &PN) {
2986  Type *ShadowTy = DFSF.DFS.getShadowTy(&PN);
2987  PHINode *ShadowPN =
2988  PHINode::Create(ShadowTy, PN.getNumIncomingValues(), "", &PN);
2989 
2990  // Give the shadow phi node valid predecessors to fool SplitEdge into working.
2991  Value *UndefShadow = UndefValue::get(ShadowTy);
2992  for (BasicBlock *BB : PN.blocks())
2993  ShadowPN->addIncoming(UndefShadow, BB);
2994 
2995  DFSF.setShadow(&PN, ShadowPN);
2996 
2997  PHINode *OriginPN = nullptr;
2998  if (DFSF.DFS.shouldTrackOrigins()) {
2999  OriginPN =
3000  PHINode::Create(DFSF.DFS.OriginTy, PN.getNumIncomingValues(), "", &PN);
3001  Value *UndefOrigin = UndefValue::get(DFSF.DFS.OriginTy);
3002  for (BasicBlock *BB : PN.blocks())
3003  OriginPN->addIncoming(UndefOrigin, BB);
3004  DFSF.setOrigin(&PN, OriginPN);
3005  }
3006 
3007  DFSF.PHIFixups.push_back({&PN, ShadowPN, OriginPN});
3008 }
3009 
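// Note on visitPHINode above: the shadow and origin phis start out with undef
// incoming values because the shadows of the incoming values may not have
// been computed yet; the recorded PHIFixups entries are used to patch in the
// real incoming shadows and origins once the whole function has been visited.
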
3010 namespace {
3011 class DataFlowSanitizerLegacyPass : public ModulePass {
3012 private:
3013  std::vector<std::string> ABIListFiles;
3014 
3015 public:
3016  static char ID;
3017 
3018  DataFlowSanitizerLegacyPass(
3019  const std::vector<std::string> &ABIListFiles = std::vector<std::string>())
3020  : ModulePass(ID), ABIListFiles(ABIListFiles) {}
3021 
3022  bool runOnModule(Module &M) override {
3023  return DataFlowSanitizer(ABIListFiles).runImpl(M);
3024  }
3025 };
3026 } // namespace
3027 
 3028 char DataFlowSanitizerLegacyPass::ID;
 3029 
3030 INITIALIZE_PASS(DataFlowSanitizerLegacyPass, "dfsan",
3031  "DataFlowSanitizer: dynamic data flow analysis.", false, false)
3032 
 3033 ModulePass *llvm::createDataFlowSanitizerLegacyPassPass(
 3034  const std::vector<std::string> &ABIListFiles) {
3035  return new DataFlowSanitizerLegacyPass(ABIListFiles);
3036 }
3037 
3038 PreservedAnalyses DataFlowSanitizerPass::run(Module &M,
3039  ModuleAnalysisManager &AM) {
3040  if (DataFlowSanitizer(ABIListFiles).runImpl(M)) {
3041  return PreservedAnalyses::none();
3042  }
3043  return PreservedAnalyses::all();
3044 }
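
// A minimal way to exercise the pass (a sketch; exact option spellings are
// assumed to match a typical LLVM 14 toolchain):
//   clang -fsanitize=dataflow foo.c                # whole-program use
//   opt -passes=dfsan foo.ll -S -o foo.instr.ll    # run just this module pass
// Instrumented code can then query labels through the public dfsan interface,
// e.g. dfsan_get_label()/dfsan_set_label() declared in
// sanitizer/dfsan_interface.h.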