LLVM 20.0.0git
Functions | Variables
DataFlowSanitizer.cpp File Reference

This file is a part of DataFlowSanitizer, a generalised dynamic data flow analysis. More...

#include "llvm/Transforms/Instrumentation/DataFlowSanitizer.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/iterator.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SpecialCaseList.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>

Go to the source code of this file.

Functions

static StringRef getGlobalTypeString (const GlobalValue &G)
 
static Value * expandFromPrimitiveShadowRecursive (Value *Shadow, SmallVector< unsigned, 4 > &Indices, Type *SubShadowTy, Value *PrimitiveShadow, IRBuilder<> &IRB)
 
static AtomicOrdering addAcquireOrdering (AtomicOrdering AO)
 
Value * StripPointerGEPsAndCasts (Value *V)
 
static AtomicOrdering addReleaseOrdering (AtomicOrdering AO)
 
static bool isAMustTailRetVal (Value *RetVal)
 

Variables

static const Align ShadowTLSAlignment = Align(2)
 
static const Align MinOriginAlignment = Align(4)
 
static const unsigned ArgTLSSize = 800
 
static const unsigned RetvalTLSSize = 800
 
static cl::opt< bool > ClPreserveAlignment ("dfsan-preserve-alignment", cl::desc("respect alignment requirements provided by input IR"), cl::Hidden, cl::init(false))
 
static cl::list< std::string > ClABIListFiles ("dfsan-abilist", cl::desc("File listing native ABI functions and how the pass treats them"), cl::Hidden)
 
static cl::opt< bool > ClCombinePointerLabelsOnLoad ("dfsan-combine-pointer-labels-on-load", cl::desc("Combine the label of the pointer with the label of the data when " "loading from memory."), cl::Hidden, cl::init(true))
 
static cl::opt< bool > ClCombinePointerLabelsOnStore ("dfsan-combine-pointer-labels-on-store", cl::desc("Combine the label of the pointer with the label of the data when " "storing in memory."), cl::Hidden, cl::init(false))
 
static cl::opt< bool > ClCombineOffsetLabelsOnGEP ("dfsan-combine-offset-labels-on-gep", cl::desc("Combine the label of the offset with the label of the pointer when " "doing pointer arithmetic."), cl::Hidden, cl::init(true))
 
static cl::list< std::string > ClCombineTaintLookupTables ("dfsan-combine-taint-lookup-table", cl::desc("When dfsan-combine-offset-labels-on-gep and/or " "dfsan-combine-pointer-labels-on-load are false, this flag can " "be used to re-enable combining offset and/or pointer taint when " "loading specific constant global variables (i.e. lookup tables)."), cl::Hidden)
 
static cl::opt< bool > ClDebugNonzeroLabels ("dfsan-debug-nonzero-labels", cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, " "load or return with a nonzero label"), cl::Hidden)
 
static cl::opt< bool > ClEventCallbacks ("dfsan-event-callbacks", cl::desc("Insert calls to __dfsan_*_callback functions on data events."), cl::Hidden, cl::init(false))
 
static cl::opt< bool > ClConditionalCallbacks ("dfsan-conditional-callbacks", cl::desc("Insert calls to callback functions on conditionals."), cl::Hidden, cl::init(false))
 
static cl::opt< bool > ClReachesFunctionCallbacks ("dfsan-reaches-function-callbacks", cl::desc("Insert calls to callback functions on data reaching a function."), cl::Hidden, cl::init(false))
 
static cl::opt< bool > ClTrackSelectControlFlow ("dfsan-track-select-control-flow", cl::desc("Propagate labels from condition values of select instructions " "to results."), cl::Hidden, cl::init(true))
 
static cl::opt< int > ClInstrumentWithCallThreshold ("dfsan-instrument-with-call-threshold", cl::desc("If the function being instrumented requires more than " "this number of origin stores, use callbacks instead of " "inline checks (-1 means never use callbacks)."), cl::Hidden, cl::init(3500))
 
static cl::opt< int > ClTrackOrigins ("dfsan-track-origins", cl::desc("Track origins of labels"), cl::Hidden, cl::init(0))
 
static cl::opt< bool > ClIgnorePersonalityRoutine ("dfsan-ignore-personality-routine", cl::desc("If a personality routine is marked uninstrumented from the ABI " "list, do not create a wrapper for it."), cl::Hidden, cl::init(false))
 
const MemoryMapParams Linux_AArch64_MemoryMapParams
 
const MemoryMapParams Linux_X86_64_MemoryMapParams
 
const MemoryMapParams Linux_LoongArch64_MemoryMapParams
 

Detailed Description

This file is a part of DataFlowSanitizer, a generalised dynamic data flow analysis.

Unlike other Sanitizer tools, this tool is not designed to detect a specific class of bugs on its own. Instead, it provides a generic dynamic data flow analysis framework to be used by clients to help detect application-specific issues within their own code.

The analysis is based on automatic propagation of data flow labels (also known as taint labels) through a program as it performs computation.

Argument and return value labels are passed through TLS variables __dfsan_arg_tls and __dfsan_retval_tls.

Each byte of application memory is backed by a shadow memory byte. The shadow byte can represent up to 8 labels. On Linux/x86_64, memory is then laid out as follows:

+--------------------+ 0x800000000000 (top of memory)
|    application 3   |
+--------------------+ 0x700000000000
|      invalid       |
+--------------------+ 0x610000000000
|      origin 1      |
+--------------------+ 0x600000000000
|    application 2   |
+--------------------+ 0x510000000000
|      shadow 1      |
+--------------------+ 0x500000000000
|      invalid       |
+--------------------+ 0x400000000000
|      origin 3      |
+--------------------+ 0x300000000000
|      shadow 3      |
+--------------------+ 0x200000000000
|      origin 2      |
+--------------------+ 0x110000000000
|      invalid       |
+--------------------+ 0x100000000000
|      shadow 2      |
+--------------------+ 0x010000000000
|    application 1   |
+--------------------+ 0x000000000000

MEM_TO_SHADOW(mem) = mem ^ 0x500000000000
SHADOW_TO_ORIGIN(shadow) = shadow + 0x100000000000

For more information, please refer to the design document: http://clang.llvm.org/docs/DataFlowSanitizerDesign.html

Definition in file DataFlowSanitizer.cpp.

Function Documentation

◆ addAcquireOrdering()

static AtomicOrdering addAcquireOrdering ( AtomicOrdering  AO)
static

Definition at line 2354 of file DataFlowSanitizer.cpp.

References llvm_unreachable.

◆ addReleaseOrdering()

static AtomicOrdering addReleaseOrdering ( AtomicOrdering  AO)
static

Definition at line 2644 of file DataFlowSanitizer.cpp.

References llvm_unreachable.

◆ expandFromPrimitiveShadowRecursive()

static Value * expandFromPrimitiveShadowRecursive ( Value *  Shadow,
SmallVector< unsigned, 4 > &  Indices,
Type *  SubShadowTy,
Value *  PrimitiveShadow,
IRBuilder<> &  IRB 
)
static

◆ getGlobalTypeString()

static StringRef getGlobalTypeString ( const GlobalValue &  G)
static

Definition at line 264 of file DataFlowSanitizer.cpp.

References G.

◆ isAMustTailRetVal()

static bool isAMustTailRetVal ( Value *  RetVal)
static

Definition at line 2971 of file DataFlowSanitizer.cpp.

References I.

◆ StripPointerGEPsAndCasts()

Value * StripPointerGEPsAndCasts ( Value *  V)

Variable Documentation

◆ ArgTLSSize

const unsigned ArgTLSSize = 800
static

Definition at line 129 of file DataFlowSanitizer.cpp.

◆ ClABIListFiles

cl::list< std::string > ClABIListFiles("dfsan-abilist", cl::desc("File listing native ABI functions and how the pass treats them"), cl::Hidden) ( "dfsan-abilist"  ,
cl::desc("File listing native ABI functions and how the pass treats them")  ,
cl::Hidden   
)
static

◆ ClCombineOffsetLabelsOnGEP

cl::opt< bool > ClCombineOffsetLabelsOnGEP("dfsan-combine-offset-labels-on-gep", cl::desc( "Combine the label of the offset with the label of the pointer when " "doing pointer arithmetic."), cl::Hidden, cl::init(true)) ( "dfsan-combine-offset-labels-on-gep"  ,
cl::desc( "Combine the label of the offset with the label of the pointer when " "doing pointer arithmetic.")  ,
cl::Hidden  ,
cl::init(true)   
)
static

◆ ClCombinePointerLabelsOnLoad

cl::opt< bool > ClCombinePointerLabelsOnLoad("dfsan-combine-pointer-labels-on-load", cl::desc("Combine the label of the pointer with the label of the data when " "loading from memory."), cl::Hidden, cl::init(true)) ( "dfsan-combine-pointer-labels-on-load"  ,
cl::desc("Combine the label of the pointer with the label of the data when " "loading from memory.")  ,
cl::Hidden  ,
cl::init(true)   
)
static

◆ ClCombinePointerLabelsOnStore

cl::opt< bool > ClCombinePointerLabelsOnStore("dfsan-combine-pointer-labels-on-store", cl::desc("Combine the label of the pointer with the label of the data when " "storing in memory."), cl::Hidden, cl::init(false)) ( "dfsan-combine-pointer-labels-on-store"  ,
cl::desc("Combine the label of the pointer with the label of the data when " "storing in memory.")  ,
cl::Hidden  ,
cl::init(false)   
)
static

◆ ClCombineTaintLookupTables

cl::list< std::string > ClCombineTaintLookupTables("dfsan-combine-taint-lookup-table", cl::desc( "When dfsan-combine-offset-labels-on-gep and/or " "dfsan-combine-pointer-labels-on-load are false, this flag can " "be used to re-enable combining offset and/or pointer taint when " "loading specific constant global variables (i.e. lookup tables)."), cl::Hidden) ( "dfsan-combine-taint-lookup-table"  ,
cl::desc( "When dfsan-combine-offset-labels-on-gep and/or " "dfsan-combine-pointer-labels-on-load are false, this flag can " "be used to re-enable combining offset and/or pointer taint when " "loading specific constant global variables (i.e. lookup tables).")  ,
cl::Hidden   
)
static

◆ ClConditionalCallbacks

cl::opt< bool > ClConditionalCallbacks("dfsan-conditional-callbacks", cl::desc("Insert calls to callback functions on conditionals."), cl::Hidden, cl::init(false)) ( "dfsan-conditional-callbacks"  ,
cl::desc("Insert calls to callback functions on conditionals.")  ,
cl::Hidden  ,
cl::init(false)   
)
static

◆ ClDebugNonzeroLabels

cl::opt< bool > ClDebugNonzeroLabels("dfsan-debug-nonzero-labels", cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, " "load or return with a nonzero label"), cl::Hidden) ( "dfsan-debug-nonzero-labels"  ,
cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, " "load or return with a nonzero label")  ,
cl::Hidden   
)
static

◆ ClEventCallbacks

cl::opt< bool > ClEventCallbacks("dfsan-event-callbacks", cl::desc("Insert calls to __dfsan_*_callback functions on data events."), cl::Hidden, cl::init(false)) ( "dfsan-event-callbacks"  ,
cl::desc("Insert calls to __dfsan_*_callback functions on data events.")  ,
cl::Hidden  ,
cl::init(false)   
)
static

◆ ClIgnorePersonalityRoutine

cl::opt< bool > ClIgnorePersonalityRoutine("dfsan-ignore-personality-routine", cl::desc("If a personality routine is marked uninstrumented from the ABI " "list, do not create a wrapper for it."), cl::Hidden, cl::init(false)) ( "dfsan-ignore-personality-routine"  ,
cl::desc("If a personality routine is marked uninstrumented from the ABI " "list, do not create a wrapper for it.")  ,
cl::Hidden  ,
cl::init(false)   
)
static

◆ ClInstrumentWithCallThreshold

cl::opt< int > ClInstrumentWithCallThreshold("dfsan-instrument-with-call-threshold", cl::desc("If the function being instrumented requires more than " "this number of origin stores, use callbacks instead of " "inline checks (-1 means never use callbacks)."), cl::Hidden, cl::init(3500)) ( "dfsan-instrument-with-call-threshold"  ,
cl::desc("If the function being instrumented requires more than " "this number of origin stores, use callbacks instead of " "inline checks (-1 means never use callbacks).")  ,
cl::Hidden  ,
cl::init(3500)   
)
static

◆ ClPreserveAlignment

cl::opt< bool > ClPreserveAlignment("dfsan-preserve-alignment", cl::desc("respect alignment requirements provided by input IR"), cl::Hidden, cl::init(false)) ( "dfsan-preserve-alignment"  ,
cl::desc("respect alignment requirements provided by input IR")  ,
cl::Hidden  ,
cl::init(false)   
)
static

◆ ClReachesFunctionCallbacks

cl::opt< bool > ClReachesFunctionCallbacks("dfsan-reaches-function-callbacks", cl::desc("Insert calls to callback functions on data reaching a function."), cl::Hidden, cl::init(false)) ( "dfsan-reaches-function-callbacks"  ,
cl::desc("Insert calls to callback functions on data reaching a function.")  ,
cl::Hidden  ,
cl::init(false)   
)
static

◆ ClTrackOrigins

cl::opt< int > ClTrackOrigins("dfsan-track-origins", cl::desc("Track origins of labels"), cl::Hidden, cl::init(0)) ( "dfsan-track-origins"  ,
cl::desc("Track origins of labels")  ,
cl::Hidden  ,
cl::init(0)   
)
static

◆ ClTrackSelectControlFlow

cl::opt< bool > ClTrackSelectControlFlow("dfsan-track-select-control-flow", cl::desc("Propagate labels from condition values of select instructions " "to results."), cl::Hidden, cl::init(true)) ( "dfsan-track-select-control-flow"  ,
cl::desc("Propagate labels from condition values of select instructions " "to results.")  ,
cl::Hidden  ,
cl::init(true)   
)
static

◆ Linux_AArch64_MemoryMapParams

const MemoryMapParams Linux_AArch64_MemoryMapParams
Initial value:
= {
0,
0x0B00000000000,
0,
0x0200000000000,
}

Definition at line 292 of file DataFlowSanitizer.cpp.

◆ Linux_LoongArch64_MemoryMapParams

const MemoryMapParams Linux_LoongArch64_MemoryMapParams
Initial value:
= {
0,
0x500000000000,
0,
0x100000000000,
}

Definition at line 309 of file DataFlowSanitizer.cpp.

◆ Linux_X86_64_MemoryMapParams

const MemoryMapParams Linux_X86_64_MemoryMapParams
Initial value:
= {
0,
0x500000000000,
0,
0x100000000000,
}

Definition at line 300 of file DataFlowSanitizer.cpp.

◆ MinOriginAlignment

const Align MinOriginAlignment = Align(4)
static

Definition at line 125 of file DataFlowSanitizer.cpp.

◆ RetvalTLSSize

const unsigned RetvalTLSSize = 800
static

Definition at line 130 of file DataFlowSanitizer.cpp.

◆ ShadowTLSAlignment

const Align ShadowTLSAlignment = Align(2)
static

Definition at line 123 of file DataFlowSanitizer.cpp.