LLVM 20.0.0git
DataFlowSanitizer.cpp
Go to the documentation of this file.
//===- DataFlowSanitizer.cpp - dynamic data flow analysis -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file is a part of DataFlowSanitizer, a generalised dynamic data flow
/// analysis.
///
/// Unlike other Sanitizer tools, this tool is not designed to detect a specific
/// class of bugs on its own.  Instead, it provides a generic dynamic data flow
/// analysis framework to be used by clients to help detect application-specific
/// issues within their own code.
///
/// The analysis is based on automatic propagation of data flow labels (also
/// known as taint labels) through a program as it performs computation.
///
/// Argument and return value labels are passed through TLS variables
/// __dfsan_arg_tls and __dfsan_retval_tls.
///
/// Each byte of application memory is backed by a shadow memory byte. The
/// shadow byte can represent up to 8 labels. On Linux/x86_64, memory is then
/// laid out as follows:
///
///   +--------------------+ 0x800000000000 (top of memory)
///   |    application 3   |
///   +--------------------+ 0x700000000000
///   |      invalid       |
///   +--------------------+ 0x610000000000
///   |      origin 1      |
///   +--------------------+ 0x600000000000
///   |    application 2   |
///   +--------------------+ 0x510000000000
///   |      shadow 1      |
///   +--------------------+ 0x500000000000
///   |      invalid       |
///   +--------------------+ 0x400000000000
///   |      origin 3      |
///   +--------------------+ 0x300000000000
///   |      shadow 3      |
///   +--------------------+ 0x200000000000
///   |      origin 2      |
///   +--------------------+ 0x110000000000
///   |      invalid       |
///   +--------------------+ 0x100000000000
///   |      shadow 2      |
///   +--------------------+ 0x010000000000
///   |    application 1   |
///   +--------------------+ 0x000000000000
///
/// MEM_TO_SHADOW(mem) = mem ^ 0x500000000000
/// SHADOW_TO_ORIGIN(shadow) = shadow + 0x100000000000
///
/// For more information, please refer to the design document:
/// http://clang.llvm.org/docs/DataFlowSanitizerDesign.html
//
//===----------------------------------------------------------------------===//
61
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/iterator.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/SpecialCaseList.h"
#include "llvm/Support/VirtualFileSystem.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>
119
120using namespace llvm;
121
122// This must be consistent with ShadowWidthBits.
124
126
// The size, in bytes, of the TLS variables used to pass argument and return
// value labels. These constants must be kept in sync with the ones in
// dfsan.cpp.
static constexpr unsigned ArgTLSSize = 800;
static constexpr unsigned RetvalTLSSize = 800;
131
132// The -dfsan-preserve-alignment flag controls whether this pass assumes that
133// alignment requirements provided by the input IR are correct. For example,
134// if the input IR contains a load with alignment 8, this flag will cause
135// the shadow load to have alignment 16. This flag is disabled by default as
136// we have unfortunately encountered too much code (including Clang itself;
137// see PR14291) which performs misaligned access.
139 "dfsan-preserve-alignment",
140 cl::desc("respect alignment requirements provided by input IR"), cl::Hidden,
141 cl::init(false));
142
143// The ABI list files control how shadow parameters are passed. The pass treats
144// every function labelled "uninstrumented" in the ABI list file as conforming
145// to the "native" (i.e. unsanitized) ABI. Unless the ABI list contains
146// additional annotations for those functions, a call to one of those functions
147// will produce a warning message, as the labelling behaviour of the function is
148// unknown. The other supported annotations for uninstrumented functions are
149// "functional" and "discard", which are described below under
150// DataFlowSanitizer::WrapperKind.
151// Functions will often be labelled with both "uninstrumented" and one of
152// "functional" or "discard". This will leave the function unchanged by this
153// pass, and create a wrapper function that will call the original.
154//
155// Instrumented functions can also be annotated as "force_zero_labels", which
156// will make all shadow and return values set zero labels.
157// Functions should never be labelled with both "force_zero_labels" and
158// "uninstrumented" or any of the unistrumented wrapper kinds.
160 "dfsan-abilist",
161 cl::desc("File listing native ABI functions and how the pass treats them"),
162 cl::Hidden);
163
164// Controls whether the pass includes or ignores the labels of pointers in load
165// instructions.
167 "dfsan-combine-pointer-labels-on-load",
168 cl::desc("Combine the label of the pointer with the label of the data when "
169 "loading from memory."),
170 cl::Hidden, cl::init(true));
171
172// Controls whether the pass includes or ignores the labels of pointers in
173// stores instructions.
175 "dfsan-combine-pointer-labels-on-store",
176 cl::desc("Combine the label of the pointer with the label of the data when "
177 "storing in memory."),
178 cl::Hidden, cl::init(false));
179
180// Controls whether the pass propagates labels of offsets in GEP instructions.
182 "dfsan-combine-offset-labels-on-gep",
183 cl::desc(
184 "Combine the label of the offset with the label of the pointer when "
185 "doing pointer arithmetic."),
186 cl::Hidden, cl::init(true));
187
189 "dfsan-combine-taint-lookup-table",
190 cl::desc(
191 "When dfsan-combine-offset-labels-on-gep and/or "
192 "dfsan-combine-pointer-labels-on-load are false, this flag can "
193 "be used to re-enable combining offset and/or pointer taint when "
194 "loading specific constant global variables (i.e. lookup tables)."),
195 cl::Hidden);
196
198 "dfsan-debug-nonzero-labels",
199 cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, "
200 "load or return with a nonzero label"),
201 cl::Hidden);
202
203// Experimental feature that inserts callbacks for certain data events.
204// Currently callbacks are only inserted for loads, stores, memory transfers
205// (i.e. memcpy and memmove), and comparisons.
206//
207// If this flag is set to true, the user must provide definitions for the
208// following callback functions:
209// void __dfsan_load_callback(dfsan_label Label, void* addr);
210// void __dfsan_store_callback(dfsan_label Label, void* addr);
211// void __dfsan_mem_transfer_callback(dfsan_label *Start, size_t Len);
212// void __dfsan_cmp_callback(dfsan_label CombinedLabel);
214 "dfsan-event-callbacks",
215 cl::desc("Insert calls to __dfsan_*_callback functions on data events."),
216 cl::Hidden, cl::init(false));
217
218// Experimental feature that inserts callbacks for conditionals, including:
219// conditional branch, switch, select.
220// This must be true for dfsan_set_conditional_callback() to have effect.
222 "dfsan-conditional-callbacks",
223 cl::desc("Insert calls to callback functions on conditionals."), cl::Hidden,
224 cl::init(false));
225
226// Experimental feature that inserts callbacks for data reaching a function,
227// either via function arguments and loads.
228// This must be true for dfsan_set_reaches_function_callback() to have effect.
230 "dfsan-reaches-function-callbacks",
231 cl::desc("Insert calls to callback functions on data reaching a function."),
232 cl::Hidden, cl::init(false));
233
234// Controls whether the pass tracks the control flow of select instructions.
236 "dfsan-track-select-control-flow",
237 cl::desc("Propagate labels from condition values of select instructions "
238 "to results."),
239 cl::Hidden, cl::init(true));
240
241// TODO: This default value follows MSan. DFSan may use a different value.
243 "dfsan-instrument-with-call-threshold",
244 cl::desc("If the function being instrumented requires more than "
245 "this number of origin stores, use callbacks instead of "
246 "inline checks (-1 means never use callbacks)."),
247 cl::Hidden, cl::init(3500));
248
249// Controls how to track origins.
250// * 0: do not track origins.
251// * 1: track origins at memory store operations.
252// * 2: track origins at memory load and store operations.
253// TODO: track callsites.
254static cl::opt<int> ClTrackOrigins("dfsan-track-origins",
255 cl::desc("Track origins of labels"),
256 cl::Hidden, cl::init(0));
257
259 "dfsan-ignore-personality-routine",
260 cl::desc("If a personality routine is marked uninstrumented from the ABI "
261 "list, do not create a wrapper for it."),
262 cl::Hidden, cl::init(false));
263
265 // Types of GlobalVariables are always pointer types.
266 Type *GType = G.getValueType();
267 // For now we support excluding struct types only.
268 if (StructType *SGType = dyn_cast<StructType>(GType)) {
269 if (!SGType->isLiteral())
270 return SGType->getName();
271 }
272 return "<unknown type>";
273}
274
namespace {

// Memory map parameters used in application-to-shadow address calculation.
// Offset = (Addr & ~AndMask) ^ XorMask
// Shadow = ShadowBase + Offset
// Origin = (OriginBase + Offset) & ~3ULL
struct MemoryMapParams {
  uint64_t AndMask;    // Bits cleared from the application address.
  uint64_t XorMask;    // Bits flipped after masking to form the offset.
  uint64_t ShadowBase; // Added to the offset to form the shadow address.
  uint64_t OriginBase; // Added to the offset to form the origin address.
};

} // end anonymous namespace
289
290// NOLINTBEGIN(readability-identifier-naming)
291// aarch64 Linux
292const MemoryMapParams Linux_AArch64_MemoryMapParams = {
293 0, // AndMask (not used)
294 0x0B00000000000, // XorMask
295 0, // ShadowBase (not used)
296 0x0200000000000, // OriginBase
297};
298
299// x86_64 Linux
300const MemoryMapParams Linux_X86_64_MemoryMapParams = {
301 0, // AndMask (not used)
302 0x500000000000, // XorMask
303 0, // ShadowBase (not used)
304 0x100000000000, // OriginBase
305};
306// NOLINTEND(readability-identifier-naming)
307
308// loongarch64 Linux
309const MemoryMapParams Linux_LoongArch64_MemoryMapParams = {
310 0, // AndMask (not used)
311 0x500000000000, // XorMask
312 0, // ShadowBase (not used)
313 0x100000000000, // OriginBase
314};
315
316namespace {
317
318class DFSanABIList {
319 std::unique_ptr<SpecialCaseList> SCL;
320
321public:
322 DFSanABIList() = default;
323
324 void set(std::unique_ptr<SpecialCaseList> List) { SCL = std::move(List); }
325
326 /// Returns whether either this function or its source file are listed in the
327 /// given category.
328 bool isIn(const Function &F, StringRef Category) const {
329 return isIn(*F.getParent(), Category) ||
330 SCL->inSection("dataflow", "fun", F.getName(), Category);
331 }
332
333 /// Returns whether this global alias is listed in the given category.
334 ///
335 /// If GA aliases a function, the alias's name is matched as a function name
336 /// would be. Similarly, aliases of globals are matched like globals.
337 bool isIn(const GlobalAlias &GA, StringRef Category) const {
338 if (isIn(*GA.getParent(), Category))
339 return true;
340
341 if (isa<FunctionType>(GA.getValueType()))
342 return SCL->inSection("dataflow", "fun", GA.getName(), Category);
343
344 return SCL->inSection("dataflow", "global", GA.getName(), Category) ||
345 SCL->inSection("dataflow", "type", getGlobalTypeString(GA),
346 Category);
347 }
348
349 /// Returns whether this module is listed in the given category.
350 bool isIn(const Module &M, StringRef Category) const {
351 return SCL->inSection("dataflow", "src", M.getModuleIdentifier(), Category);
352 }
353};
354
355/// TransformedFunction is used to express the result of transforming one
356/// function type into another. This struct is immutable. It holds metadata
357/// useful for updating calls of the old function to the new type.
358struct TransformedFunction {
359 TransformedFunction(FunctionType *OriginalType, FunctionType *TransformedType,
360 const std::vector<unsigned> &ArgumentIndexMapping)
361 : OriginalType(OriginalType), TransformedType(TransformedType),
362 ArgumentIndexMapping(ArgumentIndexMapping) {}
363
364 // Disallow copies.
365 TransformedFunction(const TransformedFunction &) = delete;
366 TransformedFunction &operator=(const TransformedFunction &) = delete;
367
368 // Allow moves.
369 TransformedFunction(TransformedFunction &&) = default;
370 TransformedFunction &operator=(TransformedFunction &&) = default;
371
372 /// Type of the function before the transformation.
373 FunctionType *OriginalType;
374
375 /// Type of the function after the transformation.
377
378 /// Transforming a function may change the position of arguments. This
379 /// member records the mapping from each argument's old position to its new
380 /// position. Argument positions are zero-indexed. If the transformation
381 /// from F to F' made the first argument of F into the third argument of F',
382 /// then ArgumentIndexMapping[0] will equal 2.
383 std::vector<unsigned> ArgumentIndexMapping;
384};
385
386/// Given function attributes from a call site for the original function,
387/// return function attributes appropriate for a call to the transformed
388/// function.
390transformFunctionAttributes(const TransformedFunction &TransformedFunction,
391 LLVMContext &Ctx, AttributeList CallSiteAttrs) {
392
393 // Construct a vector of AttributeSet for each function argument.
394 std::vector<llvm::AttributeSet> ArgumentAttributes(
395 TransformedFunction.TransformedType->getNumParams());
396
397 // Copy attributes from the parameter of the original function to the
398 // transformed version. 'ArgumentIndexMapping' holds the mapping from
399 // old argument position to new.
400 for (unsigned I = 0, IE = TransformedFunction.ArgumentIndexMapping.size();
401 I < IE; ++I) {
402 unsigned TransformedIndex = TransformedFunction.ArgumentIndexMapping[I];
403 ArgumentAttributes[TransformedIndex] = CallSiteAttrs.getParamAttrs(I);
404 }
405
406 // Copy annotations on varargs arguments.
407 for (unsigned I = TransformedFunction.OriginalType->getNumParams(),
408 IE = CallSiteAttrs.getNumAttrSets();
409 I < IE; ++I) {
410 ArgumentAttributes.push_back(CallSiteAttrs.getParamAttrs(I));
411 }
412
413 return AttributeList::get(Ctx, CallSiteAttrs.getFnAttrs(),
414 CallSiteAttrs.getRetAttrs(),
415 llvm::ArrayRef(ArgumentAttributes));
416}
417
418class DataFlowSanitizer {
419 friend struct DFSanFunction;
420 friend class DFSanVisitor;
421
422 enum { ShadowWidthBits = 8, ShadowWidthBytes = ShadowWidthBits / 8 };
423
424 enum { OriginWidthBits = 32, OriginWidthBytes = OriginWidthBits / 8 };
425
426 /// How should calls to uninstrumented functions be handled?
427 enum WrapperKind {
428 /// This function is present in an uninstrumented form but we don't know
429 /// how it should be handled. Print a warning and call the function anyway.
430 /// Don't label the return value.
431 WK_Warning,
432
433 /// This function does not write to (user-accessible) memory, and its return
434 /// value is unlabelled.
435 WK_Discard,
436
437 /// This function does not write to (user-accessible) memory, and the label
438 /// of its return value is the union of the label of its arguments.
439 WK_Functional,
440
441 /// Instead of calling the function, a custom wrapper __dfsw_F is called,
442 /// where F is the name of the function. This function may wrap the
443 /// original function or provide its own implementation. WK_Custom uses an
444 /// extra pointer argument to return the shadow. This allows the wrapped
445 /// form of the function type to be expressed in C.
446 WK_Custom
447 };
448
449 Module *Mod;
450 LLVMContext *Ctx;
451 Type *Int8Ptr;
452 IntegerType *OriginTy;
453 PointerType *OriginPtrTy;
454 ConstantInt *ZeroOrigin;
455 /// The shadow type for all primitive types and vector types.
456 IntegerType *PrimitiveShadowTy;
457 PointerType *PrimitiveShadowPtrTy;
458 IntegerType *IntptrTy;
459 ConstantInt *ZeroPrimitiveShadow;
460 Constant *ArgTLS;
461 ArrayType *ArgOriginTLSTy;
462 Constant *ArgOriginTLS;
463 Constant *RetvalTLS;
464 Constant *RetvalOriginTLS;
465 FunctionType *DFSanUnionLoadFnTy;
466 FunctionType *DFSanLoadLabelAndOriginFnTy;
467 FunctionType *DFSanUnimplementedFnTy;
468 FunctionType *DFSanWrapperExternWeakNullFnTy;
469 FunctionType *DFSanSetLabelFnTy;
470 FunctionType *DFSanNonzeroLabelFnTy;
471 FunctionType *DFSanVarargWrapperFnTy;
472 FunctionType *DFSanConditionalCallbackFnTy;
473 FunctionType *DFSanConditionalCallbackOriginFnTy;
474 FunctionType *DFSanReachesFunctionCallbackFnTy;
475 FunctionType *DFSanReachesFunctionCallbackOriginFnTy;
476 FunctionType *DFSanCmpCallbackFnTy;
477 FunctionType *DFSanLoadStoreCallbackFnTy;
478 FunctionType *DFSanMemTransferCallbackFnTy;
479 FunctionType *DFSanChainOriginFnTy;
480 FunctionType *DFSanChainOriginIfTaintedFnTy;
481 FunctionType *DFSanMemOriginTransferFnTy;
482 FunctionType *DFSanMemShadowOriginTransferFnTy;
483 FunctionType *DFSanMemShadowOriginConditionalExchangeFnTy;
484 FunctionType *DFSanMaybeStoreOriginFnTy;
485 FunctionCallee DFSanUnionLoadFn;
486 FunctionCallee DFSanLoadLabelAndOriginFn;
487 FunctionCallee DFSanUnimplementedFn;
488 FunctionCallee DFSanWrapperExternWeakNullFn;
489 FunctionCallee DFSanSetLabelFn;
490 FunctionCallee DFSanNonzeroLabelFn;
491 FunctionCallee DFSanVarargWrapperFn;
492 FunctionCallee DFSanLoadCallbackFn;
493 FunctionCallee DFSanStoreCallbackFn;
494 FunctionCallee DFSanMemTransferCallbackFn;
495 FunctionCallee DFSanConditionalCallbackFn;
496 FunctionCallee DFSanConditionalCallbackOriginFn;
497 FunctionCallee DFSanReachesFunctionCallbackFn;
498 FunctionCallee DFSanReachesFunctionCallbackOriginFn;
499 FunctionCallee DFSanCmpCallbackFn;
500 FunctionCallee DFSanChainOriginFn;
501 FunctionCallee DFSanChainOriginIfTaintedFn;
502 FunctionCallee DFSanMemOriginTransferFn;
503 FunctionCallee DFSanMemShadowOriginTransferFn;
504 FunctionCallee DFSanMemShadowOriginConditionalExchangeFn;
505 FunctionCallee DFSanMaybeStoreOriginFn;
506 SmallPtrSet<Value *, 16> DFSanRuntimeFunctions;
507 MDNode *ColdCallWeights;
508 MDNode *OriginStoreWeights;
509 DFSanABIList ABIList;
510 DenseMap<Value *, Function *> UnwrappedFnMap;
511 AttributeMask ReadOnlyNoneAttrs;
512 StringSet<> CombineTaintLookupTableNames;
513
514 /// Memory map parameters used in calculation mapping application addresses
515 /// to shadow addresses and origin addresses.
516 const MemoryMapParams *MapParams;
517
518 Value *getShadowOffset(Value *Addr, IRBuilder<> &IRB);
519 Value *getShadowAddress(Value *Addr, BasicBlock::iterator Pos);
520 Value *getShadowAddress(Value *Addr, BasicBlock::iterator Pos,
521 Value *ShadowOffset);
522 std::pair<Value *, Value *> getShadowOriginAddress(Value *Addr,
523 Align InstAlignment,
525 bool isInstrumented(const Function *F);
526 bool isInstrumented(const GlobalAlias *GA);
527 bool isForceZeroLabels(const Function *F);
528 TransformedFunction getCustomFunctionType(FunctionType *T);
529 WrapperKind getWrapperKind(Function *F);
530 void addGlobalNameSuffix(GlobalValue *GV);
531 void buildExternWeakCheckIfNeeded(IRBuilder<> &IRB, Function *F);
532 Function *buildWrapperFunction(Function *F, StringRef NewFName,
534 FunctionType *NewFT);
535 void initializeCallbackFunctions(Module &M);
536 void initializeRuntimeFunctions(Module &M);
537 bool initializeModule(Module &M);
538
539 /// Advances \p OriginAddr to point to the next 32-bit origin and then loads
540 /// from it. Returns the origin's loaded value.
541 Value *loadNextOrigin(BasicBlock::iterator Pos, Align OriginAlign,
542 Value **OriginAddr);
543
544 /// Returns whether the given load byte size is amenable to inlined
545 /// optimization patterns.
546 bool hasLoadSizeForFastPath(uint64_t Size);
547
548 /// Returns whether the pass tracks origins. Supports only TLS ABI mode.
549 bool shouldTrackOrigins();
550
551 /// Returns a zero constant with the shadow type of OrigTy.
552 ///
553 /// getZeroShadow({T1,T2,...}) = {getZeroShadow(T1),getZeroShadow(T2,...}
554 /// getZeroShadow([n x T]) = [n x getZeroShadow(T)]
555 /// getZeroShadow(other type) = i16(0)
556 Constant *getZeroShadow(Type *OrigTy);
557 /// Returns a zero constant with the shadow type of V's type.
558 Constant *getZeroShadow(Value *V);
559
560 /// Checks if V is a zero shadow.
561 bool isZeroShadow(Value *V);
562
563 /// Returns the shadow type of OrigTy.
564 ///
565 /// getShadowTy({T1,T2,...}) = {getShadowTy(T1),getShadowTy(T2),...}
566 /// getShadowTy([n x T]) = [n x getShadowTy(T)]
567 /// getShadowTy(other type) = i16
568 Type *getShadowTy(Type *OrigTy);
569 /// Returns the shadow type of V's type.
570 Type *getShadowTy(Value *V);
571
572 const uint64_t NumOfElementsInArgOrgTLS = ArgTLSSize / OriginWidthBytes;
573
574public:
575 DataFlowSanitizer(const std::vector<std::string> &ABIListFiles);
576
577 bool runImpl(Module &M,
579};
580
581struct DFSanFunction {
582 DataFlowSanitizer &DFS;
583 Function *F;
584 DominatorTree DT;
585 bool IsNativeABI;
586 bool IsForceZeroLabels;
588 AllocaInst *LabelReturnAlloca = nullptr;
589 AllocaInst *OriginReturnAlloca = nullptr;
590 DenseMap<Value *, Value *> ValShadowMap;
591 DenseMap<Value *, Value *> ValOriginMap;
594
595 struct PHIFixupElement {
596 PHINode *Phi;
597 PHINode *ShadowPhi;
598 PHINode *OriginPhi;
599 };
600 std::vector<PHIFixupElement> PHIFixups;
601
602 DenseSet<Instruction *> SkipInsts;
603 std::vector<Value *> NonZeroChecks;
604
605 struct CachedShadow {
606 BasicBlock *Block; // The block where Shadow is defined.
607 Value *Shadow;
608 };
609 /// Maps a value to its latest shadow value in terms of domination tree.
610 DenseMap<std::pair<Value *, Value *>, CachedShadow> CachedShadows;
611 /// Maps a value to its latest collapsed shadow value it was converted to in
612 /// terms of domination tree. When ClDebugNonzeroLabels is on, this cache is
613 /// used at a post process where CFG blocks are split. So it does not cache
614 /// BasicBlock like CachedShadows, but uses domination between values.
615 DenseMap<Value *, Value *> CachedCollapsedShadows;
617
618 DFSanFunction(DataFlowSanitizer &DFS, Function *F, bool IsNativeABI,
619 bool IsForceZeroLabels, TargetLibraryInfo &TLI)
620 : DFS(DFS), F(F), IsNativeABI(IsNativeABI),
621 IsForceZeroLabels(IsForceZeroLabels), TLI(TLI) {
622 DT.recalculate(*F);
623 }
624
625 /// Computes the shadow address for a given function argument.
626 ///
627 /// Shadow = ArgTLS+ArgOffset.
628 Value *getArgTLS(Type *T, unsigned ArgOffset, IRBuilder<> &IRB);
629
630 /// Computes the shadow address for a return value.
631 Value *getRetvalTLS(Type *T, IRBuilder<> &IRB);
632
633 /// Computes the origin address for a given function argument.
634 ///
635 /// Origin = ArgOriginTLS[ArgNo].
636 Value *getArgOriginTLS(unsigned ArgNo, IRBuilder<> &IRB);
637
638 /// Computes the origin address for a return value.
639 Value *getRetvalOriginTLS();
640
641 Value *getOrigin(Value *V);
642 void setOrigin(Instruction *I, Value *Origin);
643 /// Generates IR to compute the origin of the last operand with a taint label.
644 Value *combineOperandOrigins(Instruction *Inst);
645 /// Before the instruction Pos, generates IR to compute the last origin with a
646 /// taint label. Labels and origins are from vectors Shadows and Origins
647 /// correspondingly. The generated IR is like
648 /// Sn-1 != Zero ? On-1: ... S2 != Zero ? O2: S1 != Zero ? O1: O0
649 /// When Zero is nullptr, it uses ZeroPrimitiveShadow. Otherwise it can be
650 /// zeros with other bitwidths.
651 Value *combineOrigins(const std::vector<Value *> &Shadows,
652 const std::vector<Value *> &Origins,
653 BasicBlock::iterator Pos, ConstantInt *Zero = nullptr);
654
655 Value *getShadow(Value *V);
656 void setShadow(Instruction *I, Value *Shadow);
657 /// Generates IR to compute the union of the two given shadows, inserting it
658 /// before Pos. The combined value is with primitive type.
659 Value *combineShadows(Value *V1, Value *V2, BasicBlock::iterator Pos);
660 /// Combines the shadow values of V1 and V2, then converts the combined value
661 /// with primitive type into a shadow value with the original type T.
662 Value *combineShadowsThenConvert(Type *T, Value *V1, Value *V2,
664 Value *combineOperandShadows(Instruction *Inst);
665
666 /// Generates IR to load shadow and origin corresponding to bytes [\p
667 /// Addr, \p Addr + \p Size), where addr has alignment \p
668 /// InstAlignment, and take the union of each of those shadows. The returned
669 /// shadow always has primitive type.
670 ///
671 /// When tracking loads is enabled, the returned origin is a chain at the
672 /// current stack if the returned shadow is tainted.
673 std::pair<Value *, Value *> loadShadowOrigin(Value *Addr, uint64_t Size,
674 Align InstAlignment,
676
677 void storePrimitiveShadowOrigin(Value *Addr, uint64_t Size,
678 Align InstAlignment, Value *PrimitiveShadow,
679 Value *Origin, BasicBlock::iterator Pos);
680 /// Applies PrimitiveShadow to all primitive subtypes of T, returning
681 /// the expanded shadow value.
682 ///
683 /// EFP({T1,T2, ...}, PS) = {EFP(T1,PS),EFP(T2,PS),...}
684 /// EFP([n x T], PS) = [n x EFP(T,PS)]
685 /// EFP(other types, PS) = PS
686 Value *expandFromPrimitiveShadow(Type *T, Value *PrimitiveShadow,
688 /// Collapses Shadow into a single primitive shadow value, unioning all
689 /// primitive shadow values in the process. Returns the final primitive
690 /// shadow value.
691 ///
692 /// CTP({V1,V2, ...}) = UNION(CFP(V1,PS),CFP(V2,PS),...)
693 /// CTP([V1,V2,...]) = UNION(CFP(V1,PS),CFP(V2,PS),...)
694 /// CTP(other types, PS) = PS
695 Value *collapseToPrimitiveShadow(Value *Shadow, BasicBlock::iterator Pos);
696
697 void storeZeroPrimitiveShadow(Value *Addr, uint64_t Size, Align ShadowAlign,
699
700 Align getShadowAlign(Align InstAlignment);
701
702 // If ClConditionalCallbacks is enabled, insert a callback after a given
703 // branch instruction using the given conditional expression.
704 void addConditionalCallbacksIfEnabled(Instruction &I, Value *Condition);
705
706 // If ClReachesFunctionCallbacks is enabled, insert a callback for each
707 // argument and load instruction.
708 void addReachesFunctionCallbacksIfEnabled(IRBuilder<> &IRB, Instruction &I,
709 Value *Data);
710
711 bool isLookupTableConstant(Value *P);
712
713private:
714 /// Collapses the shadow with aggregate type into a single primitive shadow
715 /// value.
716 template <class AggregateType>
717 Value *collapseAggregateShadow(AggregateType *AT, Value *Shadow,
718 IRBuilder<> &IRB);
719
720 Value *collapseToPrimitiveShadow(Value *Shadow, IRBuilder<> &IRB);
721
722 /// Returns the shadow value of an argument A.
723 Value *getShadowForTLSArgument(Argument *A);
724
725 /// The fast path of loading shadows.
726 std::pair<Value *, Value *>
727 loadShadowFast(Value *ShadowAddr, Value *OriginAddr, uint64_t Size,
728 Align ShadowAlign, Align OriginAlign, Value *FirstOrigin,
730
731 Align getOriginAlign(Align InstAlignment);
732
733 /// Because 4 contiguous bytes share one 4-byte origin, the most accurate load
734 /// is __dfsan_load_label_and_origin. This function returns the union of all
735 /// labels and the origin of the first taint label. However this is an
736 /// additional call with many instructions. To ensure common cases are fast,
737 /// checks if it is possible to load labels and origins without using the
738 /// callback function.
739 ///
740 /// When enabling tracking load instructions, we always use
741 /// __dfsan_load_label_and_origin to reduce code size.
742 bool useCallbackLoadLabelAndOrigin(uint64_t Size, Align InstAlignment);
743
744 /// Returns a chain at the current stack with previous origin V.
745 Value *updateOrigin(Value *V, IRBuilder<> &IRB);
746
747 /// Returns a chain at the current stack with previous origin V if Shadow is
748 /// tainted.
749 Value *updateOriginIfTainted(Value *Shadow, Value *Origin, IRBuilder<> &IRB);
750
751 /// Creates an Intptr = Origin | Origin << 32 if Intptr's size is 64. Returns
752 /// Origin otherwise.
753 Value *originToIntptr(IRBuilder<> &IRB, Value *Origin);
754
755 /// Stores Origin into the address range [StoreOriginAddr, StoreOriginAddr +
756 /// Size).
757 void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *StoreOriginAddr,
758 uint64_t StoreOriginSize, Align Alignment);
759
760 /// Stores Origin in terms of its Shadow value.
761 /// * Do not write origins for zero shadows because we do not trace origins
762 /// for untainted sinks.
763 /// * Use __dfsan_maybe_store_origin if there are too many origin store
764 /// instrumentations.
765 void storeOrigin(BasicBlock::iterator Pos, Value *Addr, uint64_t Size,
766 Value *Shadow, Value *Origin, Value *StoreOriginAddr,
767 Align InstAlignment);
768
769 /// Convert a scalar value to an i1 by comparing with 0.
770 Value *convertToBool(Value *V, IRBuilder<> &IRB, const Twine &Name = "");
771
772 bool shouldInstrumentWithCall();
773
774 /// Generates IR to load shadow and origin corresponding to bytes [\p
775 /// Addr, \p Addr + \p Size), where addr has alignment \p
776 /// InstAlignment, and take the union of each of those shadows. The returned
777 /// shadow always has primitive type.
778 std::pair<Value *, Value *>
779 loadShadowOriginSansLoadTracking(Value *Addr, uint64_t Size,
780 Align InstAlignment,
782 int NumOriginStores = 0;
783};
784
785class DFSanVisitor : public InstVisitor<DFSanVisitor> {
786public:
787 DFSanFunction &DFSF;
788
789 DFSanVisitor(DFSanFunction &DFSF) : DFSF(DFSF) {}
790
791 const DataLayout &getDataLayout() const {
792 return DFSF.F->getDataLayout();
793 }
794
795 // Combines shadow values and origins for all of I's operands.
796 void visitInstOperands(Instruction &I);
797
800 void visitBitCastInst(BitCastInst &BCI);
801 void visitCastInst(CastInst &CI);
802 void visitCmpInst(CmpInst &CI);
805 void visitLoadInst(LoadInst &LI);
806 void visitStoreInst(StoreInst &SI);
809 void visitReturnInst(ReturnInst &RI);
810 void visitLibAtomicLoad(CallBase &CB);
811 void visitLibAtomicStore(CallBase &CB);
812 void visitLibAtomicExchange(CallBase &CB);
813 void visitLibAtomicCompareExchange(CallBase &CB);
814 void visitCallBase(CallBase &CB);
815 void visitPHINode(PHINode &PN);
825 void visitBranchInst(BranchInst &BR);
826 void visitSwitchInst(SwitchInst &SW);
827
828private:
829 void visitCASOrRMW(Align InstAlignment, Instruction &I);
830
831 // Returns false when this is an invoke of a custom function.
832 bool visitWrappedCallBase(Function &F, CallBase &CB);
833
834 // Combines origins for all of I's operands.
835 void visitInstOperandOrigins(Instruction &I);
836
837 void addShadowArguments(Function &F, CallBase &CB, std::vector<Value *> &Args,
838 IRBuilder<> &IRB);
839
840 void addOriginArguments(Function &F, CallBase &CB, std::vector<Value *> &Args,
841 IRBuilder<> &IRB);
842
843 Value *makeAddAcquireOrderingTable(IRBuilder<> &IRB);
844 Value *makeAddReleaseOrderingTable(IRBuilder<> &IRB);
845};
846
847bool LibAtomicFunction(const Function &F) {
848 // This is a bit of a hack because TargetLibraryInfo is a function pass.
849 // The DFSan pass would need to be refactored to be function pass oriented
850 // (like MSan is) in order to fit together nicely with TargetLibraryInfo.
851 // We need this check to prevent them from being instrumented, or wrapped.
852 // Match on name and number of arguments.
853 if (!F.hasName() || F.isVarArg())
854 return false;
855 switch (F.arg_size()) {
856 case 4:
857 return F.getName() == "__atomic_load" || F.getName() == "__atomic_store";
858 case 5:
859 return F.getName() == "__atomic_exchange";
860 case 6:
861 return F.getName() == "__atomic_compare_exchange";
862 default:
863 return false;
864 }
865}
866
867} // end anonymous namespace
868
// Constructs the pass from the caller-supplied ABI list files, extended with
// the entries of ClABIListFiles, and fills CombineTaintLookupTableNames.
869DataFlowSanitizer::DataFlowSanitizer(
870 const std::vector<std::string> &ABIListFiles) {
// NOTE(review): ABIListFiles is a const reference, so std::move cannot move
// from it here; this presumably degrades to a plain copy - confirm and
// consider dropping the std::move.
871 std::vector<std::string> AllABIListFiles(std::move(ABIListFiles));
872 llvm::append_range(AllABIListFiles, ClABIListFiles);
873 // FIXME: should we propagate vfs::FileSystem to this constructor?
874 ABIList.set(
876
878 CombineTaintLookupTableNames.insert(v);
879}
880
881TransformedFunction DataFlowSanitizer::getCustomFunctionType(FunctionType *T) {
882 SmallVector<Type *, 4> ArgTypes;
883
884 // Some parameters of the custom function being constructed are
885 // parameters of T. Record the mapping from parameters of T to
886 // parameters of the custom function, so that parameter attributes
887 // at call sites can be updated.
888 std::vector<unsigned> ArgumentIndexMapping;
889 for (unsigned I = 0, E = T->getNumParams(); I != E; ++I) {
890 Type *ParamType = T->getParamType(I);
891 ArgumentIndexMapping.push_back(ArgTypes.size());
892 ArgTypes.push_back(ParamType);
893 }
894 for (unsigned I = 0, E = T->getNumParams(); I != E; ++I)
895 ArgTypes.push_back(PrimitiveShadowTy);
896 if (T->isVarArg())
897 ArgTypes.push_back(PrimitiveShadowPtrTy);
898 Type *RetType = T->getReturnType();
899 if (!RetType->isVoidTy())
900 ArgTypes.push_back(PrimitiveShadowPtrTy);
901
902 if (shouldTrackOrigins()) {
903 for (unsigned I = 0, E = T->getNumParams(); I != E; ++I)
904 ArgTypes.push_back(OriginTy);
905 if (T->isVarArg())
906 ArgTypes.push_back(OriginPtrTy);
907 if (!RetType->isVoidTy())
908 ArgTypes.push_back(OriginPtrTy);
909 }
910
911 return TransformedFunction(
912 T, FunctionType::get(T->getReturnType(), ArgTypes, T->isVarArg()),
913 ArgumentIndexMapping);
914}
915
916bool DataFlowSanitizer::isZeroShadow(Value *V) {
917 Type *T = V->getType();
918 if (!isa<ArrayType>(T) && !isa<StructType>(T)) {
919 if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
920 return CI->isZero();
921 return false;
922 }
923
924 return isa<ConstantAggregateZero>(V);
925}
926
927bool DataFlowSanitizer::hasLoadSizeForFastPath(uint64_t Size) {
928 uint64_t ShadowSize = Size * ShadowWidthBytes;
929 return ShadowSize % 8 == 0 || ShadowSize == 4;
930}
931
932bool DataFlowSanitizer::shouldTrackOrigins() {
933 static const bool ShouldTrackOrigins = ClTrackOrigins;
934 return ShouldTrackOrigins;
935}
936
937Constant *DataFlowSanitizer::getZeroShadow(Type *OrigTy) {
938 if (!isa<ArrayType>(OrigTy) && !isa<StructType>(OrigTy))
939 return ZeroPrimitiveShadow;
940 Type *ShadowTy = getShadowTy(OrigTy);
941 return ConstantAggregateZero::get(ShadowTy);
942}
943
944Constant *DataFlowSanitizer::getZeroShadow(Value *V) {
945 return getZeroShadow(V->getType());
946}
947
949 Value *Shadow, SmallVector<unsigned, 4> &Indices, Type *SubShadowTy,
950 Value *PrimitiveShadow, IRBuilder<> &IRB) {
951 if (!isa<ArrayType>(SubShadowTy) && !isa<StructType>(SubShadowTy))
952 return IRB.CreateInsertValue(Shadow, PrimitiveShadow, Indices);
953
954 if (ArrayType *AT = dyn_cast<ArrayType>(SubShadowTy)) {
955 for (unsigned Idx = 0; Idx < AT->getNumElements(); Idx++) {
956 Indices.push_back(Idx);
958 Shadow, Indices, AT->getElementType(), PrimitiveShadow, IRB);
959 Indices.pop_back();
960 }
961 return Shadow;
962 }
963
964 if (StructType *ST = dyn_cast<StructType>(SubShadowTy)) {
965 for (unsigned Idx = 0; Idx < ST->getNumElements(); Idx++) {
966 Indices.push_back(Idx);
968 Shadow, Indices, ST->getElementType(Idx), PrimitiveShadow, IRB);
969 Indices.pop_back();
970 }
971 return Shadow;
972 }
973 llvm_unreachable("Unexpected shadow type");
974}
975
976bool DFSanFunction::shouldInstrumentWithCall() {
977 return ClInstrumentWithCallThreshold >= 0 &&
978 NumOriginStores >= ClInstrumentWithCallThreshold;
979}
980
// Expands PrimitiveShadow into a shadow value of the shadow type of T.
//  - If that shadow type is not an array or struct, PrimitiveShadow is
//    returned unchanged.
//  - If PrimitiveShadow is a constant zero shadow, the zero shadow constant
//    is returned and no instructions are created.
//  - Otherwise the aggregate is built starting from undef by
//    expandFromPrimitiveShadowRecursive, with the builder positioned at Pos.
// The resulting aggregate is recorded in CachedCollapsedShadows together with
// the primitive shadow it was built from.
981Value *DFSanFunction::expandFromPrimitiveShadow(Type *T, Value *PrimitiveShadow,
983 Type *ShadowTy = DFS.getShadowTy(T);
984
985 if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))
986 return PrimitiveShadow;
987
988 if (DFS.isZeroShadow(PrimitiveShadow))
989 return DFS.getZeroShadow(ShadowTy);
990
991 IRBuilder<> IRB(Pos->getParent(), Pos);
993 Value *Shadow = UndefValue::get(ShadowTy);
994 Shadow = expandFromPrimitiveShadowRecursive(Shadow, Indices, ShadowTy,
995 PrimitiveShadow, IRB);
996
997 // Caches the primitive shadow value that built the shadow value.
998 CachedCollapsedShadows[Shadow] = PrimitiveShadow;
999 return Shadow;
1000}
1001
1002template <class AggregateType>
1003Value *DFSanFunction::collapseAggregateShadow(AggregateType *AT, Value *Shadow,
1004 IRBuilder<> &IRB) {
1005 if (!AT->getNumElements())
1006 return DFS.ZeroPrimitiveShadow;
1007
1008 Value *FirstItem = IRB.CreateExtractValue(Shadow, 0);
1009 Value *Aggregator = collapseToPrimitiveShadow(FirstItem, IRB);
1010
1011 for (unsigned Idx = 1; Idx < AT->getNumElements(); Idx++) {
1012 Value *ShadowItem = IRB.CreateExtractValue(Shadow, Idx);
1013 Value *ShadowInner = collapseToPrimitiveShadow(ShadowItem, IRB);
1014 Aggregator = IRB.CreateOr(Aggregator, ShadowInner);
1015 }
1016 return Aggregator;
1017}
1018
1019Value *DFSanFunction::collapseToPrimitiveShadow(Value *Shadow,
1020 IRBuilder<> &IRB) {
1021 Type *ShadowTy = Shadow->getType();
1022 if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))
1023 return Shadow;
1024 if (ArrayType *AT = dyn_cast<ArrayType>(ShadowTy))
1025 return collapseAggregateShadow<>(AT, Shadow, IRB);
1026 if (StructType *ST = dyn_cast<StructType>(ShadowTy))
1027 return collapseAggregateShadow<>(ST, Shadow, IRB);
1028 llvm_unreachable("Unexpected shadow type");
1029}
1030
// Position-based overload: collapses Shadow to a primitive shadow, emitting
// any needed instructions at Pos. Non-aggregate shadows are returned as is.
// For aggregates, a previously cached collapsed value is reused when it
// dominates Pos; otherwise a new one is computed and stored in the cache.
1031Value *DFSanFunction::collapseToPrimitiveShadow(Value *Shadow,
1033 Type *ShadowTy = Shadow->getType();
1034 if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))
1035 return Shadow;
1036
1037 // Checks if the cached collapsed shadow value dominates Pos.
// CS is a reference to the cache slot for Shadow, so the assignment further
// down updates the cache entry in place.
1038 Value *&CS = CachedCollapsedShadows[Shadow];
1039 if (CS && DT.dominates(CS, Pos))
1040 return CS;
1041
1042 IRBuilder<> IRB(Pos->getParent(), Pos);
1043 Value *PrimitiveShadow = collapseToPrimitiveShadow(Shadow, IRB);
1044 // Caches the converted primitive shadow value.
1045 CS = PrimitiveShadow;
1046 return PrimitiveShadow;
1047}
1048
// Inserts, immediately before I, a call to the conditional callback runtime
// function, passing the shadow of Condition (and also its origin when origins
// are tracked). The first call argument is marked zero-extended.
// NOTE(review): the condition of the early-return guard below is not visible
// in this listing - confirm what enables the callback.
1049void DFSanFunction::addConditionalCallbacksIfEnabled(Instruction &I,
1050 Value *Condition) {
1052 return;
1053 }
1054 IRBuilder<> IRB(&I);
1055 Value *CondShadow = getShadow(Condition);
1056 CallInst *CI;
1057 if (DFS.shouldTrackOrigins()) {
1058 Value *CondOrigin = getOrigin(Condition);
1059 CI = IRB.CreateCall(DFS.DFSanConditionalCallbackOriginFn,
1060 {CondShadow, CondOrigin});
1061 } else {
1062 CI = IRB.CreateCall(DFS.DFSanConditionalCallbackFn, {CondShadow});
1063 }
1064 CI->addParamAttr(0, Attribute::ZExt);
1065}
1066
// Emits, through IRB, a call to the "reaches function" callback for Data.
// Arguments passed: the collapsed primitive shadow of Data, its origin (only
// when origins are tracked), a file path string, a 32-bit line number, and
// the name of the function containing I. The call takes I's debug location
// and its first argument is marked zero-extended.
// NOTE(review): the condition of the early-return guard below is not visible
// in this listing - confirm what enables the callback.
1067void DFSanFunction::addReachesFunctionCallbacksIfEnabled(IRBuilder<> &IRB,
1068 Instruction &I,
1069 Value *Data) {
1071 return;
1072 }
1073 const DebugLoc &dbgloc = I.getDebugLoc();
1074 Value *DataShadow = collapseToPrimitiveShadow(getShadow(Data), IRB);
1075 ConstantInt *CILine;
1076 llvm::Value *FilePathPtr;
1077
// Without a debug location, report line 0 and the module's source file name;
// otherwise use the line and file name recorded in the debug location.
1078 if (dbgloc.get() == nullptr) {
1079 CILine = llvm::ConstantInt::get(I.getContext(), llvm::APInt(32, 0));
1080 FilePathPtr = IRB.CreateGlobalString(
1081 I.getFunction()->getParent()->getSourceFileName());
1082 } else {
1083 CILine = llvm::ConstantInt::get(I.getContext(),
1084 llvm::APInt(32, dbgloc.getLine()));
1085 FilePathPtr = IRB.CreateGlobalString(dbgloc->getFilename());
1086 }
1087
1088 llvm::Value *FunctionNamePtr =
1089 IRB.CreateGlobalString(I.getFunction()->getName());
1090
1091 CallInst *CB;
1092 std::vector<Value *> args;
1093
// The origin-tracking variant takes one extra argument (the origin) right
// after the shadow; the remaining arguments are the same in both variants.
1094 if (DFS.shouldTrackOrigins()) {
1095 Value *DataOrigin = getOrigin(Data);
1096 args = { DataShadow, DataOrigin, FilePathPtr, CILine, FunctionNamePtr };
1097 CB = IRB.CreateCall(DFS.DFSanReachesFunctionCallbackOriginFn, args);
1098 } else {
1099 args = { DataShadow, FilePathPtr, CILine, FunctionNamePtr };
1100 CB = IRB.CreateCall(DFS.DFSanReachesFunctionCallbackFn, args);
1101 }
1102 CB->addParamAttr(0, Attribute::ZExt);
1103 CB->setDebugLoc(dbgloc);
1104}
1105
// Maps an original type to its shadow type.
//  - Unsized types, integers, vectors and every type not handled below map
//    to PrimitiveShadowTy.
//  - Arrays map to an array of the element's shadow type with the same
//    element count.
//  - Structs map to a literal struct whose fields are the shadow types of the
//    original fields, in order.
1106Type *DataFlowSanitizer::getShadowTy(Type *OrigTy) {
1107 if (!OrigTy->isSized())
1108 return PrimitiveShadowTy;
1109 if (isa<IntegerType>(OrigTy))
1110 return PrimitiveShadowTy;
1111 if (isa<VectorType>(OrigTy))
1112 return PrimitiveShadowTy;
1113 if (ArrayType *AT = dyn_cast<ArrayType>(OrigTy))
1114 return ArrayType::get(getShadowTy(AT->getElementType()),
1115 AT->getNumElements());
1116 if (StructType *ST = dyn_cast<StructType>(OrigTy)) {
// NOTE(review): the declaration of Elements is not visible in this listing.
1118 for (unsigned I = 0, N = ST->getNumElements(); I < N; ++I)
1119 Elements.push_back(getShadowTy(ST->getElementType(I)));
1120 return StructType::get(*Ctx, Elements);
1121 }
1122 return PrimitiveShadowTy;
1123}
1124
1125Type *DataFlowSanitizer::getShadowTy(Value *V) {
1126 return getShadowTy(V->getType());
1127}
1128
// Per-module setup: validates the target, selects the memory map parameters,
// caches the commonly used types and zero constants, builds the function
// types of the runtime entry points, and creates the branch-weight metadata.
// Aborts via report_fatal_error when the OS is not Linux or the architecture
// has no case in the switch below. Always returns true.
1129bool DataFlowSanitizer::initializeModule(Module &M) {
1130 Triple TargetTriple(M.getTargetTriple());
1131 const DataLayout &DL = M.getDataLayout();
1132
1133 if (TargetTriple.getOS() != Triple::Linux)
1134 report_fatal_error("unsupported operating system");
1135 switch (TargetTriple.getArch()) {
1136 case Triple::aarch64:
1137 MapParams = &Linux_AArch64_MemoryMapParams;
1138 break;
1139 case Triple::x86_64:
1140 MapParams = &Linux_X86_64_MemoryMapParams;
1141 break;
// NOTE(review): the case label and assignment that belong to the following
// break are not visible in this listing.
1144 break;
1145 default:
1146 report_fatal_error("unsupported architecture");
1147 }
1148
// Cached module, context, and the basic types used throughout the pass.
1149 Mod = &M;
1150 Ctx = &M.getContext();
1151 Int8Ptr = PointerType::getUnqual(*Ctx);
1152 OriginTy = IntegerType::get(*Ctx, OriginWidthBits);
1153 OriginPtrTy = PointerType::getUnqual(OriginTy);
1154 PrimitiveShadowTy = IntegerType::get(*Ctx, ShadowWidthBits);
1155 PrimitiveShadowPtrTy = PointerType::getUnqual(PrimitiveShadowTy);
1156 IntptrTy = DL.getIntPtrType(*Ctx);
1157 ZeroPrimitiveShadow = ConstantInt::getSigned(PrimitiveShadowTy, 0);
1158 ZeroOrigin = ConstantInt::getSigned(OriginTy, 0);
1159
// Function types of the runtime entry points. None of them is variadic.
1160 Type *DFSanUnionLoadArgs[2] = {PrimitiveShadowPtrTy, IntptrTy};
1161 DFSanUnionLoadFnTy = FunctionType::get(PrimitiveShadowTy, DFSanUnionLoadArgs,
1162 /*isVarArg=*/false);
1163 Type *DFSanLoadLabelAndOriginArgs[2] = {Int8Ptr, IntptrTy};
1164 DFSanLoadLabelAndOriginFnTy =
1165 FunctionType::get(IntegerType::get(*Ctx, 64), DFSanLoadLabelAndOriginArgs,
1166 /*isVarArg=*/false);
1167 DFSanUnimplementedFnTy = FunctionType::get(
1168 Type::getVoidTy(*Ctx), PointerType::getUnqual(*Ctx), /*isVarArg=*/false);
1169 Type *DFSanWrapperExternWeakNullArgs[2] = {Int8Ptr, Int8Ptr};
1170 DFSanWrapperExternWeakNullFnTy =
1171 FunctionType::get(Type::getVoidTy(*Ctx), DFSanWrapperExternWeakNullArgs,
1172 /*isVarArg=*/false);
1173 Type *DFSanSetLabelArgs[4] = {PrimitiveShadowTy, OriginTy,
1174 PointerType::getUnqual(*Ctx), IntptrTy};
1175 DFSanSetLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx),
1176 DFSanSetLabelArgs, /*isVarArg=*/false);
1177 DFSanNonzeroLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx), {},
1178 /*isVarArg=*/false);
1179 DFSanVarargWrapperFnTy = FunctionType::get(
1180 Type::getVoidTy(*Ctx), PointerType::getUnqual(*Ctx), /*isVarArg=*/false);
1181 DFSanConditionalCallbackFnTy =
1182 FunctionType::get(Type::getVoidTy(*Ctx), PrimitiveShadowTy,
1183 /*isVarArg=*/false);
1184 Type *DFSanConditionalCallbackOriginArgs[2] = {PrimitiveShadowTy, OriginTy};
1185 DFSanConditionalCallbackOriginFnTy = FunctionType::get(
1186 Type::getVoidTy(*Ctx), DFSanConditionalCallbackOriginArgs,
1187 /*isVarArg=*/false);
// NOTE(review): in the two "reaches function" signatures the slot between the
// two Int8Ptr parameters receives the 32-bit line number built in
// addReachesFunctionCallbacksIfEnabled, yet it is declared as OriginTy. That
// presumably relies on OriginTy being 32 bits wide - confirm.
1188 Type *DFSanReachesFunctionCallbackArgs[4] = {PrimitiveShadowTy, Int8Ptr,
1189 OriginTy, Int8Ptr};
1190 DFSanReachesFunctionCallbackFnTy =
1191 FunctionType::get(Type::getVoidTy(*Ctx), DFSanReachesFunctionCallbackArgs,
1192 /*isVarArg=*/false);
1193 Type *DFSanReachesFunctionCallbackOriginArgs[5] = {
1194 PrimitiveShadowTy, OriginTy, Int8Ptr, OriginTy, Int8Ptr};
1195 DFSanReachesFunctionCallbackOriginFnTy = FunctionType::get(
1196 Type::getVoidTy(*Ctx), DFSanReachesFunctionCallbackOriginArgs,
1197 /*isVarArg=*/false);
1198 DFSanCmpCallbackFnTy =
1199 FunctionType::get(Type::getVoidTy(*Ctx), PrimitiveShadowTy,
1200 /*isVarArg=*/false);
1201 DFSanChainOriginFnTy =
1202 FunctionType::get(OriginTy, OriginTy, /*isVarArg=*/false);
1203 Type *DFSanChainOriginIfTaintedArgs[2] = {PrimitiveShadowTy, OriginTy};
1204 DFSanChainOriginIfTaintedFnTy = FunctionType::get(
1205 OriginTy, DFSanChainOriginIfTaintedArgs, /*isVarArg=*/false);
1206 Type *DFSanMaybeStoreOriginArgs[4] = {IntegerType::get(*Ctx, ShadowWidthBits),
1207 Int8Ptr, IntptrTy, OriginTy};
1208 DFSanMaybeStoreOriginFnTy = FunctionType::get(
1209 Type::getVoidTy(*Ctx), DFSanMaybeStoreOriginArgs, /*isVarArg=*/false);
1210 Type *DFSanMemOriginTransferArgs[3] = {Int8Ptr, Int8Ptr, IntptrTy};
1211 DFSanMemOriginTransferFnTy = FunctionType::get(
1212 Type::getVoidTy(*Ctx), DFSanMemOriginTransferArgs, /*isVarArg=*/false);
1213 Type *DFSanMemShadowOriginTransferArgs[3] = {Int8Ptr, Int8Ptr, IntptrTy};
1214 DFSanMemShadowOriginTransferFnTy =
1215 FunctionType::get(Type::getVoidTy(*Ctx), DFSanMemShadowOriginTransferArgs,
1216 /*isVarArg=*/false);
1217 Type *DFSanMemShadowOriginConditionalExchangeArgs[5] = {
1218 IntegerType::get(*Ctx, 8), Int8Ptr, Int8Ptr, Int8Ptr, IntptrTy};
1219 DFSanMemShadowOriginConditionalExchangeFnTy = FunctionType::get(
1220 Type::getVoidTy(*Ctx), DFSanMemShadowOriginConditionalExchangeArgs,
1221 /*isVarArg=*/false);
1222 Type *DFSanLoadStoreCallbackArgs[2] = {PrimitiveShadowTy, Int8Ptr};
1223 DFSanLoadStoreCallbackFnTy =
1224 FunctionType::get(Type::getVoidTy(*Ctx), DFSanLoadStoreCallbackArgs,
1225 /*isVarArg=*/false);
1226 Type *DFSanMemTransferCallbackArgs[2] = {PrimitiveShadowPtrTy, IntptrTy};
1227 DFSanMemTransferCallbackFnTy =
1228 FunctionType::get(Type::getVoidTy(*Ctx), DFSanMemTransferCallbackArgs,
1229 /*isVarArg=*/false);
1230
// Both weights mark the guarded branch as unlikely.
1231 ColdCallWeights = MDBuilder(*Ctx).createUnlikelyBranchWeights();
1232 OriginStoreWeights = MDBuilder(*Ctx).createUnlikelyBranchWeights();
1233 return true;
1234}
1235
1236bool DataFlowSanitizer::isInstrumented(const Function *F) {
1237 return !ABIList.isIn(*F, "uninstrumented");
1238}
1239
1240bool DataFlowSanitizer::isInstrumented(const GlobalAlias *GA) {
1241 return !ABIList.isIn(*GA, "uninstrumented");
1242}
1243
1244bool DataFlowSanitizer::isForceZeroLabels(const Function *F) {
1245 return ABIList.isIn(*F, "force_zero_labels");
1246}
1247
1248DataFlowSanitizer::WrapperKind DataFlowSanitizer::getWrapperKind(Function *F) {
1249 if (ABIList.isIn(*F, "functional"))
1250 return WK_Functional;
1251 if (ABIList.isIn(*F, "discard"))
1252 return WK_Discard;
1253 if (ABIList.isIn(*F, "custom"))
1254 return WK_Custom;
1255
1256 return WK_Warning;
1257}
1258
1259void DataFlowSanitizer::addGlobalNameSuffix(GlobalValue *GV) {
1260 std::string GVName = std::string(GV->getName()), Suffix = ".dfsan";
1261 GV->setName(GVName + Suffix);
1262
1263 // Try to change the name of the function in module inline asm. We only do
1264 // this for specific asm directives, currently only ".symver", to try to avoid
1265 // corrupting asm which happens to contain the symbol name as a substring.
1266 // Note that the substitution for .symver assumes that the versioned symbol
1267 // also has an instrumented name.
1268 std::string Asm = GV->getParent()->getModuleInlineAsm();
1269 std::string SearchStr = ".symver " + GVName + ",";
1270 size_t Pos = Asm.find(SearchStr);
1271 if (Pos != std::string::npos) {
1272 Asm.replace(Pos, SearchStr.size(), ".symver " + GVName + Suffix + ",");
1273 Pos = Asm.find('@');
1274
1275 if (Pos == std::string::npos)
1276 report_fatal_error(Twine("unsupported .symver: ", Asm));
1277
1278 Asm.replace(Pos, 1, Suffix + "@");
1279 GV->getParent()->setModuleInlineAsm(Asm);
1280 }
1281}
1282
1283void DataFlowSanitizer::buildExternWeakCheckIfNeeded(IRBuilder<> &IRB,
1284 Function *F) {
1285 // If the function we are wrapping was ExternWeak, it may be null.
1286 // The original code before calling this wrapper may have checked for null,
1287 // but replacing with a known-to-not-be-null wrapper can break this check.
1288 // When replacing uses of the extern weak function with the wrapper we try
1289 // to avoid replacing uses in conditionals, but this is not perfect.
1290 // In the case where we fail, and accidentally optimize out a null check
1291 // for a extern weak function, add a check here to help identify the issue.
1292 if (GlobalValue::isExternalWeakLinkage(F->getLinkage())) {
1293 std::vector<Value *> Args;
1294 Args.push_back(F);
1295 Args.push_back(IRB.CreateGlobalString(F->getName()));
1296 IRB.CreateCall(DFSanWrapperExternWeakNullFn, Args);
1297 }
1298}
1299
// Creates a new function named NewFName, of type NewFT, in F's module and
// address space, with the given linkage and with F's attributes copied over.
// Its single "entry" block either
//  - for a variadic F: calls the vararg-wrapper runtime function with F's
//    name and ends in unreachable (the "split-stack" attribute is removed), or
//  - otherwise: calls F with the wrapper's first FT->getNumParams() arguments
//    and returns the call's result (or void).
// Returns the new function.
1300Function *
1301DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName,
1303 FunctionType *NewFT) {
1304 FunctionType *FT = F->getFunctionType();
1305 Function *NewF = Function::Create(NewFT, NewFLink, F->getAddressSpace(),
1306 NewFName, F->getParent());
1307 NewF->copyAttributesFrom(F);
// NOTE(review): the start of the statement that ends on the next line is not
// visible in this listing.
1309 NewFT->getReturnType(), NewF->getAttributes().getRetAttrs()));
1310
1311 BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF);
1312 if (F->isVarArg()) {
1313 NewF->removeFnAttr("split-stack");
1314 CallInst::Create(DFSanVarargWrapperFn,
1315 IRBuilder<>(BB).CreateGlobalString(F->getName()), "", BB);
1316 new UnreachableInst(*Ctx, BB);
1317 } else {
// Forward only as many arguments as F itself takes; any further parameters
// of NewFT are not passed on.
1318 auto ArgIt = pointer_iterator<Argument *>(NewF->arg_begin());
1319 std::vector<Value *> Args(ArgIt, ArgIt + FT->getNumParams());
1320
1321 CallInst *CI = CallInst::Create(F, Args, "", BB);
1322 if (FT->getReturnType()->isVoidTy())
1323 ReturnInst::Create(*Ctx, BB);
1324 else
1325 ReturnInst::Create(*Ctx, CI, BB);
1326 }
1327
1328 return NewF;
1329}
1330
1331// Initialize DataFlowSanitizer runtime functions and declare them in the module
// Each runtime entry point is obtained with Mod->getOrInsertFunction using
// the function types prepared in initializeModule; several declarations also
// get an attribute list (nounwind, zeroext on selected parameters/returns).
// Every runtime function, including the callback functions, is then recorded
// in DFSanRuntimeFunctions.
// The callback callees inserted below (DFSanLoadCallbackFn and friends) are
// assigned in initializeCallbackFunctions, so that function has to run first.
1332void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) {
1333 LLVMContext &C = M.getContext();
1334 {
// NOTE(review): the declarations of AL in the scoped blocks of this function
// are not visible in this listing.
1336 AL = AL.addFnAttribute(C, Attribute::NoUnwind);
1337 AL = AL.addFnAttribute(
1339 AL = AL.addRetAttribute(C, Attribute::ZExt);
1340 DFSanUnionLoadFn =
1341 Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy, AL);
1342 }
1343 {
1345 AL = AL.addFnAttribute(C, Attribute::NoUnwind);
1346 AL = AL.addFnAttribute(
1348 AL = AL.addRetAttribute(C, Attribute::ZExt);
1349 DFSanLoadLabelAndOriginFn = Mod->getOrInsertFunction(
1350 "__dfsan_load_label_and_origin", DFSanLoadLabelAndOriginFnTy, AL);
1351 }
1352 DFSanUnimplementedFn =
1353 Mod->getOrInsertFunction("__dfsan_unimplemented", DFSanUnimplementedFnTy);
1354 DFSanWrapperExternWeakNullFn = Mod->getOrInsertFunction(
1355 "__dfsan_wrapper_extern_weak_null", DFSanWrapperExternWeakNullFnTy);
1356 {
1358 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1359 AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
1360 DFSanSetLabelFn =
1361 Mod->getOrInsertFunction("__dfsan_set_label", DFSanSetLabelFnTy, AL);
1362 }
1363 DFSanNonzeroLabelFn =
1364 Mod->getOrInsertFunction("__dfsan_nonzero_label", DFSanNonzeroLabelFnTy);
1365 DFSanVarargWrapperFn = Mod->getOrInsertFunction("__dfsan_vararg_wrapper",
1366 DFSanVarargWrapperFnTy);
1367 {
1369 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1370 AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt);
1371 DFSanChainOriginFn = Mod->getOrInsertFunction("__dfsan_chain_origin",
1372 DFSanChainOriginFnTy, AL);
1373 }
1374 {
1376 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1377 AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
1378 AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt);
1379 DFSanChainOriginIfTaintedFn = Mod->getOrInsertFunction(
1380 "__dfsan_chain_origin_if_tainted", DFSanChainOriginIfTaintedFnTy, AL);
1381 }
1382 DFSanMemOriginTransferFn = Mod->getOrInsertFunction(
1383 "__dfsan_mem_origin_transfer", DFSanMemOriginTransferFnTy);
1384
1385 DFSanMemShadowOriginTransferFn = Mod->getOrInsertFunction(
1386 "__dfsan_mem_shadow_origin_transfer", DFSanMemShadowOriginTransferFnTy);
1387
1388 DFSanMemShadowOriginConditionalExchangeFn =
1389 Mod->getOrInsertFunction("__dfsan_mem_shadow_origin_conditional_exchange",
1390 DFSanMemShadowOriginConditionalExchangeFnTy);
1391
1392 {
1394 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1395 AL = AL.addParamAttribute(M.getContext(), 3, Attribute::ZExt);
1396 DFSanMaybeStoreOriginFn = Mod->getOrInsertFunction(
1397 "__dfsan_maybe_store_origin", DFSanMaybeStoreOriginFnTy, AL);
1398 }
1399
// Record every runtime callee (with pointer casts stripped) so that later
// code can recognise these functions, e.g. runImpl excludes them from the
// list of functions to instrument.
1400 DFSanRuntimeFunctions.insert(
1401 DFSanUnionLoadFn.getCallee()->stripPointerCasts());
1402 DFSanRuntimeFunctions.insert(
1403 DFSanLoadLabelAndOriginFn.getCallee()->stripPointerCasts());
1404 DFSanRuntimeFunctions.insert(
1405 DFSanUnimplementedFn.getCallee()->stripPointerCasts());
1406 DFSanRuntimeFunctions.insert(
1407 DFSanWrapperExternWeakNullFn.getCallee()->stripPointerCasts());
1408 DFSanRuntimeFunctions.insert(
1409 DFSanSetLabelFn.getCallee()->stripPointerCasts());
1410 DFSanRuntimeFunctions.insert(
1411 DFSanNonzeroLabelFn.getCallee()->stripPointerCasts());
1412 DFSanRuntimeFunctions.insert(
1413 DFSanVarargWrapperFn.getCallee()->stripPointerCasts());
1414 DFSanRuntimeFunctions.insert(
1415 DFSanLoadCallbackFn.getCallee()->stripPointerCasts());
1416 DFSanRuntimeFunctions.insert(
1417 DFSanStoreCallbackFn.getCallee()->stripPointerCasts());
1418 DFSanRuntimeFunctions.insert(
1419 DFSanMemTransferCallbackFn.getCallee()->stripPointerCasts());
1420 DFSanRuntimeFunctions.insert(
1421 DFSanConditionalCallbackFn.getCallee()->stripPointerCasts());
1422 DFSanRuntimeFunctions.insert(
1423 DFSanConditionalCallbackOriginFn.getCallee()->stripPointerCasts());
1424 DFSanRuntimeFunctions.insert(
1425 DFSanReachesFunctionCallbackFn.getCallee()->stripPointerCasts());
1426 DFSanRuntimeFunctions.insert(
1427 DFSanReachesFunctionCallbackOriginFn.getCallee()->stripPointerCasts());
1428 DFSanRuntimeFunctions.insert(
1429 DFSanCmpCallbackFn.getCallee()->stripPointerCasts());
1430 DFSanRuntimeFunctions.insert(
1431 DFSanChainOriginFn.getCallee()->stripPointerCasts());
1432 DFSanRuntimeFunctions.insert(
1433 DFSanChainOriginIfTaintedFn.getCallee()->stripPointerCasts());
1434 DFSanRuntimeFunctions.insert(
1435 DFSanMemOriginTransferFn.getCallee()->stripPointerCasts());
1436 DFSanRuntimeFunctions.insert(
1437 DFSanMemShadowOriginTransferFn.getCallee()->stripPointerCasts());
1438 DFSanRuntimeFunctions.insert(
1439 DFSanMemShadowOriginConditionalExchangeFn.getCallee()
1440 ->stripPointerCasts());
1441 DFSanRuntimeFunctions.insert(
1442 DFSanMaybeStoreOriginFn.getCallee()->stripPointerCasts());
1443}
1444
1445// Initializes event callback functions and declare them in the module
// Each callback is obtained with Mod->getOrInsertFunction. All of them except
// the mem-transfer callback are declared with an attribute list that marks
// parameter 0 as zero-extended.
1446void DataFlowSanitizer::initializeCallbackFunctions(Module &M) {
1447 {
// NOTE(review): the declarations of AL in the scoped blocks of this function
// are not visible in this listing.
1449 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1450 DFSanLoadCallbackFn = Mod->getOrInsertFunction(
1451 "__dfsan_load_callback", DFSanLoadStoreCallbackFnTy, AL);
1452 }
1453 {
1455 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1456 DFSanStoreCallbackFn = Mod->getOrInsertFunction(
1457 "__dfsan_store_callback", DFSanLoadStoreCallbackFnTy, AL);
1458 }
1459 DFSanMemTransferCallbackFn = Mod->getOrInsertFunction(
1460 "__dfsan_mem_transfer_callback", DFSanMemTransferCallbackFnTy);
1461 {
1463 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1464 DFSanCmpCallbackFn = Mod->getOrInsertFunction("__dfsan_cmp_callback",
1465 DFSanCmpCallbackFnTy, AL);
1466 }
1467 {
1469 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1470 DFSanConditionalCallbackFn = Mod->getOrInsertFunction(
1471 "__dfsan_conditional_callback", DFSanConditionalCallbackFnTy, AL);
1472 }
1473 {
1475 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1476 DFSanConditionalCallbackOriginFn =
1477 Mod->getOrInsertFunction("__dfsan_conditional_callback_origin",
1478 DFSanConditionalCallbackOriginFnTy, AL);
1479 }
1480 {
1482 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1483 DFSanReachesFunctionCallbackFn =
1484 Mod->getOrInsertFunction("__dfsan_reaches_function_callback",
1485 DFSanReachesFunctionCallbackFnTy, AL);
1486 }
1487 {
1489 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1490 DFSanReachesFunctionCallbackOriginFn =
1491 Mod->getOrInsertFunction("__dfsan_reaches_function_callback_origin",
1492 DFSanReachesFunctionCallbackOriginFnTy, AL);
1493 }
1494}
1495
1496bool DataFlowSanitizer::runImpl(
1498 initializeModule(M);
1499
1500 if (ABIList.isIn(M, "skip"))
1501 return false;
1502
1503 const unsigned InitialGlobalSize = M.global_size();
1504 const unsigned InitialModuleSize = M.size();
1505
1506 bool Changed = false;
1507
1508 auto GetOrInsertGlobal = [this, &Changed](StringRef Name,
1509 Type *Ty) -> Constant * {
1510 Constant *C = Mod->getOrInsertGlobal(Name, Ty);
1511 if (GlobalVariable *G = dyn_cast<GlobalVariable>(C)) {
1512 Changed |= G->getThreadLocalMode() != GlobalVariable::InitialExecTLSModel;
1513 G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel);
1514 }
1515 return C;
1516 };
1517
1518 // These globals must be kept in sync with the ones in dfsan.cpp.
1519 ArgTLS =
1520 GetOrInsertGlobal("__dfsan_arg_tls",
1521 ArrayType::get(Type::getInt64Ty(*Ctx), ArgTLSSize / 8));
1522 RetvalTLS = GetOrInsertGlobal(
1523 "__dfsan_retval_tls",
1524 ArrayType::get(Type::getInt64Ty(*Ctx), RetvalTLSSize / 8));
1525 ArgOriginTLSTy = ArrayType::get(OriginTy, NumOfElementsInArgOrgTLS);
1526 ArgOriginTLS = GetOrInsertGlobal("__dfsan_arg_origin_tls", ArgOriginTLSTy);
1527 RetvalOriginTLS = GetOrInsertGlobal("__dfsan_retval_origin_tls", OriginTy);
1528
1529 (void)Mod->getOrInsertGlobal("__dfsan_track_origins", OriginTy, [&] {
1530 Changed = true;
1531 return new GlobalVariable(
1532 M, OriginTy, true, GlobalValue::WeakODRLinkage,
1533 ConstantInt::getSigned(OriginTy,
1534 shouldTrackOrigins() ? ClTrackOrigins : 0),
1535 "__dfsan_track_origins");
1536 });
1537
1538 initializeCallbackFunctions(M);
1539 initializeRuntimeFunctions(M);
1540
1541 std::vector<Function *> FnsToInstrument;
1542 SmallPtrSet<Function *, 2> FnsWithNativeABI;
1543 SmallPtrSet<Function *, 2> FnsWithForceZeroLabel;
1544 SmallPtrSet<Constant *, 1> PersonalityFns;
1545 for (Function &F : M)
1546 if (!F.isIntrinsic() && !DFSanRuntimeFunctions.contains(&F) &&
1547 !LibAtomicFunction(F) &&
1548 !F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation)) {
1549 FnsToInstrument.push_back(&F);
1550 if (F.hasPersonalityFn())
1551 PersonalityFns.insert(F.getPersonalityFn()->stripPointerCasts());
1552 }
1553
1555 for (auto *C : PersonalityFns) {
1556 assert(isa<Function>(C) && "Personality routine is not a function!");
1557 Function *F = cast<Function>(C);
1558 if (!isInstrumented(F))
1559 llvm::erase(FnsToInstrument, F);
1560 }
1561 }
1562
1563 // Give function aliases prefixes when necessary, and build wrappers where the
1564 // instrumentedness is inconsistent.
1565 for (GlobalAlias &GA : llvm::make_early_inc_range(M.aliases())) {
1566 // Don't stop on weak. We assume people aren't playing games with the
1567 // instrumentedness of overridden weak aliases.
1568 auto *F = dyn_cast<Function>(GA.getAliaseeObject());
1569 if (!F)
1570 continue;
1571
1572 bool GAInst = isInstrumented(&GA), FInst = isInstrumented(F);
1573 if (GAInst && FInst) {
1574 addGlobalNameSuffix(&GA);
1575 } else if (GAInst != FInst) {
1576 // Non-instrumented alias of an instrumented function, or vice versa.
1577 // Replace the alias with a native-ABI wrapper of the aliasee. The pass
1578 // below will take care of instrumenting it.
1579 Function *NewF =
1580 buildWrapperFunction(F, "", GA.getLinkage(), F->getFunctionType());
1581 GA.replaceAllUsesWith(NewF);
1582 NewF->takeName(&GA);
1583 GA.eraseFromParent();
1584 FnsToInstrument.push_back(NewF);
1585 }
1586 }
1587
1588 // TODO: This could be more precise.
1589 ReadOnlyNoneAttrs.addAttribute(Attribute::Memory);
1590
1591 // First, change the ABI of every function in the module. ABI-listed
1592 // functions keep their original ABI and get a wrapper function.
1593 for (std::vector<Function *>::iterator FI = FnsToInstrument.begin(),
1594 FE = FnsToInstrument.end();
1595 FI != FE; ++FI) {
1596 Function &F = **FI;
1597 FunctionType *FT = F.getFunctionType();
1598
1599 bool IsZeroArgsVoidRet = (FT->getNumParams() == 0 && !FT->isVarArg() &&
1600 FT->getReturnType()->isVoidTy());
1601
1602 if (isInstrumented(&F)) {
1603 if (isForceZeroLabels(&F))
1604 FnsWithForceZeroLabel.insert(&F);
1605
1606 // Instrumented functions get a '.dfsan' suffix. This allows us to more
1607 // easily identify cases of mismatching ABIs. This naming scheme is
1608 // mangling-compatible (see Itanium ABI), using a vendor-specific suffix.
1609 addGlobalNameSuffix(&F);
1610 } else if (!IsZeroArgsVoidRet || getWrapperKind(&F) == WK_Custom) {
1611 // Build a wrapper function for F. The wrapper simply calls F, and is
1612 // added to FnsToInstrument so that any instrumentation according to its
1613 // WrapperKind is done in the second pass below.
1614
1615 // If the function being wrapped has local linkage, then preserve the
1616 // function's linkage in the wrapper function.
1617 GlobalValue::LinkageTypes WrapperLinkage =
1618 F.hasLocalLinkage() ? F.getLinkage()
1620
1621 Function *NewF = buildWrapperFunction(
1622 &F,
1623 (shouldTrackOrigins() ? std::string("dfso$") : std::string("dfsw$")) +
1624 std::string(F.getName()),
1625 WrapperLinkage, FT);
1626 NewF->removeFnAttrs(ReadOnlyNoneAttrs);
1627
1628 // Extern weak functions can sometimes be null at execution time.
1629 // Code will sometimes check if an extern weak function is null.
1630 // This could look something like:
1631 // declare extern_weak i8 @my_func(i8)
1632 // br i1 icmp ne (i8 (i8)* @my_func, i8 (i8)* null), label %use_my_func,
1633 // label %avoid_my_func
1634 // The @"dfsw$my_func" wrapper is never null, so if we replace this use
1635 // in the comparison, the icmp will simplify to false and we have
1636 // accidentally optimized away a null check that is necessary.
1637 // This can lead to a crash when the null extern_weak my_func is called.
1638 //
1639 // To prevent (the most common pattern of) this problem,
1640 // do not replace uses in comparisons with the wrapper.
1641 // We definitely want to replace uses in call instructions.
1642 // Other uses (e.g. store the function address somewhere) might be
1643 // called or compared or both - this case may not be handled correctly.
1644 // We will default to replacing with wrapper in cases we are unsure.
1645 auto IsNotCmpUse = [](Use &U) -> bool {
1646 User *Usr = U.getUser();
1647 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Usr)) {
1648 // This is the most common case for icmp ne null
1649 if (CE->getOpcode() == Instruction::ICmp) {
1650 return false;
1651 }
1652 }
1653 if (Instruction *I = dyn_cast<Instruction>(Usr)) {
1654 if (I->getOpcode() == Instruction::ICmp) {
1655 return false;
1656 }
1657 }
1658 return true;
1659 };
1660 F.replaceUsesWithIf(NewF, IsNotCmpUse);
1661
1662 UnwrappedFnMap[NewF] = &F;
1663 *FI = NewF;
1664
1665 if (!F.isDeclaration()) {
1666 // This function is probably defining an interposition of an
1667 // uninstrumented function and hence needs to keep the original ABI.
1668 // But any functions it may call need to use the instrumented ABI, so
1669 // we instrument it in a mode which preserves the original ABI.
1670 FnsWithNativeABI.insert(&F);
1671
1672 // This code needs to rebuild the iterators, as they may be invalidated
1673 // by the push_back, taking care that the new range does not include
1674 // any functions added by this code.
1675 size_t N = FI - FnsToInstrument.begin(),
1676 Count = FE - FnsToInstrument.begin();
1677 FnsToInstrument.push_back(&F);
1678 FI = FnsToInstrument.begin() + N;
1679 FE = FnsToInstrument.begin() + Count;
1680 }
1681 // Hopefully, nobody will try to indirectly call a vararg
1682 // function... yet.
1683 } else if (FT->isVarArg()) {
1684 UnwrappedFnMap[&F] = &F;
1685 *FI = nullptr;
1686 }
1687 }
1688
1689 for (Function *F : FnsToInstrument) {
1690 if (!F || F->isDeclaration())
1691 continue;
1692
1694
1695 DFSanFunction DFSF(*this, F, FnsWithNativeABI.count(F),
1696 FnsWithForceZeroLabel.count(F), GetTLI(*F));
1697
1699 // Add callback for arguments reaching this function.
1700 for (auto &FArg : F->args()) {
1701 Instruction *Next = &F->getEntryBlock().front();
1702 Value *FArgShadow = DFSF.getShadow(&FArg);
1703 if (isZeroShadow(FArgShadow))
1704 continue;
1705 if (Instruction *FArgShadowInst = dyn_cast<Instruction>(FArgShadow)) {
1706 Next = FArgShadowInst->getNextNode();
1707 }
1708 if (shouldTrackOrigins()) {
1709 if (Instruction *Origin =
1710 dyn_cast<Instruction>(DFSF.getOrigin(&FArg))) {
1711 // Ensure IRB insertion point is after loads for shadow and origin.
1712 Instruction *OriginNext = Origin->getNextNode();
1713 if (Next->comesBefore(OriginNext)) {
1714 Next = OriginNext;
1715 }
1716 }
1717 }
1718 IRBuilder<> IRB(Next);
1719 DFSF.addReachesFunctionCallbacksIfEnabled(IRB, *Next, &FArg);
1720 }
1721 }
1722
1723 // DFSanVisitor may create new basic blocks, which confuses df_iterator.
1724 // Build a copy of the list before iterating over it.
1725 SmallVector<BasicBlock *, 4> BBList(depth_first(&F->getEntryBlock()));
1726
1727 for (BasicBlock *BB : BBList) {
1728 Instruction *Inst = &BB->front();
1729 while (true) {
1730 // DFSanVisitor may split the current basic block, changing the current
1731 // instruction's next pointer and moving the next instruction to the
1732 // tail block from which we should continue.
1733 Instruction *Next = Inst->getNextNode();
1734 // DFSanVisitor may delete Inst, so keep track of whether it was a
1735 // terminator.
1736 bool IsTerminator = Inst->isTerminator();
1737 if (!DFSF.SkipInsts.count(Inst))
1738 DFSanVisitor(DFSF).visit(Inst);
1739 if (IsTerminator)
1740 break;
1741 Inst = Next;
1742 }
1743 }
1744
1745 // We will not necessarily be able to compute the shadow for every phi node
1746 // until we have visited every block. Therefore, the code that handles phi
1747 // nodes adds them to the PHIFixups list so that they can be properly
1748 // handled here.
1749 for (DFSanFunction::PHIFixupElement &P : DFSF.PHIFixups) {
1750 for (unsigned Val = 0, N = P.Phi->getNumIncomingValues(); Val != N;
1751 ++Val) {
1752 P.ShadowPhi->setIncomingValue(
1753 Val, DFSF.getShadow(P.Phi->getIncomingValue(Val)));
1754 if (P.OriginPhi)
1755 P.OriginPhi->setIncomingValue(
1756 Val, DFSF.getOrigin(P.Phi->getIncomingValue(Val)));
1757 }
1758 }
1759
1760 // -dfsan-debug-nonzero-labels will split the CFG in all kinds of crazy
1761 // places (i.e. instructions in basic blocks we haven't even begun visiting
1762 // yet). To make our life easier, do this work in a pass after the main
1763 // instrumentation.
1765 for (Value *V : DFSF.NonZeroChecks) {
1767 if (Instruction *I = dyn_cast<Instruction>(V))
1768 Pos = std::next(I->getIterator());
1769 else
1770 Pos = DFSF.F->getEntryBlock().begin();
1771 while (isa<PHINode>(Pos) || isa<AllocaInst>(Pos))
1772 Pos = std::next(Pos->getIterator());
1773 IRBuilder<> IRB(Pos->getParent(), Pos);
1774 Value *PrimitiveShadow = DFSF.collapseToPrimitiveShadow(V, Pos);
1775 Value *Ne =
1776 IRB.CreateICmpNE(PrimitiveShadow, DFSF.DFS.ZeroPrimitiveShadow);
1777 BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
1778 Ne, Pos, /*Unreachable=*/false, ColdCallWeights));
1779 IRBuilder<> ThenIRB(BI);
1780 ThenIRB.CreateCall(DFSF.DFS.DFSanNonzeroLabelFn, {});
1781 }
1782 }
1783 }
1784
1785 return Changed || !FnsToInstrument.empty() ||
1786 M.global_size() != InitialGlobalSize || M.size() != InitialModuleSize;
1787}
1788
1789Value *DFSanFunction::getArgTLS(Type *T, unsigned ArgOffset, IRBuilder<> &IRB) {
1790 Value *Base = IRB.CreatePointerCast(DFS.ArgTLS, DFS.IntptrTy);
1791 if (ArgOffset)
1792 Base = IRB.CreateAdd(Base, ConstantInt::get(DFS.IntptrTy, ArgOffset));
1793 return IRB.CreateIntToPtr(Base, PointerType::get(*DFS.Ctx, 0), "_dfsarg");
1794}
1795
1796Value *DFSanFunction::getRetvalTLS(Type *T, IRBuilder<> &IRB) {
1797 return IRB.CreatePointerCast(DFS.RetvalTLS, PointerType::get(*DFS.Ctx, 0),
1798 "_dfsret");
1799}
1800
1801Value *DFSanFunction::getRetvalOriginTLS() { return DFS.RetvalOriginTLS; }
1802
1803Value *DFSanFunction::getArgOriginTLS(unsigned ArgNo, IRBuilder<> &IRB) {
1804 return IRB.CreateConstInBoundsGEP2_64(DFS.ArgOriginTLSTy, DFS.ArgOriginTLS, 0,
1805 ArgNo, "_dfsarg_o");
1806}
1807
1808Value *DFSanFunction::getOrigin(Value *V) {
1809 assert(DFS.shouldTrackOrigins());
1810 if (!isa<Argument>(V) && !isa<Instruction>(V))
1811 return DFS.ZeroOrigin;
1812 Value *&Origin = ValOriginMap[V];
1813 if (!Origin) {
1814 if (Argument *A = dyn_cast<Argument>(V)) {
1815 if (IsNativeABI)
1816 return DFS.ZeroOrigin;
1817 if (A->getArgNo() < DFS.NumOfElementsInArgOrgTLS) {
1818 Instruction *ArgOriginTLSPos = &*F->getEntryBlock().begin();
1819 IRBuilder<> IRB(ArgOriginTLSPos);
1820 Value *ArgOriginPtr = getArgOriginTLS(A->getArgNo(), IRB);
1821 Origin = IRB.CreateLoad(DFS.OriginTy, ArgOriginPtr);
1822 } else {
1823 // Overflow
1824 Origin = DFS.ZeroOrigin;
1825 }
1826 } else {
1827 Origin = DFS.ZeroOrigin;
1828 }
1829 }
1830 return Origin;
1831}
1832
1833void DFSanFunction::setOrigin(Instruction *I, Value *Origin) {
1834 if (!DFS.shouldTrackOrigins())
1835 return;
1836 assert(!ValOriginMap.count(I));
1837 assert(Origin->getType() == DFS.OriginTy);
1838 ValOriginMap[I] = Origin;
1839}
1840
1841Value *DFSanFunction::getShadowForTLSArgument(Argument *A) {
1842 unsigned ArgOffset = 0;
1843 const DataLayout &DL = F->getDataLayout();
1844 for (auto &FArg : F->args()) {
1845 if (!FArg.getType()->isSized()) {
1846 if (A == &FArg)
1847 break;
1848 continue;
1849 }
1850
1851 unsigned Size = DL.getTypeAllocSize(DFS.getShadowTy(&FArg));
1852 if (A != &FArg) {
1853 ArgOffset += alignTo(Size, ShadowTLSAlignment);
1854 if (ArgOffset > ArgTLSSize)
1855 break; // ArgTLS overflows, uses a zero shadow.
1856 continue;
1857 }
1858
1859 if (ArgOffset + Size > ArgTLSSize)
1860 break; // ArgTLS overflows, uses a zero shadow.
1861
1862 Instruction *ArgTLSPos = &*F->getEntryBlock().begin();
1863 IRBuilder<> IRB(ArgTLSPos);
1864 Value *ArgShadowPtr = getArgTLS(FArg.getType(), ArgOffset, IRB);
1865 return IRB.CreateAlignedLoad(DFS.getShadowTy(&FArg), ArgShadowPtr,
1867 }
1868
1869 return DFS.getZeroShadow(A);
1870}
1871
1872Value *DFSanFunction::getShadow(Value *V) {
1873 if (!isa<Argument>(V) && !isa<Instruction>(V))
1874 return DFS.getZeroShadow(V);
1875 if (IsForceZeroLabels)
1876 return DFS.getZeroShadow(V);
1877 Value *&Shadow = ValShadowMap[V];
1878 if (!Shadow) {
1879 if (Argument *A = dyn_cast<Argument>(V)) {
1880 if (IsNativeABI)
1881 return DFS.getZeroShadow(V);
1882 Shadow = getShadowForTLSArgument(A);
1883 NonZeroChecks.push_back(Shadow);
1884 } else {
1885 Shadow = DFS.getZeroShadow(V);
1886 }
1887 }
1888 return Shadow;
1889}
1890
1891void DFSanFunction::setShadow(Instruction *I, Value *Shadow) {
1892 assert(!ValShadowMap.count(I));
1893 ValShadowMap[I] = Shadow;
1894}
1895
1896/// Compute the integer shadow offset that corresponds to a given
1897/// application address.
1898///
1899/// Offset = (Addr & ~AndMask) ^ XorMask
1900Value *DataFlowSanitizer::getShadowOffset(Value *Addr, IRBuilder<> &IRB) {
1901 assert(Addr != RetvalTLS && "Reinstrumenting?");
1902 Value *OffsetLong = IRB.CreatePointerCast(Addr, IntptrTy);
1903
1904 uint64_t AndMask = MapParams->AndMask;
1905 if (AndMask)
1906 OffsetLong =
1907 IRB.CreateAnd(OffsetLong, ConstantInt::get(IntptrTy, ~AndMask));
1908
1909 uint64_t XorMask = MapParams->XorMask;
1910 if (XorMask)
1911 OffsetLong = IRB.CreateXor(OffsetLong, ConstantInt::get(IntptrTy, XorMask));
1912 return OffsetLong;
1913}
1914
1915std::pair<Value *, Value *>
1916DataFlowSanitizer::getShadowOriginAddress(Value *Addr, Align InstAlignment,
1918 // Returns ((Addr & shadow_mask) + origin_base - shadow_base) & ~4UL
1919 IRBuilder<> IRB(Pos->getParent(), Pos);
1920 Value *ShadowOffset = getShadowOffset(Addr, IRB);
1921 Value *ShadowLong = ShadowOffset;
1922 uint64_t ShadowBase = MapParams->ShadowBase;
1923 if (ShadowBase != 0) {
1924 ShadowLong =
1925 IRB.CreateAdd(ShadowLong, ConstantInt::get(IntptrTy, ShadowBase));
1926 }
1927 Value *ShadowPtr = IRB.CreateIntToPtr(ShadowLong, PointerType::get(*Ctx, 0));
1928 Value *OriginPtr = nullptr;
1929 if (shouldTrackOrigins()) {
1930 Value *OriginLong = ShadowOffset;
1931 uint64_t OriginBase = MapParams->OriginBase;
1932 if (OriginBase != 0)
1933 OriginLong =
1934 IRB.CreateAdd(OriginLong, ConstantInt::get(IntptrTy, OriginBase));
1935 const Align Alignment = llvm::assumeAligned(InstAlignment.value());
1936 // When alignment is >= 4, Addr must be aligned to 4, otherwise it is UB.
1937 // So Mask is unnecessary.
1938 if (Alignment < MinOriginAlignment) {
1940 OriginLong = IRB.CreateAnd(OriginLong, ConstantInt::get(IntptrTy, ~Mask));
1941 }
1942 OriginPtr = IRB.CreateIntToPtr(OriginLong, OriginPtrTy);
1943 }
1944 return std::make_pair(ShadowPtr, OriginPtr);
1945}
1946
1947Value *DataFlowSanitizer::getShadowAddress(Value *Addr,
1949 Value *ShadowOffset) {
1950 IRBuilder<> IRB(Pos->getParent(), Pos);
1951 return IRB.CreateIntToPtr(ShadowOffset, PrimitiveShadowPtrTy);
1952}
1953
1954Value *DataFlowSanitizer::getShadowAddress(Value *Addr,
1956 IRBuilder<> IRB(Pos->getParent(), Pos);
1957 Value *ShadowOffset = getShadowOffset(Addr, IRB);
1958 return getShadowAddress(Addr, Pos, ShadowOffset);
1959}
1960
1961Value *DFSanFunction::combineShadowsThenConvert(Type *T, Value *V1, Value *V2,
1963 Value *PrimitiveValue = combineShadows(V1, V2, Pos);
1964 return expandFromPrimitiveShadow(T, PrimitiveValue, Pos);
1965}
1966
1967// Generates IR to compute the union of the two given shadows, inserting it
1968// before Pos. The combined value is with primitive type.
1969Value *DFSanFunction::combineShadows(Value *V1, Value *V2,
1971 if (DFS.isZeroShadow(V1))
1972 return collapseToPrimitiveShadow(V2, Pos);
1973 if (DFS.isZeroShadow(V2))
1974 return collapseToPrimitiveShadow(V1, Pos);
1975 if (V1 == V2)
1976 return collapseToPrimitiveShadow(V1, Pos);
1977
1978 auto V1Elems = ShadowElements.find(V1);
1979 auto V2Elems = ShadowElements.find(V2);
1980 if (V1Elems != ShadowElements.end() && V2Elems != ShadowElements.end()) {
1981 if (std::includes(V1Elems->second.begin(), V1Elems->second.end(),
1982 V2Elems->second.begin(), V2Elems->second.end())) {
1983 return collapseToPrimitiveShadow(V1, Pos);
1984 }
1985 if (std::includes(V2Elems->second.begin(), V2Elems->second.end(),
1986 V1Elems->second.begin(), V1Elems->second.end())) {
1987 return collapseToPrimitiveShadow(V2, Pos);
1988 }
1989 } else if (V1Elems != ShadowElements.end()) {
1990 if (V1Elems->second.count(V2))
1991 return collapseToPrimitiveShadow(V1, Pos);
1992 } else if (V2Elems != ShadowElements.end()) {
1993 if (V2Elems->second.count(V1))
1994 return collapseToPrimitiveShadow(V2, Pos);
1995 }
1996
1997 auto Key = std::make_pair(V1, V2);
1998 if (V1 > V2)
1999 std::swap(Key.first, Key.second);
2000 CachedShadow &CCS = CachedShadows[Key];
2001 if (CCS.Block && DT.dominates(CCS.Block, Pos->getParent()))
2002 return CCS.Shadow;
2003
2004 // Converts inputs shadows to shadows with primitive types.
2005 Value *PV1 = collapseToPrimitiveShadow(V1, Pos);
2006 Value *PV2 = collapseToPrimitiveShadow(V2, Pos);
2007
2008 IRBuilder<> IRB(Pos->getParent(), Pos);
2009 CCS.Block = Pos->getParent();
2010 CCS.Shadow = IRB.CreateOr(PV1, PV2);
2011
2012 std::set<Value *> UnionElems;
2013 if (V1Elems != ShadowElements.end()) {
2014 UnionElems = V1Elems->second;
2015 } else {
2016 UnionElems.insert(V1);
2017 }
2018 if (V2Elems != ShadowElements.end()) {
2019 UnionElems.insert(V2Elems->second.begin(), V2Elems->second.end());
2020 } else {
2021 UnionElems.insert(V2);
2022 }
2023 ShadowElements[CCS.Shadow] = std::move(UnionElems);
2024
2025 return CCS.Shadow;
2026}
2027
2028// A convenience function which folds the shadows of each of the operands
2029// of the provided instruction Inst, inserting the IR before Inst. Returns
2030// the computed union Value.
2031Value *DFSanFunction::combineOperandShadows(Instruction *Inst) {
2032 if (Inst->getNumOperands() == 0)
2033 return DFS.getZeroShadow(Inst);
2034
2035 Value *Shadow = getShadow(Inst->getOperand(0));
2036 for (unsigned I = 1, N = Inst->getNumOperands(); I < N; ++I)
2037 Shadow = combineShadows(Shadow, getShadow(Inst->getOperand(I)),
2038 Inst->getIterator());
2039
2040 return expandFromPrimitiveShadow(Inst->getType(), Shadow,
2041 Inst->getIterator());
2042}
2043
2044void DFSanVisitor::visitInstOperands(Instruction &I) {
2045 Value *CombinedShadow = DFSF.combineOperandShadows(&I);
2046 DFSF.setShadow(&I, CombinedShadow);
2047 visitInstOperandOrigins(I);
2048}
2049
2050Value *DFSanFunction::combineOrigins(const std::vector<Value *> &Shadows,
2051 const std::vector<Value *> &Origins,
2053 ConstantInt *Zero) {
2054 assert(Shadows.size() == Origins.size());
2055 size_t Size = Origins.size();
2056 if (Size == 0)
2057 return DFS.ZeroOrigin;
2058 Value *Origin = nullptr;
2059 if (!Zero)
2060 Zero = DFS.ZeroPrimitiveShadow;
2061 for (size_t I = 0; I != Size; ++I) {
2062 Value *OpOrigin = Origins[I];
2063 Constant *ConstOpOrigin = dyn_cast<Constant>(OpOrigin);
2064 if (ConstOpOrigin && ConstOpOrigin->isNullValue())
2065 continue;
2066 if (!Origin) {
2067 Origin = OpOrigin;
2068 continue;
2069 }
2070 Value *OpShadow = Shadows[I];
2071 Value *PrimitiveShadow = collapseToPrimitiveShadow(OpShadow, Pos);
2072 IRBuilder<> IRB(Pos->getParent(), Pos);
2073 Value *Cond = IRB.CreateICmpNE(PrimitiveShadow, Zero);
2074 Origin = IRB.CreateSelect(Cond, OpOrigin, Origin);
2075 }
2076 return Origin ? Origin : DFS.ZeroOrigin;
2077}
2078
2079Value *DFSanFunction::combineOperandOrigins(Instruction *Inst) {
2080 size_t Size = Inst->getNumOperands();
2081 std::vector<Value *> Shadows(Size);
2082 std::vector<Value *> Origins(Size);
2083 for (unsigned I = 0; I != Size; ++I) {
2084 Shadows[I] = getShadow(Inst->getOperand(I));
2085 Origins[I] = getOrigin(Inst->getOperand(I));
2086 }
2087 return combineOrigins(Shadows, Origins, Inst->getIterator());
2088}
2089
2090void DFSanVisitor::visitInstOperandOrigins(Instruction &I) {
2091 if (!DFSF.DFS.shouldTrackOrigins())
2092 return;
2093 Value *CombinedOrigin = DFSF.combineOperandOrigins(&I);
2094 DFSF.setOrigin(&I, CombinedOrigin);
2095}
2096
2097Align DFSanFunction::getShadowAlign(Align InstAlignment) {
2098 const Align Alignment = ClPreserveAlignment ? InstAlignment : Align(1);
2099 return Align(Alignment.value() * DFS.ShadowWidthBytes);
2100}
2101
2102Align DFSanFunction::getOriginAlign(Align InstAlignment) {
2103 const Align Alignment = llvm::assumeAligned(InstAlignment.value());
2104 return Align(std::max(MinOriginAlignment, Alignment));
2105}
2106
2107bool DFSanFunction::isLookupTableConstant(Value *P) {
2108 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P->stripPointerCasts()))
2109 if (GV->isConstant() && GV->hasName())
2110 return DFS.CombineTaintLookupTableNames.count(GV->getName());
2111
2112 return false;
2113}
2114
2115bool DFSanFunction::useCallbackLoadLabelAndOrigin(uint64_t Size,
2116 Align InstAlignment) {
2117 // When enabling tracking load instructions, we always use
2118 // __dfsan_load_label_and_origin to reduce code size.
2119 if (ClTrackOrigins == 2)
2120 return true;
2121
2122 assert(Size != 0);
2123 // * if Size == 1, it is sufficient to load its origin aligned at 4.
2124 // * if Size == 2, we assume most cases Addr % 2 == 0, so it is sufficient to
2125 // load its origin aligned at 4. If not, although origins may be lost, it
2126 // should not happen very often.
2127 // * if align >= 4, Addr must be aligned to 4, otherwise it is UB. When
2128 // Size % 4 == 0, it is more efficient to load origins without callbacks.
2129 // * Otherwise we use __dfsan_load_label_and_origin.
2130 // This should ensure that common cases run efficiently.
2131 if (Size <= 2)
2132 return false;
2133
2134 const Align Alignment = llvm::assumeAligned(InstAlignment.value());
2135 return Alignment < MinOriginAlignment || !DFS.hasLoadSizeForFastPath(Size);
2136}
2137
2138Value *DataFlowSanitizer::loadNextOrigin(BasicBlock::iterator Pos,
2139 Align OriginAlign,
2140 Value **OriginAddr) {
2141 IRBuilder<> IRB(Pos->getParent(), Pos);
2142 *OriginAddr =
2143 IRB.CreateGEP(OriginTy, *OriginAddr, ConstantInt::get(IntptrTy, 1));
2144 return IRB.CreateAlignedLoad(OriginTy, *OriginAddr, OriginAlign);
2145}
2146
2147std::pair<Value *, Value *> DFSanFunction::loadShadowFast(
2148 Value *ShadowAddr, Value *OriginAddr, uint64_t Size, Align ShadowAlign,
2149 Align OriginAlign, Value *FirstOrigin, BasicBlock::iterator Pos) {
2150 const bool ShouldTrackOrigins = DFS.shouldTrackOrigins();
2151 const uint64_t ShadowSize = Size * DFS.ShadowWidthBytes;
2152
2153 assert(Size >= 4 && "Not large enough load size for fast path!");
2154
2155 // Used for origin tracking.
2156 std::vector<Value *> Shadows;
2157 std::vector<Value *> Origins;
2158
2159 // Load instructions in LLVM can have arbitrary byte sizes (e.g., 3, 12, 20)
2160 // but this function is only used in a subset of cases that make it possible
2161 // to optimize the instrumentation.
2162 //
2163 // Specifically, when the shadow size in bytes (i.e., loaded bytes x shadow
2164 // per byte) is either:
2165 // - a multiple of 8 (common)
2166 // - equal to 4 (only for load32)
2167 //
2168 // For the second case, we can fit the wide shadow in a 32-bit integer. In all
2169 // other cases, we use a 64-bit integer to hold the wide shadow.
2170 Type *WideShadowTy =
2171 ShadowSize == 4 ? Type::getInt32Ty(*DFS.Ctx) : Type::getInt64Ty(*DFS.Ctx);
2172
2173 IRBuilder<> IRB(Pos->getParent(), Pos);
2174 Value *CombinedWideShadow =
2175 IRB.CreateAlignedLoad(WideShadowTy, ShadowAddr, ShadowAlign);
2176
2177 unsigned WideShadowBitWidth = WideShadowTy->getIntegerBitWidth();
2178 const uint64_t BytesPerWideShadow = WideShadowBitWidth / DFS.ShadowWidthBits;
2179
2180 auto AppendWideShadowAndOrigin = [&](Value *WideShadow, Value *Origin) {
2181 if (BytesPerWideShadow > 4) {
2182 assert(BytesPerWideShadow == 8);
2183 // The wide shadow relates to two origin pointers: one for the first four
2184 // application bytes, and one for the latest four. We use a left shift to
2185 // get just the shadow bytes that correspond to the first origin pointer,
2186 // and then the entire shadow for the second origin pointer (which will be
2187 // chosen by combineOrigins() iff the least-significant half of the wide
2188 // shadow was empty but the other half was not).
2189 Value *WideShadowLo = IRB.CreateShl(
2190 WideShadow, ConstantInt::get(WideShadowTy, WideShadowBitWidth / 2));
2191 Shadows.push_back(WideShadow);
2192 Origins.push_back(DFS.loadNextOrigin(Pos, OriginAlign, &OriginAddr));
2193
2194 Shadows.push_back(WideShadowLo);
2195 Origins.push_back(Origin);
2196 } else {
2197 Shadows.push_back(WideShadow);
2198 Origins.push_back(Origin);
2199 }
2200 };
2201
2202 if (ShouldTrackOrigins)
2203 AppendWideShadowAndOrigin(CombinedWideShadow, FirstOrigin);
2204
2205 // First OR all the WideShadows (i.e., 64bit or 32bit shadow chunks) linearly;
2206 // then OR individual shadows within the combined WideShadow by binary ORing.
2207 // This is fewer instructions than ORing shadows individually, since it
2208 // needs logN shift/or instructions (N being the bytes of the combined wide
2209 // shadow).
2210 for (uint64_t ByteOfs = BytesPerWideShadow; ByteOfs < Size;
2211 ByteOfs += BytesPerWideShadow) {
2212 ShadowAddr = IRB.CreateGEP(WideShadowTy, ShadowAddr,
2213 ConstantInt::get(DFS.IntptrTy, 1));
2214 Value *NextWideShadow =
2215 IRB.CreateAlignedLoad(WideShadowTy, ShadowAddr, ShadowAlign);
2216 CombinedWideShadow = IRB.CreateOr(CombinedWideShadow, NextWideShadow);
2217 if (ShouldTrackOrigins) {
2218 Value *NextOrigin = DFS.loadNextOrigin(Pos, OriginAlign, &OriginAddr);
2219 AppendWideShadowAndOrigin(NextWideShadow, NextOrigin);
2220 }
2221 }
2222 for (unsigned Width = WideShadowBitWidth / 2; Width >= DFS.ShadowWidthBits;
2223 Width >>= 1) {
2224 Value *ShrShadow = IRB.CreateLShr(CombinedWideShadow, Width);
2225 CombinedWideShadow = IRB.CreateOr(CombinedWideShadow, ShrShadow);
2226 }
2227 return {IRB.CreateTrunc(CombinedWideShadow, DFS.PrimitiveShadowTy),
2228 ShouldTrackOrigins
2229 ? combineOrigins(Shadows, Origins, Pos,
2231 : DFS.ZeroOrigin};
2232}
2233
2234std::pair<Value *, Value *> DFSanFunction::loadShadowOriginSansLoadTracking(
2235 Value *Addr, uint64_t Size, Align InstAlignment, BasicBlock::iterator Pos) {
2236 const bool ShouldTrackOrigins = DFS.shouldTrackOrigins();
2237
2238 // Non-escaped loads.
2239 if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
2240 const auto SI = AllocaShadowMap.find(AI);
2241 if (SI != AllocaShadowMap.end()) {
2242 IRBuilder<> IRB(Pos->getParent(), Pos);
2243 Value *ShadowLI = IRB.CreateLoad(DFS.PrimitiveShadowTy, SI->second);
2244 const auto OI = AllocaOriginMap.find(AI);
2245 assert(!ShouldTrackOrigins || OI != AllocaOriginMap.end());
2246 return {ShadowLI, ShouldTrackOrigins
2247 ? IRB.CreateLoad(DFS.OriginTy, OI->second)
2248 : nullptr};
2249 }
2250 }
2251
2252 // Load from constant addresses.
2255 bool AllConstants = true;
2256 for (const Value *Obj : Objs) {
2257 if (isa<Function>(Obj) || isa<BlockAddress>(Obj))
2258 continue;
2259 if (isa<GlobalVariable>(Obj) && cast<GlobalVariable>(Obj)->isConstant())
2260 continue;
2261
2262 AllConstants = false;
2263 break;
2264 }
2265 if (AllConstants)
2266 return {DFS.ZeroPrimitiveShadow,
2267 ShouldTrackOrigins ? DFS.ZeroOrigin : nullptr};
2268
2269 if (Size == 0)
2270 return {DFS.ZeroPrimitiveShadow,
2271 ShouldTrackOrigins ? DFS.ZeroOrigin : nullptr};
2272
2273 // Use callback to load if this is not an optimizable case for origin
2274 // tracking.
2275 if (ShouldTrackOrigins &&
2276 useCallbackLoadLabelAndOrigin(Size, InstAlignment)) {
2277 IRBuilder<> IRB(Pos->getParent(), Pos);
2278 CallInst *Call =
2279 IRB.CreateCall(DFS.DFSanLoadLabelAndOriginFn,
2280 {Addr, ConstantInt::get(DFS.IntptrTy, Size)});
2281 Call->addRetAttr(Attribute::ZExt);
2282 return {IRB.CreateTrunc(IRB.CreateLShr(Call, DFS.OriginWidthBits),
2283 DFS.PrimitiveShadowTy),
2284 IRB.CreateTrunc(Call, DFS.OriginTy)};
2285 }
2286
2287 // Other cases that support loading shadows or origins in a fast way.
2288 Value *ShadowAddr, *OriginAddr;
2289 std::tie(ShadowAddr, OriginAddr) =
2290 DFS.getShadowOriginAddress(Addr, InstAlignment, Pos);
2291
2292 const Align ShadowAlign = getShadowAlign(InstAlignment);
2293 const Align OriginAlign = getOriginAlign(InstAlignment);
2294 Value *Origin = nullptr;
2295 if (ShouldTrackOrigins) {
2296 IRBuilder<> IRB(Pos->getParent(), Pos);
2297 Origin = IRB.CreateAlignedLoad(DFS.OriginTy, OriginAddr, OriginAlign);
2298 }
2299
2300 // When the byte size is small enough, we can load the shadow directly with
2301 // just a few instructions.
2302 switch (Size) {
2303 case 1: {
2304 LoadInst *LI = new LoadInst(DFS.PrimitiveShadowTy, ShadowAddr, "", Pos);
2305 LI->setAlignment(ShadowAlign);
2306 return {LI, Origin};
2307 }
2308 case 2: {
2309 IRBuilder<> IRB(Pos->getParent(), Pos);
2310 Value *ShadowAddr1 = IRB.CreateGEP(DFS.PrimitiveShadowTy, ShadowAddr,
2311 ConstantInt::get(DFS.IntptrTy, 1));
2312 Value *Load =
2313 IRB.CreateAlignedLoad(DFS.PrimitiveShadowTy, ShadowAddr, ShadowAlign);
2314 Value *Load1 =
2315 IRB.CreateAlignedLoad(DFS.PrimitiveShadowTy, ShadowAddr1, ShadowAlign);
2316 return {combineShadows(Load, Load1, Pos), Origin};
2317 }
2318 }
2319 bool HasSizeForFastPath = DFS.hasLoadSizeForFastPath(Size);
2320
2321 if (HasSizeForFastPath)
2322 return loadShadowFast(ShadowAddr, OriginAddr, Size, ShadowAlign,
2323 OriginAlign, Origin, Pos);
2324
2325 IRBuilder<> IRB(Pos->getParent(), Pos);
2326 CallInst *FallbackCall = IRB.CreateCall(
2327 DFS.DFSanUnionLoadFn, {ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)});
2328 FallbackCall->addRetAttr(Attribute::ZExt);
2329 return {FallbackCall, Origin};
2330}
2331
2332std::pair<Value *, Value *>
2333DFSanFunction::loadShadowOrigin(Value *Addr, uint64_t Size, Align InstAlignment,
2335 Value *PrimitiveShadow, *Origin;
2336 std::tie(PrimitiveShadow, Origin) =
2337 loadShadowOriginSansLoadTracking(Addr, Size, InstAlignment, Pos);
2338 if (DFS.shouldTrackOrigins()) {
2339 if (ClTrackOrigins == 2) {
2340 IRBuilder<> IRB(Pos->getParent(), Pos);
2341 auto *ConstantShadow = dyn_cast<Constant>(PrimitiveShadow);
2342 if (!ConstantShadow || !ConstantShadow->isZeroValue())
2343 Origin = updateOriginIfTainted(PrimitiveShadow, Origin, IRB);
2344 }
2345 }
2346 return {PrimitiveShadow, Origin};
2347}
2348
2350 switch (AO) {
2351 case AtomicOrdering::NotAtomic:
2352 return AtomicOrdering::NotAtomic;
2353 case AtomicOrdering::Unordered:
2354 case AtomicOrdering::Monotonic:
2355 case AtomicOrdering::Acquire:
2356 return AtomicOrdering::Acquire;
2357 case AtomicOrdering::Release:
2358 case AtomicOrdering::AcquireRelease:
2359 return AtomicOrdering::AcquireRelease;
2360 case AtomicOrdering::SequentiallyConsistent:
2361 return AtomicOrdering::SequentiallyConsistent;
2362 }
2363 llvm_unreachable("Unknown ordering");
2364}
2365
2367 if (!V->getType()->isPointerTy())
2368 return V;
2369
2370 // DFSan pass should be running on valid IR, but we'll
2371 // keep a seen set to ensure there are no issues.
2373 Visited.insert(V);
2374 do {
2375 if (auto *GEP = dyn_cast<GEPOperator>(V)) {
2376 V = GEP->getPointerOperand();
2377 } else if (Operator::getOpcode(V) == Instruction::BitCast) {
2378 V = cast<Operator>(V)->getOperand(0);
2379 if (!V->getType()->isPointerTy())
2380 return V;
2381 } else if (isa<GlobalAlias>(V)) {
2382 V = cast<GlobalAlias>(V)->getAliasee();
2383 }
2384 } while (Visited.insert(V).second);
2385
2386 return V;
2387}
2388
2389void DFSanVisitor::visitLoadInst(LoadInst &LI) {
2390 auto &DL = LI.getDataLayout();
2391 uint64_t Size = DL.getTypeStoreSize(LI.getType());
2392 if (Size == 0) {
2393 DFSF.setShadow(&LI, DFSF.DFS.getZeroShadow(&LI));
2394 DFSF.setOrigin(&LI, DFSF.DFS.ZeroOrigin);
2395 return;
2396 }
2397
2398 // When an application load is atomic, increase atomic ordering between
2399 // atomic application loads and stores to ensure happen-before order; load
2400 // shadow data after application data; store zero shadow data before
2401 // application data. This ensure shadow loads return either labels of the
2402 // initial application data or zeros.
2403 if (LI.isAtomic())
2405
2406 BasicBlock::iterator AfterLi = std::next(LI.getIterator());
2408 if (LI.isAtomic())
2409 Pos = std::next(Pos);
2410
2411 std::vector<Value *> Shadows;
2412 std::vector<Value *> Origins;
2413 Value *PrimitiveShadow, *Origin;
2414 std::tie(PrimitiveShadow, Origin) =
2415 DFSF.loadShadowOrigin(LI.getPointerOperand(), Size, LI.getAlign(), Pos);
2416 const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
2417 if (ShouldTrackOrigins) {
2418 Shadows.push_back(PrimitiveShadow);
2419 Origins.push_back(Origin);
2420 }
2422 DFSF.isLookupTableConstant(
2424 Value *PtrShadow = DFSF.getShadow(LI.getPointerOperand());
2425 PrimitiveShadow = DFSF.combineShadows(PrimitiveShadow, PtrShadow, Pos);
2426 if (ShouldTrackOrigins) {
2427 Shadows.push_back(PtrShadow);
2428 Origins.push_back(DFSF.getOrigin(LI.getPointerOperand()));
2429 }
2430 }
2431 if (!DFSF.DFS.isZeroShadow(PrimitiveShadow))
2432 DFSF.NonZeroChecks.push_back(PrimitiveShadow);
2433
2434 Value *Shadow =
2435 DFSF.expandFromPrimitiveShadow(LI.getType(), PrimitiveShadow, Pos);
2436 DFSF.setShadow(&LI, Shadow);
2437
2438 if (ShouldTrackOrigins) {
2439 DFSF.setOrigin(&LI, DFSF.combineOrigins(Shadows, Origins, Pos));
2440 }
2441
2442 if (ClEventCallbacks) {
2443 IRBuilder<> IRB(Pos->getParent(), Pos);
2445 CallInst *CI =
2446 IRB.CreateCall(DFSF.DFS.DFSanLoadCallbackFn, {PrimitiveShadow, Addr});
2447 CI->addParamAttr(0, Attribute::ZExt);
2448 }
2449
2450 IRBuilder<> IRB(AfterLi->getParent(), AfterLi);
2451 DFSF.addReachesFunctionCallbacksIfEnabled(IRB, LI, &LI);
2452}
2453
2454Value *DFSanFunction::updateOriginIfTainted(Value *Shadow, Value *Origin,
2455 IRBuilder<> &IRB) {
2456 assert(DFS.shouldTrackOrigins());
2457 return IRB.CreateCall(DFS.DFSanChainOriginIfTaintedFn, {Shadow, Origin});
2458}
2459
2460Value *DFSanFunction::updateOrigin(Value *V, IRBuilder<> &IRB) {
2461 if (!DFS.shouldTrackOrigins())
2462 return V;
2463 return IRB.CreateCall(DFS.DFSanChainOriginFn, V);
2464}
2465
2466Value *DFSanFunction::originToIntptr(IRBuilder<> &IRB, Value *Origin) {
2467 const unsigned OriginSize = DataFlowSanitizer::OriginWidthBytes;
2468 const DataLayout &DL = F->getDataLayout();
2469 unsigned IntptrSize = DL.getTypeStoreSize(DFS.IntptrTy);
2470 if (IntptrSize == OriginSize)
2471 return Origin;
2472 assert(IntptrSize == OriginSize * 2);
2473 Origin = IRB.CreateIntCast(Origin, DFS.IntptrTy, /* isSigned */ false);
2474 return IRB.CreateOr(Origin, IRB.CreateShl(Origin, OriginSize * 8));
2475}
2476
2477void DFSanFunction::paintOrigin(IRBuilder<> &IRB, Value *Origin,
2478 Value *StoreOriginAddr,
2479 uint64_t StoreOriginSize, Align Alignment) {
2480 const unsigned OriginSize = DataFlowSanitizer::OriginWidthBytes;
2481 const DataLayout &DL = F->getDataLayout();
2482 const Align IntptrAlignment = DL.getABITypeAlign(DFS.IntptrTy);
2483 unsigned IntptrSize = DL.getTypeStoreSize(DFS.IntptrTy);
2484 assert(IntptrAlignment >= MinOriginAlignment);
2485 assert(IntptrSize >= OriginSize);
2486
2487 unsigned Ofs = 0;
2488 Align CurrentAlignment = Alignment;
2489 if (Alignment >= IntptrAlignment && IntptrSize > OriginSize) {
2490 Value *IntptrOrigin = originToIntptr(IRB, Origin);
2491 Value *IntptrStoreOriginPtr =
2492 IRB.CreatePointerCast(StoreOriginAddr, PointerType::get(*DFS.Ctx, 0));
2493 for (unsigned I = 0; I < StoreOriginSize / IntptrSize; ++I) {
2494 Value *Ptr =
2495 I ? IRB.CreateConstGEP1_32(DFS.IntptrTy, IntptrStoreOriginPtr, I)
2496 : IntptrStoreOriginPtr;
2497 IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment);
2498 Ofs += IntptrSize / OriginSize;
2499 CurrentAlignment = IntptrAlignment;
2500 }
2501 }
2502
2503 for (unsigned I = Ofs; I < (StoreOriginSize + OriginSize - 1) / OriginSize;
2504 ++I) {
2505 Value *GEP = I ? IRB.CreateConstGEP1_32(DFS.OriginTy, StoreOriginAddr, I)
2506 : StoreOriginAddr;
2507 IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment);
2508 CurrentAlignment = MinOriginAlignment;
2509 }
2510}
2511
2512Value *DFSanFunction::convertToBool(Value *V, IRBuilder<> &IRB,
2513 const Twine &Name) {
2514 Type *VTy = V->getType();
2515 assert(VTy->isIntegerTy());
2516 if (VTy->getIntegerBitWidth() == 1)
2517 // Just converting a bool to a bool, so do nothing.
2518 return V;
2519 return IRB.CreateICmpNE(V, ConstantInt::get(VTy, 0), Name);
2520}
2521
2522void DFSanFunction::storeOrigin(BasicBlock::iterator Pos, Value *Addr,
2523 uint64_t Size, Value *Shadow, Value *Origin,
2524 Value *StoreOriginAddr, Align InstAlignment) {
2525 // Do not write origins for zero shadows because we do not trace origins for
2526 // untainted sinks.
2527 const Align OriginAlignment = getOriginAlign(InstAlignment);
2528 Value *CollapsedShadow = collapseToPrimitiveShadow(Shadow, Pos);
2529 IRBuilder<> IRB(Pos->getParent(), Pos);
2530 if (auto *ConstantShadow = dyn_cast<Constant>(CollapsedShadow)) {
2531 if (!ConstantShadow->isZeroValue())
2532 paintOrigin(IRB, updateOrigin(Origin, IRB), StoreOriginAddr, Size,
2533 OriginAlignment);
2534 return;
2535 }
2536
2537 if (shouldInstrumentWithCall()) {
2538 IRB.CreateCall(
2539 DFS.DFSanMaybeStoreOriginFn,
2540 {CollapsedShadow, Addr, ConstantInt::get(DFS.IntptrTy, Size), Origin});
2541 } else {
2542 Value *Cmp = convertToBool(CollapsedShadow, IRB, "_dfscmp");
2543 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
2545 Cmp, &*IRB.GetInsertPoint(), false, DFS.OriginStoreWeights, &DTU);
2546 IRBuilder<> IRBNew(CheckTerm);
2547 paintOrigin(IRBNew, updateOrigin(Origin, IRBNew), StoreOriginAddr, Size,
2548 OriginAlignment);
2549 ++NumOriginStores;
2550 }
2551}
2552
2553void DFSanFunction::storeZeroPrimitiveShadow(Value *Addr, uint64_t Size,
2554 Align ShadowAlign,
2556 IRBuilder<> IRB(Pos->getParent(), Pos);
2557 IntegerType *ShadowTy =
2558 IntegerType::get(*DFS.Ctx, Size * DFS.ShadowWidthBits);
2559 Value *ExtZeroShadow = ConstantInt::get(ShadowTy, 0);
2560 Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos);
2561 IRB.CreateAlignedStore(ExtZeroShadow, ShadowAddr, ShadowAlign);
2562 // Do not write origins for 0 shadows because we do not trace origins for
2563 // untainted sinks.
2564}
2565
2566void DFSanFunction::storePrimitiveShadowOrigin(Value *Addr, uint64_t Size,
2567 Align InstAlignment,
2568 Value *PrimitiveShadow,
2569 Value *Origin,
2571 const bool ShouldTrackOrigins = DFS.shouldTrackOrigins() && Origin;
2572
2573 if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
2574 const auto SI = AllocaShadowMap.find(AI);
2575 if (SI != AllocaShadowMap.end()) {
2576 IRBuilder<> IRB(Pos->getParent(), Pos);
2577 IRB.CreateStore(PrimitiveShadow, SI->second);
2578
2579 // Do not write origins for 0 shadows because we do not trace origins for
2580 // untainted sinks.
2581 if (ShouldTrackOrigins && !DFS.isZeroShadow(PrimitiveShadow)) {
2582 const auto OI = AllocaOriginMap.find(AI);
2583 assert(OI != AllocaOriginMap.end() && Origin);
2584 IRB.CreateStore(Origin, OI->second);
2585 }
2586 return;
2587 }
2588 }
2589
2590 const Align ShadowAlign = getShadowAlign(InstAlignment);
2591 if (DFS.isZeroShadow(PrimitiveShadow)) {
2592 storeZeroPrimitiveShadow(Addr, Size, ShadowAlign, Pos);
2593 return;
2594 }
2595
2596 IRBuilder<> IRB(Pos->getParent(), Pos);
2597 Value *ShadowAddr, *OriginAddr;
2598 std::tie(ShadowAddr, OriginAddr) =
2599 DFS.getShadowOriginAddress(Addr, InstAlignment, Pos);
2600
2601 const unsigned ShadowVecSize = 8;
2602 assert(ShadowVecSize * DFS.ShadowWidthBits <= 128 &&
2603 "Shadow vector is too large!");
2604
2605 uint64_t Offset = 0;
2606 uint64_t LeftSize = Size;
2607 if (LeftSize >= ShadowVecSize) {
2608 auto *ShadowVecTy =
2609 FixedVectorType::get(DFS.PrimitiveShadowTy, ShadowVecSize);
2610 Value *ShadowVec = PoisonValue::get(ShadowVecTy);
2611 for (unsigned I = 0; I != ShadowVecSize; ++I) {
2612 ShadowVec = IRB.CreateInsertElement(
2613 ShadowVec, PrimitiveShadow,
2614 ConstantInt::get(Type::getInt32Ty(*DFS.Ctx), I));
2615 }
2616 do {
2617 Value *CurShadowVecAddr =
2618 IRB.CreateConstGEP1_32(ShadowVecTy, ShadowAddr, Offset);
2619 IRB.CreateAlignedStore(ShadowVec, CurShadowVecAddr, ShadowAlign);
2620 LeftSize -= ShadowVecSize;
2621 ++Offset;
2622 } while (LeftSize >= ShadowVecSize);
2623 Offset *= ShadowVecSize;
2624 }
2625 while (LeftSize > 0) {
2626 Value *CurShadowAddr =
2627 IRB.CreateConstGEP1_32(DFS.PrimitiveShadowTy, ShadowAddr, Offset);
2628 IRB.CreateAlignedStore(PrimitiveShadow, CurShadowAddr, ShadowAlign);
2629 --LeftSize;
2630 ++Offset;
2631 }
2632
2633 if (ShouldTrackOrigins) {
2634 storeOrigin(Pos, Addr, Size, PrimitiveShadow, Origin, OriginAddr,
2635 InstAlignment);
2636 }
2637}
2638
2640 switch (AO) {
2641 case AtomicOrdering::NotAtomic:
2642 return AtomicOrdering::NotAtomic;
2643 case AtomicOrdering::Unordered:
2644 case AtomicOrdering::Monotonic:
2645 case AtomicOrdering::Release:
2646 return AtomicOrdering::Release;
2647 case AtomicOrdering::Acquire:
2648 case AtomicOrdering::AcquireRelease:
2649 return AtomicOrdering::AcquireRelease;
2650 case AtomicOrdering::SequentiallyConsistent:
2651 return AtomicOrdering::SequentiallyConsistent;
2652 }
2653 llvm_unreachable("Unknown ordering");
2654}
2655
2656void DFSanVisitor::visitStoreInst(StoreInst &SI) {
2657 auto &DL = SI.getDataLayout();
2658 Value *Val = SI.getValueOperand();
2659 uint64_t Size = DL.getTypeStoreSize(Val->getType());
2660 if (Size == 0)
2661 return;
2662
2663 // When an application store is atomic, increase atomic ordering between
2664 // atomic application loads and stores to ensure happen-before order; load
2665 // shadow data after application data; store zero shadow data before
2666 // application data. This ensure shadow loads return either labels of the
2667 // initial application data or zeros.
2668 if (SI.isAtomic())
2669 SI.setOrdering(addReleaseOrdering(SI.getOrdering()));
2670
2671 const bool ShouldTrackOrigins =
2672 DFSF.DFS.shouldTrackOrigins() && !SI.isAtomic();
2673 std::vector<Value *> Shadows;
2674 std::vector<Value *> Origins;
2675
2676 Value *Shadow =
2677 SI.isAtomic() ? DFSF.DFS.getZeroShadow(Val) : DFSF.getShadow(Val);
2678
2679 if (ShouldTrackOrigins) {
2680 Shadows.push_back(Shadow);
2681 Origins.push_back(DFSF.getOrigin(Val));
2682 }
2683
2684 Value *PrimitiveShadow;
2686 Value *PtrShadow = DFSF.getShadow(SI.getPointerOperand());
2687 if (ShouldTrackOrigins) {
2688 Shadows.push_back(PtrShadow);
2689 Origins.push_back(DFSF.getOrigin(SI.getPointerOperand()));
2690 }
2691 PrimitiveShadow = DFSF.combineShadows(Shadow, PtrShadow, SI.getIterator());
2692 } else {
2693 PrimitiveShadow = DFSF.collapseToPrimitiveShadow(Shadow, SI.getIterator());
2694 }
2695 Value *Origin = nullptr;
2696 if (ShouldTrackOrigins)
2697 Origin = DFSF.combineOrigins(Shadows, Origins, SI.getIterator());
2698 DFSF.storePrimitiveShadowOrigin(SI.getPointerOperand(), Size, SI.getAlign(),
2699 PrimitiveShadow, Origin, SI.getIterator());
2700 if (ClEventCallbacks) {
2701 IRBuilder<> IRB(&SI);
2702 Value *Addr = SI.getPointerOperand();
2703 CallInst *CI =
2704 IRB.CreateCall(DFSF.DFS.DFSanStoreCallbackFn, {PrimitiveShadow, Addr});
2705 CI->addParamAttr(0, Attribute::ZExt);
2706 }
2707}
2708
2709void DFSanVisitor::visitCASOrRMW(Align InstAlignment, Instruction &I) {
2710 assert(isa<AtomicRMWInst>(I) || isa<AtomicCmpXchgInst>(I));
2711
2712 Value *Val = I.getOperand(1);
2713 const auto &DL = I.getDataLayout();
2714 uint64_t Size = DL.getTypeStoreSize(Val->getType());
2715 if (Size == 0)
2716 return;
2717
2718 // Conservatively set data at stored addresses and return with zero shadow to
2719 // prevent shadow data races.
2720 IRBuilder<> IRB(&I);
2721 Value *Addr = I.getOperand(0);
2722 const Align ShadowAlign = DFSF.getShadowAlign(InstAlignment);
2723 DFSF.storeZeroPrimitiveShadow(Addr, Size, ShadowAlign, I.getIterator());
2724 DFSF.setShadow(&I, DFSF.DFS.getZeroShadow(&I));
2725 DFSF.setOrigin(&I, DFSF.DFS.ZeroOrigin);
2726}
2727
2728void DFSanVisitor::visitAtomicRMWInst(AtomicRMWInst &I) {
2729 visitCASOrRMW(I.getAlign(), I);
2730 // TODO: The ordering change follows MSan. It is possible not to change
2731 // ordering because we always set and use 0 shadows.
2732 I.setOrdering(addReleaseOrdering(I.getOrdering()));
2733}
2734
2735void DFSanVisitor::visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
2736 visitCASOrRMW(I.getAlign(), I);
2737 // TODO: The ordering change follows MSan. It is possible not to change
2738 // ordering because we always set and use 0 shadows.
2739 I.setSuccessOrdering(addReleaseOrdering(I.getSuccessOrdering()));
2740}
2741
2742void DFSanVisitor::visitUnaryOperator(UnaryOperator &UO) {
2743 visitInstOperands(UO);
2744}
2745
2746void DFSanVisitor::visitBinaryOperator(BinaryOperator &BO) {
2747 visitInstOperands(BO);
2748}
2749
2750void DFSanVisitor::visitBitCastInst(BitCastInst &BCI) {
2751 // Special case: if this is the bitcast (there is exactly 1 allowed) between
2752 // a musttail call and a ret, don't instrument. New instructions are not
2753 // allowed after a musttail call.
2754 if (auto *CI = dyn_cast<CallInst>(BCI.getOperand(0)))
2755 if (CI->isMustTailCall())
2756 return;
2757 visitInstOperands(BCI);
2758}
2759
2760void DFSanVisitor::visitCastInst(CastInst &CI) { visitInstOperands(CI); }
2761
2762void DFSanVisitor::visitCmpInst(CmpInst &CI) {
2763 visitInstOperands(CI);
2764 if (ClEventCallbacks) {
2765 IRBuilder<> IRB(&CI);
2766 Value *CombinedShadow = DFSF.getShadow(&CI);
2767 CallInst *CallI =
2768 IRB.CreateCall(DFSF.DFS.DFSanCmpCallbackFn, CombinedShadow);
2769 CallI->addParamAttr(0, Attribute::ZExt);
2770 }
2771}
2772
2773void DFSanVisitor::visitLandingPadInst(LandingPadInst &LPI) {
2774 // We do not need to track data through LandingPadInst.
2775 //
2776 // For the C++ exceptions, if a value is thrown, this value will be stored
2777 // in a memory location provided by __cxa_allocate_exception(...) (on the
2778 // throw side) or __cxa_begin_catch(...) (on the catch side).
2779 // This memory will have a shadow, so with the loads and stores we will be
2780 // able to propagate labels on data thrown through exceptions, without any
2781 // special handling of the LandingPadInst.
2782 //
2783 // The second element in the pair result of the LandingPadInst is a
2784 // register value, but it is for a type ID and should never be tainted.
2785 DFSF.setShadow(&LPI, DFSF.DFS.getZeroShadow(&LPI));
2786 DFSF.setOrigin(&LPI, DFSF.DFS.ZeroOrigin);
2787}
2788
2789void DFSanVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
2791 DFSF.isLookupTableConstant(
2793 visitInstOperands(GEPI);
2794 return;
2795 }
2796
2797 // Only propagate shadow/origin of base pointer value but ignore those of
2798 // offset operands.
2799 Value *BasePointer = GEPI.getPointerOperand();
2800 DFSF.setShadow(&GEPI, DFSF.getShadow(BasePointer));
2801 if (DFSF.DFS.shouldTrackOrigins())
2802 DFSF.setOrigin(&GEPI, DFSF.getOrigin(BasePointer));
2803}
2804
2805void DFSanVisitor::visitExtractElementInst(ExtractElementInst &I) {
2806 visitInstOperands(I);
2807}
2808
2809void DFSanVisitor::visitInsertElementInst(InsertElementInst &I) {
2810 visitInstOperands(I);
2811}
2812
2813void DFSanVisitor::visitShuffleVectorInst(ShuffleVectorInst &I) {
2814 visitInstOperands(I);
2815}
2816
2817void DFSanVisitor::visitExtractValueInst(ExtractValueInst &I) {
2818 IRBuilder<> IRB(&I);
2819 Value *Agg = I.getAggregateOperand();
2820 Value *AggShadow = DFSF.getShadow(Agg);
2821 Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices());
2822 DFSF.setShadow(&I, ResShadow);
2823 visitInstOperandOrigins(I);
2824}
2825
2826void DFSanVisitor::visitInsertValueInst(InsertValueInst &I) {
2827 IRBuilder<> IRB(&I);
2828 Value *AggShadow = DFSF.getShadow(I.getAggregateOperand());
2829 Value *InsShadow = DFSF.getShadow(I.getInsertedValueOperand());
2830 Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices());
2831 DFSF.setShadow(&I, Res);
2832 visitInstOperandOrigins(I);
2833}
2834
2835void DFSanVisitor::visitAllocaInst(AllocaInst &I) {
2836 bool AllLoadsStores = true;
2837 for (User *U : I.users()) {
2838 if (isa<LoadInst>(U))
2839 continue;
2840
2841 if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
2842 if (SI->getPointerOperand() == &I)
2843 continue;
2844 }
2845
2846 AllLoadsStores = false;
2847 break;
2848 }
2849 if (AllLoadsStores) {
2850 IRBuilder<> IRB(&I);
2851 DFSF.AllocaShadowMap[&I] = IRB.CreateAlloca(DFSF.DFS.PrimitiveShadowTy);
2852 if (DFSF.DFS.shouldTrackOrigins()) {
2853 DFSF.AllocaOriginMap[&I] =
2854 IRB.CreateAlloca(DFSF.DFS.OriginTy, nullptr, "_dfsa");
2855 }
2856 }
2857 DFSF.setShadow(&I, DFSF.DFS.ZeroPrimitiveShadow);
2858 DFSF.setOrigin(&I, DFSF.DFS.ZeroOrigin);
2859}
2860
2861void DFSanVisitor::visitSelectInst(SelectInst &I) {
2862 Value *CondShadow = DFSF.getShadow(I.getCondition());
2863 Value *TrueShadow = DFSF.getShadow(I.getTrueValue());
2864 Value *FalseShadow = DFSF.getShadow(I.getFalseValue());
2865 Value *ShadowSel = nullptr;
2866 const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
2867 std::vector<Value *> Shadows;
2868 std::vector<Value *> Origins;
2869 Value *TrueOrigin =
2870 ShouldTrackOrigins ? DFSF.getOrigin(I.getTrueValue()) : nullptr;
2871 Value *FalseOrigin =
2872 ShouldTrackOrigins ? DFSF.getOrigin(I.getFalseValue()) : nullptr;
2873
2874 DFSF.addConditionalCallbacksIfEnabled(I, I.getCondition());
2875
2876 if (isa<VectorType>(I.getCondition()->getType())) {
2877 ShadowSel = DFSF.combineShadowsThenConvert(I.getType(), TrueShadow,
2878 FalseShadow, I.getIterator());
2879 if (ShouldTrackOrigins) {
2880 Shadows.push_back(TrueShadow);
2881 Shadows.push_back(FalseShadow);
2882 Origins.push_back(TrueOrigin);
2883 Origins.push_back(FalseOrigin);
2884 }
2885 } else {
2886 if (TrueShadow == FalseShadow) {
2887 ShadowSel = TrueShadow;
2888 if (ShouldTrackOrigins) {
2889 Shadows.push_back(TrueShadow);
2890 Origins.push_back(TrueOrigin);
2891 }
2892 } else {
2893 ShadowSel = SelectInst::Create(I.getCondition(), TrueShadow, FalseShadow,
2894 "", I.getIterator());
2895 if (ShouldTrackOrigins) {
2896 Shadows.push_back(ShadowSel);
2897 Origins.push_back(SelectInst::Create(I.getCondition(), TrueOrigin,
2898 FalseOrigin, "", I.getIterator()));
2899 }
2900 }
2901 }
2902 DFSF.setShadow(&I, ClTrackSelectControlFlow ? DFSF.combineShadowsThenConvert(
2903 I.getType(), CondShadow,
2904 ShadowSel, I.getIterator())
2905 : ShadowSel);
2906 if (ShouldTrackOrigins) {
2908 Shadows.push_back(CondShadow);
2909 Origins.push_back(DFSF.getOrigin(I.getCondition()));
2910 }
2911 DFSF.setOrigin(&I, DFSF.combineOrigins(Shadows, Origins, I.getIterator()));
2912 }
2913}
2914
2915void DFSanVisitor::visitMemSetInst(MemSetInst &I) {
2916 IRBuilder<> IRB(&I);
2917 Value *ValShadow = DFSF.getShadow(I.getValue());
2918 Value *ValOrigin = DFSF.DFS.shouldTrackOrigins()
2919 ? DFSF.getOrigin(I.getValue())
2920 : DFSF.DFS.ZeroOrigin;
2921 IRB.CreateCall(DFSF.DFS.DFSanSetLabelFn,
2922 {ValShadow, ValOrigin, I.getDest(),
2923 IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)});
2924}
2925
2926void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) {
2927 IRBuilder<> IRB(&I);
2928
2929 // CopyOrMoveOrigin transfers origins by refering to their shadows. So we
2930 // need to move origins before moving shadows.
2931 if (DFSF.DFS.shouldTrackOrigins()) {
2932 IRB.CreateCall(
2933 DFSF.DFS.DFSanMemOriginTransferFn,
2934 {I.getArgOperand(0), I.getArgOperand(1),
2935 IRB.CreateIntCast(I.getArgOperand(2), DFSF.DFS.IntptrTy, false)});
2936 }
2937
2938 Value *DestShadow = DFSF.DFS.getShadowAddress(I.getDest(), I.getIterator());
2939 Value *SrcShadow = DFSF.DFS.getShadowAddress(I.getSource(), I.getIterator());
2940 Value *LenShadow =
2941 IRB.CreateMul(I.getLength(), ConstantInt::get(I.getLength()->getType(),
2942 DFSF.DFS.ShadowWidthBytes));
2943 auto *MTI = cast<MemTransferInst>(
2944 IRB.CreateCall(I.getFunctionType(), I.getCalledOperand(),
2945 {DestShadow, SrcShadow, LenShadow, I.getVolatileCst()}));
2946 MTI->setDestAlignment(DFSF.getShadowAlign(I.getDestAlign().valueOrOne()));
2947 MTI->setSourceAlignment(DFSF.getShadowAlign(I.getSourceAlign().valueOrOne()));
2948 if (ClEventCallbacks) {
2949 IRB.CreateCall(
2950 DFSF.DFS.DFSanMemTransferCallbackFn,
2951 {DestShadow, IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)});
2952 }
2953}
2954
2955void DFSanVisitor::visitBranchInst(BranchInst &BR) {
2956 if (!BR.isConditional())
2957 return;
2958
2959 DFSF.addConditionalCallbacksIfEnabled(BR, BR.getCondition());
2960}
2961
2962void DFSanVisitor::visitSwitchInst(SwitchInst &SW) {
2963 DFSF.addConditionalCallbacksIfEnabled(SW, SW.getCondition());
2964}
2965
2966static bool isAMustTailRetVal(Value *RetVal) {
2967 // Tail call may have a bitcast between return.
2968 if (auto *I = dyn_cast<BitCastInst>(RetVal)) {
2969 RetVal = I->getOperand(0);
2970 }
2971 if (auto *I = dyn_cast<CallInst>(RetVal)) {
2972 return I->isMustTailCall();
2973 }
2974 return false;
2975}
2976
2977void DFSanVisitor::visitReturnInst(ReturnInst &RI) {
2978 if (!DFSF.IsNativeABI && RI.getReturnValue()) {
2979 // Don't emit the instrumentation for musttail call returns.
2981 return;
2982
2983 Value *S = DFSF.getShadow(RI.getReturnValue());
2984 IRBuilder<> IRB(&RI);
2985 Type *RT = DFSF.F->getFunctionType()->getReturnType();
2986 unsigned Size = getDataLayout().getTypeAllocSize(DFSF.DFS.getShadowTy(RT));
2987 if (Size <= RetvalTLSSize) {
2988 // If the size overflows, stores nothing. At callsite, oversized return
2989 // shadows are set to zero.
2990 IRB.CreateAlignedStore(S, DFSF.getRetvalTLS(RT, IRB), ShadowTLSAlignment);
2991 }
2992 if (DFSF.DFS.shouldTrackOrigins()) {
2993 Value *O = DFSF.getOrigin(RI.getReturnValue());
2994 IRB.CreateStore(O, DFSF.getRetvalOriginTLS());
2995 }
2996 }
2997}
2998
2999void DFSanVisitor::addShadowArguments(Function &F, CallBase &CB,
3000 std::vector<Value *> &Args,
3001 IRBuilder<> &IRB) {
3002 FunctionType *FT = F.getFunctionType();
3003
3004 auto *I = CB.arg_begin();
3005
3006 // Adds non-variable argument shadows.
3007 for (unsigned N = FT->getNumParams(); N != 0; ++I, --N)
3008 Args.push_back(
3009 DFSF.collapseToPrimitiveShadow(DFSF.getShadow(*I), CB.getIterator()));
3010
3011 // Adds variable argument shadows.
3012 if (FT->isVarArg()) {
3013 auto *LabelVATy = ArrayType::get(DFSF.DFS.PrimitiveShadowTy,
3014 CB.arg_size() - FT->getNumParams());
3015 auto *LabelVAAlloca =
3016 new AllocaInst(LabelVATy, getDataLayout().getAllocaAddrSpace(),
3017 "labelva", DFSF.F->getEntryBlock().begin());
3018
3019 for (unsigned N = 0; I != CB.arg_end(); ++I, ++N) {
3020 auto *LabelVAPtr = IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, N);
3021 IRB.CreateStore(
3022 DFSF.collapseToPrimitiveShadow(DFSF.getShadow(*I), CB.getIterator()),
3023 LabelVAPtr);
3024 }
3025
3026 Args.push_back(IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, 0));
3027 }
3028
3029 // Adds the return value shadow.
3030 if (!FT->getReturnType()->isVoidTy()) {
3031 if (!DFSF.LabelReturnAlloca) {
3032 DFSF.LabelReturnAlloca = new AllocaInst(
3033 DFSF.DFS.PrimitiveShadowTy, getDataLayout().getAllocaAddrSpace(),
3034 "labelreturn", DFSF.F->getEntryBlock().begin());
3035 }
3036 Args.push_back(DFSF.LabelReturnAlloca);
3037 }
3038}
3039
3040void DFSanVisitor::addOriginArguments(Function &F, CallBase &CB,
3041 std::vector<Value *> &Args,
3042 IRBuilder<> &IRB) {
3043 FunctionType *FT = F.getFunctionType();
3044
3045 auto *I = CB.arg_begin();
3046
3047 // Add non-variable argument origins.
3048 for (unsigned N = FT->getNumParams(); N != 0; ++I, --N)
3049 Args.push_back(DFSF.getOrigin(*I));
3050
3051 // Add variable argument origins.
3052 if (FT->isVarArg()) {
3053 auto *OriginVATy =
3054 ArrayType::get(DFSF.DFS.OriginTy, CB.arg_size() - FT->getNumParams());
3055 auto *OriginVAAlloca =
3056 new AllocaInst(OriginVATy, getDataLayout().getAllocaAddrSpace(),
3057 "originva", DFSF.F->getEntryBlock().begin());
3058
3059 for (unsigned N = 0; I != CB.arg_end(); ++I, ++N) {
3060 auto *OriginVAPtr = IRB.CreateStructGEP(OriginVATy, OriginVAAlloca, N);
3061 IRB.CreateStore(DFSF.getOrigin(*I), OriginVAPtr);
3062 }
3063
3064 Args.push_back(IRB.CreateStructGEP(OriginVATy, OriginVAAlloca, 0));
3065 }
3066
3067 // Add the return value origin.
3068 if (!FT->getReturnType()->isVoidTy()) {
3069 if (!DFSF.OriginReturnAlloca) {
3070 DFSF.OriginReturnAlloca = new AllocaInst(
3071 DFSF.DFS.OriginTy, getDataLayout().getAllocaAddrSpace(),
3072 "originreturn", DFSF.F->getEntryBlock().begin());
3073 }
3074 Args.push_back(DFSF.OriginReturnAlloca);
3075 }
3076}
3077
3078bool DFSanVisitor::visitWrappedCallBase(Function &F, CallBase &CB) {
3079 IRBuilder<> IRB(&CB);
3080 switch (DFSF.DFS.getWrapperKind(&F)) {
3081 case DataFlowSanitizer::WK_Warning:
3082 CB.setCalledFunction(&F);
3083 IRB.CreateCall(DFSF.DFS.DFSanUnimplementedFn,
3084 IRB.CreateGlobalString(F.getName()));
3085 DFSF.DFS.buildExternWeakCheckIfNeeded(IRB, &F);
3086 DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
3087 DFSF.setOrigin(&CB, DFSF.DFS.ZeroOrigin);
3088 return true;
3089 case DataFlowSanitizer::WK_Discard:
3090 CB.setCalledFunction(&F);
3091 DFSF.DFS.buildExternWeakCheckIfNeeded(IRB, &F);
3092 DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
3093 DFSF.setOrigin(&CB, DFSF.DFS.ZeroOrigin);
3094 return true;
3095 case DataFlowSanitizer::WK_Functional:
3096 CB.setCalledFunction(&F);
3097 DFSF.DFS.buildExternWeakCheckIfNeeded(IRB, &F);
3098 visitInstOperands(CB);
3099 return true;
3100 case DataFlowSanitizer::WK_Custom:
3101 // Don't try to handle invokes of custom functions, it's too complicated.
3102 // Instead, invoke the dfsw$ wrapper, which will in turn call the __dfsw_
3103 // wrapper.
3104 CallInst *CI = dyn_cast<CallInst>(&CB);
3105 if (!CI)
3106 return false;
3107
3108 const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
3109 FunctionType *FT = F.getFunctionType();
3110 TransformedFunction CustomFn = DFSF.DFS.getCustomFunctionType(FT);
3111 std::string CustomFName = ShouldTrackOrigins ? "__dfso_" : "__dfsw_";
3112 CustomFName += F.getName();
3113 FunctionCallee CustomF = DFSF.DFS.Mod->getOrInsertFunction(
3114 CustomFName, CustomFn.TransformedType);
3115 if (Function *CustomFn = dyn_cast<Function>(CustomF.getCallee())) {
3116 CustomFn->copyAttributesFrom(&F);
3117
3118 // Custom functions returning non-void will write to the return label.
3119 if (!FT->getReturnType()->isVoidTy()) {
3120 CustomFn->removeFnAttrs(DFSF.DFS.ReadOnlyNoneAttrs);
3121 }
3122 }
3123
3124 std::vector<Value *> Args;
3125
3126 // Adds non-variable arguments.
3127 auto *I = CB.arg_begin();
3128 for (unsigned N = FT->getNumParams(); N != 0; ++I, --N) {
3129 Args.push_back(*I);
3130 }
3131
3132 // Adds shadow arguments.
3133 const unsigned ShadowArgStart = Args.size();
3134 addShadowArguments(F, CB, Args, IRB);
3135
3136 // Adds origin arguments.
3137 const unsigned OriginArgStart = Args.size();
3138 if (ShouldTrackOrigins)
3139 addOriginArguments(F, CB, Args, IRB);
3140
3141 // Adds variable arguments.
3142 append_range(Args, drop_begin(CB.args(), FT->getNumParams()));
3143
3144 CallInst *CustomCI = IRB.CreateCall(CustomF, Args);
3145 CustomCI->setCallingConv(CI->getCallingConv());
3146 CustomCI->setAttributes(transformFunctionAttributes(
3147 CustomFn, CI->getContext(), CI->getAttributes()));
3148
3149 // Update the parameter attributes of the custom call instruction to
3150 // zero extend the shadow parameters. This is required for targets
3151 // which consider PrimitiveShadowTy an illegal type.
3152 for (unsigned N = 0; N < FT->getNumParams(); N++) {
3153 const unsigned ArgNo = ShadowArgStart + N;
3154 if (CustomCI->getArgOperand(ArgNo)->getType() ==
3155 DFSF.DFS.PrimitiveShadowTy)
3156 CustomCI->addParamAttr(ArgNo, Attribute::ZExt);
3157 if (ShouldTrackOrigins) {
3158 const unsigned OriginArgNo = OriginArgStart + N;
3159 if (CustomCI->getArgOperand(OriginArgNo)->getType() ==
3160 DFSF.DFS.OriginTy)
3161 CustomCI->addParamAttr(OriginArgNo, Attribute::ZExt);
3162 }
3163 }
3164
3165 // Loads the return value shadow and origin.
3166 if (!FT->getReturnType()->isVoidTy()) {
3167 LoadInst *LabelLoad =
3168 IRB.CreateLoad(DFSF.DFS.PrimitiveShadowTy, DFSF.LabelReturnAlloca);
3169 DFSF.setShadow(CustomCI,
3170 DFSF.expandFromPrimitiveShadow(
3171 FT->getReturnType(), LabelLoad, CB.getIterator()));
3172 if (ShouldTrackOrigins) {
3173 LoadInst *OriginLoad =
3174 IRB.CreateLoad(DFSF.DFS.OriginTy, DFSF.OriginReturnAlloca);
3175 DFSF.setOrigin(CustomCI, OriginLoad);
3176 }
3177 }
3178
3179 CI->replaceAllUsesWith(CustomCI);
3180 CI->eraseFromParent();
3181 return true;
3182 }
3183 return false;
3184}
3185
3186Value *DFSanVisitor::makeAddAcquireOrderingTable(IRBuilder<> &IRB) {
3187 constexpr int NumOrderings = (int)AtomicOrderingCABI::seq_cst + 1;
3188 uint32_t OrderingTable[NumOrderings] = {};
3189
3190 OrderingTable[(int)AtomicOrderingCABI::relaxed] =
3191 OrderingTable[(int)AtomicOrderingCABI::acquire] =
3192 OrderingTable[(int)AtomicOrderingCABI::consume] =
3193 (int)AtomicOrderingCABI::acquire;
3194 OrderingTable[(int)AtomicOrderingCABI::release] =
3195 OrderingTable[(int)AtomicOrderingCABI::acq_rel] =
3196 (int)AtomicOrderingCABI::acq_rel;
3197 OrderingTable[(int)AtomicOrderingCABI::seq_cst] =
3198 (int)AtomicOrderingCABI::seq_cst;
3199
3200 return ConstantDataVector::get(IRB.getContext(), OrderingTable);
3201}
3202
3203void DFSanVisitor::visitLibAtomicLoad(CallBase &CB) {
3204 // Since we use getNextNode here, we can't have CB terminate the BB.
3205 assert(isa<CallInst>(CB));
3206
3207 IRBuilder<> IRB(&CB);
3208 Value *Size = CB.getArgOperand(0);
3209 Value *SrcPtr = CB.getArgOperand(1);
3210 Value *DstPtr = CB.getArgOperand(2);
3211 Value *Ordering = CB.getArgOperand(3);
3212 // Convert the call to have at least Acquire ordering to make sure
3213 // the shadow operations aren't reordered before it.
3214 Value *NewOrdering =
3215 IRB.CreateExtractElement(makeAddAcquireOrderingTable(IRB), Ordering);
3216 CB.setArgOperand(3, NewOrdering);
3217
3218 IRBuilder<> NextIRB(CB.getNextNode());
3219 NextIRB.SetCurrentDebugLocation(CB.getDebugLoc());
3220
3221 // TODO: Support ClCombinePointerLabelsOnLoad
3222 // TODO: Support ClEventCallbacks
3223
3224 NextIRB.CreateCall(
3225 DFSF.DFS.DFSanMemShadowOriginTransferFn,
3226 {DstPtr, SrcPtr, NextIRB.CreateIntCast(Size, DFSF.DFS.IntptrTy, false)});
3227}
3228
3229Value *DFSanVisitor::makeAddReleaseOrderingTable(IRBuilder<> &IRB) {
3230 constexpr int NumOrderings = (int)AtomicOrderingCABI::seq_cst + 1;
3231 uint32_t OrderingTable[NumOrderings] = {};
3232
3233 OrderingTable[(int)AtomicOrderingCABI::relaxed] =
3234 OrderingTable[(int)AtomicOrderingCABI::release] =
3235 (int)AtomicOrderingCABI::release;
3236 OrderingTable[(int)AtomicOrderingCABI::consume] =
3237 OrderingTable[(int)AtomicOrderingCABI::acquire] =
3238 OrderingTable[(int)AtomicOrderingCABI::acq_rel] =
3239 (int)AtomicOrderingCABI::acq_rel;
3240 OrderingTable[(int)AtomicOrderingCABI::seq_cst] =
3241 (int)AtomicOrderingCABI::seq_cst;
3242
3243 return ConstantDataVector::get(IRB.getContext(), OrderingTable);
3244}
3245
3246void DFSanVisitor::visitLibAtomicStore(CallBase &CB) {
3247 IRBuilder<> IRB(&CB);
3248 Value *Size = CB.getArgOperand(0);
3249 Value *SrcPtr = CB.getArgOperand(1);
3250 Value *DstPtr = CB.getArgOperand(2);
3251 Value *Ordering = CB.getArgOperand(3);
3252 // Convert the call to have at least Release ordering to make sure
3253 // the shadow operations aren't reordered after it.
3254 Value *NewOrdering =
3255 IRB.CreateExtractElement(makeAddReleaseOrderingTable(IRB), Ordering);
3256 CB.setArgOperand(3, NewOrdering);
3257
3258 // TODO: Support ClCombinePointerLabelsOnStore
3259 // TODO: Support ClEventCallbacks
3260
3261 IRB.CreateCall(
3262 DFSF.DFS.DFSanMemShadowOriginTransferFn,
3263 {DstPtr, SrcPtr, IRB.CreateIntCast(Size, DFSF.DFS.IntptrTy, false)});
3264}
3265
3266void DFSanVisitor::visitLibAtomicExchange(CallBase &CB) {
3267 // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret, int
3268 // ordering)
3269 IRBuilder<> IRB(&CB);
3270 Value *Size = CB.getArgOperand(0);
3271 Value *TargetPtr = CB.getArgOperand(1);
3272 Value *SrcPtr = CB.getArgOperand(2);
3273 Value *DstPtr = CB.getArgOperand(3);
3274
3275 // This operation is not atomic for the shadow and origin memory.
3276 // This could result in DFSan false positives or false negatives.
3277 // For now we will assume these operations are rare, and
3278 // the additional complexity to address this is not warrented.
3279
3280 // Current Target to Dest
3281 IRB.CreateCall(
3282 DFSF.DFS.DFSanMemShadowOriginTransferFn,
3283 {DstPtr, TargetPtr, IRB.CreateIntCast(Size, DFSF.DFS.IntptrTy, false)});
3284
3285 // Current Src to Target (overriding)
3286 IRB.CreateCall(
3287 DFSF.DFS.DFSanMemShadowOriginTransferFn,
3288 {TargetPtr, SrcPtr, IRB.CreateIntCast(Size, DFSF.DFS.IntptrTy, false)});
3289}
3290
3291void DFSanVisitor::visitLibAtomicCompareExchange(CallBase &CB) {
3292 // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected, void
3293 // *desired, int success_order, int failure_order)
3294 Value *Size = CB.getArgOperand(0);
3295 Value *TargetPtr = CB.getArgOperand(1);
3296 Value *ExpectedPtr = CB.getArgOperand(2);
3297 Value *DesiredPtr = CB.getArgOperand(3);
3298
3299 // This operation is not atomic for the shadow and origin memory.
3300 // This could result in DFSan false positives or false negatives.
3301 // For now we will assume these operations are rare, and
3302 // the additional complexity to address this is not warrented.
3303
3304 IRBuilder<> NextIRB(CB.getNextNode());
3305 NextIRB.SetCurrentDebugLocation(CB.getDebugLoc());
3306
3307 DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
3308
3309 // If original call returned true, copy Desired to Target.
3310 // If original call returned false, copy Target to Expected.
3311 NextIRB.CreateCall(DFSF.DFS.DFSanMemShadowOriginConditionalExchangeFn,
3312 {NextIRB.CreateIntCast(&CB, NextIRB.getInt8Ty(), false),
3313 TargetPtr, ExpectedPtr, DesiredPtr,
3314 NextIRB.CreateIntCast(Size, DFSF.DFS.IntptrTy, false)});
3315}
3316
3317void DFSanVisitor::visitCallBase(CallBase &CB) {
3319 if ((F && F->isIntrinsic()) || CB.isInlineAsm()) {
3320 visitInstOperands(CB);
3321 return;
3322 }
3323
3324 // Calls to this function are synthesized in wrappers, and we shouldn't
3325 // instrument them.
3326 if (F == DFSF.DFS.DFSanVarargWrapperFn.getCallee()->stripPointerCasts())
3327 return;
3328
3329 LibFunc LF;
3330 if (DFSF.TLI.getLibFunc(CB, LF)) {
3331 // libatomic.a functions need to have special handling because there isn't
3332 // a good way to intercept them or compile the library with
3333 // instrumentation.
3334 switch (LF) {
3335 case LibFunc_atomic_load:
3336 if (!isa<CallInst>(CB)) {
3337 llvm::errs() << "DFSAN -- cannot instrument invoke of libatomic load. "
3338 "Ignoring!\n";
3339 break;
3340 }
3341 visitLibAtomicLoad(CB);
3342 return;
3343 case LibFunc_atomic_store:
3344 visitLibAtomicStore(CB);
3345 return;
3346 default:
3347 break;
3348 }
3349 }
3350
3351 // TODO: These are not supported by TLI? They are not in the enum.
3352 if (F && F->hasName() && !F->isVarArg()) {
3353 if (F->getName() == "__atomic_exchange") {
3354 visitLibAtomicExchange(CB);
3355 return;
3356 }
3357 if (F->getName() == "__atomic_compare_exchange") {
3358 visitLibAtomicCompareExchange(CB);
3359 return;
3360 }
3361 }
3362
3364 DFSF.DFS.UnwrappedFnMap.find(CB.getCalledOperand());
3365 if (UnwrappedFnIt != DFSF.DFS.UnwrappedFnMap.end())
3366 if (visitWrappedCallBase(*UnwrappedFnIt->second, CB))
3367 return;
3368
3369 IRBuilder<> IRB(&CB);
3370
3371 const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
3372 FunctionType *FT = CB.getFunctionType();
3373 const DataLayout &DL = getDataLayout();
3374
3375 // Stores argument shadows.
3376 unsigned ArgOffset = 0;
3377 for (unsigned I = 0, N = FT->getNumParams(); I != N; ++I) {
3378 if (ShouldTrackOrigins) {
3379 // Ignore overflowed origins
3380 Value *ArgShadow = DFSF.getShadow(CB.getArgOperand(I));
3381 if (I < DFSF.DFS.NumOfElementsInArgOrgTLS &&
3382 !DFSF.DFS.isZeroShadow(ArgShadow))
3383 IRB.CreateStore(DFSF.getOrigin(CB.getArgOperand(I)),
3384 DFSF.getArgOriginTLS(I, IRB));
3385 }
3386
3387 unsigned Size =
3388 DL.getTypeAllocSize(DFSF.DFS.getShadowTy(FT->getParamType(I)));
3389 // Stop storing if arguments' size overflows. Inside a function, arguments
3390 // after overflow have zero shadow values.
3391 if (ArgOffset + Size > ArgTLSSize)
3392 break;
3393 IRB.CreateAlignedStore(DFSF.getShadow(CB.getArgOperand(I)),
3394 DFSF.getArgTLS(FT->getParamType(I), ArgOffset, IRB),
3396 ArgOffset += alignTo(Size, ShadowTLSAlignment);
3397 }
3398
3399 Instruction *Next = nullptr;
3400 if (!CB.getType()->isVoidTy()) {
3401 if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
3402 if (II->getNormalDest()->getSinglePredecessor()) {
3403 Next = &II->getNormalDest()->front();
3404 } else {
3405 BasicBlock *NewBB =
3406 SplitEdge(II->getParent(), II->getNormalDest(), &DFSF.DT);
3407 Next = &NewBB->front();
3408 }
3409 } else {
3410 assert(CB.getIterator() != CB.getParent()->end());
3411 Next = CB.getNextNode();
3412 }
3413
3414 // Don't emit the epilogue for musttail call returns.
3415 if (isa<CallInst>(CB) && cast<CallInst>(CB).isMustTailCall())
3416 return;
3417
3418 // Loads the return value shadow.
3419 IRBuilder<> NextIRB(Next);
3420 unsigned Size = DL.getTypeAllocSize(DFSF.DFS.getShadowTy(&CB));
3421 if (Size > RetvalTLSSize) {
3422 // Set overflowed return shadow to be zero.
3423 DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
3424 } else {
3425 LoadInst *LI = NextIRB.CreateAlignedLoad(
3426 DFSF.DFS.getShadowTy(&CB), DFSF.getRetvalTLS(CB.getType(), NextIRB),
3427 ShadowTLSAlignment, "_dfsret");
3428 DFSF.SkipInsts.insert(LI);
3429 DFSF.setShadow(&CB, LI);
3430 DFSF.NonZeroChecks.push_back(LI);
3431 }
3432
3433 if (ShouldTrackOrigins) {
3434 LoadInst *LI = NextIRB.CreateLoad(DFSF.DFS.OriginTy,
3435 DFSF.getRetvalOriginTLS(), "_dfsret_o");
3436 DFSF.SkipInsts.insert(LI);
3437 DFSF.setOrigin(&CB, LI);
3438 }
3439
3440 DFSF.addReachesFunctionCallbacksIfEnabled(NextIRB, CB, &CB);
3441 }
3442}
3443
3444void DFSanVisitor::visitPHINode(PHINode &PN) {
3445 Type *ShadowTy = DFSF.DFS.getShadowTy(&PN);
3446 PHINode *ShadowPN = PHINode::Create(ShadowTy, PN.getNumIncomingValues(), "",
3447 PN.getIterator());
3448
3449 // Give the shadow phi node valid predecessors to fool SplitEdge into working.
3450 Value *UndefShadow = UndefValue::get(ShadowTy);
3451 for (BasicBlock *BB : PN.blocks())
3452 ShadowPN->addIncoming(UndefShadow, BB);
3453
3454 DFSF.setShadow(&PN, ShadowPN);
3455
3456 PHINode *OriginPN = nullptr;
3457 if (DFSF.DFS.shouldTrackOrigins()) {
3458 OriginPN = PHINode::Create(DFSF.DFS.OriginTy, PN.getNumIncomingValues(), "",
3459 PN.getIterator());
3460 Value *UndefOrigin = UndefValue::get(DFSF.DFS.OriginTy);
3461 for (BasicBlock *BB : PN.blocks())
3462 OriginPN->addIncoming(UndefOrigin, BB);
3463 DFSF.setOrigin(&PN, OriginPN);
3464 }
3465
3466 DFSF.PHIFixups.push_back({&PN, ShadowPN, OriginPN});
3467}
3468
3471 // Return early if nosanitize_dataflow module flag is present for the module.
3472 if (checkIfAlreadyInstrumented(M, "nosanitize_dataflow"))
3473 return PreservedAnalyses::all();
3474 auto GetTLI = [&](Function &F) -> TargetLibraryInfo & {
3475 auto &FAM =
3478 };
3479 if (!DataFlowSanitizer(ABIListFiles).runImpl(M, GetTLI))
3480 return PreservedAnalyses::all();
3481
3483 // GlobalsAA is considered stateless and does not get invalidated unless
3484 // explicitly invalidated; PreservedAnalyses::none() is not enough. Sanitizers
3485 // make changes that require GlobalsAA to be invalidated.
3486 PA.abandon<GlobalsAA>();
3487 return PA;
3488}
static bool isConstant(const MachineInstr &MI)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
const MemoryMapParams Linux_LoongArch64_MemoryMapParams
const MemoryMapParams Linux_X86_64_MemoryMapParams
static cl::opt< bool > ClTrackSelectControlFlow("dfsan-track-select-control-flow", cl::desc("Propagate labels from condition values of select instructions " "to results."), cl::Hidden, cl::init(true))
static cl::list< std::string > ClCombineTaintLookupTables("dfsan-combine-taint-lookup-table", cl::desc("When dfsan-combine-offset-labels-on-gep and/or " "dfsan-combine-pointer-labels-on-load are false, this flag can " "be used to re-enable combining offset and/or pointer taint when " "loading specific constant global variables (i.e. lookup tables)."), cl::Hidden)
static const Align MinOriginAlignment
static cl::opt< int > ClTrackOrigins("dfsan-track-origins", cl::desc("Track origins of labels"), cl::Hidden, cl::init(0))
static cl::list< std::string > ClABIListFiles("dfsan-abilist", cl::desc("File listing native ABI functions and how the pass treats them"), cl::Hidden)
static cl::opt< bool > ClReachesFunctionCallbacks("dfsan-reaches-function-callbacks", cl::desc("Insert calls to callback functions on data reaching a function."), cl::Hidden, cl::init(false))
static Value * expandFromPrimitiveShadowRecursive(Value *Shadow, SmallVector< unsigned, 4 > &Indices, Type *SubShadowTy, Value *PrimitiveShadow, IRBuilder<> &IRB)
static cl::opt< int > ClInstrumentWithCallThreshold("dfsan-instrument-with-call-threshold", cl::desc("If the function being instrumented requires more than " "this number of origin stores, use callbacks instead of " "inline checks (-1 means never use callbacks)."), cl::Hidden, cl::init(3500))
static cl::opt< bool > ClPreserveAlignment("dfsan-preserve-alignment", cl::desc("respect alignment requirements provided by input IR"), cl::Hidden, cl::init(false))
static cl::opt< bool > ClDebugNonzeroLabels("dfsan-debug-nonzero-labels", cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, " "load or return with a nonzero label"), cl::Hidden)
static cl::opt< bool > ClCombineOffsetLabelsOnGEP("dfsan-combine-offset-labels-on-gep", cl::desc("Combine the label of the offset with the label of the pointer when " "doing pointer arithmetic."), cl::Hidden, cl::init(true))
static cl::opt< bool > ClIgnorePersonalityRoutine("dfsan-ignore-personality-routine", cl::desc("If a personality routine is marked uninstrumented from the ABI " "list, do not create a wrapper for it."), cl::Hidden, cl::init(false))
static const Align ShadowTLSAlignment
static AtomicOrdering addReleaseOrdering(AtomicOrdering AO)
static AtomicOrdering addAcquireOrdering(AtomicOrdering AO)
Value * StripPointerGEPsAndCasts(Value *V)
const MemoryMapParams Linux_AArch64_MemoryMapParams
static cl::opt< bool > ClConditionalCallbacks("dfsan-conditional-callbacks", cl::desc("Insert calls to callback functions on conditionals."), cl::Hidden, cl::init(false))
static cl::opt< bool > ClCombinePointerLabelsOnLoad("dfsan-combine-pointer-labels-on-load", cl::desc("Combine the label of the pointer with the label of the data when " "loading from memory."), cl::Hidden, cl::init(true))
static StringRef getGlobalTypeString(const GlobalValue &G)
static cl::opt< bool > ClCombinePointerLabelsOnStore("dfsan-combine-pointer-labels-on-store", cl::desc("Combine the label of the pointer with the label of the data when " "storing in memory."), cl::Hidden, cl::init(false))
static const unsigned ArgTLSSize
static const unsigned RetvalTLSSize
static bool isAMustTailRetVal(Value *RetVal)
static cl::opt< bool > ClEventCallbacks("dfsan-event-callbacks", cl::desc("Insert calls to __dfsan_*_callback functions on data events."), cl::Hidden, cl::init(false))
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
uint64_t Addr
std::string Name
uint64_t Size
static bool runImpl(Function &F, const TargetLowering &TLI)
This is the interface for a simple mod/ref and alias analysis over globals.
Hexagon Common GEP
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
nvptx lower args
uint64_t IntrinsicInst * II
#define P(N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
FunctionAnalysisManager FAM
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
StringSet - A set-like wrapper for the StringMap.
Defines the virtual file system interface vfs::FileSystem.
Class for arbitrary precision integers.
Definition: APInt.h:78
an instruction to allocate memory on the stack
Definition: Instructions.h:63
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:501
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:704
AttributeSet getFnAttrs() const
The function attributes are returned.
static AttributeList get(LLVMContext &C, ArrayRef< std::pair< unsigned, Attribute > > Attrs)
Create an AttributeList with the specified parameters in it.
AttributeSet getRetAttrs() const
The attributes for the ret value are returned.
unsigned getNumAttrSets() const
AttributeSet getParamAttrs(unsigned ArgNo) const
The attributes for the argument or parameter at the given index are returned.
AttributeMask & addAttribute(Attribute::AttrKind Val)
Add an attribute to the mask.
Definition: AttributeMask.h:44
static Attribute getWithMemoryEffects(LLVMContext &Context, MemoryEffects ME)
Definition: Attributes.cpp:281
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const Instruction & front() const
Definition: BasicBlock.h:471
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:212
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
This class represents a no-op cast from one type to another.
Conditional or Unconditional Branch instruction.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1112
bool isInlineAsm() const
Check if this call is an inline asm statement.
Definition: InstrTypes.h:1408
void setCallingConv(CallingConv::ID CC)
Definition: InstrTypes.h:1403
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1341
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1399
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Definition: InstrTypes.h:1261
Value * getCalledOperand() const
Definition: InstrTypes.h:1334
void setAttributes(AttributeList A)
Set the attributes for this call.
Definition: InstrTypes.h:1420
void addRetAttr(Attribute::AttrKind Kind)
Adds the attribute to the return value.
Definition: InstrTypes.h:1484
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1286
void setArgOperand(unsigned i, Value *v)
Definition: InstrTypes.h:1291
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
Definition: InstrTypes.h:1267
FunctionType * getFunctionType() const
Definition: InstrTypes.h:1199
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
Definition: InstrTypes.h:1277
unsigned arg_size() const
Definition: InstrTypes.h:1284
AttributeList getAttributes() const
Return the attributes for this call.
Definition: InstrTypes.h:1417
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
Definition: InstrTypes.h:1494
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
Definition: InstrTypes.h:1380
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
bool isMustTailCall() const
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:444
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:661
static ConstantAggregateZero * get(Type *Ty)
Definition: Constants.cpp:1672
static Constant * get(LLVMContext &Context, ArrayRef< uint8_t > Elts)
get() constructors - Return a constant with vector type with an element count and element type matchi...
Definition: Constants.cpp:3006
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1108
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.h:126
This is an important base class in LLVM.
Definition: Constant.h:42
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:63
A debug info location.
Definition: DebugLoc.h:33
unsigned getLine() const
Definition: DebugLoc.cpp:24
DILocation * get() const
Get the underlying DILocation.
Definition: DebugLoc.cpp:20
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:152
iterator end()
Definition: DenseMap.h:84
Implements a dense probed hash-table based set.
Definition: DenseSet.h:278
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
This instruction extracts a single (scalar) element from a VectorType value.
This instruction extracts a struct member or array element value from an aggregate value.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:791
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:170
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition: Function.h:173
void removeFnAttrs(const AttributeMask &Attrs)
Definition: Function.cpp:697
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:353
void removeFnAttr(Attribute::AttrKind Kind)
Remove function attributes from this function.
Definition: Function.cpp:689
arg_iterator arg_begin()
Definition: Function.h:868
void removeRetAttrs(const AttributeMask &Attrs)
removes the attributes from the return value list of attributes.
Definition: Function.cpp:709
void copyAttributesFrom(const Function *Src)
copyAttributesFrom - copy all additional attributes (those not needed to create a Function) from the ...
Definition: Function.cpp:860
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:933
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Definition: Globals.cpp:587
const GlobalObject * getAliaseeObject() const
Definition: Globals.cpp:595
static bool isExternalWeakLinkage(LinkageTypes Linkage)
Definition: GlobalValue.h:413
LinkageTypes getLinkage() const
Definition: GlobalValue.h:547
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:657
LinkageTypes
An enumeration for the kinds of linkage for global values.
Definition: GlobalValue.h:51
@ WeakODRLinkage
Same, but only replaced by something equivalent.
Definition: GlobalValue.h:57
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Definition: GlobalValue.h:55
Type * getValueType() const
Definition: GlobalValue.h:297
Analysis pass providing a never-invalidated alias analysis result.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2511
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1887
AllocaInst * CreateAlloca(Type *Ty, unsigned AddrSpace, Value *ArraySize=nullptr, const Twine &Name="")
Definition: IRBuilder.h:1781
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2562
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2499
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition: IRBuilder.h:1815
Value * CreatePointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2199
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2555
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1053
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:194
Value * CreateStructGEP(Type *Ty, Value *Ptr, unsigned Idx, const Twine &Name="")
Definition: IRBuilder.h:1980
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2147
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1480
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:550
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2274
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:1874
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition: IRBuilder.h:1798
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1459
LLVMContext & getContext() const
Definition: IRBuilder.h:195
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1518
Value * CreateConstInBoundsGEP2_64(Type *Ty, Value *Ptr, uint64_t Idx0, uint64_t Idx1, const Twine &Name="")
Definition: IRBuilder.h:1967
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition: IRBuilder.h:1811
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1370
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2449
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2019
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1540
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:2225
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition: IRBuilder.h:1834
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1562
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1404
GlobalVariable * CreateGlobalString(StringRef Str, const Twine &Name="", unsigned AddressSpace=0, Module *M=nullptr, bool AddNull=true)
Make a new global variable with initializer type i8*.
Definition: IRBuilder.cpp:44
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2705
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:567
This instruction inserts a single (scalar) element into a VectorType value.
This instruction inserts a struct field or array element value into an aggregate value.
Base class for instruction visitors.
Definition: InstVisitor.h:78
RetTy visitCmpInst(CmpInst &I)
Definition: InstVisitor.h:265
RetTy visitExtractElementInst(ExtractElementInst &I)
Definition: InstVisitor.h:191
RetTy visitCallBase(CallBase &I)
Definition: InstVisitor.h:270
RetTy visitInsertValueInst(InsertValueInst &I)
Definition: InstVisitor.h:195
RetTy visitShuffleVectorInst(ShuffleVectorInst &I)
Definition: InstVisitor.h:193
RetTy visitLandingPadInst(LandingPadInst &I)
Definition: InstVisitor.h:196
RetTy visitAtomicCmpXchgInst(AtomicCmpXchgInst &I)
Definition: InstVisitor.h:171
RetTy visitBitCastInst(BitCastInst &I)
Definition: InstVisitor.h:187
RetTy visitSwitchInst(SwitchInst &I)
Definition: InstVisitor.h:235
RetTy visitPHINode(PHINode &I)
Definition: InstVisitor.h:175
RetTy visitReturnInst(ReturnInst &I)
Definition: InstVisitor.h:229
RetTy visitExtractValueInst(ExtractValueInst &I)
Definition: InstVisitor.h:194
RetTy visitUnaryOperator(UnaryOperator &I)
Definition: InstVisitor.h:263
RetTy visitStoreInst(StoreInst &I)
Definition: InstVisitor.h:170
RetTy visitInsertElementInst(InsertElementInst &I)
Definition: InstVisitor.h:192
RetTy visitAtomicRMWInst(AtomicRMWInst &I)
Definition: InstVisitor.h:172
RetTy visitAllocaInst(AllocaInst &I)
Definition: InstVisitor.h:168
RetTy visitBinaryOperator(BinaryOperator &I)
Definition: InstVisitor.h:264
RetTy visitMemTransferInst(MemTransferInst &I)
Definition: InstVisitor.h:217
RetTy visitMemSetInst(MemSetInst &I)
Definition: InstVisitor.h:209
RetTy visitCastInst(CastInst &I)
Definition: InstVisitor.h:262
RetTy visitBranchInst(BranchInst &I)
Definition: InstVisitor.h:232
RetTy visitSelectInst(SelectInst &I)
Definition: InstVisitor.h:189
RetTy visitGetElementPtrInst(GetElementPtrInst &I)
Definition: InstVisitor.h:174
RetTy visitLoadInst(LoadInst &I)
Definition: InstVisitor.h:169
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:475
bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:94
bool isTerminator() const
Definition: Instruction.h:277
bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:472
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:76
Class to represent integer types.
Definition: DerivedTypes.h:42
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:311
Invoke instruction.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
The landingpad instruction holds all of the information necessary to generate correct exception handl...
An instruction for reading from memory.
Definition: Instructions.h:176
void setAlignment(Align Align)
Definition: Instructions.h:215
Value * getPointerOperand()
Definition: Instructions.h:255
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this load instruction.
Definition: Instructions.h:225
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
Definition: Instructions.h:220
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:211
MDNode * createUnlikelyBranchWeights()
Return metadata containing two branch weights, with significant bias towards false destination.
Definition: MDBuilder.cpp:47
Metadata node.
Definition: Metadata.h:1073
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
This class wraps the llvm.memcpy/memmove intrinsics.
static MemoryEffectsBase readOnly()
Create MemoryEffectsBase that can read any memory.
Definition: ModRef.h:122
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const std::string & getModuleInlineAsm() const
Get any module-scope inline assembly blocks.
Definition: Module.h:306
void setModuleInlineAsm(StringRef Asm)
Set the module-scope inline assembly blocks.
Definition: Module.h:345
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Definition: Operator.h:42
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1878
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:114
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
void abandon()
Mark an analysis as abandoned.
Definition: Analysis.h:164
Return a value (possibly void), from a function.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
static ReturnInst * Create(LLVMContext &C, Value *retVal=nullptr, InsertPosition InsertBefore=nullptr)
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, Instruction *MDFrom=nullptr)
This instruction constructs a fixed permutation of two input vectors.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:452
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:458
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
size_t size() const
Definition: SmallVector.h:78
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
static std::unique_ptr< SpecialCaseList > createOrDie(const std::vector< std::string > &Paths, llvm::vfs::FileSystem &FS)
Parses the special case list entries from files.
An instruction for storing to memory.
Definition: Instructions.h:292
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:23
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition: StringSet.h:38
Class to represent struct types.
Definition: DerivedTypes.h:218
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:406
Multiway switch.
Value * getCondition() const
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
@ loongarch64
Definition: Triple.h:62
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
static Type * getVoidTy(LLVMContext &C)
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:310
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1859
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
Value * getOperand(unsigned i) const
Definition: User.h:228
unsigned getNumOperands() const
Definition: User.h:250
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:694
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
bool hasName() const
Definition: Value.h:261
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:353
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Key
PAL metadata keys.
AttributeMask typeIncompatible(Type *Ty, AttributeSet AS, AttributeSafetyKind ASK=ASK_ALL)
Which attributes cannot be applied to a type.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:1118
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
NodeAddr< BlockNode * > Block
Definition: RDFGraph.h:392
IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets a vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:480
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2115
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:657
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition: STLExtras.h:2107
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, const LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition: Alignment.h:111
iterator_range< df_iterator< T > > depth_first(const T &G)
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
bool removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Remove all blocks that can not be reached from the function's entry.
Definition: Local.cpp:3274
bool checkIfAlreadyInstrumented(Module &M, StringRef Flag)
Check if module has flag attached, if not add the flag.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85