clang  3.9.0
GenericTaintChecker.cpp
Go to the documentation of this file.
1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This checker defines the attack surface for generic taint propagation.
11 //
12 // The taint information produced by it might be useful to other checkers. For
13 // example, checkers should report errors which involve tainted data more
14 // aggressively, even if the involved symbols are under-constrained.
15 //
16 //===----------------------------------------------------------------------===//
17 #include "ClangSACheckers.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/Basic/Builtins.h"
25 #include <climits>
26 
27 using namespace clang;
28 using namespace ento;
29 
30 namespace {
31 class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
32  check::PreStmt<CallExpr> > {
33 public:
34  static void *getTag() { static int Tag; return &Tag; }
35 
36  void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
37 
38  void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
39 
40 private:
41  static const unsigned InvalidArgIndex = UINT_MAX;
42  /// Denotes the return vale.
43  static const unsigned ReturnValueIndex = UINT_MAX - 1;
44 
45  mutable std::unique_ptr<BugType> BT;
46  inline void initBugType() const {
47  if (!BT)
48  BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data"));
49  }
50 
51  /// \brief Catch taint related bugs. Check if tainted data is passed to a
52  /// system call etc.
53  bool checkPre(const CallExpr *CE, CheckerContext &C) const;
54 
55  /// \brief Add taint sources on a pre-visit.
56  void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
57 
58  /// \brief Propagate taint generated at pre-visit.
59  bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
60 
61  /// \brief Add taint sources on a post visit.
62  void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
63 
64  /// Check if the region the expression evaluates to is the standard input,
65  /// and thus, is tainted.
66  static bool isStdin(const Expr *E, CheckerContext &C);
67 
68  /// \brief Given a pointer argument, get the symbol of the value it contains
69  /// (points to).
70  static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg);
71 
72  /// Functions defining the attack surface.
73  typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *,
74  CheckerContext &C) const;
75  ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const;
76  ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const;
77  ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const;
78 
79  /// Taint the scanned input if the file is tainted.
80  ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const;
81 
82  /// Check for CWE-134: Uncontrolled Format String.
83  static const char MsgUncontrolledFormatString[];
84  bool checkUncontrolledFormatString(const CallExpr *CE,
85  CheckerContext &C) const;
86 
87  /// Check for:
88  /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
89  /// CWE-78, "Failure to Sanitize Data into an OS Command"
90  static const char MsgSanitizeSystemArgs[];
91  bool checkSystemCall(const CallExpr *CE, StringRef Name,
92  CheckerContext &C) const;
93 
94  /// Check if tainted data is used as a buffer size ins strn.. functions,
95  /// and allocators.
96  static const char MsgTaintedBufferSize[];
97  bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
98  CheckerContext &C) const;
99 
100  /// Generate a report if the expression is tainted or points to tainted data.
101  bool generateReportIfTainted(const Expr *E, const char Msg[],
102  CheckerContext &C) const;
103 
104 
105  typedef SmallVector<unsigned, 2> ArgVector;
106 
107  /// \brief A struct used to specify taint propagation rules for a function.
108  ///
109  /// If any of the possible taint source arguments is tainted, all of the
110  /// destination arguments should also be tainted. Use InvalidArgIndex in the
111  /// src list to specify that all of the arguments can introduce taint. Use
112  /// InvalidArgIndex in the dst arguments to signify that all the non-const
113  /// pointer and reference arguments might be tainted on return. If
114  /// ReturnValueIndex is added to the dst list, the return value will be
115  /// tainted.
116  struct TaintPropagationRule {
117  /// List of arguments which can be taint sources and should be checked.
118  ArgVector SrcArgs;
119  /// List of arguments which should be tainted on function return.
120  ArgVector DstArgs;
121  // TODO: Check if using other data structures would be more optimal.
122 
123  TaintPropagationRule() {}
124 
125  TaintPropagationRule(unsigned SArg,
126  unsigned DArg, bool TaintRet = false) {
127  SrcArgs.push_back(SArg);
128  DstArgs.push_back(DArg);
129  if (TaintRet)
130  DstArgs.push_back(ReturnValueIndex);
131  }
132 
133  TaintPropagationRule(unsigned SArg1, unsigned SArg2,
134  unsigned DArg, bool TaintRet = false) {
135  SrcArgs.push_back(SArg1);
136  SrcArgs.push_back(SArg2);
137  DstArgs.push_back(DArg);
138  if (TaintRet)
139  DstArgs.push_back(ReturnValueIndex);
140  }
141 
142  /// Get the propagation rule for a given function.
143  static TaintPropagationRule
144  getTaintPropagationRule(const FunctionDecl *FDecl,
145  StringRef Name,
146  CheckerContext &C);
147 
148  inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
149  inline void addDstArg(unsigned A) { DstArgs.push_back(A); }
150 
151  inline bool isNull() const { return SrcArgs.empty(); }
152 
153  inline bool isDestinationArgument(unsigned ArgNum) const {
154  return (std::find(DstArgs.begin(),
155  DstArgs.end(), ArgNum) != DstArgs.end());
156  }
157 
158  static inline bool isTaintedOrPointsToTainted(const Expr *E,
160  CheckerContext &C) {
161  return (State->isTainted(E, C.getLocationContext()) || isStdin(E, C) ||
162  (E->getType().getTypePtr()->isPointerType() &&
163  State->isTainted(getPointedToSymbol(C, E))));
164  }
165 
166  /// \brief Pre-process a function which propagates taint according to the
167  /// taint rule.
168  ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
169 
170  };
171 };
172 
173 const unsigned GenericTaintChecker::ReturnValueIndex;
174 const unsigned GenericTaintChecker::InvalidArgIndex;
175 
176 const char GenericTaintChecker::MsgUncontrolledFormatString[] =
177  "Untrusted data is used as a format string "
178  "(CWE-134: Uncontrolled Format String)";
179 
180 const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
181  "Untrusted data is passed to a system call "
182  "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
183 
184 const char GenericTaintChecker::MsgTaintedBufferSize[] =
185  "Untrusted data is used to specify the buffer size "
186  "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for "
187  "character data and the null terminator)";
188 
189 } // end of anonymous namespace
190 
191 /// A set which is used to pass information from call pre-visit instruction
192 /// to the call post-visit. The values are unsigned integers, which are either
193 /// ReturnValueIndex, or indexes of the pointer/reference argument, which
194 /// points to data, which should be tainted on return.
195 REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
196 
197 GenericTaintChecker::TaintPropagationRule
198 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
199  const FunctionDecl *FDecl,
200  StringRef Name,
201  CheckerContext &C) {
202  // TODO: Currently, we might lose precision here: we always mark a return
203  // value as tainted even if it's just a pointer, pointing to tainted data.
204 
205  // Check for exact name match for functions without builtin substitutes.
206  TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
207  .Case("atoi", TaintPropagationRule(0, ReturnValueIndex))
208  .Case("atol", TaintPropagationRule(0, ReturnValueIndex))
209  .Case("atoll", TaintPropagationRule(0, ReturnValueIndex))
210  .Case("getc", TaintPropagationRule(0, ReturnValueIndex))
211  .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex))
212  .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex))
213  .Case("getw", TaintPropagationRule(0, ReturnValueIndex))
214  .Case("toupper", TaintPropagationRule(0, ReturnValueIndex))
215  .Case("tolower", TaintPropagationRule(0, ReturnValueIndex))
216  .Case("strchr", TaintPropagationRule(0, ReturnValueIndex))
217  .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex))
218  .Case("read", TaintPropagationRule(0, 2, 1, true))
219  .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true))
220  .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true))
221  .Case("fgets", TaintPropagationRule(2, 0, true))
222  .Case("getline", TaintPropagationRule(2, 0))
223  .Case("getdelim", TaintPropagationRule(3, 0))
224  .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex))
225  .Default(TaintPropagationRule());
226 
227  if (!Rule.isNull())
228  return Rule;
229 
230  // Check if it's one of the memory setting/copying functions.
231  // This check is specialized but faster then calling isCLibraryFunction.
232  unsigned BId = 0;
233  if ( (BId = FDecl->getMemoryFunctionKind()) )
234  switch(BId) {
235  case Builtin::BImemcpy:
236  case Builtin::BImemmove:
237  case Builtin::BIstrncpy:
238  case Builtin::BIstrncat:
239  return TaintPropagationRule(1, 2, 0, true);
240  case Builtin::BIstrlcpy:
241  case Builtin::BIstrlcat:
242  return TaintPropagationRule(1, 2, 0, false);
243  case Builtin::BIstrndup:
244  return TaintPropagationRule(0, 1, ReturnValueIndex);
245 
246  default:
247  break;
248  };
249 
250  // Process all other functions which could be defined as builtins.
251  if (Rule.isNull()) {
252  if (C.isCLibraryFunction(FDecl, "snprintf") ||
253  C.isCLibraryFunction(FDecl, "sprintf"))
254  return TaintPropagationRule(InvalidArgIndex, 0, true);
255  else if (C.isCLibraryFunction(FDecl, "strcpy") ||
256  C.isCLibraryFunction(FDecl, "stpcpy") ||
257  C.isCLibraryFunction(FDecl, "strcat"))
258  return TaintPropagationRule(1, 0, true);
259  else if (C.isCLibraryFunction(FDecl, "bcopy"))
260  return TaintPropagationRule(0, 2, 1, false);
261  else if (C.isCLibraryFunction(FDecl, "strdup") ||
262  C.isCLibraryFunction(FDecl, "strdupa"))
263  return TaintPropagationRule(0, ReturnValueIndex);
264  else if (C.isCLibraryFunction(FDecl, "wcsdup"))
265  return TaintPropagationRule(0, ReturnValueIndex);
266  }
267 
268  // Skipping the following functions, since they might be used for cleansing
269  // or smart memory copy:
270  // - memccpy - copying until hitting a special character.
271 
272  return TaintPropagationRule();
273 }
274 
275 void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
276  CheckerContext &C) const {
277  // Check for errors first.
278  if (checkPre(CE, C))
279  return;
280 
281  // Add taint second.
282  addSourcesPre(CE, C);
283 }
284 
285 void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
286  CheckerContext &C) const {
287  if (propagateFromPre(CE, C))
288  return;
289  addSourcesPost(CE, C);
290 }
291 
292 void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
293  CheckerContext &C) const {
294  ProgramStateRef State = nullptr;
295  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
296  if (!FDecl || FDecl->getKind() != Decl::Function)
297  return;
298 
299  StringRef Name = C.getCalleeName(FDecl);
300  if (Name.empty())
301  return;
302 
303  // First, try generating a propagation rule for this function.
304  TaintPropagationRule Rule =
305  TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
306  if (!Rule.isNull()) {
307  State = Rule.process(CE, C);
308  if (!State)
309  return;
310  C.addTransition(State);
311  return;
312  }
313 
314  // Otherwise, check if we have custom pre-processing implemented.
315  FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
316  .Case("fscanf", &GenericTaintChecker::preFscanf)
317  .Default(nullptr);
318  // Check and evaluate the call.
319  if (evalFunction)
320  State = (this->*evalFunction)(CE, C);
321  if (!State)
322  return;
323  C.addTransition(State);
324 
325 }
326 
327 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
328  CheckerContext &C) const {
329  ProgramStateRef State = C.getState();
330 
331  // Depending on what was tainted at pre-visit, we determined a set of
332  // arguments which should be tainted after the function returns. These are
333  // stored in the state as TaintArgsOnPostVisit set.
334  TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
335  if (TaintArgs.isEmpty())
336  return false;
337 
339  I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
340  unsigned ArgNum = *I;
341 
342  // Special handling for the tainted return value.
343  if (ArgNum == ReturnValueIndex) {
344  State = State->addTaint(CE, C.getLocationContext());
345  continue;
346  }
347 
348  // The arguments are pointer arguments. The data they are pointing at is
349  // tainted after the call.
350  if (CE->getNumArgs() < (ArgNum + 1))
351  return false;
352  const Expr* Arg = CE->getArg(ArgNum);
353  SymbolRef Sym = getPointedToSymbol(C, Arg);
354  if (Sym)
355  State = State->addTaint(Sym);
356  }
357 
358  // Clear up the taint info from the state.
359  State = State->remove<TaintArgsOnPostVisit>();
360 
361  if (State != C.getState()) {
362  C.addTransition(State);
363  return true;
364  }
365  return false;
366 }
367 
368 void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
369  CheckerContext &C) const {
370  // Define the attack surface.
371  // Set the evaluation function by switching on the callee name.
372  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
373  if (!FDecl || FDecl->getKind() != Decl::Function)
374  return;
375 
376  StringRef Name = C.getCalleeName(FDecl);
377  if (Name.empty())
378  return;
379  FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
380  .Case("scanf", &GenericTaintChecker::postScanf)
381  // TODO: Add support for vfscanf & family.
382  .Case("getchar", &GenericTaintChecker::postRetTaint)
383  .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint)
384  .Case("getenv", &GenericTaintChecker::postRetTaint)
385  .Case("fopen", &GenericTaintChecker::postRetTaint)
386  .Case("fdopen", &GenericTaintChecker::postRetTaint)
387  .Case("freopen", &GenericTaintChecker::postRetTaint)
388  .Case("getch", &GenericTaintChecker::postRetTaint)
389  .Case("wgetch", &GenericTaintChecker::postRetTaint)
390  .Case("socket", &GenericTaintChecker::postSocket)
391  .Default(nullptr);
392 
393  // If the callee isn't defined, it is not of security concern.
394  // Check and evaluate the call.
395  ProgramStateRef State = nullptr;
396  if (evalFunction)
397  State = (this->*evalFunction)(CE, C);
398  if (!State)
399  return;
400 
401  C.addTransition(State);
402 }
403 
404 bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
405 
406  if (checkUncontrolledFormatString(CE, C))
407  return true;
408 
409  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
410  if (!FDecl || FDecl->getKind() != Decl::Function)
411  return false;
412 
413  StringRef Name = C.getCalleeName(FDecl);
414  if (Name.empty())
415  return false;
416 
417  if (checkSystemCall(CE, Name, C))
418  return true;
419 
420  if (checkTaintedBufferSize(CE, FDecl, C))
421  return true;
422 
423  return false;
424 }
425 
426 SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
427  const Expr* Arg) {
428  ProgramStateRef State = C.getState();
429  SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
430  if (AddrVal.isUnknownOrUndef())
431  return nullptr;
432 
433  Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
434  if (!AddrLoc)
435  return nullptr;
436 
437  const PointerType *ArgTy =
438  dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
439  SVal Val = State->getSVal(*AddrLoc,
440  ArgTy ? ArgTy->getPointeeType(): QualType());
441  return Val.getAsSymbol();
442 }
443 
445 GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
446  CheckerContext &C) const {
447  ProgramStateRef State = C.getState();
448 
449  // Check for taint in arguments.
450  bool IsTainted = false;
451  for (ArgVector::const_iterator I = SrcArgs.begin(),
452  E = SrcArgs.end(); I != E; ++I) {
453  unsigned ArgNum = *I;
454 
455  if (ArgNum == InvalidArgIndex) {
456  // Check if any of the arguments is tainted, but skip the
457  // destination arguments.
458  for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
459  if (isDestinationArgument(i))
460  continue;
461  if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
462  break;
463  }
464  break;
465  }
466 
467  if (CE->getNumArgs() < (ArgNum + 1))
468  return State;
469  if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
470  break;
471  }
472  if (!IsTainted)
473  return State;
474 
475  // Mark the arguments which should be tainted after the function returns.
476  for (ArgVector::const_iterator I = DstArgs.begin(),
477  E = DstArgs.end(); I != E; ++I) {
478  unsigned ArgNum = *I;
479 
480  // Should we mark all arguments as tainted?
481  if (ArgNum == InvalidArgIndex) {
482  // For all pointer and references that were passed in:
483  // If they are not pointing to const data, mark data as tainted.
484  // TODO: So far we are just going one level down; ideally we'd need to
485  // recurse here.
486  for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
487  const Expr *Arg = CE->getArg(i);
488  // Process pointer argument.
489  const Type *ArgTy = Arg->getType().getTypePtr();
490  QualType PType = ArgTy->getPointeeType();
491  if ((!PType.isNull() && !PType.isConstQualified())
492  || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
493  State = State->add<TaintArgsOnPostVisit>(i);
494  }
495  continue;
496  }
497 
498  // Should mark the return value?
499  if (ArgNum == ReturnValueIndex) {
500  State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
501  continue;
502  }
503 
504  // Mark the given argument.
505  assert(ArgNum < CE->getNumArgs());
506  State = State->add<TaintArgsOnPostVisit>(ArgNum);
507  }
508 
509  return State;
510 }
511 
512 
513 // If argument 0 (file descriptor) is tainted, all arguments except for arg 0
514 // and arg 1 should get taint.
515 ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE,
516  CheckerContext &C) const {
517  assert(CE->getNumArgs() >= 2);
518  ProgramStateRef State = C.getState();
519 
520  // Check is the file descriptor is tainted.
521  if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
522  isStdin(CE->getArg(0), C)) {
523  // All arguments except for the first two should get taint.
524  for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
525  State = State->add<TaintArgsOnPostVisit>(i);
526  return State;
527  }
528 
529  return nullptr;
530 }
531 
532 
533 // If argument 0(protocol domain) is network, the return value should get taint.
534 ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE,
535  CheckerContext &C) const {
536  ProgramStateRef State = C.getState();
537  if (CE->getNumArgs() < 3)
538  return State;
539 
540  SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
541  StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
542  // White list the internal communication protocols.
543  if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
544  DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
545  return State;
546  State = State->addTaint(CE, C.getLocationContext());
547  return State;
548 }
549 
550 ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE,
551  CheckerContext &C) const {
552  ProgramStateRef State = C.getState();
553  if (CE->getNumArgs() < 2)
554  return State;
555 
556  // All arguments except for the very first one should get taint.
557  for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
558  // The arguments are pointer arguments. The data they are pointing at is
559  // tainted after the call.
560  const Expr* Arg = CE->getArg(i);
561  SymbolRef Sym = getPointedToSymbol(C, Arg);
562  if (Sym)
563  State = State->addTaint(Sym);
564  }
565  return State;
566 }
567 
568 ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE,
569  CheckerContext &C) const {
570  return C.getState()->addTaint(CE, C.getLocationContext());
571 }
572 
573 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
574  ProgramStateRef State = C.getState();
575  SVal Val = State->getSVal(E, C.getLocationContext());
576 
577  // stdin is a pointer, so it would be a region.
578  const MemRegion *MemReg = Val.getAsRegion();
579 
580  // The region should be symbolic, we do not know it's value.
581  const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
582  if (!SymReg)
583  return false;
584 
585  // Get it's symbol and find the declaration region it's pointing to.
586  const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
587  if (!Sm)
588  return false;
589  const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
590  if (!DeclReg)
591  return false;
592 
593  // This region corresponds to a declaration, find out if it's a global/extern
594  // variable named stdin with the proper type.
595  if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
596  D = D->getCanonicalDecl();
597  if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
598  if (const PointerType * PtrTy =
599  dyn_cast<PointerType>(D->getType().getTypePtr()))
600  if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
601  return true;
602  }
603  return false;
604 }
605 
606 static bool getPrintfFormatArgumentNum(const CallExpr *CE,
607  const CheckerContext &C,
608  unsigned int &ArgNum) {
609  // Find if the function contains a format string argument.
610  // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
611  // vsnprintf, syslog, custom annotated functions.
612  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
613  if (!FDecl)
614  return false;
615  for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
616  ArgNum = Format->getFormatIdx() - 1;
617  if ((Format->getType()->getName() == "printf") &&
618  CE->getNumArgs() > ArgNum)
619  return true;
620  }
621 
622  // Or if a function is named setproctitle (this is a heuristic).
623  if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
624  ArgNum = 0;
625  return true;
626  }
627 
628  return false;
629 }
630 
631 bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
632  const char Msg[],
633  CheckerContext &C) const {
634  assert(E);
635 
636  // Check for taint.
637  ProgramStateRef State = C.getState();
638  if (!State->isTainted(getPointedToSymbol(C, E)) &&
639  !State->isTainted(E, C.getLocationContext()))
640  return false;
641 
642  // Generate diagnostic.
644  initBugType();
645  auto report = llvm::make_unique<BugReport>(*BT, Msg, N);
646  report->addRange(E->getSourceRange());
647  C.emitReport(std::move(report));
648  return true;
649  }
650  return false;
651 }
652 
653 bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
654  CheckerContext &C) const{
655  // Check if the function contains a format string argument.
656  unsigned int ArgNum = 0;
657  if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
658  return false;
659 
660  // If either the format string content or the pointer itself are tainted, warn.
661  return generateReportIfTainted(CE->getArg(ArgNum),
662  MsgUncontrolledFormatString, C);
663 }
664 
665 bool GenericTaintChecker::checkSystemCall(const CallExpr *CE,
666  StringRef Name,
667  CheckerContext &C) const {
668  // TODO: It might make sense to run this check on demand. In some cases,
669  // we should check if the environment has been cleansed here. We also might
670  // need to know if the user was reset before these calls(seteuid).
671  unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
672  .Case("system", 0)
673  .Case("popen", 0)
674  .Case("execl", 0)
675  .Case("execle", 0)
676  .Case("execlp", 0)
677  .Case("execv", 0)
678  .Case("execvp", 0)
679  .Case("execvP", 0)
680  .Case("execve", 0)
681  .Case("dlopen", 0)
682  .Default(UINT_MAX);
683 
684  if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1))
685  return false;
686 
687  return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C);
688 }
689 
690 // TODO: Should this check be a part of the CString checker?
691 // If yes, should taint be a global setting?
692 bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
693  const FunctionDecl *FDecl,
694  CheckerContext &C) const {
695  // If the function has a buffer size argument, set ArgNum.
696  unsigned ArgNum = InvalidArgIndex;
697  unsigned BId = 0;
698  if ( (BId = FDecl->getMemoryFunctionKind()) )
699  switch(BId) {
700  case Builtin::BImemcpy:
701  case Builtin::BImemmove:
702  case Builtin::BIstrncpy:
703  ArgNum = 2;
704  break;
705  case Builtin::BIstrndup:
706  ArgNum = 1;
707  break;
708  default:
709  break;
710  };
711 
712  if (ArgNum == InvalidArgIndex) {
713  if (C.isCLibraryFunction(FDecl, "malloc") ||
714  C.isCLibraryFunction(FDecl, "calloc") ||
715  C.isCLibraryFunction(FDecl, "alloca"))
716  ArgNum = 0;
717  else if (C.isCLibraryFunction(FDecl, "memccpy"))
718  ArgNum = 3;
719  else if (C.isCLibraryFunction(FDecl, "realloc"))
720  ArgNum = 1;
721  else if (C.isCLibraryFunction(FDecl, "bcopy"))
722  ArgNum = 2;
723  }
724 
725  return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
726  generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C);
727 }
728 
729 void ento::registerGenericTaintChecker(CheckerManager &mgr) {
730  mgr.registerChecker<GenericTaintChecker>();
731 }
FunctionDecl - An instance of this class is created to represent a function declaration or definition...
Definition: Decl.h:1561
StringRef getCalleeName(const FunctionDecl *FunDecl) const
Get the name of the called function (path-sensitive).
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition: Type.h:2179
A (possibly-)qualified type.
Definition: Type.h:598
MemRegion - The root abstract class for all memory regions.
Definition: MemRegion.h:79
Expr * getArg(unsigned Arg)
getArg - Return the specified argument.
Definition: Expr.h:2217
const Decl * getDecl() const
Definition: MemRegion.h:849
ExplodedNode * addTransition(ProgramStateRef State=nullptr, const ProgramPointTag *Tag=nullptr)
Generates a new transition in the program state graph (ExplodedGraph).
IntrusiveRefCntPtr< const ProgramState > ProgramStateRef
The base class of the type hierarchy.
Definition: Type.h:1281
VarDecl - An instance of this class is created to represent a variable declaration or definition...
Definition: Decl.h:768
#define REGISTER_SET_WITH_PROGRAMSTATE(Name, Elem)
Declares an immutable set of type NameTy, suitable for placement into the ProgramState.
Symbolic value.
Definition: SymExpr.h:29
class LLVM_ALIGNAS(8) DependentTemplateSpecializationType const IdentifierInfo * Name
Represents a template specialization type whose template cannot be resolved, e.g. ...
Definition: Type.h:4549
const FunctionDecl * getCalleeDecl(const CallExpr *CE) const
Get the declaration of the called function (path-sensitive).
LineState State
bool isReferenceType() const
Definition: Type.h:5491
SymbolRef getSymbol() const
Definition: MemRegion.h:715
bool isUnknownOrUndef() const
Definition: SVals.h:125
static bool getPrintfFormatArgumentNum(const CallExpr *CE, const CheckerContext &C, unsigned int &ArgNum)
#define UINT_MAX
Definition: limits.h:72
detail::InMemoryDirectory::const_iterator I
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee...
Definition: Type.cpp:415
SymbolicRegion - A special, "non-concrete" region.
Definition: MemRegion.h:707
Expr - This represents one expression.
Definition: Expr.h:105
const ProgramStateRef & getState() const
static bool isCLibraryFunction(const FunctionDecl *FD, StringRef Name=StringRef())
Returns true if the callee is an externally-visible function in the top-level namespace, such as malloc.
Optional< T > getAs() const
Convert to the specified SVal type, returning None if this SVal is not of the desired type...
Definition: SVals.h:86
QualType getFILEType() const
Retrieve the C FILE type.
Definition: ASTContext.h:1501
const TypedValueRegion * getRegion() const
Definition: SymbolManager.h:49
ExplodedNode * generateNonFatalErrorNode(ProgramStateRef State=nullptr, const ProgramPointTag *Tag=nullptr)
Generate a transition to a node that will be used to report an error.
void emitReport(std::unique_ptr< BugReport > R)
Emit the diagnostics report.
CHECKER * registerChecker()
Used to register checkers.
Encodes a location in the source.
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
Definition: Type.h:5259
SVal - This represents a symbolic expression, which can be either an L-value or an R-value...
Definition: SVals.h:46
unsigned getMemoryFunctionKind() const
Identify a memory copying or setting function.
Definition: Decl.cpp:3350
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition: Expr.cpp:193
QualType getPointeeType() const
Definition: Type.h:2193
QualType getType() const
Definition: Expr.h:126
A symbol representing the value stored at a MemRegion.
Definition: SymbolManager.h:42
StringRef getMacroNameOrSpelling(SourceLocation &Loc)
Depending on whether the location corresponds to a macro, return either the macro name or the token sp...
detail::InMemoryDirectory::const_iterator E
const MemRegion * getAsRegion() const
Definition: SVals.cpp:135
unsigned getNumArgs() const
getNumArgs - Return the number of actual arguments to this call.
Definition: Expr.h:2205
QualType getCanonicalType() const
Definition: Type.h:5298
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition: Expr.h:2148
bool isConstQualified() const
Determine whether this type is const-qualified.
Definition: Type.h:5318
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition: Type.h:665
Defines enum values for all the target-independent builtin functions.
unsigned getNumArgs() const
Retrieve the number of template arguments.
Definition: Type.h:4247
Expr * IgnoreParens() LLVM_READONLY
IgnoreParens - Ignore parentheses.
Definition: Expr.cpp:2295
const LocationContext * getLocationContext() const
bool isPointerType() const
Definition: Type.h:5482