clang  3.9.0
ScanfFormatString.cpp
Go to the documentation of this file.
1 //= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Handling of format strings in scanf and friends. The structure of format
11 // strings for fscanf() is described in C99 7.19.6.2.
12 //
13 //===----------------------------------------------------------------------===//
14 
16 #include "FormatStringParsing.h"
17 #include "clang/Basic/TargetInfo.h"
18 
27 using namespace clang;
28 
31 
34  const char *&Beg, const char *E) {
35  const char *I = Beg;
36  const char *start = I - 1;
37  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
38 
39  // No more characters?
40  if (I == E) {
41  H.HandleIncompleteScanList(start, I);
42  return true;
43  }
44 
45  // Special case: ']' is the first character.
46  if (*I == ']') {
47  if (++I == E) {
48  H.HandleIncompleteScanList(start, I - 1);
49  return true;
50  }
51  }
52 
53  // Special case: "^]" are the first characters.
54  if (I + 1 != E && I[0] == '^' && I[1] == ']') {
55  I += 2;
56  if (I == E) {
57  H.HandleIncompleteScanList(start, I - 1);
58  return true;
59  }
60  }
61 
62  // Look for a ']' character which denotes the end of the scan list.
63  while (*I != ']') {
64  if (++I == E) {
65  H.HandleIncompleteScanList(start, I - 1);
66  return true;
67  }
68  }
69 
70  CS.setEndScanList(I);
71  return false;
72 }
73 
74 // FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
75 // We can possibly refactor.
77  const char *&Beg,
78  const char *E,
79  unsigned &argIndex,
80  const LangOptions &LO,
81  const TargetInfo &Target) {
82  using namespace clang::analyze_format_string;
83  using namespace clang::analyze_scanf;
84  const char *I = Beg;
85  const char *Start = nullptr;
86  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
87 
88  // Look for a '%' character that indicates the start of a format specifier.
89  for ( ; I != E ; ++I) {
90  char c = *I;
91  if (c == '\0') {
92  // Detect spurious null characters, which are likely errors.
93  H.HandleNullChar(I);
94  return true;
95  }
96  if (c == '%') {
97  Start = I++; // Record the start of the format specifier.
98  break;
99  }
100  }
101 
102  // No format specifier found?
103  if (!Start)
104  return false;
105 
106  if (I == E) {
107  // No more characters left?
108  H.HandleIncompleteSpecifier(Start, E - Start);
109  return true;
110  }
111 
112  ScanfSpecifier FS;
113  if (ParseArgPosition(H, FS, Start, I, E))
114  return true;
115 
116  if (I == E) {
117  // No more characters left?
118  H.HandleIncompleteSpecifier(Start, E - Start);
119  return true;
120  }
121 
122  // Look for '*' flag if it is present.
123  if (*I == '*') {
124  FS.setSuppressAssignment(I);
125  if (++I == E) {
126  H.HandleIncompleteSpecifier(Start, E - Start);
127  return true;
128  }
129  }
130 
131  // Look for the field width (if any). Unlike printf, this is either
132  // a fixed integer or isn't present.
134  if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
135  assert(Amt.getHowSpecified() == OptionalAmount::Constant);
136  FS.setFieldWidth(Amt);
137 
138  if (I == E) {
139  // No more characters left?
140  H.HandleIncompleteSpecifier(Start, E - Start);
141  return true;
142  }
143  }
144 
145  // Look for the length modifier.
146  if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) {
147  // No more characters left?
148  H.HandleIncompleteSpecifier(Start, E - Start);
149  return true;
150  }
151 
152  // Detect spurious null characters, which are likely errors.
153  if (*I == '\0') {
154  H.HandleNullChar(I);
155  return true;
156  }
157 
158  // Finally, look for the conversion specifier.
159  const char *conversionPosition = I++;
160  ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
161  switch (*conversionPosition) {
162  default:
163  break;
164  case '%': k = ConversionSpecifier::PercentArg; break;
165  case 'A': k = ConversionSpecifier::AArg; break;
166  case 'E': k = ConversionSpecifier::EArg; break;
167  case 'F': k = ConversionSpecifier::FArg; break;
168  case 'G': k = ConversionSpecifier::GArg; break;
169  case 'X': k = ConversionSpecifier::XArg; break;
170  case 'a': k = ConversionSpecifier::aArg; break;
171  case 'd': k = ConversionSpecifier::dArg; break;
172  case 'e': k = ConversionSpecifier::eArg; break;
173  case 'f': k = ConversionSpecifier::fArg; break;
174  case 'g': k = ConversionSpecifier::gArg; break;
175  case 'i': k = ConversionSpecifier::iArg; break;
176  case 'n': k = ConversionSpecifier::nArg; break;
177  case 'c': k = ConversionSpecifier::cArg; break;
178  case 'C': k = ConversionSpecifier::CArg; break;
179  case 'S': k = ConversionSpecifier::SArg; break;
180  case '[': k = ConversionSpecifier::ScanListArg; break;
181  case 'u': k = ConversionSpecifier::uArg; break;
182  case 'x': k = ConversionSpecifier::xArg; break;
183  case 'o': k = ConversionSpecifier::oArg; break;
184  case 's': k = ConversionSpecifier::sArg; break;
185  case 'p': k = ConversionSpecifier::pArg; break;
186  // Apple extensions
187  // Apple-specific
188  case 'D':
189  if (Target.getTriple().isOSDarwin())
191  break;
192  case 'O':
193  if (Target.getTriple().isOSDarwin())
195  break;
196  case 'U':
197  if (Target.getTriple().isOSDarwin())
199  break;
200  }
201  ScanfConversionSpecifier CS(conversionPosition, k);
202  if (k == ScanfConversionSpecifier::ScanListArg) {
203  if (ParseScanList(H, CS, I, E))
204  return true;
205  }
206  FS.setConversionSpecifier(CS);
207  if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
208  && !FS.usesPositionalArg())
209  FS.setArgIndex(argIndex++);
210 
211  // FIXME: '%' and '*' don't make sense. Issue a warning.
212  // FIXME: 'ConsumedSoFar' and '*' don't make sense.
213 
214  if (k == ScanfConversionSpecifier::InvalidSpecifier) {
215  unsigned Len = I - Beg;
216  if (ParseUTF8InvalidSpecifier(Beg, E, Len)) {
217  CS.setEndScanList(Beg + Len);
218  FS.setConversionSpecifier(CS);
219  }
220  // Assume the conversion takes one argument.
221  return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, Len);
222  }
223  return ScanfSpecifierResult(Start, FS);
224 }
225 
226 ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
227  const ScanfConversionSpecifier &CS = getConversionSpecifier();
228 
229  if (!CS.consumesDataArgument())
230  return ArgType::Invalid();
231 
232  switch(CS.getKind()) {
233  // Signed int.
237  switch (LM.getKind()) {
239  return ArgType::PtrTo(Ctx.IntTy);
243  return ArgType::PtrTo(Ctx.ShortTy);
245  return ArgType::PtrTo(Ctx.LongTy);
248  return ArgType::PtrTo(Ctx.LongLongTy);
250  return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
252  return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
254  // FIXME: ssize_t.
255  return ArgType();
257  return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
259  // GNU extension.
260  return ArgType::PtrTo(Ctx.LongLongTy);
266  return ArgType::Invalid();
267  }
268 
269  // Unsigned int.
276  switch (LM.getKind()) {
278  return ArgType::PtrTo(Ctx.UnsignedIntTy);
280  return ArgType::PtrTo(Ctx.UnsignedCharTy);
282  return ArgType::PtrTo(Ctx.UnsignedShortTy);
284  return ArgType::PtrTo(Ctx.UnsignedLongTy);
289  return ArgType::PtrTo(ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64"));
291  return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t"));
293  return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t"));
295  // FIXME: Unsigned version of ptrdiff_t?
296  return ArgType();
298  // GNU extension.
305  return ArgType::Invalid();
306  }
307 
308  // Float.
317  switch (LM.getKind()) {
319  return ArgType::PtrTo(Ctx.FloatTy);
321  return ArgType::PtrTo(Ctx.DoubleTy);
323  return ArgType::PtrTo(Ctx.LongDoubleTy);
324  default:
325  return ArgType::Invalid();
326  }
327 
328  // Char, string and scanlist.
332  switch (LM.getKind()) {
337  return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
342  if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
344  default:
345  return ArgType::Invalid();
346  }
349  // FIXME: Mac OS X specific?
350  switch (LM.getKind()) {
353  return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
356  return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *"));
358  if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
360  default:
361  return ArgType::Invalid();
362  }
363 
364  // Pointer.
367 
368  // Write-back.
370  switch (LM.getKind()) {
372  return ArgType::PtrTo(Ctx.IntTy);
374  return ArgType::PtrTo(Ctx.SignedCharTy);
376  return ArgType::PtrTo(Ctx.ShortTy);
378  return ArgType::PtrTo(Ctx.LongTy);
381  return ArgType::PtrTo(Ctx.LongLongTy);
383  return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
385  return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
387  return ArgType(); // FIXME: ssize_t
389  return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
391  return ArgType(); // FIXME: Is this a known extension?
397  return ArgType::Invalid();
398  }
399 
400  default:
401  break;
402  }
403 
404  return ArgType();
405 }
406 
407 bool ScanfSpecifier::fixType(QualType QT, QualType RawQT,
408  const LangOptions &LangOpt,
409  ASTContext &Ctx) {
410 
411  // %n is different from other conversion specifiers; don't try to fix it.
412  if (CS.getKind() == ConversionSpecifier::nArg)
413  return false;
414 
415  if (!QT->isPointerType())
416  return false;
417 
418  QualType PT = QT->getPointeeType();
419 
420  // If it's an enum, get its underlying type.
421  if (const EnumType *ETy = PT->getAs<EnumType>())
422  PT = ETy->getDecl()->getIntegerType();
423 
424  const BuiltinType *BT = PT->getAs<BuiltinType>();
425  if (!BT)
426  return false;
427 
428  // Pointer to a character.
429  if (PT->isAnyCharacterType()) {
430  CS.setKind(ConversionSpecifier::sArg);
431  if (PT->isWideCharType())
432  LM.setKind(LengthModifier::AsWideChar);
433  else
434  LM.setKind(LengthModifier::None);
435 
436  // If we know the target array length, we can use it as a field width.
437  if (const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(RawQT)) {
438  if (CAT->getSizeModifier() == ArrayType::Normal)
439  FieldWidth = OptionalAmount(OptionalAmount::Constant,
440  CAT->getSize().getZExtValue() - 1,
441  "", 0, false);
442 
443  }
444  return true;
445  }
446 
447  // Figure out the length modifier.
448  switch (BT->getKind()) {
449  // no modifier
450  case BuiltinType::UInt:
451  case BuiltinType::Int:
452  case BuiltinType::Float:
453  LM.setKind(LengthModifier::None);
454  break;
455 
456  // hh
457  case BuiltinType::Char_U:
458  case BuiltinType::UChar:
459  case BuiltinType::Char_S:
460  case BuiltinType::SChar:
461  LM.setKind(LengthModifier::AsChar);
462  break;
463 
464  // h
465  case BuiltinType::Short:
466  case BuiltinType::UShort:
467  LM.setKind(LengthModifier::AsShort);
468  break;
469 
470  // l
471  case BuiltinType::Long:
472  case BuiltinType::ULong:
473  case BuiltinType::Double:
474  LM.setKind(LengthModifier::AsLong);
475  break;
476 
477  // ll
478  case BuiltinType::LongLong:
479  case BuiltinType::ULongLong:
480  LM.setKind(LengthModifier::AsLongLong);
481  break;
482 
483  // L
484  case BuiltinType::LongDouble:
485  LM.setKind(LengthModifier::AsLongDouble);
486  break;
487 
488  // Don't know.
489  default:
490  return false;
491  }
492 
493  // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
494  if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus11))
495  namedTypeToLengthModifier(PT, LM);
496 
497  // If fixing the length modifier was enough, we are done.
498  if (hasValidLengthModifier(Ctx.getTargetInfo())) {
499  const analyze_scanf::ArgType &AT = getArgType(Ctx);
500  if (AT.isValid() && AT.matchesType(Ctx, QT))
501  return true;
502  }
503 
504  // Figure out the conversion specifier.
505  if (PT->isRealFloatingType())
506  CS.setKind(ConversionSpecifier::fArg);
507  else if (PT->isSignedIntegerType())
508  CS.setKind(ConversionSpecifier::dArg);
509  else if (PT->isUnsignedIntegerType())
510  CS.setKind(ConversionSpecifier::uArg);
511  else
512  llvm_unreachable("Unexpected type");
513 
514  return true;
515 }
516 
517 void ScanfSpecifier::toString(raw_ostream &os) const {
518  os << "%";
519 
520  if (usesPositionalArg())
521  os << getPositionalArgIndex() << "$";
522  if (SuppressAssignment)
523  os << "*";
524 
525  FieldWidth.toString(os);
526  os << LM.toString();
527  os << CS.toString();
528 }
529 
531  const char *I,
532  const char *E,
533  const LangOptions &LO,
534  const TargetInfo &Target) {
535 
536  unsigned argIndex = 0;
537 
538  // Keep looking for a format specifier until we have exhausted the string.
539  while (I != E) {
540  const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
541  LO, Target);
542  // Did a fail-stop error of any kind occur when parsing the specifier?
543  // If so, don't do any more processing.
544  if (FSR.shouldStop())
545  return true;
546  // Did we exhaust the string or encounter an error that
547  // we can recover from?
548  if (!FSR.hasValue())
549  continue;
550  // We have a format specifier. Pass it to the callback.
551  if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
552  I - FSR.getStart())) {
553  return true;
554  }
555  }
556  assert(I == E && "Format string not exhausted");
557  return false;
558 }
Kind getKind() const
Definition: Type.h:2060
clang::analyze_format_string::SpecifierResult< ScanfSpecifier > ScanfSpecifierResult
CanQualType LongLongTy
Definition: ASTContext.h:901
virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, const char *startSpecifier, unsigned specifierLen)
Definition: FormatString.h:655
A (possibly-)qualified type.
Definition: Type.h:598
bool isAnyCharacterType() const
Determine whether this type is any of the built-in character types.
Definition: Type.cpp:1686
CanQualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
bool ParseArgPosition(FormatStringHandler &H, FormatSpecifier &CS, const char *Start, const char *&Beg, const char *E)
CanQualType LongTy
Definition: ASTContext.h:901
CanQualType getIntMaxType() const
Return the unique type for "intmax_t" (C99 7.18.1.5), defined in <stdint.h>.
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:92
bool ParseScanfString(FormatStringHandler &H, const char *beg, const char *end, const LangOptions &LO, const TargetInfo &Target)
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:48
static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, const char *&Beg, const char *E, unsigned &argIndex, const LangOptions &LO, const TargetInfo &Target)
const TargetInfo & getTargetInfo() const
Definition: ASTContext.h:588
Represents the length modifier in a format string in scanf/printf.
Definition: FormatString.h:65
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
Definition: Type.cpp:1746
CanQualType LongDoubleTy
Definition: ASTContext.h:904
detail::InMemoryDirectory::const_iterator I
CanQualType UnsignedCharTy
Definition: ASTContext.h:902
static ArgType PtrTo(const ArgType &A)
Create an ArgType which corresponds to the type pointer to A.
Definition: FormatString.h:259
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee...
Definition: Type.cpp:415
bool isRealFloatingType() const
Floating point categories.
Definition: Type.cpp:1799
Exposes information about the current target.
CanQualType getUIntMaxType() const
Return the unique type for "uintmax_t" (C99 7.18.1.5), defined in <stdint.h>.
CanQualType ShortTy
Definition: ASTContext.h:901
virtual void HandleNullChar(const char *nullCharacter)
Definition: FormatString.h:610
QualType getWideCharType() const
Return the type of wide characters.
Definition: ASTContext.h:1346
CanQualType SignedCharTy
Definition: ASTContext.h:901
Kind
A helper class that allows the use of isa/cast/dyncast to detect TagType objects of enums...
Definition: Type.h:3733
CanQualType FloatTy
Definition: ASTContext.h:904
const ConstantArrayType * getAsConstantArrayType(QualType T) const
Definition: ASTContext.h:2114
CanQualType UnsignedShortTy
Definition: ASTContext.h:902
CanQualType UnsignedLongLongTy
Definition: ASTContext.h:903
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
detail::InMemoryDirectory::const_iterator E
static bool ParseScanList(FormatStringHandler &H, ScanfConversionSpecifier &CS, const char *&Beg, const char *E)
bool ParseUTF8InvalidSpecifier(const char *SpecifierBegin, const char *FmtStrEnd, unsigned &Len)
Returns true if the invalid specifier in SpecifierBegin is a UTF-8 string; check that it won't go fur...
bool isWideCharType() const
Definition: Type.cpp:1665
std::string toString(const til::SExpr *E)
QualType getPointerDiffType() const
Return the unique type for "ptrdiff_t" (C99 7.17) defined in <stddef.h>.
const T * getAs() const
Member-template getAs<specific type>'.
Definition: Type.h:5818
CanQualType UnsignedLongTy
Definition: ASTContext.h:902
bool ParseLengthModifier(FormatSpecifier &FS, const char *&Beg, const char *E, const LangOptions &LO, bool IsScanf=false)
Returns true if a LengthModifier was parsed and installed in the FormatSpecifier& argument...
virtual void HandleIncompleteSpecifier(const char *startSpecifier, unsigned specifierLen)
Definition: FormatString.h:619
This class is used for builtin types like 'int'.
Definition: Type.h:2039
Defines the clang::TargetInfo interface.
OptionalAmount ParseAmount(const char *&Beg, const char *E)
CanQualType IntTy
Definition: ASTContext.h:901
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char, signed char, short, int, long..], or an enum decl which has a signed representation.
Definition: Type.cpp:1706
virtual void HandleIncompleteScanList(const char *start, const char *end)
Definition: FormatString.h:661
CanQualType DoubleTy
Definition: ASTContext.h:904
virtual bool HandleInvalidScanfConversionSpecifier(const analyze_scanf::ScanfSpecifier &FS, const char *startSpecifier, unsigned specifierLen)
Definition: FormatString.h:648
Represents the canonical version of C arrays with a specified constant size.
Definition: Type.h:2512
CanQualType UnsignedIntTy
Definition: ASTContext.h:902
bool isPointerType() const
Definition: Type.h:5482