clang  6.0.0svn
ScanfFormatString.cpp
Go to the documentation of this file.
1 //= ScanfFormatString.cpp - Analysis of scanf format strings --*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Handling of format strings in scanf and friends. The structure of format
11 // strings for fscanf() is described in C99 7.19.6.2.
12 //
13 //===----------------------------------------------------------------------===//
14 
16 #include "FormatStringParsing.h"
17 #include "clang/Basic/TargetInfo.h"
18 
27 using namespace clang;
28 
31 
34  const char *&Beg, const char *E) {
 // Scans the body of a scan-list conversion, looking for the ']' that ends it.
 //
 // The visible caller passes a pointer positioned just past the '[' conversion
 // character, so `start` (Beg - 1) is the position reported to the handler as
 // the beginning of the scan list.
 //
 // Returns true after calling H.HandleIncompleteScanList() when the end of the
 // string (E) is reached before a terminating ']' is found. Returns false
 // after recording the position of the terminating ']' with
 // CS.setEndScanList().
 //
 // NOTE(review): the first line(s) of this signature (return type, function
 // name, and the declarations of H and CS) are not present in this listing.
35  const char *I = Beg;
36  const char *start = I - 1;
 // NOTE(review): UpdateOnReturn is declared elsewhere; presumably it stores I
 // back into Beg when this function returns -- confirm in
 // FormatStringParsing.h.
37  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
38 
39  // No more characters?
40  if (I == E) {
41  H.HandleIncompleteScanList(start, I);
42  return true;
43  }
44 
45  // Special case: ']' is the first character.
 // It is stepped over here, so the search loop below does not treat it as
 // the end of the scan list.
46  if (*I == ']') {
47  if (++I == E) {
 // I already equals E here, so I - 1 is the last character of the string.
48  H.HandleIncompleteScanList(start, I - 1);
49  return true;
50  }
51  }
52 
53  // Special case: "^]" are the first characters.
 // The I + 1 != E test keeps the read of I[1] inside the string.
54  if (I + 1 != E && I[0] == '^' && I[1] == ']') {
55  I += 2;
56  if (I == E) {
57  H.HandleIncompleteScanList(start, I - 1);
58  return true;
59  }
60  }
61 
62  // Look for a ']' character which denotes the end of the scan list.
 // I != E holds on entry to the loop (checked on every path above) and is
 // re-checked after each increment, so *I is never read at E.
63  while (*I != ']') {
64  if (++I == E) {
65  H.HandleIncompleteScanList(start, I - 1);
66  return true;
67  }
68  }
69 
 // I now points at the terminating ']'.
70  CS.setEndScanList(I);
71  return false;
72 }
73 
74 // FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
75 // We can possibly refactor.
77  const char *&Beg,
78  const char *E,
79  unsigned &argIndex,
80  const LangOptions &LO,
81  const TargetInfo &Target) {
 // Parses at most one scanf conversion specification from [Beg, E).
 //
 // Steps, in order: find the next '%'; parse an optional argument position;
 // an optional '*' (assignment suppression); an optional field width; an
 // optional length modifier; and finally the conversion character, including
 // the scan list that follows '['.
 //
 // Return statements visible here:
 //   - `true`  after a handler (H) has been told about a null character, an
 //             incomplete specifier, or an incomplete scan list, and when
 //             ParseArgPosition() returns true;
 //   - `false` when no '%' is found before E;
 //   - the negated result of H.HandleInvalidScanfConversionSpecifier() when
 //     the conversion character is not recognized;
 //   - ScanfSpecifierResult(Start, FS) for a recognized specifier.
 // NOTE(review): how a bool converts to the result type is defined outside
 // this listing; the caller tests shouldStop() and hasValue() on the result.
 //
 // NOTE(review): the first line(s) of this signature (return type, function
 // name, and the declaration of H) are not present in this listing.
82  using namespace clang::analyze_format_string;
83  using namespace clang::analyze_scanf;
84  const char *I = Beg;
85  const char *Start = nullptr;
 // NOTE(review): UpdateOnReturn is declared elsewhere; presumably it stores I
 // back into Beg when this function returns -- confirm in
 // FormatStringParsing.h.
86  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
87 
88  // Look for a '%' character that indicates the start of a format specifier.
89  for ( ; I != E ; ++I) {
90  char c = *I;
91  if (c == '\0') {
92  // Detect spurious null characters, which are likely errors.
93  H.HandleNullChar(I);
94  return true;
95  }
96  if (c == '%') {
97  Start = I++; // Record the start of the format specifier.
98  break;
99  }
100  }
101 
102  // No format specifier found?
103  if (!Start)
104  return false;
105 
106  if (I == E) {
107  // No more characters left?
108  H.HandleIncompleteSpecifier(Start, E - Start);
109  return true;
110  }
111 
112  ScanfSpecifier FS;
113  if (ParseArgPosition(H, FS, Start, I, E))
114  return true;
115 
116  if (I == E) {
117  // No more characters left?
118  H.HandleIncompleteSpecifier(Start, E - Start);
119  return true;
120  }
121 
122  // Look for '*' flag if it is present.
123  if (*I == '*') {
124  FS.setSuppressAssignment(I);
125  if (++I == E) {
126  H.HandleIncompleteSpecifier(Start, E - Start);
127  return true;
128  }
129  }
130 
131  // Look for the field width (if any). Unlike printf, this is either
132  // a fixed integer or isn't present.
 // NOTE(review): the statements that declare Amt and open the block closed
 // at listing line 143 (listing lines 133-134) are not present in this
 // listing.
135  assert(Amt.getHowSpecified() == OptionalAmount::Constant);
136  FS.setFieldWidth(Amt);
137 
138  if (I == E) {
139  // No more characters left?
140  H.HandleIncompleteSpecifier(Start, E - Start);
141  return true;
142  }
143  }
144 
145  // Look for the length modifier.
 // The incomplete-specifier error is raised only when a modifier was parsed
 // AND it consumed the rest of the string.
146  if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) {
147  // No more characters left?
148  H.HandleIncompleteSpecifier(Start, E - Start);
149  return true;
150  }
151 
152  // Detect spurious null characters, which are likely errors.
153  if (*I == '\0') {
154  H.HandleNullChar(I);
155  return true;
156  }
157 
158  // Finally, look for the conversion specifier.
 // k stays InvalidSpecifier for any character without a case below.
159  const char *conversionPosition = I++;
160  ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
161  switch (*conversionPosition) {
162  default:
163  break;
164  case '%': k = ConversionSpecifier::PercentArg; break;
165  case 'A': k = ConversionSpecifier::AArg; break;
166  case 'E': k = ConversionSpecifier::EArg; break;
167  case 'F': k = ConversionSpecifier::FArg; break;
168  case 'G': k = ConversionSpecifier::GArg; break;
169  case 'X': k = ConversionSpecifier::XArg; break;
170  case 'a': k = ConversionSpecifier::aArg; break;
171  case 'd': k = ConversionSpecifier::dArg; break;
172  case 'e': k = ConversionSpecifier::eArg; break;
173  case 'f': k = ConversionSpecifier::fArg; break;
174  case 'g': k = ConversionSpecifier::gArg; break;
175  case 'i': k = ConversionSpecifier::iArg; break;
176  case 'n': k = ConversionSpecifier::nArg; break;
177  case 'c': k = ConversionSpecifier::cArg; break;
178  case 'C': k = ConversionSpecifier::CArg; break;
179  case 'S': k = ConversionSpecifier::SArg; break;
180  case '[': k = ConversionSpecifier::ScanListArg; break;
181  case 'u': k = ConversionSpecifier::uArg; break;
182  case 'x': k = ConversionSpecifier::xArg; break;
183  case 'o': k = ConversionSpecifier::oArg; break;
184  case 's': k = ConversionSpecifier::sArg; break;
185  case 'p': k = ConversionSpecifier::pArg; break;
186  // Apple extensions
187  // Apple-specific
 // NOTE(review): for 'D', 'O' and 'U' the statement guarded by each
 // isOSDarwin() test (listing lines 190, 194, 198) is not present in this
 // listing; presumably it assigns k -- confirm against the real source.
188  case 'D':
189  if (Target.getTriple().isOSDarwin())
191  break;
192  case 'O':
193  if (Target.getTriple().isOSDarwin())
195  break;
196  case 'U':
197  if (Target.getTriple().isOSDarwin())
199  break;
200  }
201  ScanfConversionSpecifier CS(conversionPosition, k);
 // For '[', consume the scan list that follows; I is advanced by the callee.
202  if (k == ScanfConversionSpecifier::ScanListArg) {
203  if (ParseScanList(H, CS, I, E))
204  return true;
205  }
206  FS.setConversionSpecifier(CS);
 // NOTE(review): the first half of the condition below (listing line 207) is
 // not present in this listing. When the full condition holds, the
 // specifier takes the next sequential argument index.
208  && !FS.usesPositionalArg())
209  FS.setArgIndex(argIndex++);
210 
211  // FIXME: '%' and '*' don't make sense. Issue a warning.
212  // FIXME: 'ConsumedSoFar' and '*' don't make sense.
213 
214  if (k == ScanfConversionSpecifier::InvalidSpecifier) {
 // Len starts as the number of characters consumed since Beg and is passed
 // by reference to ParseUTF8InvalidSpecifier(), which may change it.
215  unsigned Len = I - Beg;
216  if (ParseUTF8InvalidSpecifier(Beg, E, Len)) {
217  CS.setEndScanList(Beg + Len);
218  FS.setConversionSpecifier(CS);
219  }
220  // Assume the conversion takes one argument.
221  return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, Len);
222  }
223  return ScanfSpecifierResult(Start, FS);
224 }
225 
// Returns the argument type this specifier expects for its data argument.
//
// If the conversion does not consume a data argument the result is
// ArgType::Invalid(). Otherwise the result is chosen by an outer switch on the
// conversion kind and, inside it, a switch on the length modifier (LM). The
// visible results are ArgType::PtrTo(...) of a builtin or named type,
// ArgType::Invalid(), or a default-constructed ArgType().
//
// NOTE(review): almost all `case` labels of the outer and inner switches (and
// a few statements) are not present in this listing, so the mapping from
// conversion kind / length modifier to each return below cannot be read off
// here -- consult the real source before changing any of them.
226 ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
227  const ScanfConversionSpecifier &CS = getConversionSpecifier();
228 
229  if (!CS.consumesDataArgument())
230  return ArgType::Invalid();
231 
232  switch(CS.getKind()) {
233  // Signed int.
237  switch (LM.getKind()) {
239  return ArgType::PtrTo(Ctx.IntTy);
243  return ArgType::PtrTo(Ctx.ShortTy);
245  return ArgType::PtrTo(Ctx.LongTy);
248  return ArgType::PtrTo(Ctx.LongLongTy);
250  return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
252  return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
254  return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t"));
256  return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
258  // GNU extension.
259  return ArgType::PtrTo(Ctx.LongLongTy);
265  return ArgType::Invalid();
266  }
267 
268  // Unsigned int.
275  switch (LM.getKind()) {
277  return ArgType::PtrTo(Ctx.UnsignedIntTy);
279  return ArgType::PtrTo(Ctx.UnsignedCharTy);
281  return ArgType::PtrTo(Ctx.UnsignedShortTy);
283  return ArgType::PtrTo(Ctx.UnsignedLongTy);
288  return ArgType::PtrTo(ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64"));
290  return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t"));
292  return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t"));
294  return ArgType::PtrTo(
295  ArgType(Ctx.getUnsignedPointerDiffType(), "unsigned ptrdiff_t"));
297  // GNU extension.
304  return ArgType::Invalid();
305  }
306 
307  // Float.
316  switch (LM.getKind()) {
318  return ArgType::PtrTo(Ctx.FloatTy);
320  return ArgType::PtrTo(Ctx.DoubleTy);
322  return ArgType::PtrTo(Ctx.LongDoubleTy);
323  default:
324  return ArgType::Invalid();
325  }
326 
327  // Char, string and scanlist.
331  switch (LM.getKind()) {
336  return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
 // NOTE(review): the statement guarded by this MSVCRT test is not present in
 // this listing; when the test fails, control falls through to `default`.
341  if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
343  LLVM_FALLTHROUGH;
344  default:
345  return ArgType::Invalid();
346  }
349  // FIXME: Mac OS X specific?
350  switch (LM.getKind()) {
353  return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
356  return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *"));
 // NOTE(review): as above, the statement guarded by this MSVCRT test is not
 // present in this listing.
358  if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
360  LLVM_FALLTHROUGH;
361  default:
362  return ArgType::Invalid();
363  }
364 
365  // Pointer.
368 
369  // Write-back.
371  switch (LM.getKind()) {
373  return ArgType::PtrTo(Ctx.IntTy);
375  return ArgType::PtrTo(Ctx.SignedCharTy);
377  return ArgType::PtrTo(Ctx.ShortTy);
379  return ArgType::PtrTo(Ctx.LongTy);
382  return ArgType::PtrTo(Ctx.LongLongTy);
384  return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
386  return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
388  return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t"));
390  return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
392  return ArgType(); // FIXME: Is this a known extension?
398  return ArgType::Invalid();
399  }
400 
401  default:
402  break;
403  }
404 
 // Reached when the outer switch takes its `default` branch.
405  return ArgType();
406 }
407 
// Rewrites this specifier in place so that it matches an argument of type QT.
//
// QT      - type of the argument; must be a pointer type, otherwise nothing is
//           changed.
// RawQT   - used only to look for a constant-size array, whose size supplies a
//           field width for character targets.
// LangOpt - typedef-based length modifiers are considered only when C99 or
//           CPlusPlus11 is set.
// Ctx     - AST context used for type queries and target information.
//
// Returns true when the specifier was updated (or when fixing the length
// modifier alone already makes it match). Returns false when no fix is
// attempted: for %n, for a non-pointer QT, for an incomplete enum pointee, for
// a pointee that is not a builtin type, or for a builtin kind not listed in
// the switch below.
408 bool ScanfSpecifier::fixType(QualType QT, QualType RawQT,
409  const LangOptions &LangOpt,
410  ASTContext &Ctx) {
411 
412  // %n is different from other conversion specifiers; don't try to fix it.
413  if (CS.getKind() == ConversionSpecifier::nArg)
414  return false;
415 
416  if (!QT->isPointerType())
417  return false;
418 
419  QualType PT = QT->getPointeeType();
420 
421  // If it's an enum, get its underlying type.
422  if (const EnumType *ETy = PT->getAs<EnumType>()) {
423  // Don't try to fix incomplete enums.
424  if (!ETy->getDecl()->isComplete())
425  return false;
426  PT = ETy->getDecl()->getIntegerType();
427  }
428 
429  const BuiltinType *BT = PT->getAs<BuiltinType>();
430  if (!BT)
431  return false;
432 
433  // Pointer to a character.
 // Character targets always become a string conversion (sArg); the length
 // modifier is AsWideChar for wide characters and None otherwise.
434  if (PT->isAnyCharacterType()) {
435  CS.setKind(ConversionSpecifier::sArg);
436  if (PT->isWideCharType())
437  LM.setKind(LengthModifier::AsWideChar);
438  else
439  LM.setKind(LengthModifier::None);
440 
441  // If we know the target array length, we can use it as a field width.
442  if (const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(RawQT)) {
443  if (CAT->getSizeModifier() == ArrayType::Normal)
 // NOTE(review): the start of the statement completed by the next two lines
 // (listing line 444) is not present in this listing. The value passed is
 // the array size minus one.
445  CAT->getSize().getZExtValue() - 1,
446  "", 0, false);
447 
448  }
449  return true;
450  }
451 
452  // Figure out the length modifier.
453  switch (BT->getKind()) {
454  // no modifier
455  case BuiltinType::UInt:
456  case BuiltinType::Int:
457  case BuiltinType::Float:
458  LM.setKind(LengthModifier::None);
459  break;
460 
461  // hh
 // NOTE(review): these four kinds look like they would already have returned
 // through the isAnyCharacterType() branch above, which would make these
 // cases unreachable -- confirm against Type::isAnyCharacterType().
462  case BuiltinType::Char_U:
463  case BuiltinType::UChar:
464  case BuiltinType::Char_S:
465  case BuiltinType::SChar:
466  LM.setKind(LengthModifier::AsChar);
467  break;
468 
469  // h
470  case BuiltinType::Short:
471  case BuiltinType::UShort:
472  LM.setKind(LengthModifier::AsShort);
473  break;
474 
475  // l
476  case BuiltinType::Long:
477  case BuiltinType::ULong:
478  case BuiltinType::Double:
479  LM.setKind(LengthModifier::AsLong);
480  break;
481 
482  // ll
483  case BuiltinType::LongLong:
484  case BuiltinType::ULongLong:
485  LM.setKind(LengthModifier::AsLongLong);
486  break;
487 
488  // L
489  case BuiltinType::LongDouble:
490  LM.setKind(LengthModifier::AsLongDouble);
491  break;
492 
493  // Don't know.
494  default:
495  return false;
496  }
497 
498  // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
499  if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus11))
500  namedTypeToLengthModifier(PT, LM);
501 
502  // If fixing the length modifier was enough, we are done.
 // In that case the conversion specifier (CS) is left unchanged.
503  if (hasValidLengthModifier(Ctx.getTargetInfo())) {
504  const analyze_scanf::ArgType &AT = getArgType(Ctx);
505  if (AT.isValid() && AT.matchesType(Ctx, QT))
506  return true;
507  }
508 
509  // Figure out the conversion specifier.
 // Floating point maps to f, signed integers to d, unsigned integers to u.
510  if (PT->isRealFloatingType())
511  CS.setKind(ConversionSpecifier::fArg);
512  else if (PT->isSignedIntegerType())
513  CS.setKind(ConversionSpecifier::dArg);
514  else if (PT->isUnsignedIntegerType())
515  CS.setKind(ConversionSpecifier::uArg);
516  else
517  llvm_unreachable("Unexpected type");
518 
519  return true;
520 }
521 
522 void ScanfSpecifier::toString(raw_ostream &os) const {
523  os << "%";
524 
525  if (usesPositionalArg())
526  os << getPositionalArgIndex() << "$";
527  if (SuppressAssignment)
528  os << "*";
529 
530  FieldWidth.toString(os);
531  os << LM.toString();
532  os << CS.toString();
533 }
534 
536  const char *I,
537  const char *E,
538  const LangOptions &LO,
539  const TargetInfo &Target) {
 // Walks the format string [I, E), parsing one specifier at a time and
 // passing each parsed specifier to H.HandleScanfSpecifier().
 //
 // Returns true as soon as parsing reports a fail-stop error or the handler
 // returns false; returns false once the whole string has been consumed.
 //
 // NOTE(review): the first line of this signature (return type, function
 // name, and the declaration of H) is not present in this listing.
540 
 // Running argument index; passed by reference to ParseScanfSpecifier.
541  unsigned argIndex = 0;
542 
543  // Keep looking for a format specifier until we have exhausted the string.
544  while (I != E) {
 // I is passed by reference to ParseScanfSpecifier.
545  const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
546  LO, Target);
547  // Did a fail-stop error of any kind occur when parsing the specifier?
548  // If so, don't do any more processing.
549  if (FSR.shouldStop())
550  return true;
551  // Did we exhaust the string or encounter an error that
552  // we can recover from?
553  if (!FSR.hasValue())
554  continue;
555  // We have a format specifier. Pass it to the callback.
 // The length passed is the distance from the specifier's start to I.
556  if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
557  I - FSR.getStart())) {
558  return true;
559  }
560  }
561  assert(I == E && "Format string not exhausted");
562  return false;
563 }
clang::analyze_format_string::SpecifierResult< ScanfSpecifier > ScanfSpecifierResult
CanQualType LongLongTy
Definition: ASTContext.h:973
virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, const char *startSpecifier, unsigned specifierLen)
Definition: FormatString.h:674
A (possibly-)qualified type.
Definition: Type.h:614
QualType getPointerDiffType() const
Return the unique type for "ptrdiff_t" (C99 7.17) defined in <stddef.h>.
const OptionalFlag & getSuppressAssignment() const
Definition: FormatString.h:589
Kind getKind() const
Definition: Type.h:2103
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee...
Definition: Type.cpp:435
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
Definition: TargetInfo.h:782
bool isRealFloatingType() const
Floating point categories.
Definition: Type.cpp:1858
bool ParseArgPosition(FormatStringHandler &H, FormatSpecifier &CS, const char *Start, const char *&Beg, const char *E)
CanQualType LongTy
Definition: ASTContext.h:973
const TargetInfo & getTargetInfo() const
Definition: ASTContext.h:645
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
Definition: Type.cpp:1805
const T * getAs() const
Member-template getAs<specific type>'.
Definition: Type.h:6099
bool isWideCharType() const
Definition: Type.cpp:1724
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:128
bool ParseScanfString(FormatStringHandler &H, const char *beg, const char *end, const LangOptions &LO, const TargetInfo &Target)
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:48
QualType getUnsignedPointerDiffType() const
Return the unique unsigned counterpart of "ptrdiff_t" integer type.
static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, const char *&Beg, const char *E, unsigned &argIndex, const LangOptions &LO, const TargetInfo &Target)
void setFieldWidth(const OptionalAmount &Amt)
Definition: FormatString.h:394
MatchKind matchesType(ASTContext &C, QualType argTy) const
Represents the length modifier in a format string in scanf/printf.
Definition: FormatString.h:65
CanQualType LongDoubleTy
Definition: ASTContext.h:976
CanQualType UnsignedCharTy
Definition: ASTContext.h:974
static ArgType PtrTo(const ArgType &A)
Create an ArgType which corresponds to the type pointer to A.
Definition: FormatString.h:272
Exposes information about the current target.
Definition: TargetInfo.h:55
CanQualType ShortTy
Definition: ASTContext.h:973
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char, signed char, short, int, long..], or an enum decl which has a signed representation.
Definition: Type.cpp:1765
virtual void HandleNullChar(const char *nullCharacter)
Definition: FormatString.h:629
CanQualType SignedCharTy
Definition: ASTContext.h:973
CanQualType getUIntMaxType() const
Return the unique type for "uintmax_t" (C99 7.18.1.5), defined in <stdint.h>.
QualType getWideCharType() const
Return the type of wide characters.
Definition: ASTContext.h:1477
Kind
A helper class that allows the use of isa/cast/dyncast to detect TagType objects of enums...
Definition: Type.h:3864
const ConstantArrayType * getAsConstantArrayType(QualType T) const
Definition: ASTContext.h:2272
CanQualType FloatTy
Definition: ASTContext.h:976
CanQualType getSignedSizeType() const
Return the unique signed counterpart of the integer type corresponding to size_t. ...
void setConversionSpecifier(const ScanfConversionSpecifier &cs)
Definition: FormatString.h:593
bool isAnyCharacterType() const
Determine whether this type is any of the built-in character types.
Definition: Type.cpp:1745
CanQualType UnsignedShortTy
Definition: ASTContext.h:974
Dataflow Directional Tag Classes.
void setSuppressAssignment(const char *position)
Definition: FormatString.h:585
Pieces specific to fscanf format strings.
Definition: FormatString.h:557
CanQualType UnsignedLongLongTy
Definition: ASTContext.h:975
static bool ParseScanList(FormatStringHandler &H, ScanfConversionSpecifier &CS, const char *&Beg, const char *E)
bool ParseUTF8InvalidSpecifier(const char *SpecifierBegin, const char *FmtStrEnd, unsigned &Len)
Returns true if the invalid specifier in SpecifierBegin is a UTF-8 string; check that it won't go fur...
Common components of both fprintf and fscanf format strings.
Definition: FormatString.h:30
std::string toString(const til::SExpr *E)
CanQualType UnsignedLongTy
Definition: ASTContext.h:974
bool ParseLengthModifier(FormatSpecifier &FS, const char *&Beg, const char *E, const LangOptions &LO, bool IsScanf=false)
Returns true if a LengthModifier was parsed and installed in the FormatSpecifier& argument...
virtual void HandleIncompleteSpecifier(const char *startSpecifier, unsigned specifierLen)
Definition: FormatString.h:638
This class is used for builtin types like 'int'.
Definition: Type.h:2082
Defines the clang::TargetInfo interface.
OptionalAmount ParseAmount(const char *&Beg, const char *E)
CanQualType IntTy
Definition: ASTContext.h:973
bool isPointerType() const
Definition: Type.h:5766
CanQualType getIntMaxType() const
Return the unique type for "intmax_t" (C99 7.18.1.5), defined in <stdint.h>.
virtual void HandleIncompleteScanList(const char *start, const char *end)
Definition: FormatString.h:680
CanQualType DoubleTy
Definition: ASTContext.h:976
virtual bool HandleInvalidScanfConversionSpecifier(const analyze_scanf::ScanfSpecifier &FS, const char *startSpecifier, unsigned specifierLen)
Definition: FormatString.h:667
Represents the canonical version of C arrays with a specified constant size.
Definition: Type.h:2551
CanQualType UnsignedIntTy
Definition: ASTContext.h:974
CanQualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.