clang  9.0.0svn
ScanfFormatString.cpp
Go to the documentation of this file.
1 //= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Handling of format strings in scanf and friends. The structure of format
10 // strings for fscanf() is described in C99 7.19.6.2.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/AST/FormatString.h"
15 #include "FormatStringParsing.h"
16 #include "clang/Basic/TargetInfo.h"
17 
26 using namespace clang;
27 
30 
33  const char *&Beg, const char *E) {
    // Scan-list parser: locates the ']' that terminates a "%[...]" scan list.
    //
    // NOTE(review): the first line of this signature is absent from this
    // listing (source lines 31-32 are skipped); H and CS used below are
    // parameters declared on that missing line.
    //
    // Beg is taken by reference and E marks the end of the format string.
    // The caller in this file passes a pointer one past the '[' conversion
    // character, so `start` (I - 1) refers to the '[' itself.
    //
    // Returns true after reporting an unterminated list through
    // H.HandleIncompleteScanList; returns false after recording the position
    // of the closing ']' with CS.setEndScanList.
34  const char *I = Beg;
35  const char *start = I - 1;
    // Presumably writes I back into Beg when this function returns (the
    // UpdateOnReturn helper is defined outside this listing) - TODO confirm.
36  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
37 
38  // No more characters?
39  if (I == E) {
40  H.HandleIncompleteScanList(start, I);
41  return true;
42  }
43 
44  // Special case: ']' is the first character.
    // It is stepped over here so that the search loop below does not take it
    // for the terminator.
45  if (*I == ']') {
46  if (++I == E) {
47  H.HandleIncompleteScanList(start, I - 1);
48  return true;
49  }
50  }
51 
52  // Special case: "^]" are the first characters.
    // The `I + 1 != E` test guards the read of I[1].
53  if (I + 1 != E && I[0] == '^' && I[1] == ']') {
54  I += 2;
55  if (I == E) {
56  H.HandleIncompleteScanList(start, I - 1);
57  return true;
58  }
59  }
60 
61  // Look for a ']' character which denotes the end of the scan list.
    // I != E holds on entry to the loop (every path above that reaches E
    // returns), and the loop re-checks against E after each increment.
62  while (*I != ']') {
63  if (++I == E) {
64  H.HandleIncompleteScanList(start, I - 1);
65  return true;
66  }
67  }
68 
    // I now points at the terminating ']'.
69  CS.setEndScanList(I);
70  return false;
71 }
72 
73 // FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
74 // We can possibly refactor.
76  const char *&Beg,
77  const char *E,
78  unsigned &argIndex,
79  const LangOptions &LO,
80  const TargetInfo &Target) {
    // Parses at most one scanf conversion specification starting the search
    // at Beg and not reading past E.
    //
    // NOTE(review): the first line of this signature (source line 75) is
    // absent from this listing; H used below is declared there. Several body
    // lines are absent as well and are flagged where the numbering jumps.
    //
    // Visible outcomes:
    //  - `return true`  after a handler callback reported a problem (null
    //    character, incomplete specifier, incomplete scan list), or when
    //    ParseArgPosition returned true;
    //  - `return false` when no '%' was found before E;
    //  - the negated result of H.HandleInvalidScanfConversionSpecifier for an
    //    unrecognised conversion character;
    //  - ScanfSpecifierResult(Start, FS) for a successfully parsed specifier.
    // How the bool returns convert into the result object is defined outside
    // this listing.
81  using namespace clang::analyze_format_string;
82  using namespace clang::analyze_scanf;
83  const char *I = Beg;
84  const char *Start = nullptr;
    // Presumably writes I back into Beg when this function returns (the
    // UpdateOnReturn helper is defined outside this listing) - TODO confirm.
85  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
86 
87  // Look for a '%' character that indicates the start of a format specifier.
88  for ( ; I != E ; ++I) {
89  char c = *I;
90  if (c == '\0') {
91  // Detect spurious null characters, which are likely errors.
92  H.HandleNullChar(I);
93  return true;
94  }
95  if (c == '%') {
96  Start = I++; // Record the start of the format specifier.
97  break;
98  }
99  }
100 
101  // No format specifier found?
102  if (!Start)
103  return false;
104 
105  if (I == E) {
106  // No more characters left?
107  H.HandleIncompleteSpecifier(Start, E - Start);
108  return true;
109  }
110 
111  ScanfSpecifier FS;
     // Optional positional-argument prefix; a true result aborts parsing.
112  if (ParseArgPosition(H, FS, Start, I, E))
113  return true;
114 
115  if (I == E) {
116  // No more characters left?
117  H.HandleIncompleteSpecifier(Start, E - Start);
118  return true;
119  }
120 
121  // Look for '*' flag if it is present.
122  if (*I == '*') {
123  FS.setSuppressAssignment(I);
124  if (++I == E) {
125  H.HandleIncompleteSpecifier(Start, E - Start);
126  return true;
127  }
128  }
129 
130  // Look for the field width (if any). Unlike printf, this is either
131  // a fixed integer or isn't present.
     // NOTE(review): source lines 132-133 are absent from this listing. They
     // declare Amt and open the block that is closed at line 142.
134  assert(Amt.getHowSpecified() == OptionalAmount::Constant);
135  FS.setFieldWidth(Amt);
136 
137  if (I == E) {
138  // No more characters left?
139  H.HandleIncompleteSpecifier(Start, E - Start);
140  return true;
141  }
142  }
143 
144  // Look for the length modifier.
145  if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) {
146  // No more characters left?
147  H.HandleIncompleteSpecifier(Start, E - Start);
148  return true;
149  }
150 
151  // Detect spurious null characters, which are likely errors.
152  if (*I == '\0') {
153  H.HandleNullChar(I);
154  return true;
155  }
156 
157  // Finally, look for the conversion specifier.
     // conversionPosition keeps the conversion character; I moves one past it.
158  const char *conversionPosition = I++;
159  ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
160  switch (*conversionPosition) {
161  default:
162  break;
163  case '%': k = ConversionSpecifier::PercentArg; break;
164  case 'A': k = ConversionSpecifier::AArg; break;
165  case 'E': k = ConversionSpecifier::EArg; break;
166  case 'F': k = ConversionSpecifier::FArg; break;
167  case 'G': k = ConversionSpecifier::GArg; break;
168  case 'X': k = ConversionSpecifier::XArg; break;
169  case 'a': k = ConversionSpecifier::aArg; break;
170  case 'd': k = ConversionSpecifier::dArg; break;
171  case 'e': k = ConversionSpecifier::eArg; break;
172  case 'f': k = ConversionSpecifier::fArg; break;
173  case 'g': k = ConversionSpecifier::gArg; break;
174  case 'i': k = ConversionSpecifier::iArg; break;
175  case 'n': k = ConversionSpecifier::nArg; break;
176  case 'c': k = ConversionSpecifier::cArg; break;
177  case 'C': k = ConversionSpecifier::CArg; break;
178  case 'S': k = ConversionSpecifier::SArg; break;
179  case '[': k = ConversionSpecifier::ScanListArg; break;
180  case 'u': k = ConversionSpecifier::uArg; break;
181  case 'x': k = ConversionSpecifier::xArg; break;
182  case 'o': k = ConversionSpecifier::oArg; break;
183  case 's': k = ConversionSpecifier::sArg; break;
184  case 'p': k = ConversionSpecifier::pArg; break;
185  // Apple extensions
186  // Apple-specific
     // NOTE(review): the statement guarded by each Darwin check below (source
     // lines 189, 193, 197) is absent from this listing.
187  case 'D':
188  if (Target.getTriple().isOSDarwin())
190  break;
191  case 'O':
192  if (Target.getTriple().isOSDarwin())
194  break;
195  case 'U':
196  if (Target.getTriple().isOSDarwin())
198  break;
199  }
200  ScanfConversionSpecifier CS(conversionPosition, k);
     // A '[' conversion is followed by a scan list; I is advanced past its body.
201  if (k == ScanfConversionSpecifier::ScanListArg) {
202  if (ParseScanList(H, CS, I, E))
203  return true;
204  }
205  FS.setConversionSpecifier(CS);
     // NOTE(review): source line 206, which begins this condition, is absent
     // from this listing. The visible part skips positional specifiers, so
     // argIndex only advances for non-positional ones.
207  && !FS.usesPositionalArg())
208  FS.setArgIndex(argIndex++);
209 
210  // FIXME: '%' and '*' don't make sense. Issue a warning.
211  // FIXME: 'ConsumedSoFar' and '*' don't make sense.
212 
213  if (k == ScanfConversionSpecifier::InvalidSpecifier) {
     // Len spans from Beg (still the value on entry) to the current position.
214  unsigned Len = I - Beg;
215  if (ParseUTF8InvalidSpecifier(Beg, E, Len)) {
216  CS.setEndScanList(Beg + Len);
217  FS.setConversionSpecifier(CS);
218  }
219  // Assume the conversion takes one argument.
220  return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, Len);
221  }
222  return ScanfSpecifierResult(Start, FS);
223 }
224 
// Computes the argument type this specifier expects.
//
// Returns ArgType::Invalid() when the conversion specifier does not consume a
// data argument. Otherwise the result is chosen by an outer switch on
// CS.getKind() and, within each group, an inner switch on LM.getKind(); the
// visible results are pointers (ArgType::PtrTo) to the scanned type. A
// default-constructed ArgType is returned when the outer switch falls
// through to its `default`.
//
// NOTE(review): nearly all `case` labels of both switch levels are absent
// from this listing (the embedded numbering jumps wherever one was dropped),
// so which conversion/length-modifier pair selects which return cannot be
// read from here. The section comments ("Signed int.", "Unsigned int.", ...)
// are the only visible grouping.
225 ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
226  const ScanfConversionSpecifier &CS = getConversionSpecifier();
227 
228  if (!CS.consumesDataArgument())
229  return ArgType::Invalid();
230 
231  switch(CS.getKind()) {
232  // Signed int.
236  switch (LM.getKind()) {
238  return ArgType::PtrTo(Ctx.IntTy);
242  return ArgType::PtrTo(Ctx.ShortTy);
244  return ArgType::PtrTo(Ctx.LongTy);
247  return ArgType::PtrTo(Ctx.LongLongTy);
249  return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
251  return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
253  return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t"));
255  return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
257  // GNU extension.
258  return ArgType::PtrTo(Ctx.LongLongTy);
265  return ArgType::Invalid();
266  }
267  llvm_unreachable("Unsupported LengthModifier Type");
268 
269  // Unsigned int.
276  switch (LM.getKind()) {
278  return ArgType::PtrTo(Ctx.UnsignedIntTy);
280  return ArgType::PtrTo(Ctx.UnsignedCharTy);
282  return ArgType::PtrTo(Ctx.UnsignedShortTy);
284  return ArgType::PtrTo(Ctx.UnsignedLongTy);
289  return ArgType::PtrTo(ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64"));
291  return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t"));
293  return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t"));
295  return ArgType::PtrTo(
296  ArgType(Ctx.getUnsignedPointerDiffType(), "unsigned ptrdiff_t"));
298  // GNU extension.
306  return ArgType::Invalid();
307  }
308  llvm_unreachable("Unsupported LengthModifier Type");
309 
310  // Float.
319  switch (LM.getKind()) {
321  return ArgType::PtrTo(Ctx.FloatTy);
323  return ArgType::PtrTo(Ctx.DoubleTy);
325  return ArgType::PtrTo(Ctx.LongDoubleTy);
326  default:
327  return ArgType::Invalid();
328  }
329 
330  // Char, string and scanlist.
334  switch (LM.getKind()) {
339  return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
     // NOTE(review): the statement guarded by this MSVCRT check (source line
     // 345) is absent from this listing; when the check fails, control falls
     // through to `default`.
344  if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
346  LLVM_FALLTHROUGH;
347  default:
348  return ArgType::Invalid();
349  }
352  // FIXME: Mac OS X specific?
353  switch (LM.getKind()) {
356  return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
359  return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *"));
     // NOTE(review): as above, the guarded statement (source line 362) is
     // absent from this listing.
361  if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
363  LLVM_FALLTHROUGH;
364  default:
365  return ArgType::Invalid();
366  }
367 
368  // Pointer.
     // NOTE(review): the pointer case body (source lines 369-370) is absent
     // from this listing.
371 
372  // Write-back.
374  switch (LM.getKind()) {
376  return ArgType::PtrTo(Ctx.IntTy);
378  return ArgType::PtrTo(Ctx.SignedCharTy);
380  return ArgType::PtrTo(Ctx.ShortTy);
382  return ArgType::PtrTo(Ctx.LongTy);
385  return ArgType::PtrTo(Ctx.LongLongTy);
387  return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
389  return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
391  return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t"));
393  return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
395  return ArgType(); // FIXME: Is this a known extension?
402  return ArgType::Invalid();
403  }
404 
405  default:
406  break;
407  }
408 
     // Reached only through the outer `default` above.
409  return ArgType();
410 }
411 
// Tries to rewrite this specifier (conversion CS, length modifier LM and, for
// character arrays, the field width) so that it fits an argument of type QT.
//
// QT is the argument type that is examined; RawQT is consulted only to see
// whether the argument is a constant-size array. LangOpt gates the
// typedef-based length modifiers and Ctx supplies type and target queries.
//
// Returns false, without a usable fix, when the conversion is %n, QT is not a
// pointer, the pointee is an incomplete enum, the pointee is not a builtin
// type, or the builtin kind is not one handled by the switch below. Returns
// true once the specifier has been adjusted (or the length-modifier change
// alone already made it match). Members may already have been modified on
// some of the later `false` paths.
412 bool ScanfSpecifier::fixType(QualType QT, QualType RawQT,
413  const LangOptions &LangOpt,
414  ASTContext &Ctx) {
415 
416  // %n is different from other conversion specifiers; don't try to fix it.
417  if (CS.getKind() == ConversionSpecifier::nArg)
418  return false;
419 
420  if (!QT->isPointerType())
421  return false;
422 
423  QualType PT = QT->getPointeeType();
424 
425  // If it's an enum, get its underlying type.
426  if (const EnumType *ETy = PT->getAs<EnumType>()) {
427  // Don't try to fix incomplete enums.
428  if (!ETy->getDecl()->isComplete())
429  return false;
430  PT = ETy->getDecl()->getIntegerType();
431  }
432 
433  const BuiltinType *BT = PT->getAs<BuiltinType>();
434  if (!BT)
435  return false;
436 
437  // Pointer to a character.
     // Character pointees are always turned into a string conversion, with
     // the wide-char length modifier only for wide character types.
438  if (PT->isAnyCharacterType()) {
439  CS.setKind(ConversionSpecifier::sArg);
440  if (PT->isWideCharType())
441  LM.setKind(LengthModifier::AsWideChar);
442  else
443  LM.setKind(LengthModifier::None);
444 
445  // If we know the target array length, we can use it as a field width.
446  if (const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(RawQT)) {
447  if (CAT->getSizeModifier() == ArrayType::Normal)
     // NOTE(review): source line 448, which starts the statement continued on
     // the next two lines, is absent from this listing. The visible argument
     // is the array size minus one.
     // NOTE(review): if a zero-length array can reach this point, the
     // subtraction would wrap, assuming getZExtValue() is unsigned - confirm.
449  CAT->getSize().getZExtValue() - 1,
450  "", 0, false);
451 
452  }
453  return true;
454  }
455 
456  // Figure out the length modifier.
457  switch (BT->getKind()) {
458  // no modifier
459  case BuiltinType::UInt:
460  case BuiltinType::Int:
461  case BuiltinType::Float:
462  LM.setKind(LengthModifier::None);
463  break;
464 
465  // hh
466  case BuiltinType::Char_U:
467  case BuiltinType::UChar:
468  case BuiltinType::Char_S:
469  case BuiltinType::SChar:
470  LM.setKind(LengthModifier::AsChar);
471  break;
472 
473  // h
474  case BuiltinType::Short:
475  case BuiltinType::UShort:
476  LM.setKind(LengthModifier::AsShort);
477  break;
478 
479  // l
     // Double shares the 'l' modifier with long here.
480  case BuiltinType::Long:
481  case BuiltinType::ULong:
482  case BuiltinType::Double:
483  LM.setKind(LengthModifier::AsLong);
484  break;
485 
486  // ll
487  case BuiltinType::LongLong:
488  case BuiltinType::ULongLong:
489  LM.setKind(LengthModifier::AsLongLong);
490  break;
491 
492  // L
493  case BuiltinType::LongDouble:
494  LM.setKind(LengthModifier::AsLongDouble);
495  break;
496 
497  // Don't know.
498  default:
499  return false;
500  }
501 
502  // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
     // Applied only for typedef'd pointees under C99 or C++11; it may
     // overwrite the modifier chosen by the switch above.
503  if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus11))
504  namedTypeToLengthModifier(PT, LM);
505 
506  // If fixing the length modifier was enough, we are done.
     // The existing conversion specifier is kept when it already matches QT
     // with the new length modifier.
507  if (hasValidLengthModifier(Ctx.getTargetInfo(), LangOpt)) {
508  const analyze_scanf::ArgType &AT = getArgType(Ctx);
509  if (AT.isValid() && AT.matchesType(Ctx, QT))
510  return true;
511  }
512 
513  // Figure out the conversion specifier.
514  if (PT->isRealFloatingType())
515  CS.setKind(ConversionSpecifier::fArg);
516  else if (PT->isSignedIntegerType())
517  CS.setKind(ConversionSpecifier::dArg);
518  else if (PT->isUnsignedIntegerType())
519  CS.setKind(ConversionSpecifier::uArg);
520  else
521  llvm_unreachable("Unexpected type");
522 
523  return true;
524 }
525 
// Writes the textual form of this specifier to os.
//
// Output order: '%', then the positional index followed by '$' when
// usesPositionalArg() is true, then '*' when SuppressAssignment is set, then
// the field width, the length modifier and finally the conversion specifier.
526 void ScanfSpecifier::toString(raw_ostream &os) const {
527  os << "%";
528 
529  if (usesPositionalArg())
530  os << getPositionalArgIndex() << "$";
531  if (SuppressAssignment)
532  os << "*";
533 
     // The field width writes itself into the stream; the other two parts
     // are streamed from the values their toString() calls return.
534  FieldWidth.toString(os);
535  os << LM.toString();
536  os << CS.toString();
537 }
538 
540  const char *I,
541  const char *E,
542  const LangOptions &LO,
543  const TargetInfo &Target) {
     // Driver loop: parses every conversion specification in [I, E) and hands
     // each one to the handler H.
     //
     // NOTE(review): the first line of this signature (source line 539) is
     // absent from this listing; H used below is declared there.
     //
     // Returns true when parsing stopped early, either because
     // ParseScanfSpecifier produced a result whose shouldStop() is true or
     // because H.HandleScanfSpecifier returned false. Returns false once the
     // whole range has been consumed.
544 
     // Shared by reference across all calls so that non-positional specifiers
     // receive consecutive argument indices.
545  unsigned argIndex = 0;
546 
547  // Keep looking for a format specifier until we have exhausted the string.
548  while (I != E) {
     // I is passed by reference; termination of this loop relies on
     // ParseScanfSpecifier advancing it on every call.
549  const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
550  LO, Target);
551  // Did a fail-stop error of any kind occur when parsing the specifier?
552  // If so, don't do any more processing.
553  if (FSR.shouldStop())
554  return true;
555  // Did we exhaust the string or encounter an error that
556  // we can recover from?
557  if (!FSR.hasValue())
558  continue;
559  // We have a format specifier. Pass it to the callback.
     // The length passed is the distance from the specifier's start to the
     // current position I.
560  if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
561  I - FSR.getStart())) {
562  return true;
563  }
564  }
565  assert(I == E && "Format string not exhausted");
566  return false;
567 }
clang::analyze_format_string::SpecifierResult< ScanfSpecifier > ScanfSpecifierResult
CanQualType LongLongTy
Definition: ASTContext.h:1023
virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, const char *startSpecifier, unsigned specifierLen)
Definition: FormatString.h:731
A (possibly-)qualified type.
Definition: Type.h:643
QualType getPointerDiffType() const
Return the unique type for "ptrdiff_t" (C99 7.17) defined in <stddef.h>.
const OptionalFlag & getSuppressAssignment() const
Definition: FormatString.h:643
Kind getKind() const
Definition: Type.h:2435
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee...
Definition: Type.cpp:505
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
Definition: TargetInfo.h:983
bool isRealFloatingType() const
Floating point categories.
Definition: Type.cpp:1968
bool ParseArgPosition(FormatStringHandler &H, FormatSpecifier &CS, const char *Start, const char *&Beg, const char *E)
CanQualType LongTy
Definition: ASTContext.h:1023
const TargetInfo & getTargetInfo() const
Definition: ASTContext.h:693
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
Definition: Type.cpp:1915
const T * getAs() const
Member-template getAs<specific type>'.
Definition: Type.h:6818
bool isWideCharType() const
Definition: Type.cpp:1827
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:154
static std::string toString(const clang::SanitizerSet &Sanitizers)
Produce a string containing comma-separated names of sanitizers in Sanitizers set.
bool ParseScanfString(FormatStringHandler &H, const char *beg, const char *end, const LangOptions &LO, const TargetInfo &Target)
c
Definition: emmintrin.h:306
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:49
QualType getUnsignedPointerDiffType() const
Return the unique unsigned counterpart of "ptrdiff_t" integer type.
static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, const char *&Beg, const char *E, unsigned &argIndex, const LangOptions &LO, const TargetInfo &Target)
void setFieldWidth(const OptionalAmount &Amt)
Definition: FormatString.h:434
MatchKind matchesType(ASTContext &C, QualType argTy) const
Represents the length modifier in a format string in scanf/printf.
Definition: FormatString.h:64
CanQualType LongDoubleTy
Definition: ASTContext.h:1026
CanQualType UnsignedCharTy
Definition: ASTContext.h:1024
static ArgType PtrTo(const ArgType &A)
Create an ArgType which corresponds to the type pointer to A.
Definition: FormatString.h:280
Exposes information about the current target.
Definition: TargetInfo.h:161
CanQualType ShortTy
Definition: ASTContext.h:1023
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char, signed char, short, int, long..], or an enum decl which has a signed representation.
Definition: Type.cpp:1875
virtual void HandleNullChar(const char *nullCharacter)
Definition: FormatString.h:683
CanQualType SignedCharTy
Definition: ASTContext.h:1023
CanQualType getUIntMaxType() const
Return the unique type for "uintmax_t" (C99 7.18.1.5), defined in <stdint.h>.
QualType getWideCharType() const
Return the type of wide characters.
Definition: ASTContext.h:1570
Kind
A helper class that allows the use of isa/cast/dyncast to detect TagType objects of enums...
Definition: Type.h:4449
const ConstantArrayType * getAsConstantArrayType(QualType T) const
Definition: ASTContext.h:2441
CanQualType FloatTy
Definition: ASTContext.h:1026
CanQualType getSignedSizeType() const
Return the unique signed counterpart of the integer type corresponding to size_t. ...
void setConversionSpecifier(const ScanfConversionSpecifier &cs)
Definition: FormatString.h:647
bool isAnyCharacterType() const
Determine whether this type is any of the built-in character types.
Definition: Type.cpp:1854
CanQualType UnsignedShortTy
Definition: ASTContext.h:1024
Dataflow Directional Tag Classes.
void setSuppressAssignment(const char *position)
Definition: FormatString.h:639
Pieces specific to fscanf format strings.
Definition: FormatString.h:611
CanQualType UnsignedLongLongTy
Definition: ASTContext.h:1025
static bool ParseScanList(FormatStringHandler &H, ScanfConversionSpecifier &CS, const char *&Beg, const char *E)
bool ParseUTF8InvalidSpecifier(const char *SpecifierBegin, const char *FmtStrEnd, unsigned &Len)
Returns true if the invalid specifier in SpecifierBegin is a UTF-8 string; check that it won't go fur...
Common components of both fprintf and fscanf format strings.
Definition: FormatString.h:29
CanQualType UnsignedLongTy
Definition: ASTContext.h:1024
bool ParseLengthModifier(FormatSpecifier &FS, const char *&Beg, const char *E, const LangOptions &LO, bool IsScanf=false)
Returns true if a LengthModifier was parsed and installed in the FormatSpecifier& argument...
virtual void HandleIncompleteSpecifier(const char *startSpecifier, unsigned specifierLen)
Definition: FormatString.h:692
This class is used for builtin types like 'int'.
Definition: Type.h:2408
Defines the clang::TargetInfo interface.
OptionalAmount ParseAmount(const char *&Beg, const char *E)
CanQualType IntTy
Definition: ASTContext.h:1023
bool isPointerType() const
Definition: Type.h:6351
CanQualType getIntMaxType() const
Return the unique type for "intmax_t" (C99 7.18.1.5), defined in <stdint.h>.
virtual void HandleIncompleteScanList(const char *start, const char *end)
Definition: FormatString.h:737
CanQualType DoubleTy
Definition: ASTContext.h:1026
virtual bool HandleInvalidScanfConversionSpecifier(const analyze_scanf::ScanfSpecifier &FS, const char *startSpecifier, unsigned specifierLen)
Definition: FormatString.h:724
Represents the canonical version of C arrays with a specified constant size.
Definition: Type.h:2889
CanQualType UnsignedIntTy
Definition: ASTContext.h:1024
CanQualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.