clang 23.0.0git
ScanfFormatString.cpp
Go to the documentation of this file.
1//= ScanfFormatString.cpp - Analysis of scanf format strings --*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Handling of format strings in scanf and friends. The structure of format
10// strings for fscanf() is described in C99 7.19.6.2.
11//
12//===----------------------------------------------------------------------===//
13
14#include "FormatStringParsing.h"
17
26using namespace clang;
27
30
32 const char *&Beg, const char *E) {
// Scans forward from Beg for the ']' that closes a scan list.
//
// Returns true after calling H.HandleIncompleteScanList() when E is reached
// before a closing ']' is found. Otherwise records the position of the
// closing ']' on CS via setEndScanList() and returns false.
//
// NOTE(review): the first line of the signature is missing from this listing,
// so H and CS are not declared in view here.
33 const char *I = Beg;
// One character before Beg; reported to the handler as the start of the list.
// NOTE(review): presumably this is the '[' conversion character -- confirm
// against the caller.
34 const char *start = I - 1;
// NOTE(review): UpdateOnReturn is declared elsewhere; presumably it writes I
// back into Beg when this function returns -- TODO confirm.
35 UpdateOnReturn<const char *> UpdateBeg(Beg, I);
36
37 // No more characters?
38 if (I == E) {
39 H.HandleIncompleteScanList(start, I);
40 return true;
41 }
42
43 // Special case: ']' is the first character.
// It is stepped over here so that the search loop below does not take it for
// the terminator.
44 if (*I == ']') {
45 if (++I == E) {
46 H.HandleIncompleteScanList(start, I - 1);
47 return true;
48 }
49 }
50
51 // Special case: "^]" are the first characters.
// The `I + 1 != E` test keeps the I[1] read inside [I, E).
52 if (I + 1 != E && I[0] == '^' && I[1] == ']') {
53 I += 2;
54 if (I == E) {
55 H.HandleIncompleteScanList(start, I - 1);
56 return true;
57 }
58 }
59
60 // Look for a ']' character which denotes the end of the scan list.
// Dereferencing I is safe: every path that reaches this loop has already
// checked I != E, and the loop re-checks after each increment.
61 while (*I != ']') {
62 if (++I == E) {
63 H.HandleIncompleteScanList(start, I - 1);
64 return true;
65 }
66 }
67
68 CS.setEndScanList(I);
69 return false;
70}
71
72// FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
73// We can possibly refactor.
75 const char *&Beg, const char *E,
76 unsigned &argIndex,
77 const LangOptions &LO,
78 const TargetInfo &Target) {
// Parses at most one scanf conversion specification from [Beg, E).
//
// The visible steps, in order: find '%', parse an optional argument position,
// an optional '*', an optional field width, an optional length modifier, and
// finally the conversion character.
//
// Visible return values:
//  - true  : after a problem was reported to H (null character, incomplete
//            specifier, failed argument position, incomplete scan list).
//  - false : no '%' was found before E.
//  - ScanfSpecifierResult(Start, FS) : a specifier was parsed.
// NOTE(review): the bool returns are converted to ScanfSpecifierResult;
// presumably true maps to the state ParseScanfString tests with shouldStop()
// -- confirm in FormatStringParsing.h.
//
// NOTE(review): the line numbering of this listing has gaps (e.g. 108 -> 110,
// 120 -> 122, 129 -> 133, 156 -> 158). The declarations of FS, Amt and k, and
// the statements that assign k, are among the lines not visible here. Consult
// the original file before editing.
79 using namespace clang::analyze_format_string;
80 using namespace clang::analyze_scanf;
81 const char *I = Beg;
82 const char *Start = nullptr;
// NOTE(review): UpdateOnReturn is declared elsewhere; presumably it stores I
// back into Beg on every return path -- TODO confirm.
83 UpdateOnReturn<const char *> UpdateBeg(Beg, I);
84
85 // Look for a '%' character that indicates the start of a format specifier.
86 for (; I != E; ++I) {
87 char c = *I;
88 if (c == '\0') {
89 // Detect spurious null characters, which are likely errors.
90 H.HandleNullChar(I);
91 return true;
92 }
93 if (c == '%') {
94 Start = I++; // Record the start of the format specifier.
95 break;
96 }
97 }
98
99 // No format specifier found?
100 if (!Start)
101 return false;
102
103 if (I == E) {
104 // No more characters left?
105 H.HandleIncompleteSpecifier(Start, E - Start);
106 return true;
107 }
108
// A true result from ParseArgPosition ends parsing of this specifier.
110 if (ParseArgPosition(H, FS, Start, I, E))
111 return true;
112
113 if (I == E) {
114 // No more characters left?
115 H.HandleIncompleteSpecifier(Start, E - Start);
116 return true;
117 }
118
119 // Look for '*' flag if it is present.
120 if (*I == '*') {
// NOTE(review): listing line 121 is missing here; presumably it records the
// assignment-suppression flag on FS -- confirm.
122 if (++I == E) {
123 H.HandleIncompleteSpecifier(Start, E - Start);
124 return true;
125 }
126 }
127
128 // Look for the field width (if any). Unlike printf, this is either
129 // a fixed integer or isn't present.
// NOTE(review): listing lines 130-132 are missing; they open the block closed
// at line 140 and introduce Amt.
133 FS.setFieldWidth(Amt);
134
135 if (I == E) {
136 // No more characters left?
137 H.HandleIncompleteSpecifier(Start, E - Start);
138 return true;
139 }
140 }
141
142 // Look for the length modifier.
// Only a modifier that was actually parsed can have moved I, so the
// end-of-string check is needed only when ParseLengthModifier returns true.
143 if (ParseLengthModifier(FS, I, E, LO, /*IsScanf=*/true) && I == E) {
144 // No more characters left?
145 H.HandleIncompleteSpecifier(Start, E - Start);
146 return true;
147 }
148
149 // Detect spurious null characters, which are likely errors.
150 if (*I == '\0') {
151 H.HandleNullChar(I);
152 return true;
153 }
154
155 // Finally, look for the conversion specifier.
// conversionPosition points at the conversion character; I is left one past it.
156 const char *conversionPosition = I++;
// NOTE(review): in this listing every case shows only its `break`; the
// statement between the label and the break (presumably the assignment to k)
// is missing -- confirm against the original file.
158 switch (*conversionPosition) {
159 default:
160 break;
161 case '%':
163 break;
164 case 'b':
166 break;
167 case 'A':
169 break;
170 case 'E':
172 break;
173 case 'F':
175 break;
176 case 'G':
178 break;
179 case 'X':
181 break;
182 case 'a':
184 break;
185 case 'd':
187 break;
188 case 'e':
190 break;
191 case 'f':
193 break;
194 case 'g':
196 break;
197 case 'i':
199 break;
200 case 'n':
202 break;
203 case 'c':
205 break;
206 case 'C':
208 break;
209 case 'S':
211 break;
212 case '[':
214 break;
215 case 'u':
217 break;
218 case 'x':
220 break;
221 case 'o':
223 break;
224 case 's':
226 break;
227 case 'p':
229 break;
230 // Apple extensions
231 // Apple-specific
// The statement guarded by each isOSDarwin() test is missing from this
// listing; on non-Darwin targets these cases fall straight through to `break`.
232 case 'D':
233 if (Target.getTriple().isOSDarwin())
235 break;
236 case 'O':
237 if (Target.getTriple().isOSDarwin())
239 break;
240 case 'U':
241 if (Target.getTriple().isOSDarwin())
243 break;
244 }
245 ScanfConversionSpecifier CS(conversionPosition, k);
// NOTE(review): the `if` that opens this block (listing line 246) is missing;
// presumably it restricts the call to the '[' conversion -- confirm.
247 if (ParseScanList(H, CS, I, E))
248 return true;
249 }
// NOTE(review): the first part of this condition (listing lines 250-251) is
// missing. What is visible: an argument index is taken from argIndex, which
// is then incremented, only when the specifier does not use a positional
// argument.
252 !FS.usesPositionalArg())
253 FS.setArgIndex(argIndex++);
254
255 // FIXME: '%' and '*' don't make sense together. Issue a warning.
256 // FIXME: 'ConsumedSoFar' and '*' don't make sense together.
257
// NOTE(review): the `if` that opens this block (listing line 258) is missing;
// presumably it tests for an unrecognised conversion kind -- confirm.
// Len starts as the distance from Beg to I and may be changed by
// ParseUTF8InvalidSpecifier, which takes it by reference.
259 unsigned Len = I - Beg;
260 if (ParseUTF8InvalidSpecifier(Beg, E, Len)) {
261 CS.setEndScanList(Beg + Len);
263 }
264 // Assume the conversion takes one argument.
// The handler's result is inverted to form the value returned from here.
265 return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, Len);
266 }
267 return ScanfSpecifierResult(Start, FS);
268}
269
272
// Selects the argument type expected for this specifier from the conversion
// kind (CS.getKind()) and the length modifier (LM.getKind()).
//
// Visible results:
//  - ArgType::Invalid() when CS does not consume a data argument, or when an
//    inner switch reaches one of its rejecting branches.
//  - ArgType::PtrTo(...) for the accepted combinations.
//  - a default-constructed ArgType() when the conversion kind takes the outer
//    `default`, and for the one write-back branch marked FIXME below.
//
// NOTE(review): the function signature and almost all `case` labels are
// missing from this listing (the numbering has gaps). The mapping from each
// label to the return beneath it must be checked against the original file.
273 if (!CS.consumesDataArgument())
274 return ArgType::Invalid();
275
276 switch (CS.getKind()) {
277 // Signed int.
281 switch (LM.getKind()) {
283 return ArgType::PtrTo(Ctx.IntTy);
287 return ArgType::PtrTo(Ctx.ShortTy);
289 return ArgType::PtrTo(Ctx.LongTy);
292 return ArgType::PtrTo(Ctx.LongLongTy);
294 return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
296 return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
299 ArgType(Ctx.getSignedSizeType(), "signed size_t")));
302 ArgType(Ctx.getPointerDiffType(), "ptrdiff_t")));
304 // GNU extension.
305 return ArgType::PtrTo(Ctx.LongLongTy);
312 return ArgType::Invalid();
313 }
// Every visible branch of the switch above returns, so control is not
// expected to arrive here.
314 llvm_unreachable("Unsupported LengthModifier Type");
315
316 // Unsigned int.
324 switch (LM.getKind()) {
326 return ArgType::PtrTo(Ctx.UnsignedIntTy);
328 return ArgType::PtrTo(Ctx.UnsignedCharTy);
332 return ArgType::PtrTo(Ctx.UnsignedLongTy);
337 return ArgType::PtrTo(
338 ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64"));
340 return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t"));
342 return ArgType::PtrTo(
343 ArgType::makeSizeT(ArgType(Ctx.getSizeType(), "size_t")));
346 ArgType(Ctx.getUnsignedPointerDiffType(), "unsigned ptrdiff_t")));
348 // GNU extension.
356 return ArgType::Invalid();
357 }
// As above: not expected to be reached.
358 llvm_unreachable("Unsupported LengthModifier Type");
359
360 // Float.
369 switch (LM.getKind()) {
371 return ArgType::PtrTo(Ctx.FloatTy);
373 return ArgType::PtrTo(Ctx.DoubleTy);
375 return ArgType::PtrTo(Ctx.LongDoubleTy);
376 default:
377 return ArgType::Invalid();
378 }
379
380 // Char, string and scanlist.
384 switch (LM.getKind()) {
389 return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
// The statement guarded by this test is missing from the listing. When the
// target is not MSVCRT, control falls through to `default` and the
// combination is rejected.
394 if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
396 [[fallthrough]];
397 default:
398 return ArgType::Invalid();
399 }
402 // FIXME: Mac OS X specific?
403 switch (LM.getKind()) {
406 return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
409 return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *"));
// Same shape as the MSVCRT check in the previous switch: rejected via
// fall-through when the target is not MSVCRT.
411 if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
413 [[fallthrough]];
414 default:
415 return ArgType::Invalid();
416 }
417
418 // Pointer.
421
422 // Write-back.
424 switch (LM.getKind()) {
426 return ArgType::PtrTo(Ctx.IntTy);
428 return ArgType::PtrTo(Ctx.SignedCharTy);
430 return ArgType::PtrTo(Ctx.ShortTy);
432 return ArgType::PtrTo(Ctx.LongTy);
435 return ArgType::PtrTo(Ctx.LongLongTy);
437 return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
439 return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
442 ArgType(Ctx.getSignedSizeType(), "signed size_t")));
445 ArgType(Ctx.getPointerDiffType(), "ptrdiff_t")));
447 return ArgType(); // FIXME: Is this a known extension?
454 return ArgType::Invalid();
455 }
456
457 default:
458 break;
459 }
460
// Reached only through the outer `default` above.
461 return ArgType();
462}
463
465 const LangOptions &LangOpt, ASTContext &Ctx) {
// Tries to adjust this specifier so that it matches an argument of type QT.
// The length modifier is chosen first; the conversion specifier is changed
// only if that was not enough.
//
// Returns false without attempting a fix when the conversion is %n, QT is
// not a pointer, the pointee is an incomplete enum, the pointee is not a
// builtin type, or the builtin kind is not one handled by the switch below.
// Returns true on every other path.
//
// NOTE(review): the first line of the signature and several statements are
// missing from this listing (numbering gaps at 490, 492, 499, 525, 538, 543,
// 553, 564, 566, 568).
466
467 // %n is different from other conversion specifiers; don't try to fix it.
468 if (CS.getKind() == ConversionSpecifier::nArg)
469 return false;
470
471 if (!QT->isPointerType())
472 return false;
473
// From here on the checks are made on the pointee type.
474 QualType PT = QT->getPointeeType();
475
476 // If it's an enum, get its underlying type.
477 if (const auto *ED = PT->getAsEnumDecl()) {
478 // Don't try to fix incomplete enums.
479 if (!ED->isComplete())
480 return false;
481 PT = ED->getIntegerType();
482 }
483
484 const BuiltinType *BT = PT->getAs<BuiltinType>();
485 if (!BT)
486 return false;
487
488 // Pointer to a character.
489 if (PT->isAnyCharacterType()) {
// NOTE(review): the statements at listing lines 490 and 492 are missing;
// only the non-wide branch, which clears the length modifier, is visible.
491 if (PT->isWideCharType())
493 else
494 LM.setKind(LengthModifier::None);
495
496 // If we know the target array length, we can use it as a field width.
// RawQT (not QT) is the type inspected for a constant array.
497 if (const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(RawQT)) {
498 if (CAT->getSizeModifier() == ArraySizeModifier::Normal)
// The value used is one less than the array size.
// NOTE(review): presumably this leaves room for a terminating NUL -- confirm.
500 CAT->getZExtSize() - 1, "", 0, false);
501 }
502 return true;
503 }
504
505 // Figure out the length modifier.
506 switch (BT->getKind()) {
507 // no modifier
508 case BuiltinType::UInt:
509 case BuiltinType::Int:
510 case BuiltinType::Float:
511 LM.setKind(LengthModifier::None);
512 break;
513
514 // hh
// NOTE(review): these kinds look like they are already covered by the
// isAnyCharacterType() early return above, which would make this case
// unreachable -- confirm.
515 case BuiltinType::Char_U:
516 case BuiltinType::UChar:
517 case BuiltinType::Char_S:
518 case BuiltinType::SChar:
519 LM.setKind(LengthModifier::AsChar);
520 break;
521
522 // h
523 case BuiltinType::Short:
524 case BuiltinType::UShort:
526 break;
527
528 // l
// Double is grouped with the long integers: in scanf, 'l' on a floating
// conversion denotes double (C99 7.19.6.2).
529 case BuiltinType::Long:
530 case BuiltinType::ULong:
531 case BuiltinType::Double:
532 LM.setKind(LengthModifier::AsLong);
533 break;
534
535 // ll
536 case BuiltinType::LongLong:
537 case BuiltinType::ULongLong:
539 break;
540
541 // L
542 case BuiltinType::LongDouble:
544 break;
545
546 // Don't know.
547 default:
548 return false;
549 }
550
551 // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
// NOTE(review): the guarded statement (listing line 553) is missing.
552 if (LangOpt.C99 || LangOpt.CPlusPlus11)
554
555 // If fixing the length modifier was enough, we are done.
556 if (hasValidLengthModifier(Ctx.getTargetInfo(), LangOpt)) {
557 const analyze_scanf::ArgType &AT = getArgType(Ctx);
558 if (AT.isValid() && AT.matchesType(Ctx, QT))
559 return true;
560 }
561
562 // Figure out the conversion specifier.
// NOTE(review): the statement for each branch (listing lines 564, 566, 568)
// is missing; only the classification tests are visible.
563 if (PT->isRealFloatingType())
565 else if (PT->isSignedIntegerType())
567 else if (PT->isUnsignedIntegerType())
569 else
570 llvm_unreachable("Unexpected type");
571
572 return true;
573}
574
// Writes this specifier to `os` in format-string syntax.
//
// The pieces are emitted in this order: "%", the positional argument index
// followed by "$" (only when a positional argument is used), "*" (only when
// SuppressAssignment is set), the field width, the length modifier, and the
// conversion specifier.
575void ScanfSpecifier::toString(raw_ostream &os) const {
576 os << "%";
577
578 if (usesPositionalArg())
579 os << getPositionalArgIndex() << "$";
580 if (SuppressAssignment)
581 os << "*";
582
// FieldWidth prints itself into the stream; LM and CS return something that
// is then streamed.
583 FieldWidth.toString(os);
584 os << LM.toString();
585 os << CS.toString();
586}
587
589 const char *I,
590 const char *E,
591 const LangOptions &LO,
592 const TargetInfo &Target) {
// Drives ParseScanfSpecifier over the range [I, E), handing each parsed
// specifier to H.HandleScanfSpecifier().
//
// Returns true when processing stops early: either the parse result reports
// shouldStop(), or the handler callback returns false. Returns false once
// the whole string has been consumed.
//
// NOTE(review): the first line of the signature is missing from this listing,
// so H is not declared in view here.
593
// Running index of the next data argument. It is shared across all
// specifiers in the string and passed to the parser by reference.
594 unsigned argIndex = 0;
595
596 // Keep looking for a format specifier until we have exhausted the string.
597 while (I != E) {
// I is passed by reference. NOTE(review): termination of this loop relies on
// ParseScanfSpecifier advancing it on every call -- confirm.
// Binding the returned temporary to a const reference keeps it alive for the
// rest of this iteration.
598 const ScanfSpecifierResult &FSR =
599 ParseScanfSpecifier(H, I, E, argIndex, LO, Target);
600 // Did a fail-stop error of any kind occur when parsing the specifier?
601 // If so, don't do any more processing.
602 if (FSR.shouldStop())
603 return true;
604 // Did we exhaust the string or encounter an error that
605 // we can recover from?
606 if (!FSR.hasValue())
607 continue;
608 // We have a format specifier. Pass it to the callback.
// The length passed is the distance from the specifier's start to the
// current position I.
609 if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
610 I - FSR.getStart())) {
611 return true;
612 }
613 }
614 assert(I == E && "Format string not exhausted");
615 return false;
616}
llvm::MachO::Target Target
Definition MachO.h:51
static bool ParseScanList(FormatStringHandler &H, ScanfConversionSpecifier &CS, const char *&Beg, const char *E)
static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, const char *&Beg, const char *E, unsigned &argIndex, const LangOptions &LO, const TargetInfo &Target)
clang::analyze_format_string::SpecifierResult< ScanfSpecifier > ScanfSpecifierResult
__device__ __2f16 float c
ArgType getArgType(ASTContext &Ctx) const
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition ASTContext.h:227
const ConstantArrayType * getAsConstantArrayType(QualType T) const
CanQualType LongTy
QualType getUnsignedPointerDiffType() const
Return the unique unsigned counterpart of "ptrdiff_t" integer type.
CanQualType FloatTy
CanQualType DoubleTy
CanQualType getIntMaxType() const
Return the unique type for "intmax_t" (C99 7.18.1.5), defined in <stdint.h>.
CanQualType LongDoubleTy
QualType getPointerDiffType() const
Return the unique type for "ptrdiff_t" (C99 7.17) defined in <stddef.h>.
CanQualType UnsignedLongTy
CanQualType IntTy
CanQualType SignedCharTy
CanQualType UnsignedCharTy
CanQualType UnsignedIntTy
CanQualType UnsignedLongLongTy
CanQualType UnsignedShortTy
CanQualType ShortTy
QualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
const TargetInfo & getTargetInfo() const
Definition ASTContext.h:924
QualType getSignedSizeType() const
Return the unique signed counterpart of the integer type corresponding to size_t.
CanQualType LongLongTy
QualType getWideCharType() const
Return the type of wide characters.
CanQualType getUIntMaxType() const
Return the unique type for "uintmax_t" (C99 7.18.1.5), defined in <stdint.h>.
This class is used for builtin types like 'int'.
Definition TypeBase.h:3219
Kind getKind() const
Definition TypeBase.h:3267
Represents the canonical version of C arrays with a specified constant size.
Definition TypeBase.h:3815
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
A (possibly-)qualified type.
Definition TypeBase.h:937
Exposes information about the current target.
Definition TargetInfo.h:227
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char,...
Definition Type.cpp:2266
bool isPointerType() const
Definition TypeBase.h:8673
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition Type.cpp:789
bool isAnyCharacterType() const
Determine whether this type is any of the built-in character types.
Definition Type.cpp:2229
EnumDecl * getAsEnumDecl() const
Retrieves the EnumDecl this type refers to.
Definition Type.h:53
bool isRealFloatingType() const
Floating point categories.
Definition Type.cpp:2405
bool isWideCharType() const
Definition Type.cpp:2202
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
Definition Type.cpp:2332
const T * getAs() const
Member-template getAs<specific type>'.
Definition TypeBase.h:9266
MatchKind matchesType(ASTContext &C, QualType argTy) const
static bool namedTypeToLengthModifier(ASTContext &Ctx, QualType QT, LengthModifier &LM)
For a TypedefType QT, if it is a named integer type such as size_t, assign the appropriate value to L...
void setFieldWidth(const OptionalAmount &Amt)
bool hasValidLengthModifier(const TargetInfo &Target, const LangOptions &LO) const
virtual void HandleIncompleteScanList(const char *start, const char *end)
virtual void HandleNullChar(const char *nullCharacter)
virtual void HandleIncompleteSpecifier(const char *startSpecifier, unsigned specifierLen)
virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, const char *startSpecifier, unsigned specifierLen)
virtual bool HandleInvalidScanfConversionSpecifier(const analyze_scanf::ScanfSpecifier &FS, const char *startSpecifier, unsigned specifierLen)
Represents the length modifier in a format string in scanf/printf.
static ArgType makePtrdiffT(const ArgType &A)
Create an ArgType which corresponds to the ptrdiff_t/unsigned ptrdiff_t type.
static ArgType PtrTo(const ArgType &A)
Create an ArgType which corresponds to the type pointer to A.
static ArgType makeSizeT(const ArgType &A)
Create an ArgType which corresponds to the size_t/ssize_t type.
bool fixType(QualType QT, QualType RawQT, const LangOptions &LangOpt, ASTContext &Ctx)
const OptionalFlag & getSuppressAssignment() const
void setConversionSpecifier(const ScanfConversionSpecifier &cs)
const ScanfConversionSpecifier & getConversionSpecifier() const
void setSuppressAssignment(const char *position)
ArgType getArgType(ASTContext &Ctx) const
Defines the clang::TargetInfo interface.
Common components of both fprintf and fscanf format strings.
OptionalAmount ParseAmount(const char *&Beg, const char *E)
bool ParseLengthModifier(FormatSpecifier &FS, const char *&Beg, const char *E, const LangOptions &LO, bool IsScanf=false)
Returns true if a LengthModifier was parsed and installed in the FormatSpecifier& argument,...
bool ParseArgPosition(FormatStringHandler &H, FormatSpecifier &CS, const char *Start, const char *&Beg, const char *E)
bool ParseScanfString(FormatStringHandler &H, const char *beg, const char *end, const LangOptions &LO, const TargetInfo &Target)
bool ParseUTF8InvalidSpecifier(const char *SpecifierBegin, const char *FmtStrEnd, unsigned &Len)
Returns true if the invalid specifier in SpecifierBegin is a UTF-8 string; check that it won't go fur...
Pieces specific to fscanf format strings.
The JSON file list parser is used to communicate input to InstallAPI.