clang 20.0.0git
ScanfFormatString.cpp
Go to the documentation of this file.
1//= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Handling of format strings in scanf and friends. The structure of format
10// strings for fscanf() is described in C99 7.19.6.2.
11//
12//===----------------------------------------------------------------------===//
13
15#include "FormatStringParsing.h"
17
26using namespace clang;
27
30
// ParseScanList (tail of the signature plus the full body): walks a scanf scan
// list starting at Beg and records on CS where the list ends.
// NOTE(review): the opening of the signature (listing lines 31-32) is absent
// from this extract, so the parameter list below starts part-way through; the
// body also uses the handler H and the conversion specifier CS.
// Returns true after an unterminated scan list has been reported through
// H.HandleIncompleteScanList(); returns false once the closing ']' has been
// found and stored with CS.setEndScanList().
33 const char *&Beg, const char *E) {
34 const char *I = Beg;
// One character before the scan position; used as the start of every range
// reported to the handler. Presumably this is the '[' conversion character --
// confirm against the caller.
35 const char *start = I - 1;
// NOTE(review): UpdateOnReturn presumably writes I back into Beg when this
// function returns -- confirm in FormatStringParsing.h.
36 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
37
38 // No more characters?
39 if (I == E) {
40 H.HandleIncompleteScanList(start, I);
41 return true;
42 }
43
44 // Special case: ']' is the first character. It is stepped over here so the
// search loop further down does not take it for the terminator.
45 if (*I == ']') {
46 if (++I == E) {
47 H.HandleIncompleteScanList(start, I - 1);
48 return true;
49 }
50 }
51
52 // Special case: "^]" are the first characters. Both are stepped over, again
// so that this ']' is not taken for the terminator.
53 if (I + 1 != E && I[0] == '^' && I[1] == ']') {
54 I += 2;
55 if (I == E) {
56 H.HandleIncompleteScanList(start, I - 1);
57 return true;
58 }
59 }
60
61 // Look for a ']' character which denotes the end of the scan list.
// Reaching E first means the list was never closed.
62 while (*I != ']') {
63 if (++I == E) {
64 H.HandleIncompleteScanList(start, I - 1);
65 return true;
66 }
67 }
68
// I now points at the closing ']'.
69 CS.setEndScanList(I);
70 return false;
71}
72
73// FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
74// We can possibly refactor.
// ParseScanfSpecifier (tail of the signature plus the body): finds the next
// '%' in [Beg, E) and parses one scanf conversion specification -- argument
// position, optional '*', optional field width, length modifier and the
// conversion character (including a scan list for '[').
// NOTE(review): the first signature line (listing line 75) and the interior
// listing lines 111, 132-134, 159, 190, 194, 198, 202 and 214 are absent from
// this extract. The declarations of FS, Amt and k are among the missing lines,
// as are the statements guarded by the Darwin checks and the conditions that
// open the blocks closed at listing lines 142, 205 and 222.
// NOTE(review): the bare `return true` / `return false` statements convert to
// a ScanfSpecifierResult; presumably `true` yields a result that tells the
// caller to stop and `false` one that carries no specifier -- confirm against
// SpecifierResult in FormatStringParsing.h.
76 const char *&Beg,
77 const char *E,
78 unsigned &argIndex,
79 const LangOptions &LO,
80 const TargetInfo &Target) {
81 using namespace clang::analyze_format_string;
82 using namespace clang::analyze_scanf;
83 const char *I = Beg;
84 const char *Start = nullptr;
// NOTE(review): UpdateOnReturn presumably writes I back into Beg when this
// function returns -- confirm in FormatStringParsing.h.
85 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
86
87 // Look for a '%' character that indicates the start of a format specifier.
88 for ( ; I != E ; ++I) {
89 char c = *I;
90 if (c == '\0') {
91 // Detect spurious null characters, which are likely errors.
92 H.HandleNullChar(I);
93 return true;
94 }
95 if (c == '%') {
96 Start = I++; // Record the start of the format specifier.
97 break;
98 }
99 }
100
101 // No format specifier found?
102 if (!Start)
103 return false;
104
105 if (I == E) {
106 // No more characters left?
107 H.HandleIncompleteSpecifier(Start, E - Start);
108 return true;
109 }
110
// Parse the argument position, if any, into FS.
112 if (ParseArgPosition(H, FS, Start, I, E))
113 return true;
114
115 if (I == E) {
116 // No more characters left?
117 H.HandleIncompleteSpecifier(Start, E - Start);
118 return true;
119 }
120
121 // Look for '*' flag if it is present.
122 if (*I == '*') {
123 FS.setSuppressAssignment(I);
124 if (++I == E) {
125 H.HandleIncompleteSpecifier(Start, E - Start);
126 return true;
127 }
128 }
129
130 // Look for the field width (if any). Unlike printf, this is either
131 // a fixed integer or isn't present.
135 FS.setFieldWidth(Amt);
136
137 if (I == E) {
138 // No more characters left?
139 H.HandleIncompleteSpecifier(Start, E - Start);
140 return true;
141 }
142 }
143
144 // Look for the length modifier.
145 if (ParseLengthModifier(FS, I, E, LO, /*IsScanf=*/true) && I == E) {
146 // No more characters left?
147 H.HandleIncompleteSpecifier(Start, E - Start);
148 return true;
149 }
150
151 // Detect spurious null characters, which are likely errors.
152 if (*I == '\0') {
153 H.HandleNullChar(I);
154 return true;
155 }
156
157 // Finally, look for the conversion specifier.
// conversionPosition keeps pointing at the conversion character while I moves
// past it.
158 const char *conversionPosition = I++;
160 switch (*conversionPosition) {
161 default:
162 break;
163 case '%': k = ConversionSpecifier::PercentArg; break;
164 case 'b': k = ConversionSpecifier::bArg; break;
165 case 'A': k = ConversionSpecifier::AArg; break;
166 case 'E': k = ConversionSpecifier::EArg; break;
167 case 'F': k = ConversionSpecifier::FArg; break;
168 case 'G': k = ConversionSpecifier::GArg; break;
169 case 'X': k = ConversionSpecifier::XArg; break;
170 case 'a': k = ConversionSpecifier::aArg; break;
171 case 'd': k = ConversionSpecifier::dArg; break;
172 case 'e': k = ConversionSpecifier::eArg; break;
173 case 'f': k = ConversionSpecifier::fArg; break;
174 case 'g': k = ConversionSpecifier::gArg; break;
175 case 'i': k = ConversionSpecifier::iArg; break;
176 case 'n': k = ConversionSpecifier::nArg; break;
177 case 'c': k = ConversionSpecifier::cArg; break;
178 case 'C': k = ConversionSpecifier::CArg; break;
179 case 'S': k = ConversionSpecifier::SArg; break;
180 case '[': k = ConversionSpecifier::ScanListArg; break;
181 case 'u': k = ConversionSpecifier::uArg; break;
182 case 'x': k = ConversionSpecifier::xArg; break;
183 case 'o': k = ConversionSpecifier::oArg; break;
184 case 's': k = ConversionSpecifier::sArg; break;
185 case 'p': k = ConversionSpecifier::pArg; break;
186 // Apple extensions
187 // Apple-specific
// 'D', 'O' and 'U' are only acted on when the target triple is a Darwin OS.
188 case 'D':
189 if (Target.getTriple().isOSDarwin())
191 break;
192 case 'O':
193 if (Target.getTriple().isOSDarwin())
195 break;
196 case 'U':
197 if (Target.getTriple().isOSDarwin())
199 break;
200 }
201 ScanfConversionSpecifier CS(conversionPosition, k);
203 if (ParseScanList(H, CS, I, E))
204 return true;
205 }
206 FS.setConversionSpecifier(CS);
// A specifier takes the next sequential argument index only when it consumes a
// data argument, assignment is not suppressed, and no positional index is used.
207 if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
208 && !FS.usesPositionalArg())
209 FS.setArgIndex(argIndex++);
210
211 // FIXME: '%' and '*' don't make sense. Issue a warning.
212 // FIXME: 'ConsumedSoFar' and '*' don't make sense.
213
// Length of the text from Beg up to the current position.
215 unsigned Len = I - Beg;
216 if (ParseUTF8InvalidSpecifier(Beg, E, Len)) {
217 CS.setEndScanList(Beg + Len);
218 FS.setConversionSpecifier(CS);
219 }
220 // Assume the conversion takes one argument.
221 return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, Len);
222 }
// Success: hand back the parsed specifier together with its start position.
223 return ScanfSpecifierResult(Start, FS);
224}
225
228
// ScanfSpecifier::getArgType (body fragment): selects the argument type for
// this specifier by switching first on the conversion kind (CS.getKind()) and
// then on the length modifier (LM.getKind()).
// NOTE(review): the function header and the opening statements (listing lines
// 226-229) are absent from this extract, as are most `case` labels of the
// inner length-modifier switches; only their `return` statements remain, so
// which modifier selects which type cannot be read off this listing.
// The results visible here are ArgType::PtrTo(...) of some type,
// ArgType::Invalid(), or a default-constructed ArgType().
230 return ArgType::Invalid();
231
232 switch(CS.getKind()) {
233 // Signed int.
234 case ConversionSpecifier::dArg:
235 case ConversionSpecifier::DArg:
236 case ConversionSpecifier::iArg:
237 switch (LM.getKind()) {
239 return ArgType::PtrTo(Ctx.IntTy);
243 return ArgType::PtrTo(Ctx.ShortTy);
245 return ArgType::PtrTo(Ctx.LongTy);
248 return ArgType::PtrTo(Ctx.LongLongTy);
250 return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
252 return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
254 return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t"));
256 return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
258 // GNU extension.
259 return ArgType::PtrTo(Ctx.LongLongTy);
266 return ArgType::Invalid();
267 }
268 llvm_unreachable("Unsupported LengthModifier Type");
269
270 // Unsigned int.
271 case ConversionSpecifier::bArg:
272 case ConversionSpecifier::oArg:
273 case ConversionSpecifier::OArg:
274 case ConversionSpecifier::uArg:
275 case ConversionSpecifier::UArg:
276 case ConversionSpecifier::xArg:
277 case ConversionSpecifier::XArg:
278 switch (LM.getKind()) {
280 return ArgType::PtrTo(Ctx.UnsignedIntTy);
282 return ArgType::PtrTo(Ctx.UnsignedCharTy);
286 return ArgType::PtrTo(Ctx.UnsignedLongTy);
291 return ArgType::PtrTo(ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64"));
293 return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t"));
295 return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t"));
297 return ArgType::PtrTo(
298 ArgType(Ctx.getUnsignedPointerDiffType(), "unsigned ptrdiff_t"));
300 // GNU extension.
308 return ArgType::Invalid();
309 }
310 llvm_unreachable("Unsupported LengthModifier Type");
311
312 // Float.
313 case ConversionSpecifier::aArg:
314 case ConversionSpecifier::AArg:
315 case ConversionSpecifier::eArg:
316 case ConversionSpecifier::EArg:
317 case ConversionSpecifier::fArg:
318 case ConversionSpecifier::FArg:
319 case ConversionSpecifier::gArg:
320 case ConversionSpecifier::GArg:
321 switch (LM.getKind()) {
323 return ArgType::PtrTo(Ctx.FloatTy);
325 return ArgType::PtrTo(Ctx.DoubleTy);
327 return ArgType::PtrTo(Ctx.LongDoubleTy);
328 default:
329 return ArgType::Invalid();
330 }
331
332 // Char, string and scanlist.
333 case ConversionSpecifier::cArg:
334 case ConversionSpecifier::sArg:
335 case ConversionSpecifier::ScanListArg:
336 switch (LM.getKind()) {
341 return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
// One length modifier is special-cased for MSVCRT targets; the statement it
// guards is not visible in this listing.
346 if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
348 [[fallthrough]];
349 default:
350 return ArgType::Invalid();
351 }
352 case ConversionSpecifier::CArg:
353 case ConversionSpecifier::SArg:
354 // FIXME: Mac OS X specific?
355 switch (LM.getKind()) {
358 return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
361 return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *"));
// Same MSVCRT special case as in the previous switch; the guarded statement is
// again missing from this listing.
363 if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
365 [[fallthrough]];
366 default:
367 return ArgType::Invalid();
368 }
369
370 // Pointer.
// NOTE(review): the statement for this case (listing line 372) is missing.
371 case ConversionSpecifier::pArg:
373
374 // Write-back.
375 case ConversionSpecifier::nArg:
376 switch (LM.getKind()) {
378 return ArgType::PtrTo(Ctx.IntTy);
380 return ArgType::PtrTo(Ctx.SignedCharTy);
382 return ArgType::PtrTo(Ctx.ShortTy);
384 return ArgType::PtrTo(Ctx.LongTy);
387 return ArgType::PtrTo(Ctx.LongLongTy);
389 return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
391 return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
393 return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t"));
395 return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
397 return ArgType(); // FIXME: Is this a known extension?
404 return ArgType::Invalid();
405 }
406
407 default:
408 break;
409 }
410
// Conversion kinds not handled above end up with a default-constructed ArgType.
411 return ArgType();
412}
413
// ScanfSpecifier::fixType (tail of the signature plus the body): rewrites this
// specifier's conversion kind and length modifier so that it fits an argument
// of pointer type QT. RawQT is consulted only to look for a constant array
// whose size can serve as a field width.
// Returns false when no fix is attempted (%n, a non-pointer argument, an
// incomplete enum, a pointee that is not a builtin type, or a builtin kind the
// switch below does not know); returns true otherwise.
// NOTE(review): the first signature line (listing line 414) and every
// statement that assigns the length modifier or field width (listing lines
// 443, 445, 450, 462, 470, 476, 483, 489, 494, 504) are absent from this
// extract.
415 const LangOptions &LangOpt,
416 ASTContext &Ctx) {
417
418 // %n is different from other conversion specifiers; don't try to fix it.
419 if (CS.getKind() == ConversionSpecifier::nArg)
420 return false;
421
// scanf arguments are written through, so only pointer types can be fixed.
422 if (!QT->isPointerType())
423 return false;
424
425 QualType PT = QT->getPointeeType();
426
427 // If it's an enum, get its underlying type.
428 if (const EnumType *ETy = PT->getAs<EnumType>()) {
429 // Don't try to fix incomplete enums.
430 if (!ETy->getDecl()->isComplete())
431 return false;
432 PT = ETy->getDecl()->getIntegerType();
433 }
434
435 const BuiltinType *BT = PT->getAs<BuiltinType>();
436 if (!BT)
437 return false;
438
439 // Pointer to a character.
440 if (PT->isAnyCharacterType()) {
441 CS.setKind(ConversionSpecifier::sArg);
442 if (PT->isWideCharType())
444 else
446
447 // If we know the target array length, we can use it as a field width.
// The width is the array size minus one -- presumably to leave room for the
// terminating null character; confirm.
448 if (const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(RawQT)) {
449 if (CAT->getSizeModifier() == ArraySizeModifier::Normal)
451 CAT->getZExtSize() - 1, "", 0, false);
452 }
453 return true;
454 }
455
456 // Figure out the length modifier.
// Each group of builtin kinds below is labelled with the modifier it stands
// for; the assignments themselves are missing from this listing.
457 switch (BT->getKind()) {
458 // no modifier
459 case BuiltinType::UInt:
460 case BuiltinType::Int:
461 case BuiltinType::Float:
463 break;
464
465 // hh
466 case BuiltinType::Char_U:
467 case BuiltinType::UChar:
468 case BuiltinType::Char_S:
469 case BuiltinType::SChar:
471 break;
472
473 // h
474 case BuiltinType::Short:
475 case BuiltinType::UShort:
477 break;
478
479 // l
480 case BuiltinType::Long:
481 case BuiltinType::ULong:
482 case BuiltinType::Double:
484 break;
485
486 // ll
487 case BuiltinType::LongLong:
488 case BuiltinType::ULongLong:
490 break;
491
492 // L
493 case BuiltinType::LongDouble:
495 break;
496
497 // Don't know.
498 default:
499 return false;
500 }
501
502 // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
503 if (LangOpt.C99 || LangOpt.CPlusPlus11)
505
506 // If fixing the length modifier was enough, we are done.
507 if (hasValidLengthModifier(Ctx.getTargetInfo(), LangOpt)) {
508 const analyze_scanf::ArgType &AT = getArgType(Ctx);
509 if (AT.isValid() && AT.matchesType(Ctx, QT))
510 return true;
511 }
512
513 // Figure out the conversion specifier.
// Floating-point pointees get %f, signed integers %d, unsigned integers %u.
514 if (PT->isRealFloatingType())
515 CS.setKind(ConversionSpecifier::fArg);
516 else if (PT->isSignedIntegerType())
517 CS.setKind(ConversionSpecifier::dArg);
518 else if (PT->isUnsignedIntegerType())
519 CS.setKind(ConversionSpecifier::uArg);
520 else
521 llvm_unreachable("Unexpected type");
522
523 return true;
524}
525
// Writes this specifier to os in format-string syntax: '%', then the
// positional argument index followed by '$' when one is used, then '*' when
// assignment is suppressed, then the length modifier and the conversion
// specifier.
// NOTE(review): listing line 534 is absent from this extract; presumably it
// emits the field width between the '*' and the length modifier -- confirm.
526void ScanfSpecifier::toString(raw_ostream &os) const {
527 os << "%";
528
529 if (usesPositionalArg())
530 os << getPositionalArgIndex() << "$";
531 if (SuppressAssignment)
532 os << "*";
533
535 os << LM.toString();
536 os << CS.toString();
537}
538
// ParseScanfString (tail of the signature plus the body): runs
// ParseScanfSpecifier repeatedly over the format string [I, E) and passes each
// parsed specifier to H.HandleScanfSpecifier().
// NOTE(review): the first signature line (listing line 539), which introduces
// the handler H, is absent from this extract.
// Returns true when parsing stopped early -- either the specifier parser
// signalled a fail-stop error or the handler callback returned false -- and
// false once the whole string has been consumed.
540 const char *I,
541 const char *E,
542 const LangOptions &LO,
543 const TargetInfo &Target) {
544
// Running index of the next non-positional data argument; ParseScanfSpecifier
// takes it by reference and advances it.
545 unsigned argIndex = 0;
546
547 // Keep looking for a format specifier until we have exhausted the string.
548 while (I != E) {
549 const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
550 LO, Target);
551 // Did a fail-stop error of any kind occur when parsing the specifier?
552 // If so, don't do any more processing.
553 if (FSR.shouldStop())
554 return true;
555 // Did we exhaust the string or encounter an error that
556 // we can recover from?
557 if (!FSR.hasValue())
558 continue;
559 // We have a format specifier. Pass it to the callback.
// The length passed is the distance from the specifier's start to I, the
// position the parser left off at.
560 if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
561 I - FSR.getStart())) {
562 return true;
563 }
564 }
565 assert(I == E && "Format string not exhausted");
566 return false;
567}
Expr * E
llvm::MachO::Target Target
Definition: MachO.h:51
static bool ParseScanList(FormatStringHandler &H, ScanfConversionSpecifier &CS, const char *&Beg, const char *E)
static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, const char *&Beg, const char *E, unsigned &argIndex, const LangOptions &LO, const TargetInfo &Target)
clang::analyze_format_string::SpecifierResult< ScanfSpecifier > ScanfSpecifierResult
__device__ __2f16 float c
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:187
const ConstantArrayType * getAsConstantArrayType(QualType T) const
Definition: ASTContext.h:2825
CanQualType LongTy
Definition: ASTContext.h:1128
QualType getUnsignedPointerDiffType() const
Return the unique unsigned counterpart of "ptrdiff_t" integer type.
CanQualType FloatTy
Definition: ASTContext.h:1131
CanQualType DoubleTy
Definition: ASTContext.h:1131
CanQualType getIntMaxType() const
Return the unique type for "intmax_t" (C99 7.18.1.5), defined in <stdint.h>.
CanQualType LongDoubleTy
Definition: ASTContext.h:1131
QualType getPointerDiffType() const
Return the unique type for "ptrdiff_t" (C99 7.17) defined in <stddef.h>.
CanQualType UnsignedLongTy
Definition: ASTContext.h:1129
CanQualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
CanQualType IntTy
Definition: ASTContext.h:1128
CanQualType SignedCharTy
Definition: ASTContext.h:1128
CanQualType UnsignedCharTy
Definition: ASTContext.h:1129
CanQualType UnsignedIntTy
Definition: ASTContext.h:1129
CanQualType UnsignedLongLongTy
Definition: ASTContext.h:1130
CanQualType UnsignedShortTy
Definition: ASTContext.h:1129
CanQualType ShortTy
Definition: ASTContext.h:1128
const TargetInfo & getTargetInfo() const
Definition: ASTContext.h:779
CanQualType LongLongTy
Definition: ASTContext.h:1128
QualType getWideCharType() const
Return the type of wide characters.
Definition: ASTContext.h:1847
CanQualType getSignedSizeType() const
Return the unique signed counterpart of the integer type corresponding to size_t.
CanQualType getUIntMaxType() const
Return the unique type for "uintmax_t" (C99 7.18.1.5), defined in <stdint.h>.
This class is used for builtin types like 'int'.
Definition: Type.h:3023
Kind getKind() const
Definition: Type.h:3071
Represents the canonical version of C arrays with a specified constant size.
Definition: Type.h:3604
A helper class that allows the use of isa/cast/dyncast to detect TagType objects of enums.
Definition: Type.h:5991
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:461
A (possibly-)qualified type.
Definition: Type.h:941
Exposes information about the current target.
Definition: TargetInfo.h:218
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
Definition: TargetInfo.h:1256
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char,...
Definition: Type.cpp:2146
bool isPointerType() const
Definition: Type.h:8003
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition: Type.cpp:705
bool isAnyCharacterType() const
Determine whether this type is any of the built-in character types.
Definition: Type.cpp:2125
bool isRealFloatingType() const
Floating point categories.
Definition: Type.cpp:2266
bool isWideCharType() const
Definition: Type.cpp:2098
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
Definition: Type.cpp:2196
const T * getAs() const
Member-template getAs<specific type>.
Definition: Type.h:8540
static ArgType PtrTo(const ArgType &A)
Create an ArgType which corresponds to the type pointer to A.
Definition: FormatString.h:318
MatchKind matchesType(ASTContext &C, QualType argTy) const
static bool namedTypeToLengthModifier(QualType QT, LengthModifier &LM)
For a TypedefType QT, if it is a named integer type such as size_t, assign the appropriate value to L...
bool hasValidLengthModifier(const TargetInfo &Target, const LangOptions &LO) const
virtual void HandleIncompleteScanList(const char *start, const char *end)
Definition: FormatString.h:776
virtual void HandleNullChar(const char *nullCharacter)
Definition: FormatString.h:721
virtual void HandleIncompleteSpecifier(const char *startSpecifier, unsigned specifierLen)
Definition: FormatString.h:730
virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, const char *startSpecifier, unsigned specifierLen)
Definition: FormatString.h:770
virtual bool HandleInvalidScanfConversionSpecifier(const analyze_scanf::ScanfSpecifier &FS, const char *startSpecifier, unsigned specifierLen)
Definition: FormatString.h:763
Represents the length modifier in a format string in scanf/printf.
Definition: FormatString.h:65
bool fixType(QualType QT, QualType RawQT, const LangOptions &LangOpt, ASTContext &Ctx)
void toString(raw_ostream &os) const
const ScanfConversionSpecifier & getConversionSpecifier() const
Definition: FormatString.h:689
ArgType getArgType(ASTContext &Ctx) const
Defines the clang::TargetInfo interface.
Common components of both fprintf and fscanf format strings.
Definition: FormatString.h:30
OptionalAmount ParseAmount(const char *&Beg, const char *E)
bool ParseScanfString(FormatStringHandler &H, const char *beg, const char *end, const LangOptions &LO, const TargetInfo &Target)
Pieces specific to fscanf format strings.
Definition: FormatString.h:649
The JSON file list parser is used to communicate input to InstallAPI.