clang-tools 22.0.0git
FormatStringConverter.cpp
Go to the documentation of this file.
1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Implementation of the FormatStringConverter class which is used to convert
11/// printf format strings to C++ std::formatter format strings.
12///
13//===----------------------------------------------------------------------===//
14
17#include "clang/AST/Expr.h"
18#include "clang/ASTMatchers/ASTMatchFinder.h"
19#include "clang/Basic/LangOptions.h"
20#include "clang/Lex/Lexer.h"
21#include "clang/Lex/Preprocessor.h"
22#include "clang/Tooling/FixIt.h"
23#include "llvm/ADT/StringExtras.h"
24#include "llvm/Support/Debug.h"
25
26using namespace clang::ast_matchers;
27using namespace clang::analyze_printf;
28
29namespace clang::tidy::utils {
30using clang::analyze_format_string::ConversionSpecifier;
31
32/// Is the passed type the actual "char" type, whether that be signed or
33/// unsigned, rather than explicit signed char or unsigned char types.
34static bool isRealCharType(const clang::QualType &Ty) {
35 using namespace clang;
36 const Type *DesugaredType = Ty->getUnqualifiedDesugaredType();
37 if (const auto *BT = llvm::dyn_cast<BuiltinType>(DesugaredType))
38 return (BT->getKind() == BuiltinType::Char_U ||
39 BT->getKind() == BuiltinType::Char_S);
40 return false;
41}
42
43/// If possible, return the text name of the signed type that corresponds to the
44/// passed integer type. If the passed type is already signed then its name is
45/// just returned. Only supports BuiltinTypes.
46static std::optional<std::string>
47getCorrespondingSignedTypeName(const clang::QualType &QT) {
48 using namespace clang;
49 const auto UQT = QT.getUnqualifiedType();
50 if (const auto *BT = llvm::dyn_cast<BuiltinType>(UQT)) {
51 switch (BT->getKind()) {
52 case BuiltinType::UChar:
53 case BuiltinType::Char_U:
54 case BuiltinType::SChar:
55 case BuiltinType::Char_S:
56 return "signed char";
57 case BuiltinType::UShort:
58 case BuiltinType::Short:
59 return "short";
60 case BuiltinType::UInt:
61 case BuiltinType::Int:
62 return "int";
63 case BuiltinType::ULong:
64 case BuiltinType::Long:
65 return "long";
66 case BuiltinType::ULongLong:
67 case BuiltinType::LongLong:
68 return "long long";
69 default:
70 llvm::dbgs() << "Unknown corresponding signed type for BuiltinType '"
71 << QT.getAsString() << "'\n";
72 return std::nullopt;
73 }
74 }
75
76 // Deal with fixed-width integer types from <cstdint>. Use std:: prefix only
77 // if the argument type does.
78 const std::string TypeName = UQT.getAsString();
79 StringRef SimplifiedTypeName{TypeName};
80 const bool InStd = SimplifiedTypeName.consume_front("std::");
81 const StringRef Prefix = InStd ? "std::" : "";
82
83 if (SimplifiedTypeName.starts_with("uint") &&
84 SimplifiedTypeName.ends_with("_t"))
85 return (Twine(Prefix) + SimplifiedTypeName.drop_front()).str();
86
87 if (SimplifiedTypeName == "size_t")
88 return (Twine(Prefix) + "ssize_t").str();
89
90 llvm::dbgs() << "Unknown corresponding signed type for non-BuiltinType '"
91 << UQT.getAsString() << "'\n";
92 return std::nullopt;
93}
94
95/// If possible, return the text name of the unsigned type that corresponds to
96/// the passed integer type. If the passed type is already unsigned then its
97/// name is just returned. Only supports BuiltinTypes.
98static std::optional<std::string>
99getCorrespondingUnsignedTypeName(const clang::QualType &QT) {
100 using namespace clang;
101 const auto UQT = QT.getUnqualifiedType();
102 if (const auto *BT = llvm::dyn_cast<BuiltinType>(UQT)) {
103 switch (BT->getKind()) {
104 case BuiltinType::SChar:
105 case BuiltinType::Char_S:
106 case BuiltinType::UChar:
107 case BuiltinType::Char_U:
108 return "unsigned char";
109 case BuiltinType::Short:
110 case BuiltinType::UShort:
111 return "unsigned short";
112 case BuiltinType::Int:
113 case BuiltinType::UInt:
114 return "unsigned int";
115 case BuiltinType::Long:
116 case BuiltinType::ULong:
117 return "unsigned long";
118 case BuiltinType::LongLong:
119 case BuiltinType::ULongLong:
120 return "unsigned long long";
121 default:
122 llvm::dbgs() << "Unknown corresponding unsigned type for BuiltinType '"
123 << UQT.getAsString() << "'\n";
124 return std::nullopt;
125 }
126 }
127
128 // Deal with fixed-width integer types from <cstdint>. Use std:: prefix only
129 // if the argument type does.
130 const std::string TypeName = UQT.getAsString();
131 StringRef SimplifiedTypeName{TypeName};
132 const bool InStd = SimplifiedTypeName.consume_front("std::");
133 const StringRef Prefix = InStd ? "std::" : "";
134
135 if (SimplifiedTypeName.starts_with("int") &&
136 SimplifiedTypeName.ends_with("_t"))
137 return (Twine(Prefix) + "u" + SimplifiedTypeName).str();
138
139 if (SimplifiedTypeName == "ssize_t")
140 return (Twine(Prefix) + "size_t").str();
141 if (SimplifiedTypeName == "ptrdiff_t")
142 return (Twine(Prefix) + "size_t").str();
143
144 llvm::dbgs() << "Unknown corresponding unsigned type for non-BuiltinType '"
145 << UQT.getAsString() << "'\n";
146 return std::nullopt;
147}
148
149static std::optional<std::string>
150castTypeForArgument(ConversionSpecifier::Kind ArgKind,
151 const clang::QualType &QT) {
152 if (ArgKind == ConversionSpecifier::Kind::uArg)
155}
156
157static bool isMatchingSignedness(ConversionSpecifier::Kind ArgKind,
158 const clang::QualType &ArgType) {
159 if (const auto *BT = llvm::dyn_cast<BuiltinType>(ArgType)) {
160 // Unadorned char never matches any expected signedness since it
161 // could be signed or unsigned.
162 const auto ArgTypeKind = BT->getKind();
163 if (ArgTypeKind == BuiltinType::Char_U ||
164 ArgTypeKind == BuiltinType::Char_S)
165 return false;
166 }
167
168 if (ArgKind == ConversionSpecifier::Kind::uArg)
169 return ArgType->isUnsignedIntegerType();
170 return ArgType->isSignedIntegerType();
171}
172
173namespace {
174AST_MATCHER(clang::QualType, isRealChar) {
176}
177} // namespace
178
179static bool castMismatchedIntegerTypes(const CallExpr *Call, bool StrictMode) {
180 /// For printf-style functions, the signedness of the type printed is
181 /// indicated by the corresponding type in the format string.
182 /// std::print will determine the signedness from the type of the
183 /// argument. This means that it is necessary to generate a cast in
184 /// StrictMode to ensure that the exact behaviour is maintained.
185 /// However, for templated functions like absl::PrintF and
186 /// fmt::printf, the signedness of the type printed is also taken from
187 /// the actual argument like std::print, so such casts are never
188 /// necessary. printf-style functions are variadic, whereas templated
189 /// ones aren't, so we can use that to distinguish between the two
190 /// cases.
191 if (StrictMode) {
192 const FunctionDecl *FuncDecl = Call->getDirectCallee();
193 assert(FuncDecl);
194 return FuncDecl->isVariadic();
195 }
196 return false;
197}
198
200 ASTContext *ContextIn, const CallExpr *Call, unsigned FormatArgOffset,
201 const Configuration ConfigIn, const LangOptions &LO, SourceManager &SM,
202 Preprocessor &PP)
203 : Context(ContextIn), Config(ConfigIn),
204 CastMismatchedIntegerTypes(
205 castMismatchedIntegerTypes(Call, ConfigIn.StrictMode)),
206 Args(Call->getArgs()), NumArgs(Call->getNumArgs()),
207 ArgsOffset(FormatArgOffset + 1), LangOpts(LO) {
208 assert(ArgsOffset <= NumArgs);
209 FormatExpr = llvm::dyn_cast<StringLiteral>(
210 Args[FormatArgOffset]->IgnoreUnlessSpelledInSource());
211
212 assert(FormatExpr && FormatExpr->isOrdinary());
213
214 if (const std::optional<StringRef> MaybeMacroName =
215 formatStringContainsUnreplaceableMacro(Call, FormatExpr, SM, PP);
216 MaybeMacroName) {
217 conversionNotPossible(
218 ("format string contains unreplaceable macro '" + *MaybeMacroName + "'")
219 .str());
220 return;
221 }
222
223 PrintfFormatString = FormatExpr->getString();
224
225 // Assume that the output will be approximately the same size as the input,
226 // but perhaps with a few escapes expanded.
227 const size_t EstimatedGrowth = 8;
228 StandardFormatString.reserve(PrintfFormatString.size() + EstimatedGrowth);
229 StandardFormatString.push_back('\"');
230
231 const bool IsFreeBsdkPrintf = false;
232
233 using clang::analyze_format_string::ParsePrintfString;
234 ParsePrintfString(*this, PrintfFormatString.data(),
235 PrintfFormatString.data() + PrintfFormatString.size(),
236 LangOpts, Context->getTargetInfo(), IsFreeBsdkPrintf);
237 finalizeFormatText();
238}
239
240std::optional<StringRef>
241FormatStringConverter::formatStringContainsUnreplaceableMacro(
242 const CallExpr *Call, const StringLiteral *FormatExpr, SourceManager &SM,
243 Preprocessor &PP) {
244 // If a macro invocation surrounds the entire call then we don't want that to
245 // inhibit conversion. The whole format string will appear to come from that
246 // macro, as will the function call.
247 std::optional<StringRef> MaybeSurroundingMacroName;
248 if (const SourceLocation BeginCallLoc = Call->getBeginLoc();
249 BeginCallLoc.isMacroID())
250 MaybeSurroundingMacroName =
251 Lexer::getImmediateMacroName(BeginCallLoc, SM, PP.getLangOpts());
252
253 for (auto I = FormatExpr->tokloc_begin(), E = FormatExpr->tokloc_end();
254 I != E; ++I) {
255 const SourceLocation &TokenLoc = *I;
256 if (TokenLoc.isMacroID()) {
257 const StringRef MacroName =
258 Lexer::getImmediateMacroName(TokenLoc, SM, PP.getLangOpts());
259
260 if (MaybeSurroundingMacroName != MacroName) {
261 // glibc uses __PRI64_PREFIX and __PRIPTR_PREFIX to define the prefixes
262 // for types that change size so we must look for multiple prefixes.
263 if (!MacroName.starts_with("PRI") && !MacroName.starts_with("__PRI"))
264 return MacroName;
265
266 const SourceLocation TokenSpellingLoc = SM.getSpellingLoc(TokenLoc);
267 const OptionalFileEntryRef MaybeFileEntry =
268 SM.getFileEntryRefForID(SM.getFileID(TokenSpellingLoc));
269 if (!MaybeFileEntry)
270 return MacroName;
271
272 HeaderSearch &HS = PP.getHeaderSearchInfo();
273 // Check if the file is a system header
274 if (!isSystem(HS.getFileDirFlavor(*MaybeFileEntry)) ||
275 llvm::sys::path::filename(MaybeFileEntry->getName()) !=
276 "inttypes.h")
277 return MacroName;
278 }
279 }
280 }
281 return std::nullopt;
282}
283
284void FormatStringConverter::emitAlignment(const PrintfSpecifier &FS,
285 std::string &FormatSpec) {
286 const ConversionSpecifier::Kind ArgKind =
287 FS.getConversionSpecifier().getKind();
288
289 // We only care about alignment if a field width is specified
290 if (FS.getFieldWidth().getHowSpecified() != OptionalAmount::NotSpecified) {
291 if (ArgKind == ConversionSpecifier::sArg) {
292 // Strings are left-aligned by default with std::format, so we only
293 // need to emit an alignment if this one needs to be right aligned.
294 if (!FS.isLeftJustified())
295 FormatSpec.push_back('>');
296 } else {
297 // Numbers are right-aligned by default with std::format, so we only
298 // need to emit an alignment if this one needs to be left aligned.
299 if (FS.isLeftJustified())
300 FormatSpec.push_back('<');
301 }
302 }
303}
304
305void FormatStringConverter::emitSign(const PrintfSpecifier &FS,
306 std::string &FormatSpec) {
307 const ConversionSpecifier Spec = FS.getConversionSpecifier();
308
309 // Ignore on something that isn't numeric. For printf it's would be a
310 // compile-time warning but ignored at runtime, but for std::format it
311 // ought to be a compile-time error.
312 if (Spec.isAnyIntArg() || Spec.isDoubleArg()) {
313 // + is preferred to ' '
314 if (FS.hasPlusPrefix())
315 FormatSpec.push_back('+');
316 else if (FS.hasSpacePrefix())
317 FormatSpec.push_back(' ');
318 }
319}
320
321void FormatStringConverter::emitAlternativeForm(const PrintfSpecifier &FS,
322 std::string &FormatSpec) {
323 if (FS.hasAlternativeForm()) {
324 switch (FS.getConversionSpecifier().getKind()) {
325 case ConversionSpecifier::Kind::aArg:
326 case ConversionSpecifier::Kind::AArg:
327 case ConversionSpecifier::Kind::eArg:
328 case ConversionSpecifier::Kind::EArg:
329 case ConversionSpecifier::Kind::fArg:
330 case ConversionSpecifier::Kind::FArg:
331 case ConversionSpecifier::Kind::gArg:
332 case ConversionSpecifier::Kind::GArg:
333 case ConversionSpecifier::Kind::xArg:
334 case ConversionSpecifier::Kind::XArg:
335 case ConversionSpecifier::Kind::oArg:
336 FormatSpec.push_back('#');
337 break;
338 default:
339 // Alternative forms don't exist for other argument kinds
340 break;
341 }
342 }
343}
344
345void FormatStringConverter::emitFieldWidth(const PrintfSpecifier &FS,
346 std::string &FormatSpec) {
347 {
348 const OptionalAmount FieldWidth = FS.getFieldWidth();
349 switch (FieldWidth.getHowSpecified()) {
350 case OptionalAmount::NotSpecified:
351 break;
352 case OptionalAmount::Constant:
353 FormatSpec.append(llvm::utostr(FieldWidth.getConstantAmount()));
354 break;
355 case OptionalAmount::Arg:
356 FormatSpec.push_back('{');
357 if (FieldWidth.usesPositionalArg()) {
358 // std::format argument identifiers are zero-based, whereas printf
359 // ones are one based.
360 assert(FieldWidth.getPositionalArgIndex() > 0U);
361 FormatSpec.append(llvm::utostr(FieldWidth.getPositionalArgIndex() - 1));
362 }
363 FormatSpec.push_back('}');
364 break;
365 case OptionalAmount::Invalid:
366 break;
367 }
368 }
369}
370
371void FormatStringConverter::emitPrecision(const PrintfSpecifier &FS,
372 std::string &FormatSpec) {
373 const OptionalAmount FieldPrecision = FS.getPrecision();
374 switch (FieldPrecision.getHowSpecified()) {
375 case OptionalAmount::NotSpecified:
376 break;
377 case OptionalAmount::Constant:
378 FormatSpec.push_back('.');
379 FormatSpec.append(llvm::utostr(FieldPrecision.getConstantAmount()));
380 break;
381 case OptionalAmount::Arg:
382 FormatSpec.push_back('.');
383 FormatSpec.push_back('{');
384 if (FieldPrecision.usesPositionalArg()) {
385 // std::format argument identifiers are zero-based, whereas printf
386 // ones are one based.
387 assert(FieldPrecision.getPositionalArgIndex() > 0U);
388 FormatSpec.append(
389 llvm::utostr(FieldPrecision.getPositionalArgIndex() - 1));
390 }
391 FormatSpec.push_back('}');
392 break;
393 case OptionalAmount::Invalid:
394 break;
395 }
396}
397
398void FormatStringConverter::maybeRotateArguments(const PrintfSpecifier &FS) {
399 unsigned ArgCount = 0;
400 const OptionalAmount FieldWidth = FS.getFieldWidth();
401 const OptionalAmount FieldPrecision = FS.getPrecision();
402
403 if (FieldWidth.getHowSpecified() == OptionalAmount::Arg &&
404 !FieldWidth.usesPositionalArg())
405 ++ArgCount;
406 if (FieldPrecision.getHowSpecified() == OptionalAmount::Arg &&
407 !FieldPrecision.usesPositionalArg())
408 ++ArgCount;
409
410 if (ArgCount)
411 ArgRotates.emplace_back(FS.getArgIndex() + ArgsOffset, ArgCount);
412}
413
414void FormatStringConverter::emitStringArgument(unsigned ArgIndex,
415 const Expr *Arg) {
416 // If the argument is the result of a call to std::string::c_str() or
417 // data() with a return type of char then we can remove that call and
418 // pass the std::string directly. We don't want to do so if the return
419 // type is not a char pointer (though it's unlikely that such code would
420 // compile without warnings anyway.) See RedundantStringCStrCheck.
421
422 if (!StringCStrCallExprMatcher) {
423 // Lazily create the matcher
424 const auto StringDecl = type(hasUnqualifiedDesugaredType(recordType(
425 hasDeclaration(cxxRecordDecl(hasName("::std::basic_string"))))));
426 const auto StringExpr = expr(
427 anyOf(hasType(StringDecl), hasType(qualType(pointsTo(StringDecl)))));
428
429 StringCStrCallExprMatcher =
430 cxxMemberCallExpr(
431 on(StringExpr.bind("arg")), callee(memberExpr().bind("member")),
432 callee(cxxMethodDecl(hasAnyName("c_str", "data"),
433 returns(pointerType(pointee(isRealChar()))))))
434 .bind("call");
435 }
436
437 auto CStrMatches = match(*StringCStrCallExprMatcher, *Arg, *Context);
438 if (CStrMatches.size() == 1)
439 ArgCStrRemovals.push_back(CStrMatches.front());
440 else if (Arg->getType()->isPointerType()) {
441 const QualType Pointee = Arg->getType()->getPointeeType();
442 // printf is happy to print signed char and unsigned char strings, but
443 // std::format only likes char strings.
444 if (Pointee->isCharType() && !isRealCharType(Pointee))
445 ArgFixes.emplace_back(ArgIndex, "reinterpret_cast<const char *>(");
446 }
447}
448
449bool FormatStringConverter::emitIntegerArgument(
450 ConversionSpecifier::Kind ArgKind, const Expr *Arg, unsigned ArgIndex,
451 std::string &FormatSpec) {
452 const clang::QualType &ArgType = Arg->getType();
453 if (ArgType->isBooleanType()) {
454 // std::format will print bool as either "true" or "false" by default,
455 // but printf prints them as "0" or "1". Be compatible with printf by
456 // requesting decimal output.
457 FormatSpec.push_back('d');
458 } else if (ArgType->isEnumeralType()) {
459 // std::format will try to find a specialization to print the enum
460 // (and probably fail), whereas printf would have just expected it to
461 // be passed as its underlying type. However, printf will have forced
462 // the signedness based on the format string, so we need to do the
463 // same.
464 if (const auto *ED = ArgType->getAsEnumDecl()) {
465 if (const std::optional<std::string> MaybeCastType =
466 castTypeForArgument(ArgKind, ED->getIntegerType()))
467 ArgFixes.emplace_back(
468 ArgIndex, (Twine("static_cast<") + *MaybeCastType + ">(").str());
469 else
470 return conversionNotPossible(
471 (Twine("argument ") + Twine(ArgIndex) + " has unexpected enum type")
472 .str());
473 }
474 } else if (CastMismatchedIntegerTypes &&
475 !isMatchingSignedness(ArgKind, ArgType)) {
476 // printf will happily print an unsigned type as signed if told to.
477 // Even -Wformat doesn't warn for this. std::format will format as
478 // unsigned unless we cast it.
479 if (const std::optional<std::string> MaybeCastType =
480 castTypeForArgument(ArgKind, ArgType))
481 ArgFixes.emplace_back(
482 ArgIndex, (Twine("static_cast<") + *MaybeCastType + ">(").str());
483 else
484 return conversionNotPossible(
485 (Twine("argument ") + Twine(ArgIndex) + " cannot be cast to " +
486 Twine(ArgKind == ConversionSpecifier::Kind::uArg ? "unsigned"
487 : "signed") +
488 " integer type to match format"
489 " specifier and StrictMode is enabled")
490 .str());
491 } else if (isRealCharType(ArgType) || !ArgType->isIntegerType()) {
492 // Only specify integer if the argument is of a different type
493 FormatSpec.push_back('d');
494 }
495 return true;
496}
497
498/// Append the corresponding standard format string type fragment to FormatSpec,
499/// and store any argument fixes for later application.
500/// @returns true on success, false on failure
501bool FormatStringConverter::emitType(const PrintfSpecifier &FS, const Expr *Arg,
502 std::string &FormatSpec) {
503 const ConversionSpecifier::Kind ArgKind =
504 FS.getConversionSpecifier().getKind();
505 switch (ArgKind) {
506 case ConversionSpecifier::Kind::sArg:
507 emitStringArgument(FS.getArgIndex() + ArgsOffset, Arg);
508 break;
509 case ConversionSpecifier::Kind::cArg:
510 // The type must be "c" to get a character unless the type is exactly
511 // char (whether that be signed or unsigned for the target.)
512 if (!isRealCharType(Arg->getType()))
513 FormatSpec.push_back('c');
514 break;
515 case ConversionSpecifier::Kind::dArg:
516 case ConversionSpecifier::Kind::iArg:
517 case ConversionSpecifier::Kind::uArg:
518 if (!emitIntegerArgument(ArgKind, Arg, FS.getArgIndex() + ArgsOffset,
519 FormatSpec))
520 return false;
521 break;
522 case ConversionSpecifier::Kind::pArg: {
523 const clang::QualType &ArgType = Arg->getType();
524 // std::format knows how to format void pointers and nullptrs
525 if (!ArgType->isNullPtrType() && !ArgType->isVoidPointerType())
526 ArgFixes.emplace_back(FS.getArgIndex() + ArgsOffset,
527 "static_cast<const void *>(");
528 break;
529 }
530 case ConversionSpecifier::Kind::xArg:
531 FormatSpec.push_back('x');
532 break;
533 case ConversionSpecifier::Kind::XArg:
534 FormatSpec.push_back('X');
535 break;
536 case ConversionSpecifier::Kind::oArg:
537 FormatSpec.push_back('o');
538 break;
539 case ConversionSpecifier::Kind::aArg:
540 FormatSpec.push_back('a');
541 break;
542 case ConversionSpecifier::Kind::AArg:
543 FormatSpec.push_back('A');
544 break;
545 case ConversionSpecifier::Kind::eArg:
546 FormatSpec.push_back('e');
547 break;
548 case ConversionSpecifier::Kind::EArg:
549 FormatSpec.push_back('E');
550 break;
551 case ConversionSpecifier::Kind::fArg:
552 FormatSpec.push_back('f');
553 break;
554 case ConversionSpecifier::Kind::FArg:
555 FormatSpec.push_back('F');
556 break;
557 case ConversionSpecifier::Kind::gArg:
558 FormatSpec.push_back('g');
559 break;
560 case ConversionSpecifier::Kind::GArg:
561 FormatSpec.push_back('G');
562 break;
563 default:
564 // Something we don't understand
565 return conversionNotPossible((Twine("argument ") +
566 Twine(FS.getArgIndex() + ArgsOffset) +
567 " has an unsupported format specifier")
568 .str());
569 }
570
571 return true;
572}
573
574/// Append the standard format string equivalent of the passed PrintfSpecifier
575/// to StandardFormatString and store any argument fixes for later application.
576/// @returns true on success, false on failure
577bool FormatStringConverter::convertArgument(const PrintfSpecifier &FS,
578 const Expr *Arg,
579 std::string &StandardFormatString) {
580 // The specifier must have an associated argument
581 assert(FS.consumesDataArgument());
582
583 StandardFormatString.push_back('{');
584
585 if (FS.usesPositionalArg()) {
586 // std::format argument identifiers are zero-based, whereas printf ones
587 // are one based.
588 assert(FS.getPositionalArgIndex() > 0U);
589 StandardFormatString.append(llvm::utostr(FS.getPositionalArgIndex() - 1));
590 }
591
592 // std::format format argument parts to potentially emit:
593 // [[fill]align][sign]["#"]["0"][width]["."precision][type]
594 std::string FormatSpec;
595
596 // printf doesn't support specifying the fill character - it's always a
597 // space, so we never need to generate one.
598
599 emitAlignment(FS, FormatSpec);
600 emitSign(FS, FormatSpec);
601 emitAlternativeForm(FS, FormatSpec);
602
603 if (FS.hasLeadingZeros())
604 FormatSpec.push_back('0');
605
606 emitFieldWidth(FS, FormatSpec);
607 emitPrecision(FS, FormatSpec);
608 maybeRotateArguments(FS);
609
610 if (!emitType(FS, Arg, FormatSpec))
611 return false;
612
613 if (!FormatSpec.empty()) {
614 StandardFormatString.push_back(':');
615 StandardFormatString.append(FormatSpec);
616 }
617
618 StandardFormatString.push_back('}');
619 return true;
620}
621
622/// Called for each format specifier by ParsePrintfString.
623bool FormatStringConverter::HandlePrintfSpecifier(const PrintfSpecifier &FS,
624 const char *StartSpecifier,
625 unsigned SpecifierLen,
626 const TargetInfo &Target) {
627
628 const size_t StartSpecifierPos = StartSpecifier - PrintfFormatString.data();
629 assert(StartSpecifierPos + SpecifierLen <= PrintfFormatString.size());
630
631 // Everything before the specifier needs copying verbatim
632 assert(StartSpecifierPos >= PrintfFormatStringPos);
633
634 appendFormatText(StringRef(PrintfFormatString.begin() + PrintfFormatStringPos,
635 StartSpecifierPos - PrintfFormatStringPos));
636
637 const ConversionSpecifier::Kind ArgKind =
638 FS.getConversionSpecifier().getKind();
639
640 // Skip over specifier
641 PrintfFormatStringPos = StartSpecifierPos + SpecifierLen;
642 assert(PrintfFormatStringPos <= PrintfFormatString.size());
643
644 FormatStringNeededRewriting = true;
645
646 if (ArgKind == ConversionSpecifier::Kind::nArg) {
647 // std::print doesn't do the equivalent of %n
648 return conversionNotPossible("'%n' is not supported in format string");
649 }
650
651 if (ArgKind == ConversionSpecifier::Kind::PrintErrno) {
652 // std::print doesn't support %m. In theory we could insert a
653 // strerror(errno) parameter (assuming that libc has a thread-safe
654 // implementation, which glibc does), but that would require keeping track
655 // of the input and output parameter indices for position arguments too.
656 return conversionNotPossible("'%m' is not supported in format string");
657 }
658
659 if (ArgKind == ConversionSpecifier::PercentArg) {
660 StandardFormatString.push_back('%');
661 return true;
662 }
663
664 const unsigned ArgIndex = FS.getArgIndex() + ArgsOffset;
665 if (ArgIndex >= NumArgs) {
666 // Argument index out of range. Give up.
667 return conversionNotPossible(
668 (Twine("argument index ") + Twine(ArgIndex) + " is out of range")
669 .str());
670 }
671
672 return convertArgument(FS, Args[ArgIndex]->IgnoreImplicitAsWritten(),
673 StandardFormatString);
674}
675
676/// Called at the very end just before applying fixes to capture the last part
677/// of the format string.
678void FormatStringConverter::finalizeFormatText() {
679 appendFormatText(
680 StringRef(PrintfFormatString.begin() + PrintfFormatStringPos,
681 PrintfFormatString.size() - PrintfFormatStringPos));
682 PrintfFormatStringPos = PrintfFormatString.size();
683
684 // It's clearer to convert printf("Hello\r\n"); to std::print("Hello\r\n")
685 // than to std::println("Hello\r");
686 // Use StringRef until C++20 std::string::ends_with() is available.
687 const auto StandardFormatStringRef = StringRef(StandardFormatString);
688 if (Config.AllowTrailingNewlineRemoval &&
689 StandardFormatStringRef.ends_with("\\n") &&
690 !StandardFormatStringRef.ends_with("\\\\n") &&
691 !StandardFormatStringRef.ends_with("\\r\\n")) {
692 UsePrintNewlineFunction = true;
693 FormatStringNeededRewriting = true;
694 StandardFormatString.erase(StandardFormatString.end() - 2,
695 StandardFormatString.end());
696 }
697
698 StandardFormatString.push_back('\"');
699}
700
701/// Append literal parts of the format text, reinstating escapes as required.
702void FormatStringConverter::appendFormatText(const StringRef Text) {
703 for (const char Ch : Text) {
704 if (Ch == '\a')
705 StandardFormatString += "\\a";
706 else if (Ch == '\b')
707 StandardFormatString += "\\b";
708 else if (Ch == '\f')
709 StandardFormatString += "\\f";
710 else if (Ch == '\n')
711 StandardFormatString += "\\n";
712 else if (Ch == '\r')
713 StandardFormatString += "\\r";
714 else if (Ch == '\t')
715 StandardFormatString += "\\t";
716 else if (Ch == '\v')
717 StandardFormatString += "\\v";
718 else if (Ch == '\"')
719 StandardFormatString += "\\\"";
720 else if (Ch == '\\')
721 StandardFormatString += "\\\\";
722 else if (Ch == '{') {
723 StandardFormatString += "{{";
724 FormatStringNeededRewriting = true;
725 } else if (Ch == '}') {
726 StandardFormatString += "}}";
727 FormatStringNeededRewriting = true;
728 } else if (Ch < 32) {
729 StandardFormatString += "\\x";
730 StandardFormatString += llvm::hexdigit(Ch >> 4, true);
731 StandardFormatString += llvm::hexdigit(Ch & 0xf, true);
732 } else
733 StandardFormatString += Ch;
734 }
735}
736
737static std::string withoutCStrReplacement(const BoundNodes &CStrRemovalMatch,
738 ASTContext &Context) {
739 const auto *Arg = CStrRemovalMatch.getNodeAs<Expr>("arg");
740 const auto *Member = CStrRemovalMatch.getNodeAs<MemberExpr>("member");
741 const bool Arrow = Member->isArrow();
742 return Arrow ? utils::fixit::formatDereference(*Arg, Context)
743 : tooling::fixit::getText(*Arg, Context).str();
744}
745
746/// Called by the check when it is ready to apply the fixes.
747void FormatStringConverter::applyFixes(DiagnosticBuilder &Diag,
748 SourceManager &SM) {
749 if (FormatStringNeededRewriting) {
750 Diag << FixItHint::CreateReplacement(
751 CharSourceRange::getTokenRange(FormatExpr->getBeginLoc(),
752 FormatExpr->getEndLoc()),
753 StandardFormatString);
754 }
755
756 // ArgCount is one less than the number of arguments to be rotated.
757 for (auto [ValueArgIndex, ArgCount] : ArgRotates) {
758 assert(ValueArgIndex < NumArgs);
759 assert(ValueArgIndex > ArgCount);
760
761 // First move the value argument to the right place. But if there's a
762 // pending c_str() removal then we must do that at the same time.
763 if (const auto CStrRemovalMatch =
764 std::find_if(ArgCStrRemovals.cbegin(), ArgCStrRemovals.cend(),
765 [ArgStartPos = Args[ValueArgIndex]->getBeginLoc()](
766 const BoundNodes &Match) {
767 // This c_str() removal corresponds to the argument
768 // being moved if they start at the same location.
769 const Expr *CStrArg = Match.getNodeAs<Expr>("arg");
770 return ArgStartPos == CStrArg->getBeginLoc();
771 });
772 CStrRemovalMatch != ArgCStrRemovals.end()) {
773 const std::string ArgText =
774 withoutCStrReplacement(*CStrRemovalMatch, *Context);
775 assert(!ArgText.empty());
776
777 Diag << FixItHint::CreateReplacement(
778 Args[ValueArgIndex - ArgCount]->getSourceRange(), ArgText);
779
780 // That c_str() removal is now dealt with, so we don't need to do it again
781 ArgCStrRemovals.erase(CStrRemovalMatch);
782 } else
783 Diag << tooling::fixit::createReplacement(*Args[ValueArgIndex - ArgCount],
784 *Args[ValueArgIndex], *Context);
785
786 // Now shift down the field width and precision (if either are present) to
787 // accommodate it.
788 for (size_t Offset = 0; Offset < ArgCount; ++Offset)
789 Diag << tooling::fixit::createReplacement(
790 *Args[ValueArgIndex - Offset], *Args[ValueArgIndex - Offset - 1],
791 *Context);
792
793 // Now we need to modify the ArgFix index too so that we fix the right
794 // argument. We don't need to care about the width and precision indices
795 // since they never need fixing.
796 for (auto &ArgFix : ArgFixes) {
797 if (ArgFix.ArgIndex == ValueArgIndex)
798 ArgFix.ArgIndex = ValueArgIndex - ArgCount;
799 }
800 }
801
802 for (const auto &[ArgIndex, Replacement] : ArgFixes) {
803 const SourceLocation AfterOtherSide =
804 Lexer::findNextToken(Args[ArgIndex]->getEndLoc(), SM, LangOpts)
805 ->getLocation();
806
807 Diag << FixItHint::CreateInsertion(Args[ArgIndex]->getBeginLoc(),
808 Replacement, true)
809 << FixItHint::CreateInsertion(AfterOtherSide, ")", true);
810 }
811
812 for (const auto &Match : ArgCStrRemovals) {
813 const auto *Call = Match.getNodeAs<CallExpr>("call");
814 const std::string ArgText = withoutCStrReplacement(Match, *Context);
815 if (!ArgText.empty())
816 Diag << FixItHint::CreateReplacement(Call->getSourceRange(), ArgText);
817 }
818}
819} // namespace clang::tidy::utils
Declaration of the FormatStringConverter class which is used to convert printf format strings to C++ ...
void applyFixes(DiagnosticBuilder &Diag, SourceManager &SM)
Called by the check when it is ready to apply the fixes.
clang::analyze_format_string::ConversionSpecifier ConversionSpecifier
FormatStringConverter(ASTContext *Context, const CallExpr *Call, unsigned FormatArgOffset, Configuration Config, const LangOptions &LO, SourceManager &SM, Preprocessor &PP)
std::vector< std::string > match(const SymbolIndex &I, const FuzzyFindRequest &Req, bool *Incomplete)
std::string formatDereference(const Expr &ExprNode, const ASTContext &Context)
static std::string withoutCStrReplacement(const BoundNodes &CStrRemovalMatch, ASTContext &Context)
static std::optional< std::string > getCorrespondingSignedTypeName(const clang::QualType &QT)
If possible, return the text name of the signed type that corresponds to the passed integer type.
static bool castMismatchedIntegerTypes(const CallExpr *Call, bool StrictMode)
static bool isRealCharType(const clang::QualType &Ty)
Is the passed type the actual "char" type, whether that be signed or unsigned, rather than explicit s...
static std::optional< std::string > castTypeForArgument(ConversionSpecifier::Kind ArgKind, const clang::QualType &QT)
static std::optional< std::string > getCorrespondingUnsignedTypeName(const clang::QualType &QT)
If possible, return the text name of the unsigned type that corresponds to the passed integer type.
static bool isMatchingSignedness(ConversionSpecifier::Kind ArgKind, const clang::QualType &ArgType)
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
static constexpr const char FuncDecl[]