clang-tools 22.0.0git
FormatStringConverter.cpp
Go to the documentation of this file.
1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Implementation of the FormatStringConverter class which is used to convert
11/// printf format strings to C++ std::formatter format strings.
12///
13//===----------------------------------------------------------------------===//
14
17#include "../utils/LexerUtils.h"
18#include "clang/AST/Expr.h"
19#include "clang/ASTMatchers/ASTMatchFinder.h"
20#include "clang/Basic/LangOptions.h"
21#include "clang/Lex/Lexer.h"
22#include "clang/Lex/Preprocessor.h"
23#include "clang/Tooling/FixIt.h"
24#include "llvm/ADT/StringExtras.h"
25#include "llvm/Support/Debug.h"
26
27using namespace clang::ast_matchers;
28using namespace clang::analyze_printf;
29
30namespace clang::tidy::utils {
31using clang::analyze_format_string::ConversionSpecifier;
32
33/// Is the passed type the actual "char" type, whether that be signed or
34/// unsigned, rather than explicit signed char or unsigned char types.
35static bool isRealCharType(const clang::QualType &Ty) {
36 using namespace clang;
37 const Type *DesugaredType = Ty->getUnqualifiedDesugaredType();
38 if (const auto *BT = llvm::dyn_cast<BuiltinType>(DesugaredType))
39 return (BT->getKind() == BuiltinType::Char_U ||
40 BT->getKind() == BuiltinType::Char_S);
41 return false;
42}
43
44/// If possible, return the text name of the signed type that corresponds to the
45/// passed integer type. If the passed type is already signed then its name is
46/// just returned. Only supports BuiltinTypes.
47static std::optional<std::string>
48getCorrespondingSignedTypeName(const clang::QualType &QT) {
49 using namespace clang;
50 const auto UQT = QT.getUnqualifiedType();
51 if (const auto *BT = llvm::dyn_cast<BuiltinType>(UQT)) {
52 switch (BT->getKind()) {
53 case BuiltinType::UChar:
54 case BuiltinType::Char_U:
55 case BuiltinType::SChar:
56 case BuiltinType::Char_S:
57 return "signed char";
58 case BuiltinType::UShort:
59 case BuiltinType::Short:
60 return "short";
61 case BuiltinType::UInt:
62 case BuiltinType::Int:
63 return "int";
64 case BuiltinType::ULong:
65 case BuiltinType::Long:
66 return "long";
67 case BuiltinType::ULongLong:
68 case BuiltinType::LongLong:
69 return "long long";
70 default:
71 llvm::dbgs() << "Unknown corresponding signed type for BuiltinType '"
72 << QT.getAsString() << "'\n";
73 return std::nullopt;
74 }
75 }
76
77 // Deal with fixed-width integer types from <cstdint>. Use std:: prefix only
78 // if the argument type does.
79 const std::string TypeName = UQT.getAsString();
80 StringRef SimplifiedTypeName{TypeName};
81 const bool InStd = SimplifiedTypeName.consume_front("std::");
82 const StringRef Prefix = InStd ? "std::" : "";
83
84 if (SimplifiedTypeName.starts_with("uint") &&
85 SimplifiedTypeName.ends_with("_t"))
86 return (Twine(Prefix) + SimplifiedTypeName.drop_front()).str();
87
88 if (SimplifiedTypeName == "size_t")
89 return (Twine(Prefix) + "ssize_t").str();
90
91 llvm::dbgs() << "Unknown corresponding signed type for non-BuiltinType '"
92 << UQT.getAsString() << "'\n";
93 return std::nullopt;
94}
95
96/// If possible, return the text name of the unsigned type that corresponds to
97/// the passed integer type. If the passed type is already unsigned then its
98/// name is just returned. Only supports BuiltinTypes.
99static std::optional<std::string>
100getCorrespondingUnsignedTypeName(const clang::QualType &QT) {
101 using namespace clang;
102 const auto UQT = QT.getUnqualifiedType();
103 if (const auto *BT = llvm::dyn_cast<BuiltinType>(UQT)) {
104 switch (BT->getKind()) {
105 case BuiltinType::SChar:
106 case BuiltinType::Char_S:
107 case BuiltinType::UChar:
108 case BuiltinType::Char_U:
109 return "unsigned char";
110 case BuiltinType::Short:
111 case BuiltinType::UShort:
112 return "unsigned short";
113 case BuiltinType::Int:
114 case BuiltinType::UInt:
115 return "unsigned int";
116 case BuiltinType::Long:
117 case BuiltinType::ULong:
118 return "unsigned long";
119 case BuiltinType::LongLong:
120 case BuiltinType::ULongLong:
121 return "unsigned long long";
122 default:
123 llvm::dbgs() << "Unknown corresponding unsigned type for BuiltinType '"
124 << UQT.getAsString() << "'\n";
125 return std::nullopt;
126 }
127 }
128
129 // Deal with fixed-width integer types from <cstdint>. Use std:: prefix only
130 // if the argument type does.
131 const std::string TypeName = UQT.getAsString();
132 StringRef SimplifiedTypeName{TypeName};
133 const bool InStd = SimplifiedTypeName.consume_front("std::");
134 const StringRef Prefix = InStd ? "std::" : "";
135
136 if (SimplifiedTypeName.starts_with("int") &&
137 SimplifiedTypeName.ends_with("_t"))
138 return (Twine(Prefix) + "u" + SimplifiedTypeName).str();
139
140 if (SimplifiedTypeName == "ssize_t")
141 return (Twine(Prefix) + "size_t").str();
142 if (SimplifiedTypeName == "ptrdiff_t")
143 return (Twine(Prefix) + "size_t").str();
144
145 llvm::dbgs() << "Unknown corresponding unsigned type for non-BuiltinType '"
146 << UQT.getAsString() << "'\n";
147 return std::nullopt;
148}
149
150static std::optional<std::string>
151castTypeForArgument(ConversionSpecifier::Kind ArgKind,
152 const clang::QualType &QT) {
153 if (ArgKind == ConversionSpecifier::Kind::uArg)
156}
157
158static bool isMatchingSignedness(ConversionSpecifier::Kind ArgKind,
159 const clang::QualType &ArgType) {
160 if (const auto *BT = llvm::dyn_cast<BuiltinType>(ArgType)) {
161 // Unadorned char never matches any expected signedness since it
162 // could be signed or unsigned.
163 const auto ArgTypeKind = BT->getKind();
164 if (ArgTypeKind == BuiltinType::Char_U ||
165 ArgTypeKind == BuiltinType::Char_S)
166 return false;
167 }
168
169 if (ArgKind == ConversionSpecifier::Kind::uArg)
170 return ArgType->isUnsignedIntegerType();
171 return ArgType->isSignedIntegerType();
172}
173
174namespace {
175AST_MATCHER(clang::QualType, isRealChar) {
177}
178} // namespace
179
180static bool castMismatchedIntegerTypes(const CallExpr *Call, bool StrictMode) {
181 /// For printf-style functions, the signedness of the type printed is
182 /// indicated by the corresponding type in the format string.
183 /// std::print will determine the signedness from the type of the
184 /// argument. This means that it is necessary to generate a cast in
185 /// StrictMode to ensure that the exact behaviour is maintained.
186 /// However, for templated functions like absl::PrintF and
187 /// fmt::printf, the signedness of the type printed is also taken from
188 /// the actual argument like std::print, so such casts are never
189 /// necessary. printf-style functions are variadic, whereas templated
190 /// ones aren't, so we can use that to distinguish between the two
191 /// cases.
192 if (StrictMode) {
193 const FunctionDecl *FuncDecl = Call->getDirectCallee();
194 assert(FuncDecl);
195 return FuncDecl->isVariadic();
196 }
197 return false;
198}
199
201 ASTContext *ContextIn, const CallExpr *Call, unsigned FormatArgOffset,
202 const Configuration ConfigIn, const LangOptions &LO, SourceManager &SM,
203 Preprocessor &PP)
204 : Context(ContextIn), Config(ConfigIn),
205 CastMismatchedIntegerTypes(
206 castMismatchedIntegerTypes(Call, ConfigIn.StrictMode)),
207 Args(Call->getArgs()), NumArgs(Call->getNumArgs()),
208 ArgsOffset(FormatArgOffset + 1), LangOpts(LO) {
209 assert(ArgsOffset <= NumArgs);
210 FormatExpr = llvm::dyn_cast<StringLiteral>(
211 Args[FormatArgOffset]->IgnoreUnlessSpelledInSource());
212
213 assert(FormatExpr && FormatExpr->isOrdinary());
214
215 if (const std::optional<StringRef> MaybeMacroName =
216 formatStringContainsUnreplaceableMacro(Call, FormatExpr, SM, PP);
217 MaybeMacroName) {
218 conversionNotPossible(
219 ("format string contains unreplaceable macro '" + *MaybeMacroName + "'")
220 .str());
221 return;
222 }
223
224 PrintfFormatString = FormatExpr->getString();
225
226 // Assume that the output will be approximately the same size as the input,
227 // but perhaps with a few escapes expanded.
228 const size_t EstimatedGrowth = 8;
229 StandardFormatString.reserve(PrintfFormatString.size() + EstimatedGrowth);
230 StandardFormatString.push_back('\"');
231
232 const bool IsFreeBsdkPrintf = false;
233
234 using clang::analyze_format_string::ParsePrintfString;
235 ParsePrintfString(*this, PrintfFormatString.data(),
236 PrintfFormatString.data() + PrintfFormatString.size(),
237 LangOpts, Context->getTargetInfo(), IsFreeBsdkPrintf);
238 finalizeFormatText();
239}
240
241std::optional<StringRef>
242FormatStringConverter::formatStringContainsUnreplaceableMacro(
243 const CallExpr *Call, const StringLiteral *FormatExpr, SourceManager &SM,
244 Preprocessor &PP) {
245 // If a macro invocation surrounds the entire call then we don't want that to
246 // inhibit conversion. The whole format string will appear to come from that
247 // macro, as will the function call.
248 std::optional<StringRef> MaybeSurroundingMacroName;
249 if (const SourceLocation BeginCallLoc = Call->getBeginLoc();
250 BeginCallLoc.isMacroID())
251 MaybeSurroundingMacroName =
252 Lexer::getImmediateMacroName(BeginCallLoc, SM, PP.getLangOpts());
253
254 for (auto I = FormatExpr->tokloc_begin(), E = FormatExpr->tokloc_end();
255 I != E; ++I) {
256 const SourceLocation &TokenLoc = *I;
257 if (TokenLoc.isMacroID()) {
258 const StringRef MacroName =
259 Lexer::getImmediateMacroName(TokenLoc, SM, PP.getLangOpts());
260
261 if (MaybeSurroundingMacroName != MacroName) {
262 // glibc uses __PRI64_PREFIX and __PRIPTR_PREFIX to define the prefixes
263 // for types that change size so we must look for multiple prefixes.
264 if (!MacroName.starts_with("PRI") && !MacroName.starts_with("__PRI"))
265 return MacroName;
266
267 const SourceLocation TokenSpellingLoc = SM.getSpellingLoc(TokenLoc);
268 const OptionalFileEntryRef MaybeFileEntry =
269 SM.getFileEntryRefForID(SM.getFileID(TokenSpellingLoc));
270 if (!MaybeFileEntry)
271 return MacroName;
272
273 HeaderSearch &HS = PP.getHeaderSearchInfo();
274 // Check if the file is a system header
275 if (!isSystem(HS.getFileDirFlavor(*MaybeFileEntry)) ||
276 llvm::sys::path::filename(MaybeFileEntry->getName()) !=
277 "inttypes.h")
278 return MacroName;
279 }
280 }
281 }
282 return std::nullopt;
283}
284
285void FormatStringConverter::emitAlignment(const PrintfSpecifier &FS,
286 std::string &FormatSpec) {
287 const ConversionSpecifier::Kind ArgKind =
288 FS.getConversionSpecifier().getKind();
289
290 // We only care about alignment if a field width is specified
291 if (FS.getFieldWidth().getHowSpecified() != OptionalAmount::NotSpecified) {
292 if (ArgKind == ConversionSpecifier::sArg) {
293 // Strings are left-aligned by default with std::format, so we only
294 // need to emit an alignment if this one needs to be right aligned.
295 if (!FS.isLeftJustified())
296 FormatSpec.push_back('>');
297 } else {
298 // Numbers are right-aligned by default with std::format, so we only
299 // need to emit an alignment if this one needs to be left aligned.
300 if (FS.isLeftJustified())
301 FormatSpec.push_back('<');
302 }
303 }
304}
305
306void FormatStringConverter::emitSign(const PrintfSpecifier &FS,
307 std::string &FormatSpec) {
308 const ConversionSpecifier Spec = FS.getConversionSpecifier();
309
310 // Ignore on something that isn't numeric. For printf it's would be a
311 // compile-time warning but ignored at runtime, but for std::format it
312 // ought to be a compile-time error.
313 if (Spec.isAnyIntArg() || Spec.isDoubleArg()) {
314 // + is preferred to ' '
315 if (FS.hasPlusPrefix())
316 FormatSpec.push_back('+');
317 else if (FS.hasSpacePrefix())
318 FormatSpec.push_back(' ');
319 }
320}
321
322void FormatStringConverter::emitAlternativeForm(const PrintfSpecifier &FS,
323 std::string &FormatSpec) {
324 if (FS.hasAlternativeForm()) {
325 switch (FS.getConversionSpecifier().getKind()) {
326 case ConversionSpecifier::Kind::aArg:
327 case ConversionSpecifier::Kind::AArg:
328 case ConversionSpecifier::Kind::eArg:
329 case ConversionSpecifier::Kind::EArg:
330 case ConversionSpecifier::Kind::fArg:
331 case ConversionSpecifier::Kind::FArg:
332 case ConversionSpecifier::Kind::gArg:
333 case ConversionSpecifier::Kind::GArg:
334 case ConversionSpecifier::Kind::xArg:
335 case ConversionSpecifier::Kind::XArg:
336 case ConversionSpecifier::Kind::oArg:
337 FormatSpec.push_back('#');
338 break;
339 default:
340 // Alternative forms don't exist for other argument kinds
341 break;
342 }
343 }
344}
345
346void FormatStringConverter::emitFieldWidth(const PrintfSpecifier &FS,
347 std::string &FormatSpec) {
348 {
349 const OptionalAmount FieldWidth = FS.getFieldWidth();
350 switch (FieldWidth.getHowSpecified()) {
351 case OptionalAmount::NotSpecified:
352 break;
353 case OptionalAmount::Constant:
354 FormatSpec.append(llvm::utostr(FieldWidth.getConstantAmount()));
355 break;
356 case OptionalAmount::Arg:
357 FormatSpec.push_back('{');
358 if (FieldWidth.usesPositionalArg()) {
359 // std::format argument identifiers are zero-based, whereas printf
360 // ones are one based.
361 assert(FieldWidth.getPositionalArgIndex() > 0U);
362 FormatSpec.append(llvm::utostr(FieldWidth.getPositionalArgIndex() - 1));
363 }
364 FormatSpec.push_back('}');
365 break;
366 case OptionalAmount::Invalid:
367 break;
368 }
369 }
370}
371
372void FormatStringConverter::emitPrecision(const PrintfSpecifier &FS,
373 std::string &FormatSpec) {
374 const OptionalAmount FieldPrecision = FS.getPrecision();
375 switch (FieldPrecision.getHowSpecified()) {
376 case OptionalAmount::NotSpecified:
377 break;
378 case OptionalAmount::Constant:
379 FormatSpec.push_back('.');
380 FormatSpec.append(llvm::utostr(FieldPrecision.getConstantAmount()));
381 break;
382 case OptionalAmount::Arg:
383 FormatSpec.push_back('.');
384 FormatSpec.push_back('{');
385 if (FieldPrecision.usesPositionalArg()) {
386 // std::format argument identifiers are zero-based, whereas printf
387 // ones are one based.
388 assert(FieldPrecision.getPositionalArgIndex() > 0U);
389 FormatSpec.append(
390 llvm::utostr(FieldPrecision.getPositionalArgIndex() - 1));
391 }
392 FormatSpec.push_back('}');
393 break;
394 case OptionalAmount::Invalid:
395 break;
396 }
397}
398
399void FormatStringConverter::maybeRotateArguments(const PrintfSpecifier &FS) {
400 unsigned ArgCount = 0;
401 const OptionalAmount FieldWidth = FS.getFieldWidth();
402 const OptionalAmount FieldPrecision = FS.getPrecision();
403
404 if (FieldWidth.getHowSpecified() == OptionalAmount::Arg &&
405 !FieldWidth.usesPositionalArg())
406 ++ArgCount;
407 if (FieldPrecision.getHowSpecified() == OptionalAmount::Arg &&
408 !FieldPrecision.usesPositionalArg())
409 ++ArgCount;
410
411 if (ArgCount)
412 ArgRotates.emplace_back(FS.getArgIndex() + ArgsOffset, ArgCount);
413}
414
415void FormatStringConverter::emitStringArgument(unsigned ArgIndex,
416 const Expr *Arg) {
417 // If the argument is the result of a call to std::string::c_str() or
418 // data() with a return type of char then we can remove that call and
419 // pass the std::string directly. We don't want to do so if the return
420 // type is not a char pointer (though it's unlikely that such code would
421 // compile without warnings anyway.) See RedundantStringCStrCheck.
422
423 if (!StringCStrCallExprMatcher) {
424 // Lazily create the matcher
425 const auto StringDecl = type(hasUnqualifiedDesugaredType(recordType(
426 hasDeclaration(cxxRecordDecl(hasName("::std::basic_string"))))));
427 const auto StringExpr = expr(
428 anyOf(hasType(StringDecl), hasType(qualType(pointsTo(StringDecl)))));
429
430 StringCStrCallExprMatcher =
431 cxxMemberCallExpr(
432 on(StringExpr.bind("arg")), callee(memberExpr().bind("member")),
433 callee(cxxMethodDecl(hasAnyName("c_str", "data"),
434 returns(pointerType(pointee(isRealChar()))))))
435 .bind("call");
436 }
437
438 auto CStrMatches = match(*StringCStrCallExprMatcher, *Arg, *Context);
439 if (CStrMatches.size() == 1)
440 ArgCStrRemovals.push_back(CStrMatches.front());
441 else if (Arg->getType()->isPointerType()) {
442 const QualType Pointee = Arg->getType()->getPointeeType();
443 // printf is happy to print signed char and unsigned char strings, but
444 // std::format only likes char strings.
445 if (Pointee->isCharType() && !isRealCharType(Pointee))
446 ArgFixes.emplace_back(ArgIndex, "reinterpret_cast<const char *>(");
447 }
448}
449
450bool FormatStringConverter::emitIntegerArgument(
451 ConversionSpecifier::Kind ArgKind, const Expr *Arg, unsigned ArgIndex,
452 std::string &FormatSpec) {
453 const clang::QualType &ArgType = Arg->getType();
454 if (ArgType->isBooleanType()) {
455 // std::format will print bool as either "true" or "false" by default,
456 // but printf prints them as "0" or "1". Be compatible with printf by
457 // requesting decimal output.
458 FormatSpec.push_back('d');
459 } else if (ArgType->isEnumeralType()) {
460 // std::format will try to find a specialization to print the enum
461 // (and probably fail), whereas printf would have just expected it to
462 // be passed as its underlying type. However, printf will have forced
463 // the signedness based on the format string, so we need to do the
464 // same.
465 if (const auto *ED = ArgType->getAsEnumDecl()) {
466 if (const std::optional<std::string> MaybeCastType =
467 castTypeForArgument(ArgKind, ED->getIntegerType()))
468 ArgFixes.emplace_back(
469 ArgIndex, (Twine("static_cast<") + *MaybeCastType + ">(").str());
470 else
471 return conversionNotPossible(
472 (Twine("argument ") + Twine(ArgIndex) + " has unexpected enum type")
473 .str());
474 }
475 } else if (CastMismatchedIntegerTypes &&
476 !isMatchingSignedness(ArgKind, ArgType)) {
477 // printf will happily print an unsigned type as signed if told to.
478 // Even -Wformat doesn't warn for this. std::format will format as
479 // unsigned unless we cast it.
480 if (const std::optional<std::string> MaybeCastType =
481 castTypeForArgument(ArgKind, ArgType))
482 ArgFixes.emplace_back(
483 ArgIndex, (Twine("static_cast<") + *MaybeCastType + ">(").str());
484 else
485 return conversionNotPossible(
486 (Twine("argument ") + Twine(ArgIndex) + " cannot be cast to " +
487 Twine(ArgKind == ConversionSpecifier::Kind::uArg ? "unsigned"
488 : "signed") +
489 " integer type to match format"
490 " specifier and StrictMode is enabled")
491 .str());
492 } else if (isRealCharType(ArgType) || !ArgType->isIntegerType()) {
493 // Only specify integer if the argument is of a different type
494 FormatSpec.push_back('d');
495 }
496 return true;
497}
498
499/// Append the corresponding standard format string type fragment to FormatSpec,
500/// and store any argument fixes for later application.
501/// @returns true on success, false on failure
502bool FormatStringConverter::emitType(const PrintfSpecifier &FS, const Expr *Arg,
503 std::string &FormatSpec) {
504 const ConversionSpecifier::Kind ArgKind =
505 FS.getConversionSpecifier().getKind();
506 switch (ArgKind) {
507 case ConversionSpecifier::Kind::sArg:
508 emitStringArgument(FS.getArgIndex() + ArgsOffset, Arg);
509 break;
510 case ConversionSpecifier::Kind::cArg:
511 // The type must be "c" to get a character unless the type is exactly
512 // char (whether that be signed or unsigned for the target.)
513 if (!isRealCharType(Arg->getType()))
514 FormatSpec.push_back('c');
515 break;
516 case ConversionSpecifier::Kind::dArg:
517 case ConversionSpecifier::Kind::iArg:
518 case ConversionSpecifier::Kind::uArg:
519 if (!emitIntegerArgument(ArgKind, Arg, FS.getArgIndex() + ArgsOffset,
520 FormatSpec))
521 return false;
522 break;
523 case ConversionSpecifier::Kind::pArg: {
524 const clang::QualType &ArgType = Arg->getType();
525 // std::format knows how to format void pointers and nullptrs
526 if (!ArgType->isNullPtrType() && !ArgType->isVoidPointerType())
527 ArgFixes.emplace_back(FS.getArgIndex() + ArgsOffset,
528 "static_cast<const void *>(");
529 break;
530 }
531 case ConversionSpecifier::Kind::xArg:
532 FormatSpec.push_back('x');
533 break;
534 case ConversionSpecifier::Kind::XArg:
535 FormatSpec.push_back('X');
536 break;
537 case ConversionSpecifier::Kind::oArg:
538 FormatSpec.push_back('o');
539 break;
540 case ConversionSpecifier::Kind::aArg:
541 FormatSpec.push_back('a');
542 break;
543 case ConversionSpecifier::Kind::AArg:
544 FormatSpec.push_back('A');
545 break;
546 case ConversionSpecifier::Kind::eArg:
547 FormatSpec.push_back('e');
548 break;
549 case ConversionSpecifier::Kind::EArg:
550 FormatSpec.push_back('E');
551 break;
552 case ConversionSpecifier::Kind::fArg:
553 FormatSpec.push_back('f');
554 break;
555 case ConversionSpecifier::Kind::FArg:
556 FormatSpec.push_back('F');
557 break;
558 case ConversionSpecifier::Kind::gArg:
559 FormatSpec.push_back('g');
560 break;
561 case ConversionSpecifier::Kind::GArg:
562 FormatSpec.push_back('G');
563 break;
564 default:
565 // Something we don't understand
566 return conversionNotPossible((Twine("argument ") +
567 Twine(FS.getArgIndex() + ArgsOffset) +
568 " has an unsupported format specifier")
569 .str());
570 }
571
572 return true;
573}
574
575/// Append the standard format string equivalent of the passed PrintfSpecifier
576/// to StandardFormatString and store any argument fixes for later application.
577/// @returns true on success, false on failure
578bool FormatStringConverter::convertArgument(const PrintfSpecifier &FS,
579 const Expr *Arg,
580 std::string &StandardFormatString) {
581 // The specifier must have an associated argument
582 assert(FS.consumesDataArgument());
583
584 StandardFormatString.push_back('{');
585
586 if (FS.usesPositionalArg()) {
587 // std::format argument identifiers are zero-based, whereas printf ones
588 // are one based.
589 assert(FS.getPositionalArgIndex() > 0U);
590 StandardFormatString.append(llvm::utostr(FS.getPositionalArgIndex() - 1));
591 }
592
593 // std::format format argument parts to potentially emit:
594 // [[fill]align][sign]["#"]["0"][width]["."precision][type]
595 std::string FormatSpec;
596
597 // printf doesn't support specifying the fill character - it's always a
598 // space, so we never need to generate one.
599
600 emitAlignment(FS, FormatSpec);
601 emitSign(FS, FormatSpec);
602 emitAlternativeForm(FS, FormatSpec);
603
604 if (FS.hasLeadingZeros())
605 FormatSpec.push_back('0');
606
607 emitFieldWidth(FS, FormatSpec);
608 emitPrecision(FS, FormatSpec);
609 maybeRotateArguments(FS);
610
611 if (!emitType(FS, Arg, FormatSpec))
612 return false;
613
614 if (!FormatSpec.empty()) {
615 StandardFormatString.push_back(':');
616 StandardFormatString.append(FormatSpec);
617 }
618
619 StandardFormatString.push_back('}');
620 return true;
621}
622
623/// Called for each format specifier by ParsePrintfString.
624bool FormatStringConverter::HandlePrintfSpecifier(const PrintfSpecifier &FS,
625 const char *StartSpecifier,
626 unsigned SpecifierLen,
627 const TargetInfo &Target) {
628 const size_t StartSpecifierPos = StartSpecifier - PrintfFormatString.data();
629 assert(StartSpecifierPos + SpecifierLen <= PrintfFormatString.size());
630
631 // Everything before the specifier needs copying verbatim
632 assert(StartSpecifierPos >= PrintfFormatStringPos);
633
634 appendFormatText(StringRef(PrintfFormatString.begin() + PrintfFormatStringPos,
635 StartSpecifierPos - PrintfFormatStringPos));
636
637 const ConversionSpecifier::Kind ArgKind =
638 FS.getConversionSpecifier().getKind();
639
640 // Skip over specifier
641 PrintfFormatStringPos = StartSpecifierPos + SpecifierLen;
642 assert(PrintfFormatStringPos <= PrintfFormatString.size());
643
644 FormatStringNeededRewriting = true;
645
646 if (ArgKind == ConversionSpecifier::Kind::nArg) {
647 // std::print doesn't do the equivalent of %n
648 return conversionNotPossible("'%n' is not supported in format string");
649 }
650
651 if (ArgKind == ConversionSpecifier::Kind::PrintErrno) {
652 // std::print doesn't support %m. In theory we could insert a
653 // strerror(errno) parameter (assuming that libc has a thread-safe
654 // implementation, which glibc does), but that would require keeping track
655 // of the input and output parameter indices for position arguments too.
656 return conversionNotPossible("'%m' is not supported in format string");
657 }
658
659 if (ArgKind == ConversionSpecifier::PercentArg) {
660 StandardFormatString.push_back('%');
661 return true;
662 }
663
664 const unsigned ArgIndex = FS.getArgIndex() + ArgsOffset;
665 if (ArgIndex >= NumArgs) {
666 // Argument index out of range. Give up.
667 return conversionNotPossible(
668 (Twine("argument index ") + Twine(ArgIndex) + " is out of range")
669 .str());
670 }
671
672 return convertArgument(FS, Args[ArgIndex]->IgnoreImplicitAsWritten(),
673 StandardFormatString);
674}
675
676/// Called at the very end just before applying fixes to capture the last part
677/// of the format string.
678void FormatStringConverter::finalizeFormatText() {
679 appendFormatText(
680 StringRef(PrintfFormatString.begin() + PrintfFormatStringPos,
681 PrintfFormatString.size() - PrintfFormatStringPos));
682 PrintfFormatStringPos = PrintfFormatString.size();
683
684 // It's clearer to convert printf("Hello\r\n"); to std::print("Hello\r\n")
685 // than to std::println("Hello\r");
686 // Use StringRef until C++20 std::string::ends_with() is available.
687 const auto StandardFormatStringRef = StringRef(StandardFormatString);
688 if (Config.AllowTrailingNewlineRemoval &&
689 StandardFormatStringRef.ends_with("\\n") &&
690 !StandardFormatStringRef.ends_with("\\\\n") &&
691 !StandardFormatStringRef.ends_with("\\r\\n")) {
692 UsePrintNewlineFunction = true;
693 FormatStringNeededRewriting = true;
694 StandardFormatString.erase(StandardFormatString.end() - 2,
695 StandardFormatString.end());
696 }
697
698 StandardFormatString.push_back('\"');
699}
700
701/// Append literal parts of the format text, reinstating escapes as required.
702void FormatStringConverter::appendFormatText(const StringRef Text) {
703 for (const char Ch : Text) {
704 const auto UCh = static_cast<unsigned char>(Ch);
705 if (Ch == '\a')
706 StandardFormatString += "\\a";
707 else if (Ch == '\b')
708 StandardFormatString += "\\b";
709 else if (Ch == '\f')
710 StandardFormatString += "\\f";
711 else if (Ch == '\n')
712 StandardFormatString += "\\n";
713 else if (Ch == '\r')
714 StandardFormatString += "\\r";
715 else if (Ch == '\t')
716 StandardFormatString += "\\t";
717 else if (Ch == '\v')
718 StandardFormatString += "\\v";
719 else if (Ch == '\"')
720 StandardFormatString += "\\\"";
721 else if (Ch == '\\')
722 StandardFormatString += "\\\\";
723 else if (Ch == '{') {
724 StandardFormatString += "{{";
725 FormatStringNeededRewriting = true;
726 } else if (Ch == '}') {
727 StandardFormatString += "}}";
728 FormatStringNeededRewriting = true;
729 } else if (UCh < 32) {
730 StandardFormatString += "\\x";
731 StandardFormatString += llvm::hexdigit(UCh >> 4, true);
732 StandardFormatString += llvm::hexdigit(UCh & 0xf, true);
733 } else
734 StandardFormatString += Ch;
735 }
736}
737
738static std::string withoutCStrReplacement(const BoundNodes &CStrRemovalMatch,
739 ASTContext &Context) {
740 const auto *Arg = CStrRemovalMatch.getNodeAs<Expr>("arg");
741 const auto *Member = CStrRemovalMatch.getNodeAs<MemberExpr>("member");
742 const bool Arrow = Member->isArrow();
743 return Arrow ? utils::fixit::formatDereference(*Arg, Context)
744 : tooling::fixit::getText(*Arg, Context).str();
745}
746
747/// Called by the check when it is ready to apply the fixes.
748void FormatStringConverter::applyFixes(DiagnosticBuilder &Diag,
749 SourceManager &SM) {
750 if (FormatStringNeededRewriting) {
751 Diag << FixItHint::CreateReplacement(
752 CharSourceRange::getTokenRange(FormatExpr->getBeginLoc(),
753 FormatExpr->getEndLoc()),
754 StandardFormatString);
755 }
756
757 // ArgCount is one less than the number of arguments to be rotated.
758 for (auto [ValueArgIndex, ArgCount] : ArgRotates) {
759 assert(ValueArgIndex < NumArgs);
760 assert(ValueArgIndex > ArgCount);
761
762 // First move the value argument to the right place. But if there's a
763 // pending c_str() removal then we must do that at the same time.
764 if (const auto CStrRemovalMatch =
765 std::find_if(ArgCStrRemovals.cbegin(), ArgCStrRemovals.cend(),
766 [ArgStartPos = Args[ValueArgIndex]->getBeginLoc()](
767 const BoundNodes &Match) {
768 // This c_str() removal corresponds to the argument
769 // being moved if they start at the same location.
770 const Expr *CStrArg = Match.getNodeAs<Expr>("arg");
771 return ArgStartPos == CStrArg->getBeginLoc();
772 });
773 CStrRemovalMatch != ArgCStrRemovals.end()) {
774 const std::string ArgText =
775 withoutCStrReplacement(*CStrRemovalMatch, *Context);
776 assert(!ArgText.empty());
777
778 Diag << FixItHint::CreateReplacement(
779 Args[ValueArgIndex - ArgCount]->getSourceRange(), ArgText);
780
781 // That c_str() removal is now dealt with, so we don't need to do it again
782 ArgCStrRemovals.erase(CStrRemovalMatch);
783 } else
784 Diag << tooling::fixit::createReplacement(*Args[ValueArgIndex - ArgCount],
785 *Args[ValueArgIndex], *Context);
786
787 // Now shift down the field width and precision (if either are present) to
788 // accommodate it.
789 for (size_t Offset = 0; Offset < ArgCount; ++Offset)
790 Diag << tooling::fixit::createReplacement(
791 *Args[ValueArgIndex - Offset], *Args[ValueArgIndex - Offset - 1],
792 *Context);
793
794 // Now we need to modify the ArgFix index too so that we fix the right
795 // argument. We don't need to care about the width and precision indices
796 // since they never need fixing.
797 for (auto &ArgFix : ArgFixes)
798 if (ArgFix.ArgIndex == ValueArgIndex)
799 ArgFix.ArgIndex = ValueArgIndex - ArgCount;
800 }
801
802 for (const auto &[ArgIndex, Replacement] : ArgFixes) {
803 const SourceLocation AfterOtherSide =
804 utils::lexer::findNextTokenSkippingComments(Args[ArgIndex]->getEndLoc(),
805 SM, LangOpts)
806 ->getLocation();
807
808 Diag << FixItHint::CreateInsertion(Args[ArgIndex]->getBeginLoc(),
809 Replacement, true)
810 << FixItHint::CreateInsertion(AfterOtherSide, ")", true);
811 }
812
813 for (const auto &Match : ArgCStrRemovals) {
814 const auto *Call = Match.getNodeAs<CallExpr>("call");
815 const std::string ArgText = withoutCStrReplacement(Match, *Context);
816 if (!ArgText.empty())
817 Diag << FixItHint::CreateReplacement(Call->getSourceRange(), ArgText);
818 }
819}
820} // namespace clang::tidy::utils
Declaration of the FormatStringConverter class which is used to convert printf format strings to C++ ...
void applyFixes(DiagnosticBuilder &Diag, SourceManager &SM)
Called by the check when it is ready to apply the fixes.
clang::analyze_format_string::ConversionSpecifier ConversionSpecifier
FormatStringConverter(ASTContext *Context, const CallExpr *Call, unsigned FormatArgOffset, Configuration Config, const LangOptions &LO, SourceManager &SM, Preprocessor &PP)
std::vector< std::string > match(const SymbolIndex &I, const FuzzyFindRequest &Req, bool *Incomplete)
std::string formatDereference(const Expr &ExprNode, const ASTContext &Context)
std::optional< Token > findNextTokenSkippingComments(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts)
Definition LexerUtils.h:101
static std::string withoutCStrReplacement(const BoundNodes &CStrRemovalMatch, ASTContext &Context)
static std::optional< std::string > getCorrespondingSignedTypeName(const clang::QualType &QT)
If possible, return the text name of the signed type that corresponds to the passed integer type.
static bool castMismatchedIntegerTypes(const CallExpr *Call, bool StrictMode)
static bool isRealCharType(const clang::QualType &Ty)
Is the passed type the actual "char" type, whether that be signed or unsigned, rather than explicit s...
static std::optional< std::string > castTypeForArgument(ConversionSpecifier::Kind ArgKind, const clang::QualType &QT)
static std::optional< std::string > getCorrespondingUnsignedTypeName(const clang::QualType &QT)
If possible, return the text name of the unsigned type that corresponds to the passed integer type.
static bool isMatchingSignedness(ConversionSpecifier::Kind ArgKind, const clang::QualType &ArgType)
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
static constexpr const char FuncDecl[]