clang-tools 23.0.0git
FormatStringConverter.cpp
Go to the documentation of this file.
1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Implementation of the FormatStringConverter class which is used to convert
11/// printf format strings to C++ std::formatter format strings.
12///
13//===----------------------------------------------------------------------===//
14
17#include "../utils/LexerUtils.h"
18#include "clang/AST/Expr.h"
19#include "clang/ASTMatchers/ASTMatchFinder.h"
20#include "clang/Basic/LangOptions.h"
21#include "clang/Lex/Lexer.h"
22#include "clang/Lex/Preprocessor.h"
23#include "clang/Tooling/FixIt.h"
24#include "llvm/ADT/StringExtras.h"
25#include "llvm/Support/Debug.h"
26
27using namespace clang::ast_matchers;
28using namespace clang::analyze_printf;
29
30namespace clang::tidy::utils {
31using clang::analyze_format_string::ConversionSpecifier;
32
33/// Is the passed type the actual "char" type, whether that be signed or
34/// unsigned, rather than explicit signed char or unsigned char types.
35static bool isRealCharType(const QualType &Ty) {
36 using namespace clang;
37 const Type *DesugaredType = Ty->getUnqualifiedDesugaredType();
38 if (const auto *BT = dyn_cast<BuiltinType>(DesugaredType))
39 return (BT->getKind() == BuiltinType::Char_U ||
40 BT->getKind() == BuiltinType::Char_S);
41 return false;
42}
43
44/// If possible, return the text name of the signed type that corresponds to the
45/// passed integer type. If the passed type is already signed then its name is
46/// just returned. Only supports BuiltinTypes.
47static std::optional<std::string>
48getCorrespondingSignedTypeName(const QualType &QT) {
49 using namespace clang;
50 const auto UQT = QT.getUnqualifiedType();
51 if (const auto *BT = dyn_cast<BuiltinType>(UQT)) {
52 switch (BT->getKind()) {
53 case BuiltinType::UChar:
54 case BuiltinType::Char_U:
55 case BuiltinType::SChar:
56 case BuiltinType::Char_S:
57 return "signed char";
58 case BuiltinType::UShort:
59 case BuiltinType::Short:
60 return "short";
61 case BuiltinType::UInt:
62 case BuiltinType::Int:
63 return "int";
64 case BuiltinType::ULong:
65 case BuiltinType::Long:
66 return "long";
67 case BuiltinType::ULongLong:
68 case BuiltinType::LongLong:
69 return "long long";
70 default:
71 llvm::dbgs() << "Unknown corresponding signed type for BuiltinType '"
72 << QT.getAsString() << "'\n";
73 return std::nullopt;
74 }
75 }
76
77 // Deal with fixed-width integer types from <cstdint>. Use std:: prefix only
78 // if the argument type does.
79 const std::string TypeName = UQT.getAsString();
80 StringRef SimplifiedTypeName{TypeName};
81 const bool InStd = SimplifiedTypeName.consume_front("std::");
82 const StringRef Prefix = InStd ? "std::" : "";
83
84 if (SimplifiedTypeName.starts_with("uint") &&
85 SimplifiedTypeName.ends_with("_t"))
86 return (Twine(Prefix) + SimplifiedTypeName.drop_front()).str();
87
88 if (SimplifiedTypeName == "size_t")
89 return (Twine(Prefix) + "ssize_t").str();
90
91 llvm::dbgs() << "Unknown corresponding signed type for non-BuiltinType '"
92 << UQT.getAsString() << "'\n";
93 return std::nullopt;
94}
95
96/// If possible, return the text name of the unsigned type that corresponds to
97/// the passed integer type. If the passed type is already unsigned then its
98/// name is just returned. Only supports BuiltinTypes.
99static std::optional<std::string>
101 using namespace clang;
102 const auto UQT = QT.getUnqualifiedType();
103 if (const auto *BT = dyn_cast<BuiltinType>(UQT)) {
104 switch (BT->getKind()) {
105 case BuiltinType::SChar:
106 case BuiltinType::Char_S:
107 case BuiltinType::UChar:
108 case BuiltinType::Char_U:
109 return "unsigned char";
110 case BuiltinType::Short:
111 case BuiltinType::UShort:
112 return "unsigned short";
113 case BuiltinType::Int:
114 case BuiltinType::UInt:
115 return "unsigned int";
116 case BuiltinType::Long:
117 case BuiltinType::ULong:
118 return "unsigned long";
119 case BuiltinType::LongLong:
120 case BuiltinType::ULongLong:
121 return "unsigned long long";
122 default:
123 llvm::dbgs() << "Unknown corresponding unsigned type for BuiltinType '"
124 << UQT.getAsString() << "'\n";
125 return std::nullopt;
126 }
127 }
128
129 // Deal with fixed-width integer types from <cstdint>. Use std:: prefix only
130 // if the argument type does.
131 const std::string TypeName = UQT.getAsString();
132 StringRef SimplifiedTypeName{TypeName};
133 const bool InStd = SimplifiedTypeName.consume_front("std::");
134 const StringRef Prefix = InStd ? "std::" : "";
135
136 if (SimplifiedTypeName.starts_with("int") &&
137 SimplifiedTypeName.ends_with("_t"))
138 return (Twine(Prefix) + "u" + SimplifiedTypeName).str();
139
140 if (SimplifiedTypeName == "ssize_t")
141 return (Twine(Prefix) + "size_t").str();
142 if (SimplifiedTypeName == "ptrdiff_t")
143 return (Twine(Prefix) + "size_t").str();
144
145 llvm::dbgs() << "Unknown corresponding unsigned type for non-BuiltinType '"
146 << UQT.getAsString() << "'\n";
147 return std::nullopt;
148}
149
150static std::optional<std::string>
151castTypeForArgument(ConversionSpecifier::Kind ArgKind, const QualType &QT) {
152 if (ArgKind == ConversionSpecifier::Kind::uArg)
155}
156
157static bool isMatchingSignedness(ConversionSpecifier::Kind ArgKind,
158 const QualType &ArgType) {
159 if (const auto *BT = dyn_cast<BuiltinType>(ArgType)) {
160 // Unadorned char never matches any expected signedness since it
161 // could be signed or unsigned.
162 const auto ArgTypeKind = BT->getKind();
163 if (ArgTypeKind == BuiltinType::Char_U ||
164 ArgTypeKind == BuiltinType::Char_S)
165 return false;
166 }
167
168 if (ArgKind == ConversionSpecifier::Kind::uArg)
169 return ArgType->isUnsignedIntegerType();
170 return ArgType->isSignedIntegerType();
171}
172
173namespace {
174AST_MATCHER(QualType, isRealChar) { return utils::isRealCharType(Node); }
175} // namespace
176
177static bool castMismatchedIntegerTypes(const CallExpr *Call, bool StrictMode) {
178 /// For printf-style functions, the signedness of the type printed is
179 /// indicated by the corresponding type in the format string.
180 /// std::print will determine the signedness from the type of the
181 /// argument. This means that it is necessary to generate a cast in
182 /// StrictMode to ensure that the exact behaviour is maintained.
183 /// However, for templated functions like absl::PrintF and
184 /// fmt::printf, the signedness of the type printed is also taken from
185 /// the actual argument like std::print, so such casts are never
186 /// necessary. printf-style functions are variadic, whereas templated
187 /// ones aren't, so we can use that to distinguish between the two
188 /// cases.
189 if (StrictMode) {
190 const FunctionDecl *FuncDecl = Call->getDirectCallee();
191 assert(FuncDecl);
192 return FuncDecl->isVariadic();
193 }
194 return false;
195}
196
198 ASTContext *ContextIn, const CallExpr *Call, unsigned FormatArgOffset,
199 const Configuration ConfigIn, const LangOptions &LO, SourceManager &SM,
200 Preprocessor &PP)
201 : Context(ContextIn), Config(ConfigIn),
202 CastMismatchedIntegerTypes(
203 castMismatchedIntegerTypes(Call, ConfigIn.StrictMode)),
204 Args(Call->getArgs()), NumArgs(Call->getNumArgs()),
205 ArgsOffset(FormatArgOffset + 1), LangOpts(LO) {
206 assert(ArgsOffset <= NumArgs);
207 FormatExpr = dyn_cast<StringLiteral>(
208 Args[FormatArgOffset]->IgnoreUnlessSpelledInSource());
209
210 assert(FormatExpr && FormatExpr->isOrdinary());
211
212 if (const std::optional<StringRef> MaybeMacroName =
213 formatStringContainsUnreplaceableMacro(Call, FormatExpr, SM, PP);
214 MaybeMacroName) {
215 conversionNotPossible(
216 ("format string contains unreplaceable macro '" + *MaybeMacroName + "'")
217 .str());
218 return;
219 }
220
221 PrintfFormatString = FormatExpr->getString();
222
223 // Assume that the output will be approximately the same size as the input,
224 // but perhaps with a few escapes expanded.
225 const size_t EstimatedGrowth = 8;
226 StandardFormatString.reserve(PrintfFormatString.size() + EstimatedGrowth);
227 StandardFormatString.push_back('\"');
228
229 const bool IsFreeBsdkPrintf = false;
230
231 using clang::analyze_format_string::ParsePrintfString;
232 ParsePrintfString(*this, PrintfFormatString.data(),
233 PrintfFormatString.data() + PrintfFormatString.size(),
234 LangOpts, Context->getTargetInfo(), IsFreeBsdkPrintf);
235 finalizeFormatText();
236}
237
238std::optional<StringRef>
239FormatStringConverter::formatStringContainsUnreplaceableMacro(
240 const CallExpr *Call, const StringLiteral *FormatExpr, SourceManager &SM,
241 Preprocessor &PP) {
242 // If a macro invocation surrounds the entire call then we don't want that to
243 // inhibit conversion. The whole format string will appear to come from that
244 // macro, as will the function call.
245 std::optional<StringRef> MaybeSurroundingMacroName;
246 if (const SourceLocation BeginCallLoc = Call->getBeginLoc();
247 BeginCallLoc.isMacroID())
248 MaybeSurroundingMacroName =
249 Lexer::getImmediateMacroName(BeginCallLoc, SM, PP.getLangOpts());
250
251 for (auto I = FormatExpr->tokloc_begin(), E = FormatExpr->tokloc_end();
252 I != E; ++I) {
253 const SourceLocation &TokenLoc = *I;
254 if (TokenLoc.isMacroID()) {
255 const StringRef MacroName =
256 Lexer::getImmediateMacroName(TokenLoc, SM, PP.getLangOpts());
257
258 if (MaybeSurroundingMacroName != MacroName) {
259 // glibc uses __PRI64_PREFIX and __PRIPTR_PREFIX to define the prefixes
260 // for types that change size so we must look for multiple prefixes.
261 if (!MacroName.starts_with("PRI") && !MacroName.starts_with("__PRI"))
262 return MacroName;
263
264 const SourceLocation TokenSpellingLoc = SM.getSpellingLoc(TokenLoc);
265 const OptionalFileEntryRef MaybeFileEntry =
266 SM.getFileEntryRefForID(SM.getFileID(TokenSpellingLoc));
267 if (!MaybeFileEntry)
268 return MacroName;
269
270 HeaderSearch &HS = PP.getHeaderSearchInfo();
271 // Check if the file is a system header
272 if (!isSystem(HS.getFileDirFlavor(*MaybeFileEntry)) ||
273 llvm::sys::path::filename(MaybeFileEntry->getName()) !=
274 "inttypes.h")
275 return MacroName;
276 }
277 }
278 }
279 return std::nullopt;
280}
281
282void FormatStringConverter::emitAlignment(const PrintfSpecifier &FS,
283 std::string &FormatSpec) {
284 const ConversionSpecifier::Kind ArgKind =
285 FS.getConversionSpecifier().getKind();
286
287 // We only care about alignment if a field width is specified
288 if (FS.getFieldWidth().getHowSpecified() != OptionalAmount::NotSpecified) {
289 if (ArgKind == ConversionSpecifier::sArg) {
290 // Strings are left-aligned by default with std::format, so we only
291 // need to emit an alignment if this one needs to be right aligned.
292 if (!FS.isLeftJustified())
293 FormatSpec.push_back('>');
294 } else {
295 // Numbers are right-aligned by default with std::format, so we only
296 // need to emit an alignment if this one needs to be left aligned.
297 if (FS.isLeftJustified())
298 FormatSpec.push_back('<');
299 }
300 }
301}
302
303void FormatStringConverter::emitSign(const PrintfSpecifier &FS,
304 std::string &FormatSpec) {
305 const ConversionSpecifier Spec = FS.getConversionSpecifier();
306
307 // Ignore on something that isn't numeric. For printf it's would be a
308 // compile-time warning but ignored at runtime, but for std::format it
309 // ought to be a compile-time error.
310 if (Spec.isAnyIntArg() || Spec.isDoubleArg()) {
311 // + is preferred to ' '
312 if (FS.hasPlusPrefix())
313 FormatSpec.push_back('+');
314 else if (FS.hasSpacePrefix())
315 FormatSpec.push_back(' ');
316 }
317}
318
319void FormatStringConverter::emitAlternativeForm(const PrintfSpecifier &FS,
320 std::string &FormatSpec) {
321 if (FS.hasAlternativeForm()) {
322 switch (FS.getConversionSpecifier().getKind()) {
323 case ConversionSpecifier::Kind::aArg:
324 case ConversionSpecifier::Kind::AArg:
325 case ConversionSpecifier::Kind::eArg:
326 case ConversionSpecifier::Kind::EArg:
327 case ConversionSpecifier::Kind::fArg:
328 case ConversionSpecifier::Kind::FArg:
329 case ConversionSpecifier::Kind::gArg:
330 case ConversionSpecifier::Kind::GArg:
331 case ConversionSpecifier::Kind::xArg:
332 case ConversionSpecifier::Kind::XArg:
333 case ConversionSpecifier::Kind::oArg:
334 FormatSpec.push_back('#');
335 break;
336 default:
337 // Alternative forms don't exist for other argument kinds
338 break;
339 }
340 }
341}
342
343void FormatStringConverter::emitFieldWidth(const PrintfSpecifier &FS,
344 std::string &FormatSpec) {
345 {
346 const OptionalAmount FieldWidth = FS.getFieldWidth();
347 switch (FieldWidth.getHowSpecified()) {
348 case OptionalAmount::NotSpecified:
349 break;
350 case OptionalAmount::Constant:
351 FormatSpec.append(llvm::utostr(FieldWidth.getConstantAmount()));
352 break;
353 case OptionalAmount::Arg:
354 FormatSpec.push_back('{');
355 if (FieldWidth.usesPositionalArg()) {
356 // std::format argument identifiers are zero-based, whereas printf
357 // ones are one based.
358 assert(FieldWidth.getPositionalArgIndex() > 0U);
359 FormatSpec.append(llvm::utostr(FieldWidth.getPositionalArgIndex() - 1));
360 }
361 FormatSpec.push_back('}');
362 break;
363 case OptionalAmount::Invalid:
364 break;
365 }
366 }
367}
368
369void FormatStringConverter::emitPrecision(const PrintfSpecifier &FS,
370 std::string &FormatSpec) {
371 const OptionalAmount FieldPrecision = FS.getPrecision();
372 switch (FieldPrecision.getHowSpecified()) {
373 case OptionalAmount::NotSpecified:
374 break;
375 case OptionalAmount::Constant:
376 FormatSpec.push_back('.');
377 FormatSpec.append(llvm::utostr(FieldPrecision.getConstantAmount()));
378 break;
379 case OptionalAmount::Arg:
380 FormatSpec.push_back('.');
381 FormatSpec.push_back('{');
382 if (FieldPrecision.usesPositionalArg()) {
383 // std::format argument identifiers are zero-based, whereas printf
384 // ones are one based.
385 assert(FieldPrecision.getPositionalArgIndex() > 0U);
386 FormatSpec.append(
387 llvm::utostr(FieldPrecision.getPositionalArgIndex() - 1));
388 }
389 FormatSpec.push_back('}');
390 break;
391 case OptionalAmount::Invalid:
392 break;
393 }
394}
395
396void FormatStringConverter::maybeRotateArguments(const PrintfSpecifier &FS) {
397 unsigned ArgCount = 0;
398 const OptionalAmount FieldWidth = FS.getFieldWidth();
399 const OptionalAmount FieldPrecision = FS.getPrecision();
400
401 if (FieldWidth.getHowSpecified() == OptionalAmount::Arg &&
402 !FieldWidth.usesPositionalArg())
403 ++ArgCount;
404 if (FieldPrecision.getHowSpecified() == OptionalAmount::Arg &&
405 !FieldPrecision.usesPositionalArg())
406 ++ArgCount;
407
408 if (ArgCount)
409 ArgRotates.emplace_back(FS.getArgIndex() + ArgsOffset, ArgCount);
410}
411
412void FormatStringConverter::emitStringArgument(unsigned ArgIndex,
413 const Expr *Arg) {
414 // If the argument is the result of a call to std::string::c_str() or
415 // data() with a return type of char then we can remove that call and
416 // pass the std::string directly. We don't want to do so if the return
417 // type is not a char pointer (though it's unlikely that such code would
418 // compile without warnings anyway.) See RedundantStringCStrCheck.
419
420 if (!StringCStrCallExprMatcher) {
421 // Lazily create the matcher
422 const auto StringDecl = type(hasUnqualifiedDesugaredType(recordType(
423 hasDeclaration(cxxRecordDecl(hasName("::std::basic_string"))))));
424 const auto StringExpr = expr(
425 anyOf(hasType(StringDecl), hasType(qualType(pointsTo(StringDecl)))));
426
427 StringCStrCallExprMatcher =
428 cxxMemberCallExpr(
429 on(StringExpr.bind("arg")), callee(memberExpr().bind("member")),
430 callee(cxxMethodDecl(hasAnyName("c_str", "data"),
431 returns(pointerType(pointee(isRealChar()))))))
432 .bind("call");
433 }
434
435 auto CStrMatches = match(*StringCStrCallExprMatcher, *Arg, *Context);
436 if (CStrMatches.size() == 1) {
437 ArgCStrRemovals.push_back(CStrMatches.front());
438 } else if (Arg->getType()->isPointerType()) {
439 const QualType Pointee = Arg->getType()->getPointeeType();
440 // printf is happy to print signed char and unsigned char strings, but
441 // std::format only likes char strings.
442 if (Pointee->isCharType() && !isRealCharType(Pointee))
443 ArgFixes.emplace_back(ArgIndex, "reinterpret_cast<const char *>(");
444 }
445}
446
447bool FormatStringConverter::emitIntegerArgument(
448 ConversionSpecifier::Kind ArgKind, const Expr *Arg, unsigned ArgIndex,
449 std::string &FormatSpec) {
450 const QualType &ArgType = Arg->getType();
451 if (ArgType->isBooleanType()) {
452 // std::format will print bool as either "true" or "false" by default,
453 // but printf prints them as "0" or "1". Be compatible with printf by
454 // requesting decimal output.
455 FormatSpec.push_back('d');
456 } else if (ArgType->isEnumeralType()) {
457 // std::format will try to find a specialization to print the enum
458 // (and probably fail), whereas printf would have just expected it to
459 // be passed as its underlying type. However, printf will have forced
460 // the signedness based on the format string, so we need to do the
461 // same.
462 if (const auto *ED = ArgType->getAsEnumDecl()) {
463 if (const std::optional<std::string> MaybeCastType =
464 castTypeForArgument(ArgKind, ED->getIntegerType()))
465 ArgFixes.emplace_back(
466 ArgIndex, (Twine("static_cast<") + *MaybeCastType + ">(").str());
467 else
468 return conversionNotPossible(
469 (Twine("argument ") + Twine(ArgIndex) + " has unexpected enum type")
470 .str());
471 }
472 } else if (CastMismatchedIntegerTypes &&
473 !isMatchingSignedness(ArgKind, ArgType)) {
474 // printf will happily print an unsigned type as signed if told to.
475 // Even -Wformat doesn't warn for this. std::format will format as
476 // unsigned unless we cast it.
477 if (const std::optional<std::string> MaybeCastType =
478 castTypeForArgument(ArgKind, ArgType))
479 ArgFixes.emplace_back(
480 ArgIndex, (Twine("static_cast<") + *MaybeCastType + ">(").str());
481 else
482 return conversionNotPossible(
483 (Twine("argument ") + Twine(ArgIndex) + " cannot be cast to " +
484 Twine(ArgKind == ConversionSpecifier::Kind::uArg ? "unsigned"
485 : "signed") +
486 " integer type to match format"
487 " specifier and StrictMode is enabled")
488 .str());
489 } else if (isRealCharType(ArgType) || !ArgType->isIntegerType()) {
490 // Only specify integer if the argument is of a different type
491 FormatSpec.push_back('d');
492 }
493 return true;
494}
495
496/// Append the corresponding standard format string type fragment to FormatSpec,
497/// and store any argument fixes for later application.
498/// @returns true on success, false on failure
499bool FormatStringConverter::emitType(const PrintfSpecifier &FS, const Expr *Arg,
500 std::string &FormatSpec) {
501 const ConversionSpecifier::Kind ArgKind =
502 FS.getConversionSpecifier().getKind();
503 switch (ArgKind) {
504 case ConversionSpecifier::Kind::sArg:
505 emitStringArgument(FS.getArgIndex() + ArgsOffset, Arg);
506 break;
507 case ConversionSpecifier::Kind::cArg:
508 // The type must be "c" to get a character unless the type is exactly
509 // char (whether that be signed or unsigned for the target.)
510 if (!isRealCharType(Arg->getType()))
511 FormatSpec.push_back('c');
512 break;
513 case ConversionSpecifier::Kind::dArg:
514 case ConversionSpecifier::Kind::iArg:
515 case ConversionSpecifier::Kind::uArg:
516 if (!emitIntegerArgument(ArgKind, Arg, FS.getArgIndex() + ArgsOffset,
517 FormatSpec))
518 return false;
519 break;
520 case ConversionSpecifier::Kind::pArg: {
521 const QualType &ArgType = Arg->getType();
522 // std::format knows how to format void pointers and nullptrs
523 if (!ArgType->isNullPtrType() && !ArgType->isVoidPointerType())
524 ArgFixes.emplace_back(FS.getArgIndex() + ArgsOffset,
525 "static_cast<const void *>(");
526 break;
527 }
528 case ConversionSpecifier::Kind::xArg:
529 FormatSpec.push_back('x');
530 break;
531 case ConversionSpecifier::Kind::XArg:
532 FormatSpec.push_back('X');
533 break;
534 case ConversionSpecifier::Kind::oArg:
535 FormatSpec.push_back('o');
536 break;
537 case ConversionSpecifier::Kind::aArg:
538 FormatSpec.push_back('a');
539 break;
540 case ConversionSpecifier::Kind::AArg:
541 FormatSpec.push_back('A');
542 break;
543 case ConversionSpecifier::Kind::eArg:
544 FormatSpec.push_back('e');
545 break;
546 case ConversionSpecifier::Kind::EArg:
547 FormatSpec.push_back('E');
548 break;
549 case ConversionSpecifier::Kind::fArg:
550 FormatSpec.push_back('f');
551 break;
552 case ConversionSpecifier::Kind::FArg:
553 FormatSpec.push_back('F');
554 break;
555 case ConversionSpecifier::Kind::gArg:
556 FormatSpec.push_back('g');
557 break;
558 case ConversionSpecifier::Kind::GArg:
559 FormatSpec.push_back('G');
560 break;
561 default:
562 // Something we don't understand
563 return conversionNotPossible((Twine("argument ") +
564 Twine(FS.getArgIndex() + ArgsOffset) +
565 " has an unsupported format specifier")
566 .str());
567 }
568
569 return true;
570}
571
572/// Append the standard format string equivalent of the passed PrintfSpecifier
573/// to StandardFormatString and store any argument fixes for later application.
574/// @returns true on success, false on failure
575bool FormatStringConverter::convertArgument(const PrintfSpecifier &FS,
576 const Expr *Arg,
577 std::string &StandardFormatString) {
578 // The specifier must have an associated argument
579 assert(FS.consumesDataArgument());
580
581 StandardFormatString.push_back('{');
582
583 if (FS.usesPositionalArg()) {
584 // std::format argument identifiers are zero-based, whereas printf ones
585 // are one based.
586 assert(FS.getPositionalArgIndex() > 0U);
587 StandardFormatString.append(llvm::utostr(FS.getPositionalArgIndex() - 1));
588 }
589
590 // std::format format argument parts to potentially emit:
591 // [[fill]align][sign]["#"]["0"][width]["."precision][type]
592 std::string FormatSpec;
593
594 // printf doesn't support specifying the fill character - it's always a
595 // space, so we never need to generate one.
596
597 emitAlignment(FS, FormatSpec);
598 emitSign(FS, FormatSpec);
599 emitAlternativeForm(FS, FormatSpec);
600
601 if (FS.hasLeadingZeros())
602 FormatSpec.push_back('0');
603
604 emitFieldWidth(FS, FormatSpec);
605 emitPrecision(FS, FormatSpec);
606 maybeRotateArguments(FS);
607
608 if (!emitType(FS, Arg, FormatSpec))
609 return false;
610
611 if (!FormatSpec.empty()) {
612 StandardFormatString.push_back(':');
613 StandardFormatString.append(FormatSpec);
614 }
615
616 StandardFormatString.push_back('}');
617 return true;
618}
619
620/// Called for each format specifier by ParsePrintfString.
621bool FormatStringConverter::HandlePrintfSpecifier(const PrintfSpecifier &FS,
622 const char *StartSpecifier,
623 unsigned SpecifierLen,
624 const TargetInfo &Target) {
625 const size_t StartSpecifierPos = StartSpecifier - PrintfFormatString.data();
626 assert(StartSpecifierPos + SpecifierLen <= PrintfFormatString.size());
627
628 // Everything before the specifier needs copying verbatim
629 assert(StartSpecifierPos >= PrintfFormatStringPos);
630
631 appendFormatText(StringRef(PrintfFormatString.begin() + PrintfFormatStringPos,
632 StartSpecifierPos - PrintfFormatStringPos));
633
634 const ConversionSpecifier::Kind ArgKind =
635 FS.getConversionSpecifier().getKind();
636
637 // Skip over specifier
638 PrintfFormatStringPos = StartSpecifierPos + SpecifierLen;
639 assert(PrintfFormatStringPos <= PrintfFormatString.size());
640
641 FormatStringNeededRewriting = true;
642
643 if (ArgKind == ConversionSpecifier::Kind::nArg) {
644 // std::print doesn't do the equivalent of %n
645 return conversionNotPossible("'%n' is not supported in format string");
646 }
647
648 if (ArgKind == ConversionSpecifier::Kind::PrintErrno) {
649 // std::print doesn't support %m. In theory we could insert a
650 // strerror(errno) parameter (assuming that libc has a thread-safe
651 // implementation, which glibc does), but that would require keeping track
652 // of the input and output parameter indices for position arguments too.
653 return conversionNotPossible("'%m' is not supported in format string");
654 }
655
656 if (ArgKind == ConversionSpecifier::PercentArg) {
657 StandardFormatString.push_back('%');
658 return true;
659 }
660
661 const unsigned ArgIndex = FS.getArgIndex() + ArgsOffset;
662 if (ArgIndex >= NumArgs) {
663 // Argument index out of range. Give up.
664 return conversionNotPossible(
665 (Twine("argument index ") + Twine(ArgIndex) + " is out of range")
666 .str());
667 }
668
669 return convertArgument(FS, Args[ArgIndex]->IgnoreImplicitAsWritten(),
670 StandardFormatString);
671}
672
673/// Called at the very end just before applying fixes to capture the last part
674/// of the format string.
675void FormatStringConverter::finalizeFormatText() {
676 appendFormatText(
677 StringRef(PrintfFormatString.begin() + PrintfFormatStringPos,
678 PrintfFormatString.size() - PrintfFormatStringPos));
679 PrintfFormatStringPos = PrintfFormatString.size();
680
681 // It's clearer to convert printf("Hello\r\n"); to std::print("Hello\r\n")
682 // than to std::println("Hello\r");
683 // Use StringRef until C++20 std::string::ends_with() is available.
684 const auto StandardFormatStringRef = StringRef(StandardFormatString);
685 if (Config.AllowTrailingNewlineRemoval &&
686 StandardFormatStringRef.ends_with("\\n") &&
687 !StandardFormatStringRef.ends_with("\\\\n") &&
688 !StandardFormatStringRef.ends_with("\\r\\n")) {
689 UsePrintNewlineFunction = true;
690 FormatStringNeededRewriting = true;
691 StandardFormatString.erase(StandardFormatString.end() - 2,
692 StandardFormatString.end());
693 }
694
695 StandardFormatString.push_back('\"');
696}
697
698/// Append literal parts of the format text, reinstating escapes as required.
699void FormatStringConverter::appendFormatText(const StringRef Text) {
700 for (const char Ch : Text) {
701 const auto UCh = static_cast<unsigned char>(Ch);
702 if (Ch == '\a') {
703 StandardFormatString += "\\a";
704 } else if (Ch == '\b') {
705 StandardFormatString += "\\b";
706 } else if (Ch == '\f') {
707 StandardFormatString += "\\f";
708 } else if (Ch == '\n') {
709 StandardFormatString += "\\n";
710 } else if (Ch == '\r') {
711 StandardFormatString += "\\r";
712 } else if (Ch == '\t') {
713 StandardFormatString += "\\t";
714 } else if (Ch == '\v') {
715 StandardFormatString += "\\v";
716 } else if (Ch == '\"') {
717 StandardFormatString += "\\\"";
718 } else if (Ch == '\\') {
719 StandardFormatString += "\\\\";
720 } else if (Ch == '{') {
721 StandardFormatString += "{{";
722 FormatStringNeededRewriting = true;
723 } else if (Ch == '}') {
724 StandardFormatString += "}}";
725 FormatStringNeededRewriting = true;
726 } else if (UCh < 32) {
727 StandardFormatString += "\\x";
728 StandardFormatString += llvm::hexdigit(UCh >> 4, true);
729 StandardFormatString += llvm::hexdigit(UCh & 0xf, true);
730 } else {
731 StandardFormatString += Ch;
732 }
733 }
734}
735
736static std::string withoutCStrReplacement(const BoundNodes &CStrRemovalMatch,
737 ASTContext &Context) {
738 const auto *Arg = CStrRemovalMatch.getNodeAs<Expr>("arg");
739 const auto *Member = CStrRemovalMatch.getNodeAs<MemberExpr>("member");
740 const bool Arrow = Member->isArrow();
741 return Arrow ? utils::fixit::formatDereference(*Arg, Context)
742 : tooling::fixit::getText(*Arg, Context).str();
743}
744
745/// Called by the check when it is ready to apply the fixes.
746void FormatStringConverter::applyFixes(DiagnosticBuilder &Diag,
747 SourceManager &SM) {
748 if (FormatStringNeededRewriting) {
749 Diag << FixItHint::CreateReplacement(
750 CharSourceRange::getTokenRange(FormatExpr->getBeginLoc(),
751 FormatExpr->getEndLoc()),
752 StandardFormatString);
753 }
754
755 // ArgCount is one less than the number of arguments to be rotated.
756 for (auto [ValueArgIndex, ArgCount] : ArgRotates) {
757 assert(ValueArgIndex < NumArgs);
758 assert(ValueArgIndex > ArgCount);
759
760 // First move the value argument to the right place. But if there's a
761 // pending c_str() removal then we must do that at the same time.
762 if (const auto CStrRemovalMatch =
763 llvm::find_if(ArgCStrRemovals,
764 [ArgStartPos = Args[ValueArgIndex]->getBeginLoc()](
765 const BoundNodes &Match) {
766 // This c_str() removal corresponds to the argument
767 // being moved if they start at the same location.
768 const Expr *CStrArg = Match.getNodeAs<Expr>("arg");
769 return ArgStartPos == CStrArg->getBeginLoc();
770 });
771 CStrRemovalMatch != ArgCStrRemovals.end()) {
772 const std::string ArgText =
773 withoutCStrReplacement(*CStrRemovalMatch, *Context);
774 assert(!ArgText.empty());
775
776 Diag << FixItHint::CreateReplacement(
777 Args[ValueArgIndex - ArgCount]->getSourceRange(), ArgText);
778
779 // That c_str() removal is now dealt with, so we don't need to do it again
780 ArgCStrRemovals.erase(CStrRemovalMatch);
781 } else {
782 Diag << tooling::fixit::createReplacement(*Args[ValueArgIndex - ArgCount],
783 *Args[ValueArgIndex], *Context);
784 }
785
786 // Now shift down the field width and precision (if either are present) to
787 // accommodate it.
788 for (size_t Offset = 0; Offset < ArgCount; ++Offset)
789 Diag << tooling::fixit::createReplacement(
790 *Args[ValueArgIndex - Offset], *Args[ValueArgIndex - Offset - 1],
791 *Context);
792
793 // Now we need to modify the ArgFix index too so that we fix the right
794 // argument. We don't need to care about the width and precision indices
795 // since they never need fixing.
796 for (auto &ArgFix : ArgFixes)
797 if (ArgFix.ArgIndex == ValueArgIndex)
798 ArgFix.ArgIndex = ValueArgIndex - ArgCount;
799 }
800
801 for (const auto &[ArgIndex, Replacement] : ArgFixes) {
802 const std::optional<Token> NextToken =
803 utils::lexer::findNextTokenSkippingComments(Args[ArgIndex]->getEndLoc(),
804 SM, LangOpts);
805 if (!NextToken)
806 continue;
807 const SourceLocation AfterOtherSide = NextToken->getLocation();
808
809 Diag << FixItHint::CreateInsertion(Args[ArgIndex]->getBeginLoc(),
810 Replacement, true)
811 << FixItHint::CreateInsertion(AfterOtherSide, ")", true);
812 }
813
814 for (const auto &Match : ArgCStrRemovals) {
815 const auto *Call = Match.getNodeAs<CallExpr>("call");
816 const std::string ArgText = withoutCStrReplacement(Match, *Context);
817 if (!ArgText.empty())
818 Diag << FixItHint::CreateReplacement(Call->getSourceRange(), ArgText);
819 }
820}
821} // namespace clang::tidy::utils
Declaration of the FormatStringConverter class which is used to convert printf format strings to C++ ...
void applyFixes(DiagnosticBuilder &Diag, SourceManager &SM)
Called by the check when it is ready to apply the fixes.
clang::analyze_format_string::ConversionSpecifier ConversionSpecifier
FormatStringConverter(ASTContext *Context, const CallExpr *Call, unsigned FormatArgOffset, Configuration Config, const LangOptions &LO, SourceManager &SM, Preprocessor &PP)
std::vector< std::string > match(const SymbolIndex &I, const FuzzyFindRequest &Req, bool *Incomplete)
std::string formatDereference(const Expr &ExprNode, const ASTContext &Context)
std::optional< Token > findNextTokenSkippingComments(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts)
Definition LexerUtils.h:106
static std::string withoutCStrReplacement(const BoundNodes &CStrRemovalMatch, ASTContext &Context)
static bool isMatchingSignedness(ConversionSpecifier::Kind ArgKind, const QualType &ArgType)
static bool isRealCharType(const QualType &Ty)
Is the passed type the actual "char" type, whether that be signed or unsigned, rather than explicit s...
static bool castMismatchedIntegerTypes(const CallExpr *Call, bool StrictMode)
static std::optional< std::string > castTypeForArgument(ConversionSpecifier::Kind ArgKind, const QualType &QT)
static std::optional< std::string > getCorrespondingSignedTypeName(const QualType &QT)
If possible, return the text name of the signed type that corresponds to the passed integer type.
static std::optional< std::string > getCorrespondingUnsignedTypeName(const QualType &QT)
If possible, return the text name of the unsigned type that corresponds to the passed integer type.
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
static constexpr const char FuncDecl[]