clang-tools 22.0.0git
FormatStringConverter.cpp
Go to the documentation of this file.
1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Implementation of the FormatStringConverter class which is used to convert
11/// printf format strings to C++ std::formatter format strings.
12///
13//===----------------------------------------------------------------------===//
14
17#include "clang/AST/Expr.h"
18#include "clang/ASTMatchers/ASTMatchFinder.h"
19#include "clang/Basic/LangOptions.h"
20#include "clang/Lex/Lexer.h"
21#include "clang/Lex/Preprocessor.h"
22#include "clang/Tooling/FixIt.h"
23#include "llvm/ADT/StringExtras.h"
24#include "llvm/Support/Debug.h"
25
26using namespace clang::ast_matchers;
27using namespace clang::analyze_printf;
28
29namespace clang::tidy::utils {
30using clang::analyze_format_string::ConversionSpecifier;
31
32/// Is the passed type the actual "char" type, whether that be signed or
33/// unsigned, rather than explicit signed char or unsigned char types.
34static bool isRealCharType(const clang::QualType &Ty) {
35 using namespace clang;
36 const Type *DesugaredType = Ty->getUnqualifiedDesugaredType();
37 if (const auto *BT = llvm::dyn_cast<BuiltinType>(DesugaredType))
38 return (BT->getKind() == BuiltinType::Char_U ||
39 BT->getKind() == BuiltinType::Char_S);
40 return false;
41}
42
43/// If possible, return the text name of the signed type that corresponds to the
44/// passed integer type. If the passed type is already signed then its name is
45/// just returned. Only supports BuiltinTypes.
46static std::optional<std::string>
47getCorrespondingSignedTypeName(const clang::QualType &QT) {
48 using namespace clang;
49 const auto UQT = QT.getUnqualifiedType();
50 if (const auto *BT = llvm::dyn_cast<BuiltinType>(UQT)) {
51 switch (BT->getKind()) {
52 case BuiltinType::UChar:
53 case BuiltinType::Char_U:
54 case BuiltinType::SChar:
55 case BuiltinType::Char_S:
56 return "signed char";
57 case BuiltinType::UShort:
58 case BuiltinType::Short:
59 return "short";
60 case BuiltinType::UInt:
61 case BuiltinType::Int:
62 return "int";
63 case BuiltinType::ULong:
64 case BuiltinType::Long:
65 return "long";
66 case BuiltinType::ULongLong:
67 case BuiltinType::LongLong:
68 return "long long";
69 default:
70 llvm::dbgs() << "Unknown corresponding signed type for BuiltinType '"
71 << QT.getAsString() << "'\n";
72 return std::nullopt;
73 }
74 }
75
76 // Deal with fixed-width integer types from <cstdint>. Use std:: prefix only
77 // if the argument type does.
78 const std::string TypeName = UQT.getAsString();
79 StringRef SimplifiedTypeName{TypeName};
80 const bool InStd = SimplifiedTypeName.consume_front("std::");
81 const StringRef Prefix = InStd ? "std::" : "";
82
83 if (SimplifiedTypeName.starts_with("uint") &&
84 SimplifiedTypeName.ends_with("_t"))
85 return (Twine(Prefix) + SimplifiedTypeName.drop_front()).str();
86
87 if (SimplifiedTypeName == "size_t")
88 return (Twine(Prefix) + "ssize_t").str();
89
90 llvm::dbgs() << "Unknown corresponding signed type for non-BuiltinType '"
91 << UQT.getAsString() << "'\n";
92 return std::nullopt;
93}
94
95/// If possible, return the text name of the unsigned type that corresponds to
96/// the passed integer type. If the passed type is already unsigned then its
97/// name is just returned. Only supports BuiltinTypes.
98static std::optional<std::string>
99getCorrespondingUnsignedTypeName(const clang::QualType &QT) {
100 using namespace clang;
101 const auto UQT = QT.getUnqualifiedType();
102 if (const auto *BT = llvm::dyn_cast<BuiltinType>(UQT)) {
103 switch (BT->getKind()) {
104 case BuiltinType::SChar:
105 case BuiltinType::Char_S:
106 case BuiltinType::UChar:
107 case BuiltinType::Char_U:
108 return "unsigned char";
109 case BuiltinType::Short:
110 case BuiltinType::UShort:
111 return "unsigned short";
112 case BuiltinType::Int:
113 case BuiltinType::UInt:
114 return "unsigned int";
115 case BuiltinType::Long:
116 case BuiltinType::ULong:
117 return "unsigned long";
118 case BuiltinType::LongLong:
119 case BuiltinType::ULongLong:
120 return "unsigned long long";
121 default:
122 llvm::dbgs() << "Unknown corresponding unsigned type for BuiltinType '"
123 << UQT.getAsString() << "'\n";
124 return std::nullopt;
125 }
126 }
127
128 // Deal with fixed-width integer types from <cstdint>. Use std:: prefix only
129 // if the argument type does.
130 const std::string TypeName = UQT.getAsString();
131 StringRef SimplifiedTypeName{TypeName};
132 const bool InStd = SimplifiedTypeName.consume_front("std::");
133 const StringRef Prefix = InStd ? "std::" : "";
134
135 if (SimplifiedTypeName.starts_with("int") &&
136 SimplifiedTypeName.ends_with("_t"))
137 return (Twine(Prefix) + "u" + SimplifiedTypeName).str();
138
139 if (SimplifiedTypeName == "ssize_t")
140 return (Twine(Prefix) + "size_t").str();
141 if (SimplifiedTypeName == "ptrdiff_t")
142 return (Twine(Prefix) + "size_t").str();
143
144 llvm::dbgs() << "Unknown corresponding unsigned type for non-BuiltinType '"
145 << UQT.getAsString() << "'\n";
146 return std::nullopt;
147}
148
// Chooses the cast target type name for an integer argument of type QT based
// on the conversion specifier kind (the visible branch tests for uArg).
// NOTE(review): this listing omits the statements between the `if` below and
// the closing brace (original lines 153-154), so what each branch returns is
// not visible here -- presumably the unsigned/signed type-name helpers defined
// earlier in this file; confirm against the upstream source.
149static std::optional<std::string>
150castTypeForArgument(ConversionSpecifier::Kind ArgKind,
151 const clang::QualType &QT) {
152 if (ArgKind == ConversionSpecifier::Kind::uArg)
155}
156
157static bool isMatchingSignedness(ConversionSpecifier::Kind ArgKind,
158 const clang::QualType &ArgType) {
159 if (const auto *BT = llvm::dyn_cast<BuiltinType>(ArgType)) {
160 // Unadorned char never matches any expected signedness since it
161 // could be signed or unsigned.
162 const auto ArgTypeKind = BT->getKind();
163 if (ArgTypeKind == BuiltinType::Char_U ||
164 ArgTypeKind == BuiltinType::Char_S)
165 return false;
166 }
167
168 if (ArgKind == ConversionSpecifier::Kind::uArg)
169 return ArgType->isUnsignedIntegerType();
170 return ArgType->isSignedIntegerType();
171}
172
// File-local AST matcher `isRealChar` over clang::QualType; emitStringArgument
// applies it to the pointee of the c_str()/data() return type.
// NOTE(review): the matcher body (original line 175) is missing from this
// listing -- presumably it forwards to isRealCharType(); confirm upstream.
173namespace {
174AST_MATCHER(clang::QualType, isRealChar) {
176}
177} // namespace
178
179static bool castMismatchedIntegerTypes(const CallExpr *Call, bool StrictMode) {
180 /// For printf-style functions, the signedness of the type printed is
181 /// indicated by the corresponding type in the format string.
182 /// std::print will determine the signedness from the type of the
183 /// argument. This means that it is necessary to generate a cast in
184 /// StrictMode to ensure that the exact behaviour is maintained.
185 /// However, for templated functions like absl::PrintF and
186 /// fmt::printf, the signedness of the type printed is also taken from
187 /// the actual argument like std::print, so such casts are never
188 /// necessary. printf-style functions are variadic, whereas templated
189 /// ones aren't, so we can use that to distinguish between the two
190 /// cases.
191 if (StrictMode) {
192 const FunctionDecl *FuncDecl = Call->getDirectCallee();
193 assert(FuncDecl);
194 return FuncDecl->isVariadic();
195 }
196 return false;
197}
198
// Constructor parameter list and body (the line that opens the definition is
// not present in this listing).
// Stores the context, configuration and call arguments, requires the format
// argument to be an ordinary string literal, gives up via
// conversionNotPossible() when the literal involves an unreplaceable macro,
// and otherwise runs ParsePrintfString over the literal with *this passed as
// the first argument before calling finalizeFormatText().
200 ASTContext *ContextIn, const CallExpr *Call, unsigned FormatArgOffset,
201 const Configuration ConfigIn, const LangOptions &LO, SourceManager &SM,
202 Preprocessor &PP)
203 : Context(ContextIn), Config(ConfigIn),
204 CastMismatchedIntegerTypes(
205 castMismatchedIntegerTypes(Call, ConfigIn.StrictMode)),
206 Args(Call->getArgs()), NumArgs(Call->getNumArgs()),
207 ArgsOffset(FormatArgOffset + 1), LangOpts(LO) {
208 assert(ArgsOffset <= NumArgs);
209 FormatExpr = llvm::dyn_cast<StringLiteral>(
210 Args[FormatArgOffset]->IgnoreUnlessSpelledInSource());
211
212 assert(FormatExpr && FormatExpr->isOrdinary());
213
214 if (const std::optional<StringRef> MaybeMacroName =
215 formatStringContainsUnreplaceableMacro(Call, FormatExpr, SM, PP);
216 MaybeMacroName) {
217 conversionNotPossible(
218 ("format string contains unreplaceable macro '" + *MaybeMacroName + "'")
219 .str());
220 return;
221 }
222
223 PrintfFormatString = FormatExpr->getString();
224
225 // Assume that the output will be approximately the same size as the input,
226 // but perhaps with a few escapes expanded.
227 const size_t EstimatedGrowth = 8;
228 StandardFormatString.reserve(PrintfFormatString.size() + EstimatedGrowth);
// The converted text is built as a quoted literal: opening quote goes in first.
229 StandardFormatString.push_back('\"');
230
231 const bool IsFreeBsdkPrintf = false;
232
233 using clang::analyze_format_string::ParsePrintfString;
234 ParsePrintfString(*this, PrintfFormatString.data(),
235 PrintfFormatString.data() + PrintfFormatString.size(),
236 LangOpts, Context->getTargetInfo(), IsFreeBsdkPrintf);
237 finalizeFormatText();
238}
239
240std::optional<StringRef>
241FormatStringConverter::formatStringContainsUnreplaceableMacro(
242 const CallExpr *Call, const StringLiteral *FormatExpr, SourceManager &SM,
243 Preprocessor &PP) {
244 // If a macro invocation surrounds the entire call then we don't want that to
245 // inhibit conversion. The whole format string will appear to come from that
246 // macro, as will the function call.
247 std::optional<StringRef> MaybeSurroundingMacroName;
248 if (SourceLocation BeginCallLoc = Call->getBeginLoc();
249 BeginCallLoc.isMacroID())
250 MaybeSurroundingMacroName =
251 Lexer::getImmediateMacroName(BeginCallLoc, SM, PP.getLangOpts());
252
253 for (auto I = FormatExpr->tokloc_begin(), E = FormatExpr->tokloc_end();
254 I != E; ++I) {
255 const SourceLocation &TokenLoc = *I;
256 if (TokenLoc.isMacroID()) {
257 const StringRef MacroName =
258 Lexer::getImmediateMacroName(TokenLoc, SM, PP.getLangOpts());
259
260 if (MaybeSurroundingMacroName != MacroName) {
261 // glibc uses __PRI64_PREFIX and __PRIPTR_PREFIX to define the prefixes
262 // for types that change size so we must look for multiple prefixes.
263 if (!MacroName.starts_with("PRI") && !MacroName.starts_with("__PRI"))
264 return MacroName;
265
266 const SourceLocation TokenSpellingLoc = SM.getSpellingLoc(TokenLoc);
267 const OptionalFileEntryRef MaybeFileEntry =
268 SM.getFileEntryRefForID(SM.getFileID(TokenSpellingLoc));
269 if (!MaybeFileEntry)
270 return MacroName;
271
272 HeaderSearch &HS = PP.getHeaderSearchInfo();
273 // Check if the file is a system header
274 if (!isSystem(HS.getFileDirFlavor(*MaybeFileEntry)) ||
275 llvm::sys::path::filename(MaybeFileEntry->getName()) !=
276 "inttypes.h")
277 return MacroName;
278 }
279 }
280 }
281 return std::nullopt;
282}
283
284void FormatStringConverter::emitAlignment(const PrintfSpecifier &FS,
285 std::string &FormatSpec) {
286 ConversionSpecifier::Kind ArgKind = FS.getConversionSpecifier().getKind();
287
288 // We only care about alignment if a field width is specified
289 if (FS.getFieldWidth().getHowSpecified() != OptionalAmount::NotSpecified) {
290 if (ArgKind == ConversionSpecifier::sArg) {
291 // Strings are left-aligned by default with std::format, so we only
292 // need to emit an alignment if this one needs to be right aligned.
293 if (!FS.isLeftJustified())
294 FormatSpec.push_back('>');
295 } else {
296 // Numbers are right-aligned by default with std::format, so we only
297 // need to emit an alignment if this one needs to be left aligned.
298 if (FS.isLeftJustified())
299 FormatSpec.push_back('<');
300 }
301 }
302}
303
304void FormatStringConverter::emitSign(const PrintfSpecifier &FS,
305 std::string &FormatSpec) {
306 const ConversionSpecifier Spec = FS.getConversionSpecifier();
307
308 // Ignore on something that isn't numeric. For printf it's would be a
309 // compile-time warning but ignored at runtime, but for std::format it
310 // ought to be a compile-time error.
311 if (Spec.isAnyIntArg() || Spec.isDoubleArg()) {
312 // + is preferred to ' '
313 if (FS.hasPlusPrefix())
314 FormatSpec.push_back('+');
315 else if (FS.hasSpacePrefix())
316 FormatSpec.push_back(' ');
317 }
318}
319
320void FormatStringConverter::emitAlternativeForm(const PrintfSpecifier &FS,
321 std::string &FormatSpec) {
322 if (FS.hasAlternativeForm()) {
323 switch (FS.getConversionSpecifier().getKind()) {
324 case ConversionSpecifier::Kind::aArg:
325 case ConversionSpecifier::Kind::AArg:
326 case ConversionSpecifier::Kind::eArg:
327 case ConversionSpecifier::Kind::EArg:
328 case ConversionSpecifier::Kind::fArg:
329 case ConversionSpecifier::Kind::FArg:
330 case ConversionSpecifier::Kind::gArg:
331 case ConversionSpecifier::Kind::GArg:
332 case ConversionSpecifier::Kind::xArg:
333 case ConversionSpecifier::Kind::XArg:
334 case ConversionSpecifier::Kind::oArg:
335 FormatSpec.push_back('#');
336 break;
337 default:
338 // Alternative forms don't exist for other argument kinds
339 break;
340 }
341 }
342}
343
344void FormatStringConverter::emitFieldWidth(const PrintfSpecifier &FS,
345 std::string &FormatSpec) {
346 {
347 const OptionalAmount FieldWidth = FS.getFieldWidth();
348 switch (FieldWidth.getHowSpecified()) {
349 case OptionalAmount::NotSpecified:
350 break;
351 case OptionalAmount::Constant:
352 FormatSpec.append(llvm::utostr(FieldWidth.getConstantAmount()));
353 break;
354 case OptionalAmount::Arg:
355 FormatSpec.push_back('{');
356 if (FieldWidth.usesPositionalArg()) {
357 // std::format argument identifiers are zero-based, whereas printf
358 // ones are one based.
359 assert(FieldWidth.getPositionalArgIndex() > 0U);
360 FormatSpec.append(llvm::utostr(FieldWidth.getPositionalArgIndex() - 1));
361 }
362 FormatSpec.push_back('}');
363 break;
364 case OptionalAmount::Invalid:
365 break;
366 }
367 }
368}
369
370void FormatStringConverter::emitPrecision(const PrintfSpecifier &FS,
371 std::string &FormatSpec) {
372 const OptionalAmount FieldPrecision = FS.getPrecision();
373 switch (FieldPrecision.getHowSpecified()) {
374 case OptionalAmount::NotSpecified:
375 break;
376 case OptionalAmount::Constant:
377 FormatSpec.push_back('.');
378 FormatSpec.append(llvm::utostr(FieldPrecision.getConstantAmount()));
379 break;
380 case OptionalAmount::Arg:
381 FormatSpec.push_back('.');
382 FormatSpec.push_back('{');
383 if (FieldPrecision.usesPositionalArg()) {
384 // std::format argument identifiers are zero-based, whereas printf
385 // ones are one based.
386 assert(FieldPrecision.getPositionalArgIndex() > 0U);
387 FormatSpec.append(
388 llvm::utostr(FieldPrecision.getPositionalArgIndex() - 1));
389 }
390 FormatSpec.push_back('}');
391 break;
392 case OptionalAmount::Invalid:
393 break;
394 }
395}
396
397void FormatStringConverter::maybeRotateArguments(const PrintfSpecifier &FS) {
398 unsigned ArgCount = 0;
399 const OptionalAmount FieldWidth = FS.getFieldWidth();
400 const OptionalAmount FieldPrecision = FS.getPrecision();
401
402 if (FieldWidth.getHowSpecified() == OptionalAmount::Arg &&
403 !FieldWidth.usesPositionalArg())
404 ++ArgCount;
405 if (FieldPrecision.getHowSpecified() == OptionalAmount::Arg &&
406 !FieldPrecision.usesPositionalArg())
407 ++ArgCount;
408
409 if (ArgCount)
410 ArgRotates.emplace_back(FS.getArgIndex() + ArgsOffset, ArgCount);
411}
412
413void FormatStringConverter::emitStringArgument(unsigned ArgIndex,
414 const Expr *Arg) {
415 // If the argument is the result of a call to std::string::c_str() or
416 // data() with a return type of char then we can remove that call and
417 // pass the std::string directly. We don't want to do so if the return
418 // type is not a char pointer (though it's unlikely that such code would
419 // compile without warnings anyway.) See RedundantStringCStrCheck.
420
421 if (!StringCStrCallExprMatcher) {
422 // Lazily create the matcher
423 const auto StringDecl = type(hasUnqualifiedDesugaredType(recordType(
424 hasDeclaration(cxxRecordDecl(hasName("::std::basic_string"))))));
425 const auto StringExpr = expr(
426 anyOf(hasType(StringDecl), hasType(qualType(pointsTo(StringDecl)))));
427
428 StringCStrCallExprMatcher =
429 cxxMemberCallExpr(
430 on(StringExpr.bind("arg")), callee(memberExpr().bind("member")),
431 callee(cxxMethodDecl(hasAnyName("c_str", "data"),
432 returns(pointerType(pointee(isRealChar()))))))
433 .bind("call");
434 }
435
436 auto CStrMatches = match(*StringCStrCallExprMatcher, *Arg, *Context);
437 if (CStrMatches.size() == 1)
438 ArgCStrRemovals.push_back(CStrMatches.front());
439 else if (Arg->getType()->isPointerType()) {
440 const QualType Pointee = Arg->getType()->getPointeeType();
441 // printf is happy to print signed char and unsigned char strings, but
442 // std::format only likes char strings.
443 if (Pointee->isCharType() && !isRealCharType(Pointee))
444 ArgFixes.emplace_back(ArgIndex, "reinterpret_cast<const char *>(");
445 }
446}
447
448bool FormatStringConverter::emitIntegerArgument(
449 ConversionSpecifier::Kind ArgKind, const Expr *Arg, unsigned ArgIndex,
450 std::string &FormatSpec) {
451 const clang::QualType &ArgType = Arg->getType();
452 if (ArgType->isBooleanType()) {
453 // std::format will print bool as either "true" or "false" by default,
454 // but printf prints them as "0" or "1". Be compatible with printf by
455 // requesting decimal output.
456 FormatSpec.push_back('d');
457 } else if (ArgType->isEnumeralType()) {
458 // std::format will try to find a specialization to print the enum
459 // (and probably fail), whereas printf would have just expected it to
460 // be passed as its underlying type. However, printf will have forced
461 // the signedness based on the format string, so we need to do the
462 // same.
463 if (const auto *ED = ArgType->getAsEnumDecl()) {
464 if (const std::optional<std::string> MaybeCastType =
465 castTypeForArgument(ArgKind, ED->getIntegerType()))
466 ArgFixes.emplace_back(
467 ArgIndex, (Twine("static_cast<") + *MaybeCastType + ">(").str());
468 else
469 return conversionNotPossible(
470 (Twine("argument ") + Twine(ArgIndex) + " has unexpected enum type")
471 .str());
472 }
473 } else if (CastMismatchedIntegerTypes &&
474 !isMatchingSignedness(ArgKind, ArgType)) {
475 // printf will happily print an unsigned type as signed if told to.
476 // Even -Wformat doesn't warn for this. std::format will format as
477 // unsigned unless we cast it.
478 if (const std::optional<std::string> MaybeCastType =
479 castTypeForArgument(ArgKind, ArgType))
480 ArgFixes.emplace_back(
481 ArgIndex, (Twine("static_cast<") + *MaybeCastType + ">(").str());
482 else
483 return conversionNotPossible(
484 (Twine("argument ") + Twine(ArgIndex) + " cannot be cast to " +
485 Twine(ArgKind == ConversionSpecifier::Kind::uArg ? "unsigned"
486 : "signed") +
487 " integer type to match format"
488 " specifier and StrictMode is enabled")
489 .str());
490 } else if (isRealCharType(ArgType) || !ArgType->isIntegerType()) {
491 // Only specify integer if the argument is of a different type
492 FormatSpec.push_back('d');
493 }
494 return true;
495}
496
497/// Append the corresponding standard format string type fragment to FormatSpec,
498/// and store any argument fixes for later application.
499/// @returns true on success, false on failure
500bool FormatStringConverter::emitType(const PrintfSpecifier &FS, const Expr *Arg,
501 std::string &FormatSpec) {
502 ConversionSpecifier::Kind ArgKind = FS.getConversionSpecifier().getKind();
503 switch (ArgKind) {
504 case ConversionSpecifier::Kind::sArg:
505 emitStringArgument(FS.getArgIndex() + ArgsOffset, Arg);
506 break;
507 case ConversionSpecifier::Kind::cArg:
508 // The type must be "c" to get a character unless the type is exactly
509 // char (whether that be signed or unsigned for the target.)
510 if (!isRealCharType(Arg->getType()))
511 FormatSpec.push_back('c');
512 break;
513 case ConversionSpecifier::Kind::dArg:
514 case ConversionSpecifier::Kind::iArg:
515 case ConversionSpecifier::Kind::uArg:
516 if (!emitIntegerArgument(ArgKind, Arg, FS.getArgIndex() + ArgsOffset,
517 FormatSpec))
518 return false;
519 break;
520 case ConversionSpecifier::Kind::pArg: {
521 const clang::QualType &ArgType = Arg->getType();
522 // std::format knows how to format void pointers and nullptrs
523 if (!ArgType->isNullPtrType() && !ArgType->isVoidPointerType())
524 ArgFixes.emplace_back(FS.getArgIndex() + ArgsOffset,
525 "static_cast<const void *>(");
526 break;
527 }
528 case ConversionSpecifier::Kind::xArg:
529 FormatSpec.push_back('x');
530 break;
531 case ConversionSpecifier::Kind::XArg:
532 FormatSpec.push_back('X');
533 break;
534 case ConversionSpecifier::Kind::oArg:
535 FormatSpec.push_back('o');
536 break;
537 case ConversionSpecifier::Kind::aArg:
538 FormatSpec.push_back('a');
539 break;
540 case ConversionSpecifier::Kind::AArg:
541 FormatSpec.push_back('A');
542 break;
543 case ConversionSpecifier::Kind::eArg:
544 FormatSpec.push_back('e');
545 break;
546 case ConversionSpecifier::Kind::EArg:
547 FormatSpec.push_back('E');
548 break;
549 case ConversionSpecifier::Kind::fArg:
550 FormatSpec.push_back('f');
551 break;
552 case ConversionSpecifier::Kind::FArg:
553 FormatSpec.push_back('F');
554 break;
555 case ConversionSpecifier::Kind::gArg:
556 FormatSpec.push_back('g');
557 break;
558 case ConversionSpecifier::Kind::GArg:
559 FormatSpec.push_back('G');
560 break;
561 default:
562 // Something we don't understand
563 return conversionNotPossible((Twine("argument ") +
564 Twine(FS.getArgIndex() + ArgsOffset) +
565 " has an unsupported format specifier")
566 .str());
567 }
568
569 return true;
570}
571
572/// Append the standard format string equivalent of the passed PrintfSpecifier
573/// to StandardFormatString and store any argument fixes for later application.
574/// @returns true on success, false on failure
575bool FormatStringConverter::convertArgument(const PrintfSpecifier &FS,
576 const Expr *Arg,
577 std::string &StandardFormatString) {
578 // The specifier must have an associated argument
579 assert(FS.consumesDataArgument());
580
581 StandardFormatString.push_back('{');
582
583 if (FS.usesPositionalArg()) {
584 // std::format argument identifiers are zero-based, whereas printf ones
585 // are one based.
586 assert(FS.getPositionalArgIndex() > 0U);
587 StandardFormatString.append(llvm::utostr(FS.getPositionalArgIndex() - 1));
588 }
589
590 // std::format format argument parts to potentially emit:
591 // [[fill]align][sign]["#"]["0"][width]["."precision][type]
592 std::string FormatSpec;
593
594 // printf doesn't support specifying the fill character - it's always a
595 // space, so we never need to generate one.
596
597 emitAlignment(FS, FormatSpec);
598 emitSign(FS, FormatSpec);
599 emitAlternativeForm(FS, FormatSpec);
600
601 if (FS.hasLeadingZeros())
602 FormatSpec.push_back('0');
603
604 emitFieldWidth(FS, FormatSpec);
605 emitPrecision(FS, FormatSpec);
606 maybeRotateArguments(FS);
607
608 if (!emitType(FS, Arg, FormatSpec))
609 return false;
610
611 if (!FormatSpec.empty()) {
612 StandardFormatString.push_back(':');
613 StandardFormatString.append(FormatSpec);
614 }
615
616 StandardFormatString.push_back('}');
617 return true;
618}
619
620/// Called for each format specifier by ParsePrintfString.
621bool FormatStringConverter::HandlePrintfSpecifier(const PrintfSpecifier &FS,
622 const char *StartSpecifier,
623 unsigned SpecifierLen,
624 const TargetInfo &Target) {
625
626 const size_t StartSpecifierPos = StartSpecifier - PrintfFormatString.data();
627 assert(StartSpecifierPos + SpecifierLen <= PrintfFormatString.size());
628
629 // Everything before the specifier needs copying verbatim
630 assert(StartSpecifierPos >= PrintfFormatStringPos);
631
632 appendFormatText(StringRef(PrintfFormatString.begin() + PrintfFormatStringPos,
633 StartSpecifierPos - PrintfFormatStringPos));
634
635 const ConversionSpecifier::Kind ArgKind =
636 FS.getConversionSpecifier().getKind();
637
638 // Skip over specifier
639 PrintfFormatStringPos = StartSpecifierPos + SpecifierLen;
640 assert(PrintfFormatStringPos <= PrintfFormatString.size());
641
642 FormatStringNeededRewriting = true;
643
644 if (ArgKind == ConversionSpecifier::Kind::nArg) {
645 // std::print doesn't do the equivalent of %n
646 return conversionNotPossible("'%n' is not supported in format string");
647 }
648
649 if (ArgKind == ConversionSpecifier::Kind::PrintErrno) {
650 // std::print doesn't support %m. In theory we could insert a
651 // strerror(errno) parameter (assuming that libc has a thread-safe
652 // implementation, which glibc does), but that would require keeping track
653 // of the input and output parameter indices for position arguments too.
654 return conversionNotPossible("'%m' is not supported in format string");
655 }
656
657 if (ArgKind == ConversionSpecifier::PercentArg) {
658 StandardFormatString.push_back('%');
659 return true;
660 }
661
662 const unsigned ArgIndex = FS.getArgIndex() + ArgsOffset;
663 if (ArgIndex >= NumArgs) {
664 // Argument index out of range. Give up.
665 return conversionNotPossible(
666 (Twine("argument index ") + Twine(ArgIndex) + " is out of range")
667 .str());
668 }
669
670 return convertArgument(FS, Args[ArgIndex]->IgnoreImplicitAsWritten(),
671 StandardFormatString);
672}
673
674/// Called at the very end just before applying fixes to capture the last part
675/// of the format string.
676void FormatStringConverter::finalizeFormatText() {
677 appendFormatText(
678 StringRef(PrintfFormatString.begin() + PrintfFormatStringPos,
679 PrintfFormatString.size() - PrintfFormatStringPos));
680 PrintfFormatStringPos = PrintfFormatString.size();
681
682 // It's clearer to convert printf("Hello\r\n"); to std::print("Hello\r\n")
683 // than to std::println("Hello\r");
684 // Use StringRef until C++20 std::string::ends_with() is available.
685 const auto StandardFormatStringRef = StringRef(StandardFormatString);
686 if (Config.AllowTrailingNewlineRemoval &&
687 StandardFormatStringRef.ends_with("\\n") &&
688 !StandardFormatStringRef.ends_with("\\\\n") &&
689 !StandardFormatStringRef.ends_with("\\r\\n")) {
690 UsePrintNewlineFunction = true;
691 FormatStringNeededRewriting = true;
692 StandardFormatString.erase(StandardFormatString.end() - 2,
693 StandardFormatString.end());
694 }
695
696 StandardFormatString.push_back('\"');
697}
698
699/// Append literal parts of the format text, reinstating escapes as required.
700void FormatStringConverter::appendFormatText(const StringRef Text) {
701 for (const char Ch : Text) {
702 if (Ch == '\a')
703 StandardFormatString += "\\a";
704 else if (Ch == '\b')
705 StandardFormatString += "\\b";
706 else if (Ch == '\f')
707 StandardFormatString += "\\f";
708 else if (Ch == '\n')
709 StandardFormatString += "\\n";
710 else if (Ch == '\r')
711 StandardFormatString += "\\r";
712 else if (Ch == '\t')
713 StandardFormatString += "\\t";
714 else if (Ch == '\v')
715 StandardFormatString += "\\v";
716 else if (Ch == '\"')
717 StandardFormatString += "\\\"";
718 else if (Ch == '\\')
719 StandardFormatString += "\\\\";
720 else if (Ch == '{') {
721 StandardFormatString += "{{";
722 FormatStringNeededRewriting = true;
723 } else if (Ch == '}') {
724 StandardFormatString += "}}";
725 FormatStringNeededRewriting = true;
726 } else if (Ch < 32) {
727 StandardFormatString += "\\x";
728 StandardFormatString += llvm::hexdigit(Ch >> 4, true);
729 StandardFormatString += llvm::hexdigit(Ch & 0xf, true);
730 } else
731 StandardFormatString += Ch;
732 }
733}
734
735static std::string withoutCStrReplacement(const BoundNodes &CStrRemovalMatch,
736 ASTContext &Context) {
737 const auto *Arg = CStrRemovalMatch.getNodeAs<Expr>("arg");
738 const auto *Member = CStrRemovalMatch.getNodeAs<MemberExpr>("member");
739 const bool Arrow = Member->isArrow();
740 return Arrow ? utils::fixit::formatDereference(*Arg, Context)
741 : tooling::fixit::getText(*Arg, Context).str();
742}
743
744/// Called by the check when it is ready to apply the fixes.
745void FormatStringConverter::applyFixes(DiagnosticBuilder &Diag,
746 SourceManager &SM) {
747 if (FormatStringNeededRewriting) {
748 Diag << FixItHint::CreateReplacement(
749 CharSourceRange::getTokenRange(FormatExpr->getBeginLoc(),
750 FormatExpr->getEndLoc()),
751 StandardFormatString);
752 }
753
754 // ArgCount is one less than the number of arguments to be rotated.
755 for (auto [ValueArgIndex, ArgCount] : ArgRotates) {
756 assert(ValueArgIndex < NumArgs);
757 assert(ValueArgIndex > ArgCount);
758
759 // First move the value argument to the right place. But if there's a
760 // pending c_str() removal then we must do that at the same time.
761 if (const auto CStrRemovalMatch =
762 std::find_if(ArgCStrRemovals.cbegin(), ArgCStrRemovals.cend(),
763 [ArgStartPos = Args[ValueArgIndex]->getBeginLoc()](
764 const BoundNodes &Match) {
765 // This c_str() removal corresponds to the argument
766 // being moved if they start at the same location.
767 const Expr *CStrArg = Match.getNodeAs<Expr>("arg");
768 return ArgStartPos == CStrArg->getBeginLoc();
769 });
770 CStrRemovalMatch != ArgCStrRemovals.end()) {
771 const std::string ArgText =
772 withoutCStrReplacement(*CStrRemovalMatch, *Context);
773 assert(!ArgText.empty());
774
775 Diag << FixItHint::CreateReplacement(
776 Args[ValueArgIndex - ArgCount]->getSourceRange(), ArgText);
777
778 // That c_str() removal is now dealt with, so we don't need to do it again
779 ArgCStrRemovals.erase(CStrRemovalMatch);
780 } else
781 Diag << tooling::fixit::createReplacement(*Args[ValueArgIndex - ArgCount],
782 *Args[ValueArgIndex], *Context);
783
784 // Now shift down the field width and precision (if either are present) to
785 // accommodate it.
786 for (size_t Offset = 0; Offset < ArgCount; ++Offset)
787 Diag << tooling::fixit::createReplacement(
788 *Args[ValueArgIndex - Offset], *Args[ValueArgIndex - Offset - 1],
789 *Context);
790
791 // Now we need to modify the ArgFix index too so that we fix the right
792 // argument. We don't need to care about the width and precision indices
793 // since they never need fixing.
794 for (auto &ArgFix : ArgFixes) {
795 if (ArgFix.ArgIndex == ValueArgIndex)
796 ArgFix.ArgIndex = ValueArgIndex - ArgCount;
797 }
798 }
799
800 for (const auto &[ArgIndex, Replacement] : ArgFixes) {
801 SourceLocation AfterOtherSide =
802 Lexer::findNextToken(Args[ArgIndex]->getEndLoc(), SM, LangOpts)
803 ->getLocation();
804
805 Diag << FixItHint::CreateInsertion(Args[ArgIndex]->getBeginLoc(),
806 Replacement, true)
807 << FixItHint::CreateInsertion(AfterOtherSide, ")", true);
808 }
809
810 for (const auto &Match : ArgCStrRemovals) {
811 const auto *Call = Match.getNodeAs<CallExpr>("call");
812 const std::string ArgText = withoutCStrReplacement(Match, *Context);
813 if (!ArgText.empty())
814 Diag << FixItHint::CreateReplacement(Call->getSourceRange(), ArgText);
815 }
816}
817} // namespace clang::tidy::utils
Declaration of the FormatStringConverter class which is used to convert printf format strings to C++ ...
void applyFixes(DiagnosticBuilder &Diag, SourceManager &SM)
Called by the check when it is ready to apply the fixes.
clang::analyze_format_string::ConversionSpecifier ConversionSpecifier
FormatStringConverter(ASTContext *Context, const CallExpr *Call, unsigned FormatArgOffset, Configuration Config, const LangOptions &LO, SourceManager &SM, Preprocessor &PP)
std::vector< std::string > match(const SymbolIndex &I, const FuzzyFindRequest &Req, bool *Incomplete)
std::string formatDereference(const Expr &ExprNode, const ASTContext &Context)
static std::string withoutCStrReplacement(const BoundNodes &CStrRemovalMatch, ASTContext &Context)
static std::optional< std::string > getCorrespondingSignedTypeName(const clang::QualType &QT)
If possible, return the text name of the signed type that corresponds to the passed integer type.
static bool castMismatchedIntegerTypes(const CallExpr *Call, bool StrictMode)
static bool isRealCharType(const clang::QualType &Ty)
Is the passed type the actual "char" type, whether that be signed or unsigned, rather than explicit s...
static std::optional< std::string > castTypeForArgument(ConversionSpecifier::Kind ArgKind, const clang::QualType &QT)
static std::optional< std::string > getCorrespondingUnsignedTypeName(const clang::QualType &QT)
If possible, return the text name of the unsigned type that corresponds to the passed integer type.
static bool isMatchingSignedness(ConversionSpecifier::Kind ArgKind, const clang::QualType &ArgType)
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
static constexpr const char FuncDecl[]