clang-tools 20.0.0git
FormatStringConverter.cpp
Go to the documentation of this file.
1//===--- FormatStringConverter.cpp - clang-tidy----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Implementation of the FormatStringConverter class which is used to convert
11/// printf format strings to C++ std::formatter format strings.
12///
13//===----------------------------------------------------------------------===//
14
16#include "../utils/FixItHintUtils.h"
17#include "clang/AST/Expr.h"
18#include "clang/ASTMatchers/ASTMatchFinder.h"
19#include "clang/Basic/LangOptions.h"
20#include "clang/Lex/Lexer.h"
21#include "clang/Lex/Preprocessor.h"
22#include "clang/Tooling/FixIt.h"
23#include "llvm/ADT/StringExtras.h"
24#include "llvm/Support/Debug.h"
25
26using namespace clang::ast_matchers;
27using namespace clang::analyze_printf;
28
29namespace clang::tidy::utils {
30using clang::analyze_format_string::ConversionSpecifier;
31
32/// Is the passed type the actual "char" type, whether that be signed or
33/// unsigned, rather than explicit signed char or unsigned char types.
34static bool isRealCharType(const clang::QualType &Ty) {
35 using namespace clang;
36 const Type *DesugaredType = Ty->getUnqualifiedDesugaredType();
37 if (const auto *BT = llvm::dyn_cast<BuiltinType>(DesugaredType))
38 return (BT->getKind() == BuiltinType::Char_U ||
39 BT->getKind() == BuiltinType::Char_S);
40 return false;
41}
42
43/// If possible, return the text name of the signed type that corresponds to the
44/// passed integer type. If the passed type is already signed then its name is
45/// just returned. Only supports BuiltinTypes.
46static std::optional<std::string>
47getCorrespondingSignedTypeName(const clang::QualType &QT) {
48 using namespace clang;
49 const auto UQT = QT.getUnqualifiedType();
50 if (const auto *BT = llvm::dyn_cast<BuiltinType>(UQT)) {
51 switch (BT->getKind()) {
52 case BuiltinType::UChar:
53 case BuiltinType::Char_U:
54 case BuiltinType::SChar:
55 case BuiltinType::Char_S:
56 return "signed char";
57 case BuiltinType::UShort:
58 case BuiltinType::Short:
59 return "short";
60 case BuiltinType::UInt:
61 case BuiltinType::Int:
62 return "int";
63 case BuiltinType::ULong:
64 case BuiltinType::Long:
65 return "long";
66 case BuiltinType::ULongLong:
67 case BuiltinType::LongLong:
68 return "long long";
69 default:
70 llvm::dbgs() << "Unknown corresponding signed type for BuiltinType '"
71 << QT.getAsString() << "'\n";
72 return std::nullopt;
73 }
74 }
75
76 // Deal with fixed-width integer types from <cstdint>. Use std:: prefix only
77 // if the argument type does.
78 const std::string TypeName = UQT.getAsString();
79 StringRef SimplifiedTypeName{TypeName};
80 const bool InStd = SimplifiedTypeName.consume_front("std::");
81 const StringRef Prefix = InStd ? "std::" : "";
82
83 if (SimplifiedTypeName.starts_with("uint") &&
84 SimplifiedTypeName.ends_with("_t"))
85 return (Twine(Prefix) + SimplifiedTypeName.drop_front()).str();
86
87 if (SimplifiedTypeName == "size_t")
88 return (Twine(Prefix) + "ssize_t").str();
89
90 llvm::dbgs() << "Unknown corresponding signed type for non-BuiltinType '"
91 << UQT.getAsString() << "'\n";
92 return std::nullopt;
93}
94
95/// If possible, return the text name of the unsigned type that corresponds to
96/// the passed integer type. If the passed type is already unsigned then its
97/// name is just returned. Only supports BuiltinTypes.
98static std::optional<std::string>
99getCorrespondingUnsignedTypeName(const clang::QualType &QT) {
100 using namespace clang;
101 const auto UQT = QT.getUnqualifiedType();
102 if (const auto *BT = llvm::dyn_cast<BuiltinType>(UQT)) {
103 switch (BT->getKind()) {
104 case BuiltinType::SChar:
105 case BuiltinType::Char_S:
106 case BuiltinType::UChar:
107 case BuiltinType::Char_U:
108 return "unsigned char";
109 case BuiltinType::Short:
110 case BuiltinType::UShort:
111 return "unsigned short";
112 case BuiltinType::Int:
113 case BuiltinType::UInt:
114 return "unsigned int";
115 case BuiltinType::Long:
116 case BuiltinType::ULong:
117 return "unsigned long";
118 case BuiltinType::LongLong:
119 case BuiltinType::ULongLong:
120 return "unsigned long long";
121 default:
122 llvm::dbgs() << "Unknown corresponding unsigned type for BuiltinType '"
123 << UQT.getAsString() << "'\n";
124 return std::nullopt;
125 }
126 }
127
128 // Deal with fixed-width integer types from <cstdint>. Use std:: prefix only
129 // if the argument type does.
130 const std::string TypeName = UQT.getAsString();
131 StringRef SimplifiedTypeName{TypeName};
132 const bool InStd = SimplifiedTypeName.consume_front("std::");
133 const StringRef Prefix = InStd ? "std::" : "";
134
135 if (SimplifiedTypeName.starts_with("int") &&
136 SimplifiedTypeName.ends_with("_t"))
137 return (Twine(Prefix) + "u" + SimplifiedTypeName).str();
138
139 if (SimplifiedTypeName == "ssize_t")
140 return (Twine(Prefix) + "size_t").str();
141 if (SimplifiedTypeName == "ptrdiff_t")
142 return (Twine(Prefix) + "size_t").str();
143
144 llvm::dbgs() << "Unknown corresponding unsigned type for non-BuiltinType '"
145 << UQT.getAsString() << "'\n";
146 return std::nullopt;
147}
148
149static std::optional<std::string>
150castTypeForArgument(ConversionSpecifier::Kind ArgKind,
151 const clang::QualType &QT) {
152 if (ArgKind == ConversionSpecifier::Kind::uArg)
155}
156
157static bool isMatchingSignedness(ConversionSpecifier::Kind ArgKind,
158 const clang::QualType &ArgType) {
159 if (const auto *BT = llvm::dyn_cast<BuiltinType>(ArgType)) {
160 // Unadorned char never matches any expected signedness since it
161 // could be signed or unsigned.
162 const auto ArgTypeKind = BT->getKind();
163 if (ArgTypeKind == BuiltinType::Char_U ||
164 ArgTypeKind == BuiltinType::Char_S)
165 return false;
166 }
167
168 if (ArgKind == ConversionSpecifier::Kind::uArg)
169 return ArgType->isUnsignedIntegerType();
170 return ArgType->isSignedIntegerType();
171}
172
173namespace {
174AST_MATCHER(clang::QualType, isRealChar) {
176}
177} // namespace
178
179static bool castMismatchedIntegerTypes(const CallExpr *Call, bool StrictMode) {
180 /// For printf-style functions, the signedness of the type printed is
181 /// indicated by the corresponding type in the format string.
182 /// std::print will determine the signedness from the type of the
183 /// argument. This means that it is necessary to generate a cast in
184 /// StrictMode to ensure that the exact behaviour is maintained.
185 /// However, for templated functions like absl::PrintF and
186 /// fmt::printf, the signedness of the type printed is also taken from
187 /// the actual argument like std::print, so such casts are never
188 /// necessary. printf-style functions are variadic, whereas templated
189 /// ones aren't, so we can use that to distinguish between the two
190 /// cases.
191 if (StrictMode) {
192 const FunctionDecl *FuncDecl = Call->getDirectCallee();
193 assert(FuncDecl);
194 return FuncDecl->isVariadic();
195 }
196 return false;
197}
198
200 ASTContext *ContextIn, const CallExpr *Call, unsigned FormatArgOffset,
201 const Configuration ConfigIn, const LangOptions &LO, SourceManager &SM,
202 Preprocessor &PP)
203 : Context(ContextIn), Config(ConfigIn),
204 CastMismatchedIntegerTypes(
205 castMismatchedIntegerTypes(Call, ConfigIn.StrictMode)),
206 Args(Call->getArgs()), NumArgs(Call->getNumArgs()),
207 ArgsOffset(FormatArgOffset + 1), LangOpts(LO) {
208 assert(ArgsOffset <= NumArgs);
209 FormatExpr = llvm::dyn_cast<StringLiteral>(
210 Args[FormatArgOffset]->IgnoreImplicitAsWritten());
211
212 if (!FormatExpr || !FormatExpr->isOrdinary()) {
213 // Function must have a narrow string literal as its first argument.
214 conversionNotPossible("first argument is not a narrow string literal");
215 return;
216 }
217
218 if (const std::optional<StringRef> MaybeMacroName =
219 formatStringContainsUnreplaceableMacro(Call, FormatExpr, SM, PP);
220 MaybeMacroName) {
221 conversionNotPossible(
222 ("format string contains unreplaceable macro '" + *MaybeMacroName + "'")
223 .str());
224 return;
225 }
226
227 PrintfFormatString = FormatExpr->getString();
228
229 // Assume that the output will be approximately the same size as the input,
230 // but perhaps with a few escapes expanded.
231 const size_t EstimatedGrowth = 8;
232 StandardFormatString.reserve(PrintfFormatString.size() + EstimatedGrowth);
233 StandardFormatString.push_back('\"');
234
235 const bool IsFreeBsdkPrintf = false;
236
237 using clang::analyze_format_string::ParsePrintfString;
238 ParsePrintfString(*this, PrintfFormatString.data(),
239 PrintfFormatString.data() + PrintfFormatString.size(),
240 LangOpts, Context->getTargetInfo(), IsFreeBsdkPrintf);
241 finalizeFormatText();
242}
243
244std::optional<StringRef>
245FormatStringConverter::formatStringContainsUnreplaceableMacro(
246 const CallExpr *Call, const StringLiteral *FormatExpr, SourceManager &SM,
247 Preprocessor &PP) {
248 // If a macro invocation surrounds the entire call then we don't want that to
249 // inhibit conversion. The whole format string will appear to come from that
250 // macro, as will the function call.
251 std::optional<StringRef> MaybeSurroundingMacroName;
252 if (SourceLocation BeginCallLoc = Call->getBeginLoc();
253 BeginCallLoc.isMacroID())
254 MaybeSurroundingMacroName =
255 Lexer::getImmediateMacroName(BeginCallLoc, SM, PP.getLangOpts());
256
257 for (auto I = FormatExpr->tokloc_begin(), E = FormatExpr->tokloc_end();
258 I != E; ++I) {
259 const SourceLocation &TokenLoc = *I;
260 if (TokenLoc.isMacroID()) {
261 const StringRef MacroName =
262 Lexer::getImmediateMacroName(TokenLoc, SM, PP.getLangOpts());
263
264 if (MaybeSurroundingMacroName != MacroName) {
265 // glibc uses __PRI64_PREFIX and __PRIPTR_PREFIX to define the prefixes
266 // for types that change size so we must look for multiple prefixes.
267 if (!MacroName.starts_with("PRI") && !MacroName.starts_with("__PRI"))
268 return MacroName;
269
270 const SourceLocation TokenSpellingLoc = SM.getSpellingLoc(TokenLoc);
271 const OptionalFileEntryRef MaybeFileEntry =
272 SM.getFileEntryRefForID(SM.getFileID(TokenSpellingLoc));
273 if (!MaybeFileEntry)
274 return MacroName;
275
276 HeaderSearch &HS = PP.getHeaderSearchInfo();
277 // Check if the file is a system header
278 if (!isSystem(HS.getFileDirFlavor(*MaybeFileEntry)) ||
279 llvm::sys::path::filename(MaybeFileEntry->getName()) !=
280 "inttypes.h")
281 return MacroName;
282 }
283 }
284 }
285 return std::nullopt;
286}
287
288void FormatStringConverter::emitAlignment(const PrintfSpecifier &FS,
289 std::string &FormatSpec) {
290 ConversionSpecifier::Kind ArgKind = FS.getConversionSpecifier().getKind();
291
292 // We only care about alignment if a field width is specified
293 if (FS.getFieldWidth().getHowSpecified() != OptionalAmount::NotSpecified) {
294 if (ArgKind == ConversionSpecifier::sArg) {
295 // Strings are left-aligned by default with std::format, so we only
296 // need to emit an alignment if this one needs to be right aligned.
297 if (!FS.isLeftJustified())
298 FormatSpec.push_back('>');
299 } else {
300 // Numbers are right-aligned by default with std::format, so we only
301 // need to emit an alignment if this one needs to be left aligned.
302 if (FS.isLeftJustified())
303 FormatSpec.push_back('<');
304 }
305 }
306}
307
308void FormatStringConverter::emitSign(const PrintfSpecifier &FS,
309 std::string &FormatSpec) {
310 const ConversionSpecifier Spec = FS.getConversionSpecifier();
311
312 // Ignore on something that isn't numeric. For printf it's would be a
313 // compile-time warning but ignored at runtime, but for std::format it
314 // ought to be a compile-time error.
315 if (Spec.isAnyIntArg() || Spec.isDoubleArg()) {
316 // + is preferred to ' '
317 if (FS.hasPlusPrefix())
318 FormatSpec.push_back('+');
319 else if (FS.hasSpacePrefix())
320 FormatSpec.push_back(' ');
321 }
322}
323
324void FormatStringConverter::emitAlternativeForm(const PrintfSpecifier &FS,
325 std::string &FormatSpec) {
326 if (FS.hasAlternativeForm()) {
327 switch (FS.getConversionSpecifier().getKind()) {
328 case ConversionSpecifier::Kind::aArg:
329 case ConversionSpecifier::Kind::AArg:
330 case ConversionSpecifier::Kind::eArg:
331 case ConversionSpecifier::Kind::EArg:
332 case ConversionSpecifier::Kind::fArg:
333 case ConversionSpecifier::Kind::FArg:
334 case ConversionSpecifier::Kind::gArg:
335 case ConversionSpecifier::Kind::GArg:
336 case ConversionSpecifier::Kind::xArg:
337 case ConversionSpecifier::Kind::XArg:
338 case ConversionSpecifier::Kind::oArg:
339 FormatSpec.push_back('#');
340 break;
341 default:
342 // Alternative forms don't exist for other argument kinds
343 break;
344 }
345 }
346}
347
348void FormatStringConverter::emitFieldWidth(const PrintfSpecifier &FS,
349 std::string &FormatSpec) {
350 {
351 const OptionalAmount FieldWidth = FS.getFieldWidth();
352 switch (FieldWidth.getHowSpecified()) {
353 case OptionalAmount::NotSpecified:
354 break;
355 case OptionalAmount::Constant:
356 FormatSpec.append(llvm::utostr(FieldWidth.getConstantAmount()));
357 break;
358 case OptionalAmount::Arg:
359 FormatSpec.push_back('{');
360 if (FieldWidth.usesPositionalArg()) {
361 // std::format argument identifiers are zero-based, whereas printf
362 // ones are one based.
363 assert(FieldWidth.getPositionalArgIndex() > 0U);
364 FormatSpec.append(llvm::utostr(FieldWidth.getPositionalArgIndex() - 1));
365 }
366 FormatSpec.push_back('}');
367 break;
368 case OptionalAmount::Invalid:
369 break;
370 }
371 }
372}
373
374void FormatStringConverter::emitPrecision(const PrintfSpecifier &FS,
375 std::string &FormatSpec) {
376 const OptionalAmount FieldPrecision = FS.getPrecision();
377 switch (FieldPrecision.getHowSpecified()) {
378 case OptionalAmount::NotSpecified:
379 break;
380 case OptionalAmount::Constant:
381 FormatSpec.push_back('.');
382 FormatSpec.append(llvm::utostr(FieldPrecision.getConstantAmount()));
383 break;
384 case OptionalAmount::Arg:
385 FormatSpec.push_back('.');
386 FormatSpec.push_back('{');
387 if (FieldPrecision.usesPositionalArg()) {
388 // std::format argument identifiers are zero-based, whereas printf
389 // ones are one based.
390 assert(FieldPrecision.getPositionalArgIndex() > 0U);
391 FormatSpec.append(
392 llvm::utostr(FieldPrecision.getPositionalArgIndex() - 1));
393 }
394 FormatSpec.push_back('}');
395 break;
396 case OptionalAmount::Invalid:
397 break;
398 }
399}
400
401void FormatStringConverter::maybeRotateArguments(const PrintfSpecifier &FS) {
402 unsigned ArgCount = 0;
403 const OptionalAmount FieldWidth = FS.getFieldWidth();
404 const OptionalAmount FieldPrecision = FS.getPrecision();
405
406 if (FieldWidth.getHowSpecified() == OptionalAmount::Arg &&
407 !FieldWidth.usesPositionalArg())
408 ++ArgCount;
409 if (FieldPrecision.getHowSpecified() == OptionalAmount::Arg &&
410 !FieldPrecision.usesPositionalArg())
411 ++ArgCount;
412
413 if (ArgCount)
414 ArgRotates.emplace_back(FS.getArgIndex() + ArgsOffset, ArgCount);
415}
416
417void FormatStringConverter::emitStringArgument(unsigned ArgIndex,
418 const Expr *Arg) {
419 // If the argument is the result of a call to std::string::c_str() or
420 // data() with a return type of char then we can remove that call and
421 // pass the std::string directly. We don't want to do so if the return
422 // type is not a char pointer (though it's unlikely that such code would
423 // compile without warnings anyway.) See RedundantStringCStrCheck.
424
425 if (!StringCStrCallExprMatcher) {
426 // Lazily create the matcher
427 const auto StringDecl = type(hasUnqualifiedDesugaredType(recordType(
428 hasDeclaration(cxxRecordDecl(hasName("::std::basic_string"))))));
429 const auto StringExpr = expr(
430 anyOf(hasType(StringDecl), hasType(qualType(pointsTo(StringDecl)))));
431
432 StringCStrCallExprMatcher =
433 cxxMemberCallExpr(
434 on(StringExpr.bind("arg")), callee(memberExpr().bind("member")),
435 callee(cxxMethodDecl(hasAnyName("c_str", "data"),
436 returns(pointerType(pointee(isRealChar()))))))
437 .bind("call");
438 }
439
440 auto CStrMatches = match(*StringCStrCallExprMatcher, *Arg, *Context);
441 if (CStrMatches.size() == 1)
442 ArgCStrRemovals.push_back(CStrMatches.front());
443 else if (Arg->getType()->isPointerType()) {
444 const QualType Pointee = Arg->getType()->getPointeeType();
445 // printf is happy to print signed char and unsigned char strings, but
446 // std::format only likes char strings.
447 if (Pointee->isCharType() && !isRealCharType(Pointee))
448 ArgFixes.emplace_back(ArgIndex, "reinterpret_cast<const char *>(");
449 }
450}
451
452bool FormatStringConverter::emitIntegerArgument(
453 ConversionSpecifier::Kind ArgKind, const Expr *Arg, unsigned ArgIndex,
454 std::string &FormatSpec) {
455 const clang::QualType &ArgType = Arg->getType();
456 if (ArgType->isBooleanType()) {
457 // std::format will print bool as either "true" or "false" by default,
458 // but printf prints them as "0" or "1". Be compatible with printf by
459 // requesting decimal output.
460 FormatSpec.push_back('d');
461 } else if (ArgType->isEnumeralType()) {
462 // std::format will try to find a specialization to print the enum
463 // (and probably fail), whereas printf would have just expected it to
464 // be passed as its underlying type. However, printf will have forced
465 // the signedness based on the format string, so we need to do the
466 // same.
467 if (const auto *ET = ArgType->getAs<EnumType>()) {
468 if (const std::optional<std::string> MaybeCastType =
469 castTypeForArgument(ArgKind, ET->getDecl()->getIntegerType()))
470 ArgFixes.emplace_back(
471 ArgIndex, (Twine("static_cast<") + *MaybeCastType + ">(").str());
472 else
473 return conversionNotPossible(
474 (Twine("argument ") + Twine(ArgIndex) + " has unexpected enum type")
475 .str());
476 }
477 } else if (CastMismatchedIntegerTypes &&
478 !isMatchingSignedness(ArgKind, ArgType)) {
479 // printf will happily print an unsigned type as signed if told to.
480 // Even -Wformat doesn't warn for this. std::format will format as
481 // unsigned unless we cast it.
482 if (const std::optional<std::string> MaybeCastType =
483 castTypeForArgument(ArgKind, ArgType))
484 ArgFixes.emplace_back(
485 ArgIndex, (Twine("static_cast<") + *MaybeCastType + ">(").str());
486 else
487 return conversionNotPossible(
488 (Twine("argument ") + Twine(ArgIndex) + " cannot be cast to " +
489 Twine(ArgKind == ConversionSpecifier::Kind::uArg ? "unsigned"
490 : "signed") +
491 " integer type to match format"
492 " specifier and StrictMode is enabled")
493 .str());
494 } else if (isRealCharType(ArgType) || !ArgType->isIntegerType()) {
495 // Only specify integer if the argument is of a different type
496 FormatSpec.push_back('d');
497 }
498 return true;
499}
500
501/// Append the corresponding standard format string type fragment to FormatSpec,
502/// and store any argument fixes for later application.
503/// @returns true on success, false on failure
504bool FormatStringConverter::emitType(const PrintfSpecifier &FS, const Expr *Arg,
505 std::string &FormatSpec) {
506 ConversionSpecifier::Kind ArgKind = FS.getConversionSpecifier().getKind();
507 switch (ArgKind) {
508 case ConversionSpecifier::Kind::sArg:
509 emitStringArgument(FS.getArgIndex() + ArgsOffset, Arg);
510 break;
511 case ConversionSpecifier::Kind::cArg:
512 // The type must be "c" to get a character unless the type is exactly
513 // char (whether that be signed or unsigned for the target.)
514 if (!isRealCharType(Arg->getType()))
515 FormatSpec.push_back('c');
516 break;
517 case ConversionSpecifier::Kind::dArg:
518 case ConversionSpecifier::Kind::iArg:
519 case ConversionSpecifier::Kind::uArg:
520 if (!emitIntegerArgument(ArgKind, Arg, FS.getArgIndex() + ArgsOffset,
521 FormatSpec))
522 return false;
523 break;
524 case ConversionSpecifier::Kind::pArg: {
525 const clang::QualType &ArgType = Arg->getType();
526 // std::format knows how to format void pointers and nullptrs
527 if (!ArgType->isNullPtrType() && !ArgType->isVoidPointerType())
528 ArgFixes.emplace_back(FS.getArgIndex() + ArgsOffset,
529 "static_cast<const void *>(");
530 break;
531 }
532 case ConversionSpecifier::Kind::xArg:
533 FormatSpec.push_back('x');
534 break;
535 case ConversionSpecifier::Kind::XArg:
536 FormatSpec.push_back('X');
537 break;
538 case ConversionSpecifier::Kind::oArg:
539 FormatSpec.push_back('o');
540 break;
541 case ConversionSpecifier::Kind::aArg:
542 FormatSpec.push_back('a');
543 break;
544 case ConversionSpecifier::Kind::AArg:
545 FormatSpec.push_back('A');
546 break;
547 case ConversionSpecifier::Kind::eArg:
548 FormatSpec.push_back('e');
549 break;
550 case ConversionSpecifier::Kind::EArg:
551 FormatSpec.push_back('E');
552 break;
553 case ConversionSpecifier::Kind::fArg:
554 FormatSpec.push_back('f');
555 break;
556 case ConversionSpecifier::Kind::FArg:
557 FormatSpec.push_back('F');
558 break;
559 case ConversionSpecifier::Kind::gArg:
560 FormatSpec.push_back('g');
561 break;
562 case ConversionSpecifier::Kind::GArg:
563 FormatSpec.push_back('G');
564 break;
565 default:
566 // Something we don't understand
567 return conversionNotPossible((Twine("argument ") +
568 Twine(FS.getArgIndex() + ArgsOffset) +
569 " has an unsupported format specifier")
570 .str());
571 }
572
573 return true;
574}
575
576/// Append the standard format string equivalent of the passed PrintfSpecifier
577/// to StandardFormatString and store any argument fixes for later application.
578/// @returns true on success, false on failure
579bool FormatStringConverter::convertArgument(const PrintfSpecifier &FS,
580 const Expr *Arg,
581 std::string &StandardFormatString) {
582 // The specifier must have an associated argument
583 assert(FS.consumesDataArgument());
584
585 StandardFormatString.push_back('{');
586
587 if (FS.usesPositionalArg()) {
588 // std::format argument identifiers are zero-based, whereas printf ones
589 // are one based.
590 assert(FS.getPositionalArgIndex() > 0U);
591 StandardFormatString.append(llvm::utostr(FS.getPositionalArgIndex() - 1));
592 }
593
594 // std::format format argument parts to potentially emit:
595 // [[fill]align][sign]["#"]["0"][width]["."precision][type]
596 std::string FormatSpec;
597
598 // printf doesn't support specifying the fill character - it's always a
599 // space, so we never need to generate one.
600
601 emitAlignment(FS, FormatSpec);
602 emitSign(FS, FormatSpec);
603 emitAlternativeForm(FS, FormatSpec);
604
605 if (FS.hasLeadingZeros())
606 FormatSpec.push_back('0');
607
608 emitFieldWidth(FS, FormatSpec);
609 emitPrecision(FS, FormatSpec);
610 maybeRotateArguments(FS);
611
612 if (!emitType(FS, Arg, FormatSpec))
613 return false;
614
615 if (!FormatSpec.empty()) {
616 StandardFormatString.push_back(':');
617 StandardFormatString.append(FormatSpec);
618 }
619
620 StandardFormatString.push_back('}');
621 return true;
622}
623
624/// Called for each format specifier by ParsePrintfString.
625bool FormatStringConverter::HandlePrintfSpecifier(const PrintfSpecifier &FS,
626 const char *StartSpecifier,
627 unsigned SpecifierLen,
628 const TargetInfo &Target) {
629
630 const size_t StartSpecifierPos = StartSpecifier - PrintfFormatString.data();
631 assert(StartSpecifierPos + SpecifierLen <= PrintfFormatString.size());
632
633 // Everything before the specifier needs copying verbatim
634 assert(StartSpecifierPos >= PrintfFormatStringPos);
635
636 appendFormatText(StringRef(PrintfFormatString.begin() + PrintfFormatStringPos,
637 StartSpecifierPos - PrintfFormatStringPos));
638
639 const ConversionSpecifier::Kind ArgKind =
640 FS.getConversionSpecifier().getKind();
641
642 // Skip over specifier
643 PrintfFormatStringPos = StartSpecifierPos + SpecifierLen;
644 assert(PrintfFormatStringPos <= PrintfFormatString.size());
645
646 FormatStringNeededRewriting = true;
647
648 if (ArgKind == ConversionSpecifier::Kind::nArg) {
649 // std::print doesn't do the equivalent of %n
650 return conversionNotPossible("'%n' is not supported in format string");
651 }
652
653 if (ArgKind == ConversionSpecifier::Kind::PrintErrno) {
654 // std::print doesn't support %m. In theory we could insert a
655 // strerror(errno) parameter (assuming that libc has a thread-safe
656 // implementation, which glibc does), but that would require keeping track
657 // of the input and output parameter indices for position arguments too.
658 return conversionNotPossible("'%m' is not supported in format string");
659 }
660
661 if (ArgKind == ConversionSpecifier::PercentArg) {
662 StandardFormatString.push_back('%');
663 return true;
664 }
665
666 const unsigned ArgIndex = FS.getArgIndex() + ArgsOffset;
667 if (ArgIndex >= NumArgs) {
668 // Argument index out of range. Give up.
669 return conversionNotPossible(
670 (Twine("argument index ") + Twine(ArgIndex) + " is out of range")
671 .str());
672 }
673
674 return convertArgument(FS, Args[ArgIndex]->IgnoreImplicitAsWritten(),
675 StandardFormatString);
676}
677
678/// Called at the very end just before applying fixes to capture the last part
679/// of the format string.
680void FormatStringConverter::finalizeFormatText() {
681 appendFormatText(
682 StringRef(PrintfFormatString.begin() + PrintfFormatStringPos,
683 PrintfFormatString.size() - PrintfFormatStringPos));
684 PrintfFormatStringPos = PrintfFormatString.size();
685
686 // It's clearer to convert printf("Hello\r\n"); to std::print("Hello\r\n")
687 // than to std::println("Hello\r");
688 // Use StringRef until C++20 std::string::ends_with() is available.
689 const auto StandardFormatStringRef = StringRef(StandardFormatString);
690 if (Config.AllowTrailingNewlineRemoval &&
691 StandardFormatStringRef.ends_with("\\n") &&
692 !StandardFormatStringRef.ends_with("\\\\n") &&
693 !StandardFormatStringRef.ends_with("\\r\\n")) {
694 UsePrintNewlineFunction = true;
695 FormatStringNeededRewriting = true;
696 StandardFormatString.erase(StandardFormatString.end() - 2,
697 StandardFormatString.end());
698 }
699
700 StandardFormatString.push_back('\"');
701}
702
703/// Append literal parts of the format text, reinstating escapes as required.
704void FormatStringConverter::appendFormatText(const StringRef Text) {
705 for (const char Ch : Text) {
706 if (Ch == '\a')
707 StandardFormatString += "\\a";
708 else if (Ch == '\b')
709 StandardFormatString += "\\b";
710 else if (Ch == '\f')
711 StandardFormatString += "\\f";
712 else if (Ch == '\n')
713 StandardFormatString += "\\n";
714 else if (Ch == '\r')
715 StandardFormatString += "\\r";
716 else if (Ch == '\t')
717 StandardFormatString += "\\t";
718 else if (Ch == '\v')
719 StandardFormatString += "\\v";
720 else if (Ch == '\"')
721 StandardFormatString += "\\\"";
722 else if (Ch == '\\')
723 StandardFormatString += "\\\\";
724 else if (Ch == '{') {
725 StandardFormatString += "{{";
726 FormatStringNeededRewriting = true;
727 } else if (Ch == '}') {
728 StandardFormatString += "}}";
729 FormatStringNeededRewriting = true;
730 } else if (Ch < 32) {
731 StandardFormatString += "\\x";
732 StandardFormatString += llvm::hexdigit(Ch >> 4, true);
733 StandardFormatString += llvm::hexdigit(Ch & 0xf, true);
734 } else
735 StandardFormatString += Ch;
736 }
737}
738
739static std::string withoutCStrReplacement(const BoundNodes &CStrRemovalMatch,
740 ASTContext &Context) {
741 const auto *Arg = CStrRemovalMatch.getNodeAs<Expr>("arg");
742 const auto *Member = CStrRemovalMatch.getNodeAs<MemberExpr>("member");
743 const bool Arrow = Member->isArrow();
744 return Arrow ? utils::fixit::formatDereference(*Arg, Context)
745 : tooling::fixit::getText(*Arg, Context).str();
746}
747
748/// Called by the check when it is ready to apply the fixes.
749void FormatStringConverter::applyFixes(DiagnosticBuilder &Diag,
750 SourceManager &SM) {
751 if (FormatStringNeededRewriting) {
752 Diag << FixItHint::CreateReplacement(
753 CharSourceRange::getTokenRange(FormatExpr->getBeginLoc(),
754 FormatExpr->getEndLoc()),
755 StandardFormatString);
756 }
757
758 // ArgCount is one less than the number of arguments to be rotated.
759 for (auto [ValueArgIndex, ArgCount] : ArgRotates) {
760 assert(ValueArgIndex < NumArgs);
761 assert(ValueArgIndex > ArgCount);
762
763 // First move the value argument to the right place. But if there's a
764 // pending c_str() removal then we must do that at the same time.
765 if (const auto CStrRemovalMatch =
766 std::find_if(ArgCStrRemovals.cbegin(), ArgCStrRemovals.cend(),
767 [ArgStartPos = Args[ValueArgIndex]->getBeginLoc()](
768 const BoundNodes &Match) {
769 // This c_str() removal corresponds to the argument
770 // being moved if they start at the same location.
771 const Expr *CStrArg = Match.getNodeAs<Expr>("arg");
772 return ArgStartPos == CStrArg->getBeginLoc();
773 });
774 CStrRemovalMatch != ArgCStrRemovals.end()) {
775 const std::string ArgText =
776 withoutCStrReplacement(*CStrRemovalMatch, *Context);
777 assert(!ArgText.empty());
778
779 Diag << FixItHint::CreateReplacement(
780 Args[ValueArgIndex - ArgCount]->getSourceRange(), ArgText);
781
782 // That c_str() removal is now dealt with, so we don't need to do it again
783 ArgCStrRemovals.erase(CStrRemovalMatch);
784 } else
785 Diag << tooling::fixit::createReplacement(*Args[ValueArgIndex - ArgCount],
786 *Args[ValueArgIndex], *Context);
787
788 // Now shift down the field width and precision (if either are present) to
789 // accommodate it.
790 for (size_t Offset = 0; Offset < ArgCount; ++Offset)
791 Diag << tooling::fixit::createReplacement(
792 *Args[ValueArgIndex - Offset], *Args[ValueArgIndex - Offset - 1],
793 *Context);
794
795 // Now we need to modify the ArgFix index too so that we fix the right
796 // argument. We don't need to care about the width and precision indices
797 // since they never need fixing.
798 for (auto &ArgFix : ArgFixes) {
799 if (ArgFix.ArgIndex == ValueArgIndex)
800 ArgFix.ArgIndex = ValueArgIndex - ArgCount;
801 }
802 }
803
804 for (const auto &[ArgIndex, Replacement] : ArgFixes) {
805 SourceLocation AfterOtherSide =
806 Lexer::findNextToken(Args[ArgIndex]->getEndLoc(), SM, LangOpts)
807 ->getLocation();
808
809 Diag << FixItHint::CreateInsertion(Args[ArgIndex]->getBeginLoc(),
810 Replacement, true)
811 << FixItHint::CreateInsertion(AfterOtherSide, ")", true);
812 }
813
814 for (const auto &Match : ArgCStrRemovals) {
815 const auto *Call = Match.getNodeAs<CallExpr>("call");
816 const std::string ArgText = withoutCStrReplacement(Match, *Context);
817 if (!ArgText.empty())
818 Diag << FixItHint::CreateReplacement(Call->getSourceRange(), ArgText);
819 }
820}
821} // namespace clang::tidy::utils
const Expr * E
static cl::opt< std::string > Config("config", desc(R"( Specifies a configuration in YAML/JSON format: -config="{Checks:' *', CheckOptions:{x:y}}" When the value is empty, clang-tidy will attempt to find a file named .clang-tidy for each source file in its parent directories. )"), cl::init(""), cl::cat(ClangTidyCategory))
size_t Offset
Declaration of the FormatStringConverter class which is used to convert printf format strings to C++ ...
NodeType Type
std::string Text
std::string MacroName
Definition: Preamble.cpp:240
llvm::json::Object Args
Definition: Trace.cpp:138
void applyFixes(DiagnosticBuilder &Diag, SourceManager &SM)
Called by the check when it is ready to apply the fixes.
clang::analyze_format_string::ConversionSpecifier ConversionSpecifier
FormatStringConverter(ASTContext *Context, const CallExpr *Call, unsigned FormatArgOffset, Configuration Config, const LangOptions &LO, SourceManager &SM, Preprocessor &PP)
std::vector< std::string > match(const SymbolIndex &I, const FuzzyFindRequest &Req, bool *Incomplete)
Definition: TestIndex.cpp:139
std::string formatDereference(const Expr &ExprNode, const ASTContext &Context)
static std::string withoutCStrReplacement(const BoundNodes &CStrRemovalMatch, ASTContext &Context)
static std::optional< std::string > getCorrespondingSignedTypeName(const clang::QualType &QT)
If possible, return the text name of the signed type that corresponds to the passed integer type.
static bool castMismatchedIntegerTypes(const CallExpr *Call, bool StrictMode)
static bool isRealCharType(const clang::QualType &Ty)
Is the passed type the actual "char" type, whether that be signed or unsigned, rather than explicit s...
static std::optional< std::string > castTypeForArgument(ConversionSpecifier::Kind ArgKind, const clang::QualType &QT)
static std::optional< std::string > getCorrespondingUnsignedTypeName(const clang::QualType &QT)
If possible, return the text name of the unsigned type that corresponds to the passed integer type.
static bool isMatchingSignedness(ConversionSpecifier::Kind ArgKind, const clang::QualType &ArgType)
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
static constexpr const char FuncDecl[]