clang-tools 19.0.0git
FormatStringConverter.cpp
Go to the documentation of this file.
1//===--- FormatStringConverter.cpp - clang-tidy----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Implementation of the FormatStringConverter class which is used to convert
11/// printf format strings to C++ std::formatter format strings.
12///
13//===----------------------------------------------------------------------===//
14
16#include "../utils/FixItHintUtils.h"
17#include "clang/AST/Expr.h"
18#include "clang/ASTMatchers/ASTMatchFinder.h"
19#include "clang/Basic/LangOptions.h"
20#include "clang/Lex/Lexer.h"
21#include "clang/Tooling/FixIt.h"
22#include "llvm/ADT/StringExtras.h"
23#include "llvm/Support/Debug.h"
24
25using namespace clang::ast_matchers;
26using namespace clang::analyze_printf;
27
28namespace clang::tidy::utils {
29using clang::analyze_format_string::ConversionSpecifier;
30
31/// Is the passed type the actual "char" type, whether that be signed or
32/// unsigned, rather than explicit signed char or unsigned char types.
33static bool isRealCharType(const clang::QualType &Ty) {
34 using namespace clang;
35 const Type *DesugaredType = Ty->getUnqualifiedDesugaredType();
36 if (const auto *BT = llvm::dyn_cast<BuiltinType>(DesugaredType))
37 return (BT->getKind() == BuiltinType::Char_U ||
38 BT->getKind() == BuiltinType::Char_S);
39 return false;
40}
41
42/// If possible, return the text name of the signed type that corresponds to the
43/// passed integer type. If the passed type is already signed then its name is
44/// just returned. Only supports BuiltinTypes.
45static std::optional<std::string>
46getCorrespondingSignedTypeName(const clang::QualType &QT) {
47 using namespace clang;
48 const auto UQT = QT.getUnqualifiedType();
49 if (const auto *BT = llvm::dyn_cast<BuiltinType>(UQT)) {
50 switch (BT->getKind()) {
51 case BuiltinType::UChar:
52 case BuiltinType::Char_U:
53 case BuiltinType::SChar:
54 case BuiltinType::Char_S:
55 return "signed char";
56 case BuiltinType::UShort:
57 case BuiltinType::Short:
58 return "short";
59 case BuiltinType::UInt:
60 case BuiltinType::Int:
61 return "int";
62 case BuiltinType::ULong:
63 case BuiltinType::Long:
64 return "long";
65 case BuiltinType::ULongLong:
66 case BuiltinType::LongLong:
67 return "long long";
68 default:
69 llvm::dbgs() << "Unknown corresponding signed type for BuiltinType '"
70 << QT.getAsString() << "'\n";
71 return std::nullopt;
72 }
73 }
74
75 // Deal with fixed-width integer types from <cstdint>. Use std:: prefix only
76 // if the argument type does.
77 const std::string TypeName = UQT.getAsString();
78 StringRef SimplifiedTypeName{TypeName};
79 const bool InStd = SimplifiedTypeName.consume_front("std::");
80 const StringRef Prefix = InStd ? "std::" : "";
81
82 if (SimplifiedTypeName.starts_with("uint") &&
83 SimplifiedTypeName.ends_with("_t"))
84 return (Twine(Prefix) + SimplifiedTypeName.drop_front()).str();
85
86 if (SimplifiedTypeName == "size_t")
87 return (Twine(Prefix) + "ssize_t").str();
88
89 llvm::dbgs() << "Unknown corresponding signed type for non-BuiltinType '"
90 << UQT.getAsString() << "'\n";
91 return std::nullopt;
92}
93
94/// If possible, return the text name of the unsigned type that corresponds to
95/// the passed integer type. If the passed type is already unsigned then its
96/// name is just returned. Only supports BuiltinTypes.
97static std::optional<std::string>
98getCorrespondingUnsignedTypeName(const clang::QualType &QT) {
99 using namespace clang;
100 const auto UQT = QT.getUnqualifiedType();
101 if (const auto *BT = llvm::dyn_cast<BuiltinType>(UQT)) {
102 switch (BT->getKind()) {
103 case BuiltinType::SChar:
104 case BuiltinType::Char_S:
105 case BuiltinType::UChar:
106 case BuiltinType::Char_U:
107 return "unsigned char";
108 case BuiltinType::Short:
109 case BuiltinType::UShort:
110 return "unsigned short";
111 case BuiltinType::Int:
112 case BuiltinType::UInt:
113 return "unsigned int";
114 case BuiltinType::Long:
115 case BuiltinType::ULong:
116 return "unsigned long";
117 case BuiltinType::LongLong:
118 case BuiltinType::ULongLong:
119 return "unsigned long long";
120 default:
121 llvm::dbgs() << "Unknown corresponding unsigned type for BuiltinType '"
122 << UQT.getAsString() << "'\n";
123 return std::nullopt;
124 }
125 }
126
127 // Deal with fixed-width integer types from <cstdint>. Use std:: prefix only
128 // if the argument type does.
129 const std::string TypeName = UQT.getAsString();
130 StringRef SimplifiedTypeName{TypeName};
131 const bool InStd = SimplifiedTypeName.consume_front("std::");
132 const StringRef Prefix = InStd ? "std::" : "";
133
134 if (SimplifiedTypeName.starts_with("int") &&
135 SimplifiedTypeName.ends_with("_t"))
136 return (Twine(Prefix) + "u" + SimplifiedTypeName).str();
137
138 if (SimplifiedTypeName == "ssize_t")
139 return (Twine(Prefix) + "size_t").str();
140 if (SimplifiedTypeName == "ptrdiff_t")
141 return (Twine(Prefix) + "size_t").str();
142
143 llvm::dbgs() << "Unknown corresponding unsigned type for non-BuiltinType '"
144 << UQT.getAsString() << "'\n";
145 return std::nullopt;
146}
147
148static std::optional<std::string>
149castTypeForArgument(ConversionSpecifier::Kind ArgKind,
150 const clang::QualType &QT) {
151 if (ArgKind == ConversionSpecifier::Kind::uArg)
154}
155
156static bool isMatchingSignedness(ConversionSpecifier::Kind ArgKind,
157 const clang::QualType &ArgType) {
158 if (const auto *BT = llvm::dyn_cast<BuiltinType>(ArgType)) {
159 // Unadorned char never matches any expected signedness since it
160 // could be signed or unsigned.
161 const auto ArgTypeKind = BT->getKind();
162 if (ArgTypeKind == BuiltinType::Char_U ||
163 ArgTypeKind == BuiltinType::Char_S)
164 return false;
165 }
166
167 if (ArgKind == ConversionSpecifier::Kind::uArg)
168 return ArgType->isUnsignedIntegerType();
169 return ArgType->isSignedIntegerType();
170}
171
172namespace {
173AST_MATCHER(clang::QualType, isRealChar) {
175}
176} // namespace
177
178static bool castMismatchedIntegerTypes(const CallExpr *Call, bool StrictMode) {
179 /// For printf-style functions, the signedness of the type printed is
180 /// indicated by the corresponding type in the format string.
181 /// std::print will determine the signedness from the type of the
182 /// argument. This means that it is necessary to generate a cast in
183 /// StrictMode to ensure that the exact behaviour is maintained.
184 /// However, for templated functions like absl::PrintF and
185 /// fmt::printf, the signedness of the type printed is also taken from
186 /// the actual argument like std::print, so such casts are never
187 /// necessary. printf-style functions are variadic, whereas templated
188 /// ones aren't, so we can use that to distinguish between the two
189 /// cases.
190 if (StrictMode) {
191 const FunctionDecl *FuncDecl = Call->getDirectCallee();
192 assert(FuncDecl);
193 return FuncDecl->isVariadic();
194 }
195 return false;
196}
197
199 const CallExpr *Call,
200 unsigned FormatArgOffset,
201 const Configuration ConfigIn,
202 const LangOptions &LO)
203 : Context(ContextIn), Config(ConfigIn),
204 CastMismatchedIntegerTypes(
205 castMismatchedIntegerTypes(Call, ConfigIn.StrictMode)),
206 Args(Call->getArgs()), NumArgs(Call->getNumArgs()),
207 ArgsOffset(FormatArgOffset + 1), LangOpts(LO) {
208 assert(ArgsOffset <= NumArgs);
209 FormatExpr = llvm::dyn_cast<StringLiteral>(
210 Args[FormatArgOffset]->IgnoreImplicitAsWritten());
211 assert(FormatExpr);
212 if (!FormatExpr->isOrdinary())
213 return; // No wide string support yet
214 PrintfFormatString = FormatExpr->getString();
215
216 // Assume that the output will be approximately the same size as the input,
217 // but perhaps with a few escapes expanded.
218 const size_t EstimatedGrowth = 8;
219 StandardFormatString.reserve(PrintfFormatString.size() + EstimatedGrowth);
220 StandardFormatString.push_back('\"');
221
222 const bool IsFreeBsdkPrintf = false;
223
224 using clang::analyze_format_string::ParsePrintfString;
225 ParsePrintfString(*this, PrintfFormatString.data(),
226 PrintfFormatString.data() + PrintfFormatString.size(),
227 LangOpts, Context->getTargetInfo(), IsFreeBsdkPrintf);
228 finalizeFormatText();
229}
230
231void FormatStringConverter::emitAlignment(const PrintfSpecifier &FS,
232 std::string &FormatSpec) {
233 ConversionSpecifier::Kind ArgKind = FS.getConversionSpecifier().getKind();
234
235 // We only care about alignment if a field width is specified
236 if (FS.getFieldWidth().getHowSpecified() != OptionalAmount::NotSpecified) {
237 if (ArgKind == ConversionSpecifier::sArg) {
238 // Strings are left-aligned by default with std::format, so we only
239 // need to emit an alignment if this one needs to be right aligned.
240 if (!FS.isLeftJustified())
241 FormatSpec.push_back('>');
242 } else {
243 // Numbers are right-aligned by default with std::format, so we only
244 // need to emit an alignment if this one needs to be left aligned.
245 if (FS.isLeftJustified())
246 FormatSpec.push_back('<');
247 }
248 }
249}
250
251void FormatStringConverter::emitSign(const PrintfSpecifier &FS,
252 std::string &FormatSpec) {
253 const ConversionSpecifier Spec = FS.getConversionSpecifier();
254
255 // Ignore on something that isn't numeric. For printf it's would be a
256 // compile-time warning but ignored at runtime, but for std::format it
257 // ought to be a compile-time error.
258 if (Spec.isAnyIntArg() || Spec.isDoubleArg()) {
259 // + is preferred to ' '
260 if (FS.hasPlusPrefix())
261 FormatSpec.push_back('+');
262 else if (FS.hasSpacePrefix())
263 FormatSpec.push_back(' ');
264 }
265}
266
267void FormatStringConverter::emitAlternativeForm(const PrintfSpecifier &FS,
268 std::string &FormatSpec) {
269 if (FS.hasAlternativeForm()) {
270 switch (FS.getConversionSpecifier().getKind()) {
271 case ConversionSpecifier::Kind::aArg:
272 case ConversionSpecifier::Kind::AArg:
273 case ConversionSpecifier::Kind::eArg:
274 case ConversionSpecifier::Kind::EArg:
275 case ConversionSpecifier::Kind::fArg:
276 case ConversionSpecifier::Kind::FArg:
277 case ConversionSpecifier::Kind::gArg:
278 case ConversionSpecifier::Kind::GArg:
279 case ConversionSpecifier::Kind::xArg:
280 case ConversionSpecifier::Kind::XArg:
281 case ConversionSpecifier::Kind::oArg:
282 FormatSpec.push_back('#');
283 break;
284 default:
285 // Alternative forms don't exist for other argument kinds
286 break;
287 }
288 }
289}
290
291void FormatStringConverter::emitFieldWidth(const PrintfSpecifier &FS,
292 std::string &FormatSpec) {
293 {
294 const OptionalAmount FieldWidth = FS.getFieldWidth();
295 switch (FieldWidth.getHowSpecified()) {
296 case OptionalAmount::NotSpecified:
297 break;
298 case OptionalAmount::Constant:
299 FormatSpec.append(llvm::utostr(FieldWidth.getConstantAmount()));
300 break;
301 case OptionalAmount::Arg:
302 FormatSpec.push_back('{');
303 if (FieldWidth.usesPositionalArg()) {
304 // std::format argument identifiers are zero-based, whereas printf
305 // ones are one based.
306 assert(FieldWidth.getPositionalArgIndex() > 0U);
307 FormatSpec.append(llvm::utostr(FieldWidth.getPositionalArgIndex() - 1));
308 }
309 FormatSpec.push_back('}');
310 break;
311 case OptionalAmount::Invalid:
312 break;
313 }
314 }
315}
316
317void FormatStringConverter::emitPrecision(const PrintfSpecifier &FS,
318 std::string &FormatSpec) {
319 const OptionalAmount FieldPrecision = FS.getPrecision();
320 switch (FieldPrecision.getHowSpecified()) {
321 case OptionalAmount::NotSpecified:
322 break;
323 case OptionalAmount::Constant:
324 FormatSpec.push_back('.');
325 FormatSpec.append(llvm::utostr(FieldPrecision.getConstantAmount()));
326 break;
327 case OptionalAmount::Arg:
328 FormatSpec.push_back('.');
329 FormatSpec.push_back('{');
330 if (FieldPrecision.usesPositionalArg()) {
331 // std::format argument identifiers are zero-based, whereas printf
332 // ones are one based.
333 assert(FieldPrecision.getPositionalArgIndex() > 0U);
334 FormatSpec.append(
335 llvm::utostr(FieldPrecision.getPositionalArgIndex() - 1));
336 }
337 FormatSpec.push_back('}');
338 break;
339 case OptionalAmount::Invalid:
340 break;
341 }
342}
343
344void FormatStringConverter::maybeRotateArguments(const PrintfSpecifier &FS) {
345 unsigned ArgCount = 0;
346 const OptionalAmount FieldWidth = FS.getFieldWidth();
347 const OptionalAmount FieldPrecision = FS.getPrecision();
348
349 if (FieldWidth.getHowSpecified() == OptionalAmount::Arg &&
350 !FieldWidth.usesPositionalArg())
351 ++ArgCount;
352 if (FieldPrecision.getHowSpecified() == OptionalAmount::Arg &&
353 !FieldPrecision.usesPositionalArg())
354 ++ArgCount;
355
356 if (ArgCount)
357 ArgRotates.emplace_back(FS.getArgIndex() + ArgsOffset, ArgCount);
358}
359
360void FormatStringConverter::emitStringArgument(unsigned ArgIndex,
361 const Expr *Arg) {
362 // If the argument is the result of a call to std::string::c_str() or
363 // data() with a return type of char then we can remove that call and
364 // pass the std::string directly. We don't want to do so if the return
365 // type is not a char pointer (though it's unlikely that such code would
366 // compile without warnings anyway.) See RedundantStringCStrCheck.
367
368 if (!StringCStrCallExprMatcher) {
369 // Lazily create the matcher
370 const auto StringDecl = type(hasUnqualifiedDesugaredType(recordType(
371 hasDeclaration(cxxRecordDecl(hasName("::std::basic_string"))))));
372 const auto StringExpr = expr(
373 anyOf(hasType(StringDecl), hasType(qualType(pointsTo(StringDecl)))));
374
375 StringCStrCallExprMatcher =
376 cxxMemberCallExpr(
377 on(StringExpr.bind("arg")), callee(memberExpr().bind("member")),
378 callee(cxxMethodDecl(hasAnyName("c_str", "data"),
379 returns(pointerType(pointee(isRealChar()))))))
380 .bind("call");
381 }
382
383 auto CStrMatches = match(*StringCStrCallExprMatcher, *Arg, *Context);
384 if (CStrMatches.size() == 1)
385 ArgCStrRemovals.push_back(CStrMatches.front());
386 else if (Arg->getType()->isPointerType()) {
387 const QualType Pointee = Arg->getType()->getPointeeType();
388 // printf is happy to print signed char and unsigned char strings, but
389 // std::format only likes char strings.
390 if (Pointee->isCharType() && !isRealCharType(Pointee))
391 ArgFixes.emplace_back(ArgIndex, "reinterpret_cast<const char *>(");
392 }
393}
394
395bool FormatStringConverter::emitIntegerArgument(
396 ConversionSpecifier::Kind ArgKind, const Expr *Arg, unsigned ArgIndex,
397 std::string &FormatSpec) {
398 const clang::QualType &ArgType = Arg->getType();
399 if (ArgType->isBooleanType()) {
400 // std::format will print bool as either "true" or "false" by default,
401 // but printf prints them as "0" or "1". Be compatible with printf by
402 // requesting decimal output.
403 FormatSpec.push_back('d');
404 } else if (ArgType->isEnumeralType()) {
405 // std::format will try to find a specialization to print the enum
406 // (and probably fail), whereas printf would have just expected it to
407 // be passed as its underlying type. However, printf will have forced
408 // the signedness based on the format string, so we need to do the
409 // same.
410 if (const auto *ET = ArgType->getAs<EnumType>()) {
411 if (const std::optional<std::string> MaybeCastType =
412 castTypeForArgument(ArgKind, ET->getDecl()->getIntegerType()))
413 ArgFixes.emplace_back(
414 ArgIndex, (Twine("static_cast<") + *MaybeCastType + ">(").str());
415 else
416 return conversionNotPossible(
417 (Twine("argument ") + Twine(ArgIndex) + " has unexpected enum type")
418 .str());
419 }
420 } else if (CastMismatchedIntegerTypes &&
421 !isMatchingSignedness(ArgKind, ArgType)) {
422 // printf will happily print an unsigned type as signed if told to.
423 // Even -Wformat doesn't warn for this. std::format will format as
424 // unsigned unless we cast it.
425 if (const std::optional<std::string> MaybeCastType =
426 castTypeForArgument(ArgKind, ArgType))
427 ArgFixes.emplace_back(
428 ArgIndex, (Twine("static_cast<") + *MaybeCastType + ">(").str());
429 else
430 return conversionNotPossible(
431 (Twine("argument ") + Twine(ArgIndex) + " cannot be cast to " +
432 Twine(ArgKind == ConversionSpecifier::Kind::uArg ? "unsigned"
433 : "signed") +
434 " integer type to match format"
435 " specifier and StrictMode is enabled")
436 .str());
437 } else if (isRealCharType(ArgType) || !ArgType->isIntegerType()) {
438 // Only specify integer if the argument is of a different type
439 FormatSpec.push_back('d');
440 }
441 return true;
442}
443
444/// Append the corresponding standard format string type fragment to FormatSpec,
445/// and store any argument fixes for later application.
446/// @returns true on success, false on failure
447bool FormatStringConverter::emitType(const PrintfSpecifier &FS, const Expr *Arg,
448 std::string &FormatSpec) {
449 ConversionSpecifier::Kind ArgKind = FS.getConversionSpecifier().getKind();
450 switch (ArgKind) {
451 case ConversionSpecifier::Kind::sArg:
452 emitStringArgument(FS.getArgIndex() + ArgsOffset, Arg);
453 break;
454 case ConversionSpecifier::Kind::cArg:
455 // The type must be "c" to get a character unless the type is exactly
456 // char (whether that be signed or unsigned for the target.)
457 if (!isRealCharType(Arg->getType()))
458 FormatSpec.push_back('c');
459 break;
460 case ConversionSpecifier::Kind::dArg:
461 case ConversionSpecifier::Kind::iArg:
462 case ConversionSpecifier::Kind::uArg:
463 if (!emitIntegerArgument(ArgKind, Arg, FS.getArgIndex() + ArgsOffset,
464 FormatSpec))
465 return false;
466 break;
467 case ConversionSpecifier::Kind::pArg: {
468 const clang::QualType &ArgType = Arg->getType();
469 // std::format knows how to format void pointers and nullptrs
470 if (!ArgType->isNullPtrType() && !ArgType->isVoidPointerType())
471 ArgFixes.emplace_back(FS.getArgIndex() + ArgsOffset,
472 "static_cast<const void *>(");
473 break;
474 }
475 case ConversionSpecifier::Kind::xArg:
476 FormatSpec.push_back('x');
477 break;
478 case ConversionSpecifier::Kind::XArg:
479 FormatSpec.push_back('X');
480 break;
481 case ConversionSpecifier::Kind::oArg:
482 FormatSpec.push_back('o');
483 break;
484 case ConversionSpecifier::Kind::aArg:
485 FormatSpec.push_back('a');
486 break;
487 case ConversionSpecifier::Kind::AArg:
488 FormatSpec.push_back('A');
489 break;
490 case ConversionSpecifier::Kind::eArg:
491 FormatSpec.push_back('e');
492 break;
493 case ConversionSpecifier::Kind::EArg:
494 FormatSpec.push_back('E');
495 break;
496 case ConversionSpecifier::Kind::fArg:
497 FormatSpec.push_back('f');
498 break;
499 case ConversionSpecifier::Kind::FArg:
500 FormatSpec.push_back('F');
501 break;
502 case ConversionSpecifier::Kind::gArg:
503 FormatSpec.push_back('g');
504 break;
505 case ConversionSpecifier::Kind::GArg:
506 FormatSpec.push_back('G');
507 break;
508 default:
509 // Something we don't understand
510 return conversionNotPossible((Twine("argument ") +
511 Twine(FS.getArgIndex() + ArgsOffset) +
512 " has an unsupported format specifier")
513 .str());
514 }
515
516 return true;
517}
518
519/// Append the standard format string equivalent of the passed PrintfSpecifier
520/// to StandardFormatString and store any argument fixes for later application.
521/// @returns true on success, false on failure
522bool FormatStringConverter::convertArgument(const PrintfSpecifier &FS,
523 const Expr *Arg,
524 std::string &StandardFormatString) {
525 // The specifier must have an associated argument
526 assert(FS.consumesDataArgument());
527
528 StandardFormatString.push_back('{');
529
530 if (FS.usesPositionalArg()) {
531 // std::format argument identifiers are zero-based, whereas printf ones
532 // are one based.
533 assert(FS.getPositionalArgIndex() > 0U);
534 StandardFormatString.append(llvm::utostr(FS.getPositionalArgIndex() - 1));
535 }
536
537 // std::format format argument parts to potentially emit:
538 // [[fill]align][sign]["#"]["0"][width]["."precision][type]
539 std::string FormatSpec;
540
541 // printf doesn't support specifying the fill character - it's always a
542 // space, so we never need to generate one.
543
544 emitAlignment(FS, FormatSpec);
545 emitSign(FS, FormatSpec);
546 emitAlternativeForm(FS, FormatSpec);
547
548 if (FS.hasLeadingZeros())
549 FormatSpec.push_back('0');
550
551 emitFieldWidth(FS, FormatSpec);
552 emitPrecision(FS, FormatSpec);
553 maybeRotateArguments(FS);
554
555 if (!emitType(FS, Arg, FormatSpec))
556 return false;
557
558 if (!FormatSpec.empty()) {
559 StandardFormatString.push_back(':');
560 StandardFormatString.append(FormatSpec);
561 }
562
563 StandardFormatString.push_back('}');
564 return true;
565}
566
567/// Called for each format specifier by ParsePrintfString.
568bool FormatStringConverter::HandlePrintfSpecifier(const PrintfSpecifier &FS,
569 const char *StartSpecifier,
570 unsigned SpecifierLen,
571 const TargetInfo &Target) {
572
573 const size_t StartSpecifierPos = StartSpecifier - PrintfFormatString.data();
574 assert(StartSpecifierPos + SpecifierLen <= PrintfFormatString.size());
575
576 // Everything before the specifier needs copying verbatim
577 assert(StartSpecifierPos >= PrintfFormatStringPos);
578
579 appendFormatText(StringRef(PrintfFormatString.begin() + PrintfFormatStringPos,
580 StartSpecifierPos - PrintfFormatStringPos));
581
582 const ConversionSpecifier::Kind ArgKind =
583 FS.getConversionSpecifier().getKind();
584
585 // Skip over specifier
586 PrintfFormatStringPos = StartSpecifierPos + SpecifierLen;
587 assert(PrintfFormatStringPos <= PrintfFormatString.size());
588
589 FormatStringNeededRewriting = true;
590
591 if (ArgKind == ConversionSpecifier::Kind::nArg) {
592 // std::print doesn't do the equivalent of %n
593 return conversionNotPossible("'%n' is not supported in format string");
594 }
595
596 if (ArgKind == ConversionSpecifier::Kind::PrintErrno) {
597 // std::print doesn't support %m. In theory we could insert a
598 // strerror(errno) parameter (assuming that libc has a thread-safe
599 // implementation, which glibc does), but that would require keeping track
600 // of the input and output parameter indices for position arguments too.
601 return conversionNotPossible("'%m' is not supported in format string");
602 }
603
604 if (ArgKind == ConversionSpecifier::PercentArg) {
605 StandardFormatString.push_back('%');
606 return true;
607 }
608
609 const unsigned ArgIndex = FS.getArgIndex() + ArgsOffset;
610 if (ArgIndex >= NumArgs) {
611 // Argument index out of range. Give up.
612 return conversionNotPossible(
613 (Twine("argument index ") + Twine(ArgIndex) + " is out of range")
614 .str());
615 }
616
617 return convertArgument(FS, Args[ArgIndex]->IgnoreImplicitAsWritten(),
618 StandardFormatString);
619}
620
621/// Called at the very end just before applying fixes to capture the last part
622/// of the format string.
623void FormatStringConverter::finalizeFormatText() {
624 appendFormatText(
625 StringRef(PrintfFormatString.begin() + PrintfFormatStringPos,
626 PrintfFormatString.size() - PrintfFormatStringPos));
627 PrintfFormatStringPos = PrintfFormatString.size();
628
629 // It's clearer to convert printf("Hello\r\n"); to std::print("Hello\r\n")
630 // than to std::println("Hello\r");
631 // Use StringRef until C++20 std::string::ends_with() is available.
632 const auto StandardFormatStringRef = StringRef(StandardFormatString);
633 if (Config.AllowTrailingNewlineRemoval &&
634 StandardFormatStringRef.ends_with("\\n") &&
635 !StandardFormatStringRef.ends_with("\\\\n") &&
636 !StandardFormatStringRef.ends_with("\\r\\n")) {
637 UsePrintNewlineFunction = true;
638 FormatStringNeededRewriting = true;
639 StandardFormatString.erase(StandardFormatString.end() - 2,
640 StandardFormatString.end());
641 }
642
643 StandardFormatString.push_back('\"');
644}
645
646/// Append literal parts of the format text, reinstating escapes as required.
647void FormatStringConverter::appendFormatText(const StringRef Text) {
648 for (const char Ch : Text) {
649 if (Ch == '\a')
650 StandardFormatString += "\\a";
651 else if (Ch == '\b')
652 StandardFormatString += "\\b";
653 else if (Ch == '\f')
654 StandardFormatString += "\\f";
655 else if (Ch == '\n')
656 StandardFormatString += "\\n";
657 else if (Ch == '\r')
658 StandardFormatString += "\\r";
659 else if (Ch == '\t')
660 StandardFormatString += "\\t";
661 else if (Ch == '\v')
662 StandardFormatString += "\\v";
663 else if (Ch == '\"')
664 StandardFormatString += "\\\"";
665 else if (Ch == '\\')
666 StandardFormatString += "\\\\";
667 else if (Ch == '{') {
668 StandardFormatString += "{{";
669 FormatStringNeededRewriting = true;
670 } else if (Ch == '}') {
671 StandardFormatString += "}}";
672 FormatStringNeededRewriting = true;
673 } else if (Ch < 32) {
674 StandardFormatString += "\\x";
675 StandardFormatString += llvm::hexdigit(Ch >> 4, true);
676 StandardFormatString += llvm::hexdigit(Ch & 0xf, true);
677 } else
678 StandardFormatString += Ch;
679 }
680}
681
682static std::string withoutCStrReplacement(const BoundNodes &CStrRemovalMatch,
683 ASTContext &Context) {
684 const auto *Arg = CStrRemovalMatch.getNodeAs<Expr>("arg");
685 const auto *Member = CStrRemovalMatch.getNodeAs<MemberExpr>("member");
686 const bool Arrow = Member->isArrow();
687 return Arrow ? utils::fixit::formatDereference(*Arg, Context)
688 : tooling::fixit::getText(*Arg, Context).str();
689}
690
691/// Called by the check when it is ready to apply the fixes.
692void FormatStringConverter::applyFixes(DiagnosticBuilder &Diag,
693 SourceManager &SM) {
694 if (FormatStringNeededRewriting) {
695 Diag << FixItHint::CreateReplacement(
696 CharSourceRange::getTokenRange(FormatExpr->getBeginLoc(),
697 FormatExpr->getEndLoc()),
698 StandardFormatString);
699 }
700
701 // ArgCount is one less than the number of arguments to be rotated.
702 for (auto [ValueArgIndex, ArgCount] : ArgRotates) {
703 assert(ValueArgIndex < NumArgs);
704 assert(ValueArgIndex > ArgCount);
705
706 // First move the value argument to the right place. But if there's a
707 // pending c_str() removal then we must do that at the same time.
708 if (const auto CStrRemovalMatch =
709 std::find_if(ArgCStrRemovals.cbegin(), ArgCStrRemovals.cend(),
710 [ArgStartPos = Args[ValueArgIndex]->getBeginLoc()](
711 const BoundNodes &Match) {
712 // This c_str() removal corresponds to the argument
713 // being moved if they start at the same location.
714 const Expr *CStrArg = Match.getNodeAs<Expr>("arg");
715 return ArgStartPos == CStrArg->getBeginLoc();
716 });
717 CStrRemovalMatch != ArgCStrRemovals.end()) {
718 const std::string ArgText =
719 withoutCStrReplacement(*CStrRemovalMatch, *Context);
720 assert(!ArgText.empty());
721
722 Diag << FixItHint::CreateReplacement(
723 Args[ValueArgIndex - ArgCount]->getSourceRange(), ArgText);
724
725 // That c_str() removal is now dealt with, so we don't need to do it again
726 ArgCStrRemovals.erase(CStrRemovalMatch);
727 } else
728 Diag << tooling::fixit::createReplacement(*Args[ValueArgIndex - ArgCount],
729 *Args[ValueArgIndex], *Context);
730
731 // Now shift down the field width and precision (if either are present) to
732 // accommodate it.
733 for (size_t Offset = 0; Offset < ArgCount; ++Offset)
734 Diag << tooling::fixit::createReplacement(
735 *Args[ValueArgIndex - Offset], *Args[ValueArgIndex - Offset - 1],
736 *Context);
737
738 // Now we need to modify the ArgFix index too so that we fix the right
739 // argument. We don't need to care about the width and precision indices
740 // since they never need fixing.
741 for (auto &ArgFix : ArgFixes) {
742 if (ArgFix.ArgIndex == ValueArgIndex)
743 ArgFix.ArgIndex = ValueArgIndex - ArgCount;
744 }
745 }
746
747 for (const auto &[ArgIndex, Replacement] : ArgFixes) {
748 SourceLocation AfterOtherSide =
749 Lexer::findNextToken(Args[ArgIndex]->getEndLoc(), SM, LangOpts)
750 ->getLocation();
751
752 Diag << FixItHint::CreateInsertion(Args[ArgIndex]->getBeginLoc(),
753 Replacement, true)
754 << FixItHint::CreateInsertion(AfterOtherSide, ")", true);
755 }
756
757 for (const auto &Match : ArgCStrRemovals) {
758 const auto *Call = Match.getNodeAs<CallExpr>("call");
759 const std::string ArgText = withoutCStrReplacement(Match, *Context);
760 if (!ArgText.empty())
761 Diag << FixItHint::CreateReplacement(Call->getSourceRange(), ArgText);
762 }
763}
764} // namespace clang::tidy::utils
static cl::opt< std::string > Config("config", desc(R"( Specifies a configuration in YAML/JSON format: -config="{Checks:' *', CheckOptions:{x:y}}" When the value is empty, clang-tidy will attempt to find a file named .clang-tidy for each source file in its parent directories. )"), cl::init(""), cl::cat(ClangTidyCategory))
size_t Offset
Declaration of the FormatStringConverter class which is used to convert printf format strings to C++ ...
NodeType Type
std::string Text
llvm::json::Object Args
Definition: Trace.cpp:138
void applyFixes(DiagnosticBuilder &Diag, SourceManager &SM)
Called by the check when it is ready to apply the fixes.
clang::analyze_format_string::ConversionSpecifier ConversionSpecifier
FormatStringConverter(ASTContext *Context, const CallExpr *Call, unsigned FormatArgOffset, Configuration Config, const LangOptions &LO)
AST_MATCHER(Decl, declHasNoReturnAttr)
matches a Decl if it has a "no return" attribute of any kind
std::vector< std::string > match(const SymbolIndex &I, const FuzzyFindRequest &Req, bool *Incomplete)
Definition: TestIndex.cpp:139
std::string formatDereference(const Expr &ExprNode, const ASTContext &Context)
static std::string withoutCStrReplacement(const BoundNodes &CStrRemovalMatch, ASTContext &Context)
static std::optional< std::string > getCorrespondingSignedTypeName(const clang::QualType &QT)
If possible, return the text name of the signed type that corresponds to the passed integer type.
static bool castMismatchedIntegerTypes(const CallExpr *Call, bool StrictMode)
static bool isRealCharType(const clang::QualType &Ty)
Is the passed type the actual "char" type, whether that be signed or unsigned, rather than explicit s...
static std::optional< std::string > castTypeForArgument(ConversionSpecifier::Kind ArgKind, const clang::QualType &QT)
static std::optional< std::string > getCorrespondingUnsignedTypeName(const clang::QualType &QT)
If possible, return the text name of the unsigned type that corresponds to the passed integer type.
static bool isMatchingSignedness(ConversionSpecifier::Kind ArgKind, const clang::QualType &ArgType)
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//