clang-tools 22.0.0git
RawStringLiteralCheck.cpp
Go to the documentation of this file.
1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "clang/AST/ASTContext.h"
11#include "clang/ASTMatchers/ASTMatchFinder.h"
12#include "clang/Basic/LangOptions.h"
13#include "clang/Basic/SourceManager.h"
14#include "clang/Lex/Lexer.h"
15#include "llvm/ADT/StringRef.h"
16#include <optional>
17
18using namespace clang::ast_matchers;
19
20namespace clang::tidy::modernize {
21
22static bool containsEscapes(StringRef HayStack, StringRef Escapes) {
23 size_t BackSlash = HayStack.find('\\');
24 if (BackSlash == StringRef::npos)
25 return false;
26
27 while (BackSlash != StringRef::npos) {
28 if (!Escapes.contains(HayStack[BackSlash + 1]))
29 return false;
30 BackSlash = HayStack.find('\\', BackSlash + 2);
31 }
32
33 return true;
34}
35
36static bool isRawStringLiteral(StringRef Text) {
37 // Already a raw string literal if R comes before ".
38 const size_t QuotePos = Text.find('"');
39 assert(QuotePos != StringRef::npos);
40 return (QuotePos > 0) && (Text[QuotePos - 1] == 'R');
41}
42
43static bool containsEscapedCharacters(const MatchFinder::MatchResult &Result,
44 const StringLiteral *Literal,
45 const CharsBitSet &DisallowedChars) {
46 // FIXME: Handle L"", u8"", u"" and U"" literals.
47 if (!Literal->isOrdinary())
48 return false;
49
50 for (const unsigned char C : Literal->getBytes())
51 if (DisallowedChars.test(C))
52 return false;
53
54 CharSourceRange CharRange = Lexer::makeFileCharRange(
55 CharSourceRange::getTokenRange(Literal->getSourceRange()),
56 *Result.SourceManager, Result.Context->getLangOpts());
57 StringRef Text = Lexer::getSourceText(CharRange, *Result.SourceManager,
58 Result.Context->getLangOpts());
59 if (Text.empty() || isRawStringLiteral(Text))
60 return false;
61
62 return containsEscapes(Text, R"('\"?x01)");
63}
64
65static bool containsDelimiter(StringRef Bytes, const std::string &Delimiter) {
66 return Bytes.find(Delimiter.empty()
67 ? std::string(R"lit()")lit")
68 : (")" + Delimiter + R"(")")) != StringRef::npos;
69}
70
72 ClangTidyContext *Context)
73 : ClangTidyCheck(Name, Context),
74 DelimiterStem(Options.get("DelimiterStem", "lit")),
75 ReplaceShorterLiterals(Options.get("ReplaceShorterLiterals", false)) {
76 // Non-printing characters are disallowed:
77 // \007 = \a bell
78 // \010 = \b backspace
79 // \011 = \t horizontal tab
80 // \012 = \n new line
81 // \013 = \v vertical tab
82 // \014 = \f form feed
83 // \015 = \r carriage return
84 // \177 = delete
85 for (const unsigned char C : StringRef("\000\001\002\003\004\005\006\a"
86 "\b\t\n\v\f\r\016\017"
87 "\020\021\022\023\024\025\026\027"
88 "\030\031\032\033\034\035\036\037"
89 "\177",
90 33))
91 DisallowedChars.set(C);
92
93 // Non-ASCII are disallowed too.
94 for (unsigned int C = 0x80U; C <= 0xFFU; ++C)
95 DisallowedChars.set(static_cast<unsigned char>(C));
96}
97
99 Options.store(Opts, "DelimiterStem", DelimiterStem);
100 Options.store(Opts, "ReplaceShorterLiterals", ReplaceShorterLiterals);
101}
102
104 Finder->addMatcher(
105 stringLiteral(unless(hasParent(predefinedExpr()))).bind("lit"), this);
106}
107
108static std::optional<StringRef>
109createUserDefinedSuffix(const StringLiteral *Literal, const SourceManager &SM,
110 const LangOptions &LangOpts) {
111 const CharSourceRange TokenRange =
112 CharSourceRange::getTokenRange(Literal->getSourceRange());
113 Token T;
114 if (Lexer::getRawToken(Literal->getBeginLoc(), T, SM, LangOpts))
115 return std::nullopt;
116 const CharSourceRange CharRange =
117 Lexer::makeFileCharRange(TokenRange, SM, LangOpts);
118 if (T.hasUDSuffix()) {
119 StringRef Text = Lexer::getSourceText(CharRange, SM, LangOpts);
120 const size_t UDSuffixPos = Text.find_last_of('"');
121 if (UDSuffixPos == StringRef::npos)
122 return std::nullopt;
123 return Text.slice(UDSuffixPos + 1, Text.size());
124 }
125 return std::nullopt;
126}
127
128static std::string createRawStringLiteral(const StringLiteral *Literal,
129 const std::string &DelimiterStem,
130 const SourceManager &SM,
131 const LangOptions &LangOpts) {
132 const StringRef Bytes = Literal->getBytes();
133 std::string Delimiter;
134 for (int I = 0; containsDelimiter(Bytes, Delimiter); ++I) {
135 Delimiter = (I == 0) ? DelimiterStem : DelimiterStem + std::to_string(I);
136 }
137
138 std::optional<StringRef> UserDefinedSuffix =
139 createUserDefinedSuffix(Literal, SM, LangOpts);
140
141 if (Delimiter.empty())
142 return (R"(R"()" + Bytes + R"lit()")lit" + UserDefinedSuffix.value_or(""))
143 .str();
144
145 return (R"(R")" + Delimiter + "(" + Bytes + ")" + Delimiter + R"(")" +
146 UserDefinedSuffix.value_or(""))
147 .str();
148}
149
150static bool compareStringLength(StringRef Replacement,
151 const StringLiteral *Literal,
152 const SourceManager &SM,
153 const LangOptions &LangOpts) {
154 return Replacement.size() <=
155 Lexer::MeasureTokenLength(Literal->getBeginLoc(), SM, LangOpts);
156}
157
158void RawStringLiteralCheck::check(const MatchFinder::MatchResult &Result) {
159 const auto *Literal = Result.Nodes.getNodeAs<StringLiteral>("lit");
160 if (Literal->getBeginLoc().isMacroID())
161 return;
162 const SourceManager &SM = *Result.SourceManager;
163 const LangOptions &LangOpts = getLangOpts();
164 if (containsEscapedCharacters(Result, Literal, DisallowedChars)) {
165 const std::string Replacement =
166 createRawStringLiteral(Literal, DelimiterStem, SM, LangOpts);
167 if (ReplaceShorterLiterals ||
168 compareStringLength(Replacement, Literal, SM, LangOpts)) {
169 diag(Literal->getBeginLoc(),
170 "escaped string literal can be written as a raw string literal")
171 << FixItHint::CreateReplacement(Literal->getSourceRange(),
172 Replacement);
173 }
174 }
175}
176
177} // namespace clang::tidy::modernize
Every ClangTidyCheck reports errors through a DiagnosticsEngine provided by this context.
void storeOptions(ClangTidyOptions::OptionMap &Opts) override
RawStringLiteralCheck(StringRef Name, ClangTidyContext *Context)
void check(const ast_matchers::MatchFinder::MatchResult &Result) override
void registerMatchers(ast_matchers::MatchFinder *Finder) override
static bool containsEscapedCharacters(const MatchFinder::MatchResult &Result, const StringLiteral *Literal, const CharsBitSet &DisallowedChars)
static bool compareStringLength(StringRef Replacement, const StringLiteral *Literal, const SourceManager &SM, const LangOptions &LangOpts)
static std::string createRawStringLiteral(const StringLiteral *Literal, const std::string &DelimiterStem, const SourceManager &SM, const LangOptions &LangOpts)
static std::optional< StringRef > createUserDefinedSuffix(const StringLiteral *Literal, const SourceManager &SM, const LangOptions &LangOpts)
static bool isRawStringLiteral(StringRef Text)
static bool containsEscapes(StringRef HayStack, StringRef Escapes)
std::bitset< 1<< CHAR_BIT > CharsBitSet
static bool containsDelimiter(StringRef Bytes, const std::string &Delimiter)
llvm::StringMap< ClangTidyValue > OptionMap