clang-tools 23.0.0git
RawStringLiteralCheck.cpp
Go to the documentation of this file.
1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "clang/AST/ASTContext.h"
11#include "clang/ASTMatchers/ASTMatchFinder.h"
12#include "clang/Basic/LangOptions.h"
13#include "clang/Basic/SourceManager.h"
14#include "clang/Lex/Lexer.h"
15#include "llvm/ADT/StringRef.h"
16#include <optional>
17
18using namespace clang::ast_matchers;
19
20namespace clang::tidy::modernize {
21
22static bool containsEscapes(StringRef HayStack, StringRef Escapes) {
23 size_t BackSlash = HayStack.find('\\');
24 if (BackSlash == StringRef::npos)
25 return false;
26
27 while (BackSlash != StringRef::npos) {
28 if (!Escapes.contains(HayStack[BackSlash + 1]))
29 return false;
30 BackSlash = HayStack.find('\\', BackSlash + 2);
31 }
32
33 return true;
34}
35
36static bool isRawStringLiteral(StringRef Text) {
37 // Already a raw string literal if R comes before ".
38 const size_t QuotePos = Text.find('"');
39 assert(QuotePos != StringRef::npos);
40 return (QuotePos > 0) && (Text[QuotePos - 1] == 'R');
41}
42
43static bool containsEscapedCharacters(const MatchFinder::MatchResult &Result,
44 const StringLiteral *Literal,
45 const CharsBitSet &DisallowedChars) {
46 // FIXME: Handle L"", u8"", u"" and U"" literals.
47 if (!Literal->isOrdinary())
48 return false;
49
50 for (const unsigned char C : Literal->getBytes())
51 if (DisallowedChars.test(C))
52 return false;
53
54 const CharSourceRange CharRange = Lexer::makeFileCharRange(
55 CharSourceRange::getTokenRange(Literal->getSourceRange()),
56 *Result.SourceManager, Result.Context->getLangOpts());
57 const StringRef Text = Lexer::getSourceText(CharRange, *Result.SourceManager,
58 Result.Context->getLangOpts());
59 if (Text.empty() || isRawStringLiteral(Text))
60 return false;
61
62 return containsEscapes(Text, R"('\"?x01)");
63}
64
65static bool containsDelimiter(StringRef Bytes, const std::string &Delimiter) {
66 return Bytes.contains(Delimiter.empty() ? std::string(R"lit()")lit")
67 : (")" + Delimiter + R"(")"));
68}
69
71 ClangTidyContext *Context)
72 : ClangTidyCheck(Name, Context),
73 DelimiterStem(Options.get("DelimiterStem", "lit")),
74 ReplaceShorterLiterals(Options.get("ReplaceShorterLiterals", false)) {
75 // Non-printing characters are disallowed:
76 // \007 = \a bell
77 // \010 = \b backspace
78 // \011 = \t horizontal tab
79 // \012 = \n new line
80 // \013 = \v vertical tab
81 // \014 = \f form feed
82 // \015 = \r carriage return
83 // \177 = delete
84 for (const unsigned char C : StringRef("\000\001\002\003\004\005\006\a"
85 "\b\t\n\v\f\r\016\017"
86 "\020\021\022\023\024\025\026\027"
87 "\030\031\032\033\034\035\036\037"
88 "\177",
89 33))
90 DisallowedChars.set(C);
91
92 // Non-ASCII are disallowed too.
93 for (unsigned int C = 0x80U; C <= 0xFFU; ++C)
94 DisallowedChars.set(static_cast<unsigned char>(C));
95}
96
98 Options.store(Opts, "DelimiterStem", DelimiterStem);
99 Options.store(Opts, "ReplaceShorterLiterals", ReplaceShorterLiterals);
100}
101
103 Finder->addMatcher(
104 stringLiteral(unless(hasParent(predefinedExpr()))).bind("lit"), this);
105}
106
107static std::optional<StringRef>
108createUserDefinedSuffix(const StringLiteral *Literal, const SourceManager &SM,
109 const LangOptions &LangOpts) {
110 const CharSourceRange TokenRange =
111 CharSourceRange::getTokenRange(Literal->getSourceRange());
112 Token T;
113 if (Lexer::getRawToken(Literal->getBeginLoc(), T, SM, LangOpts))
114 return std::nullopt;
115 const CharSourceRange CharRange =
116 Lexer::makeFileCharRange(TokenRange, SM, LangOpts);
117 if (T.hasUDSuffix()) {
118 const StringRef Text = Lexer::getSourceText(CharRange, SM, LangOpts);
119 const size_t UDSuffixPos = Text.find_last_of('"');
120 if (UDSuffixPos == StringRef::npos)
121 return std::nullopt;
122 return Text.slice(UDSuffixPos + 1, Text.size());
123 }
124 return std::nullopt;
125}
126
127static std::string createRawStringLiteral(const StringLiteral *Literal,
128 const std::string &DelimiterStem,
129 const SourceManager &SM,
130 const LangOptions &LangOpts) {
131 const StringRef Bytes = Literal->getBytes();
132 std::string Delimiter;
133 for (int I = 0; containsDelimiter(Bytes, Delimiter); ++I)
134 Delimiter = (I == 0) ? DelimiterStem : DelimiterStem + std::to_string(I);
135
136 const std::optional<StringRef> UserDefinedSuffix =
137 createUserDefinedSuffix(Literal, SM, LangOpts);
138
139 if (Delimiter.empty())
140 return (R"(R"()" + Bytes + R"lit()")lit" + UserDefinedSuffix.value_or(""))
141 .str();
142
143 return (R"(R")" + Delimiter + "(" + Bytes + ")" + Delimiter + R"(")" +
144 UserDefinedSuffix.value_or(""))
145 .str();
146}
147
148static bool compareStringLength(StringRef Replacement,
149 const StringLiteral *Literal,
150 const SourceManager &SM,
151 const LangOptions &LangOpts) {
152 return Replacement.size() <=
153 Lexer::MeasureTokenLength(Literal->getBeginLoc(), SM, LangOpts);
154}
155
156void RawStringLiteralCheck::check(const MatchFinder::MatchResult &Result) {
157 const auto *Literal = Result.Nodes.getNodeAs<StringLiteral>("lit");
158 if (Literal->getBeginLoc().isMacroID())
159 return;
160 const SourceManager &SM = *Result.SourceManager;
161 const LangOptions &LangOpts = getLangOpts();
162 if (containsEscapedCharacters(Result, Literal, DisallowedChars)) {
163 const std::string Replacement =
164 createRawStringLiteral(Literal, DelimiterStem, SM, LangOpts);
165 if (ReplaceShorterLiterals ||
166 compareStringLength(Replacement, Literal, SM, LangOpts)) {
167 diag(Literal->getBeginLoc(),
168 "escaped string literal can be written as a raw string literal")
169 << FixItHint::CreateReplacement(Literal->getSourceRange(),
170 Replacement);
171 }
172 }
173}
174
175} // namespace clang::tidy::modernize
Every ClangTidyCheck reports errors through a DiagnosticsEngine provided by this context.
void storeOptions(ClangTidyOptions::OptionMap &Opts) override
RawStringLiteralCheck(StringRef Name, ClangTidyContext *Context)
void check(const ast_matchers::MatchFinder::MatchResult &Result) override
void registerMatchers(ast_matchers::MatchFinder *Finder) override
static bool containsEscapedCharacters(const MatchFinder::MatchResult &Result, const StringLiteral *Literal, const CharsBitSet &DisallowedChars)
static bool compareStringLength(StringRef Replacement, const StringLiteral *Literal, const SourceManager &SM, const LangOptions &LangOpts)
static std::string createRawStringLiteral(const StringLiteral *Literal, const std::string &DelimiterStem, const SourceManager &SM, const LangOptions &LangOpts)
static std::optional< StringRef > createUserDefinedSuffix(const StringLiteral *Literal, const SourceManager &SM, const LangOptions &LangOpts)
static bool isRawStringLiteral(StringRef Text)
static bool containsEscapes(StringRef HayStack, StringRef Escapes)
std::bitset< 1<< CHAR_BIT > CharsBitSet
static bool containsDelimiter(StringRef Bytes, const std::string &Delimiter)
llvm::StringMap< ClangTidyValue > OptionMap