clang-tools 20.0.0git
MisleadingBidirectional.cpp
Go to the documentation of this file.
1//===--- MisleadingBidirectional.cpp - clang-tidy -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10
11#include "clang/Frontend/CompilerInstance.h"
12#include "clang/Lex/Preprocessor.h"
13#include "llvm/Support/ConvertUTF.h"
14#include <optional>
15
16using namespace clang;
17using namespace clang::tidy::misc;
18
19static bool containsMisleadingBidi(StringRef Buffer,
20 bool HonorLineBreaks = true) {
21 const char *CurPtr = Buffer.begin();
22
23 enum BidiChar {
24 PS = 0x2029,
25 RLO = 0x202E,
26 RLE = 0x202B,
27 LRO = 0x202D,
28 LRE = 0x202A,
29 PDF = 0x202C,
30 RLI = 0x2067,
31 LRI = 0x2066,
32 FSI = 0x2068,
33 PDI = 0x2069
34 };
35
36 SmallVector<BidiChar> BidiContexts;
37
38 // Scan each character while maintaining a stack of opened bidi context.
39 // RLO/RLE/LRO/LRE all are closed by PDF while RLI LRI and FSI are closed by
40 // PDI. New lines reset the context count. Extra PDF / PDI are ignored.
41 //
42 // Warn if we end up with an unclosed context.
43 while (CurPtr < Buffer.end()) {
44 unsigned char C = *CurPtr;
45 if (isASCII(C)) {
46 ++CurPtr;
47 bool IsParagrapSep =
48 (C == 0xA || C == 0xD || (0x1C <= C && C <= 0x1E) || C == 0x85);
49 bool IsSegmentSep = (C == 0x9 || C == 0xB || C == 0x1F);
50 if (IsParagrapSep || IsSegmentSep)
51 BidiContexts.clear();
52 continue;
53 }
54 llvm::UTF32 CodePoint = 0;
55 llvm::ConversionResult Result = llvm::convertUTF8Sequence(
56 (const llvm::UTF8 **)&CurPtr, (const llvm::UTF8 *)Buffer.end(),
57 &CodePoint, llvm::strictConversion);
58
59 // If conversion fails, utf-8 is designed so that we can just try next char.
60 if (Result != llvm::conversionOK) {
61 ++CurPtr;
62 continue;
63 }
64
65 // Open a PDF context.
66 if (CodePoint == RLO || CodePoint == RLE || CodePoint == LRO ||
67 CodePoint == LRE)
68 BidiContexts.push_back(PDF);
69 // Close PDF Context.
70 else if (CodePoint == PDF) {
71 if (!BidiContexts.empty() && BidiContexts.back() == PDF)
72 BidiContexts.pop_back();
73 }
74 // Open a PDI Context.
75 else if (CodePoint == RLI || CodePoint == LRI || CodePoint == FSI)
76 BidiContexts.push_back(PDI);
77 // Close a PDI Context.
78 else if (CodePoint == PDI) {
79 auto R = llvm::find(llvm::reverse(BidiContexts), PDI);
80 if (R != BidiContexts.rend())
81 BidiContexts.resize(BidiContexts.rend() - R - 1);
82 }
83 // Line break or equivalent
84 else if (CodePoint == PS)
85 BidiContexts.clear();
86 }
87 return !BidiContexts.empty();
88}
89
91 : public CommentHandler {
92public:
94 : Check(Check) {}
95
96 bool HandleComment(Preprocessor &PP, SourceRange Range) override {
97 // FIXME: check that we are in a /* */ comment
98 StringRef Text =
99 Lexer::getSourceText(CharSourceRange::getCharRange(Range),
100 PP.getSourceManager(), PP.getLangOpts());
101
102 if (containsMisleadingBidi(Text, true))
103 Check.diag(
104 Range.getBegin(),
105 "comment contains misleading bidirectional Unicode characters");
106 return false;
107 }
108
109private:
111};
112
114 StringRef Name, ClangTidyContext *Context)
115 : ClangTidyCheck(Name, Context),
116 Handler(std::make_unique<MisleadingBidirectionalHandler>(*this)) {}
117
119
121 const SourceManager &SM, Preprocessor *PP, Preprocessor *ModuleExpanderPP) {
122 PP->addCommentHandler(Handler.get());
123}
124
126 const ast_matchers::MatchFinder::MatchResult &Result) {
127 if (const auto *SL = Result.Nodes.getNodeAs<StringLiteral>("strlit")) {
128 StringRef Literal = SL->getBytes();
129 if (containsMisleadingBidi(Literal, false))
130 diag(SL->getBeginLoc(), "string literal contains misleading "
131 "bidirectional Unicode characters");
132 }
133}
134
136 ast_matchers::MatchFinder *Finder) {
137 Finder->addMatcher(ast_matchers::stringLiteral().bind("strlit"), this);
138}
llvm::SmallString< 256U > Name
const Criteria C
std::string Text
CharSourceRange Range
SourceRange for the file name.
static bool containsMisleadingBidi(StringRef Buffer, bool HonorLineBreaks=true)
Base class for all clang-tidy checks.
DiagnosticBuilder diag(SourceLocation Loc, StringRef Description, DiagnosticIDs::Level Level=DiagnosticIDs::Warning)
Add a diagnostic with the check's name.
Every ClangTidyCheck reports errors through a DiagnosticsEngine provided by this context.
MisleadingBidirectionalCheck(StringRef Name, ClangTidyContext *Context)
void registerPPCallbacks(const SourceManager &SM, Preprocessor *PP, Preprocessor *ModuleExpanderPP) override
Override this to register PPCallbacks in the preprocessor.
void check(const ast_matchers::MatchFinder::MatchResult &Result) override
ClangTidyChecks that register ASTMatchers should do the actual work in here.
void registerMatchers(ast_matchers::MatchFinder *Finder) override
Override this to register AST matchers with Finder.
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//