clang-tools 22.0.0git
MisleadingBidirectional.cpp
Go to the documentation of this file.
1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10
11#include "clang/Frontend/CompilerInstance.h"
12#include "clang/Lex/Preprocessor.h"
13#include "llvm/Support/ConvertUTF.h"
14
15using namespace clang;
16using namespace clang::tidy::misc;
17
18static bool containsMisleadingBidi(StringRef Buffer,
19 bool HonorLineBreaks = true) {
20 const char *CurPtr = Buffer.begin();
21
22 enum BidiChar {
23 PS = 0x2029,
24 RLO = 0x202E,
25 RLE = 0x202B,
26 LRO = 0x202D,
27 LRE = 0x202A,
28 PDF = 0x202C,
29 RLI = 0x2067,
30 LRI = 0x2066,
31 FSI = 0x2068,
32 PDI = 0x2069
33 };
34
35 SmallVector<BidiChar> BidiContexts;
36
37 // Scan each character while maintaining a stack of opened bidi context.
38 // RLO/RLE/LRO/LRE all are closed by PDF while RLI LRI and FSI are closed by
39 // PDI. New lines reset the context count. Extra PDF / PDI are ignored.
40 //
41 // Warn if we end up with an unclosed context.
42 while (CurPtr < Buffer.end()) {
43 unsigned char C = *CurPtr;
44 if (isASCII(C)) {
45 ++CurPtr;
46 bool IsParagrapSep =
47 (C == 0xA || C == 0xD || (0x1C <= C && C <= 0x1E) || C == 0x85);
48 bool IsSegmentSep = (C == 0x9 || C == 0xB || C == 0x1F);
49 if (IsParagrapSep || IsSegmentSep)
50 BidiContexts.clear();
51 continue;
52 }
53 llvm::UTF32 CodePoint = 0;
54 llvm::ConversionResult Result = llvm::convertUTF8Sequence(
55 (const llvm::UTF8 **)&CurPtr, (const llvm::UTF8 *)Buffer.end(),
56 &CodePoint, llvm::strictConversion);
57
58 // If conversion fails, utf-8 is designed so that we can just try next char.
59 if (Result != llvm::conversionOK) {
60 ++CurPtr;
61 continue;
62 }
63
64 // Open a PDF context.
65 if (CodePoint == RLO || CodePoint == RLE || CodePoint == LRO ||
66 CodePoint == LRE)
67 BidiContexts.push_back(PDF);
68 // Close PDF Context.
69 else if (CodePoint == PDF) {
70 if (!BidiContexts.empty() && BidiContexts.back() == PDF)
71 BidiContexts.pop_back();
72 }
73 // Open a PDI Context.
74 else if (CodePoint == RLI || CodePoint == LRI || CodePoint == FSI)
75 BidiContexts.push_back(PDI);
76 // Close a PDI Context.
77 else if (CodePoint == PDI) {
78 auto R = llvm::find(llvm::reverse(BidiContexts), PDI);
79 if (R != BidiContexts.rend())
80 BidiContexts.resize(BidiContexts.rend() - R - 1);
81 }
82 // Line break or equivalent
83 else if (CodePoint == PS)
84 BidiContexts.clear();
85 }
86 return !BidiContexts.empty();
87}
88
90 : public CommentHandler {
91public:
94
95 bool HandleComment(Preprocessor &PP, SourceRange Range) override {
96 // FIXME: check that we are in a /* */ comment
97 StringRef Text =
98 Lexer::getSourceText(CharSourceRange::getCharRange(Range),
99 PP.getSourceManager(), PP.getLangOpts());
100
101 if (containsMisleadingBidi(Text, true))
102 Check.diag(
103 Range.getBegin(),
104 "comment contains misleading bidirectional Unicode characters");
105 return false;
106 }
107
108private:
110};
111
113 StringRef Name, ClangTidyContext *Context)
114 : ClangTidyCheck(Name, Context),
115 Handler(std::make_unique<MisleadingBidirectionalHandler>(*this)) {}
116
118
120 const SourceManager &SM, Preprocessor *PP, Preprocessor *ModuleExpanderPP) {
121 PP->addCommentHandler(Handler.get());
122}
123
125 const ast_matchers::MatchFinder::MatchResult &Result) {
126 if (const auto *SL = Result.Nodes.getNodeAs<StringLiteral>("strlit")) {
127 StringRef Literal = SL->getBytes();
128 if (containsMisleadingBidi(Literal, false))
129 diag(SL->getBeginLoc(), "string literal contains misleading "
130 "bidirectional Unicode characters");
131 }
132}
133
135 ast_matchers::MatchFinder *Finder) {
136 Finder->addMatcher(ast_matchers::stringLiteral().bind("strlit"), this);
137}
static bool containsMisleadingBidi(StringRef Buffer, bool HonorLineBreaks=true)
Every ClangTidyCheck reports errors through a DiagnosticsEngine provided by this context.
MisleadingBidirectionalCheck(StringRef Name, ClangTidyContext *Context)
void registerPPCallbacks(const SourceManager &SM, Preprocessor *PP, Preprocessor *ModuleExpanderPP) override
void check(const ast_matchers::MatchFinder::MatchResult &Result) override
void registerMatchers(ast_matchers::MatchFinder *Finder) override
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//