clang-tools  15.0.0git
MisleadingBidirectional.cpp
Go to the documentation of this file.
1 //===--- MisleadingBidirectional.cpp - clang-tidy -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 
11 #include "clang/Frontend/CompilerInstance.h"
12 #include "clang/Lex/Preprocessor.h"
13 #include "llvm/Support/ConvertUTF.h"
14 
15 using namespace clang;
16 using namespace clang::tidy::misc;
17 
18 static bool containsMisleadingBidi(StringRef Buffer,
19  bool HonorLineBreaks = true) {
20  const char *CurPtr = Buffer.begin();
21 
22  enum BidiChar {
23  PS = 0x2029,
24  RLO = 0x202E,
25  RLE = 0x202B,
26  LRO = 0x202D,
27  LRE = 0x202A,
28  PDF = 0x202C,
29  RLI = 0x2067,
30  LRI = 0x2066,
31  FSI = 0x2068,
32  PDI = 0x2069
33  };
34 
35  SmallVector<BidiChar> BidiContexts;
36 
37  // Scan each character while maintaining a stack of opened bidi context.
38  // RLO/RLE/LRO/LRE all are closed by PDF while RLI LRI and FSI are closed by
39  // PDI. New lines reset the context count. Extra PDF / PDI are ignored.
40  //
41  // Warn if we end up with an unclosed context.
42  while (CurPtr < Buffer.end()) {
43  unsigned char C = *CurPtr;
44  if (isASCII(C)) {
45  ++CurPtr;
46  bool IsParagrapSep =
47  (C == 0xA || C == 0xD || (0x1C <= C && C <= 0x1E) || C == 0x85);
48  bool IsSegmentSep = (C == 0x9 || C == 0xB || C == 0x1F);
49  if (IsParagrapSep || IsSegmentSep)
50  BidiContexts.clear();
51  continue;
52  }
53  llvm::UTF32 CodePoint;
54  llvm::ConversionResult Result = llvm::convertUTF8Sequence(
55  (const llvm::UTF8 **)&CurPtr, (const llvm::UTF8 *)Buffer.end(),
56  &CodePoint, llvm::strictConversion);
57 
58  // If conversion fails, utf-8 is designed so that we can just try next char.
59  if (Result != llvm::conversionOK) {
60  ++CurPtr;
61  continue;
62  }
63 
64  // Open a PDF context.
65  if (CodePoint == RLO || CodePoint == RLE || CodePoint == LRO ||
66  CodePoint == LRE)
67  BidiContexts.push_back(PDF);
68  // Close PDF Context.
69  else if (CodePoint == PDF) {
70  if (!BidiContexts.empty() && BidiContexts.back() == PDF)
71  BidiContexts.pop_back();
72  }
73  // Open a PDI Context.
74  else if (CodePoint == RLI || CodePoint == LRI || CodePoint == FSI)
75  BidiContexts.push_back(PDI);
76  // Close a PDI Context.
77  else if (CodePoint == PDI) {
78  auto R = std::find(BidiContexts.rbegin(), BidiContexts.rend(), PDI);
79  if (R != BidiContexts.rend())
80  BidiContexts.resize(BidiContexts.rend() - R - 1);
81  }
82  // Line break or equivalent
83  else if (CodePoint == PS)
84  BidiContexts.clear();
85  }
86  return !BidiContexts.empty();
87 }
88 
90  : public CommentHandler {
91 public:
93  llvm::Optional<std::string> User)
94  : Check(Check) {}
95 
96  bool HandleComment(Preprocessor &PP, SourceRange Range) override {
97  // FIXME: check that we are in a /* */ comment
98  StringRef Text =
99  Lexer::getSourceText(CharSourceRange::getCharRange(Range),
100  PP.getSourceManager(), PP.getLangOpts());
101 
102  if (containsMisleadingBidi(Text, true))
103  Check.diag(
104  Range.getBegin(),
105  "comment contains misleading bidirectional Unicode characters");
106  return false;
107  }
108 
109 private:
111 };
112 
114  StringRef Name, ClangTidyContext *Context)
115  : ClangTidyCheck(Name, Context),
116  Handler(std::make_unique<MisleadingBidirectionalHandler>(
117  *this, Context->getOptions().User)) {}
118 
120 
122  const SourceManager &SM, Preprocessor *PP, Preprocessor *ModuleExpanderPP) {
123  PP->addCommentHandler(Handler.get());
124 }
125 
127  const ast_matchers::MatchFinder::MatchResult &Result) {
128  if (const auto *SL = Result.Nodes.getNodeAs<StringLiteral>("strlit")) {
129  StringRef Literal = SL->getBytes();
130  if (containsMisleadingBidi(Literal, false))
131  diag(SL->getBeginLoc(), "string literal contains misleading "
132  "bidirectional Unicode characters");
133  }
134 }
135 
137  ast_matchers::MatchFinder *Finder) {
138  Finder->addMatcher(ast_matchers::stringLiteral().bind("strlit"), this);
139 }
Range
CharSourceRange Range
SourceRange for the file name.
Definition: IncludeOrderCheck.cpp:38
clang::tidy::misc::MisleadingBidirectionalCheck
Definition: MisleadingBidirectional.h:18
clang::tidy::misc::MisleadingBidirectionalCheck::MisleadingBidirectionalHandler::MisleadingBidirectionalHandler
MisleadingBidirectionalHandler(MisleadingBidirectionalCheck &Check, llvm::Optional< std::string > User)
Definition: MisleadingBidirectional.cpp:92
clang::tidy::cppcoreguidelines::getSourceText
static std::string getSourceText(const CXXDestructorDecl &Destructor)
Definition: VirtualClassDestructorCheck.cpp:112
Text
std::string Text
Definition: HTMLGenerator.cpp:80
clang::tidy::ClangTidyCheck
Base class for all clang-tidy checks.
Definition: ClangTidyCheck.h:53
MisleadingBidirectional.h
clang::tidy::misc::MisleadingBidirectionalCheck::MisleadingBidirectionalCheck
MisleadingBidirectionalCheck(StringRef Name, ClangTidyContext *Context)
Definition: MisleadingBidirectional.cpp:113
clang::tidy::misc::MisleadingBidirectionalCheck::MisleadingBidirectionalHandler::HandleComment
bool HandleComment(Preprocessor &PP, SourceRange Range) override
Definition: MisleadingBidirectional.cpp:96
clang::tidy::misc::MisleadingBidirectionalCheck::MisleadingBidirectionalHandler
Definition: MisleadingBidirectional.cpp:89
containsMisleadingBidi
static bool containsMisleadingBidi(StringRef Buffer, bool HonorLineBreaks=true)
Definition: MisleadingBidirectional.cpp:18
clang::tidy::ClangTidyContext
Every ClangTidyCheck reports errors through a DiagnosticsEngine provided by this context.
Definition: ClangTidyDiagnosticConsumer.h:66
clang::tidy::misc::MisleadingBidirectionalCheck::check
void check(const ast_matchers::MatchFinder::MatchResult &Result) override
ClangTidyChecks that register ASTMatchers should do the actual work in here.
Definition: MisleadingBidirectional.cpp:126
clang::tidy::misc::MisleadingBidirectionalCheck::registerMatchers
void registerMatchers(ast_matchers::MatchFinder *Finder) override
Override this to register AST matchers with Finder.
Definition: MisleadingBidirectional.cpp:136
clang::tidy::misc::MisleadingBidirectionalCheck::registerPPCallbacks
void registerPPCallbacks(const SourceManager &SM, Preprocessor *PP, Preprocessor *ModuleExpanderPP) override
Override this to register PPCallbacks in the preprocessor.
Definition: MisleadingBidirectional.cpp:121
clang::tidy::ClangTidyCheck::diag
DiagnosticBuilder diag(SourceLocation Loc, StringRef Description, DiagnosticIDs::Level Level=DiagnosticIDs::Warning)
Add a diagnostic with the check's name.
Definition: ClangTidyCheck.cpp:25
Name
Token Name
Definition: MacroToEnumCheck.cpp:89
clang::tidy::bugprone::PP
static Preprocessor * PP
Definition: BadSignalToKillThreadCheck.cpp:29
clang::tidy::misc::MisleadingBidirectionalCheck::~MisleadingBidirectionalCheck
~MisleadingBidirectionalCheck()
C
const Criteria C
Definition: FunctionCognitiveComplexityCheck.cpp:93
clang
===-- Representation.cpp - ClangDoc Representation -----------*- C++ -*-===//
Definition: ApplyReplacements.h:27
clang::tidy::misc
Definition: DefinitionsInHeadersCheck.cpp:17