clang-tools 22.0.0git
ConfusableIdentifierCheck.cpp
Go to the documentation of this file.
1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10
11#include "clang/ASTMatchers/ASTMatchers.h"
12#include "clang/Lex/Preprocessor.h"
13#include "llvm/ADT/SmallString.h"
14#include "llvm/Support/ConvertUTF.h"
15
16namespace {
17// Preprocessed version of
18// https://www.unicode.org/Public/security/latest/confusables.txt
19//
20// This contains a sorted array of { UTF32 codepoint; UTF32 values[N];}
21#include "Confusables.inc"
22} // namespace
23
25
29
31
32// Build a skeleton out of the Original identifier, inspired by the algorithm
33// described in https://www.unicode.org/reports/tr39/#def-skeleton
34//
35// FIXME: TR39 mandates:
36//
37// For an input string X, define skeleton(X) to be the following transformation
38// on the string:
39//
40// 1. Convert X to NFD format, as described in [UAX15].
41// 2. Concatenate the prototypes for each character in X according to the
42// specified data, producing a string of exemplar characters.
43// 3. Reapply NFD.
44//
45// We're skipping steps 1 and 3 for the sake of simplicity, but this can lead
46// to false positives.
47
48static llvm::SmallString<64U> skeleton(StringRef Name) {
49 using namespace llvm;
50 SmallString<64U> Skeleton;
51 Skeleton.reserve(1U + Name.size());
52
53 const char *Curr = Name.data();
54 const char *End = Curr + Name.size();
55 while (Curr < End) {
56
57 const char *Prev = Curr;
58 UTF32 CodePoint = 0;
59 ConversionResult Result = convertUTF8Sequence(
60 reinterpret_cast<const UTF8 **>(&Curr),
61 reinterpret_cast<const UTF8 *>(End), &CodePoint, strictConversion);
62 if (Result != conversionOK) {
63 errs() << "Unicode conversion issue\n";
64 break;
65 }
66
67 StringRef Key(Prev, Curr - Prev);
68 auto *Where = llvm::lower_bound(ConfusableEntries, CodePoint,
69 [](decltype(ConfusableEntries[0]) X,
70 UTF32 Y) { return X.codepoint < Y; });
71 if (Where == std::end(ConfusableEntries) || CodePoint != Where->codepoint) {
72 Skeleton.append(Prev, Curr);
73 } else {
74 UTF8 Buffer[32];
75 UTF8 *BufferStart = std::begin(Buffer);
76 UTF8 *IBuffer = BufferStart;
77 const UTF32 *ValuesStart = std::begin(Where->values);
78 const UTF32 *ValuesEnd = llvm::find(Where->values, '\0');
79 if (ConvertUTF32toUTF8(&ValuesStart, ValuesEnd, &IBuffer,
80 std::end(Buffer),
81 strictConversion) != conversionOK) {
82 errs() << "Unicode conversion issue\n";
83 break;
84 }
85 Skeleton.append((char *)BufferStart, (char *)IBuffer);
86 }
87 }
88 return Skeleton;
89}
90
91namespace {
92struct Entry {
93 const NamedDecl *ND;
94 const Decl *Parent;
95 bool FromDerivedClass;
96};
97} // namespace
98
99// Map from a context to the declarations in that context with the current
100// skeleton. At most one entry per distinct identifier is tracked. The
101// context is usually a `DeclContext`, but can also be a template declaration
102// that has no corresponding context, such as an alias template or variable
103// template.
// NOTE(review): the line that introduces this alias is not present in this
// listing; the functions below refer to this map type as
// `DeclsWithinContextMap` -- confirm the missing `using` line against the
// original file.
105 llvm::DenseMap<const Decl *, llvm::SmallVector<Entry, 1>>;
106
107static bool addToContext(DeclsWithinContextMap &DeclsWithinContext,
108 const Decl *Context, Entry E) {
109 auto &Decls = DeclsWithinContext[Context];
110 if (!Decls.empty() &&
111 Decls.back().ND->getIdentifier() == E.ND->getIdentifier()) {
112 // Already have a declaration with this identifier in this context. Don't
113 // track another one. This means that if an outer name is confusable with an
114 // inner name, we'll only diagnose the outer name once, pointing at the
115 // first inner declaration with that name.
116 if (Decls.back().FromDerivedClass && !E.FromDerivedClass) {
117 // Prefer the declaration that's not from the derived class, because that
118 // conflicts with more declarations.
119 Decls.back() = E;
120 return true;
121 }
122 return false;
123 }
124 Decls.push_back(E);
125 return true;
126}
127
128static void addToEnclosingContexts(DeclsWithinContextMap &DeclsWithinContext,
129 const Decl *Parent, const NamedDecl *ND) {
130 const Decl *Outer = Parent;
131 while (Outer) {
132 if (const auto *NS = dyn_cast<NamespaceDecl>(Outer))
133 Outer = NS->getCanonicalDecl();
134
135 if (!addToContext(DeclsWithinContext, Outer, {ND, Parent, false}))
136 return;
137
138 if (const auto *RD = dyn_cast<CXXRecordDecl>(Outer)) {
139 RD = RD->getDefinition();
140 if (RD) {
141 RD->forallBases([&](const CXXRecordDecl *Base) {
142 addToContext(DeclsWithinContext, Base, {ND, Parent, true});
143 return true;
144 });
145 }
146 }
147
148 auto *OuterDC = Outer->getDeclContext();
149 if (!OuterDC)
150 break;
151 Outer = cast_or_null<Decl>(OuterDC->getNonTransparentContext());
152 }
153}
154
// NOTE(review): the line that opens this definition is not present in this
// listing; presumably this is the body of `ConfusableIdentifierCheck::check`
// -- confirm against the original file.
//
// Records the declaration bound as "nameddecl", together with any template or
// function parameters it declares, for later confusability analysis.
156 const ast_matchers::MatchFinder::MatchResult &Result) {
157 const auto *ND = Result.Nodes.getNodeAs<NamedDecl>("nameddecl");
158 if (!ND)
159 return;
160
// Register the declaration under its nearest non-transparent context.
161 addDeclToCheck(ND,
162 cast<Decl>(ND->getDeclContext()->getNonTransparentContext()));
163
164 // Associate template parameters with this declaration of this template.
// The templated declaration, not the TemplateDecl itself, is used as parent.
165 if (const auto *TD = dyn_cast<TemplateDecl>(ND)) {
166 for (const NamedDecl *Param : *TD->getTemplateParameters())
167 addDeclToCheck(Param, TD->getTemplatedDecl());
168 }
169
170 // Associate function parameters with this declaration of this function.
171 if (const auto *FD = dyn_cast<FunctionDecl>(ND)) {
172 for (const NamedDecl *Param : FD->parameters())
173 addDeclToCheck(Param, ND);
174 }
175}
176
177void ConfusableIdentifierCheck::addDeclToCheck(const NamedDecl *ND,
178 const Decl *Parent) {
179 if (!ND || !Parent)
180 return;
181
182 const IdentifierInfo *NDII = ND->getIdentifier();
183 if (!NDII)
184 return;
185
186 StringRef NDName = NDII->getName();
187 if (NDName.empty())
188 return;
189
190 NameToDecls[NDII].push_back({ND, Parent});
191}
192
// NOTE(review): the line that opens this definition is not present in this
// listing -- confirm the function name and signature against the original
// file.
//
// Processes every declaration recorded in `NameToDecls`: groups identifiers by
// skeleton, and for each group with at least two distinct identifiers reports
// declarations whose context transitively contains a declaration with a
// different identifier but the same skeleton.
194 llvm::StringMap<llvm::SmallVector<const IdentifierInfo *, 1>> SkeletonToNames;
195 // Compute the skeleton for each identifier.
196 for (auto &[Ident, Decls] : NameToDecls) {
197 SkeletonToNames[skeleton(Ident->getName())].push_back(Ident);
198 }
199
200 // Visit each skeleton with more than one identifier.
201 for (auto &[Skel, Idents] : SkeletonToNames) {
202 if (Idents.size() < 2) {
203 continue;
204 }
205
206 // Find the declaration contexts that transitively contain each identifier.
// The map is rebuilt from scratch for every skeleton group.
207 DeclsWithinContextMap DeclsWithinContext;
208 for (const IdentifierInfo *II : Idents) {
209 for (auto [ND, Parent] : NameToDecls[II]) {
210 addToEnclosingContexts(DeclsWithinContext, Parent, ND);
211 }
212 }
213
214 // Check to see if any declaration is declared in a context that
215 // transitively contains another declaration with a different identifier but
216 // the same skeleton.
// NOTE(review): addToEnclosingContexts keys namespaces by their canonical
// declaration, while the lookup below uses OuterParent as it was recorded --
// confirm these agree for declarations inside reopened namespaces.
217 for (const IdentifierInfo *II : Idents) {
218 for (auto [OuterND, OuterParent] : NameToDecls[II]) {
219 for (Entry Inner : DeclsWithinContext[OuterParent]) {
220 // Don't complain if the identifiers are the same.
221 if (OuterND->getIdentifier() == Inner.ND->getIdentifier())
222 continue;
223
224 // Don't complain about a derived-class name shadowing a base class
225 // private member.
226 if (OuterND->getAccess() == AS_private && Inner.FromDerivedClass)
227 continue;
228
229 // If the declarations are in the same context, only diagnose the
230 // later one.
// That is, skip this pair when the inner declaration comes first.
231 if (OuterParent == Inner.Parent &&
232 Inner.ND->getASTContext()
233 .getSourceManager()
234 .isBeforeInTranslationUnit(Inner.ND->getLocation(),
235 OuterND->getLocation()))
236 continue;
237
// The warning is attached to the inner declaration; the note points at the
// outer declaration it is confusable with.
238 diag(Inner.ND->getLocation(), "%0 is confusable with %1")
239 << Inner.ND << OuterND;
240 diag(OuterND->getLocation(), "other declaration found here",
241 DiagnosticIDs::Note);
242 }
243 }
244 }
245 }
246
// Drop all recorded declarations now that they have been processed.
247 NameToDecls.clear();
248}
249
// NOTE(review): the line that opens this definition is not present in this
// listing; presumably this is the body of
// `ConfusableIdentifierCheck::registerMatchers` -- confirm against the
// original file.
//
// Registers a single matcher that binds, as "nameddecl", every named
// declaration other than function parameters and template parameters.
251 ast_matchers::MatchFinder *Finder) {
252 // Parameter declarations sometimes use the translation unit or some outer
253 // enclosing context as their `DeclContext`, instead of their parent, so
254 // we handle them specially in `check`.
255 auto AnyParamDecl = ast_matchers::anyOf(
256 ast_matchers::parmVarDecl(), ast_matchers::templateTypeParmDecl(),
257 ast_matchers::nonTypeTemplateParmDecl(),
258 ast_matchers::templateTemplateParmDecl());
259 Finder->addMatcher(ast_matchers::namedDecl(ast_matchers::unless(AnyParamDecl))
260 .bind("nameddecl"),
261 this);
262}
263
264} // namespace clang::tidy::misc
Every ClangTidyCheck reports errors through a DiagnosticsEngine provided by this context.
void check(const ast_matchers::MatchFinder::MatchResult &Result) override
ConfusableIdentifierCheck(StringRef Name, ClangTidyContext *Context)
void registerMatchers(ast_matchers::MatchFinder *Finder) override
static bool addToContext(DeclsWithinContextMap &DeclsWithinContext, const Decl *Context, Entry E)
static void addToEnclosingContexts(DeclsWithinContextMap &DeclsWithinContext, const Decl *Parent, const NamedDecl *ND)
static llvm::SmallString< 64U > skeleton(StringRef Name)
llvm::DenseMap< const Decl *, llvm::SmallVector< Entry, 1 > > DeclsWithinContextMap
static ClangTidyModuleRegistry::Add< altera::AlteraModule > X("altera-module", "Adds Altera FPGA OpenCL lint checks.")
Some operations such as code completion produce a set of candidates.
Definition Generators.h:66