clang-tools 22.0.0git
ConfusableIdentifierCheck.cpp
Go to the documentation of this file.
1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10
11#include "clang/ASTMatchers/ASTMatchers.h"
12#include "clang/Lex/Preprocessor.h"
13#include "llvm/ADT/SmallString.h"
14#include "llvm/Support/ConvertUTF.h"
15
16namespace {
17// Preprocessed version of
18// https://www.unicode.org/Public/security/latest/confusables.txt
19//
20// This contains a sorted array of { UTF32 codepoint; UTF32 values[N];}
21#include "Confusables.inc"
22} // namespace
23
25
29
31
32// Build a skeleton out of the Original identifier, inspired by the algorithm
33// described in https://www.unicode.org/reports/tr39/#def-skeleton
34//
35// FIXME: TR39 mandates:
36//
37// For an input string X, define skeleton(X) to be the following transformation
38// on the string:
39//
40// 1. Convert X to NFD format, as described in [UAX15].
41// 2. Concatenate the prototypes for each character in X according to the
42// specified data, producing a string of exemplar characters.
43// 3. Reapply NFD.
44//
45// We're skipping 1. and 3. for the sake of simplicity, but this can lead to
46// false positive.
47
48static llvm::SmallString<64U> skeleton(StringRef Name) {
49 using namespace llvm;
50 SmallString<64U> Skeleton;
51 Skeleton.reserve(1U + Name.size());
52
53 const char *Curr = Name.data();
54 const char *End = Curr + Name.size();
55 while (Curr < End) {
56 const char *Prev = Curr;
57 UTF32 CodePoint = 0;
58 const ConversionResult Result = convertUTF8Sequence(
59 reinterpret_cast<const UTF8 **>(&Curr),
60 reinterpret_cast<const UTF8 *>(End), &CodePoint, strictConversion);
61 if (Result != conversionOK) {
62 errs() << "Unicode conversion issue\n";
63 break;
64 }
65
66 const StringRef Key(Prev, Curr - Prev);
67 auto *Where = llvm::lower_bound(ConfusableEntries, CodePoint,
68 [](decltype(ConfusableEntries[0]) X,
69 UTF32 Y) { return X.codepoint < Y; });
70 if (Where == std::end(ConfusableEntries) || CodePoint != Where->codepoint) {
71 Skeleton.append(Prev, Curr);
72 } else {
73 UTF8 Buffer[32];
74 UTF8 *BufferStart = std::begin(Buffer);
75 UTF8 *IBuffer = BufferStart;
76 const UTF32 *ValuesStart = std::begin(Where->values);
77 const UTF32 *ValuesEnd = llvm::find(Where->values, '\0');
78 if (ConvertUTF32toUTF8(&ValuesStart, ValuesEnd, &IBuffer,
79 std::end(Buffer),
80 strictConversion) != conversionOK) {
81 errs() << "Unicode conversion issue\n";
82 break;
83 }
84 Skeleton.append((char *)BufferStart, (char *)IBuffer);
85 }
86 }
87 return Skeleton;
88}
89
90namespace {
91struct Entry {
92 const NamedDecl *ND;
93 const Decl *Parent;
94 bool FromDerivedClass;
95};
96} // namespace
97
98// Map from a context to the declarations in that context with the current
99// skeleton. At most one entry per distinct identifier is tracked. The
100// context is usually a `DeclContext`, but can also be a template declaration
101// that has no corresponding context, such as an alias template or variable
102// template.
104 llvm::DenseMap<const Decl *, llvm::SmallVector<Entry, 1>>;
105
106static bool addToContext(DeclsWithinContextMap &DeclsWithinContext,
107 const Decl *Context, Entry E) {
108 auto &Decls = DeclsWithinContext[Context];
109 if (!Decls.empty() &&
110 Decls.back().ND->getIdentifier() == E.ND->getIdentifier()) {
111 // Already have a declaration with this identifier in this context. Don't
112 // track another one. This means that if an outer name is confusable with an
113 // inner name, we'll only diagnose the outer name once, pointing at the
114 // first inner declaration with that name.
115 if (Decls.back().FromDerivedClass && !E.FromDerivedClass) {
116 // Prefer the declaration that's not from the derived class, because that
117 // conflicts with more declarations.
118 Decls.back() = E;
119 return true;
120 }
121 return false;
122 }
123 Decls.push_back(E);
124 return true;
125}
126
127static void addToEnclosingContexts(DeclsWithinContextMap &DeclsWithinContext,
128 const Decl *Parent, const NamedDecl *ND) {
129 const Decl *Outer = Parent;
130 while (Outer) {
131 if (const auto *NS = dyn_cast<NamespaceDecl>(Outer))
132 Outer = NS->getCanonicalDecl();
133
134 if (!addToContext(DeclsWithinContext, Outer, {ND, Parent, false}))
135 return;
136
137 if (const auto *RD = dyn_cast<CXXRecordDecl>(Outer)) {
138 RD = RD->getDefinition();
139 if (RD) {
140 RD->forallBases([&](const CXXRecordDecl *Base) {
141 addToContext(DeclsWithinContext, Base, {ND, Parent, true});
142 return true;
143 });
144 }
145 }
146
147 auto *OuterDC = Outer->getDeclContext();
148 if (!OuterDC)
149 break;
150 Outer = cast_or_null<Decl>(OuterDC->getNonTransparentContext());
151 }
152}
153
155 const ast_matchers::MatchFinder::MatchResult &Result) {
156 const auto *ND = Result.Nodes.getNodeAs<NamedDecl>("nameddecl");
157 if (!ND)
158 return;
159
160 addDeclToCheck(ND,
161 cast<Decl>(ND->getDeclContext()->getNonTransparentContext()));
162
163 // Associate template parameters with this declaration of this template.
164 if (const auto *TD = dyn_cast<TemplateDecl>(ND)) {
165 for (const NamedDecl *Param : *TD->getTemplateParameters())
166 addDeclToCheck(Param, TD->getTemplatedDecl());
167 }
168
169 // Associate function parameters with this declaration of this function.
170 if (const auto *FD = dyn_cast<FunctionDecl>(ND)) {
171 for (const NamedDecl *Param : FD->parameters())
172 addDeclToCheck(Param, ND);
173 }
174}
175
176void ConfusableIdentifierCheck::addDeclToCheck(const NamedDecl *ND,
177 const Decl *Parent) {
178 if (!ND || !Parent)
179 return;
180
181 const IdentifierInfo *NDII = ND->getIdentifier();
182 if (!NDII)
183 return;
184
185 const StringRef NDName = NDII->getName();
186 if (NDName.empty())
187 return;
188
189 NameToDecls[NDII].push_back({ND, Parent});
190}
191
193 llvm::StringMap<llvm::SmallVector<const IdentifierInfo *, 1>> SkeletonToNames;
194 // Compute the skeleton for each identifier.
195 for (auto &[Ident, Decls] : NameToDecls) {
196 SkeletonToNames[skeleton(Ident->getName())].push_back(Ident);
197 }
198
199 // Visit each skeleton with more than one identifier.
200 for (auto &[Skel, Idents] : SkeletonToNames) {
201 if (Idents.size() < 2) {
202 continue;
203 }
204
205 // Find the declaration contexts that transitively contain each identifier.
206 DeclsWithinContextMap DeclsWithinContext;
207 for (const IdentifierInfo *II : Idents) {
208 for (auto [ND, Parent] : NameToDecls[II]) {
209 addToEnclosingContexts(DeclsWithinContext, Parent, ND);
210 }
211 }
212
213 // Check to see if any declaration is declared in a context that
214 // transitively contains another declaration with a different identifier but
215 // the same skeleton.
216 for (const IdentifierInfo *II : Idents) {
217 for (auto [OuterND, OuterParent] : NameToDecls[II]) {
218 for (const Entry Inner : DeclsWithinContext[OuterParent]) {
219 // Don't complain if the identifiers are the same.
220 if (OuterND->getIdentifier() == Inner.ND->getIdentifier())
221 continue;
222
223 // Don't complain about a derived-class name shadowing a base class
224 // private member.
225 if (OuterND->getAccess() == AS_private && Inner.FromDerivedClass)
226 continue;
227
228 // If the declarations are in the same context, only diagnose the
229 // later one.
230 if (OuterParent == Inner.Parent &&
231 Inner.ND->getASTContext()
232 .getSourceManager()
233 .isBeforeInTranslationUnit(Inner.ND->getLocation(),
234 OuterND->getLocation()))
235 continue;
236
237 diag(Inner.ND->getLocation(), "%0 is confusable with %1")
238 << Inner.ND << OuterND;
239 diag(OuterND->getLocation(), "other declaration found here",
240 DiagnosticIDs::Note);
241 }
242 }
243 }
244 }
245
246 NameToDecls.clear();
247}
248
250 ast_matchers::MatchFinder *Finder) {
251 // Parameter declarations sometimes use the translation unit or some outer
252 // enclosing context as their `DeclContext`, instead of their parent, so
253 // we handle them specially in `check`.
254 auto AnyParamDecl = ast_matchers::anyOf(
255 ast_matchers::parmVarDecl(), ast_matchers::templateTypeParmDecl(),
256 ast_matchers::nonTypeTemplateParmDecl(),
257 ast_matchers::templateTemplateParmDecl());
258 Finder->addMatcher(ast_matchers::namedDecl(ast_matchers::unless(AnyParamDecl))
259 .bind("nameddecl"),
260 this);
261}
262
263} // namespace clang::tidy::misc
Every ClangTidyCheck reports errors through a DiagnosticsEngine provided by this context.
void check(const ast_matchers::MatchFinder::MatchResult &Result) override
ConfusableIdentifierCheck(StringRef Name, ClangTidyContext *Context)
void registerMatchers(ast_matchers::MatchFinder *Finder) override
static bool addToContext(DeclsWithinContextMap &DeclsWithinContext, const Decl *Context, Entry E)
static void addToEnclosingContexts(DeclsWithinContextMap &DeclsWithinContext, const Decl *Parent, const NamedDecl *ND)
static llvm::SmallString< 64U > skeleton(StringRef Name)
llvm::DenseMap< const Decl *, llvm::SmallVector< Entry, 1 > > DeclsWithinContextMap
static ClangTidyModuleRegistry::Add< altera::AlteraModule > X("altera-module", "Adds Altera FPGA OpenCL lint checks.")
Some operations such as code completion produce a set of candidates.
Definition Generators.h:146