clang-tools 22.0.0git
BranchCloneCheck.cpp
Go to the documentation of this file.
1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "BranchCloneCheck.h"
10#include "../utils/ASTUtils.h"
11#include "clang/AST/ASTContext.h"
12#include "clang/AST/RecursiveASTVisitor.h"
13#include "clang/ASTMatchers/ASTMatchFinder.h"
14#include "clang/Analysis/CloneDetection.h"
15#include "clang/Lex/Lexer.h"
16
17using namespace clang;
18using namespace clang::ast_matchers;
19
20namespace {
21/// A branch in a switch may consist of several statements; while a branch in
22/// an if/else if/else chain is one statement (which may be a CompoundStmt).
23using SwitchBranch = llvm::SmallVector<const Stmt *, 2>;
24} // anonymous namespace
25
26/// Determines if the bodies of two branches in a switch statements are Type I
27/// clones of each other. This function only examines the body of the branch
28/// and ignores the `case X:` or `default:` at the start of the branch.
29static bool areSwitchBranchesIdentical(const SwitchBranch &LHS,
30 const SwitchBranch &RHS,
31 const ASTContext &Context) {
32 if (LHS.size() != RHS.size())
33 return false;
34
35 for (size_t I = 0, Size = LHS.size(); I < Size; I++) {
36 // NOTE: We strip goto labels and annotations in addition to stripping
37 // the `case X:` or `default:` labels, but it is very unlikely that this
38 // would cause false positives in real-world code.
39 if (!tidy::utils::areStatementsIdentical(LHS[I]->stripLabelLikeStatements(),
40 RHS[I]->stripLabelLikeStatements(),
41 Context)) {
42 return false;
43 }
44 }
45
46 return true;
47}
48
49static bool isFallthroughSwitchBranch(const SwitchBranch &Branch) {
50 struct SwitchCaseVisitor : RecursiveASTVisitor<SwitchCaseVisitor> {
51 using RecursiveASTVisitor<SwitchCaseVisitor>::DataRecursionQueue;
52
53 bool TraverseLambdaExpr(LambdaExpr *, DataRecursionQueue * = nullptr) {
54 return true; // Ignore lambdas
55 }
56
57 bool TraverseDecl(Decl *) {
58 return true; // No need to check declarations
59 }
60
61 bool TraverseSwitchStmt(SwitchStmt *, DataRecursionQueue * = nullptr) {
62 return true; // Ignore sub-switches
63 }
64
65 // NOLINTNEXTLINE(readability-identifier-naming) - FIXME
66 bool TraverseSwitchCase(SwitchCase *, DataRecursionQueue * = nullptr) {
67 return true; // Ignore cases
68 }
69
70 bool TraverseDefaultStmt(DefaultStmt *, DataRecursionQueue * = nullptr) {
71 return true; // Ignore defaults
72 }
73
74 bool TraverseAttributedStmt(AttributedStmt *S) {
75 if (!S)
76 return true;
77
78 return llvm::all_of(S->getAttrs(), [](const Attr *A) {
79 return !isa<FallThroughAttr>(A);
80 });
81 }
82 } Visitor;
83
84 for (const Stmt *Elem : Branch) {
85 if (!Visitor.TraverseStmt(const_cast<Stmt *>(Elem)))
86 return true;
87 }
88 return false;
89}
90
91namespace clang::tidy::bugprone {
92
93void BranchCloneCheck::registerMatchers(MatchFinder *Finder) {
94 Finder->addMatcher(
95 ifStmt(unless(allOf(isConstexpr(), isInTemplateInstantiation())),
96 stmt().bind("if"),
97 hasParent(stmt(unless(ifStmt(hasElse(equalsBoundNode("if")))))),
98 hasElse(stmt().bind("else"))),
99 this);
100 Finder->addMatcher(switchStmt().bind("switch"), this);
101 Finder->addMatcher(conditionalOperator().bind("condOp"), this);
102 Finder->addMatcher(
103 ifStmt((hasThen(hasDescendant(ifStmt())))).bind("ifWithDescendantIf"),
104 this);
105}
106
107/// Determines whether two statement trees are identical regarding
108/// operators and symbols.
109///
110/// Exceptions: expressions containing macros or functions with possible side
111/// effects are never considered identical.
112/// Limitations: (t + u) and (u + t) are not considered identical.
113/// t*(u + t) and t*u + t*t are not considered identical.
114///
115static bool isIdenticalStmt(const ASTContext &Ctx, const Stmt *Stmt1,
116 const Stmt *Stmt2, bool IgnoreSideEffects) {
117 if (!Stmt1 || !Stmt2)
118 return !Stmt1 && !Stmt2;
119
120 // If Stmt1 & Stmt2 are of different class then they are not
121 // identical statements.
122 if (Stmt1->getStmtClass() != Stmt2->getStmtClass())
123 return false;
124
125 const auto *Expr1 = dyn_cast<Expr>(Stmt1);
126 const auto *Expr2 = dyn_cast<Expr>(Stmt2);
127
128 if (Expr1 && Expr2) {
129 // If Stmt1 has side effects then don't warn even if expressions
130 // are identical.
131 if (!IgnoreSideEffects && Expr1->HasSideEffects(Ctx) &&
132 Expr2->HasSideEffects(Ctx))
133 return false;
134 // If either expression comes from a macro then don't warn even if
135 // the expressions are identical.
136 if ((Expr1->getExprLoc().isMacroID()) || (Expr2->getExprLoc().isMacroID()))
137 return false;
138
139 // If all children of two expressions are identical, return true.
140 Expr::const_child_iterator I1 = Expr1->child_begin();
141 Expr::const_child_iterator I2 = Expr2->child_begin();
142 while (I1 != Expr1->child_end() && I2 != Expr2->child_end()) {
143 if (!isIdenticalStmt(Ctx, *I1, *I2, IgnoreSideEffects))
144 return false;
145 ++I1;
146 ++I2;
147 }
148 // If there are different number of children in the statements, return
149 // false.
150 if (I1 != Expr1->child_end())
151 return false;
152 if (I2 != Expr2->child_end())
153 return false;
154 }
155
156 switch (Stmt1->getStmtClass()) {
157 default:
158 return false;
159 case Stmt::CallExprClass:
160 case Stmt::ArraySubscriptExprClass:
161 case Stmt::ArraySectionExprClass:
162 case Stmt::OMPArrayShapingExprClass:
163 case Stmt::OMPIteratorExprClass:
164 case Stmt::ImplicitCastExprClass:
165 case Stmt::ParenExprClass:
166 case Stmt::BreakStmtClass:
167 case Stmt::ContinueStmtClass:
168 case Stmt::NullStmtClass:
169 return true;
170 case Stmt::CStyleCastExprClass: {
171 const auto *CastExpr1 = cast<CStyleCastExpr>(Stmt1);
172 const auto *CastExpr2 = cast<CStyleCastExpr>(Stmt2);
173
174 return CastExpr1->getTypeAsWritten() == CastExpr2->getTypeAsWritten();
175 }
176 case Stmt::ReturnStmtClass: {
177 const auto *ReturnStmt1 = cast<ReturnStmt>(Stmt1);
178 const auto *ReturnStmt2 = cast<ReturnStmt>(Stmt2);
179
180 return isIdenticalStmt(Ctx, ReturnStmt1->getRetValue(),
181 ReturnStmt2->getRetValue(), IgnoreSideEffects);
182 }
183 case Stmt::ForStmtClass: {
184 const auto *ForStmt1 = cast<ForStmt>(Stmt1);
185 const auto *ForStmt2 = cast<ForStmt>(Stmt2);
186
187 if (!isIdenticalStmt(Ctx, ForStmt1->getInit(), ForStmt2->getInit(),
188 IgnoreSideEffects))
189 return false;
190 if (!isIdenticalStmt(Ctx, ForStmt1->getCond(), ForStmt2->getCond(),
191 IgnoreSideEffects))
192 return false;
193 if (!isIdenticalStmt(Ctx, ForStmt1->getInc(), ForStmt2->getInc(),
194 IgnoreSideEffects))
195 return false;
196 if (!isIdenticalStmt(Ctx, ForStmt1->getBody(), ForStmt2->getBody(),
197 IgnoreSideEffects))
198 return false;
199 return true;
200 }
201 case Stmt::DoStmtClass: {
202 const auto *DStmt1 = cast<DoStmt>(Stmt1);
203 const auto *DStmt2 = cast<DoStmt>(Stmt2);
204
205 if (!isIdenticalStmt(Ctx, DStmt1->getCond(), DStmt2->getCond(),
206 IgnoreSideEffects))
207 return false;
208 if (!isIdenticalStmt(Ctx, DStmt1->getBody(), DStmt2->getBody(),
209 IgnoreSideEffects))
210 return false;
211 return true;
212 }
213 case Stmt::WhileStmtClass: {
214 const auto *WStmt1 = cast<WhileStmt>(Stmt1);
215 const auto *WStmt2 = cast<WhileStmt>(Stmt2);
216
217 if (!isIdenticalStmt(Ctx, WStmt1->getCond(), WStmt2->getCond(),
218 IgnoreSideEffects))
219 return false;
220 if (!isIdenticalStmt(Ctx, WStmt1->getBody(), WStmt2->getBody(),
221 IgnoreSideEffects))
222 return false;
223 return true;
224 }
225 case Stmt::IfStmtClass: {
226 const auto *IStmt1 = cast<IfStmt>(Stmt1);
227 const auto *IStmt2 = cast<IfStmt>(Stmt2);
228
229 if (!isIdenticalStmt(Ctx, IStmt1->getCond(), IStmt2->getCond(),
230 IgnoreSideEffects))
231 return false;
232 if (!isIdenticalStmt(Ctx, IStmt1->getThen(), IStmt2->getThen(),
233 IgnoreSideEffects))
234 return false;
235 if (!isIdenticalStmt(Ctx, IStmt1->getElse(), IStmt2->getElse(),
236 IgnoreSideEffects))
237 return false;
238 return true;
239 }
240 case Stmt::CompoundStmtClass: {
241 const auto *CompStmt1 = cast<CompoundStmt>(Stmt1);
242 const auto *CompStmt2 = cast<CompoundStmt>(Stmt2);
243
244 if (CompStmt1->size() != CompStmt2->size())
245 return false;
246
247 if (!llvm::all_of(llvm::zip(CompStmt1->body(), CompStmt2->body()),
248 [&Ctx, IgnoreSideEffects](
249 std::tuple<const Stmt *, const Stmt *> StmtPair) {
250 const Stmt *Stmt0 = std::get<0>(StmtPair);
251 const Stmt *Stmt1 = std::get<1>(StmtPair);
252 return isIdenticalStmt(Ctx, Stmt0, Stmt1,
253 IgnoreSideEffects);
254 })) {
255 return false;
256 }
257
258 return true;
259 }
260 case Stmt::CompoundAssignOperatorClass:
261 case Stmt::BinaryOperatorClass: {
262 const auto *BinOp1 = cast<BinaryOperator>(Stmt1);
263 const auto *BinOp2 = cast<BinaryOperator>(Stmt2);
264 return BinOp1->getOpcode() == BinOp2->getOpcode();
265 }
266 case Stmt::CharacterLiteralClass: {
267 const auto *CharLit1 = cast<CharacterLiteral>(Stmt1);
268 const auto *CharLit2 = cast<CharacterLiteral>(Stmt2);
269 return CharLit1->getValue() == CharLit2->getValue();
270 }
271 case Stmt::DeclRefExprClass: {
272 const auto *DeclRef1 = cast<DeclRefExpr>(Stmt1);
273 const auto *DeclRef2 = cast<DeclRefExpr>(Stmt2);
274 return DeclRef1->getDecl() == DeclRef2->getDecl();
275 }
276 case Stmt::IntegerLiteralClass: {
277 const auto *IntLit1 = cast<IntegerLiteral>(Stmt1);
278 const auto *IntLit2 = cast<IntegerLiteral>(Stmt2);
279
280 const llvm::APInt I1 = IntLit1->getValue();
281 const llvm::APInt I2 = IntLit2->getValue();
282 if (I1.getBitWidth() != I2.getBitWidth())
283 return false;
284 return I1 == I2;
285 }
286 case Stmt::FloatingLiteralClass: {
287 const auto *FloatLit1 = cast<FloatingLiteral>(Stmt1);
288 const auto *FloatLit2 = cast<FloatingLiteral>(Stmt2);
289 return FloatLit1->getValue().bitwiseIsEqual(FloatLit2->getValue());
290 }
291 case Stmt::StringLiteralClass: {
292 const auto *StringLit1 = cast<StringLiteral>(Stmt1);
293 const auto *StringLit2 = cast<StringLiteral>(Stmt2);
294 return StringLit1->getBytes() == StringLit2->getBytes();
295 }
296 case Stmt::MemberExprClass: {
297 const auto *MemberStmt1 = cast<MemberExpr>(Stmt1);
298 const auto *MemberStmt2 = cast<MemberExpr>(Stmt2);
299 return MemberStmt1->getMemberDecl() == MemberStmt2->getMemberDecl();
300 }
301 case Stmt::UnaryOperatorClass: {
302 const auto *UnaryOp1 = cast<UnaryOperator>(Stmt1);
303 const auto *UnaryOp2 = cast<UnaryOperator>(Stmt2);
304 return UnaryOp1->getOpcode() == UnaryOp2->getOpcode();
305 }
306 }
307}
308
309void BranchCloneCheck::check(const MatchFinder::MatchResult &Result) {
310 const ASTContext &Context = *Result.Context;
311
312 if (const auto *IS = Result.Nodes.getNodeAs<IfStmt>("if")) {
313 const Stmt *Then = IS->getThen();
314 assert(Then && "An IfStmt must have a `then` branch!");
315
316 const Stmt *Else = Result.Nodes.getNodeAs<Stmt>("else");
317 assert(Else && "We only look for `if` statements with an `else` branch!");
318
319 if (!isa<IfStmt>(Else)) {
320 // Just a simple if with no `else if` branch.
321 if (utils::areStatementsIdentical(Then->IgnoreContainers(),
322 Else->IgnoreContainers(), Context)) {
323 diag(IS->getBeginLoc(), "if with identical then and else branches");
324 diag(IS->getElseLoc(), "else branch starts here", DiagnosticIDs::Note);
325 }
326 return;
327 }
328
329 // This is the complicated case when we start an if/else if/else chain.
330 // To find all the duplicates, we collect all the branches into a vector.
331 llvm::SmallVector<const Stmt *, 4> Branches;
332 const IfStmt *Cur = IS;
333 while (true) {
334 // Store the `then` branch.
335 Branches.push_back(Cur->getThen());
336
337 Else = Cur->getElse();
338 // The chain ends if there is no `else` branch.
339 if (!Else)
340 break;
341
342 // Check if there is another `else if`...
343 Cur = dyn_cast<IfStmt>(Else);
344 if (!Cur) {
345 // ...this is just a plain `else` branch at the end of the chain.
346 Branches.push_back(Else);
347 break;
348 }
349 }
350
351 const size_t N = Branches.size();
352 llvm::BitVector KnownAsClone(N);
353
354 for (size_t I = 0; I + 1 < N; I++) {
355 // We have already seen Branches[i] as a clone of an earlier branch.
356 if (KnownAsClone[I])
357 continue;
358
359 int NumCopies = 1;
360
361 for (size_t J = I + 1; J < N; J++) {
362 if (KnownAsClone[J] || !utils::areStatementsIdentical(
363 Branches[I]->IgnoreContainers(),
364 Branches[J]->IgnoreContainers(), Context))
365 continue;
366
367 NumCopies++;
368 KnownAsClone[J] = true;
369
370 if (NumCopies == 2) {
371 // We report the first occurrence only when we find the second one.
372 diag(Branches[I]->getBeginLoc(),
373 "repeated branch body in conditional chain");
374 const SourceLocation End =
375 Lexer::getLocForEndOfToken(Branches[I]->getEndLoc(), 0,
376 *Result.SourceManager, getLangOpts());
377 if (End.isValid()) {
378 diag(End, "end of the original", DiagnosticIDs::Note);
379 }
380 }
381
382 diag(Branches[J]->getBeginLoc(), "clone %0 starts here",
383 DiagnosticIDs::Note)
384 << (NumCopies - 1);
385 }
386 }
387 return;
388 }
389
390 if (const auto *CO = Result.Nodes.getNodeAs<ConditionalOperator>("condOp")) {
391 // We do not try to detect chains of ?: operators.
392 if (utils::areStatementsIdentical(CO->getTrueExpr(), CO->getFalseExpr(),
393 Context))
394 diag(CO->getQuestionLoc(),
395 "conditional operator with identical true and false expressions");
396
397 return;
398 }
399
400 if (const auto *SS = Result.Nodes.getNodeAs<SwitchStmt>("switch")) {
401 const auto *Body = dyn_cast_or_null<CompoundStmt>(SS->getBody());
402
403 // Code like
404 // switch (x) case 0: case 1: foobar();
405 // is legal and calls foobar() if and only if x is either 0 or 1;
406 // but we do not try to distinguish branches in such code.
407 if (!Body)
408 return;
409
410 // We will first collect the branches of the switch statements. For the
411 // sake of simplicity we say that branches are delimited by the SwitchCase
412 // (`case:` or `default:`) children of Body; that is, we ignore `case:` or
413 // `default:` labels embedded inside other statements and we do not follow
414 // the effects of `break` and other manipulation of the control-flow.
415 llvm::SmallVector<SwitchBranch, 4> Branches;
416 for (const Stmt *S : Body->body()) {
417 // If this is a `case` or `default`, we start a new, empty branch.
418 if (isa<SwitchCase>(S))
419 Branches.emplace_back();
420
421 // There may be code before the first branch (which can be dead code
422 // and can be code reached either through goto or through case labels
423 // that are embedded inside e.g. inner compound statements); we do not
424 // store those statements in branches.
425 if (!Branches.empty())
426 Branches.back().push_back(S);
427 }
428
429 auto *End = Branches.end();
430 auto *BeginCurrent = Branches.begin();
431 while (BeginCurrent < End) {
432 if (isFallthroughSwitchBranch(*BeginCurrent)) {
433 ++BeginCurrent;
434 continue;
435 }
436
437 auto *EndCurrent = BeginCurrent + 1;
438 while (EndCurrent < End &&
439 areSwitchBranchesIdentical(*BeginCurrent, *EndCurrent, Context)) {
440 ++EndCurrent;
441 }
442 // At this point the iterator range {BeginCurrent, EndCurrent} contains a
443 // complete family of consecutive identical branches.
444
445 if (EndCurrent == (BeginCurrent + 1)) {
446 // No consecutive identical branches that start on BeginCurrent
447 BeginCurrent = EndCurrent;
448 continue;
449 }
450
451 diag(BeginCurrent->front()->getBeginLoc(),
452 "switch has %0 consecutive identical branches")
453 << static_cast<int>(std::distance(BeginCurrent, EndCurrent));
454
455 SourceLocation EndLoc = (EndCurrent - 1)->back()->getEndLoc();
456 // If the case statement is generated from a macro, it's SourceLocation
457 // may be invalid, resulting in an assertion failure down the line.
458 // While not optimal, try the begin location in this case, it's still
459 // better then nothing.
460 if (EndLoc.isInvalid())
461 EndLoc = (EndCurrent - 1)->back()->getBeginLoc();
462 if (EndLoc.isMacroID())
463 EndLoc = Context.getSourceManager().getExpansionLoc(EndLoc);
464 EndLoc = Lexer::getLocForEndOfToken(EndLoc, 0, *Result.SourceManager,
465 getLangOpts());
466 if (EndLoc.isValid()) {
467 diag(EndLoc, "last of these clones ends here", DiagnosticIDs::Note);
468 }
469 BeginCurrent = EndCurrent;
470 }
471 return;
472 }
473
474 if (const auto *IS = Result.Nodes.getNodeAs<IfStmt>("ifWithDescendantIf")) {
475 const Stmt *Then = IS->getThen();
476 const auto *CS = dyn_cast<CompoundStmt>(Then);
477 if (CS && (!CS->body_empty())) {
478 const auto *InnerIf = dyn_cast<IfStmt>(*CS->body_begin());
479 if (InnerIf && isIdenticalStmt(Context, IS->getCond(), InnerIf->getCond(),
480 /*IgnoreSideEffects=*/false)) {
481 diag(IS->getBeginLoc(), "if with identical inner if statement");
482 diag(InnerIf->getBeginLoc(), "inner if starts here",
483 DiagnosticIDs::Note);
484 }
485 }
486 return;
487 }
488
489 llvm_unreachable("No if statement and no switch statement.");
490}
491
492} // namespace clang::tidy::bugprone
static bool isFallthroughSwitchBranch(const SwitchBranch &Branch)
static bool areSwitchBranchesIdentical(const SwitchBranch &LHS, const SwitchBranch &RHS, const ASTContext &Context)
Determines if the bodies of two branches in a switch statements are Type I clones of each other.
void check(const ast_matchers::MatchFinder::MatchResult &Result) override
void registerMatchers(ast_matchers::MatchFinder *Finder) override
static bool isIdenticalStmt(const ASTContext &Ctx, const Stmt *Stmt1, const Stmt *Stmt2, bool IgnoreSideEffects)
Determines whether two statement trees are identical regarding operators and symbols.
bool areStatementsIdentical(const Stmt *FirstStmt, const Stmt *SecondStmt, const ASTContext &Context, bool Canonical)
Definition ASTUtils.cpp:91
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//