clang-tools 22.0.0git
BranchCloneCheck.cpp
Go to the documentation of this file.
1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "BranchCloneCheck.h"
10#include "../utils/ASTUtils.h"
11#include "clang/AST/ASTContext.h"
12#include "clang/AST/RecursiveASTVisitor.h"
13#include "clang/ASTMatchers/ASTMatchFinder.h"
14#include "clang/Analysis/CloneDetection.h"
15#include "clang/Lex/Lexer.h"
16
17using namespace clang;
18using namespace clang::ast_matchers;
19
20namespace {
21/// A branch in a switch may consist of several statements; while a branch in
22/// an if/else if/else chain is one statement (which may be a CompoundStmt).
23using SwitchBranch = llvm::SmallVector<const Stmt *, 2>;
24} // anonymous namespace
25
26/// Determines if the bodies of two branches in a switch statements are Type I
27/// clones of each other. This function only examines the body of the branch
28/// and ignores the `case X:` or `default:` at the start of the branch.
29static bool areSwitchBranchesIdentical(const SwitchBranch &LHS,
30 const SwitchBranch &RHS,
31 const ASTContext &Context) {
32 return llvm::equal(LHS, RHS, [&](const Stmt *S1, const Stmt *S2) {
33 // NOTE: We strip goto labels and annotations in addition to stripping
34 // the `case X:` or `default:` labels, but it is very unlikely that this
35 // would cause false positives in real-world code.
36 return tidy::utils::areStatementsIdentical(S1->stripLabelLikeStatements(),
37 S2->stripLabelLikeStatements(),
38 Context);
39 });
40}
41
42static bool isFallthroughSwitchBranch(const SwitchBranch &Branch) {
43 struct SwitchCaseVisitor : RecursiveASTVisitor<SwitchCaseVisitor> {
44 using RecursiveASTVisitor<SwitchCaseVisitor>::DataRecursionQueue;
45
46 bool TraverseLambdaExpr(LambdaExpr *, DataRecursionQueue * = nullptr) {
47 return true; // Ignore lambdas
48 }
49
50 bool TraverseDecl(Decl *) {
51 return true; // No need to check declarations
52 }
53
54 bool TraverseSwitchStmt(SwitchStmt *, DataRecursionQueue * = nullptr) {
55 return true; // Ignore sub-switches
56 }
57
58 // NOLINTNEXTLINE(readability-identifier-naming) - FIXME
59 bool TraverseSwitchCase(SwitchCase *, DataRecursionQueue * = nullptr) {
60 return true; // Ignore cases
61 }
62
63 bool TraverseDefaultStmt(DefaultStmt *, DataRecursionQueue * = nullptr) {
64 return true; // Ignore defaults
65 }
66
67 bool TraverseAttributedStmt(AttributedStmt *S) {
68 if (!S)
69 return true;
70
71 return llvm::all_of(S->getAttrs(), [](const Attr *A) {
72 return !isa<FallThroughAttr>(A);
73 });
74 }
75 } Visitor;
76
77 for (const Stmt *Elem : Branch) {
78 if (!Visitor.TraverseStmt(const_cast<Stmt *>(Elem)))
79 return true;
80 }
81 return false;
82}
83
84namespace clang::tidy::bugprone {
85
86void BranchCloneCheck::registerMatchers(MatchFinder *Finder) {
87 Finder->addMatcher(
88 ifStmt(unless(allOf(isConstexpr(), isInTemplateInstantiation())),
89 stmt().bind("if"),
90 hasParent(stmt(unless(ifStmt(hasElse(equalsBoundNode("if")))))),
91 hasElse(stmt().bind("else"))),
92 this);
93 Finder->addMatcher(switchStmt().bind("switch"), this);
94 Finder->addMatcher(conditionalOperator().bind("condOp"), this);
95 Finder->addMatcher(
96 ifStmt((hasThen(hasDescendant(ifStmt())))).bind("ifWithDescendantIf"),
97 this);
98}
99
100/// Determines whether two statement trees are identical regarding
101/// operators and symbols.
102///
103/// Exceptions: expressions containing macros or functions with possible side
104/// effects are never considered identical.
105/// Limitations: (t + u) and (u + t) are not considered identical.
106/// t*(u + t) and t*u + t*t are not considered identical.
107///
108static bool isIdenticalStmt(const ASTContext &Ctx, const Stmt *Stmt1,
109 const Stmt *Stmt2, bool IgnoreSideEffects) {
110 if (!Stmt1 || !Stmt2)
111 return !Stmt1 && !Stmt2;
112
113 // If Stmt1 & Stmt2 are of different class then they are not
114 // identical statements.
115 if (Stmt1->getStmtClass() != Stmt2->getStmtClass())
116 return false;
117
118 const auto *Expr1 = dyn_cast<Expr>(Stmt1);
119 const auto *Expr2 = dyn_cast<Expr>(Stmt2);
120
121 if (Expr1 && Expr2) {
122 // If Stmt1 has side effects then don't warn even if expressions
123 // are identical.
124 if (!IgnoreSideEffects && Expr1->HasSideEffects(Ctx) &&
125 Expr2->HasSideEffects(Ctx))
126 return false;
127 // If either expression comes from a macro then don't warn even if
128 // the expressions are identical.
129 if ((Expr1->getExprLoc().isMacroID()) || (Expr2->getExprLoc().isMacroID()))
130 return false;
131
132 // If all children of two expressions are identical, return true.
133 if (!llvm::equal(Expr1->children(), Expr2->children(),
134 [&](const Stmt *S1, const Stmt *S2) {
135 return isIdenticalStmt(Ctx, S1, S2, IgnoreSideEffects);
136 }))
137 return false;
138 }
139
140 switch (Stmt1->getStmtClass()) {
141 default:
142 return false;
143 case Stmt::CallExprClass:
144 case Stmt::ArraySubscriptExprClass:
145 case Stmt::ArraySectionExprClass:
146 case Stmt::OMPArrayShapingExprClass:
147 case Stmt::OMPIteratorExprClass:
148 case Stmt::ImplicitCastExprClass:
149 case Stmt::ParenExprClass:
150 case Stmt::BreakStmtClass:
151 case Stmt::ContinueStmtClass:
152 case Stmt::NullStmtClass:
153 return true;
154 case Stmt::CStyleCastExprClass: {
155 const auto *CastExpr1 = cast<CStyleCastExpr>(Stmt1);
156 const auto *CastExpr2 = cast<CStyleCastExpr>(Stmt2);
157
158 return CastExpr1->getTypeAsWritten() == CastExpr2->getTypeAsWritten();
159 }
160 case Stmt::ReturnStmtClass: {
161 const auto *ReturnStmt1 = cast<ReturnStmt>(Stmt1);
162 const auto *ReturnStmt2 = cast<ReturnStmt>(Stmt2);
163
164 return isIdenticalStmt(Ctx, ReturnStmt1->getRetValue(),
165 ReturnStmt2->getRetValue(), IgnoreSideEffects);
166 }
167 case Stmt::ForStmtClass: {
168 const auto *ForStmt1 = cast<ForStmt>(Stmt1);
169 const auto *ForStmt2 = cast<ForStmt>(Stmt2);
170
171 if (!isIdenticalStmt(Ctx, ForStmt1->getInit(), ForStmt2->getInit(),
172 IgnoreSideEffects))
173 return false;
174 if (!isIdenticalStmt(Ctx, ForStmt1->getCond(), ForStmt2->getCond(),
175 IgnoreSideEffects))
176 return false;
177 if (!isIdenticalStmt(Ctx, ForStmt1->getInc(), ForStmt2->getInc(),
178 IgnoreSideEffects))
179 return false;
180 if (!isIdenticalStmt(Ctx, ForStmt1->getBody(), ForStmt2->getBody(),
181 IgnoreSideEffects))
182 return false;
183 return true;
184 }
185 case Stmt::DoStmtClass: {
186 const auto *DStmt1 = cast<DoStmt>(Stmt1);
187 const auto *DStmt2 = cast<DoStmt>(Stmt2);
188
189 if (!isIdenticalStmt(Ctx, DStmt1->getCond(), DStmt2->getCond(),
190 IgnoreSideEffects))
191 return false;
192 if (!isIdenticalStmt(Ctx, DStmt1->getBody(), DStmt2->getBody(),
193 IgnoreSideEffects))
194 return false;
195 return true;
196 }
197 case Stmt::WhileStmtClass: {
198 const auto *WStmt1 = cast<WhileStmt>(Stmt1);
199 const auto *WStmt2 = cast<WhileStmt>(Stmt2);
200
201 if (!isIdenticalStmt(Ctx, WStmt1->getCond(), WStmt2->getCond(),
202 IgnoreSideEffects))
203 return false;
204 if (!isIdenticalStmt(Ctx, WStmt1->getBody(), WStmt2->getBody(),
205 IgnoreSideEffects))
206 return false;
207 return true;
208 }
209 case Stmt::IfStmtClass: {
210 const auto *IStmt1 = cast<IfStmt>(Stmt1);
211 const auto *IStmt2 = cast<IfStmt>(Stmt2);
212
213 if (!isIdenticalStmt(Ctx, IStmt1->getCond(), IStmt2->getCond(),
214 IgnoreSideEffects))
215 return false;
216 if (!isIdenticalStmt(Ctx, IStmt1->getThen(), IStmt2->getThen(),
217 IgnoreSideEffects))
218 return false;
219 if (!isIdenticalStmt(Ctx, IStmt1->getElse(), IStmt2->getElse(),
220 IgnoreSideEffects))
221 return false;
222 return true;
223 }
224 case Stmt::DeferStmtClass: {
225 const auto *DefStmt1 = cast<DeferStmt>(Stmt1);
226 const auto *DefStmt2 = cast<DeferStmt>(Stmt2);
227 return isIdenticalStmt(Ctx, DefStmt1->getBody(), DefStmt2->getBody(),
228 IgnoreSideEffects);
229 }
230 case Stmt::CompoundStmtClass: {
231 const auto *CompStmt1 = cast<CompoundStmt>(Stmt1);
232 const auto *CompStmt2 = cast<CompoundStmt>(Stmt2);
233 return llvm::equal(CompStmt1->body(), CompStmt2->body(),
234 [&](const Stmt *S1, const Stmt *S2) {
235 return isIdenticalStmt(Ctx, S1, S2, IgnoreSideEffects);
236 });
237 }
238 case Stmt::CompoundAssignOperatorClass:
239 case Stmt::BinaryOperatorClass: {
240 const auto *BinOp1 = cast<BinaryOperator>(Stmt1);
241 const auto *BinOp2 = cast<BinaryOperator>(Stmt2);
242 return BinOp1->getOpcode() == BinOp2->getOpcode();
243 }
244 case Stmt::CharacterLiteralClass: {
245 const auto *CharLit1 = cast<CharacterLiteral>(Stmt1);
246 const auto *CharLit2 = cast<CharacterLiteral>(Stmt2);
247 return CharLit1->getValue() == CharLit2->getValue();
248 }
249 case Stmt::DeclRefExprClass: {
250 const auto *DeclRef1 = cast<DeclRefExpr>(Stmt1);
251 const auto *DeclRef2 = cast<DeclRefExpr>(Stmt2);
252 return DeclRef1->getDecl() == DeclRef2->getDecl();
253 }
254 case Stmt::IntegerLiteralClass: {
255 const auto *IntLit1 = cast<IntegerLiteral>(Stmt1);
256 const auto *IntLit2 = cast<IntegerLiteral>(Stmt2);
257
258 const llvm::APInt I1 = IntLit1->getValue();
259 const llvm::APInt I2 = IntLit2->getValue();
260 if (I1.getBitWidth() != I2.getBitWidth())
261 return false;
262 return I1 == I2;
263 }
264 case Stmt::FloatingLiteralClass: {
265 const auto *FloatLit1 = cast<FloatingLiteral>(Stmt1);
266 const auto *FloatLit2 = cast<FloatingLiteral>(Stmt2);
267 return FloatLit1->getValue().bitwiseIsEqual(FloatLit2->getValue());
268 }
269 case Stmt::StringLiteralClass: {
270 const auto *StringLit1 = cast<StringLiteral>(Stmt1);
271 const auto *StringLit2 = cast<StringLiteral>(Stmt2);
272 return StringLit1->getBytes() == StringLit2->getBytes();
273 }
274 case Stmt::MemberExprClass: {
275 const auto *MemberStmt1 = cast<MemberExpr>(Stmt1);
276 const auto *MemberStmt2 = cast<MemberExpr>(Stmt2);
277 return MemberStmt1->getMemberDecl() == MemberStmt2->getMemberDecl();
278 }
279 case Stmt::UnaryOperatorClass: {
280 const auto *UnaryOp1 = cast<UnaryOperator>(Stmt1);
281 const auto *UnaryOp2 = cast<UnaryOperator>(Stmt2);
282 return UnaryOp1->getOpcode() == UnaryOp2->getOpcode();
283 }
284 }
285}
286
287void BranchCloneCheck::check(const MatchFinder::MatchResult &Result) {
288 const ASTContext &Context = *Result.Context;
289
290 if (const auto *IS = Result.Nodes.getNodeAs<IfStmt>("if")) {
291 const Stmt *Then = IS->getThen();
292 assert(Then && "An IfStmt must have a `then` branch!");
293
294 const Stmt *Else = Result.Nodes.getNodeAs<Stmt>("else");
295 assert(Else && "We only look for `if` statements with an `else` branch!");
296
297 if (!isa<IfStmt>(Else)) {
298 // Just a simple if with no `else if` branch.
299 if (utils::areStatementsIdentical(Then->IgnoreContainers(),
300 Else->IgnoreContainers(), Context)) {
301 diag(IS->getBeginLoc(), "if with identical then and else branches");
302 diag(IS->getElseLoc(), "else branch starts here", DiagnosticIDs::Note);
303 }
304 return;
305 }
306
307 // This is the complicated case when we start an if/else if/else chain.
308 // To find all the duplicates, we collect all the branches into a vector.
309 llvm::SmallVector<const Stmt *, 4> Branches;
310 const IfStmt *Cur = IS;
311 while (true) {
312 // Store the `then` branch.
313 Branches.push_back(Cur->getThen());
314
315 Else = Cur->getElse();
316 // The chain ends if there is no `else` branch.
317 if (!Else)
318 break;
319
320 // Check if there is another `else if`...
321 Cur = dyn_cast<IfStmt>(Else);
322 if (!Cur) {
323 // ...this is just a plain `else` branch at the end of the chain.
324 Branches.push_back(Else);
325 break;
326 }
327 }
328
329 const size_t N = Branches.size();
330 llvm::BitVector KnownAsClone(N);
331
332 for (size_t I = 0; I + 1 < N; I++) {
333 // We have already seen Branches[i] as a clone of an earlier branch.
334 if (KnownAsClone[I])
335 continue;
336
337 int NumCopies = 1;
338
339 for (size_t J = I + 1; J < N; J++) {
340 if (KnownAsClone[J] || !utils::areStatementsIdentical(
341 Branches[I]->IgnoreContainers(),
342 Branches[J]->IgnoreContainers(), Context))
343 continue;
344
345 NumCopies++;
346 KnownAsClone[J] = true;
347
348 if (NumCopies == 2) {
349 // We report the first occurrence only when we find the second one.
350 diag(Branches[I]->getBeginLoc(),
351 "repeated branch body in conditional chain");
352 const SourceLocation End =
353 Lexer::getLocForEndOfToken(Branches[I]->getEndLoc(), 0,
354 *Result.SourceManager, getLangOpts());
355 if (End.isValid()) {
356 diag(End, "end of the original", DiagnosticIDs::Note);
357 }
358 }
359
360 diag(Branches[J]->getBeginLoc(), "clone %0 starts here",
361 DiagnosticIDs::Note)
362 << (NumCopies - 1);
363 }
364 }
365 return;
366 }
367
368 if (const auto *CO = Result.Nodes.getNodeAs<ConditionalOperator>("condOp")) {
369 // We do not try to detect chains of ?: operators.
370 if (utils::areStatementsIdentical(CO->getTrueExpr(), CO->getFalseExpr(),
371 Context))
372 diag(CO->getQuestionLoc(),
373 "conditional operator with identical true and false expressions");
374
375 return;
376 }
377
378 if (const auto *SS = Result.Nodes.getNodeAs<SwitchStmt>("switch")) {
379 const auto *Body = dyn_cast_or_null<CompoundStmt>(SS->getBody());
380
381 // Code like
382 // switch (x) case 0: case 1: foobar();
383 // is legal and calls foobar() if and only if x is either 0 or 1;
384 // but we do not try to distinguish branches in such code.
385 if (!Body)
386 return;
387
388 // We will first collect the branches of the switch statements. For the
389 // sake of simplicity we say that branches are delimited by the SwitchCase
390 // (`case:` or `default:`) children of Body; that is, we ignore `case:` or
391 // `default:` labels embedded inside other statements and we do not follow
392 // the effects of `break` and other manipulation of the control-flow.
393 llvm::SmallVector<SwitchBranch, 4> Branches;
394 for (const Stmt *S : Body->body()) {
395 // If this is a `case` or `default`, we start a new, empty branch.
396 if (isa<SwitchCase>(S))
397 Branches.emplace_back();
398
399 // There may be code before the first branch (which can be dead code
400 // and can be code reached either through goto or through case labels
401 // that are embedded inside e.g. inner compound statements); we do not
402 // store those statements in branches.
403 if (!Branches.empty())
404 Branches.back().push_back(S);
405 }
406
407 auto *End = Branches.end();
408 auto *BeginCurrent = Branches.begin();
409 while (BeginCurrent < End) {
410 if (isFallthroughSwitchBranch(*BeginCurrent)) {
411 ++BeginCurrent;
412 continue;
413 }
414
415 auto *EndCurrent = BeginCurrent + 1;
416 while (EndCurrent < End &&
417 areSwitchBranchesIdentical(*BeginCurrent, *EndCurrent, Context)) {
418 ++EndCurrent;
419 }
420 // At this point the iterator range {BeginCurrent, EndCurrent} contains a
421 // complete family of consecutive identical branches.
422
423 if (EndCurrent == (BeginCurrent + 1)) {
424 // No consecutive identical branches that start on BeginCurrent
425 BeginCurrent = EndCurrent;
426 continue;
427 }
428
429 diag(BeginCurrent->front()->getBeginLoc(),
430 "switch has %0 consecutive identical branches")
431 << std::distance(BeginCurrent, EndCurrent);
432
433 SourceLocation EndLoc = (EndCurrent - 1)->back()->getEndLoc();
434 // If the case statement is generated from a macro, it's SourceLocation
435 // may be invalid, resulting in an assertion failure down the line.
436 // While not optimal, try the begin location in this case, it's still
437 // better then nothing.
438 if (EndLoc.isInvalid())
439 EndLoc = (EndCurrent - 1)->back()->getBeginLoc();
440 if (EndLoc.isMacroID())
441 EndLoc = Context.getSourceManager().getExpansionLoc(EndLoc);
442 EndLoc = Lexer::getLocForEndOfToken(EndLoc, 0, *Result.SourceManager,
443 getLangOpts());
444 if (EndLoc.isValid()) {
445 diag(EndLoc, "last of these clones ends here", DiagnosticIDs::Note);
446 }
447 BeginCurrent = EndCurrent;
448 }
449 return;
450 }
451
452 if (const auto *IS = Result.Nodes.getNodeAs<IfStmt>("ifWithDescendantIf")) {
453 const Stmt *Then = IS->getThen();
454 const auto *CS = dyn_cast<CompoundStmt>(Then);
455 if (CS && (!CS->body_empty())) {
456 const auto *InnerIf = dyn_cast<IfStmt>(*CS->body_begin());
457 if (InnerIf && isIdenticalStmt(Context, IS->getCond(), InnerIf->getCond(),
458 /*IgnoreSideEffects=*/false)) {
459 diag(IS->getBeginLoc(), "if with identical inner if statement");
460 diag(InnerIf->getBeginLoc(), "inner if starts here",
461 DiagnosticIDs::Note);
462 }
463 }
464 return;
465 }
466
467 llvm_unreachable("No if statement and no switch statement.");
468}
469
470} // namespace clang::tidy::bugprone
static bool isFallthroughSwitchBranch(const SwitchBranch &Branch)
static bool areSwitchBranchesIdentical(const SwitchBranch &LHS, const SwitchBranch &RHS, const ASTContext &Context)
Determines if the bodies of two branches in a switch statements are Type I clones of each other.
void check(const ast_matchers::MatchFinder::MatchResult &Result) override
void registerMatchers(ast_matchers::MatchFinder *Finder) override
static bool isIdenticalStmt(const ASTContext &Ctx, const Stmt *Stmt1, const Stmt *Stmt2, bool IgnoreSideEffects)
Determines whether two statement trees are identical regarding operators and symbols.
bool areStatementsIdentical(const Stmt *FirstStmt, const Stmt *SecondStmt, const ASTContext &Context, bool Canonical)
Definition ASTUtils.cpp:91
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//