clang-tools  14.0.0git
BranchCloneCheck.cpp
Go to the documentation of this file.
1 //===--- BranchCloneCheck.cpp - clang-tidy --------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "BranchCloneCheck.h"
10 #include "clang/AST/ASTContext.h"
11 #include "clang/ASTMatchers/ASTMatchFinder.h"
12 #include "clang/Analysis/CloneDetection.h"
13 #include "clang/Lex/Lexer.h"
14 #include "llvm/Support/Casting.h"
15 
16 using namespace clang;
17 using namespace clang::ast_matchers;
18 
19 /// Returns true when the statements are Type I clones of each other.
20 static bool areStatementsIdentical(const Stmt *LHS, const Stmt *RHS,
21  const ASTContext &Context) {
22  llvm::FoldingSetNodeID DataLHS, DataRHS;
23  LHS->Profile(DataLHS, Context, false);
24  RHS->Profile(DataRHS, Context, false);
25  return (DataLHS == DataRHS);
26 }
27 
28 namespace {
29 /// A branch in a switch may consist of several statements; while a branch in
30 /// an if/else if/else chain is one statement (which may be a CompoundStmt).
31 using SwitchBranch = llvm::SmallVector<const Stmt *, 2>;
32 } // anonymous namespace
33 
34 /// Determines if the bodies of two branches in a switch statements are Type I
35 /// clones of each other. This function only examines the body of the branch
36 /// and ignores the `case X:` or `default:` at the start of the branch.
37 static bool areSwitchBranchesIdentical(const SwitchBranch LHS,
38  const SwitchBranch RHS,
39  const ASTContext &Context) {
40  if (LHS.size() != RHS.size())
41  return false;
42 
43  for (size_t I = 0, Size = LHS.size(); I < Size; I++) {
44  // NOTE: We strip goto labels and annotations in addition to stripping
45  // the `case X:` or `default:` labels, but it is very unlikely that this
46  // would cause false positives in real-world code.
47  if (!areStatementsIdentical(LHS[I]->stripLabelLikeStatements(),
48  RHS[I]->stripLabelLikeStatements(), Context)) {
49  return false;
50  }
51  }
52 
53  return true;
54 }
55 
56 namespace clang {
57 namespace tidy {
58 namespace bugprone {
59 
60 void BranchCloneCheck::registerMatchers(MatchFinder *Finder) {
61  Finder->addMatcher(
62  ifStmt(unless(allOf(isConstexpr(), isInTemplateInstantiation())),
63  stmt().bind("if"),
64  hasParent(stmt(unless(ifStmt(hasElse(equalsBoundNode("if")))))),
65  hasElse(stmt().bind("else"))),
66  this);
67  Finder->addMatcher(switchStmt().bind("switch"), this);
68  Finder->addMatcher(conditionalOperator().bind("condOp"), this);
69 }
70 
71 void BranchCloneCheck::check(const MatchFinder::MatchResult &Result) {
72  const ASTContext &Context = *Result.Context;
73 
74  if (const auto *IS = Result.Nodes.getNodeAs<IfStmt>("if")) {
75  const Stmt *Then = IS->getThen();
76  assert(Then && "An IfStmt must have a `then` branch!");
77 
78  const Stmt *Else = Result.Nodes.getNodeAs<Stmt>("else");
79  assert(Else && "We only look for `if` statements with an `else` branch!");
80 
81  if (!isa<IfStmt>(Else)) {
82  // Just a simple if with no `else if` branch.
83  if (areStatementsIdentical(Then->IgnoreContainers(),
84  Else->IgnoreContainers(), Context)) {
85  diag(IS->getBeginLoc(), "if with identical then and else branches");
86  diag(IS->getElseLoc(), "else branch starts here", DiagnosticIDs::Note);
87  }
88  return;
89  }
90 
91  // This is the complicated case when we start an if/else if/else chain.
92  // To find all the duplicates, we collect all the branches into a vector.
93  llvm::SmallVector<const Stmt *, 4> Branches;
94  const IfStmt *Cur = IS;
95  while (true) {
96  // Store the `then` branch.
97  Branches.push_back(Cur->getThen());
98 
99  Else = Cur->getElse();
100  // The chain ends if there is no `else` branch.
101  if (!Else)
102  break;
103 
104  // Check if there is another `else if`...
105  Cur = dyn_cast<IfStmt>(Else);
106  if (!Cur) {
107  // ...this is just a plain `else` branch at the end of the chain.
108  Branches.push_back(Else);
109  break;
110  }
111  }
112 
113  size_t N = Branches.size();
114  llvm::BitVector KnownAsClone(N);
115 
116  for (size_t I = 0; I + 1 < N; I++) {
117  // We have already seen Branches[i] as a clone of an earlier branch.
118  if (KnownAsClone[I])
119  continue;
120 
121  int NumCopies = 1;
122 
123  for (size_t J = I + 1; J < N; J++) {
124  if (KnownAsClone[J] ||
125  !areStatementsIdentical(Branches[I]->IgnoreContainers(),
126  Branches[J]->IgnoreContainers(), Context))
127  continue;
128 
129  NumCopies++;
130  KnownAsClone[J] = true;
131 
132  if (NumCopies == 2) {
133  // We report the first occurrence only when we find the second one.
134  diag(Branches[I]->getBeginLoc(),
135  "repeated branch in conditional chain");
136  SourceLocation End =
137  Lexer::getLocForEndOfToken(Branches[I]->getEndLoc(), 0,
138  *Result.SourceManager, getLangOpts());
139  if (End.isValid()) {
140  diag(End, "end of the original", DiagnosticIDs::Note);
141  }
142  }
143 
144  diag(Branches[J]->getBeginLoc(), "clone %0 starts here",
145  DiagnosticIDs::Note)
146  << (NumCopies - 1);
147  }
148  }
149  return;
150  }
151 
152  if (const auto *CO = Result.Nodes.getNodeAs<ConditionalOperator>("condOp")) {
153  // We do not try to detect chains of ?: operators.
154  if (areStatementsIdentical(CO->getTrueExpr(), CO->getFalseExpr(), Context))
155  diag(CO->getQuestionLoc(),
156  "conditional operator with identical true and false expressions");
157 
158  return;
159  }
160 
161  if (const auto *SS = Result.Nodes.getNodeAs<SwitchStmt>("switch")) {
162  const CompoundStmt *Body = dyn_cast_or_null<CompoundStmt>(SS->getBody());
163 
164  // Code like
165  // switch (x) case 0: case 1: foobar();
166  // is legal and calls foobar() if and only if x is either 0 or 1;
167  // but we do not try to distinguish branches in such code.
168  if (!Body)
169  return;
170 
171  // We will first collect the branches of the switch statements. For the
172  // sake of simplicity we say that branches are delimited by the SwitchCase
173  // (`case:` or `default:`) children of Body; that is, we ignore `case:` or
174  // `default:` labels embedded inside other statements and we do not follow
175  // the effects of `break` and other manipulation of the control-flow.
176  llvm::SmallVector<SwitchBranch, 4> Branches;
177  for (const Stmt *S : Body->body()) {
178  // If this is a `case` or `default`, we start a new, empty branch.
179  if (isa<SwitchCase>(S))
180  Branches.emplace_back();
181 
182  // There may be code before the first branch (which can be dead code
183  // and can be code reached either through goto or through case labels
184  // that are embedded inside e.g. inner compound statements); we do not
185  // store those statements in branches.
186  if (!Branches.empty())
187  Branches.back().push_back(S);
188  }
189 
190  auto *End = Branches.end();
191  auto *BeginCurrent = Branches.begin();
192  while (BeginCurrent < End) {
193  auto *EndCurrent = BeginCurrent + 1;
194  while (EndCurrent < End &&
195  areSwitchBranchesIdentical(*BeginCurrent, *EndCurrent, Context)) {
196  ++EndCurrent;
197  }
198  // At this point the iterator range {BeginCurrent, EndCurrent} contains a
199  // complete family of consecutive identical branches.
200  if (EndCurrent > BeginCurrent + 1) {
201  diag(BeginCurrent->front()->getBeginLoc(),
202  "switch has %0 consecutive identical branches")
203  << static_cast<int>(std::distance(BeginCurrent, EndCurrent));
204 
205  SourceLocation EndLoc = (EndCurrent - 1)->back()->getEndLoc();
206  // If the case statement is generated from a macro, it's SourceLocation
207  // may be invalid, resulting in an assertion failure down the line.
208  // While not optimal, try the begin location in this case, it's still
209  // better then nothing.
210  if (EndLoc.isInvalid())
211  EndLoc = (EndCurrent - 1)->back()->getBeginLoc();
212 
213  if (EndLoc.isMacroID())
214  EndLoc = Context.getSourceManager().getExpansionLoc(EndLoc);
215  EndLoc = Lexer::getLocForEndOfToken(EndLoc, 0, *Result.SourceManager,
216  getLangOpts());
217 
218  if (EndLoc.isValid()) {
219  diag(EndLoc, "last of these clones ends here", DiagnosticIDs::Note);
220  }
221  }
222  BeginCurrent = EndCurrent;
223  }
224  return;
225  }
226 
227  llvm_unreachable("No if statement and no switch statement.");
228 }
229 
230 } // namespace bugprone
231 } // namespace tidy
232 } // namespace clang
Branches
unsigned Branches
Definition: FunctionSizeCheck.cpp:115
clang::ast_matchers
Definition: AbseilMatcher.h:14
areStatementsIdentical
static bool areStatementsIdentical(const Stmt *LHS, const Stmt *RHS, const ASTContext &Context)
Returns true when the statements are Type I clones of each other.
Definition: BranchCloneCheck.cpp:20
clang::clangd::check
bool check(llvm::StringRef File, llvm::function_ref< bool(const Position &)> ShouldCheckLine, const ThreadsafeFS &TFS, const ClangdLSPServer::Options &Opts, bool EnableCodeCompletion)
Definition: Check.cpp:259
areSwitchBranchesIdentical
static bool areSwitchBranchesIdentical(const SwitchBranch LHS, const SwitchBranch RHS, const ASTContext &Context)
Determines if the bodies of two branches in a switch statements are Type I clones of each other.
Definition: BranchCloneCheck.cpp:37
clang
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
Definition: ApplyReplacements.h:27
BranchCloneCheck.h