18#include "llvm/Support/MD5.h"
19#include "llvm/Support/Path.h"
24 unsigned StartIndex,
unsigned EndIndex)
25 : S(
Stmt),
D(
D), StartIndex(StartIndex), EndIndex(EndIndex) {
26 assert(
Stmt &&
"Stmt must not be a nullptr");
27 assert(StartIndex < EndIndex &&
"Given array should not be empty");
28 assert(EndIndex <= Stmt->
size() &&
"Given array too big for this Stmt");
32 : S(
Stmt),
D(
D), StartIndex(0), EndIndex(0) {}
35 : S(nullptr),
D(nullptr), StartIndex(0), EndIndex(0) {}
47 bool StartIsInBounds =
63 auto CS = cast<CompoundStmt>(S);
64 return CS->body_begin() + StartIndex;
71 auto CS = cast<CompoundStmt>(S);
72 return CS->body_begin() + EndIndex;
102 if (
Seq.contains(GroupSeq))
116 if (Group.size() < OtherGroup.size())
127 std::vector<CloneDetector::CloneGroup> &
Result) {
128 std::vector<unsigned> IndexesToRemove;
134 for (
unsigned i = 0; i <
Result.size(); ++i) {
135 for (
unsigned j = 0; j <
Result.size(); ++j) {
141 IndexesToRemove.push_back(i);
150 for (
unsigned I : llvm::reverse(IndexesToRemove))
162 StringRef
Filename = llvm::sys::path::filename(
163 SM.getFilename(S.getContainingDecl()->getLocation()));
180class CloneTypeIIStmtDataCollector
186 template <
class Ty>
void addData(
const Ty &
Data) {
193 : Context(Context), DataConsumer(DataConsumer) {
200#define DEF_ADD_DATA(CLASS, CODE) \
201 template <class = void> void Visit##CLASS(const CLASS *S) { \
203 ConstStmtVisitor<CloneTypeIIStmtDataCollector<T>>::Visit##CLASS(S); \
206#include "clang/AST/StmtDataCollectors.inc"
210 void Visit##CLASS(const CLASS *S) { \
211 ConstStmtVisitor<CloneTypeIIStmtDataCollector<T>>::Visit##CLASS(S); \
228 llvm::MD5::MD5Result HashResult;
229 Hash.final(HashResult);
233 std::memcpy(&HashCode, &HashResult,
234 std::min(
sizeof(HashCode),
sizeof(HashResult)));
250 std::vector<std::pair<size_t, StmtSequence>> &StmtsByHash) {
254 CloneTypeIIStmtDataCollector<llvm::MD5>(S, Context, Hash);
256 auto CS = dyn_cast<CompoundStmt>(S);
259 for (
const Stmt *Child : S->children()) {
260 if (Child ==
nullptr) {
261 ChildHashes.push_back(0);
264 size_t ChildHash =
saveHash(Child,
D, StmtsByHash);
266 StringRef(
reinterpret_cast<char *
>(&ChildHash),
sizeof(ChildHash)));
267 ChildHashes.push_back(ChildHash);
274 for (
unsigned Pos = 0; Pos < CS->size(); ++Pos) {
279 for (
unsigned Length = 1; Length <= CS->size() - Pos; ++Length) {
282 size_t ChildHash = ChildHashes[Pos + Length - 1];
284 StringRef(
reinterpret_cast<char *
>(&ChildHash),
sizeof(ChildHash)));
288 llvm::MD5 SubHash = Hash;
289 StmtsByHash.push_back(std::make_pair(
297 StmtsByHash.push_back(std::make_pair(HashCode,
StmtSequence(S,
D)));
304class FoldingSetNodeIDWrapper {
306 llvm::FoldingSetNodeID &FS;
309 FoldingSetNodeIDWrapper(llvm::FoldingSetNodeID &FS) : FS(FS) {}
311 void update(StringRef Str) { FS.AddString(Str); }
318 FoldingSetNodeIDWrapper &OutputData) {
319 for (
const Stmt *S : Sequence) {
320 CloneTypeIIStmtDataCollector<FoldingSetNodeIDWrapper>(
323 for (
const Stmt *Child : S->children()) {
340 llvm::FoldingSetNodeID DataLHS, DataRHS;
341 FoldingSetNodeIDWrapper LHSWrapper(DataLHS);
342 FoldingSetNodeIDWrapper RHSWrapper(DataRHS);
347 return DataLHS == DataRHS;
351 std::vector<CloneDetector::CloneGroup> &Sequences) {
353 std::vector<CloneDetector::CloneGroup>
Result;
361 std::vector<std::pair<size_t, StmtSequence>> StmtsByHash;
365 saveHash(S.front(), S.getContainingDecl(), StmtsByHash);
369 llvm::stable_sort(StmtsByHash, llvm::less_first());
375 for (
unsigned i = 0; i < StmtsByHash.size() - 1; ++i) {
376 const auto Current = StmtsByHash[i];
383 size_t PrototypeHash = Current.first;
385 for (; i < StmtsByHash.size(); ++i) {
387 if (PrototypeHash != StmtsByHash[i].first) {
398 NewGroup.push_back(StmtsByHash[i].second);
403 Result.push_back(NewGroup);
411 std::vector<CloneDetector::CloneGroup> &Sequences) {
420 const std::string &ParentMacroStack) {
424 size_t Complexity = 1;
429 std::string MacroStack =
440 if (!ParentMacroStack.empty() && MacroStack == ParentMacroStack) {
446 if (
Seq.holdsSequence()) {
450 if (Complexity >= Limit)
454 for (
const Stmt *S :
Seq.front()->children()) {
457 if (Complexity >= Limit)
465 std::vector<CloneDetector::CloneGroup> &CloneGroups) {
475 std::vector<CloneDetector::CloneGroup> &CloneGroups,
478 std::vector<CloneDetector::CloneGroup>
Result;
479 for (
auto &HashGroup : CloneGroups) {
482 std::vector<char> Indexes;
483 Indexes.resize(HashGroup.size());
485 for (
unsigned i = 0; i < HashGroup.size(); ++i) {
499 for (
unsigned j = i + 1; j < HashGroup.size(); ++j) {
510 PotentialGroup.push_back(Candidate);
517 Result.push_back(PotentialGroup);
520 assert(llvm::all_of(Indexes, [](
char c) {
return c == 1; }));
526 const Stmt *Mention) {
528 for (
size_t KindIndex = 0; KindIndex < Variables.size(); ++KindIndex) {
529 if (Variables[KindIndex] ==
VarDecl) {
532 Occurences.emplace_back(KindIndex, Mention);
538 Occurences.emplace_back(Variables.size(), Mention);
542void VariablePattern::addVariables(
const Stmt *S) {
550 if (
auto D = dyn_cast<DeclRefExpr>(S)) {
552 addVariableOccurence(VD,
D);
556 for (
const Stmt *Child : S->children()) {
564 unsigned NumberOfDifferences = 0;
566 assert(
Other.Occurences.size() == Occurences.size());
567 for (
unsigned i = 0; i < Occurences.size(); ++i) {
568 auto ThisOccurence = Occurences[i];
569 auto OtherOccurence =
Other.Occurences[i];
570 if (ThisOccurence.KindID == OtherOccurence.KindID)
573 ++NumberOfDifferences;
577 if (FirstMismatch ==
nullptr)
582 if (NumberOfDifferences != 1)
585 const VarDecl *FirstSuggestion =
nullptr;
589 if (OtherOccurence.KindID < Variables.size())
590 FirstSuggestion = Variables[OtherOccurence.KindID];
595 Variables[ThisOccurence.KindID], ThisOccurence.Mention,
601 const VarDecl *SecondSuggestion =
nullptr;
602 if (ThisOccurence.KindID <
Other.Variables.size())
603 SecondSuggestion =
Other.Variables[ThisOccurence.KindID];
608 Other.Variables[OtherOccurence.KindID], OtherOccurence.Mention,
623 return NumberOfDifferences;
static bool containsAnyInGroup(StmtSequence &Seq, CloneDetector::CloneGroup &Group)
Returns true if and only if Seq contains at least one other sequence in the Group.
static size_t createHash(llvm::MD5 &Hash)
static void CollectStmtSequenceData(const StmtSequence &Sequence, FoldingSetNodeIDWrapper &OutputData)
Writes the relevant data from all statements and child statements in the given StmtSequence into the ...
static size_t saveHash(const Stmt *S, const Decl *D, std::vector< std::pair< size_t, StmtSequence > > &StmtsByHash)
Generates and saves a hash code for the given Stmt.
static bool containsGroup(CloneDetector::CloneGroup &Group, CloneDetector::CloneGroup &OtherGroup)
Returns true if and only if all sequences in OtherGroup are contained by a sequence in Group.
static bool areSequencesClones(const StmtSequence &LHS, const StmtSequence &RHS)
Returns true if both sequences are clones of each other.
This file defines classes for searching and analyzing source code clones.
This file declares helper methods for collecting data from AST nodes.
Defines the C++ template declaration subclasses.
Defines the SourceManager interface.
__device__ __2f16 float c
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
SourceManager & getSourceManager()
A boolean literal, per ([C++ lex.bool] Boolean literals).
static void splitCloneGroups(std::vector< CloneDetector::CloneGroup > &CloneGroups, llvm::function_ref< bool(const StmtSequence &, const StmtSequence &)> Compare)
Splits the given CloneGroups until the given Compare function returns true for all clones in a single...
void analyzeCodeBody(const Decl *D)
Generates and stores search data for all statements in the body of the given Decl.
CompoundStmt - This represents a group of statements like { stmt stmt }.
ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.
A reference to a declared variable, function, enum, etc.
Decl - This represents one declaration (or definition), e.g.
ASTContext & getASTContext() const LLVM_READONLY
virtual Stmt * getBody() const
getBody - If this Decl represents a declaration for a body of code, such as a function or method defi...
virtual bool hasBody() const
Returns true if this Decl represents a declaration for a body of code, such as a function or method d...
virtual Decl * getCanonicalDecl()
Retrieves the "canonical" declaration of the given declaration.
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
size_t calculateStmtComplexity(const StmtSequence &Seq, std::size_t Limit, const std::string &ParentMacroStack="")
Calculates the complexity of the given StmtSequence.
void constrain(std::vector< CloneDetector::CloneGroup > &Sequences)
void constrain(std::vector< CloneDetector::CloneGroup > &Sequences)
ASTContext & getASTContext() const
Encodes a location in the source.
This class handles loading and caching of source files into memory.
A trivial tuple used to represent a source range.
Identifies a list of statements.
bool contains(const StmtSequence &Other) const
Returns true if and only if this sequence covers a source range that contains the source range of the...
iterator begin() const
Returns an iterator pointing to the first statement in this sequence.
const Stmt *const * iterator
const Decl * getContainingDecl() const
Returns the declaration that contains the stored Stmts.
StmtSequence()
Constructs an empty StmtSequence.
ASTContext & getASTContext() const
Returns the related ASTContext for the stored Stmts.
unsigned size() const
Returns the number of statements this object holds.
iterator end() const
Returns an iterator pointing behind the last statement in this sequence.
SourceLocation getEndLoc() const
Returns the end SourceLocation of the last statement in this sequence.
const Stmt * front() const
Returns the first statement in this sequence.
bool holdsSequence() const
Returns true if this object holds a list of statements.
SourceLocation getBeginLoc() const
Returns the start SourceLocation of the first statement in this sequence.
SourceRange getSourceRange() const
Returns the source range of the whole sequence - from the beginning of the first statement to the end...
const Stmt * back() const
Returns the last statement in this sequence.
Stmt - This represents one statement.
SourceLocation getEndLoc() const LLVM_READONLY
SourceLocation getBeginLoc() const LLVM_READONLY
StringLiteral - This represents a string literal expression, e.g.
Represents a variable declaration or definition.
Analyzes the pattern of the referenced variables in a statement.
unsigned countPatternDifferences(const VariablePattern &Other, VariablePattern::SuspiciousClonePair *FirstMismatch=nullptr)
Counts the differences between this pattern and the given one.
void addDataToConsumer(T &DataConsumer, llvm::StringRef Str)
Utility functions for implementing addData() for a consumer that has a method update(StringRef)
std::string getMacroStack(SourceLocation Loc, ASTContext &Context)
Returns a string that represents all macro expansions that expanded into the given SourceLocation.
The JSON file list parser is used to communicate input to InstallAPI.
@ Seq
'seq' clause, allowed on 'loop' and 'routine' directives.
@ Result
The result type of a method or function.
const FunctionProtoType * T
@ Other
Other implicit parameter.
std::shared_ptr< llvm::Regex > IgnoredFilesRegex
bool isAutoGenerated(const CloneDetector::CloneGroup &Group)
StringRef IgnoredFilesPattern
void constrain(std::vector< CloneDetector::CloneGroup > &CloneGroups)
void constrain(std::vector< CloneDetector::CloneGroup > &Result)
Utility class holding the relevant information about a single clone in this pair.
const VarDecl * Suggestion
The variable that should have been referenced to follow the pattern.
Describes two clones that reference their variables in a different pattern which could indicate a pro...
SuspiciousCloneInfo SecondCloneInfo
The other clone in the pair, which can have a suggested variable.
SuspiciousCloneInfo FirstCloneInfo
The first clone in the pair which always has a suggested variable.