18 #include "llvm/ADT/PriorityQueue.h"
22 #include <unordered_set>
25 using namespace clang;
35 Mapping(Mapping &&Other) =
default;
36 Mapping &operator=(Mapping &&Other) =
default;
38 Mapping(
size_t Size) {
39 SrcToDst = std::make_unique<NodeId[]>(Size);
40 DstToSrc = std::make_unique<NodeId[]>(Size);
43 void link(NodeId Src, NodeId Dst) {
44 SrcToDst[Src] = Dst, DstToSrc[Dst] = Src;
47 NodeId getDst(NodeId Src)
const {
return SrcToDst[Src]; }
48 NodeId getSrc(NodeId Dst)
const {
return DstToSrc[Dst]; }
49 bool hasSrc(NodeId Src)
const {
return getDst(Src).isValid(); }
50 bool hasDst(NodeId Dst)
const {
return getSrc(Dst).isValid(); }
53 std::unique_ptr<NodeId[]> SrcToDst, DstToSrc;
66 void computeMapping();
69 void computeChangeKinds(Mapping &M);
74 return TheMapping.getDst(
Id);
75 assert(&*
Tree == &T2 &&
"Invalid tree.");
76 return TheMapping.getSrc(
Id);
87 bool haveSameParents(
const Mapping &M,
NodeId Id1,
NodeId Id2)
const;
91 void addOptimalMapping(Mapping &M,
NodeId Id1,
NodeId Id2)
const;
95 double getJaccardSimilarity(
const Mapping &M,
NodeId Id1,
NodeId Id2)
const;
98 NodeId findCandidate(
const Mapping &M,
NodeId Id1)
const;
101 Mapping matchTopDown()
const;
104 void matchBottomUp(Mapping &M)
const;
120 std::enable_if_t<std::is_base_of<Stmt, T>::value, T> *
Node,
125 std::enable_if_t<std::is_base_of<Decl, T>::value, T> *
Node,
148 int getNumberOfDescendants(
NodeId Id)
const;
150 int findPositionInParent(
NodeId Id,
bool Shifted =
false)
const;
163 void setLeftMostDescendants();
197 template <
class T> std::tuple<NodeId, NodeId> PreTraverse(T *ASTNode) {
199 Tree.Nodes.emplace_back();
200 Node &N =
Tree.getMutableNode(MyId);
204 assert(!N.ASTNode.getNodeKind().isNone() &&
205 "Expected nodes to have a valid kind.");
208 P.Children.push_back(MyId);
213 return std::make_tuple(MyId,
Tree.getNode(MyId).Parent);
215 void PostTraverse(std::tuple<NodeId, NodeId>
State) {
216 NodeId MyId, PreviousParent;
217 std::tie(MyId, PreviousParent) =
State;
218 assert(MyId.isValid() &&
"Expecting to only traverse valid nodes.");
221 Node &N =
Tree.getMutableNode(MyId);
222 N.RightMostDescendant =
Id - 1;
223 assert(N.RightMostDescendant >= 0 &&
224 N.RightMostDescendant <
Tree.getSize() &&
225 "Rightmost descendant must be a valid tree node.");
227 Tree.Leaves.push_back(MyId);
229 for (NodeId Child : N.Children)
230 N.Height =
std::max(N.Height, 1 +
Tree.getNode(Child).Height);
232 bool TraverseDecl(
Decl *D) {
235 auto SavedState = PreTraverse(D);
237 PostTraverse(SavedState);
240 bool TraverseStmt(
Stmt *S) {
241 if (
auto *E = dyn_cast_or_null<Expr>(S))
242 S = E->IgnoreImplicit();
245 auto SavedState = PreTraverse(S);
247 PostTraverse(SavedState);
250 bool TraverseType(
QualType T) {
return true; }
254 auto SavedState = PreTraverse(Init);
256 PostTraverse(SavedState);
269 PreorderVisitor PreorderWalker(*
this);
270 PreorderWalker.TraverseDecl(N);
276 PreorderVisitor PreorderWalker(*
this);
277 PreorderWalker.TraverseStmt(N);
283 std::vector<NodeId> Postorder;
288 Postorder.push_back(
Id);
296 std::vector<NodeId> Ids;
299 while (Expanded < Ids.size())
300 for (
NodeId Child :
Tree.getNode(Ids[Expanded++]).Children)
301 Ids.push_back(Child);
305 void SyntaxTree::Impl::initTree() {
306 setLeftMostDescendants();
308 PostorderIds.resize(
getSize());
309 std::function<void(NodeId)> PostorderTraverse = [&](NodeId
Id) {
311 PostorderTraverse(Child);
312 PostorderIds[
Id] = PostorderId;
319 void SyntaxTree::Impl::setLeftMostDescendants() {
320 for (NodeId Leaf : Leaves) {
321 getMutableNode(Leaf).LeftMostDescendant = Leaf;
322 NodeId
Parent, Cur = Leaf;
326 getMutableNode(Cur).LeftMostDescendant = Leaf;
345 for (
size_t I = 0, E = Siblings.size(); I < E; ++I) {
348 if (Siblings[I] ==
Id) {
353 llvm_unreachable(
"Node not found in parent's children.");
365 if (
auto *Namespace = dyn_cast<NamespaceDecl>(Context))
366 ContextPrefix = Namespace->getQualifiedNameAsString();
367 else if (
auto *Record = dyn_cast<RecordDecl>(Context))
368 ContextPrefix = Record->getQualifiedNameAsString();
369 else if (AST.getLangOpts().CPlusPlus11)
370 if (
auto *Tag = dyn_cast<TagDecl>(Context))
371 ContextPrefix = Tag->getQualifiedNameAsString();
375 if (!ContextPrefix.empty() && StringRef(Val).startswith(ContextPrefix))
376 Val = Val.substr(ContextPrefix.size() + 1);
390 const auto &
P = Parents[0];
391 if (
const auto *D =
P.get<
Decl>())
400 if (Init->isAnyMemberInitializer())
401 return std::string(Init->getAnyMember()->getName());
402 if (Init->isBaseInitializer())
404 if (Init->isDelegatingInitializer())
405 return Init->getTypeSourceInfo()->getType().getAsString(TypePP);
406 llvm_unreachable(
"Unknown initializer type");
415 if (
auto *S = DTN.get<
Stmt>())
416 return getStmtValue(S);
417 if (
auto *D = DTN.get<
Decl>())
418 return getDeclValue(D);
421 llvm_unreachable(
"Fatal: unhandled AST node.\n");
426 if (
auto *
V = dyn_cast<ValueDecl>(D))
427 return getRelativeName(
V) +
"(" +
V->getType().getAsString(TypePP) +
")";
428 if (
auto *N = dyn_cast<NamedDecl>(D))
429 Value += getRelativeName(N) +
";";
430 if (
auto *T = dyn_cast<TypedefNameDecl>(D))
431 return Value + T->getUnderlyingType().getAsString(TypePP) +
";";
432 if (
auto *T = dyn_cast<TypeDecl>(D))
433 if (T->getTypeForDecl())
435 T->getTypeForDecl()->getCanonicalTypeInternal().getAsString(TypePP) +
437 if (
auto *
U = dyn_cast<UsingDirectiveDecl>(D))
438 return std::string(
U->getNominatedNamespace()->getName());
439 if (
auto *A = dyn_cast<AccessSpecDecl>(D)) {
448 if (
auto *
U = dyn_cast<UnaryOperator>(S))
450 if (
auto *B = dyn_cast<BinaryOperator>(S))
452 if (
auto *M = dyn_cast<MemberExpr>(S))
453 return getRelativeName(M->getMemberDecl());
454 if (
auto *I = dyn_cast<IntegerLiteral>(S)) {
456 I->getValue().toString(Str, 10,
false);
459 if (
auto *F = dyn_cast<FloatingLiteral>(S)) {
461 F->getValue().toString(Str);
464 if (
auto *D = dyn_cast<DeclRefExpr>(S))
466 if (
auto *String = dyn_cast<StringLiteral>(S))
468 if (
auto *B = dyn_cast<CXXBoolLiteralExpr>(S))
469 return B->getValue() ?
"true" :
"false";
491 std::vector<NodeId> RootIds;
493 std::vector<SNodeId> LeftMostDescendants;
500 int NumLeaves = setLeftMostDescendants();
501 computeKeyRoots(NumLeaves);
503 int getSize()
const {
return RootIds.size(); }
505 assert(
Id > 0 &&
Id <=
getSize() &&
"Invalid subtree node index.");
506 return RootIds[
Id - 1];
509 return Tree.getNode(getIdInRoot(
Id));
512 assert(
Id > 0 &&
Id <=
getSize() &&
"Invalid subtree node index.");
513 return LeftMostDescendants[
Id - 1];
520 return Tree.getNodeValue(getIdInRoot(
Id));
525 int setLeftMostDescendants() {
527 LeftMostDescendants.resize(
getSize());
528 for (
int I = 0; I <
getSize(); ++I) {
532 assert(I ==
Tree.PostorderIds[getIdInRoot(SI)] - getPostorderOffset() &&
533 "Postorder traversal in subtree should correspond to traversal in "
534 "the root tree by a constant offset.");
536 getPostorderOffset());
540 void computeKeyRoots(
int Leaves) {
541 KeyRoots.resize(Leaves);
542 std::unordered_set<int> Visited;
544 for (SNodeId I(
getSize()); I > 0; --I) {
545 SNodeId LeftDesc = getLeftMostDescendant(I);
546 if (Visited.count(LeftDesc))
548 assert(K >= 0 &&
"K should be non-negative");
550 Visited.insert(LeftDesc);
563 std::unique_ptr<std::unique_ptr<double[]>[]> TreeDist, ForestDist;
568 : DiffImpl(DiffImpl), S1(T1, Id1), S2(T2, Id2) {
569 TreeDist = std::make_unique<std::unique_ptr<double[]>[]>(
571 ForestDist = std::make_unique<std::unique_ptr<double[]>[]>(
573 for (
int I = 0, E = S1.
getSize() + 1; I < E; ++I) {
574 TreeDist[I] = std::make_unique<double[]>(
size_t(S2.
getSize()) + 1);
575 ForestDist[I] = std::make_unique<double[]>(
size_t(S2.
getSize()) + 1);
580 std::vector<std::pair<NodeId, NodeId>> Matches;
581 std::vector<std::pair<SNodeId, SNodeId>> TreePairs;
585 bool RootNodePair =
true;
589 while (!TreePairs.empty()) {
590 SNodeId LastRow, LastCol, FirstRow, FirstCol, Row, Col;
591 std::tie(LastRow, LastCol) = TreePairs.back();
592 TreePairs.pop_back();
595 computeForestDist(LastRow, LastCol);
598 RootNodePair =
false;
606 while (Row > FirstRow || Col > FirstCol) {
607 if (Row > FirstRow &&
608 ForestDist[Row - 1][Col] + 1 == ForestDist[Row][Col]) {
610 }
else if (Col > FirstCol &&
611 ForestDist[Row][Col - 1] + 1 == ForestDist[Row][Col]) {
620 assert(DiffImpl.isMatchingPossible(Id1, Id2) &&
621 "These nodes must not be matched.");
622 Matches.emplace_back(Id1, Id2);
626 TreePairs.emplace_back(Row, Col);
642 static constexpr
double DeletionCost = 1;
643 static constexpr
double InsertionCost = 1;
651 void computeTreeDist() {
654 computeForestDist(Id1, Id2);
657 void computeForestDist(SNodeId Id1, SNodeId Id2) {
658 assert(Id1 > 0 && Id2 > 0 &&
"Expecting offsets greater than 0.");
662 ForestDist[LMD1][LMD2] = 0;
663 for (SNodeId D1 = LMD1 + 1; D1 <= Id1; ++D1) {
664 ForestDist[D1][LMD2] = ForestDist[D1 - 1][LMD2] + DeletionCost;
665 for (SNodeId D2 = LMD2 + 1; D2 <= Id2; ++D2) {
666 ForestDist[LMD1][D2] = ForestDist[LMD1][D2 - 1] + InsertionCost;
669 if (DLMD1 == LMD1 && DLMD2 == LMD2) {
670 double UpdateCost = getUpdateCost(D1, D2);
672 std::min({ForestDist[D1 - 1][D2] + DeletionCost,
673 ForestDist[D1][D2 - 1] + InsertionCost,
674 ForestDist[D1 - 1][D2 - 1] + UpdateCost});
675 TreeDist[D1][D2] = ForestDist[D1][D2];
678 std::min({ForestDist[D1 - 1][D2] + DeletionCost,
679 ForestDist[D1][D2 - 1] + InsertionCost,
680 ForestDist[DLMD1][DLMD2] + TreeDist[D1][D2]});
692 if (
auto *ND = ASTNode.get<
NamedDecl>()) {
693 if (ND->getDeclName().isIdentifier())
694 return ND->getQualifiedNameAsString();
700 if (
auto *ND = ASTNode.get<
NamedDecl>()) {
701 if (ND->getDeclName().isIdentifier())
702 return ND->getName();
712 bool operator()(NodeId Id1, NodeId Id2)
const {
713 return Tree.getNode(Id1).Height <
Tree.getNode(Id2).Height;
721 const SyntaxTree::Impl &
Tree;
723 std::vector<NodeId> Container;
724 PriorityQueue<NodeId, std::vector<NodeId>, HeightLess> List;
727 PriorityList(
const SyntaxTree::Impl &
Tree)
730 void push(NodeId
id) { List.push(
id); }
732 std::vector<NodeId> pop() {
734 std::vector<NodeId> Result;
737 while (peekMax() == Max) {
738 Result.push_back(List.top());
745 int peekMax()
const {
748 return Tree.getNode(List.top()).Height;
750 void open(NodeId
Id) {
751 for (NodeId Child :
Tree.getNode(
Id).Children)
757 bool ASTDiff::Impl::identical(NodeId Id1, NodeId Id2)
const {
758 const Node &N1 = T1.getNode(Id1);
759 const Node &N2 = T2.getNode(Id2);
760 if (N1.Children.size() != N2.Children.size() ||
761 !isMatchingPossible(Id1, Id2) ||
762 T1.getNodeValue(Id1) != T2.getNodeValue(Id2))
764 for (
size_t Id = 0, E = N1.Children.size();
Id < E; ++
Id)
765 if (!identical(N1.Children[
Id], N2.Children[
Id]))
770 bool ASTDiff::Impl::isMatchingPossible(NodeId Id1, NodeId Id2)
const {
771 return Options.isMatchingAllowed(T1.getNode(Id1), T2.getNode(Id2));
774 bool ASTDiff::Impl::haveSameParents(
const Mapping &M, NodeId Id1,
776 NodeId P1 = T1.getNode(Id1).Parent;
777 NodeId P2 = T2.getNode(Id2).Parent;
778 return (P1.isInvalid() && P2.isInvalid()) ||
779 (P1.isValid() && P2.isValid() && M.getDst(P1) == P2);
782 void ASTDiff::Impl::addOptimalMapping(Mapping &M, NodeId Id1,
784 if (
std::max(T1.getNumberOfDescendants(Id1), T2.getNumberOfDescendants(Id2)) >
787 ZhangShashaMatcher Matcher(*
this, T1, T2, Id1, Id2);
788 std::vector<std::pair<NodeId, NodeId>> R = Matcher.getMatchingNodes();
789 for (
const auto &Tuple : R) {
790 NodeId Src = Tuple.first;
791 NodeId Dst = Tuple.second;
792 if (!M.hasSrc(Src) && !M.hasDst(Dst))
797 double ASTDiff::Impl::getJaccardSimilarity(
const Mapping &M, NodeId Id1,
799 int CommonDescendants = 0;
800 const Node &N1 = T1.getNode(Id1);
802 for (NodeId Src = Id1 + 1; Src <= N1.RightMostDescendant; ++Src) {
803 NodeId Dst = M.getDst(Src);
804 CommonDescendants +=
int(Dst.isValid() && T2.isInSubtree(Dst, Id2));
807 double Denominator = T1.getNumberOfDescendants(Id1) - 1 +
808 T2.getNumberOfDescendants(Id2) - 1 - CommonDescendants;
810 assert(Denominator >= 0 &&
"Expected non-negative denominator.");
811 if (Denominator == 0)
813 return CommonDescendants / Denominator;
816 NodeId ASTDiff::Impl::findCandidate(
const Mapping &M, NodeId Id1)
const {
818 double HighestSimilarity = 0.0;
819 for (NodeId Id2 : T2) {
820 if (!isMatchingPossible(Id1, Id2))
824 double Similarity = getJaccardSimilarity(M, Id1, Id2);
825 if (Similarity >= Options.MinSimilarity && Similarity > HighestSimilarity) {
826 HighestSimilarity = Similarity;
833 void ASTDiff::Impl::matchBottomUp(Mapping &M)
const {
835 for (NodeId Id1 : Postorder) {
836 if (Id1 == T1.getRootId() && !M.hasSrc(T1.getRootId()) &&
837 !M.hasDst(T2.getRootId())) {
838 if (isMatchingPossible(T1.getRootId(), T2.getRootId())) {
839 M.link(T1.getRootId(), T2.getRootId());
840 addOptimalMapping(M, T1.getRootId(), T2.getRootId());
844 bool Matched = M.hasSrc(Id1);
845 const Node &N1 = T1.getNode(Id1);
846 bool MatchedChildren = llvm::any_of(
847 N1.Children, [&](NodeId Child) { return M.hasSrc(Child); });
848 if (Matched || !MatchedChildren)
850 NodeId Id2 = findCandidate(M, Id1);
853 addOptimalMapping(M, Id1, Id2);
858 Mapping ASTDiff::Impl::matchTopDown()
const {
862 Mapping M(T1.getSize() + T2.getSize());
864 L1.push(T1.getRootId());
865 L2.push(T2.getRootId());
868 while (
std::min(Max1 = L1.peekMax(), Max2 = L2.peekMax()) >
871 for (NodeId
Id : L1.pop())
876 for (NodeId
Id : L2.pop())
880 std::vector<NodeId> H1, H2;
883 for (NodeId Id1 : H1) {
884 for (NodeId Id2 : H2) {
885 if (identical(Id1, Id2) && !M.hasSrc(Id1) && !M.hasDst(Id2)) {
886 for (
int I = 0, E = T1.getNumberOfDescendants(Id1); I < E; ++I)
887 M.link(Id1 + I, Id2 + I);
891 for (NodeId Id1 : H1) {
895 for (NodeId Id2 : H2) {
905 : T1(T1), T2(T2), Options(Options) {
911 TheMapping = matchTopDown();
912 if (Options.StopAfterTopDown)
914 matchBottomUp(TheMapping);
919 if (!M.hasSrc(Id1)) {
920 T1.getMutableNode(Id1).Change =
Delete;
921 T1.getMutableNode(Id1).Shift -= 1;
925 if (!M.hasDst(Id2)) {
926 T2.getMutableNode(Id2).Change =
Insert;
927 T2.getMutableNode(Id2).Shift -= 1;
930 for (
NodeId Id1 : T1.NodesBfs) {
931 NodeId Id2 = M.getDst(Id1);
934 if (!haveSameParents(M, Id1, Id2) ||
935 T1.findPositionInParent(Id1,
true) !=
936 T2.findPositionInParent(Id2,
true)) {
937 T1.getMutableNode(Id1).Shift -= 1;
938 T2.getMutableNode(Id2).Shift -= 1;
941 for (
NodeId Id2 : T2.NodesBfs) {
942 NodeId Id1 = M.getSrc(Id2);
945 Node &N1 = T1.getMutableNode(Id1);
946 Node &N2 = T2.getMutableNode(Id2);
949 if (!haveSameParents(M, Id1, Id2) ||
950 T1.findPositionInParent(Id1,
true) !=
951 T2.findPositionInParent(Id2,
true)) {
954 if (T1.getNodeValue(Id1) != T2.getNodeValue(Id2)) {
962 : DiffImpl(
std::make_unique<
Impl>(*T1.TreeImpl, *T2.TreeImpl, Options)) {}
972 this, AST.getTranslationUnitDecl(), AST)) {}
993 std::pair<unsigned, unsigned>
999 Range.getEnd(), 0, SrcMgr,
TreeImpl->AST.getLangOpts());
1001 if (ThisExpr->isImplicit())