clang  10.0.0svn
BuildTree.cpp
Go to the documentation of this file.
1 //===- BuildTree.cpp ------------------------------------------*- C++ -*-=====//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
10 #include "clang/AST/Stmt.h"
11 #include "clang/Basic/LLVM.h"
14 #include "clang/Basic/TokenKinds.h"
15 #include "clang/Lex/Lexer.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/Support/Allocator.h"
23 #include "llvm/Support/Casting.h"
24 #include "llvm/Support/FormatVariadic.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include <map>
27 
28 using namespace clang;
29 
30 /// A helper class for constructing the syntax tree while traversing a clang
31 /// AST.
32 ///
33 /// At each point of the traversal we maintain a list of pending nodes.
34 /// Initially all tokens are added as pending nodes. When processing a clang AST
35 /// node, the clients need to:
36 /// - create a corresponding syntax node,
37 /// - assign roles to all pending child nodes with 'markChild' and
38 /// 'markChildToken',
39 /// - replace the child nodes with the new syntax node in the pending list
40 /// with 'foldNode'.
41 ///
42 /// Note that all children are expected to be processed when building a node.
43 ///
44 /// Call finalize() to finish building the tree and consume the root node.
46 public:
47  TreeBuilder(syntax::Arena &Arena) : Arena(Arena), Pending(Arena) {}
48 
49  llvm::BumpPtrAllocator &allocator() { return Arena.allocator(); }
50 
51  /// Populate children for \p New node, assuming it covers tokens from \p
52  /// Range.
53  void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New);
54 
55  /// Set role for a token starting at \p Loc.
56  void markChildToken(SourceLocation Loc, tok::TokenKind Kind, NodeRole R);
57 
58  /// Finish building the tree and consume the root node.
60  auto Tokens = Arena.tokenBuffer().expandedTokens();
61  assert(!Tokens.empty());
62  assert(Tokens.back().kind() == tok::eof);
63 
64  // Build the root of the tree, consuming all the children.
65  Pending.foldChildren(Tokens.drop_back(),
66  new (Arena.allocator()) syntax::TranslationUnit);
67 
68  return cast<syntax::TranslationUnit>(std::move(Pending).finalize());
69  }
70 
71  /// getRange() finds the syntax tokens corresponding to the passed source
72  /// locations.
73  /// \p First is the start position of the first token and \p Last is the start
74  /// position of the last token.
76  SourceLocation Last) const {
77  assert(First.isValid());
78  assert(Last.isValid());
79  assert(First == Last ||
80  Arena.sourceManager().isBeforeInTranslationUnit(First, Last));
81  return llvm::makeArrayRef(findToken(First), std::next(findToken(Last)));
82  }
84  return getRange(D->getBeginLoc(), D->getEndLoc());
85  }
87  return getRange(S->getBeginLoc(), S->getEndLoc());
88  }
89 
90 private:
91  /// Finds a token starting at \p L. The token must exist.
92  const syntax::Token *findToken(SourceLocation L) const;
93 
94  /// A collection of trees covering the input tokens.
95  /// When created, each tree corresponds to a single token in the file.
96  /// Clients call 'foldChildren' to attach one or more subtrees to a parent
97  /// node and update the list of trees accordingly.
98  ///
99  /// Ensures that added nodes properly nest and cover the whole token stream.
100  struct Forest {
101  Forest(syntax::Arena &A) {
102  assert(!A.tokenBuffer().expandedTokens().empty());
103  assert(A.tokenBuffer().expandedTokens().back().kind() == tok::eof);
104  // Create all leaf nodes.
105  // Note that we do not have 'eof' in the tree.
106  for (auto &T : A.tokenBuffer().expandedTokens().drop_back())
107  Trees.insert(Trees.end(),
108  {&T, NodeAndRole{new (A.allocator()) syntax::Leaf(&T)}});
109  }
110 
111  void assignRole(llvm::ArrayRef<syntax::Token> Range,
112  syntax::NodeRole Role) {
113  assert(!Range.empty());
114  auto It = Trees.lower_bound(Range.begin());
115  assert(It != Trees.end() && "no node found");
116  assert(It->first == Range.begin() && "no child with the specified range");
117  assert((std::next(It) == Trees.end() ||
118  std::next(It)->first == Range.end()) &&
119  "no child with the specified range");
120  It->second.Role = Role;
121  }
122 
123  /// Add \p Node to the forest and fill its children nodes based on the \p
124  /// NodeRange.
125  void foldChildren(llvm::ArrayRef<syntax::Token> NodeTokens,
126  syntax::Tree *Node) {
127  assert(!NodeTokens.empty());
128  assert(Node->firstChild() == nullptr && "node already has children");
129 
130  auto *FirstToken = NodeTokens.begin();
131  auto BeginChildren = Trees.lower_bound(FirstToken);
132  assert(BeginChildren != Trees.end() &&
133  BeginChildren->first == FirstToken &&
134  "fold crosses boundaries of existing subtrees");
135  auto EndChildren = Trees.lower_bound(NodeTokens.end());
136  assert((EndChildren == Trees.end() ||
137  EndChildren->first == NodeTokens.end()) &&
138  "fold crosses boundaries of existing subtrees");
139 
140  // (!) we need to go in reverse order, because we can only prepend.
141  for (auto It = EndChildren; It != BeginChildren; --It)
142  Node->prependChildLowLevel(std::prev(It)->second.Node,
143  std::prev(It)->second.Role);
144 
145  Trees.erase(BeginChildren, EndChildren);
146  Trees.insert({FirstToken, NodeAndRole(Node)});
147  }
148 
149  // EXPECTS: all tokens were consumed and are owned by a single root node.
150  syntax::Node *finalize() && {
151  assert(Trees.size() == 1);
152  auto *Root = Trees.begin()->second.Node;
153  Trees = {};
154  return Root;
155  }
156 
157  std::string str(const syntax::Arena &A) const {
158  std::string R;
159  for (auto It = Trees.begin(); It != Trees.end(); ++It) {
160  unsigned CoveredTokens =
161  It != Trees.end()
162  ? (std::next(It)->first - It->first)
163  : A.tokenBuffer().expandedTokens().end() - It->first;
164 
165  R += llvm::formatv("- '{0}' covers '{1}'+{2} tokens\n",
166  It->second.Node->kind(),
167  It->first->text(A.sourceManager()), CoveredTokens);
168  R += It->second.Node->dump(A);
169  }
170  return R;
171  }
172 
173  private:
174  /// A with a role that should be assigned to it when adding to a parent.
175  struct NodeAndRole {
176  explicit NodeAndRole(syntax::Node *Node)
177  : Node(Node), Role(NodeRole::Unknown) {}
178 
180  NodeRole Role;
181  };
182 
183  /// Maps from the start token to a subtree starting at that token.
184  /// FIXME: storing the end tokens is redundant.
185  /// FIXME: the key of a map is redundant, it is also stored in NodeForRange.
186  std::map<const syntax::Token *, NodeAndRole> Trees;
187  };
188 
189  /// For debugging purposes.
190  std::string str() { return Pending.str(Arena); }
191 
192  syntax::Arena &Arena;
193  Forest Pending;
194 };
195 
196 namespace {
197 class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> {
198 public:
199  explicit BuildTreeVisitor(ASTContext &Ctx, syntax::TreeBuilder &Builder)
200  : Builder(Builder), LangOpts(Ctx.getLangOpts()) {}
201 
202  bool shouldTraversePostOrder() const { return true; }
203 
204  bool TraverseDecl(Decl *D) {
205  if (!D || isa<TranslationUnitDecl>(D))
207  if (!llvm::isa<TranslationUnitDecl>(D->getDeclContext()))
208  return true; // Only build top-level decls for now, do not recurse.
210  }
211 
212  bool VisitDecl(Decl *D) {
213  assert(llvm::isa<TranslationUnitDecl>(D->getDeclContext()) &&
214  "expected a top-level decl");
215  assert(!D->isImplicit());
216  Builder.foldNode(Builder.getRange(D),
217  new (allocator()) syntax::TopLevelDeclaration());
218  return true;
219  }
220 
221  bool WalkUpFromTranslationUnitDecl(TranslationUnitDecl *TU) {
222  // (!) we do not want to call VisitDecl(), the declaration for translation
223  // unit is built by finalize().
224  return true;
225  }
226 
227  bool WalkUpFromCompoundStmt(CompoundStmt *S) {
228  using NodeRole = syntax::NodeRole;
229 
230  Builder.markChildToken(S->getLBracLoc(), tok::l_brace,
232  Builder.markChildToken(S->getRBracLoc(), tok::r_brace,
234 
235  Builder.foldNode(Builder.getRange(S),
236  new (allocator()) syntax::CompoundStatement);
237  return true;
238  }
239 
240 private:
241  /// A small helper to save some typing.
242  llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); }
243 
244  syntax::TreeBuilder &Builder;
245  const LangOptions &LangOpts;
246 };
247 } // namespace
248 
250  syntax::Tree *New) {
251  Pending.foldChildren(Range, New);
252 }
253 
255  tok::TokenKind Kind, NodeRole Role) {
256  if (Loc.isInvalid())
257  return;
258  Pending.assignRole(*findToken(Loc), Role);
259 }
260 
261 const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const {
262  auto Tokens = Arena.tokenBuffer().expandedTokens();
263  auto &SM = Arena.sourceManager();
264  auto It = llvm::partition_point(Tokens, [&](const syntax::Token &T) {
265  return SM.isBeforeInTranslationUnit(T.location(), L);
266  });
267  assert(It != Tokens.end());
268  assert(It->location() == L);
269  return &*It;
270 }
271 
274  TreeBuilder Builder(A);
275  BuildTreeVisitor(TU.getASTContext(), Builder).TraverseAST(TU.getASTContext());
276  return std::move(Builder).finalize();
277 }
SourceLocation getRBracLoc() const
Definition: Stmt.h:1428
Stmt - This represents one statement.
Definition: Stmt.h:66
Defines the SourceManager interface.
Decl - This represents one declaration (or definition), e.g.
Definition: DeclBase.h:88
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: DeclBase.h:421
bool isBeforeInTranslationUnit(SourceLocation LHS, SourceLocation RHS) const
Determines the order of 2 source locations in the translation unit.
syntax::TranslationUnit * buildSyntaxTree(Arena &A, const clang::TranslationUnitDecl &TU)
Build a syntax tree for the main file.
Definition: BuildTree.cpp:273
SourceLocation getEndLoc() const LLVM_READONLY
Definition: DeclBase.h:425
void finalize(TemplateInstantiationCallbackPtrs &Callbacks, const Sema &TheSema)
void markChildToken(SourceLocation Loc, tok::TokenKind Kind, NodeRole R)
Set role for a token starting at Loc.
Definition: BuildTree.cpp:254
void foldNode(llvm::ArrayRef< syntax::Token > Range, syntax::Tree *New)
Populate children for New node, assuming it covers tokens from Range.
Definition: BuildTree.cpp:249
bool TraverseDecl(Decl *D)
Recursively visit a declaration, by dispatching to Traverse*Decl() based on the argument&#39;s dynamic ty...
llvm::BumpPtrAllocator & allocator()
Definition: BuildTree.cpp:49
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:160
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Stmt.cpp:274
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:49
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified...
SourceLocation getLBracLoc() const
Definition: Stmt.h:1427
ASTContext & getASTContext() const
Definition: Decl.h:119
llvm::ArrayRef< syntax::Token > getRange(const Stmt *S) const
Definition: BuildTree.cpp:86
A class that does preorder or postorder depth-first traversal on the entire Clang AST and visits each...
CompoundStmt - This represents a group of statements like { stmt stmt }.
Definition: Stmt.h:1320
A memory arena for syntax trees.
Definition: Tree.h:39
bool isImplicit() const
isImplicit - Indicates whether the declaration was implicitly generated by the implementation.
Definition: DeclBase.h:558
DeclContext * getDeclContext()
Definition: DeclBase.h:438
const SourceManager & SM
Definition: Format.cpp:1667
SourceLocation getEndLoc() const LLVM_READONLY
Definition: Stmt.cpp:287
Kind
Encodes a location in the source.
A helper class for constructing the syntax tree while traversing a clang AST.
Definition: BuildTree.cpp:45
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:24
syntax::TranslationUnit * finalize() &&
Finish building the tree and consume the root node.
Definition: BuildTree.cpp:59
ast_type_traits::DynTypedNode Node
NodeRole
A relation between a parent and child node. Used for implementing accessors.
Definition: Nodes.h:35
Dataflow Directional Tag Classes.
bool isValid() const
Return true if this is a valid SourceLocation object.
SyntaxTree::Impl & Tree
Definition: ASTDiff.cpp:192
Defines the clang::TokenKind enum and support functions.
Defines the clang::SourceLocation class and associated facilities.
llvm::ArrayRef< syntax::Token > getRange(const Decl *D) const
Definition: BuildTree.cpp:83
The top declaration context.
Definition: Decl.h:107
TreeBuilder(syntax::Arena &Arena)
Definition: BuildTree.cpp:47
const LangOptions & getLangOpts() const
Definition: ASTContext.h:723
llvm::ArrayRef< syntax::Token > getRange(SourceLocation First, SourceLocation Last) const
getRange() finds the syntax tokens corresponding to the passed source locations.
Definition: BuildTree.cpp:75