clang  14.0.0git
Tree.h
Go to the documentation of this file.
1 //===- Tree.h - structure of the syntax tree ------------------*- C++ -*-=====//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // Defines the basic structure of the syntax tree. There are two kinds of nodes:
9 // - leaf nodes correspond to a token in the expanded token stream,
10 // - tree nodes correspond to language grammar constructs.
11 //
12 // The tree is initially built from an AST. Each node of a newly built tree
13 // covers a continous subrange of expanded tokens (i.e. tokens after
14 // preprocessing), the specific tokens coverered are stored in the leaf nodes of
15 // a tree. A post-order traversal of a tree will visit leaf nodes in an order
16 // corresponding the original order of expanded tokens.
17 //
18 // This is still work in progress and highly experimental, we leave room for
19 // ourselves to completely change the design and/or implementation.
20 //===----------------------------------------------------------------------===//
21 #ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_CASCADE_H
22 #define LLVM_CLANG_TOOLING_SYNTAX_TREE_CASCADE_H
23 
27 #include "clang/Basic/TokenKinds.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/ADT/iterator.h"
32 #include "llvm/Support/Allocator.h"
33 #include <cstdint>
34 #include <iterator>
35 
36 namespace clang {
37 namespace syntax {
38 
39 /// A memory arena for syntax trees. Also tracks the underlying token buffers,
40 /// source manager, etc.
41 class Arena {
42 public:
43  Arena(SourceManager &SourceMgr, const LangOptions &LangOpts,
44  const TokenBuffer &Tokens);
45 
46  const SourceManager &getSourceManager() const { return SourceMgr; }
47  const LangOptions &getLangOptions() const { return LangOpts; }
48 
49  const TokenBuffer &getTokenBuffer() const;
50  llvm::BumpPtrAllocator &getAllocator() { return Allocator; }
51 
52 private:
53  /// Add \p Buffer to the underlying source manager, tokenize it and store the
54  /// resulting tokens. Used exclusively in `FactoryImpl` to materialize tokens
55  /// that were not written in user code.
56  std::pair<FileID, ArrayRef<Token>>
57  lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Buffer);
58  friend class FactoryImpl;
59 
60 private:
61  SourceManager &SourceMgr;
62  const LangOptions &LangOpts;
63  const TokenBuffer &Tokens;
64  /// IDs and storage for additional tokenized files.
65  llvm::DenseMap<FileID, std::vector<Token>> ExtraTokens;
66  /// Keeps all the allocated nodes and their intermediate data structures.
67  llvm::BumpPtrAllocator Allocator;
68 };
69 
70 class Tree;
71 class TreeBuilder;
72 class FactoryImpl;
73 class MutationsImpl;
74 
75 enum class NodeKind : uint16_t;
76 enum class NodeRole : uint8_t;
77 
78 /// A node in a syntax tree. Each node is either a Leaf (representing tokens) or
79 /// a Tree (representing language constructrs).
80 class Node {
81 protected:
82  /// Newly created nodes are detached from a tree, parent and sibling links are
83  /// set when the node is added as a child to another one.
85  /// Nodes are allocated on Arenas; the destructor is never called.
86  ~Node() = default;
87 
88 public:
89  /// Nodes cannot simply be copied without violating tree invariants.
90  Node(const Node &) = delete;
91  Node &operator=(const Node &) = delete;
92  /// Idiomatically, nodes are allocated on an Arena and never moved.
93  Node(Node &&) = delete;
94  Node &operator=(Node &&) = delete;
95 
96  NodeKind getKind() const { return static_cast<NodeKind>(Kind); }
97  NodeRole getRole() const { return static_cast<NodeRole>(Role); }
98 
99  /// Whether the node is detached from a tree, i.e. does not have a parent.
100  bool isDetached() const;
101  /// Whether the node was created from the AST backed by the source code
102  /// rather than added later through mutation APIs or created with factory
103  /// functions.
104  /// When this flag is true, all subtrees are also original.
105  /// This flag is set to false on any modifications to the node or any of its
106  /// subtrees, even if this simply involves swapping existing subtrees.
107  bool isOriginal() const { return Original; }
108  /// If this function return false, the tree cannot be modified because there
109  /// is no reasonable way to produce the corresponding textual replacements.
110  /// This can happen when the node crosses macro expansion boundaries.
111  ///
112  /// Note that even if the node is not modifiable, its child nodes can be
113  /// modifiable.
114  bool canModify() const { return CanModify; }
115 
116  const Tree *getParent() const { return Parent; }
117  Tree *getParent() { return Parent; }
118 
119  const Node *getNextSibling() const { return NextSibling; }
120  Node *getNextSibling() { return NextSibling; }
121  const Node *getPreviousSibling() const { return PreviousSibling; }
122  Node *getPreviousSibling() { return PreviousSibling; }
123 
124  /// Dumps the structure of a subtree. For debugging and testing purposes.
125  std::string dump(const SourceManager &SM) const;
126  /// Dumps the tokens forming this subtree.
127  std::string dumpTokens(const SourceManager &SM) const;
128 
129  /// Asserts invariants on this node of the tree and its immediate children.
130  /// Will not recurse into the subtree. No-op if NDEBUG is set.
131  void assertInvariants() const;
132  /// Runs checkInvariants on all nodes in the subtree. No-op if NDEBUG is set.
133  void assertInvariantsRecursive() const;
134 
135 private:
136  // Tree is allowed to change the Parent link and Role.
137  friend class Tree;
138  // TreeBuilder is allowed to set the Original and CanModify flags.
139  friend class TreeBuilder;
140  // MutationsImpl sets roles and CanModify flag.
141  friend class MutationsImpl;
142  // FactoryImpl sets CanModify flag.
143  friend class FactoryImpl;
144 
145  void setRole(NodeRole NR);
146 
147  Tree *Parent;
148  Node *NextSibling;
149  Node *PreviousSibling;
150  unsigned Kind : 16;
151  unsigned Role : 8;
152  unsigned Original : 1;
153  unsigned CanModify : 1;
154 };
155 
156 /// A leaf node points to a single token inside the expanded token stream.
157 class Leaf final : public Node {
158 public:
159  Leaf(const Token *T);
160  static bool classof(const Node *N);
161 
162  const Token *getToken() const { return Tok; }
163 
164 private:
165  const Token *Tok;
166 };
167 
168 /// A node that has children and represents a syntactic language construct.
169 class Tree : public Node {
170  /// Iterator over children (common base for const/non-const).
171  /// Not invalidated by tree mutations (holds a stable node pointer).
172  template <typename DerivedT, typename NodeT>
173  class ChildIteratorBase
174  : public llvm::iterator_facade_base<DerivedT, std::forward_iterator_tag,
175  NodeT> {
176  protected:
177  NodeT *N = nullptr;
178  using Base = ChildIteratorBase;
179 
180  public:
181  ChildIteratorBase() = default;
182  explicit ChildIteratorBase(NodeT *N) : N(N) {}
183 
184  bool operator==(const DerivedT &O) const { return O.N == N; }
185  NodeT &operator*() const { return *N; }
186  DerivedT &operator++() {
187  N = N->getNextSibling();
188  return *static_cast<DerivedT *>(this);
189  }
190 
191  /// Truthy if valid (not past-the-end).
192  /// This allows: if (auto It = find_if(N.children(), ...) )
193  explicit operator bool() const { return N != nullptr; }
194  /// The element, or nullptr if past-the-end.
195  NodeT *asPointer() const { return N; }
196  };
197 
198 public:
199  static bool classof(const Node *N);
200 
201  Node *getFirstChild() { return FirstChild; }
202  const Node *getFirstChild() const { return FirstChild; }
203  Node *getLastChild() { return LastChild; }
204  const Node *getLastChild() const { return LastChild; }
205 
206  const Leaf *findFirstLeaf() const;
208  return const_cast<Leaf *>(const_cast<const Tree *>(this)->findFirstLeaf());
209  }
210 
211  const Leaf *findLastLeaf() const;
213  return const_cast<Leaf *>(const_cast<const Tree *>(this)->findLastLeaf());
214  }
215 
216  /// child_iterator is not invalidated by mutations.
217  struct ChildIterator : ChildIteratorBase<ChildIterator, Node> {
218  using Base::ChildIteratorBase;
219  };
221  : ChildIteratorBase<ConstChildIterator, const Node> {
222  using Base::ChildIteratorBase;
223  ConstChildIterator() = default;
224  ConstChildIterator(const ChildIterator &I) : Base(I.asPointer()) {}
225  };
226 
227  llvm::iterator_range<ChildIterator> getChildren() {
229  }
230  llvm::iterator_range<ConstChildIterator> getChildren() const {
232  }
233 
234  /// Find the first node with a corresponding role.
235  const Node *findChild(NodeRole R) const;
237  return const_cast<Node *>(const_cast<const Tree *>(this)->findChild(R));
238  }
239 
240 protected:
241  using Node::Node;
242 
243 private:
244  /// Append \p Child to the list of children and sets the parent pointer.
245  /// A very low-level operation that does not check any invariants, only used
246  /// by TreeBuilder and FactoryImpl.
247  /// EXPECTS: Role != Detached.
248  void appendChildLowLevel(Node *Child, NodeRole Role);
249  /// Similar but prepends.
250  void prependChildLowLevel(Node *Child, NodeRole Role);
251 
252  /// Like the previous overloads, but does not set role for \p Child.
253  /// EXPECTS: Child->Role != Detached
254  void appendChildLowLevel(Node *Child);
255  void prependChildLowLevel(Node *Child);
256  friend class TreeBuilder;
257  friend class FactoryImpl;
258 
259  /// Replace a range of children [Begin, End) with a list of
260  /// new nodes starting at \p New.
261  /// Only used by MutationsImpl to implement higher-level mutation operations.
262  /// (!) \p New can be null to model removal of the child range.
263  /// (!) \p End can be null to model one past the end.
264  /// (!) \p Begin can be null to model an append.
265  void replaceChildRangeLowLevel(Node *Begin, Node *End, Node *New);
266  friend class MutationsImpl;
267 
268  Node *FirstChild = nullptr;
269  Node *LastChild = nullptr;
270 };
271 
272 // Provide missing non_const == const overload.
273 // iterator_facade_base requires == to be a member, but implicit conversions
274 // don't work on the LHS of a member operator.
276  const Tree::ConstChildIterator &B) {
277  return A.operator==(B);
278 }
279 
280 /// A list of Elements separated or terminated by a fixed token.
281 ///
282 /// This type models the following grammar construct:
283 /// delimited-list(element, delimiter, termination, canBeEmpty)
284 class List : public Tree {
285 public:
286  template <typename Element> struct ElementAndDelimiter {
287  Element *element;
289  };
290 
291  enum class TerminationKind {
292  Terminated,
294  Separated,
295  };
296 
297  using Tree::Tree;
298  static bool classof(const Node *N);
299  /// Returns the elements and corresponding delimiters. Missing elements
300  /// and delimiters are represented as null pointers.
301  ///
302  /// For example, in a separated list:
303  /// "a, b, c" <=> [("a" , ","), ("b" , "," ), ("c" , null)]
304  /// "a, , c" <=> [("a" , ","), (null, "," ), ("c" , null)]
305  /// "a, b c" <=> [("a" , ","), ("b" , null), ("c" , null)]
306  /// "a, b," <=> [("a" , ","), ("b" , "," ), (null, null)]
307  ///
308  /// In a terminated or maybe-terminated list:
309  /// "a; b; c;" <=> [("a" , ";"), ("b" , ";" ), ("c" , ";" )]
310  /// "a; ; c;" <=> [("a" , ";"), (null, ";" ), ("c" , ";" )]
311  /// "a; b c;" <=> [("a" , ";"), ("b" , null), ("c" , ";" )]
312  /// "a; b; c" <=> [("a" , ";"), ("b" , ";" ), ("c" , null)]
313  std::vector<ElementAndDelimiter<Node>> getElementsAsNodesAndDelimiters();
314 
315  /// Returns the elements of the list. Missing elements are represented
316  /// as null pointers in the same way as in the return value of
317  /// `getElementsAsNodesAndDelimiters()`.
318  std::vector<Node *> getElementsAsNodes();
319 
320  // These can't be implemented with the information we have!
321 
322  /// Returns the appropriate delimiter for this list.
323  ///
324  /// Useful for discovering the correct delimiter to use when adding
325  /// elements to empty or one-element lists.
327 
329 
330  /// Whether this list can be empty in syntactically and semantically correct
331  /// code.
332  ///
333  /// This list may be empty when the source code has errors even if
334  /// canBeEmpty() returns false.
335  bool canBeEmpty() const;
336 };
337 
338 } // namespace syntax
339 } // namespace clang
340 
341 #endif
clang::syntax::operator==
bool operator==(const Tree::ConstChildIterator &A, const Tree::ConstChildIterator &B)
Definition: Tree.h:275
clang::syntax::List::canBeEmpty
bool canBeEmpty() const
Whether this list can be empty in syntactically and semantically correct code.
Definition: Tree.cpp:456
clang::syntax::Tree::findFirstLeaf
Leaf * findFirstLeaf()
Definition: Tree.h:207
clang::syntax::Tree::ConstChildIterator::ConstChildIterator
ConstChildIterator()=default
clang::syntax::Tree::ConstChildIterator
Definition: Tree.h:220
string
string(SUBSTRING ${CMAKE_CURRENT_BINARY_DIR} 0 ${PATH_LIB_START} PATH_HEAD) string(SUBSTRING $
Definition: CMakeLists.txt:22
clang::syntax::Node::getPreviousSibling
const Node * getPreviousSibling() const
Definition: Tree.h:121
clang::syntax::List::getDelimiterTokenKind
clang::tok::TokenKind getDelimiterTokenKind() const
Returns the appropriate delimiter for this list.
Definition: Tree.cpp:428
clang::syntax::Node::getNextSibling
Node * getNextSibling()
Definition: Tree.h:120
clang::syntax::Node::getParent
const Tree * getParent() const
Definition: Tree.h:116
clang::syntax::Node
A node in a syntax tree.
Definition: Tree.h:80
clang::syntax::Tree::findLastLeaf
Leaf * findLastLeaf()
Definition: Tree.h:212
clang::syntax::Node::Tree
friend class Tree
Definition: Tree.h:137
clang::syntax::Leaf::Leaf
Leaf(const Token *T)
Definition: Tree.cpp:51
clang::syntax::List::TerminationKind
TerminationKind
Definition: Tree.h:291
clang::syntax::List::getTerminationKind
TerminationKind getTerminationKind() const
Definition: Tree.cpp:442
clang::syntax::Node::getParent
Tree * getParent()
Definition: Tree.h:117
SourceManager.h
clang::syntax::List::ElementAndDelimiter::delimiter
Leaf * delimiter
Definition: Tree.h:288
clang::syntax::Arena::getLangOptions
const LangOptions & getLangOptions() const
Definition: Tree.h:47
End
SourceLocation End
Definition: USRLocFinder.cpp:167
clang::syntax::Tree::findFirstLeaf
const Leaf * findFirstLeaf() const
Definition: Tree.cpp:312
clang::SourceManager
This class handles loading and caching of source files into memory.
Definition: SourceManager.h:626
clang::syntax::Node::canModify
bool canModify() const
If this function return false, the tree cannot be modified because there is no reasonable way to prod...
Definition: Tree.h:114
TokenKinds.h
clang::syntax::List
A list of Elements separated or terminated by a fixed token.
Definition: Tree.h:284
clang::syntax::MutationsImpl
Definition: Mutations.cpp:28
clang::syntax::List::TerminationKind::MaybeTerminated
@ MaybeTerminated
clang::syntax::List::classof
static bool classof(const Node *N)
clang::syntax::Tree
A node that has children and represents a syntactic language construct.
Definition: Tree.h:169
Node
DynTypedNode Node
Definition: ASTMatchFinder.cpp:67
clang::syntax::Tree::getLastChild
Node * getLastChild()
Definition: Tree.h:203
clang::syntax::Node::Node
Node(NodeKind Kind)
Newly created nodes are detached from a tree, parent and sibling links are set when the node is added...
Definition: Tree.cpp:55
LangOptions.h
clang::syntax::List::TerminationKind::Separated
@ Separated
bool
#define bool
Definition: stdbool.h:15
clang::syntax::TreeBuilder
A helper class for constructing the syntax tree while traversing a clang AST.
Definition: BuildTree.cpp:367
clang::syntax::List::getElementsAsNodes
std::vector< Node * > getElementsAsNodes()
Returns the elements of the list.
Definition: Tree.cpp:386
clang::syntax::List::getElementsAsNodesAndDelimiters
std::vector< ElementAndDelimiter< Node > > getElementsAsNodesAndDelimiters()
Returns the elements and corresponding delimiters.
Definition: Tree.cpp:341
Base
clang::syntax::TokenBuffer
A list of tokens obtained by preprocessing a text buffer and operations to map between the expanded a...
Definition: Tokens.h:176
clang::syntax::Node::getKind
NodeKind getKind() const
Definition: Tree.h:96
clang::syntax::List::TerminationKind::Terminated
@ Terminated
clang::syntax::Node::getRole
NodeRole getRole() const
Definition: Tree.h:97
clang::syntax::Tree::ChildIterator
child_iterator is not invalidated by mutations.
Definition: Tree.h:217
clang::syntax::Leaf::classof
static bool classof(const Node *N)
operator*
clang::CharUnits operator*(clang::CharUnits::QuantityType Scale, const clang::CharUnits &CU)
Definition: CharUnits.h:212
clang::syntax::Tree::getChildren
llvm::iterator_range< ConstChildIterator > getChildren() const
Definition: Tree.h:230
SourceLocation.h
clang::tok::TokenKind
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
clang::syntax::Tree::ConstChildIterator::ConstChildIterator
ConstChildIterator(const ChildIterator &I)
Definition: Tree.h:224
Tokens.h
clang::syntax::List::ElementAndDelimiter
Definition: Tree.h:286
clang::syntax::Arena::getTokenBuffer
const TokenBuffer & getTokenBuffer() const
Definition: Tree.cpp:39
clang::syntax::Arena
A memory arena for syntax trees.
Definition: Tree.h:41
dump
static void dump(llvm::raw_ostream &OS, StringRef FunctionName, ArrayRef< CounterExpression > Expressions, ArrayRef< CounterMappingRegion > Regions)
Definition: CoverageMappingGen.cpp:1521
clang::syntax::Tree::findChild
const Node * findChild(NodeRole R) const
Find the first node with a corresponding role.
Definition: Tree.cpp:332
Begin
SourceLocation Begin
Definition: USRLocFinder.cpp:165
clang::syntax::Tree::classof
static bool classof(const Node *N)
clang::syntax::Arena::getAllocator
llvm::BumpPtrAllocator & getAllocator()
Definition: Tree.h:50
clang::syntax::Node::getPreviousSibling
Node * getPreviousSibling()
Definition: Tree.h:122
clang::syntax::FactoryImpl
Exposes private syntax tree APIs required to implement node synthesis.
Definition: Synthesis.cpp:16
clang::syntax::Leaf
A leaf node points to a single token inside the expanded token stream.
Definition: Tree.h:157
clang::LangOptions
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:58
clang::ObjCPropertyAttribute::Kind
Kind
Definition: DeclObjCCommon.h:22
clang::syntax::NodeRole
NodeRole
A relation between a parent and child node, e.g.
Definition: Nodes.h:59
clang::syntax::Token
A token coming directly from a file or from a macro invocation.
Definition: Tokens.h:105
clang::syntax::Tree::findChild
Node * findChild(NodeRole R)
Definition: Tree.h:236
clang
Definition: CalledOnceCheck.h:17
clang::syntax::NodeKind
NodeKind
A kind of a syntax node, used for implementing casts.
Definition: Nodes.h:37
clang::syntax::Tree::findLastLeaf
const Leaf * findLastLeaf() const
Definition: Tree.cpp:322
clang::syntax::Tree::getChildren
llvm::iterator_range< ChildIterator > getChildren()
Definition: Tree.h:227
clang::syntax::Arena::Arena
Arena(SourceManager &SourceMgr, const LangOptions &LangOpts, const TokenBuffer &Tokens)
Definition: Tree.cpp:35
clang::syntax::Node::isOriginal
bool isOriginal() const
Whether the node was created from the AST backed by the source code rather than added later through m...
Definition: Tree.h:107
Parent
NodeId Parent
Definition: ASTDiff.cpp:192
clang::syntax::Node::getNextSibling
const Node * getNextSibling() const
Definition: Tree.h:119
clang::syntax::Tree::getFirstChild
const Node * getFirstChild() const
Definition: Tree.h:202
SM
#define SM(sm)
Definition: Cuda.cpp:78
clang::syntax::Tree::getFirstChild
Node * getFirstChild()
Definition: Tree.h:201
clang::syntax::Arena::getSourceManager
const SourceManager & getSourceManager() const
Definition: Tree.h:46
clang::syntax::Tree::getLastChild
const Node * getLastChild() const
Definition: Tree.h:204
clang::syntax::List::ElementAndDelimiter::element
Element * element
Definition: Tree.h:287
clang::syntax::Leaf::getToken
const Token * getToken() const
Definition: Tree.h:162