clang  17.0.0git
Tree.h
Go to the documentation of this file.
1 //===- Tree.h - structure of the syntax tree ------------------*- C++ -*-=====//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // Defines the basic structure of the syntax tree. There are two kinds of nodes:
9 // - leaf nodes correspond to tokens,
10 // - tree nodes correspond to language grammar constructs.
11 //
12 // The tree is initially built from an AST. Each node of a newly built tree
13 // covers a continuous subrange of expanded tokens (i.e. tokens after
14 // preprocessing), the specific tokens coverered are stored in the leaf nodes of
15 // a tree. A post-order traversal of a tree will visit leaf nodes in an order
16 // corresponding the original order of expanded tokens.
17 //
18 // This is still work in progress and highly experimental, we leave room for
19 // ourselves to completely change the design and/or implementation.
20 //===----------------------------------------------------------------------===//
21 #ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_H
22 #define LLVM_CLANG_TOOLING_SYNTAX_TREE_H
23 
24 #include "clang/Basic/TokenKinds.h"
26 #include "llvm/ADT/iterator.h"
27 #include "llvm/Support/Allocator.h"
28 #include <cstdint>
29 #include <vector>
30 
31 namespace clang {
32 namespace syntax {
33 
34 /// A memory arena for syntax trees.
35 // FIXME: use BumpPtrAllocator directly.
36 class Arena {
37 public:
38  llvm::BumpPtrAllocator &getAllocator() { return Allocator; }
39 private:
40  /// Keeps all the allocated nodes and their intermediate data structures.
41  llvm::BumpPtrAllocator Allocator;
42 };
43 
44 class Tree;
45 class TreeBuilder;
46 class FactoryImpl;
47 class MutationsImpl;
48 
49 enum class NodeKind : uint16_t;
50 enum class NodeRole : uint8_t;
51 
52 /// A node in a syntax tree. Each node is either a Leaf (representing tokens) or
53 /// a Tree (representing language constructrs).
54 class Node {
55 protected:
56  /// Newly created nodes are detached from a tree, parent and sibling links are
57  /// set when the node is added as a child to another one.
59  /// Nodes are allocated on Arenas; the destructor is never called.
60  ~Node() = default;
61 
62 public:
63  /// Nodes cannot simply be copied without violating tree invariants.
64  Node(const Node &) = delete;
65  Node &operator=(const Node &) = delete;
66  /// Idiomatically, nodes are allocated on an Arena and never moved.
67  Node(Node &&) = delete;
68  Node &operator=(Node &&) = delete;
69 
70  NodeKind getKind() const { return static_cast<NodeKind>(Kind); }
71  NodeRole getRole() const { return static_cast<NodeRole>(Role); }
72 
73  /// Whether the node is detached from a tree, i.e. does not have a parent.
74  bool isDetached() const;
75  /// Whether the node was created from the AST backed by the source code
76  /// rather than added later through mutation APIs or created with factory
77  /// functions.
78  /// When this flag is true, all subtrees are also original.
79  /// This flag is set to false on any modifications to the node or any of its
80  /// subtrees, even if this simply involves swapping existing subtrees.
81  bool isOriginal() const { return Original; }
82  /// If this function return false, the tree cannot be modified because there
83  /// is no reasonable way to produce the corresponding textual replacements.
84  /// This can happen when the node crosses macro expansion boundaries.
85  ///
86  /// Note that even if the node is not modifiable, its child nodes can be
87  /// modifiable.
88  bool canModify() const { return CanModify; }
89 
90  const Tree *getParent() const { return Parent; }
91  Tree *getParent() { return Parent; }
92 
93  const Node *getNextSibling() const { return NextSibling; }
94  Node *getNextSibling() { return NextSibling; }
95  const Node *getPreviousSibling() const { return PreviousSibling; }
96  Node *getPreviousSibling() { return PreviousSibling; }
97 
98  /// Dumps the structure of a subtree. For debugging and testing purposes.
99  std::string dump(const TokenManager &SM) const;
100  /// Dumps the tokens forming this subtree.
101  std::string dumpTokens(const TokenManager &SM) const;
102 
103  /// Asserts invariants on this node of the tree and its immediate children.
104  /// Will not recurse into the subtree. No-op if NDEBUG is set.
105  void assertInvariants() const;
106  /// Runs checkInvariants on all nodes in the subtree. No-op if NDEBUG is set.
107  void assertInvariantsRecursive() const;
108 
109 private:
110  // Tree is allowed to change the Parent link and Role.
111  friend class Tree;
112  // TreeBuilder is allowed to set the Original and CanModify flags.
113  friend class TreeBuilder;
114  // MutationsImpl sets roles and CanModify flag.
115  friend class MutationsImpl;
116  // FactoryImpl sets CanModify flag.
117  friend class FactoryImpl;
118 
119  void setRole(NodeRole NR);
120 
121  Tree *Parent;
122  Node *NextSibling;
123  Node *PreviousSibling;
124  unsigned Kind : 16;
125  unsigned Role : 8;
126  unsigned Original : 1;
127  unsigned CanModify : 1;
128 };
129 
130 /// A leaf node points to a single token.
131 // FIXME: add TokenKind field (borrow some bits from the Node::kind).
132 class Leaf final : public Node {
133 public:
135  static bool classof(const Node *N);
136 
137  TokenManager::Key getTokenKey() const { return K; }
138 
139 private:
141 };
142 
143 /// A node that has children and represents a syntactic language construct.
144 class Tree : public Node {
145  /// Iterator over children (common base for const/non-const).
146  /// Not invalidated by tree mutations (holds a stable node pointer).
147  template <typename DerivedT, typename NodeT>
148  class ChildIteratorBase
149  : public llvm::iterator_facade_base<DerivedT, std::forward_iterator_tag,
150  NodeT> {
151  protected:
152  NodeT *N = nullptr;
153  using Base = ChildIteratorBase;
154 
155  public:
156  ChildIteratorBase() = default;
157  explicit ChildIteratorBase(NodeT *N) : N(N) {}
158 
159  friend bool operator==(const DerivedT &LHS, const DerivedT &RHS) {
160  return LHS.N == RHS.N;
161  }
162 
163  NodeT &operator*() const { return *N; }
164  DerivedT &operator++() {
165  N = N->getNextSibling();
166  return *static_cast<DerivedT *>(this);
167  }
168 
169  /// Truthy if valid (not past-the-end).
170  /// This allows: if (auto It = find_if(N.children(), ...) )
171  explicit operator bool() const { return N != nullptr; }
172  /// The element, or nullptr if past-the-end.
173  NodeT *asPointer() const { return N; }
174  };
175 
176 public:
177  static bool classof(const Node *N);
178 
179  Node *getFirstChild() { return FirstChild; }
180  const Node *getFirstChild() const { return FirstChild; }
181  Node *getLastChild() { return LastChild; }
182  const Node *getLastChild() const { return LastChild; }
183 
184  const Leaf *findFirstLeaf() const;
186  return const_cast<Leaf *>(const_cast<const Tree *>(this)->findFirstLeaf());
187  }
188 
189  const Leaf *findLastLeaf() const;
191  return const_cast<Leaf *>(const_cast<const Tree *>(this)->findLastLeaf());
192  }
193 
194  /// child_iterator is not invalidated by mutations.
195  struct ChildIterator : ChildIteratorBase<ChildIterator, Node> {
196  using Base::ChildIteratorBase;
197  };
199  : ChildIteratorBase<ConstChildIterator, const Node> {
200  using Base::ChildIteratorBase;
201  ConstChildIterator() = default;
202  ConstChildIterator(const ChildIterator &I) : Base(I.asPointer()) {}
203  };
204 
205  llvm::iterator_range<ChildIterator> getChildren() {
207  }
208  llvm::iterator_range<ConstChildIterator> getChildren() const {
210  }
211 
212  /// Find the first node with a corresponding role.
213  const Node *findChild(NodeRole R) const;
215  return const_cast<Node *>(const_cast<const Tree *>(this)->findChild(R));
216  }
217 
218 protected:
219  using Node::Node;
220 
221 private:
222  /// Append \p Child to the list of children and sets the parent pointer.
223  /// A very low-level operation that does not check any invariants, only used
224  /// by TreeBuilder and FactoryImpl.
225  /// EXPECTS: Role != Detached.
226  void appendChildLowLevel(Node *Child, NodeRole Role);
227  /// Similar but prepends.
228  void prependChildLowLevel(Node *Child, NodeRole Role);
229 
230  /// Like the previous overloads, but does not set role for \p Child.
231  /// EXPECTS: Child->Role != Detached
232  void appendChildLowLevel(Node *Child);
233  void prependChildLowLevel(Node *Child);
234  friend class TreeBuilder;
235  friend class FactoryImpl;
236 
237  /// Replace a range of children [Begin, End) with a list of
238  /// new nodes starting at \p New.
239  /// Only used by MutationsImpl to implement higher-level mutation operations.
240  /// (!) \p New can be null to model removal of the child range.
241  /// (!) \p End can be null to model one past the end.
242  /// (!) \p Begin can be null to model an append.
243  void replaceChildRangeLowLevel(Node *Begin, Node *End, Node *New);
244  friend class MutationsImpl;
245 
246  Node *FirstChild = nullptr;
247  Node *LastChild = nullptr;
248 };
249 
250 /// A list of Elements separated or terminated by a fixed token.
251 ///
252 /// This type models the following grammar construct:
253 /// delimited-list(element, delimiter, termination, canBeEmpty)
254 class List : public Tree {
255 public:
256  template <typename Element> struct ElementAndDelimiter {
257  Element *element;
259  };
260 
261  enum class TerminationKind {
262  Terminated,
264  Separated,
265  };
266 
267  using Tree::Tree;
268  static bool classof(const Node *N);
269  /// Returns the elements and corresponding delimiters. Missing elements
270  /// and delimiters are represented as null pointers.
271  ///
272  /// For example, in a separated list:
273  /// "a, b, c" <=> [("a" , ","), ("b" , "," ), ("c" , null)]
274  /// "a, , c" <=> [("a" , ","), (null, "," ), ("c" , null)]
275  /// "a, b c" <=> [("a" , ","), ("b" , null), ("c" , null)]
276  /// "a, b," <=> [("a" , ","), ("b" , "," ), (null, null)]
277  ///
278  /// In a terminated or maybe-terminated list:
279  /// "a; b; c;" <=> [("a" , ";"), ("b" , ";" ), ("c" , ";" )]
280  /// "a; ; c;" <=> [("a" , ";"), (null, ";" ), ("c" , ";" )]
281  /// "a; b c;" <=> [("a" , ";"), ("b" , null), ("c" , ";" )]
282  /// "a; b; c" <=> [("a" , ";"), ("b" , ";" ), ("c" , null)]
283  std::vector<ElementAndDelimiter<Node>> getElementsAsNodesAndDelimiters();
284 
285  /// Returns the elements of the list. Missing elements are represented
286  /// as null pointers in the same way as in the return value of
287  /// `getElementsAsNodesAndDelimiters()`.
288  std::vector<Node *> getElementsAsNodes();
289 
290  // These can't be implemented with the information we have!
291 
292  /// Returns the appropriate delimiter for this list.
293  ///
294  /// Useful for discovering the correct delimiter to use when adding
295  /// elements to empty or one-element lists.
297 
299 
300  /// Whether this list can be empty in syntactically and semantically correct
301  /// code.
302  ///
303  /// This list may be empty when the source code has errors even if
304  /// canBeEmpty() returns false.
305  bool canBeEmpty() const;
306 };
307 
308 } // namespace syntax
309 } // namespace clang
310 
311 #endif
clang::syntax::List::canBeEmpty
bool canBeEmpty() const
Whether this list can be empty in syntactically and semantically correct code.
Definition: Tree.cpp:427
clang::syntax::Tree::findFirstLeaf
Leaf * findFirstLeaf()
Definition: Tree.h:185
clang::syntax::Tree::ConstChildIterator::ConstChildIterator
ConstChildIterator()=default
clang::syntax::Tree::ConstChildIterator
Definition: Tree.h:198
string
string(SUBSTRING ${CMAKE_CURRENT_BINARY_DIR} 0 ${PATH_LIB_START} PATH_HEAD) string(SUBSTRING $
Definition: CMakeLists.txt:23
clang::syntax::Node::getPreviousSibling
const Node * getPreviousSibling() const
Definition: Tree.h:95
clang::syntax::List::getDelimiterTokenKind
clang::tok::TokenKind getDelimiterTokenKind() const
Returns the appropriate delimiter for this list.
Definition: Tree.cpp:399
clang::syntax::Node::getNextSibling
Node * getNextSibling()
Definition: Tree.h:94
clang::syntax::Node::getParent
const Tree * getParent() const
Definition: Tree.h:90
clang::syntax::Node
A node in a syntax tree.
Definition: Tree.h:54
clang::syntax::Tree::findLastLeaf
Leaf * findLastLeaf()
Definition: Tree.h:190
clang::syntax::Node::Tree
friend class Tree
Definition: Tree.h:111
clang::syntax::List::TerminationKind
TerminationKind
Definition: Tree.h:261
clang::syntax::List::getTerminationKind
TerminationKind getTerminationKind() const
Definition: Tree.cpp:413
clang::syntax::Node::getParent
Tree * getParent()
Definition: Tree.h:91
clang::syntax::List::ElementAndDelimiter::delimiter
Leaf * delimiter
Definition: Tree.h:258
End
SourceLocation End
Definition: USRLocFinder.cpp:167
clang::syntax::Tree::findFirstLeaf
const Leaf * findFirstLeaf() const
Definition: Tree.cpp:283
clang::syntax::Node::canModify
bool canModify() const
If this function return false, the tree cannot be modified because there is no reasonable way to prod...
Definition: Tree.h:88
TokenKinds.h
clang::syntax::List
A list of Elements separated or terminated by a fixed token.
Definition: Tree.h:254
clang::syntax::MutationsImpl
Definition: Mutations.cpp:27
clang::syntax::List::TerminationKind::MaybeTerminated
@ MaybeTerminated
clang::syntax::List::classof
static bool classof(const Node *N)
clang::syntax::Tree
A node that has children and represents a syntactic language construct.
Definition: Tree.h:144
Node
DynTypedNode Node
Definition: ASTMatchFinder.cpp:68
clang::syntax::Tree::getLastChild
Node * getLastChild()
Definition: Tree.h:181
clang::syntax::Node::Node
Node(NodeKind Kind)
Newly created nodes are detached from a tree, parent and sibling links are set when the node is added...
Definition: Tree.cpp:37
clang::syntax::List::TerminationKind::Separated
@ Separated
clang::syntax::Leaf::getTokenKey
TokenManager::Key getTokenKey() const
Definition: Tree.h:137
bool
#define bool
Definition: stdbool.h:20
clang::syntax::TreeBuilder
A helper class for constructing the syntax tree while traversing a clang AST.
Definition: BuildTree.cpp:367
clang::syntax::List::getElementsAsNodes
std::vector< Node * > getElementsAsNodes()
Returns the elements of the list.
Definition: Tree.cpp:357
clang::syntax::List::getElementsAsNodesAndDelimiters
std::vector< ElementAndDelimiter< Node > > getElementsAsNodesAndDelimiters()
Returns the elements and corresponding delimiters.
Definition: Tree.cpp:312
Base
clang::syntax::Node::getKind
NodeKind getKind() const
Definition: Tree.h:70
clang::syntax::List::TerminationKind::Terminated
@ Terminated
clang::syntax::TokenManager
Defines interfaces for operating "Token" in the clang syntax-tree.
Definition: TokenManager.h:29
clang::syntax::Node::getRole
NodeRole getRole() const
Definition: Tree.h:71
clang::syntax::Tree::ChildIterator
child_iterator is not invalidated by mutations.
Definition: Tree.h:195
clang::syntax::Leaf::classof
static bool classof(const Node *N)
operator*
clang::CharUnits operator*(clang::CharUnits::QuantityType Scale, const clang::CharUnits &CU)
Definition: CharUnits.h:225
clang::syntax::Tree::getChildren
llvm::iterator_range< ConstChildIterator > getChildren() const
Definition: Tree.h:208
clang::tok::TokenKind
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
clang::syntax::Tree::ConstChildIterator::ConstChildIterator
ConstChildIterator(const ChildIterator &I)
Definition: Tree.h:202
clang::syntax::TokenManager::Key
uintptr_t Key
A key to identify a specific token.
Definition: TokenManager.h:40
clang::syntax::List::ElementAndDelimiter
Definition: Tree.h:256
clang::syntax::Arena
A memory arena for syntax trees.
Definition: Tree.h:36
dump
static void dump(llvm::raw_ostream &OS, StringRef FunctionName, ArrayRef< CounterExpression > Expressions, ArrayRef< CounterMappingRegion > Regions)
Definition: CoverageMappingGen.cpp:1542
clang::syntax::Tree::findChild
const Node * findChild(NodeRole R) const
Find the first node with a corresponding role.
Definition: Tree.cpp:303
Begin
SourceLocation Begin
Definition: USRLocFinder.cpp:165
clang::syntax::Tree::classof
static bool classof(const Node *N)
clang::syntax::Arena::getAllocator
llvm::BumpPtrAllocator & getAllocator()
Definition: Tree.h:38
clang::syntax::Node::getPreviousSibling
Node * getPreviousSibling()
Definition: Tree.h:96
clang::syntax::FactoryImpl
Exposes private syntax tree APIs required to implement node synthesis.
Definition: Synthesis.cpp:18
clang::syntax::Leaf
A leaf node points to a single token.
Definition: Tree.h:132
clang::ObjCPropertyAttribute::Kind
Kind
Definition: DeclObjCCommon.h:22
clang::syntax::NodeRole
NodeRole
A relation between a parent and child node, e.g.
Definition: Nodes.h:54
clang::syntax::Tree::findChild
Node * findChild(NodeRole R)
Definition: Tree.h:214
clang
Definition: CalledOnceCheck.h:17
clang::syntax::NodeKind
NodeKind
A kind of a syntax node, used for implementing casts.
Definition: Nodes.h:32
clang::syntax::Tree::findLastLeaf
const Leaf * findLastLeaf() const
Definition: Tree.cpp:293
clang::syntax::Tree::getChildren
llvm::iterator_range< ChildIterator > getChildren()
Definition: Tree.h:205
clang::syntax::Node::isOriginal
bool isOriginal() const
Whether the node was created from the AST backed by the source code rather than added later through m...
Definition: Tree.h:81
clang::syntax::Leaf::Leaf
Leaf(TokenManager::Key K)
Definition: Tree.cpp:35
Parent
NodeId Parent
Definition: ASTDiff.cpp:191
clang::syntax::Node::getNextSibling
const Node * getNextSibling() const
Definition: Tree.h:93
Tree
SyntaxTree::Impl & Tree
Definition: ASTDiff.cpp:192
clang::syntax::Tree::getFirstChild
const Node * getFirstChild() const
Definition: Tree.h:180
SM
#define SM(sm)
Definition: Cuda.cpp:78
TokenManager.h
clang::syntax::Tree::getFirstChild
Node * getFirstChild()
Definition: Tree.h:179
clang::syntax::Tree::getLastChild
const Node * getLastChild() const
Definition: Tree.h:182
clang::syntax::List::ElementAndDelimiter::element
Element * element
Definition: Tree.h:257
clang::operator==
bool operator==(const CallGraphNode::CallRecord &LHS, const CallGraphNode::CallRecord &RHS)
Definition: CallGraph.h:207