clang  12.0.0git
Tree.h
Go to the documentation of this file.
1 //===- Tree.h - structure of the syntax tree ------------------*- C++ -*-=====//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // Defines the basic structure of the syntax tree. There are two kinds of nodes:
9 // - leaf nodes correspond to a token in the expanded token stream,
10 // - tree nodes correspond to language grammar constructs.
11 //
12 // The tree is initially built from an AST. Each node of a newly built tree
13 // covers a continuous subrange of expanded tokens (i.e. tokens after
14 // preprocessing); the specific tokens covered are stored in the leaf nodes of
15 // a tree. A post-order traversal of a tree will visit leaf nodes in an order
16 // corresponding to the original order of expanded tokens.
17 //
18 // This is still work in progress and highly experimental, we leave room for
19 // ourselves to completely change the design and/or implementation.
20 //===----------------------------------------------------------------------===//
21 #ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_CASCADE_H
22 #define LLVM_CLANG_TOOLING_SYNTAX_TREE_CASCADE_H
23 
27 #include "clang/Basic/TokenKinds.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/Support/Allocator.h"
32 #include <cstdint>
33 
34 namespace clang {
35 namespace syntax {
36 
37 /// A memory arena for syntax trees. Also tracks the underlying token buffers,
38 /// source manager, etc.
   ///
   /// The source manager and language options are held by reference, not copied.
   /// NOTE(review): presumably both must outlive the arena -- confirm against
   /// the constructor definition, which is not part of this header.
39 class Arena {
40 public:
    /// Creates an arena from a source manager, language options and a token
    /// buffer. \p Tokens is taken by value. Defined out of line.
41  Arena(SourceManager &SourceMgr, const LangOptions &LangOpts,
42  TokenBuffer Tokens);
43 
    /// Read-only view of the source manager; the member itself is a non-const
    /// reference, but only const access is exposed here.
44  const SourceManager &sourceManager() const { return SourceMgr; }
    /// The language options this arena was created with.
45  const LangOptions &langOptions() const { return LangOpts; }
46 
    /// Accessor for the token buffer. Defined out of line.
47  const TokenBuffer &tokenBuffer() const;
    /// Mutable access to the bump allocator that backs node storage.
48  llvm::BumpPtrAllocator &allocator() { return Allocator; }
49 
50  /// Add \p Buffer to the underlying source manager, tokenize it and store the
51  /// resulting tokens. Useful when there is a need to materialize tokens that
52  /// were not written in user code.
    /// NOTE(review): the returned ArrayRef presumably points into ExtraTokens
    /// and is therefore only valid while the arena is alive -- confirm against
    /// the definition.
53  std::pair<FileID, llvm::ArrayRef<syntax::Token>>
54  lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Buffer);
55 
56 private:
57  SourceManager &SourceMgr;
58  const LangOptions &LangOpts;
    /// Token buffer passed to the constructor; stored by value.
59  TokenBuffer Tokens;
60  /// IDs and storage for additional tokenized files.
61  llvm::DenseMap<FileID, std::vector<syntax::Token>> ExtraTokens;
62  /// Keeps all the allocated nodes and their intermediate data structures.
63  llvm::BumpPtrAllocator Allocator;
64 };
65 
66 class Tree;
67 class TreeBuilder;
68 class FactoryImpl;
69 class MutationsImpl;
70 
71 enum class NodeKind : uint16_t;
72 enum class NodeRole : uint8_t;
73 
74 /// A node in a syntax tree. Each node is either a Leaf (representing tokens) or
75 /// a Tree (representing language constructrs).
76 class Node {
77 public:
78  /// Newly created nodes are detached from a tree, parent and sibling links are
79  /// set when the node is added as a child to another one.
81 
82  NodeKind kind() const { return static_cast<NodeKind>(Kind); }
83  NodeRole role() const { return static_cast<NodeRole>(Role); }
84 
85  /// Whether the node is detached from a tree, i.e. does not have a parent.
86  bool isDetached() const;
87  /// Whether the node was created from the AST backed by the source code
88  /// rather than added later through mutation APIs or created with factory
89  /// functions.
90  /// When this flag is true, all subtrees are also original.
91  /// This flag is set to false on any modifications to the node or any of its
92  /// subtrees, even if this simply involves swapping existing subtrees.
93  bool isOriginal() const { return Original; }
94  /// If this function return false, the tree cannot be modified because there
95  /// is no reasonable way to produce the corresponding textual replacements.
96  /// This can happen when the node crosses macro expansion boundaries.
97  ///
98  /// Note that even if the node is not modifiable, its child nodes can be
99  /// modifiable.
100  bool canModify() const { return CanModify; }
101 
102  const Tree *parent() const { return Parent; }
103  Tree *parent() { return Parent; }
104 
105  const Node *nextSibling() const { return NextSibling; }
106  Node *nextSibling() { return NextSibling; }
107 
108  /// Dumps the structure of a subtree. For debugging and testing purposes.
109  std::string dump(const Arena &A) const;
110  /// Dumps the tokens forming this subtree.
111  std::string dumpTokens(const Arena &A) const;
112 
113  /// Asserts invariants on this node of the tree and its immediate children.
114  /// Will not recurse into the subtree. No-op if NDEBUG is set.
115  void assertInvariants() const;
116  /// Runs checkInvariants on all nodes in the subtree. No-op if NDEBUG is set.
117  void assertInvariantsRecursive() const;
118 
119 private:
120  // Tree is allowed to change the Parent link and Role.
121  friend class Tree;
122  // TreeBuilder is allowed to set the Original and CanModify flags.
123  friend class TreeBuilder;
124  // MutationsImpl sets roles and CanModify flag.
125  friend class MutationsImpl;
126  // FactoryImpl sets CanModify flag.
127  friend class FactoryImpl;
128 
129  void setRole(NodeRole NR);
130 
131  Tree *Parent;
132  Node *NextSibling;
133  unsigned Kind : 16;
134  unsigned Role : 8;
135  unsigned Original : 1;
136  unsigned CanModify : 1;
137 };
138 
139 /// A leaf node points to a single token inside the expanded token stream.
140 class Leaf final : public Node {
141 public:
     /// Creates a leaf for the token \p T. Defined out of line.
     /// NOTE(review): the token is referenced through a raw const pointer, so it
     /// is presumably owned elsewhere and must outlive the leaf -- confirm.
142  Leaf(const syntax::Token *T);
     /// Type test on a generic node; NOTE(review): presumably keyed on
     /// N->kind() for LLVM-style casts -- confirm. Defined out of line.
143  static bool classof(const Node *N);
144 
     /// The token pointer held by this leaf.
145  const syntax::Token *token() const { return Tok; }
146 
147 private:
148  const syntax::Token *Tok;
149 };
150 
151 /// A node that has children and represents a syntactic language construct.
152 class Tree : public Node {
153 public:
     /// Inherits the constructors of Node.
154  using Node::Node;
     /// Type test on a generic node; NOTE(review): presumably keyed on
     /// N->kind() for LLVM-style casts -- confirm. Defined out of line.
155  static bool classof(const Node *N);
156 
     /// Head of the child list as stored in FirstChild; null until a child has
     /// been added (FirstChild is initialized to nullptr).
157  Node *firstChild() { return FirstChild; }
158  const Node *firstChild() const { return FirstChild; }
159 
     /// The non-const overload is defined out of line; the const overload
     /// forwards to it through a const_cast and re-adds constness on return.
160  Leaf *firstLeaf();
161  const Leaf *firstLeaf() const {
162  return const_cast<Tree *>(this)->firstLeaf();
163  }
164 
     /// Same forwarding scheme as firstLeaf().
165  Leaf *lastLeaf();
166  const Leaf *lastLeaf() const { return const_cast<Tree *>(this)->lastLeaf(); }
167 
168 protected:
169  /// Find the first node with a corresponding role.
170  syntax::Node *findChild(NodeRole R);
171 
172 private:
173  /// Prepend \p Child to the list of children and set the parent pointer.
174  /// A very low-level operation that does not check any invariants, only used
175  /// by TreeBuilder and FactoryImpl.
176  /// EXPECTS: Role != Detached.
177  void prependChildLowLevel(Node *Child, NodeRole Role);
178  /// Like the previous overload, but does not set role for \p Child.
179  /// EXPECTS: Child->Role != Detached
180  void prependChildLowLevel(Node *Child);
181  friend class TreeBuilder;
182  friend class FactoryImpl;
183 
184  /// Replace a range of children [BeforeBegin->NextSibling, End) with a list of
185  /// new nodes starting at \p New.
186  /// Only used by MutationsImpl to implement higher-level mutation operations.
187  /// (!) \p New can be null to model removal of the child range.
188  void replaceChildRangeLowLevel(Node *BeforeBegin, Node *End, Node *New);
189  friend class MutationsImpl;
190 
191  Node *FirstChild = nullptr;
192 };
193 
194 } // namespace syntax
195 } // namespace clang
196 
197 #endif
const syntax::Token * token() const
Definition: Tree.h:145
std::pair< FileID, llvm::ArrayRef< syntax::Token > > lexBuffer(std::unique_ptr< llvm::MemoryBuffer > Buffer)
Add Buffer to the underlying source manager, tokenize it and store the resulting tokens.
Definition: Tree.cpp:44
Defines the SourceManager interface.
NodeRole role() const
Definition: Tree.h:83
A node in a syntax tree.
Definition: Tree.h:76
A token coming directly from a file or from a macro invocation.
Definition: Tokens.h:104
Node(NodeKind Kind)
Newly created nodes are detached from a tree, parent and sibling links are set when the node is added...
Definition: Tree.cpp:59
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:54
const FormatToken & Tok
static void dump(llvm::raw_ostream &OS, StringRef FunctionName, ArrayRef< CounterExpression > Expressions, ArrayRef< CounterMappingRegion > Regions)
const SourceManager & sourceManager() const
Definition: Tree.h:44
llvm::BumpPtrAllocator & allocator()
Definition: Tree.h:48
const LangOptions & langOptions() const
Definition: Tree.h:45
Node * firstChild()
Definition: Tree.h:157
NodeId Parent
Definition: ASTDiff.cpp:192
const TokenBuffer & tokenBuffer() const
Definition: Tree.cpp:39
A memory arena for syntax trees.
Definition: Tree.h:39
bool canModify() const
If this function return false, the tree cannot be modified because there is no reasonable way to prod...
Definition: Tree.h:100
Defines the clang::LangOptions interface.
SourceLocation End
const Node * nextSibling() const
Definition: Tree.h:105
DynTypedNode Node
A node that has children and represents a syntactic language construct.
Definition: Tree.h:152
NodeKind
A kind of a syntax node, used for implementing casts.
Definition: Nodes.h:37
const Tree * parent() const
Definition: Tree.h:102
Kind
A helper class for constructing the syntax tree while traversing a clang AST.
Definition: BuildTree.cpp:266
const Leaf * lastLeaf() const
Definition: Tree.h:166
Exposes private syntax tree APIs required to implement node synthesis.
Definition: Synthesis.cpp:14
Tree * parent()
Definition: Tree.h:103
NodeRole
A relation between a parent and child node, e.g.
Definition: Nodes.h:124
Dataflow Directional Tag Classes.
Arena(SourceManager &SourceMgr, const LangOptions &LangOpts, TokenBuffer Tokens)
Definition: Tree.cpp:35
A leaf node points to a single token inside the expanded token stream.
Definition: Tree.h:140
bool isOriginal() const
Whether the node was created from the AST backed by the source code rather than added later through m...
Definition: Tree.h:93
static bool classof(const OMPClause *T)
Defines the clang::TokenKind enum and support functions.
Defines the clang::SourceLocation class and associated facilities.
const Leaf * firstLeaf() const
Definition: Tree.h:161
NodeKind kind() const
Definition: Tree.h:82
A list of tokens obtained by preprocessing a text buffer and operations to map between the expanded a...
Definition: Tokens.h:175
Node * nextSibling()
Definition: Tree.h:106
const Node * firstChild() const
Definition: Tree.h:158
This class handles loading and caching of source files into memory.