clang  10.0.0svn
Tree.h
Go to the documentation of this file.
1 //===- Tree.h - structure of the syntax tree ------------------*- C++ -*-=====//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // Defines the basic structure of the syntax tree. There are two kinds of nodes:
9 // - leaf nodes correspond to a token in the expanded token stream,
10 // - tree nodes correspond to language grammar constructs.
11 //
12 // The tree is initially built from an AST. Each node of a newly built tree
13 // covers a continuous subrange of expanded tokens (i.e. tokens after
14 // preprocessing); the specific tokens covered are stored in the leaf nodes of
15 // a tree. A post-order traversal of a tree will visit leaf nodes in an order
16 // corresponding to the original order of expanded tokens.
17 //
18 // This is still a work in progress and highly experimental; we leave room for
19 // ourselves to completely change the design and/or implementation.
20 //===----------------------------------------------------------------------===//
21 #ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_CASCADE_H
22 #define LLVM_CLANG_TOOLING_SYNTAX_TREE_CASCADE_H
23 
27 #include "clang/Basic/TokenKinds.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/Support/Allocator.h"
32 #include <cstdint>
33 
34 namespace clang {
35 namespace syntax {
36 
37 /// A memory arena for syntax trees. Also tracks the underlying token buffers,
38 /// source manager, etc.
39 class Arena {
40 public:
    /// Binds the arena to \p SourceMgr and \p LangOpts (both kept as reference
    /// members, so they must outlive the arena) and takes \p Tokens by value.
    /// Defined out of line.
41  Arena(SourceManager &SourceMgr, const LangOptions &LangOpts,
42  TokenBuffer Tokens);
43 
    /// Read-only views of the source manager and language options the arena
    /// holds references to.
44  const SourceManager &sourceManager() const { return SourceMgr; }
45  const LangOptions &langOptions() const { return LangOpts; }
46 
    /// Defined out of line; presumably returns the Tokens member -- confirm
    /// against the definition.
47  const TokenBuffer &tokenBuffer() const;
    /// Mutable access to the bump allocator owned by this arena.
48  llvm::BumpPtrAllocator &allocator() { return Allocator; }
49 
50  /// Add \p Buffer to the underlying source manager, tokenize it and store the
51  /// resulting tokens. Useful when there is a need to materialize tokens that
52  /// were not written in user code.
    /// Returns a FileID paired with an ArrayRef over tokens.
    /// NOTE(review): presumably the FileID identifies the added buffer and the
    /// tokens are kept in ExtraTokens -- confirm against the definition.
53  std::pair<FileID, llvm::ArrayRef<syntax::Token>>
54  lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Buffer);
55 
56 private:
    // Reference members: the arena does not own these objects.
57  SourceManager &SourceMgr;
58  const LangOptions &LangOpts;
    // Held by value.
59  TokenBuffer Tokens;
60  /// IDs and storage for additional tokenized files.
61  llvm::DenseMap<FileID, std::vector<syntax::Token>> ExtraTokens;
62  /// Keeps all the allocated nodes and their intermediate data structures.
63  llvm::BumpPtrAllocator Allocator;
64 };
65 
// Forward declarations. Node refers to Tree (parent pointer, friend) before
// Tree is defined below; Tree befriends TreeBuilder, which is not defined in
// this header.
66 class Tree;
67 class TreeBuilder;
// Opaque enum declarations with fixed underlying types. Their widths match the
// Kind (16-bit) and Role (8-bit) bit-fields in Node. The cross-reference index
// of this listing places both definitions in Nodes.h.
68 enum class NodeKind : uint16_t;
69 enum class NodeRole : uint8_t;
70 
71 /// A node in a syntax tree. Each node is either a Leaf (representing tokens) or
72 /// a Tree (representing language constructs).
73 class Node {
74 public:
75  /// Newly created nodes are detached from a tree, parent and sibling links are
76  /// set when the node is added as a child to another one.
77  Node(NodeKind Kind);
78 
    /// Kind and Role are stored as bit-fields and converted back to their enum
    /// types on access.
79  NodeKind kind() const { return static_cast<NodeKind>(Kind); }
80  NodeRole role() const { return static_cast<NodeRole>(Role); }
81 
    /// The tree this node was added to as a child.
    /// NOTE(review): the value held while the node is still detached is set by
    /// the out-of-line constructor -- presumably null, confirm there.
82  const Tree *parent() const { return Parent; }
83  Tree *parent() { return Parent; }
84 
    /// The stored sibling link. NOTE(review): presumably the next child of the
    /// same parent, in child-list order -- confirm against Tree's child list.
85  const Node *nextSibling() const { return NextSibling; }
86  Node *nextSibling() { return NextSibling; }
87 
88  /// Dumps the structure of a subtree. For debugging and testing purposes.
89  std::string dump(const Arena &A) const;
90  /// Dumps the tokens forming this subtree.
91  std::string dumpTokens(const Arena &A) const;
92 
93 private:
94  // Tree is allowed to change the Parent link and Role.
95  friend class Tree;
96 
97  Tree *Parent;
98  Node *NextSibling;
    // Bit-field widths match the underlying types declared for NodeKind
    // (uint16_t) and NodeRole (uint8_t).
99  unsigned Kind : 16;
100  unsigned Role : 8;
101 };
102 
103 /// A leaf node points to a single token inside the expanded token stream.
104 class Leaf final : public Node {
105 public:
     /// Builds a leaf for the token \p T. Defined out of line; presumably \p T
     /// is what token() later returns -- confirm against the definition.
106  Leaf(const syntax::Token *T);
     /// Kind-based type test on a base Node pointer. Defined out of line.
     /// NOTE(review): looks like the hook for LLVM-style isa/dyn_cast --
     /// confirm.
107  static bool classof(const Node *N);
108 
     /// The token pointer stored in this leaf.
109  const syntax::Token *token() const { return Tok; }
110 
111 private:
112  const syntax::Token *Tok;
113 };
114 
115 /// A node that has children and represents a syntactic language construct.
116 class Tree : public Node {
117 public:
     /// Inherits Node's constructors.
118  using Node::Node;
     /// Kind-based type test on a base Node pointer. Defined out of line.
     /// NOTE(review): looks like the hook for LLVM-style isa/dyn_cast --
     /// confirm.
119  static bool classof(const Node *N);
120 
     /// Head of the child list; FirstChild starts out as nullptr.
121  Node *firstChild() { return FirstChild; }
122  const Node *firstChild() const { return FirstChild; }
123 
124 protected:
125  /// Find the first node with a corresponding role.
     /// NOTE(review): the result when no child has role \p R is not visible
     /// here -- presumably nullptr, confirm against the definition.
126  syntax::Node *findChild(NodeRole R);
127 
128 private:
129  /// Prepends \p Child to the list of children and sets the parent pointer.
130  /// A very low-level operation that does not check any invariants, only used
131  /// by TreeBuilder.
132  /// EXPECTS: Role != NodeRoleDetached.
133  void prependChildLowLevel(Node *Child, NodeRole Role);
134  friend class TreeBuilder;
135 
136  Node *FirstChild = nullptr;
137 };
138 
139 } // namespace syntax
140 } // namespace clang
141 
142 #endif
const syntax::Token * token() const
Definition: Tree.h:109
std::pair< FileID, llvm::ArrayRef< syntax::Token > > lexBuffer(std::unique_ptr< llvm::MemoryBuffer > Buffer)
Add Buffer to the underlying source manager, tokenize it and store the resulting tokens.
Definition: Tree.cpp:26
Defines the SourceManager interface.
NodeRole role() const
Definition: Tree.h:80
A node in a syntax tree.
Definition: Tree.h:73
A token coming directly from a file or from a macro invocation.
Definition: Tokens.h:100
Node(NodeKind Kind)
Newly created nodes are detached from a tree, parent and sibling links are set when the node is added...
Definition: Tree.cpp:41
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:49
const FormatToken & Tok
static void dump(llvm::raw_ostream &OS, StringRef FunctionName, ArrayRef< CounterExpression > Expressions, ArrayRef< CounterMappingRegion > Regions)
const SourceManager & sourceManager() const
Definition: Tree.h:44
llvm::BumpPtrAllocator & allocator()
Definition: Tree.h:48
const LangOptions & langOptions() const
Definition: Tree.h:45
Node * firstChild()
Definition: Tree.h:121
NodeId Parent
Definition: ASTDiff.cpp:191
const TokenBuffer & tokenBuffer() const
Definition: Tree.cpp:21
A memory arena for syntax trees.
Definition: Tree.h:39
Defines the clang::LangOptions interface.
const Node * nextSibling() const
Definition: Tree.h:85
A node that has children and represents a syntactic language construct.
Definition: Tree.h:116
NodeKind
A kind of a syntax node, used for implementing casts.
Definition: Nodes.h:25
const Tree * parent() const
Definition: Tree.h:82
Kind
ast_type_traits::DynTypedNode Node
Tree * parent()
Definition: Tree.h:83
NodeRole
A relation between a parent and child node. Used for implementing accessors.
Definition: Nodes.h:35
Dataflow Directional Tag Classes.
Arena(SourceManager &SourceMgr, const LangOptions &LangOpts, TokenBuffer Tokens)
Definition: Tree.cpp:17
A leaf node points to a single token inside the expanded token stream.
Definition: Tree.h:104
static bool classof(const OMPClause *T)
Defines the clang::TokenKind enum and support functions.
Defines the clang::SourceLocation class and associated facilities.
NodeKind kind() const
Definition: Tree.h:79
A list of tokens obtained by preprocessing a text buffer and operations to map between the expanded a...
Definition: Tokens.h:172
Node * nextSibling()
Definition: Tree.h:86
const Node * firstChild() const
Definition: Tree.h:122
This class handles loading and caching of source files into memory.