Tree.h revision 360784
1//===- Tree.h - structure of the syntax tree ------------------*- C++ -*-=====//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// Defines the basic structure of the syntax tree. There are two kinds of nodes:
9//   - leaf nodes correspond to a token in the expanded token stream,
10//   - tree nodes correspond to language grammar constructs.
11//
12// The tree is initially built from an AST. Each node of a newly built tree
13// covers a continous subrange of expanded tokens (i.e. tokens after
14// preprocessing), the specific tokens coverered are stored in the leaf nodes of
15// a tree. A post-order traversal of a tree will visit leaf nodes in an order
16// corresponding the original order of expanded tokens.
17//
18// This is still work in progress and highly experimental, we leave room for
19// ourselves to completely change the design and/or implementation.
20//===----------------------------------------------------------------------===//
21#ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_CASCADE_H
22#define LLVM_CLANG_TOOLING_SYNTAX_TREE_CASCADE_H
23
24#include "clang/Basic/LangOptions.h"
25#include "clang/Basic/SourceLocation.h"
26#include "clang/Basic/SourceManager.h"
27#include "clang/Basic/TokenKinds.h"
28#include "clang/Tooling/Syntax/Tokens.h"
29#include "llvm/ADT/ArrayRef.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/Support/Allocator.h"
32#include <cstdint>
33
34namespace clang {
35namespace syntax {
36
37/// A memory arena for syntax trees. Also tracks the underlying token buffers,
38/// source manager, etc.
39class Arena {
40public:
41  Arena(SourceManager &SourceMgr, const LangOptions &LangOpts,
42        TokenBuffer Tokens);
43
44  const SourceManager &sourceManager() const { return SourceMgr; }
45  const LangOptions &langOptions() const { return LangOpts; }
46
47  const TokenBuffer &tokenBuffer() const;
48  llvm::BumpPtrAllocator &allocator() { return Allocator; }
49
50  /// Add \p Buffer to the underlying source manager, tokenize it and store the
51  /// resulting tokens. Useful when there is a need to materialize tokens that
52  /// were not written in user code.
53  std::pair<FileID, llvm::ArrayRef<syntax::Token>>
54  lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Buffer);
55
56private:
57  SourceManager &SourceMgr;
58  const LangOptions &LangOpts;
59  TokenBuffer Tokens;
60  /// IDs and storage for additional tokenized files.
61  llvm::DenseMap<FileID, std::vector<syntax::Token>> ExtraTokens;
62  /// Keeps all the allocated nodes and their intermediate data structures.
63  llvm::BumpPtrAllocator Allocator;
64};
65
66class Tree;
67class TreeBuilder;
68class FactoryImpl;
69class MutationsImpl;
70
71enum class NodeKind : uint16_t;
72enum class NodeRole : uint8_t;
73
74/// A node in a syntax tree. Each node is either a Leaf (representing tokens) or
75/// a Tree (representing language constructrs).
76class Node {
77public:
78  /// Newly created nodes are detached from a tree, parent and sibling links are
79  /// set when the node is added as a child to another one.
80  Node(NodeKind Kind);
81
82  NodeKind kind() const { return static_cast<NodeKind>(Kind); }
83  NodeRole role() const { return static_cast<NodeRole>(Role); }
84
85  /// Whether the node is detached from a tree, i.e. does not have a parent.
86  bool isDetached() const;
87  /// Whether the node was created from the AST backed by the source code
88  /// rather than added later through mutation APIs or created with factory
89  /// functions.
90  /// When this flag is true, all subtrees are also original.
91  /// This flag is set to false on any modifications to the node or any of its
92  /// subtrees, even if this simply involves swapping existing subtrees.
93  bool isOriginal() const { return Original; }
94  /// If this function return false, the tree cannot be modified because there
95  /// is no reasonable way to produce the corresponding textual replacements.
96  /// This can happen when the node crosses macro expansion boundaries.
97  ///
98  /// Note that even if the node is not modifiable, its child nodes can be
99  /// modifiable.
100  bool canModify() const { return CanModify; }
101
102  const Tree *parent() const { return Parent; }
103  Tree *parent() { return Parent; }
104
105  const Node *nextSibling() const { return NextSibling; }
106  Node *nextSibling() { return NextSibling; }
107
108  /// Dumps the structure of a subtree. For debugging and testing purposes.
109  std::string dump(const Arena &A) const;
110  /// Dumps the tokens forming this subtree.
111  std::string dumpTokens(const Arena &A) const;
112
113  /// Asserts invariants on this node of the tree and its immediate children.
114  /// Will not recurse into the subtree. No-op if NDEBUG is set.
115  void assertInvariants() const;
116  /// Runs checkInvariants on all nodes in the subtree. No-op if NDEBUG is set.
117  void assertInvariantsRecursive() const;
118
119private:
120  // Tree is allowed to change the Parent link and Role.
121  friend class Tree;
122  // TreeBuilder is allowed to set the Original and CanModify flags.
123  friend class TreeBuilder;
124  // MutationsImpl sets roles and CanModify flag.
125  friend class MutationsImpl;
126  // FactoryImpl sets CanModify flag.
127  friend class FactoryImpl;
128
129  Tree *Parent;
130  Node *NextSibling;
131  unsigned Kind : 16;
132  unsigned Role : 8;
133  unsigned Original : 1;
134  unsigned CanModify : 1;
135};
136
137/// A leaf node points to a single token inside the expanded token stream.
138class Leaf final : public Node {
139public:
140  Leaf(const syntax::Token *T);
141  static bool classof(const Node *N);
142
143  const syntax::Token *token() const { return Tok; }
144
145private:
146  const syntax::Token *Tok;
147};
148
149/// A node that has children and represents a syntactic language construct.
150class Tree : public Node {
151public:
152  using Node::Node;
153  static bool classof(const Node *N);
154
155  Node *firstChild() { return FirstChild; }
156  const Node *firstChild() const { return FirstChild; }
157
158  Leaf *firstLeaf();
159  const Leaf *firstLeaf() const {
160    return const_cast<Tree *>(this)->firstLeaf();
161  }
162
163  Leaf *lastLeaf();
164  const Leaf *lastLeaf() const { return const_cast<Tree *>(this)->lastLeaf(); }
165
166protected:
167  /// Find the first node with a corresponding role.
168  syntax::Node *findChild(NodeRole R);
169
170private:
171  /// Prepend \p Child to the list of children and and sets the parent pointer.
172  /// A very low-level operation that does not check any invariants, only used
173  /// by TreeBuilder and FactoryImpl.
174  /// EXPECTS: Role != NodeRoleDetached.
175  void prependChildLowLevel(Node *Child, NodeRole Role);
176  friend class TreeBuilder;
177  friend class FactoryImpl;
178
179  /// Replace a range of children [BeforeBegin->NextSibling, End) with a list of
180  /// new nodes starting at \p New.
181  /// Only used by MutationsImpl to implement higher-level mutation operations.
182  /// (!) \p New can be null to model removal of the child range.
183  void replaceChildRangeLowLevel(Node *BeforeBegin, Node *End, Node *New);
184  friend class MutationsImpl;
185
186  Node *FirstChild = nullptr;
187};
188
189} // namespace syntax
190} // namespace clang
191
192#endif
193