Tree.h revision 360784
1//===- Tree.h - structure of the syntax tree ------------------*- C++ -*-=====// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// Defines the basic structure of the syntax tree. There are two kinds of nodes: 9// - leaf nodes correspond to a token in the expanded token stream, 10// - tree nodes correspond to language grammar constructs. 11// 12// The tree is initially built from an AST. Each node of a newly built tree 13// covers a continous subrange of expanded tokens (i.e. tokens after 14// preprocessing), the specific tokens coverered are stored in the leaf nodes of 15// a tree. A post-order traversal of a tree will visit leaf nodes in an order 16// corresponding the original order of expanded tokens. 17// 18// This is still work in progress and highly experimental, we leave room for 19// ourselves to completely change the design and/or implementation. 20//===----------------------------------------------------------------------===// 21#ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_CASCADE_H 22#define LLVM_CLANG_TOOLING_SYNTAX_TREE_CASCADE_H 23 24#include "clang/Basic/LangOptions.h" 25#include "clang/Basic/SourceLocation.h" 26#include "clang/Basic/SourceManager.h" 27#include "clang/Basic/TokenKinds.h" 28#include "clang/Tooling/Syntax/Tokens.h" 29#include "llvm/ADT/ArrayRef.h" 30#include "llvm/ADT/DenseMap.h" 31#include "llvm/Support/Allocator.h" 32#include <cstdint> 33 34namespace clang { 35namespace syntax { 36 37/// A memory arena for syntax trees. Also tracks the underlying token buffers, 38/// source manager, etc. 39class Arena { 40public: 41 Arena(SourceManager &SourceMgr, const LangOptions &LangOpts, 42 TokenBuffer Tokens); 43 44 const SourceManager &sourceManager() const { return SourceMgr; } 45 const LangOptions &langOptions() const { return LangOpts; } 46 47 const TokenBuffer &tokenBuffer() const; 48 llvm::BumpPtrAllocator &allocator() { return Allocator; } 49 50 /// Add \p Buffer to the underlying source manager, tokenize it and store the 51 /// resulting tokens. Useful when there is a need to materialize tokens that 52 /// were not written in user code. 53 std::pair<FileID, llvm::ArrayRef<syntax::Token>> 54 lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Buffer); 55 56private: 57 SourceManager &SourceMgr; 58 const LangOptions &LangOpts; 59 TokenBuffer Tokens; 60 /// IDs and storage for additional tokenized files. 61 llvm::DenseMap<FileID, std::vector<syntax::Token>> ExtraTokens; 62 /// Keeps all the allocated nodes and their intermediate data structures. 63 llvm::BumpPtrAllocator Allocator; 64}; 65 66class Tree; 67class TreeBuilder; 68class FactoryImpl; 69class MutationsImpl; 70 71enum class NodeKind : uint16_t; 72enum class NodeRole : uint8_t; 73 74/// A node in a syntax tree. Each node is either a Leaf (representing tokens) or 75/// a Tree (representing language constructrs). 76class Node { 77public: 78 /// Newly created nodes are detached from a tree, parent and sibling links are 79 /// set when the node is added as a child to another one. 80 Node(NodeKind Kind); 81 82 NodeKind kind() const { return static_cast<NodeKind>(Kind); } 83 NodeRole role() const { return static_cast<NodeRole>(Role); } 84 85 /// Whether the node is detached from a tree, i.e. does not have a parent. 86 bool isDetached() const; 87 /// Whether the node was created from the AST backed by the source code 88 /// rather than added later through mutation APIs or created with factory 89 /// functions. 90 /// When this flag is true, all subtrees are also original. 91 /// This flag is set to false on any modifications to the node or any of its 92 /// subtrees, even if this simply involves swapping existing subtrees. 93 bool isOriginal() const { return Original; } 94 /// If this function return false, the tree cannot be modified because there 95 /// is no reasonable way to produce the corresponding textual replacements. 96 /// This can happen when the node crosses macro expansion boundaries. 97 /// 98 /// Note that even if the node is not modifiable, its child nodes can be 99 /// modifiable. 100 bool canModify() const { return CanModify; } 101 102 const Tree *parent() const { return Parent; } 103 Tree *parent() { return Parent; } 104 105 const Node *nextSibling() const { return NextSibling; } 106 Node *nextSibling() { return NextSibling; } 107 108 /// Dumps the structure of a subtree. For debugging and testing purposes. 109 std::string dump(const Arena &A) const; 110 /// Dumps the tokens forming this subtree. 111 std::string dumpTokens(const Arena &A) const; 112 113 /// Asserts invariants on this node of the tree and its immediate children. 114 /// Will not recurse into the subtree. No-op if NDEBUG is set. 115 void assertInvariants() const; 116 /// Runs checkInvariants on all nodes in the subtree. No-op if NDEBUG is set. 117 void assertInvariantsRecursive() const; 118 119private: 120 // Tree is allowed to change the Parent link and Role. 121 friend class Tree; 122 // TreeBuilder is allowed to set the Original and CanModify flags. 123 friend class TreeBuilder; 124 // MutationsImpl sets roles and CanModify flag. 125 friend class MutationsImpl; 126 // FactoryImpl sets CanModify flag. 127 friend class FactoryImpl; 128 129 Tree *Parent; 130 Node *NextSibling; 131 unsigned Kind : 16; 132 unsigned Role : 8; 133 unsigned Original : 1; 134 unsigned CanModify : 1; 135}; 136 137/// A leaf node points to a single token inside the expanded token stream. 138class Leaf final : public Node { 139public: 140 Leaf(const syntax::Token *T); 141 static bool classof(const Node *N); 142 143 const syntax::Token *token() const { return Tok; } 144 145private: 146 const syntax::Token *Tok; 147}; 148 149/// A node that has children and represents a syntactic language construct. 150class Tree : public Node { 151public: 152 using Node::Node; 153 static bool classof(const Node *N); 154 155 Node *firstChild() { return FirstChild; } 156 const Node *firstChild() const { return FirstChild; } 157 158 Leaf *firstLeaf(); 159 const Leaf *firstLeaf() const { 160 return const_cast<Tree *>(this)->firstLeaf(); 161 } 162 163 Leaf *lastLeaf(); 164 const Leaf *lastLeaf() const { return const_cast<Tree *>(this)->lastLeaf(); } 165 166protected: 167 /// Find the first node with a corresponding role. 168 syntax::Node *findChild(NodeRole R); 169 170private: 171 /// Prepend \p Child to the list of children and and sets the parent pointer. 172 /// A very low-level operation that does not check any invariants, only used 173 /// by TreeBuilder and FactoryImpl. 174 /// EXPECTS: Role != NodeRoleDetached. 175 void prependChildLowLevel(Node *Child, NodeRole Role); 176 friend class TreeBuilder; 177 friend class FactoryImpl; 178 179 /// Replace a range of children [BeforeBegin->NextSibling, End) with a list of 180 /// new nodes starting at \p New. 181 /// Only used by MutationsImpl to implement higher-level mutation operations. 182 /// (!) \p New can be null to model removal of the child range. 183 void replaceChildRangeLowLevel(Node *BeforeBegin, Node *End, Node *New); 184 friend class MutationsImpl; 185 186 Node *FirstChild = nullptr; 187}; 188 189} // namespace syntax 190} // namespace clang 191 192#endif 193