11341Sstevel//===- ClangSyntaxEmitter.cpp - Generate clang Syntax Tree nodes ----------===// 21341Sstevel// 31341Sstevel// The LLVM Compiler Infrastructure 41341Sstevel// 51341Sstevel// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 61341Sstevel// See https://llvm.org/LICENSE.txt for license information. 71341Sstevel// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 81341Sstevel// 91341Sstevel//===----------------------------------------------------------------------===// 101341Sstevel// 111341Sstevel// These backends consume the definitions of Syntax Tree nodes. 121341Sstevel// See clang/include/clang/Tooling/Syntax/{Syntax,Nodes}.td 131341Sstevel// 141341Sstevel// The -gen-clang-syntax-node-list backend produces a .inc with macro calls 151341Sstevel// NODE(Kind, BaseKind) 161341Sstevel// ABSTRACT_NODE(Type, Base, FirstKind, LastKind) 171341Sstevel// similar to those for AST nodes such as AST/DeclNodes.inc. 181341Sstevel// 191341Sstevel// The -gen-clang-syntax-node-classes backend produces definitions for the 201341Sstevel// syntax::Node subclasses (except those marked as External). 211341Sstevel// 221341Sstevel// In future, another backend will encode the structure of the various node 231341Sstevel// types in tables so their invariants can be checked and enforced. 241341Sstevel// 251341Sstevel//===----------------------------------------------------------------------===// 261341Sstevel#include "TableGenBackends.h" 271341Sstevel 281341Sstevel#include <deque> 291341Sstevel 301341Sstevel#include "llvm/ADT/StringExtras.h" 311341Sstevel#include "llvm/Support/FormatVariadic.h" 321341Sstevel#include "llvm/Support/raw_ostream.h" 331341Sstevel#include "llvm/TableGen/Record.h" 341341Sstevel#include "llvm/TableGen/TableGenBackend.h" 351341Sstevel 361341Sstevelnamespace { 371341Sstevelusing llvm::formatv; 381341Sstevel 391341Sstevel// The class hierarchy of Node types. 401341Sstevel// We assemble this in order to be able to define the NodeKind enum in a 411341Sstevel// stable and useful way, where abstract Node subclasses correspond to ranges. 421341Sstevelclass Hierarchy { 431341Sstevelpublic: 441341Sstevel Hierarchy(const llvm::RecordKeeper &Records) { 451341Sstevel for (llvm::Record *T : Records.getAllDerivedDefinitions("NodeType")) 461341Sstevel add(T); 471341Sstevel for (llvm::Record *Derived : Records.getAllDerivedDefinitions("NodeType")) 481341Sstevel if (llvm::Record *Base = Derived->getValueAsOptionalDef("base")) 491341Sstevel link(Derived, Base); 501341Sstevel for (NodeType &N : AllTypes) { 511341Sstevel llvm::sort(N.Derived, [](const NodeType *L, const NodeType *R) { 521341Sstevel return L->Record->getName() < R->Record->getName(); 531341Sstevel }); 541341Sstevel // Alternatives nodes must have subclasses, External nodes may do. 551341Sstevel assert(N.Record->isSubClassOf("Alternatives") || 561341Sstevel N.Record->isSubClassOf("External") || N.Derived.empty()); 571341Sstevel assert(!N.Record->isSubClassOf("Alternatives") || !N.Derived.empty()); 581341Sstevel } 591341Sstevel } 601341Sstevel 611341Sstevel struct NodeType { 621341Sstevel const llvm::Record *Record = nullptr; 631341Sstevel const NodeType *Base = nullptr; 641341Sstevel std::vector<const NodeType *> Derived; 651341Sstevel llvm::StringRef name() const { return Record->getName(); } 661341Sstevel }; 671341Sstevel 681341Sstevel NodeType &get(llvm::StringRef Name = "Node") { 691341Sstevel auto NI = ByName.find(Name); 70 assert(NI != ByName.end() && "no such node"); 71 return *NI->second; 72 } 73 74 // Traverse the hierarchy in pre-order (base classes before derived). 75 void visit(llvm::function_ref<void(const NodeType &)> CB, 76 const NodeType *Start = nullptr) { 77 if (Start == nullptr) 78 Start = &get(); 79 CB(*Start); 80 for (const NodeType *D : Start->Derived) 81 visit(CB, D); 82 } 83 84private: 85 void add(const llvm::Record *R) { 86 AllTypes.emplace_back(); 87 AllTypes.back().Record = R; 88 bool Inserted = ByName.try_emplace(R->getName(), &AllTypes.back()).second; 89 assert(Inserted && "Duplicate node name"); 90 (void)Inserted; 91 } 92 93 void link(const llvm::Record *Derived, const llvm::Record *Base) { 94 auto &CN = get(Derived->getName()), &PN = get(Base->getName()); 95 assert(CN.Base == nullptr && "setting base twice"); 96 PN.Derived.push_back(&CN); 97 CN.Base = &PN; 98 } 99 100 std::deque<NodeType> AllTypes; 101 llvm::DenseMap<llvm::StringRef, NodeType *> ByName; 102}; 103 104const Hierarchy::NodeType &firstConcrete(const Hierarchy::NodeType &N) { 105 return N.Derived.empty() ? N : firstConcrete(*N.Derived.front()); 106} 107const Hierarchy::NodeType &lastConcrete(const Hierarchy::NodeType &N) { 108 return N.Derived.empty() ? N : lastConcrete(*N.Derived.back()); 109} 110 111struct SyntaxConstraint { 112 SyntaxConstraint(const llvm::Record &R) { 113 if (R.isSubClassOf("Optional")) { 114 *this = SyntaxConstraint(*R.getValueAsDef("inner")); 115 } else if (R.isSubClassOf("AnyToken")) { 116 NodeType = "Leaf"; 117 } else if (R.isSubClassOf("NodeType")) { 118 NodeType = R.getName().str(); 119 } else { 120 assert(false && "Unhandled Syntax kind"); 121 } 122 } 123 124 std::string NodeType; 125 // optional and leaf types also go here, once we want to use them. 126}; 127 128} // namespace 129 130void clang::EmitClangSyntaxNodeList(llvm::RecordKeeper &Records, 131 llvm::raw_ostream &OS) { 132 llvm::emitSourceFileHeader("Syntax tree node list", OS); 133 Hierarchy H(Records); 134 OS << R"cpp( 135#ifndef NODE 136#define NODE(Kind, Base) 137#endif 138 139#ifndef CONCRETE_NODE 140#define CONCRETE_NODE(Kind, Base) NODE(Kind, Base) 141#endif 142 143#ifndef ABSTRACT_NODE 144#define ABSTRACT_NODE(Kind, Base, First, Last) NODE(Kind, Base) 145#endif 146 147)cpp"; 148 H.visit([&](const Hierarchy::NodeType &N) { 149 // Don't emit ABSTRACT_NODE for node itself, which has no parent. 150 if (N.Base == nullptr) 151 return; 152 if (N.Derived.empty()) 153 OS << formatv("CONCRETE_NODE({0},{1})\n", N.name(), N.Base->name()); 154 else 155 OS << formatv("ABSTRACT_NODE({0},{1},{2},{3})\n", N.name(), 156 N.Base->name(), firstConcrete(N).name(), 157 lastConcrete(N).name()); 158 }); 159 OS << R"cpp( 160#undef NODE 161#undef CONCRETE_NODE 162#undef ABSTRACT_NODE 163)cpp"; 164} 165 166// Format a documentation string as a C++ comment. 167// Trims leading whitespace handling since comments come from a TableGen file: 168// documentation = [{ 169// This is a widget. Example: 170// widget.explode() 171// }]; 172// and should be formatted as: 173// /// This is a widget. Example: 174// /// widget.explode() 175// Leading and trailing whitespace lines are stripped. 176// The indentation of the first line is stripped from all lines. 177static void printDoc(llvm::StringRef Doc, llvm::raw_ostream &OS) { 178 Doc = Doc.rtrim(); 179 llvm::StringRef Line; 180 while (Line.trim().empty() && !Doc.empty()) 181 std::tie(Line, Doc) = Doc.split('\n'); 182 llvm::StringRef Indent = Line.take_while(llvm::isSpace); 183 for (; !Line.empty() || !Doc.empty(); std::tie(Line, Doc) = Doc.split('\n')) { 184 Line.consume_front(Indent); 185 OS << "/// " << Line << "\n"; 186 } 187} 188 189void clang::EmitClangSyntaxNodeClasses(llvm::RecordKeeper &Records, 190 llvm::raw_ostream &OS) { 191 llvm::emitSourceFileHeader("Syntax tree node list", OS); 192 Hierarchy H(Records); 193 194 OS << "\n// Forward-declare node types so we don't have to carefully " 195 "sequence definitions.\n"; 196 H.visit([&](const Hierarchy::NodeType &N) { 197 OS << "class " << N.name() << ";\n"; 198 }); 199 200 OS << "\n// Node definitions\n\n"; 201 H.visit([&](const Hierarchy::NodeType &N) { 202 if (N.Record->isSubClassOf("External")) 203 return; 204 printDoc(N.Record->getValueAsString("documentation"), OS); 205 OS << formatv("class {0}{1} : public {2} {{\n", N.name(), 206 N.Derived.empty() ? " final" : "", N.Base->name()); 207 208 // Constructor. 209 if (N.Derived.empty()) 210 OS << formatv("public:\n {0}() : {1}(NodeKind::{0}) {{}\n", N.name(), 211 N.Base->name()); 212 else 213 OS << formatv("protected:\n {0}(NodeKind K) : {1}(K) {{}\npublic:\n", 214 N.name(), N.Base->name()); 215 216 if (N.Record->isSubClassOf("Sequence")) { 217 // Getters for sequence elements. 218 for (const auto &C : N.Record->getValueAsListOfDefs("children")) { 219 assert(C->isSubClassOf("Role")); 220 llvm::StringRef Role = C->getValueAsString("role"); 221 SyntaxConstraint Constraint(*C->getValueAsDef("syntax")); 222 for (const char *Const : {"", "const "}) 223 OS << formatv( 224 " {2}{1} *get{0}() {2} {{\n" 225 " return llvm::cast_or_null<{1}>(findChild(NodeRole::{0}));\n" 226 " }\n", 227 Role, Constraint.NodeType, Const); 228 } 229 } 230 231 // classof. FIXME: move definition inline once ~all nodes are generated. 232 OS << " static bool classof(const Node *N);\n"; 233 234 OS << "};\n\n"; 235 }); 236} 237