11341Sstevel//===- ClangSyntaxEmitter.cpp - Generate clang Syntax Tree nodes ----------===//
21341Sstevel//
31341Sstevel//                     The LLVM Compiler Infrastructure
41341Sstevel//
51341Sstevel// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
61341Sstevel// See https://llvm.org/LICENSE.txt for license information.
71341Sstevel// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
81341Sstevel//
91341Sstevel//===----------------------------------------------------------------------===//
101341Sstevel//
111341Sstevel// These backends consume the definitions of Syntax Tree nodes.
121341Sstevel// See clang/include/clang/Tooling/Syntax/{Syntax,Nodes}.td
131341Sstevel//
141341Sstevel// The -gen-clang-syntax-node-list backend produces a .inc with macro calls
151341Sstevel//   NODE(Kind, BaseKind)
161341Sstevel//   ABSTRACT_NODE(Type, Base, FirstKind, LastKind)
171341Sstevel// similar to those for AST nodes such as AST/DeclNodes.inc.
181341Sstevel//
191341Sstevel// The -gen-clang-syntax-node-classes backend produces definitions for the
201341Sstevel// syntax::Node subclasses (except those marked as External).
211341Sstevel//
221341Sstevel// In future, another backend will encode the structure of the various node
231341Sstevel// types in tables so their invariants can be checked and enforced.
241341Sstevel//
251341Sstevel//===----------------------------------------------------------------------===//
#include "TableGenBackends.h"

#include <deque>

#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
351341Sstevel
361341Sstevelnamespace {
371341Sstevelusing llvm::formatv;
381341Sstevel
391341Sstevel// The class hierarchy of Node types.
401341Sstevel// We assemble this in order to be able to define the NodeKind enum in a
411341Sstevel// stable and useful way, where abstract Node subclasses correspond to ranges.
421341Sstevelclass Hierarchy {
431341Sstevelpublic:
441341Sstevel  Hierarchy(const llvm::RecordKeeper &Records) {
451341Sstevel    for (llvm::Record *T : Records.getAllDerivedDefinitions("NodeType"))
461341Sstevel      add(T);
471341Sstevel    for (llvm::Record *Derived : Records.getAllDerivedDefinitions("NodeType"))
481341Sstevel      if (llvm::Record *Base = Derived->getValueAsOptionalDef("base"))
491341Sstevel        link(Derived, Base);
501341Sstevel    for (NodeType &N : AllTypes) {
511341Sstevel      llvm::sort(N.Derived, [](const NodeType *L, const NodeType *R) {
521341Sstevel        return L->Record->getName() < R->Record->getName();
531341Sstevel      });
541341Sstevel      // Alternatives nodes must have subclasses, External nodes may do.
551341Sstevel      assert(N.Record->isSubClassOf("Alternatives") ||
561341Sstevel             N.Record->isSubClassOf("External") || N.Derived.empty());
571341Sstevel      assert(!N.Record->isSubClassOf("Alternatives") || !N.Derived.empty());
581341Sstevel    }
591341Sstevel  }
601341Sstevel
611341Sstevel  struct NodeType {
621341Sstevel    const llvm::Record *Record = nullptr;
631341Sstevel    const NodeType *Base = nullptr;
641341Sstevel    std::vector<const NodeType *> Derived;
651341Sstevel    llvm::StringRef name() const { return Record->getName(); }
661341Sstevel  };
671341Sstevel
681341Sstevel  NodeType &get(llvm::StringRef Name = "Node") {
691341Sstevel    auto NI = ByName.find(Name);
70    assert(NI != ByName.end() && "no such node");
71    return *NI->second;
72  }
73
74  // Traverse the hierarchy in pre-order (base classes before derived).
75  void visit(llvm::function_ref<void(const NodeType &)> CB,
76             const NodeType *Start = nullptr) {
77    if (Start == nullptr)
78      Start = &get();
79    CB(*Start);
80    for (const NodeType *D : Start->Derived)
81      visit(CB, D);
82  }
83
84private:
85  void add(const llvm::Record *R) {
86    AllTypes.emplace_back();
87    AllTypes.back().Record = R;
88    bool Inserted = ByName.try_emplace(R->getName(), &AllTypes.back()).second;
89    assert(Inserted && "Duplicate node name");
90    (void)Inserted;
91  }
92
93  void link(const llvm::Record *Derived, const llvm::Record *Base) {
94    auto &CN = get(Derived->getName()), &PN = get(Base->getName());
95    assert(CN.Base == nullptr && "setting base twice");
96    PN.Derived.push_back(&CN);
97    CN.Base = &PN;
98  }
99
100  std::deque<NodeType> AllTypes;
101  llvm::DenseMap<llvm::StringRef, NodeType *> ByName;
102};
103
104const Hierarchy::NodeType &firstConcrete(const Hierarchy::NodeType &N) {
105  return N.Derived.empty() ? N : firstConcrete(*N.Derived.front());
106}
107const Hierarchy::NodeType &lastConcrete(const Hierarchy::NodeType &N) {
108  return N.Derived.empty() ? N : lastConcrete(*N.Derived.back());
109}
110
111struct SyntaxConstraint {
112  SyntaxConstraint(const llvm::Record &R) {
113    if (R.isSubClassOf("Optional")) {
114      *this = SyntaxConstraint(*R.getValueAsDef("inner"));
115    } else if (R.isSubClassOf("AnyToken")) {
116      NodeType = "Leaf";
117    } else if (R.isSubClassOf("NodeType")) {
118      NodeType = R.getName().str();
119    } else {
120      assert(false && "Unhandled Syntax kind");
121    }
122  }
123
124  std::string NodeType;
125  // optional and leaf types also go here, once we want to use them.
126};
127
128} // namespace
129
130void clang::EmitClangSyntaxNodeList(llvm::RecordKeeper &Records,
131                                    llvm::raw_ostream &OS) {
132  llvm::emitSourceFileHeader("Syntax tree node list", OS);
133  Hierarchy H(Records);
134  OS << R"cpp(
135#ifndef NODE
136#define NODE(Kind, Base)
137#endif
138
139#ifndef CONCRETE_NODE
140#define CONCRETE_NODE(Kind, Base) NODE(Kind, Base)
141#endif
142
143#ifndef ABSTRACT_NODE
144#define ABSTRACT_NODE(Kind, Base, First, Last) NODE(Kind, Base)
145#endif
146
147)cpp";
148  H.visit([&](const Hierarchy::NodeType &N) {
149    // Don't emit ABSTRACT_NODE for node itself, which has no parent.
150    if (N.Base == nullptr)
151      return;
152    if (N.Derived.empty())
153      OS << formatv("CONCRETE_NODE({0},{1})\n", N.name(), N.Base->name());
154    else
155      OS << formatv("ABSTRACT_NODE({0},{1},{2},{3})\n", N.name(),
156                    N.Base->name(), firstConcrete(N).name(),
157                    lastConcrete(N).name());
158  });
159  OS << R"cpp(
160#undef NODE
161#undef CONCRETE_NODE
162#undef ABSTRACT_NODE
163)cpp";
164}
165
166// Format a documentation string as a C++ comment.
167// Trims leading whitespace handling since comments come from a TableGen file:
168//    documentation = [{
169//      This is a widget. Example:
170//        widget.explode()
171//    }];
172// and should be formatted as:
173//    /// This is a widget. Example:
174//    ///   widget.explode()
175// Leading and trailing whitespace lines are stripped.
176// The indentation of the first line is stripped from all lines.
177static void printDoc(llvm::StringRef Doc, llvm::raw_ostream &OS) {
178  Doc = Doc.rtrim();
179  llvm::StringRef Line;
180  while (Line.trim().empty() && !Doc.empty())
181    std::tie(Line, Doc) = Doc.split('\n');
182  llvm::StringRef Indent = Line.take_while(llvm::isSpace);
183  for (; !Line.empty() || !Doc.empty(); std::tie(Line, Doc) = Doc.split('\n')) {
184    Line.consume_front(Indent);
185    OS << "/// " << Line << "\n";
186  }
187}
188
189void clang::EmitClangSyntaxNodeClasses(llvm::RecordKeeper &Records,
190                                       llvm::raw_ostream &OS) {
191  llvm::emitSourceFileHeader("Syntax tree node list", OS);
192  Hierarchy H(Records);
193
194  OS << "\n// Forward-declare node types so we don't have to carefully "
195        "sequence definitions.\n";
196  H.visit([&](const Hierarchy::NodeType &N) {
197    OS << "class " << N.name() << ";\n";
198  });
199
200  OS << "\n// Node definitions\n\n";
201  H.visit([&](const Hierarchy::NodeType &N) {
202    if (N.Record->isSubClassOf("External"))
203      return;
204    printDoc(N.Record->getValueAsString("documentation"), OS);
205    OS << formatv("class {0}{1} : public {2} {{\n", N.name(),
206                  N.Derived.empty() ? " final" : "", N.Base->name());
207
208    // Constructor.
209    if (N.Derived.empty())
210      OS << formatv("public:\n  {0}() : {1}(NodeKind::{0}) {{}\n", N.name(),
211                    N.Base->name());
212    else
213      OS << formatv("protected:\n  {0}(NodeKind K) : {1}(K) {{}\npublic:\n",
214                    N.name(), N.Base->name());
215
216    if (N.Record->isSubClassOf("Sequence")) {
217      // Getters for sequence elements.
218      for (const auto &C : N.Record->getValueAsListOfDefs("children")) {
219        assert(C->isSubClassOf("Role"));
220        llvm::StringRef Role = C->getValueAsString("role");
221        SyntaxConstraint Constraint(*C->getValueAsDef("syntax"));
222        for (const char *Const : {"", "const "})
223          OS << formatv(
224              "  {2}{1} *get{0}() {2} {{\n"
225              "    return llvm::cast_or_null<{1}>(findChild(NodeRole::{0}));\n"
226              "  }\n",
227              Role, Constraint.NodeType, Const);
228      }
229    }
230
231    // classof. FIXME: move definition inline once ~all nodes are generated.
232    OS << "  static bool classof(const Node *N);\n";
233
234    OS << "};\n\n";
235  });
236}
237