1//===-- MsgPackDocument.h - MsgPack Document --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file declares a class that exposes a simple in-memory representation
10/// of a document of MsgPack objects, that can be read from MsgPack, written to
11/// MsgPack, and inspected and modified in memory. This is intended to be a
12/// lighter-weight (in terms of memory allocations) replacement for
13/// MsgPackTypes.
14///
15//===----------------------------------------------------------------------===//
16
17#ifndef LLVM_BINARYFORMAT_MSGPACKDOCUMENT_H
18#define LLVM_BINARYFORMAT_MSGPACKDOCUMENT_H
19
20#include "llvm/BinaryFormat/MsgPackReader.h"
21#include <map>
22
23namespace llvm {
24namespace msgpack {
25
26class ArrayDocNode;
27class Document;
28class MapDocNode;
29
30/// The kind of a DocNode and its owning Document.
31struct KindAndDocument {
32  Document *Doc;
33  Type Kind;
34};
35
36/// A node in a MsgPack Document. This is a simple copyable and
37/// passable-by-value type that does not own any memory.
38class DocNode {
39  friend Document;
40
41public:
42  typedef std::map<DocNode, DocNode> MapTy;
43  typedef std::vector<DocNode> ArrayTy;
44
45private:
46  // Using KindAndDocument allows us to squeeze Kind and a pointer to the
47  // owning Document into the same word. Having a pointer to the owning
48  // Document makes the API of DocNode more convenient, and allows its use in
49  // YAMLIO.
50  const KindAndDocument *KindAndDoc;
51
52protected:
53  // The union of different values.
54  union {
55    int64_t Int;
56    uint64_t UInt;
57    bool Bool;
58    double Float;
59    StringRef Raw;
60    ArrayTy *Array;
61    MapTy *Map;
62  };
63
64public:
65  // Default constructor gives an empty node with no associated Document. All
66  // you can do with it is "isEmpty()".
67  DocNode() : KindAndDoc(nullptr) {}
68
69  // Type methods
70  bool isMap() const { return getKind() == Type::Map; }
71  bool isArray() const { return getKind() == Type::Array; }
72  bool isScalar() const { return !isMap() && !isArray(); }
73  bool isString() const { return getKind() == Type::String; }
74
75  // Accessors. isEmpty() returns true for both a default-constructed DocNode
76  // that has no associated Document, and the result of getEmptyNode(), which
77  // does have an associated document.
78  bool isEmpty() const { return !KindAndDoc || getKind() == Type::Empty; }
79  Type getKind() const { return KindAndDoc->Kind; }
80  Document *getDocument() const { return KindAndDoc->Doc; }
81
82  int64_t &getInt() {
83    assert(getKind() == Type::Int);
84    return Int;
85  }
86
87  uint64_t &getUInt() {
88    assert(getKind() == Type::UInt);
89    return UInt;
90  }
91
92  bool &getBool() {
93    assert(getKind() == Type::Boolean);
94    return Bool;
95  }
96
97  double &getFloat() {
98    assert(getKind() == Type::Float);
99    return Float;
100  }
101
102  int64_t getInt() const {
103    assert(getKind() == Type::Int);
104    return Int;
105  }
106
107  uint64_t getUInt() const {
108    assert(getKind() == Type::UInt);
109    return UInt;
110  }
111
112  bool getBool() const {
113    assert(getKind() == Type::Boolean);
114    return Bool;
115  }
116
117  double getFloat() const {
118    assert(getKind() == Type::Float);
119    return Float;
120  }
121
122  StringRef getString() const {
123    assert(getKind() == Type::String);
124    return Raw;
125  }
126
127  /// Get an ArrayDocNode for an array node. If Convert, convert the node to an
128  /// array node if necessary.
129  ArrayDocNode &getArray(bool Convert = false) {
130    if (getKind() != Type::Array) {
131      assert(Convert);
132      convertToArray();
133    }
134    // This could be a static_cast, except ArrayDocNode is a forward reference.
135    return *reinterpret_cast<ArrayDocNode *>(this);
136  }
137
138  /// Get a MapDocNode for a map node. If Convert, convert the node to a map
139  /// node if necessary.
140  MapDocNode &getMap(bool Convert = false) {
141    if (getKind() != Type::Map) {
142      assert(Convert);
143      convertToMap();
144    }
145    // This could be a static_cast, except MapDocNode is a forward reference.
146    return *reinterpret_cast<MapDocNode *>(this);
147  }
148
149  /// Comparison operator, used for map keys.
150  friend bool operator<(const DocNode &Lhs, const DocNode &Rhs) {
151    // This has to cope with one or both of the nodes being default-constructed,
152    // such that KindAndDoc is not set.
153    if (Rhs.isEmpty())
154      return false;
155    if (Lhs.KindAndDoc != Rhs.KindAndDoc) {
156      if (Lhs.isEmpty())
157        return true;
158      return (unsigned)Lhs.getKind() < (unsigned)Rhs.getKind();
159    }
160    switch (Lhs.getKind()) {
161    case Type::Int:
162      return Lhs.Int < Rhs.Int;
163    case Type::UInt:
164      return Lhs.UInt < Rhs.UInt;
165    case Type::Nil:
166      return false;
167    case Type::Boolean:
168      return Lhs.Bool < Rhs.Bool;
169    case Type::Float:
170      return Lhs.Float < Rhs.Float;
171    case Type::String:
172    case Type::Binary:
173      return Lhs.Raw < Rhs.Raw;
174    default:
175      llvm_unreachable("bad map key type");
176    }
177  }
178
179  /// Equality operator
180  friend bool operator==(const DocNode &Lhs, const DocNode &Rhs) {
181    return !(Lhs < Rhs) && !(Rhs < Lhs);
182  }
183
184  /// Inequality operator
185  friend bool operator!=(const DocNode &Lhs, const DocNode &Rhs) {
186    return !(Lhs == Rhs);
187  }
188
189  /// Convert this node to a string, assuming it is scalar.
190  std::string toString() const;
191
192  /// Convert the StringRef and use it to set this DocNode (assuming scalar). If
193  /// it is a string, copy the string into the Document's strings list so we do
194  /// not rely on S having a lifetime beyond this call. Tag is "" or a YAML tag.
195  StringRef fromString(StringRef S, StringRef Tag = "");
196
197  /// Convenience assignment operators. This only works if the destination
198  /// DocNode has an associated Document, i.e. it was not constructed using the
199  /// default constructor. The string one does not copy, so the string must
200  /// remain valid for the lifetime of the Document. Use fromString to avoid
201  /// that restriction.
202  DocNode &operator=(const char *Val) { return *this = StringRef(Val); }
203  DocNode &operator=(StringRef Val);
204  DocNode &operator=(bool Val);
205  DocNode &operator=(int Val);
206  DocNode &operator=(unsigned Val);
207  DocNode &operator=(int64_t Val);
208  DocNode &operator=(uint64_t Val);
209
210private:
211  // Private constructor setting KindAndDoc, used by methods in Document.
212  DocNode(const KindAndDocument *KindAndDoc) : KindAndDoc(KindAndDoc) {}
213
214  void convertToArray();
215  void convertToMap();
216};
217
218/// A DocNode that is a map.
219class MapDocNode : public DocNode {
220public:
221  MapDocNode() {}
222  MapDocNode(DocNode &N) : DocNode(N) { assert(getKind() == Type::Map); }
223
224  // Map access methods.
225  size_t size() const { return Map->size(); }
226  bool empty() const { return !size(); }
227  MapTy::iterator begin() { return Map->begin(); }
228  MapTy::iterator end() { return Map->end(); }
229  MapTy::iterator find(DocNode Key) { return Map->find(Key); }
230  MapTy::iterator find(StringRef Key);
231  MapTy::iterator erase(MapTy::const_iterator I) { return Map->erase(I); }
232  size_t erase(DocNode Key) { return Map->erase(Key); }
233  MapTy::iterator erase(MapTy::const_iterator First,
234                        MapTy::const_iterator Second) {
235    return Map->erase(First, Second);
236  }
237  /// Member access. The string data must remain valid for the lifetime of the
238  /// Document.
239  DocNode &operator[](StringRef S);
240  /// Member access, with convenience versions for an integer key.
241  DocNode &operator[](DocNode Key);
242  DocNode &operator[](int Key);
243  DocNode &operator[](unsigned Key);
244  DocNode &operator[](int64_t Key);
245  DocNode &operator[](uint64_t Key);
246};
247
248/// A DocNode that is an array.
249class ArrayDocNode : public DocNode {
250public:
251  ArrayDocNode() {}
252  ArrayDocNode(DocNode &N) : DocNode(N) { assert(getKind() == Type::Array); }
253
254  // Array access methods.
255  size_t size() const { return Array->size(); }
256  bool empty() const { return !size(); }
257  DocNode &back() const { return Array->back(); }
258  ArrayTy::iterator begin() { return Array->begin(); }
259  ArrayTy::iterator end() { return Array->end(); }
260  void push_back(DocNode N) {
261    assert(N.isEmpty() || N.getDocument() == getDocument());
262    Array->push_back(N);
263  }
264
265  /// Element access. This extends the array if necessary, with empty nodes.
266  DocNode &operator[](size_t Index);
267};
268
269/// Simple in-memory representation of a document of msgpack objects with
270/// ability to find and create array and map elements.  Does not currently cope
271/// with any extension types.
272class Document {
273  // Maps, arrays and strings used by nodes in the document. No attempt is made
274  // to free unused ones.
275  std::vector<std::unique_ptr<DocNode::MapTy>> Maps;
276  std::vector<std::unique_ptr<DocNode::ArrayTy>> Arrays;
277  std::vector<std::unique_ptr<char[]>> Strings;
278
279  // The root node of the document.
280  DocNode Root;
281
282  // The KindAndDocument structs pointed to by nodes in the document.
283  KindAndDocument KindAndDocs[size_t(Type::Empty) + 1];
284
285  // Whether YAML output uses hex for UInt.
286  bool HexMode = false;
287
288public:
289  Document() {
290    clear();
291    for (unsigned T = 0; T != unsigned(Type::Empty) + 1; ++T)
292      KindAndDocs[T] = {this, Type(T)};
293  }
294
295  /// Get ref to the document's root element.
296  DocNode &getRoot() { return Root; }
297
298  /// Restore the Document to an empty state.
299  void clear() { getRoot() = getEmptyNode(); }
300
301  /// Create an empty node associated with this Document.
302  DocNode getEmptyNode() {
303    auto N = DocNode(&KindAndDocs[size_t(Type::Empty)]);
304    return N;
305  }
306
307  /// Create a nil node associated with this Document.
308  DocNode getNode() {
309    auto N = DocNode(&KindAndDocs[size_t(Type::Nil)]);
310    return N;
311  }
312
313  /// Create an Int node associated with this Document.
314  DocNode getNode(int64_t V) {
315    auto N = DocNode(&KindAndDocs[size_t(Type::Int)]);
316    N.Int = V;
317    return N;
318  }
319
320  /// Create an Int node associated with this Document.
321  DocNode getNode(int V) {
322    auto N = DocNode(&KindAndDocs[size_t(Type::Int)]);
323    N.Int = V;
324    return N;
325  }
326
327  /// Create a UInt node associated with this Document.
328  DocNode getNode(uint64_t V) {
329    auto N = DocNode(&KindAndDocs[size_t(Type::UInt)]);
330    N.UInt = V;
331    return N;
332  }
333
334  /// Create a UInt node associated with this Document.
335  DocNode getNode(unsigned V) {
336    auto N = DocNode(&KindAndDocs[size_t(Type::UInt)]);
337    N.UInt = V;
338    return N;
339  }
340
341  /// Create a Boolean node associated with this Document.
342  DocNode getNode(bool V) {
343    auto N = DocNode(&KindAndDocs[size_t(Type::Boolean)]);
344    N.Bool = V;
345    return N;
346  }
347
348  /// Create a Float node associated with this Document.
349  DocNode getNode(double V) {
350    auto N = DocNode(&KindAndDocs[size_t(Type::Float)]);
351    N.Float = V;
352    return N;
353  }
354
355  /// Create a String node associated with this Document. If !Copy, the passed
356  /// string must remain valid for the lifetime of the Document.
357  DocNode getNode(StringRef V, bool Copy = false) {
358    if (Copy)
359      V = addString(V);
360    auto N = DocNode(&KindAndDocs[size_t(Type::String)]);
361    N.Raw = V;
362    return N;
363  }
364
365  /// Create a String node associated with this Document. If !Copy, the passed
366  /// string must remain valid for the lifetime of the Document.
367  DocNode getNode(const char *V, bool Copy = false) {
368    return getNode(StringRef(V), Copy);
369  }
370
371  /// Create an empty Map node associated with this Document.
372  MapDocNode getMapNode() {
373    auto N = DocNode(&KindAndDocs[size_t(Type::Map)]);
374    Maps.push_back(std::unique_ptr<DocNode::MapTy>(new DocNode::MapTy));
375    N.Map = Maps.back().get();
376    return N.getMap();
377  }
378
379  /// Create an empty Array node associated with this Document.
380  ArrayDocNode getArrayNode() {
381    auto N = DocNode(&KindAndDocs[size_t(Type::Array)]);
382    Arrays.push_back(std::unique_ptr<DocNode::ArrayTy>(new DocNode::ArrayTy));
383    N.Array = Arrays.back().get();
384    return N.getArray();
385  }
386
387  /// Read a document from a binary msgpack blob, merging into anything already
388  /// in the Document. The blob data must remain valid for the lifetime of this
389  /// Document (because a string object in the document contains a StringRef
390  /// into the original blob). If Multi, then this sets root to an array and
391  /// adds top-level objects to it. If !Multi, then it only reads a single
392  /// top-level object, even if there are more, and sets root to that. Returns
393  /// false if failed due to illegal format or merge error.
394  ///
395  /// The Merger arg is a callback function that is called when the merge has a
396  /// conflict, that is, it is trying to set an item that is already set. If the
397  /// conflict cannot be resolved, the callback function returns -1. If the
398  /// conflict can be resolved, the callback returns a non-negative number and
399  /// sets *DestNode to the resolved node. The returned non-negative number is
400  /// significant only for an array node; it is then the array index to start
401  /// populating at. That allows Merger to choose whether to merge array
402  /// elements (returns 0) or append new elements (returns existing size).
403  ///
404  /// If SrcNode is an array or map, the resolution must be that *DestNode is an
405  /// array or map respectively, although it could be the array or map
406  /// (respectively) that was already there. MapKey is the key if *DestNode is a
407  /// map entry, a nil node otherwise.
408  ///
409  /// The default for Merger is to disallow any conflict.
410  bool readFromBlob(
411      StringRef Blob, bool Multi,
412      function_ref<int(DocNode *DestNode, DocNode SrcNode, DocNode MapKey)>
413          Merger = [](DocNode *DestNode, DocNode SrcNode, DocNode MapKey) {
414            return -1;
415          });
416
417  /// Write a MsgPack document to a binary MsgPack blob.
418  void writeToBlob(std::string &Blob);
419
420  /// Copy a string into the Document's strings list, and return the copy that
421  /// is owned by the Document.
422  StringRef addString(StringRef S) {
423    Strings.push_back(std::unique_ptr<char[]>(new char[S.size()]));
424    memcpy(&Strings.back()[0], S.data(), S.size());
425    return StringRef(&Strings.back()[0], S.size());
426  }
427
428  /// Set whether YAML output uses hex for UInt. Default off.
429  void setHexMode(bool Val = true) { HexMode = Val; }
430
431  /// Get Hexmode flag.
432  bool getHexMode() const { return HexMode; }
433
434  /// Convert MsgPack Document to YAML text.
435  void toYAML(raw_ostream &OS);
436
437  /// Read YAML text into the MsgPack document. Returns false on failure.
438  bool fromYAML(StringRef S);
439};
440
441} // namespace msgpack
442} // namespace llvm
443
444#endif // LLVM_BINARYFORMAT_MSGPACKDOCUMENT_H
445