//===- MsgPackReader.h - Simple MsgPack reader ------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
///  \file
///  This is a MessagePack reader.
///
///  See https://github.com/msgpack/msgpack/blob/master/spec.md for the full
///  standard.
///
///  Typical usage:
///  \code
///  StringRef input = GetInput();
///  msgpack::Reader MPReader(input);
///  msgpack::Object Obj;
///
///  while (true) {
///    Expected<bool> ReadObj = MPReader.read(Obj);
///    if (!ReadObj)
///      // Handle error...
///    if (!ReadObj.get())
///      break; // Reached end of input
///    switch (Obj.Kind) {
///    case msgpack::Type::Int:
///      // Use Obj.Int
///      break;
///    // ...
///    }
///  }
///  \endcode
///
//===----------------------------------------------------------------------===//
37
38#ifndef LLVM_BINARYFORMAT_MSGPACKREADER_H
39#define LLVM_BINARYFORMAT_MSGPACKREADER_H
40
41#include "llvm/Support/Error.h"
42#include "llvm/Support/MemoryBufferRef.h"
43#include <cstdint>
44
45namespace llvm {
46namespace msgpack {
47
/// MessagePack types as defined in the standard, with the exception of Integer
/// being divided into a signed Int and unsigned UInt variant in order to map
/// directly to C++ types.
///
/// The types map onto corresponding union members of the \c Object struct.
enum class Type : uint8_t {
  /// Signed integer; value is carried in \c Object::Int.
  Int,
  /// Unsigned integer; value is carried in \c Object::UInt.
  UInt,
  /// Nil; has a single value, so no union member is associated with it.
  Nil,
  /// Boolean; value is carried in \c Object::Bool.
  Boolean,
  /// Floating point; value is carried in \c Object::Float.
  Float,
  /// String; value is carried in \c Object::Raw.
  String,
  /// Binary blob; value is carried in \c Object::Raw.
  Binary,
  /// Array header; element count is carried in \c Object::Length.
  Array,
  /// Map header; length is carried in \c Object::Length.
  Map,
  /// Extension object; value is carried in \c Object::Extension.
  Extension,
  Empty, // Used by MsgPackDocument to represent an empty node
};
66
67/// Extension types are composed of a user-defined type ID and an uninterpreted
68/// sequence of bytes.
69struct ExtensionType {
70  /// User-defined extension type.
71  int8_t Type;
72  /// Raw bytes of the extension object.
73  StringRef Bytes;
74};
75
76/// MessagePack object, represented as a tagged union of C++ types.
77///
78/// All types except \c Type::Nil (which has only one value, and so is
79/// completely represented by the \c Kind itself) map to a exactly one union
80/// member.
81struct Object {
82  Type Kind;
83  union {
84    /// Value for \c Type::Int.
85    int64_t Int;
86    /// Value for \c Type::Uint.
87    uint64_t UInt;
88    /// Value for \c Type::Boolean.
89    bool Bool;
90    /// Value for \c Type::Float.
91    double Float;
92    /// Value for \c Type::String and \c Type::Binary.
93    StringRef Raw;
94    /// Value for \c Type::Array and \c Type::Map.
95    size_t Length;
96    /// Value for \c Type::Extension.
97    ExtensionType Extension;
98  };
99
100  Object() : Kind(Type::Int), Int(0) {}
101};
102
103/// Reads MessagePack objects from memory, one at a time.
104class Reader {
105public:
106  /// Construct a reader, keeping a reference to the \p InputBuffer.
107  Reader(MemoryBufferRef InputBuffer);
108  /// Construct a reader, keeping a reference to the \p Input.
109  Reader(StringRef Input);
110
111  Reader(const Reader &) = delete;
112  Reader &operator=(const Reader &) = delete;
113
114  /// Read one object from the input buffer, advancing past it.
115  ///
116  /// The \p Obj is updated with the kind of the object read, and the
117  /// corresponding union member is updated.
118  ///
119  /// For the collection objects (Array and Map), only the length is read, and
120  /// the caller must make and additional \c N calls (in the case of Array) or
121  /// \c N*2 calls (in the case of Map) to \c Read to retrieve the collection
122  /// elements.
123  ///
124  /// \param [out] Obj filled with next object on success.
125  ///
126  /// \returns true when object successfully read, false when at end of
127  /// input (and so \p Obj was not updated), otherwise an error.
128  Expected<bool> read(Object &Obj);
129
130private:
131  MemoryBufferRef InputBuffer;
132  StringRef::iterator Current;
133  StringRef::iterator End;
134
135  size_t remainingSpace() {
136    // The rest of the code maintains the invariant that End >= Current, so
137    // that this cast is always defined behavior.
138    return static_cast<size_t>(End - Current);
139  }
140
141  template <class T> Expected<bool> readRaw(Object &Obj);
142  template <class T> Expected<bool> readInt(Object &Obj);
143  template <class T> Expected<bool> readUInt(Object &Obj);
144  template <class T> Expected<bool> readLength(Object &Obj);
145  template <class T> Expected<bool> readExt(Object &Obj);
146  Expected<bool> createRaw(Object &Obj, uint32_t Size);
147  Expected<bool> createExt(Object &Obj, uint32_t Size);
148};
149
150} // end namespace msgpack
151} // end namespace llvm
152
153#endif // LLVM_BINARYFORMAT_MSGPACKREADER_H
154