1336809Sdim//===--- JSON.h - JSON values, parsing and serialization -------*- C++ -*-===//
2336809Sdim//
3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4353358Sdim// See https://llvm.org/LICENSE.txt for license information.
5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6336809Sdim//
7336809Sdim//===---------------------------------------------------------------------===//
8336809Sdim///
9336809Sdim/// \file
10336809Sdim/// This file supports working with JSON data.
11336809Sdim///
12336809Sdim/// It comprises:
13336809Sdim///
14336809Sdim/// - classes which hold dynamically-typed parsed JSON structures
15336809Sdim///   These are value types that can be composed, inspected, and modified.
16336809Sdim///   See json::Value, and the related types json::Object and json::Array.
17336809Sdim///
18336809Sdim/// - functions to parse JSON text into Values, and to serialize Values to text.
19336809Sdim///   See parse(), operator<<, and format_provider.
20336809Sdim///
21336809Sdim/// - a convention and helpers for mapping between json::Value and user-defined
22336809Sdim///   types. See fromJSON(), ObjectMapper, and the class comment on Value.
23336809Sdim///
24353358Sdim/// - an output API json::OStream which can emit JSON without materializing
25353358Sdim///   all structures as json::Value.
26353358Sdim///
27336809Sdim/// Typically, JSON data would be read from an external source, parsed into
28336809Sdim/// a Value, and then converted into some native data structure before doing
29336809Sdim/// real work on it. (And vice versa when writing).
30336809Sdim///
31336809Sdim/// Other serialization mechanisms you may consider:
32336809Sdim///
33336809Sdim/// - YAML is also text-based, and more human-readable than JSON. It's a more
34336809Sdim///   complex format and data model, and YAML parsers aren't ubiquitous.
35336809Sdim///   YAMLParser.h is a streaming parser suitable for parsing large documents
36336809Sdim///   (including JSON, as YAML is a superset). It can be awkward to use
37336809Sdim///   directly. YAML I/O (YAMLTraits.h) provides data mapping that is more
38336809Sdim///   declarative than the toJSON/fromJSON conventions here.
39336809Sdim///
40336809Sdim/// - LLVM bitstream is a space- and CPU- efficient binary format. Typically it
41336809Sdim///   encodes LLVM IR ("bitcode"), but it can be a container for other data.
42353358Sdim///   Low-level reader/writer libraries are in Bitstream/Bitstream*.h
43336809Sdim///
44336809Sdim//===---------------------------------------------------------------------===//
45336809Sdim
46336809Sdim#ifndef LLVM_SUPPORT_JSON_H
47336809Sdim#define LLVM_SUPPORT_JSON_H
48336809Sdim
49336809Sdim#include "llvm/ADT/DenseMap.h"
50336809Sdim#include "llvm/ADT/SmallVector.h"
51336809Sdim#include "llvm/ADT/StringRef.h"
52336809Sdim#include "llvm/Support/Error.h"
53336809Sdim#include "llvm/Support/FormatVariadic.h"
54336809Sdim#include "llvm/Support/raw_ostream.h"
55336809Sdim#include <map>
56336809Sdim
57336809Sdimnamespace llvm {
58336809Sdimnamespace json {
59336809Sdim
60336809Sdim// === String encodings ===
61336809Sdim//
62336809Sdim// JSON strings are character sequences (not byte sequences like std::string).
63336809Sdim// We need to know the encoding, and for simplicity only support UTF-8.
64336809Sdim//
65336809Sdim//   - When parsing, invalid UTF-8 is a syntax error like any other
66336809Sdim//
67336809Sdim//   - When creating Values from strings, callers must ensure they are UTF-8.
68336809Sdim//        with asserts on, invalid UTF-8 will crash the program
69336809Sdim//        with asserts off, we'll substitute the replacement character (U+FFFD)
70336809Sdim//     Callers can use json::isUTF8() and json::fixUTF8() for validation.
71336809Sdim//
72336809Sdim//   - When retrieving strings from Values (e.g. asString()), the result will
73336809Sdim//     always be valid UTF-8.
74336809Sdim
/// Returns true if \p S is valid UTF-8, which is required for use as JSON.
/// If it returns false, the value \p ErrOffset points to is set to a byte
/// offset near the first error. \p ErrOffset may be omitted (it defaults to
/// nullptr).
bool isUTF8(llvm::StringRef S, size_t *ErrOffset = nullptr);
/// Replaces invalid UTF-8 sequences in \p S with the replacement character
/// (U+FFFD).
/// \returns a new string that is valid UTF-8; \p S itself is only read.
/// This is much slower than isUTF8, so test that first.
std::string fixUTF8(llvm::StringRef S);
82336809Sdim
// Forward declarations: Object and Array (below) refer to these types before
// their definitions appear later in this file.
class Array;
class ObjectKey;
class Value;
// Declared here, defined after the fromJSON() overloads further down.
template <typename T> Value toJSON(const llvm::Optional<T> &Opt);
87336809Sdim
88336809Sdim/// An Object is a JSON object, which maps strings to heterogenous JSON values.
89336809Sdim/// It simulates DenseMap<ObjectKey, Value>. ObjectKey is a maybe-owned string.
90336809Sdimclass Object {
91336809Sdim  using Storage = DenseMap<ObjectKey, Value, llvm::DenseMapInfo<StringRef>>;
92336809Sdim  Storage M;
93336809Sdim
94336809Sdimpublic:
95336809Sdim  using key_type = ObjectKey;
96336809Sdim  using mapped_type = Value;
97336809Sdim  using value_type = Storage::value_type;
98336809Sdim  using iterator = Storage::iterator;
99336809Sdim  using const_iterator = Storage::const_iterator;
100336809Sdim
101353358Sdim  Object() = default;
102336809Sdim  // KV is a trivial key-value struct for list-initialization.
103336809Sdim  // (using std::pair forces extra copies).
104336809Sdim  struct KV;
105336809Sdim  explicit Object(std::initializer_list<KV> Properties);
106336809Sdim
107336809Sdim  iterator begin() { return M.begin(); }
108336809Sdim  const_iterator begin() const { return M.begin(); }
109336809Sdim  iterator end() { return M.end(); }
110336809Sdim  const_iterator end() const { return M.end(); }
111336809Sdim
112336809Sdim  bool empty() const { return M.empty(); }
113336809Sdim  size_t size() const { return M.size(); }
114336809Sdim
115336809Sdim  void clear() { M.clear(); }
116336809Sdim  std::pair<iterator, bool> insert(KV E);
117336809Sdim  template <typename... Ts>
118336809Sdim  std::pair<iterator, bool> try_emplace(const ObjectKey &K, Ts &&... Args) {
119336809Sdim    return M.try_emplace(K, std::forward<Ts>(Args)...);
120336809Sdim  }
121336809Sdim  template <typename... Ts>
122336809Sdim  std::pair<iterator, bool> try_emplace(ObjectKey &&K, Ts &&... Args) {
123336809Sdim    return M.try_emplace(std::move(K), std::forward<Ts>(Args)...);
124336809Sdim  }
125360784Sdim  bool erase(StringRef K);
126360784Sdim  void erase(iterator I) { M.erase(I); }
127336809Sdim
128336809Sdim  iterator find(StringRef K) { return M.find_as(K); }
129336809Sdim  const_iterator find(StringRef K) const { return M.find_as(K); }
130336809Sdim  // operator[] acts as if Value was default-constructible as null.
131336809Sdim  Value &operator[](const ObjectKey &K);
132336809Sdim  Value &operator[](ObjectKey &&K);
133336809Sdim  // Look up a property, returning nullptr if it doesn't exist.
134336809Sdim  Value *get(StringRef K);
135336809Sdim  const Value *get(StringRef K) const;
136336809Sdim  // Typed accessors return None/nullptr if
137336809Sdim  //   - the property doesn't exist
138336809Sdim  //   - or it has the wrong type
139336809Sdim  llvm::Optional<std::nullptr_t> getNull(StringRef K) const;
140336809Sdim  llvm::Optional<bool> getBoolean(StringRef K) const;
141336809Sdim  llvm::Optional<double> getNumber(StringRef K) const;
142336809Sdim  llvm::Optional<int64_t> getInteger(StringRef K) const;
143336809Sdim  llvm::Optional<llvm::StringRef> getString(StringRef K) const;
144336809Sdim  const json::Object *getObject(StringRef K) const;
145336809Sdim  json::Object *getObject(StringRef K);
146336809Sdim  const json::Array *getArray(StringRef K) const;
147336809Sdim  json::Array *getArray(StringRef K);
148336809Sdim};
149336809Sdimbool operator==(const Object &LHS, const Object &RHS);
150336809Sdiminline bool operator!=(const Object &LHS, const Object &RHS) {
151336809Sdim  return !(LHS == RHS);
152336809Sdim}
153336809Sdim
154336809Sdim/// An Array is a JSON array, which contains heterogeneous JSON values.
155336809Sdim/// It simulates std::vector<Value>.
156336809Sdimclass Array {
157336809Sdim  std::vector<Value> V;
158336809Sdim
159336809Sdimpublic:
160336809Sdim  using value_type = Value;
161336809Sdim  using iterator = std::vector<Value>::iterator;
162336809Sdim  using const_iterator = std::vector<Value>::const_iterator;
163336809Sdim
164353358Sdim  Array() = default;
165336809Sdim  explicit Array(std::initializer_list<Value> Elements);
166336809Sdim  template <typename Collection> explicit Array(const Collection &C) {
167336809Sdim    for (const auto &V : C)
168336809Sdim      emplace_back(V);
169336809Sdim  }
170336809Sdim
171336809Sdim  Value &operator[](size_t I) { return V[I]; }
172336809Sdim  const Value &operator[](size_t I) const { return V[I]; }
173336809Sdim  Value &front() { return V.front(); }
174336809Sdim  const Value &front() const { return V.front(); }
175336809Sdim  Value &back() { return V.back(); }
176336809Sdim  const Value &back() const { return V.back(); }
177336809Sdim  Value *data() { return V.data(); }
178336809Sdim  const Value *data() const { return V.data(); }
179336809Sdim
180336809Sdim  iterator begin() { return V.begin(); }
181336809Sdim  const_iterator begin() const { return V.begin(); }
182336809Sdim  iterator end() { return V.end(); }
183336809Sdim  const_iterator end() const { return V.end(); }
184336809Sdim
185336809Sdim  bool empty() const { return V.empty(); }
186336809Sdim  size_t size() const { return V.size(); }
187353358Sdim  void reserve(size_t S) { V.reserve(S); }
188336809Sdim
189336809Sdim  void clear() { V.clear(); }
190336809Sdim  void push_back(const Value &E) { V.push_back(E); }
191336809Sdim  void push_back(Value &&E) { V.push_back(std::move(E)); }
192336809Sdim  template <typename... Args> void emplace_back(Args &&... A) {
193336809Sdim    V.emplace_back(std::forward<Args>(A)...);
194336809Sdim  }
195336809Sdim  void pop_back() { V.pop_back(); }
196336809Sdim  // FIXME: insert() takes const_iterator since C++11, old libstdc++ disagrees.
197336809Sdim  iterator insert(iterator P, const Value &E) { return V.insert(P, E); }
198336809Sdim  iterator insert(iterator P, Value &&E) {
199336809Sdim    return V.insert(P, std::move(E));
200336809Sdim  }
201336809Sdim  template <typename It> iterator insert(iterator P, It A, It Z) {
202336809Sdim    return V.insert(P, A, Z);
203336809Sdim  }
204336809Sdim  template <typename... Args> iterator emplace(const_iterator P, Args &&... A) {
205336809Sdim    return V.emplace(P, std::forward<Args>(A)...);
206336809Sdim  }
207336809Sdim
208336809Sdim  friend bool operator==(const Array &L, const Array &R) { return L.V == R.V; }
209336809Sdim};
210336809Sdiminline bool operator!=(const Array &L, const Array &R) { return !(L == R); }
211336809Sdim
212336809Sdim/// A Value is an JSON value of unknown type.
213336809Sdim/// They can be copied, but should generally be moved.
214336809Sdim///
215336809Sdim/// === Composing values ===
216336809Sdim///
217336809Sdim/// You can implicitly construct Values from:
218336809Sdim///   - strings: std::string, SmallString, formatv, StringRef, char*
219336809Sdim///              (char*, and StringRef are references, not copies!)
220336809Sdim///   - numbers
221336809Sdim///   - booleans
222336809Sdim///   - null: nullptr
223336809Sdim///   - arrays: {"foo", 42.0, false}
224336809Sdim///   - serializable things: types with toJSON(const T&)->Value, found by ADL
225336809Sdim///
226336809Sdim/// They can also be constructed from object/array helpers:
227336809Sdim///   - json::Object is a type like map<ObjectKey, Value>
228336809Sdim///   - json::Array is a type like vector<Value>
229336809Sdim/// These can be list-initialized, or used to build up collections in a loop.
230336809Sdim/// json::ary(Collection) converts all items in a collection to Values.
231336809Sdim///
232336809Sdim/// === Inspecting values ===
233336809Sdim///
234336809Sdim/// Each Value is one of the JSON kinds:
235336809Sdim///   null    (nullptr_t)
236336809Sdim///   boolean (bool)
237336809Sdim///   number  (double or int64)
238336809Sdim///   string  (StringRef)
239336809Sdim///   array   (json::Array)
240336809Sdim///   object  (json::Object)
241336809Sdim///
242336809Sdim/// The kind can be queried directly, or implicitly via the typed accessors:
243336809Sdim///   if (Optional<StringRef> S = E.getAsString()
244336809Sdim///     assert(E.kind() == Value::String);
245336809Sdim///
246336809Sdim/// Array and Object also have typed indexing accessors for easy traversal:
247336809Sdim///   Expected<Value> E = parse(R"( {"options": {"font": "sans-serif"}} )");
248336809Sdim///   if (Object* O = E->getAsObject())
249336809Sdim///     if (Object* Opts = O->getObject("options"))
250336809Sdim///       if (Optional<StringRef> Font = Opts->getString("font"))
251336809Sdim///         assert(Opts->at("font").kind() == Value::String);
252336809Sdim///
253336809Sdim/// === Converting JSON values to C++ types ===
254336809Sdim///
255336809Sdim/// The convention is to have a deserializer function findable via ADL:
256336809Sdim///     fromJSON(const json::Value&, T&)->bool
257336809Sdim/// Deserializers are provided for:
258336809Sdim///   - bool
259336809Sdim///   - int and int64_t
260336809Sdim///   - double
261336809Sdim///   - std::string
262336809Sdim///   - vector<T>, where T is deserializable
263336809Sdim///   - map<string, T>, where T is deserializable
264336809Sdim///   - Optional<T>, where T is deserializable
265336809Sdim/// ObjectMapper can help writing fromJSON() functions for object types.
266336809Sdim///
267336809Sdim/// For conversion in the other direction, the serializer function is:
268336809Sdim///    toJSON(const T&) -> json::Value
269336809Sdim/// If this exists, then it also allows constructing Value from T, and can
270336809Sdim/// be used to serialize vector<T>, map<string, T>, and Optional<T>.
271336809Sdim///
272336809Sdim/// === Serialization ===
273336809Sdim///
274336809Sdim/// Values can be serialized to JSON:
275336809Sdim///   1) raw_ostream << Value                    // Basic formatting.
276336809Sdim///   2) raw_ostream << formatv("{0}", Value)    // Basic formatting.
277336809Sdim///   3) raw_ostream << formatv("{0:2}", Value)  // Pretty-print with indent 2.
278336809Sdim///
279336809Sdim/// And parsed:
280336809Sdim///   Expected<Value> E = json::parse("[1, 2, null]");
281336809Sdim///   assert(E && E->kind() == Value::Array);
282336809Sdimclass Value {
283336809Sdimpublic:
284336809Sdim  enum Kind {
285336809Sdim    Null,
286336809Sdim    Boolean,
287336809Sdim    /// Number values can store both int64s and doubles at full precision,
288336809Sdim    /// depending on what they were constructed/parsed from.
289336809Sdim    Number,
290336809Sdim    String,
291336809Sdim    Array,
292336809Sdim    Object,
293336809Sdim  };
294336809Sdim
295336809Sdim  // It would be nice to have Value() be null. But that would make {} null too.
296336809Sdim  Value(const Value &M) { copyFrom(M); }
297336809Sdim  Value(Value &&M) { moveFrom(std::move(M)); }
298336809Sdim  Value(std::initializer_list<Value> Elements);
299336809Sdim  Value(json::Array &&Elements) : Type(T_Array) {
300336809Sdim    create<json::Array>(std::move(Elements));
301336809Sdim  }
302344779Sdim  template <typename Elt>
303344779Sdim  Value(const std::vector<Elt> &C) : Value(json::Array(C)) {}
304336809Sdim  Value(json::Object &&Properties) : Type(T_Object) {
305336809Sdim    create<json::Object>(std::move(Properties));
306336809Sdim  }
307344779Sdim  template <typename Elt>
308344779Sdim  Value(const std::map<std::string, Elt> &C) : Value(json::Object(C)) {}
309336809Sdim  // Strings: types with value semantics. Must be valid UTF-8.
310336809Sdim  Value(std::string V) : Type(T_String) {
311336809Sdim    if (LLVM_UNLIKELY(!isUTF8(V))) {
312336809Sdim      assert(false && "Invalid UTF-8 in value used as JSON");
313336809Sdim      V = fixUTF8(std::move(V));
314336809Sdim    }
315336809Sdim    create<std::string>(std::move(V));
316336809Sdim  }
317336809Sdim  Value(const llvm::SmallVectorImpl<char> &V)
318353358Sdim      : Value(std::string(V.begin(), V.end())) {}
319353358Sdim  Value(const llvm::formatv_object_base &V) : Value(V.str()) {}
320336809Sdim  // Strings: types with reference semantics. Must be valid UTF-8.
321336809Sdim  Value(StringRef V) : Type(T_StringRef) {
322336809Sdim    create<llvm::StringRef>(V);
323336809Sdim    if (LLVM_UNLIKELY(!isUTF8(V))) {
324336809Sdim      assert(false && "Invalid UTF-8 in value used as JSON");
325336809Sdim      *this = Value(fixUTF8(V));
326336809Sdim    }
327336809Sdim  }
328336809Sdim  Value(const char *V) : Value(StringRef(V)) {}
329336809Sdim  Value(std::nullptr_t) : Type(T_Null) {}
330336809Sdim  // Boolean (disallow implicit conversions).
331336809Sdim  // (The last template parameter is a dummy to keep templates distinct.)
332336809Sdim  template <
333336809Sdim      typename T,
334336809Sdim      typename = typename std::enable_if<std::is_same<T, bool>::value>::type,
335336809Sdim      bool = false>
336336809Sdim  Value(T B) : Type(T_Boolean) {
337336809Sdim    create<bool>(B);
338336809Sdim  }
339336809Sdim  // Integers (except boolean). Must be non-narrowing convertible to int64_t.
340336809Sdim  template <
341336809Sdim      typename T,
342336809Sdim      typename = typename std::enable_if<std::is_integral<T>::value>::type,
343336809Sdim      typename = typename std::enable_if<!std::is_same<T, bool>::value>::type>
344336809Sdim  Value(T I) : Type(T_Integer) {
345336809Sdim    create<int64_t>(int64_t{I});
346336809Sdim  }
347336809Sdim  // Floating point. Must be non-narrowing convertible to double.
348336809Sdim  template <typename T,
349336809Sdim            typename =
350336809Sdim                typename std::enable_if<std::is_floating_point<T>::value>::type,
351336809Sdim            double * = nullptr>
352336809Sdim  Value(T D) : Type(T_Double) {
353336809Sdim    create<double>(double{D});
354336809Sdim  }
355336809Sdim  // Serializable types: with a toJSON(const T&)->Value function, found by ADL.
356336809Sdim  template <typename T,
357336809Sdim            typename = typename std::enable_if<std::is_same<
358336809Sdim                Value, decltype(toJSON(*(const T *)nullptr))>::value>,
359336809Sdim            Value * = nullptr>
360336809Sdim  Value(const T &V) : Value(toJSON(V)) {}
361336809Sdim
362336809Sdim  Value &operator=(const Value &M) {
363336809Sdim    destroy();
364336809Sdim    copyFrom(M);
365336809Sdim    return *this;
366336809Sdim  }
367336809Sdim  Value &operator=(Value &&M) {
368336809Sdim    destroy();
369336809Sdim    moveFrom(std::move(M));
370336809Sdim    return *this;
371336809Sdim  }
372336809Sdim  ~Value() { destroy(); }
373336809Sdim
374336809Sdim  Kind kind() const {
375336809Sdim    switch (Type) {
376336809Sdim    case T_Null:
377336809Sdim      return Null;
378336809Sdim    case T_Boolean:
379336809Sdim      return Boolean;
380336809Sdim    case T_Double:
381336809Sdim    case T_Integer:
382336809Sdim      return Number;
383336809Sdim    case T_String:
384336809Sdim    case T_StringRef:
385336809Sdim      return String;
386336809Sdim    case T_Object:
387336809Sdim      return Object;
388336809Sdim    case T_Array:
389336809Sdim      return Array;
390336809Sdim    }
391336809Sdim    llvm_unreachable("Unknown kind");
392336809Sdim  }
393336809Sdim
394336809Sdim  // Typed accessors return None/nullptr if the Value is not of this type.
395336809Sdim  llvm::Optional<std::nullptr_t> getAsNull() const {
396336809Sdim    if (LLVM_LIKELY(Type == T_Null))
397336809Sdim      return nullptr;
398336809Sdim    return llvm::None;
399336809Sdim  }
400336809Sdim  llvm::Optional<bool> getAsBoolean() const {
401336809Sdim    if (LLVM_LIKELY(Type == T_Boolean))
402336809Sdim      return as<bool>();
403336809Sdim    return llvm::None;
404336809Sdim  }
405336809Sdim  llvm::Optional<double> getAsNumber() const {
406336809Sdim    if (LLVM_LIKELY(Type == T_Double))
407336809Sdim      return as<double>();
408336809Sdim    if (LLVM_LIKELY(Type == T_Integer))
409336809Sdim      return as<int64_t>();
410336809Sdim    return llvm::None;
411336809Sdim  }
412336809Sdim  // Succeeds if the Value is a Number, and exactly representable as int64_t.
413336809Sdim  llvm::Optional<int64_t> getAsInteger() const {
414336809Sdim    if (LLVM_LIKELY(Type == T_Integer))
415336809Sdim      return as<int64_t>();
416336809Sdim    if (LLVM_LIKELY(Type == T_Double)) {
417336809Sdim      double D = as<double>();
418336809Sdim      if (LLVM_LIKELY(std::modf(D, &D) == 0.0 &&
419336809Sdim                      D >= double(std::numeric_limits<int64_t>::min()) &&
420336809Sdim                      D <= double(std::numeric_limits<int64_t>::max())))
421336809Sdim        return D;
422336809Sdim    }
423336809Sdim    return llvm::None;
424336809Sdim  }
425336809Sdim  llvm::Optional<llvm::StringRef> getAsString() const {
426336809Sdim    if (Type == T_String)
427336809Sdim      return llvm::StringRef(as<std::string>());
428336809Sdim    if (LLVM_LIKELY(Type == T_StringRef))
429336809Sdim      return as<llvm::StringRef>();
430336809Sdim    return llvm::None;
431336809Sdim  }
432336809Sdim  const json::Object *getAsObject() const {
433336809Sdim    return LLVM_LIKELY(Type == T_Object) ? &as<json::Object>() : nullptr;
434336809Sdim  }
435336809Sdim  json::Object *getAsObject() {
436336809Sdim    return LLVM_LIKELY(Type == T_Object) ? &as<json::Object>() : nullptr;
437336809Sdim  }
438336809Sdim  const json::Array *getAsArray() const {
439336809Sdim    return LLVM_LIKELY(Type == T_Array) ? &as<json::Array>() : nullptr;
440336809Sdim  }
441336809Sdim  json::Array *getAsArray() {
442336809Sdim    return LLVM_LIKELY(Type == T_Array) ? &as<json::Array>() : nullptr;
443336809Sdim  }
444336809Sdim
445336809Sdimprivate:
446336809Sdim  void destroy();
447336809Sdim  void copyFrom(const Value &M);
448336809Sdim  // We allow moving from *const* Values, by marking all members as mutable!
449336809Sdim  // This hack is needed to support initializer-list syntax efficiently.
450336809Sdim  // (std::initializer_list<T> is a container of const T).
451336809Sdim  void moveFrom(const Value &&M);
452336809Sdim  friend class Array;
453336809Sdim  friend class Object;
454336809Sdim
455336809Sdim  template <typename T, typename... U> void create(U &&... V) {
456336809Sdim    new (reinterpret_cast<T *>(Union.buffer)) T(std::forward<U>(V)...);
457336809Sdim  }
458336809Sdim  template <typename T> T &as() const {
459344779Sdim    // Using this two-step static_cast via void * instead of reinterpret_cast
460344779Sdim    // silences a -Wstrict-aliasing false positive from GCC6 and earlier.
461344779Sdim    void *Storage = static_cast<void *>(Union.buffer);
462344779Sdim    return *static_cast<T *>(Storage);
463336809Sdim  }
464336809Sdim
465353358Sdim  friend class OStream;
466336809Sdim
467336809Sdim  enum ValueType : char {
468336809Sdim    T_Null,
469336809Sdim    T_Boolean,
470336809Sdim    T_Double,
471336809Sdim    T_Integer,
472336809Sdim    T_StringRef,
473336809Sdim    T_String,
474336809Sdim    T_Object,
475336809Sdim    T_Array,
476336809Sdim  };
477336809Sdim  // All members mutable, see moveFrom().
478336809Sdim  mutable ValueType Type;
479336809Sdim  mutable llvm::AlignedCharArrayUnion<bool, double, int64_t, llvm::StringRef,
480336809Sdim                                      std::string, json::Array, json::Object>
481336809Sdim      Union;
482344779Sdim  friend bool operator==(const Value &, const Value &);
483336809Sdim};
484336809Sdim
485336809Sdimbool operator==(const Value &, const Value &);
486336809Sdiminline bool operator!=(const Value &L, const Value &R) { return !(L == R); }
487336809Sdim
488336809Sdim/// ObjectKey is a used to capture keys in Object. Like Value but:
489336809Sdim///   - only strings are allowed
490336809Sdim///   - it's optimized for the string literal case (Owned == nullptr)
491336809Sdim/// Like Value, strings must be UTF-8. See isUTF8 documentation for details.
492336809Sdimclass ObjectKey {
493336809Sdimpublic:
494336809Sdim  ObjectKey(const char *S) : ObjectKey(StringRef(S)) {}
495336809Sdim  ObjectKey(std::string S) : Owned(new std::string(std::move(S))) {
496336809Sdim    if (LLVM_UNLIKELY(!isUTF8(*Owned))) {
497336809Sdim      assert(false && "Invalid UTF-8 in value used as JSON");
498336809Sdim      *Owned = fixUTF8(std::move(*Owned));
499336809Sdim    }
500336809Sdim    Data = *Owned;
501336809Sdim  }
502336809Sdim  ObjectKey(llvm::StringRef S) : Data(S) {
503336809Sdim    if (LLVM_UNLIKELY(!isUTF8(Data))) {
504336809Sdim      assert(false && "Invalid UTF-8 in value used as JSON");
505336809Sdim      *this = ObjectKey(fixUTF8(S));
506336809Sdim    }
507336809Sdim  }
508336809Sdim  ObjectKey(const llvm::SmallVectorImpl<char> &V)
509336809Sdim      : ObjectKey(std::string(V.begin(), V.end())) {}
510336809Sdim  ObjectKey(const llvm::formatv_object_base &V) : ObjectKey(V.str()) {}
511336809Sdim
512336809Sdim  ObjectKey(const ObjectKey &C) { *this = C; }
513336809Sdim  ObjectKey(ObjectKey &&C) : ObjectKey(static_cast<const ObjectKey &&>(C)) {}
514336809Sdim  ObjectKey &operator=(const ObjectKey &C) {
515336809Sdim    if (C.Owned) {
516336809Sdim      Owned.reset(new std::string(*C.Owned));
517336809Sdim      Data = *Owned;
518336809Sdim    } else {
519336809Sdim      Data = C.Data;
520336809Sdim    }
521336809Sdim    return *this;
522336809Sdim  }
523336809Sdim  ObjectKey &operator=(ObjectKey &&) = default;
524336809Sdim
525336809Sdim  operator llvm::StringRef() const { return Data; }
526336809Sdim  std::string str() const { return Data.str(); }
527336809Sdim
528336809Sdimprivate:
529336809Sdim  // FIXME: this is unneccesarily large (3 pointers). Pointer + length + owned
530336809Sdim  // could be 2 pointers at most.
531336809Sdim  std::unique_ptr<std::string> Owned;
532336809Sdim  llvm::StringRef Data;
533336809Sdim};
534336809Sdim
535336809Sdiminline bool operator==(const ObjectKey &L, const ObjectKey &R) {
536336809Sdim  return llvm::StringRef(L) == llvm::StringRef(R);
537336809Sdim}
538336809Sdiminline bool operator!=(const ObjectKey &L, const ObjectKey &R) {
539336809Sdim  return !(L == R);
540336809Sdim}
541336809Sdiminline bool operator<(const ObjectKey &L, const ObjectKey &R) {
542336809Sdim  return StringRef(L) < StringRef(R);
543336809Sdim}
544336809Sdim
// Key/value aggregate used by Object's initializer-list constructor and by
// Object::insert(). Defined here because both member types must be complete.
struct Object::KV {
  ObjectKey K; // Property name.
  Value V;     // Property value.
};
549336809Sdim
550336809Sdiminline Object::Object(std::initializer_list<KV> Properties) {
551336809Sdim  for (const auto &P : Properties) {
552336809Sdim    auto R = try_emplace(P.K, nullptr);
553336809Sdim    if (R.second)
554336809Sdim      R.first->getSecond().moveFrom(std::move(P.V));
555336809Sdim  }
556336809Sdim}
557336809Sdiminline std::pair<Object::iterator, bool> Object::insert(KV E) {
558336809Sdim  return try_emplace(std::move(E.K), std::move(E.V));
559336809Sdim}
560360784Sdiminline bool Object::erase(StringRef K) {
561360784Sdim  return M.erase(ObjectKey(K));
562360784Sdim}
563336809Sdim
564336809Sdim// Standard deserializers are provided for primitive types.
565336809Sdim// See comments on Value.
566336809Sdiminline bool fromJSON(const Value &E, std::string &Out) {
567336809Sdim  if (auto S = E.getAsString()) {
568336809Sdim    Out = *S;
569336809Sdim    return true;
570336809Sdim  }
571336809Sdim  return false;
572336809Sdim}
573336809Sdiminline bool fromJSON(const Value &E, int &Out) {
574336809Sdim  if (auto S = E.getAsInteger()) {
575336809Sdim    Out = *S;
576336809Sdim    return true;
577336809Sdim  }
578336809Sdim  return false;
579336809Sdim}
580336809Sdiminline bool fromJSON(const Value &E, int64_t &Out) {
581336809Sdim  if (auto S = E.getAsInteger()) {
582336809Sdim    Out = *S;
583336809Sdim    return true;
584336809Sdim  }
585336809Sdim  return false;
586336809Sdim}
587336809Sdiminline bool fromJSON(const Value &E, double &Out) {
588336809Sdim  if (auto S = E.getAsNumber()) {
589336809Sdim    Out = *S;
590336809Sdim    return true;
591336809Sdim  }
592336809Sdim  return false;
593336809Sdim}
594336809Sdiminline bool fromJSON(const Value &E, bool &Out) {
595336809Sdim  if (auto S = E.getAsBoolean()) {
596336809Sdim    Out = *S;
597336809Sdim    return true;
598336809Sdim  }
599336809Sdim  return false;
600336809Sdim}
601336809Sdimtemplate <typename T> bool fromJSON(const Value &E, llvm::Optional<T> &Out) {
602336809Sdim  if (E.getAsNull()) {
603336809Sdim    Out = llvm::None;
604336809Sdim    return true;
605336809Sdim  }
606336809Sdim  T Result;
607336809Sdim  if (!fromJSON(E, Result))
608336809Sdim    return false;
609336809Sdim  Out = std::move(Result);
610336809Sdim  return true;
611336809Sdim}
612336809Sdimtemplate <typename T> bool fromJSON(const Value &E, std::vector<T> &Out) {
613336809Sdim  if (auto *A = E.getAsArray()) {
614336809Sdim    Out.clear();
615336809Sdim    Out.resize(A->size());
616336809Sdim    for (size_t I = 0; I < A->size(); ++I)
617336809Sdim      if (!fromJSON((*A)[I], Out[I]))
618336809Sdim        return false;
619336809Sdim    return true;
620336809Sdim  }
621336809Sdim  return false;
622336809Sdim}
623336809Sdimtemplate <typename T>
624336809Sdimbool fromJSON(const Value &E, std::map<std::string, T> &Out) {
625336809Sdim  if (auto *O = E.getAsObject()) {
626336809Sdim    Out.clear();
627336809Sdim    for (const auto &KV : *O)
628336809Sdim      if (!fromJSON(KV.second, Out[llvm::StringRef(KV.first)]))
629336809Sdim        return false;
630336809Sdim    return true;
631336809Sdim  }
632336809Sdim  return false;
633336809Sdim}
634336809Sdim
635336809Sdim// Allow serialization of Optional<T> for supported T.
636336809Sdimtemplate <typename T> Value toJSON(const llvm::Optional<T> &Opt) {
637336809Sdim  return Opt ? Value(*Opt) : Value(nullptr);
638336809Sdim}
639336809Sdim
640336809Sdim/// Helper for mapping JSON objects onto protocol structs.
641336809Sdim///
642336809Sdim/// Example:
643336809Sdim/// \code
644336809Sdim///   bool fromJSON(const Value &E, MyStruct &R) {
645336809Sdim///     ObjectMapper O(E);
646336809Sdim///     if (!O || !O.map("mandatory_field", R.MandatoryField))
647336809Sdim///       return false;
648336809Sdim///     O.map("optional_field", R.OptionalField);
649336809Sdim///     return true;
650336809Sdim///   }
651336809Sdim/// \endcode
652336809Sdimclass ObjectMapper {
653336809Sdimpublic:
654336809Sdim  ObjectMapper(const Value &E) : O(E.getAsObject()) {}
655336809Sdim
656336809Sdim  /// True if the expression is an object.
657336809Sdim  /// Must be checked before calling map().
658336809Sdim  operator bool() { return O; }
659336809Sdim
660336809Sdim  /// Maps a property to a field, if it exists.
661336809Sdim  template <typename T> bool map(StringRef Prop, T &Out) {
662336809Sdim    assert(*this && "Must check this is an object before calling map()");
663336809Sdim    if (const Value *E = O->get(Prop))
664336809Sdim      return fromJSON(*E, Out);
665336809Sdim    return false;
666336809Sdim  }
667336809Sdim
668336809Sdim  /// Maps a property to a field, if it exists.
669336809Sdim  /// (Optional requires special handling, because missing keys are OK).
670336809Sdim  template <typename T> bool map(StringRef Prop, llvm::Optional<T> &Out) {
671336809Sdim    assert(*this && "Must check this is an object before calling map()");
672336809Sdim    if (const Value *E = O->get(Prop))
673336809Sdim      return fromJSON(*E, Out);
674336809Sdim    Out = llvm::None;
675336809Sdim    return true;
676336809Sdim  }
677336809Sdim
678336809Sdimprivate:
679336809Sdim  const Object *O;
680336809Sdim};
681336809Sdim
/// Parses the provided JSON source, or returns a ParseError.
/// The returned Value is self-contained and owns its strings (they do not refer
/// to the original source).
/// \param JSON the JSON text to parse.
llvm::Expected<Value> parse(llvm::StringRef JSON);
686336809Sdim
687336809Sdimclass ParseError : public llvm::ErrorInfo<ParseError> {
688336809Sdim  const char *Msg;
689336809Sdim  unsigned Line, Column, Offset;
690336809Sdim
691336809Sdimpublic:
692336809Sdim  static char ID;
693336809Sdim  ParseError(const char *Msg, unsigned Line, unsigned Column, unsigned Offset)
694336809Sdim      : Msg(Msg), Line(Line), Column(Column), Offset(Offset) {}
695336809Sdim  void log(llvm::raw_ostream &OS) const override {
696336809Sdim    OS << llvm::formatv("[{0}:{1}, byte={2}]: {3}", Line, Column, Offset, Msg);
697336809Sdim  }
698336809Sdim  std::error_code convertToErrorCode() const override {
699336809Sdim    return llvm::inconvertibleErrorCode();
700336809Sdim  }
701336809Sdim};
702353358Sdim
703353358Sdim/// json::OStream allows writing well-formed JSON without materializing
704353358Sdim/// all structures as json::Value ahead of time.
705353358Sdim/// It's faster, lower-level, and less safe than OS << json::Value.
706353358Sdim///
707353358Sdim/// Only one "top-level" object can be written to a stream.
708353358Sdim/// Simplest usage involves passing lambdas (Blocks) to fill in containers:
709353358Sdim///
710353358Sdim///   json::OStream J(OS);
711353358Sdim///   J.array([&]{
712353358Sdim///     for (const Event &E : Events)
713353358Sdim///       J.object([&] {
714353358Sdim///         J.attribute("timestamp", int64_t(E.Time));
715353358Sdim///         J.attributeArray("participants", [&] {
716353358Sdim///           for (const Participant &P : E.Participants)
717360784Sdim///             J.value(P.toString());
718353358Sdim///         });
719353358Sdim///       });
720353358Sdim///   });
721353358Sdim///
722353358Sdim/// This would produce JSON like:
723353358Sdim///
724353358Sdim///   [
725353358Sdim///     {
726353358Sdim///       "timestamp": 19287398741,
727353358Sdim///       "participants": [
728353358Sdim///         "King Kong",
729353358Sdim///         "Miley Cyrus",
730353358Sdim///         "Cleopatra"
731353358Sdim///       ]
732353358Sdim///     },
733353358Sdim///     ...
734353358Sdim///   ]
735353358Sdim///
736353358Sdim/// The lower level begin/end methods (arrayBegin()) are more flexible but
737353358Sdim/// care must be taken to pair them correctly:
738353358Sdim///
739353358Sdim///   json::OStream J(OS);
///   J.arrayBegin();
741353358Sdim///   for (const Event &E : Events) {
742353358Sdim///     J.objectBegin();
743353358Sdim///     J.attribute("timestamp", int64_t(E.Time));
744353358Sdim///     J.attributeBegin("participants");
745353358Sdim///     for (const Participant &P : E.Participants)
746353358Sdim///       J.value(P.toString());
747353358Sdim///     J.attributeEnd();
748353358Sdim///     J.objectEnd();
749353358Sdim///   }
750353358Sdim///   J.arrayEnd();
751353358Sdim///
752353358Sdim/// If the call sequence isn't valid JSON, asserts will fire in debug mode.
753353358Sdim/// This can be mismatched begin()/end() pairs, trying to emit attributes inside
754353358Sdim/// an array, and so on.
755353358Sdim/// With asserts disabled, this is undefined behavior.
756353358Sdimclass OStream {
757353358Sdim public:
758353358Sdim  using Block = llvm::function_ref<void()>;
759353358Sdim  // If IndentSize is nonzero, output is pretty-printed.
760353358Sdim  explicit OStream(llvm::raw_ostream &OS, unsigned IndentSize = 0)
761353358Sdim      : OS(OS), IndentSize(IndentSize) {
762353358Sdim    Stack.emplace_back();
763353358Sdim  }
764353358Sdim  ~OStream() {
765353358Sdim    assert(Stack.size() == 1 && "Unmatched begin()/end()");
766353358Sdim    assert(Stack.back().Ctx == Singleton);
767353358Sdim    assert(Stack.back().HasValue && "Did not write top-level value");
768353358Sdim  }
769353358Sdim
770353358Sdim  /// Flushes the underlying ostream. OStream does not buffer internally.
771353358Sdim  void flush() { OS.flush(); }
772353358Sdim
773353358Sdim  // High level functions to output a value.
774353358Sdim  // Valid at top-level (exactly once), in an attribute value (exactly once),
775353358Sdim  // or in an array (any number of times).
776353358Sdim
777353358Sdim  /// Emit a self-contained value (number, string, vector<string> etc).
778353358Sdim  void value(const Value &V);
779353358Sdim  /// Emit an array whose elements are emitted in the provided Block.
780353358Sdim  void array(Block Contents) {
781353358Sdim    arrayBegin();
782353358Sdim    Contents();
783353358Sdim    arrayEnd();
784353358Sdim  }
785353358Sdim  /// Emit an object whose elements are emitted in the provided Block.
786353358Sdim  void object(Block Contents) {
787353358Sdim    objectBegin();
788353358Sdim    Contents();
789353358Sdim    objectEnd();
790353358Sdim  }
791353358Sdim
792353358Sdim  // High level functions to output object attributes.
793353358Sdim  // Valid only within an object (any number of times).
794353358Sdim
795353358Sdim  /// Emit an attribute whose value is self-contained (number, vector<int> etc).
796353358Sdim  void attribute(llvm::StringRef Key, const Value& Contents) {
797353358Sdim    attributeImpl(Key, [&] { value(Contents); });
798353358Sdim  }
799353358Sdim  /// Emit an attribute whose value is an array with elements from the Block.
800353358Sdim  void attributeArray(llvm::StringRef Key, Block Contents) {
801353358Sdim    attributeImpl(Key, [&] { array(Contents); });
802353358Sdim  }
803353358Sdim  /// Emit an attribute whose value is an object with attributes from the Block.
804353358Sdim  void attributeObject(llvm::StringRef Key, Block Contents) {
805353358Sdim    attributeImpl(Key, [&] { object(Contents); });
806353358Sdim  }
807353358Sdim
808353358Sdim  // Low-level begin/end functions to output arrays, objects, and attributes.
809353358Sdim  // Must be correctly paired. Allowed contexts are as above.
810353358Sdim
811353358Sdim  void arrayBegin();
812353358Sdim  void arrayEnd();
813353358Sdim  void objectBegin();
814353358Sdim  void objectEnd();
815353358Sdim  void attributeBegin(llvm::StringRef Key);
816353358Sdim  void attributeEnd();
817353358Sdim
818353358Sdim private:
819353358Sdim  void attributeImpl(llvm::StringRef Key, Block Contents) {
820353358Sdim    attributeBegin(Key);
821353358Sdim    Contents();
822353358Sdim    attributeEnd();
823353358Sdim  }
824353358Sdim
825353358Sdim  void valueBegin();
826353358Sdim  void newline();
827353358Sdim
828353358Sdim  enum Context {
829353358Sdim    Singleton, // Top level, or object attribute.
830353358Sdim    Array,
831353358Sdim    Object,
832353358Sdim  };
833353358Sdim  struct State {
834353358Sdim    Context Ctx = Singleton;
835353358Sdim    bool HasValue = false;
836353358Sdim  };
837353358Sdim  llvm::SmallVector<State, 16> Stack; // Never empty.
838353358Sdim  llvm::raw_ostream &OS;
839353358Sdim  unsigned IndentSize;
840353358Sdim  unsigned Indent = 0;
841353358Sdim};
842353358Sdim
843353358Sdim/// Serializes this Value to JSON, writing it to the provided stream.
844353358Sdim/// The formatting is compact (no extra whitespace) and deterministic.
845353358Sdim/// For pretty-printing, use the formatv() format_provider below.
846353358Sdiminline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Value &V) {
847353358Sdim  OStream(OS).value(V);
848353358Sdim  return OS;
849353358Sdim}
850336809Sdim} // namespace json
851336809Sdim
852336809Sdim/// Allow printing json::Value with formatv().
853336809Sdim/// The default style is basic/compact formatting, like operator<<.
854336809Sdim/// A format string like formatv("{0:2}", Value) pretty-prints with indent 2.
855336809Sdimtemplate <> struct format_provider<llvm::json::Value> {
856336809Sdim  static void format(const llvm::json::Value &, raw_ostream &, StringRef);
857336809Sdim};
858336809Sdim} // namespace llvm
859336809Sdim
860336809Sdim#endif
861