1//===--- JSON.h - JSON values, parsing and serialization -------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===---------------------------------------------------------------------===//
8///
9/// \file
10/// This file supports working with JSON data.
11///
12/// It comprises:
13///
14/// - classes which hold dynamically-typed parsed JSON structures
15///   These are value types that can be composed, inspected, and modified.
16///   See json::Value, and the related types json::Object and json::Array.
17///
18/// - functions to parse JSON text into Values, and to serialize Values to text.
19///   See parse(), operator<<, and format_provider.
20///
21/// - a convention and helpers for mapping between json::Value and user-defined
22///   types. See fromJSON(), ObjectMapper, and the class comment on Value.
23///
24/// - an output API json::OStream which can emit JSON without materializing
25///   all structures as json::Value.
26///
27/// Typically, JSON data would be read from an external source, parsed into
28/// a Value, and then converted into some native data structure before doing
29/// real work on it. (And vice versa when writing).
30///
31/// Other serialization mechanisms you may consider:
32///
33/// - YAML is also text-based, and more human-readable than JSON. It's a more
34///   complex format and data model, and YAML parsers aren't ubiquitous.
35///   YAMLParser.h is a streaming parser suitable for parsing large documents
36///   (including JSON, as YAML is a superset). It can be awkward to use
37///   directly. YAML I/O (YAMLTraits.h) provides data mapping that is more
38///   declarative than the toJSON/fromJSON conventions here.
39///
40/// - LLVM bitstream is a space- and CPU- efficient binary format. Typically it
41///   encodes LLVM IR ("bitcode"), but it can be a container for other data.
42///   Low-level reader/writer libraries are in Bitstream/Bitstream*.h
43///
44//===---------------------------------------------------------------------===//
45
46#ifndef LLVM_SUPPORT_JSON_H
47#define LLVM_SUPPORT_JSON_H
48
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
#include <cmath>
#include <limits>
#include <map>
#include <memory>
59
60namespace llvm {
61namespace json {
62
63// === String encodings ===
64//
65// JSON strings are character sequences (not byte sequences like std::string).
66// We need to know the encoding, and for simplicity only support UTF-8.
67//
68//   - When parsing, invalid UTF-8 is a syntax error like any other
69//
70//   - When creating Values from strings, callers must ensure they are UTF-8.
71//        with asserts on, invalid UTF-8 will crash the program
72//        with asserts off, we'll substitute the replacement character (U+FFFD)
73//     Callers can use json::isUTF8() and json::fixUTF8() for validation.
74//
75//   - When retrieving strings from Values (e.g. asString()), the result will
76//     always be valid UTF-8.
77
/// True iff \p T is an unsigned integral type whose size equals that of
/// uint64_t.
/// Declared `inline` so that all translation units including this header
/// share one definition of each specialization.
template <typename T>
inline constexpr bool is_uint_64_bit_v =
    std::is_integral_v<T> && std::is_unsigned_v<T> &&
    sizeof(T) == sizeof(uint64_t);
82
83/// Returns true if \p S is valid UTF-8, which is required for use as JSON.
/// If it returns false, \p ErrOffset is set to a byte offset near the first
/// error.
85bool isUTF8(llvm::StringRef S, size_t *ErrOffset = nullptr);
86/// Replaces invalid UTF-8 sequences in \p S with the replacement character
87/// (U+FFFD). The returned string is valid UTF-8.
88/// This is much slower than isUTF8, so test that first.
89std::string fixUTF8(llvm::StringRef S);
90
91class Array;
92class ObjectKey;
93class Value;
94template <typename T> Value toJSON(const std::optional<T> &Opt);
95
/// An Object is a JSON object, which maps strings to heterogeneous JSON values.
97/// It simulates DenseMap<ObjectKey, Value>. ObjectKey is a maybe-owned string.
class Object {
  // Keys are stored as ObjectKey, but hashing/comparison uses the StringRef
  // DenseMapInfo, which is what lets find() look up by StringRef via find_as().
  using Storage = DenseMap<ObjectKey, Value, llvm::DenseMapInfo<StringRef>>;
  Storage M;

public:
  using key_type = ObjectKey;
  using mapped_type = Value;
  using value_type = Storage::value_type;
  using iterator = Storage::iterator;
  using const_iterator = Storage::const_iterator;

  Object() = default;
  // KV is a trivial key-value struct for list-initialization.
  // (using std::pair forces extra copies).
  struct KV;
  explicit Object(std::initializer_list<KV> Properties);

  // Iteration forwards directly to the underlying map.
  iterator begin() { return M.begin(); }
  const_iterator begin() const { return M.begin(); }
  iterator end() { return M.end(); }
  const_iterator end() const { return M.end(); }

  bool empty() const { return M.empty(); }
  size_t size() const { return M.size(); }

  void clear() { M.clear(); }
  // Inserts the key/value pair E; forwards to try_emplace().
  std::pair<iterator, bool> insert(KV E);
  // Forwards to the underlying map's try_emplace(), copying the key.
  template <typename... Ts>
  std::pair<iterator, bool> try_emplace(const ObjectKey &K, Ts &&... Args) {
    return M.try_emplace(K, std::forward<Ts>(Args)...);
  }
  // As above, but the key is passed on as an rvalue.
  template <typename... Ts>
  std::pair<iterator, bool> try_emplace(ObjectKey &&K, Ts &&... Args) {
    return M.try_emplace(std::move(K), std::forward<Ts>(Args)...);
  }
  // Erases by key; returns the result of the underlying map's erase().
  bool erase(StringRef K);
  void erase(iterator I) { M.erase(I); }

  // Lookup by StringRef; no ObjectKey is constructed for the query.
  iterator find(StringRef K) { return M.find_as(K); }
  const_iterator find(StringRef K) const { return M.find_as(K); }
  // operator[] acts as if Value was default-constructible as null.
  Value &operator[](const ObjectKey &K);
  Value &operator[](ObjectKey &&K);
  // Look up a property, returning nullptr if it doesn't exist.
  Value *get(StringRef K);
  const Value *get(StringRef K) const;
  // Typed accessors return std::nullopt/nullptr if
  //   - the property doesn't exist
  //   - or it has the wrong type
  std::optional<std::nullptr_t> getNull(StringRef K) const;
  std::optional<bool> getBoolean(StringRef K) const;
  std::optional<double> getNumber(StringRef K) const;
  std::optional<int64_t> getInteger(StringRef K) const;
  std::optional<llvm::StringRef> getString(StringRef K) const;
  const json::Object *getObject(StringRef K) const;
  json::Object *getObject(StringRef K);
  const json::Array *getArray(StringRef K) const;
  json::Array *getArray(StringRef K);
};
157bool operator==(const Object &LHS, const Object &RHS);
158inline bool operator!=(const Object &LHS, const Object &RHS) {
159  return !(LHS == RHS);
160}
161
162/// An Array is a JSON array, which contains heterogeneous JSON values.
163/// It simulates std::vector<Value>.
164class Array {
165  std::vector<Value> V;
166
167public:
168  using value_type = Value;
169  using iterator = std::vector<Value>::iterator;
170  using const_iterator = std::vector<Value>::const_iterator;
171
172  Array() = default;
173  explicit Array(std::initializer_list<Value> Elements);
174  template <typename Collection> explicit Array(const Collection &C) {
175    for (const auto &V : C)
176      emplace_back(V);
177  }
178
179  Value &operator[](size_t I);
180  const Value &operator[](size_t I) const;
181  Value &front();
182  const Value &front() const;
183  Value &back();
184  const Value &back() const;
185  Value *data();
186  const Value *data() const;
187
188  iterator begin();
189  const_iterator begin() const;
190  iterator end();
191  const_iterator end() const;
192
193  bool empty() const;
194  size_t size() const;
195  void reserve(size_t S);
196
197  void clear();
198  void push_back(const Value &E);
199  void push_back(Value &&E);
200  template <typename... Args> void emplace_back(Args &&...A);
201  void pop_back();
202  iterator insert(const_iterator P, const Value &E);
203  iterator insert(const_iterator P, Value &&E);
204  template <typename It> iterator insert(const_iterator P, It A, It Z);
205  template <typename... Args> iterator emplace(const_iterator P, Args &&...A);
206
207  friend bool operator==(const Array &L, const Array &R);
208};
209inline bool operator!=(const Array &L, const Array &R) { return !(L == R); }
210
/// A Value is a JSON value of unknown type.
212/// They can be copied, but should generally be moved.
213///
214/// === Composing values ===
215///
216/// You can implicitly construct Values from:
217///   - strings: std::string, SmallString, formatv, StringRef, char*
218///              (char*, and StringRef are references, not copies!)
219///   - numbers
220///   - booleans
221///   - null: nullptr
222///   - arrays: {"foo", 42.0, false}
223///   - serializable things: types with toJSON(const T&)->Value, found by ADL
224///
225/// They can also be constructed from object/array helpers:
226///   - json::Object is a type like map<ObjectKey, Value>
227///   - json::Array is a type like vector<Value>
228/// These can be list-initialized, or used to build up collections in a loop.
/// json::Array(Collection) converts all items in a collection to Values.
230///
231/// === Inspecting values ===
232///
233/// Each Value is one of the JSON kinds:
234///   null    (nullptr_t)
235///   boolean (bool)
236///   number  (double, int64 or uint64)
237///   string  (StringRef)
238///   array   (json::Array)
239///   object  (json::Object)
240///
241/// The kind can be queried directly, or implicitly via the typed accessors:
///   if (std::optional<StringRef> S = E.getAsString())
243///     assert(E.kind() == Value::String);
244///
245/// Array and Object also have typed indexing accessors for easy traversal:
246///   Expected<Value> E = parse(R"( {"options": {"font": "sans-serif"}} )");
247///   if (Object* O = E->getAsObject())
248///     if (Object* Opts = O->getObject("options"))
249///       if (std::optional<StringRef> Font = Opts->getString("font"))
250///         assert(Opts->at("font").kind() == Value::String);
251///
252/// === Converting JSON values to C++ types ===
253///
254/// The convention is to have a deserializer function findable via ADL:
255///     fromJSON(const json::Value&, T&, Path) -> bool
256///
257/// The return value indicates overall success, and Path is used for precise
258/// error reporting. (The Path::Root passed in at the top level fromJSON call
259/// captures any nested error and can render it in context).
260/// If conversion fails, fromJSON calls Path::report() and immediately returns.
261/// This ensures that the first fatal error survives.
262///
263/// Deserializers are provided for:
264///   - bool
265///   - int and int64_t
266///   - double
267///   - std::string
268///   - vector<T>, where T is deserializable
269///   - map<string, T>, where T is deserializable
270///   - std::optional<T>, where T is deserializable
271/// ObjectMapper can help writing fromJSON() functions for object types.
272///
273/// For conversion in the other direction, the serializer function is:
274///    toJSON(const T&) -> json::Value
275/// If this exists, then it also allows constructing Value from T, and can
276/// be used to serialize vector<T>, map<string, T>, and std::optional<T>.
277///
278/// === Serialization ===
279///
280/// Values can be serialized to JSON:
281///   1) raw_ostream << Value                    // Basic formatting.
282///   2) raw_ostream << formatv("{0}", Value)    // Basic formatting.
283///   3) raw_ostream << formatv("{0:2}", Value)  // Pretty-print with indent 2.
284///
285/// And parsed:
286///   Expected<Value> E = json::parse("[1, 2, null]");
287///   assert(E && E->kind() == Value::Array);
288class Value {
289public:
290  enum Kind {
291    Null,
292    Boolean,
293    /// Number values can store both int64s and doubles at full precision,
294    /// depending on what they were constructed/parsed from.
295    Number,
296    String,
297    Array,
298    Object,
299  };
300
301  // It would be nice to have Value() be null. But that would make {} null too.
302  Value(const Value &M) { copyFrom(M); }
303  Value(Value &&M) { moveFrom(std::move(M)); }
304  Value(std::initializer_list<Value> Elements);
305  Value(json::Array &&Elements) : Type(T_Array) {
306    create<json::Array>(std::move(Elements));
307  }
308  template <typename Elt>
309  Value(const std::vector<Elt> &C) : Value(json::Array(C)) {}
310  Value(json::Object &&Properties) : Type(T_Object) {
311    create<json::Object>(std::move(Properties));
312  }
313  template <typename Elt>
314  Value(const std::map<std::string, Elt> &C) : Value(json::Object(C)) {}
315  // Strings: types with value semantics. Must be valid UTF-8.
316  Value(std::string V) : Type(T_String) {
317    if (LLVM_UNLIKELY(!isUTF8(V))) {
318      assert(false && "Invalid UTF-8 in value used as JSON");
319      V = fixUTF8(std::move(V));
320    }
321    create<std::string>(std::move(V));
322  }
323  Value(const llvm::SmallVectorImpl<char> &V)
324      : Value(std::string(V.begin(), V.end())) {}
325  Value(const llvm::formatv_object_base &V) : Value(V.str()) {}
326  // Strings: types with reference semantics. Must be valid UTF-8.
327  Value(StringRef V) : Type(T_StringRef) {
328    create<llvm::StringRef>(V);
329    if (LLVM_UNLIKELY(!isUTF8(V))) {
330      assert(false && "Invalid UTF-8 in value used as JSON");
331      *this = Value(fixUTF8(V));
332    }
333  }
334  Value(const char *V) : Value(StringRef(V)) {}
335  Value(std::nullptr_t) : Type(T_Null) {}
336  // Boolean (disallow implicit conversions).
337  // (The last template parameter is a dummy to keep templates distinct.)
338  template <typename T, typename = std::enable_if_t<std::is_same_v<T, bool>>,
339            bool = false>
340  Value(T B) : Type(T_Boolean) {
341    create<bool>(B);
342  }
343
344  // Unsigned 64-bit integers.
345  template <typename T, typename = std::enable_if_t<is_uint_64_bit_v<T>>>
346  Value(T V) : Type(T_UINT64) {
347    create<uint64_t>(uint64_t{V});
348  }
349
350  // Integers (except boolean and uint64_t).
351  // Must be non-narrowing convertible to int64_t.
352  template <typename T, typename = std::enable_if_t<std::is_integral_v<T>>,
353            typename = std::enable_if_t<!std::is_same_v<T, bool>>,
354            typename = std::enable_if_t<!is_uint_64_bit_v<T>>>
355  Value(T I) : Type(T_Integer) {
356    create<int64_t>(int64_t{I});
357  }
358  // Floating point. Must be non-narrowing convertible to double.
359  template <typename T,
360            typename = std::enable_if_t<std::is_floating_point_v<T>>,
361            double * = nullptr>
362  Value(T D) : Type(T_Double) {
363    create<double>(double{D});
364  }
365  // Serializable types: with a toJSON(const T&)->Value function, found by ADL.
366  template <typename T,
367            typename = std::enable_if_t<
368                std::is_same_v<Value, decltype(toJSON(*(const T *)nullptr))>>,
369            Value * = nullptr>
370  Value(const T &V) : Value(toJSON(V)) {}
371
372  Value &operator=(const Value &M) {
373    destroy();
374    copyFrom(M);
375    return *this;
376  }
377  Value &operator=(Value &&M) {
378    destroy();
379    moveFrom(std::move(M));
380    return *this;
381  }
382  ~Value() { destroy(); }
383
384  Kind kind() const {
385    switch (Type) {
386    case T_Null:
387      return Null;
388    case T_Boolean:
389      return Boolean;
390    case T_Double:
391    case T_Integer:
392    case T_UINT64:
393      return Number;
394    case T_String:
395    case T_StringRef:
396      return String;
397    case T_Object:
398      return Object;
399    case T_Array:
400      return Array;
401    }
402    llvm_unreachable("Unknown kind");
403  }
404
405  // Typed accessors return std::nullopt/nullptr if the Value is not of this
406  // type.
407  std::optional<std::nullptr_t> getAsNull() const {
408    if (LLVM_LIKELY(Type == T_Null))
409      return nullptr;
410    return std::nullopt;
411  }
412  std::optional<bool> getAsBoolean() const {
413    if (LLVM_LIKELY(Type == T_Boolean))
414      return as<bool>();
415    return std::nullopt;
416  }
417  std::optional<double> getAsNumber() const {
418    if (LLVM_LIKELY(Type == T_Double))
419      return as<double>();
420    if (LLVM_LIKELY(Type == T_Integer))
421      return as<int64_t>();
422    if (LLVM_LIKELY(Type == T_UINT64))
423      return as<uint64_t>();
424    return std::nullopt;
425  }
426  // Succeeds if the Value is a Number, and exactly representable as int64_t.
427  std::optional<int64_t> getAsInteger() const {
428    if (LLVM_LIKELY(Type == T_Integer))
429      return as<int64_t>();
430    if (LLVM_LIKELY(Type == T_UINT64)) {
431      uint64_t U = as<uint64_t>();
432      if (LLVM_LIKELY(U <= uint64_t(std::numeric_limits<int64_t>::max()))) {
433        return U;
434      }
435    }
436    if (LLVM_LIKELY(Type == T_Double)) {
437      double D = as<double>();
438      if (LLVM_LIKELY(std::modf(D, &D) == 0.0 &&
439                      D >= double(std::numeric_limits<int64_t>::min()) &&
440                      D <= double(std::numeric_limits<int64_t>::max())))
441        return D;
442    }
443    return std::nullopt;
444  }
445  std::optional<uint64_t> getAsUINT64() const {
446    if (Type == T_UINT64)
447      return as<uint64_t>();
448    else if (Type == T_Integer) {
449      int64_t N = as<int64_t>();
450      if (N >= 0)
451        return as<uint64_t>();
452    }
453    return std::nullopt;
454  }
455  std::optional<llvm::StringRef> getAsString() const {
456    if (Type == T_String)
457      return llvm::StringRef(as<std::string>());
458    if (LLVM_LIKELY(Type == T_StringRef))
459      return as<llvm::StringRef>();
460    return std::nullopt;
461  }
462  const json::Object *getAsObject() const {
463    return LLVM_LIKELY(Type == T_Object) ? &as<json::Object>() : nullptr;
464  }
465  json::Object *getAsObject() {
466    return LLVM_LIKELY(Type == T_Object) ? &as<json::Object>() : nullptr;
467  }
468  const json::Array *getAsArray() const {
469    return LLVM_LIKELY(Type == T_Array) ? &as<json::Array>() : nullptr;
470  }
471  json::Array *getAsArray() {
472    return LLVM_LIKELY(Type == T_Array) ? &as<json::Array>() : nullptr;
473  }
474
475private:
476  void destroy();
477  void copyFrom(const Value &M);
478  // We allow moving from *const* Values, by marking all members as mutable!
479  // This hack is needed to support initializer-list syntax efficiently.
480  // (std::initializer_list<T> is a container of const T).
481  void moveFrom(const Value &&M);
482  friend class Array;
483  friend class Object;
484
485  template <typename T, typename... U> void create(U &&... V) {
486#if LLVM_ADDRESS_SANITIZER_BUILD
487    // Unpoisoning to prevent overwriting poisoned object (e.g., annotated short
488    // string). Objects that have had their memory poisoned may cause an ASan
489    // error if their memory is reused without calling their destructor.
490    // Unpoisoning the memory prevents this error from occurring.
491    // FIXME: This is a temporary solution to prevent buildbots from failing.
492    //  The more appropriate approach would be to call the object's destructor
493    //  to unpoison memory. This would prevent any potential memory leaks (long
494    //  strings). Read for details:
495    //  https://github.com/llvm/llvm-project/pull/79065#discussion_r1462621761
496    __asan_unpoison_memory_region(&Union, sizeof(T));
497#endif
498    new (reinterpret_cast<T *>(&Union)) T(std::forward<U>(V)...);
499  }
500  template <typename T> T &as() const {
501    // Using this two-step static_cast via void * instead of reinterpret_cast
502    // silences a -Wstrict-aliasing false positive from GCC6 and earlier.
503    void *Storage = static_cast<void *>(&Union);
504    return *static_cast<T *>(Storage);
505  }
506
507  friend class OStream;
508
509  enum ValueType : char16_t {
510    T_Null,
511    T_Boolean,
512    T_Double,
513    T_Integer,
514    T_UINT64,
515    T_StringRef,
516    T_String,
517    T_Object,
518    T_Array,
519  };
520  // All members mutable, see moveFrom().
521  mutable ValueType Type;
522  mutable llvm::AlignedCharArrayUnion<bool, double, int64_t, uint64_t,
523                                      llvm::StringRef, std::string, json::Array,
524                                      json::Object>
525      Union;
526  friend bool operator==(const Value &, const Value &);
527};
528
529bool operator==(const Value &, const Value &);
530inline bool operator!=(const Value &L, const Value &R) { return !(L == R); }
531
532// Array Methods
533inline Value &Array::operator[](size_t I) { return V[I]; }
534inline const Value &Array::operator[](size_t I) const { return V[I]; }
535inline Value &Array::front() { return V.front(); }
536inline const Value &Array::front() const { return V.front(); }
537inline Value &Array::back() { return V.back(); }
538inline const Value &Array::back() const { return V.back(); }
539inline Value *Array::data() { return V.data(); }
540inline const Value *Array::data() const { return V.data(); }
541
542inline typename Array::iterator Array::begin() { return V.begin(); }
543inline typename Array::const_iterator Array::begin() const { return V.begin(); }
544inline typename Array::iterator Array::end() { return V.end(); }
545inline typename Array::const_iterator Array::end() const { return V.end(); }
546
547inline bool Array::empty() const { return V.empty(); }
548inline size_t Array::size() const { return V.size(); }
549inline void Array::reserve(size_t S) { V.reserve(S); }
550
551inline void Array::clear() { V.clear(); }
552inline void Array::push_back(const Value &E) { V.push_back(E); }
553inline void Array::push_back(Value &&E) { V.push_back(std::move(E)); }
554template <typename... Args> inline void Array::emplace_back(Args &&...A) {
555  V.emplace_back(std::forward<Args>(A)...);
556}
557inline void Array::pop_back() { V.pop_back(); }
558inline typename Array::iterator Array::insert(const_iterator P, const Value &E) {
559  return V.insert(P, E);
560}
561inline typename Array::iterator Array::insert(const_iterator P, Value &&E) {
562  return V.insert(P, std::move(E));
563}
564template <typename It>
565inline typename Array::iterator Array::insert(const_iterator P, It A, It Z) {
566  return V.insert(P, A, Z);
567}
568template <typename... Args>
569inline typename Array::iterator Array::emplace(const_iterator P, Args &&...A) {
570  return V.emplace(P, std::forward<Args>(A)...);
571}
572inline bool operator==(const Array &L, const Array &R) { return L.V == R.V; }
573
/// ObjectKey is used to capture keys in Object. Like Value but:
575///   - only strings are allowed
576///   - it's optimized for the string literal case (Owned == nullptr)
577/// Like Value, strings must be UTF-8. See isUTF8 documentation for details.
578class ObjectKey {
579public:
580  ObjectKey(const char *S) : ObjectKey(StringRef(S)) {}
581  ObjectKey(std::string S) : Owned(new std::string(std::move(S))) {
582    if (LLVM_UNLIKELY(!isUTF8(*Owned))) {
583      assert(false && "Invalid UTF-8 in value used as JSON");
584      *Owned = fixUTF8(std::move(*Owned));
585    }
586    Data = *Owned;
587  }
588  ObjectKey(llvm::StringRef S) : Data(S) {
589    if (LLVM_UNLIKELY(!isUTF8(Data))) {
590      assert(false && "Invalid UTF-8 in value used as JSON");
591      *this = ObjectKey(fixUTF8(S));
592    }
593  }
594  ObjectKey(const llvm::SmallVectorImpl<char> &V)
595      : ObjectKey(std::string(V.begin(), V.end())) {}
596  ObjectKey(const llvm::formatv_object_base &V) : ObjectKey(V.str()) {}
597
598  ObjectKey(const ObjectKey &C) { *this = C; }
599  ObjectKey(ObjectKey &&C) : ObjectKey(static_cast<const ObjectKey &&>(C)) {}
600  ObjectKey &operator=(const ObjectKey &C) {
601    if (C.Owned) {
602      Owned.reset(new std::string(*C.Owned));
603      Data = *Owned;
604    } else {
605      Data = C.Data;
606    }
607    return *this;
608  }
609  ObjectKey &operator=(ObjectKey &&) = default;
610
611  operator llvm::StringRef() const { return Data; }
612  std::string str() const { return Data.str(); }
613
614private:
615  // FIXME: this is unneccesarily large (3 pointers). Pointer + length + owned
616  // could be 2 pointers at most.
617  std::unique_ptr<std::string> Owned;
618  llvm::StringRef Data;
619};
620
621inline bool operator==(const ObjectKey &L, const ObjectKey &R) {
622  return llvm::StringRef(L) == llvm::StringRef(R);
623}
624inline bool operator!=(const ObjectKey &L, const ObjectKey &R) {
625  return !(L == R);
626}
627inline bool operator<(const ObjectKey &L, const ObjectKey &R) {
628  return StringRef(L) < StringRef(R);
629}
630
/// Key/value pair used to list-initialize an Object and as the argument of
/// Object::insert().
struct Object::KV {
  ObjectKey K; // Property name.
  Value V;     // Property value.
};
635
636inline Object::Object(std::initializer_list<KV> Properties) {
637  for (const auto &P : Properties) {
638    auto R = try_emplace(P.K, nullptr);
639    if (R.second)
640      R.first->getSecond().moveFrom(std::move(P.V));
641  }
642}
643inline std::pair<Object::iterator, bool> Object::insert(KV E) {
644  return try_emplace(std::move(E.K), std::move(E.V));
645}
646inline bool Object::erase(StringRef K) {
647  return M.erase(ObjectKey(K));
648}
649
650/// A "cursor" marking a position within a Value.
651/// The Value is a tree, and this is the path from the root to the current node.
652/// This is used to associate errors with particular subobjects.
653class Path {
654public:
655  class Root;
656
657  /// Records that the value at the current path is invalid.
658  /// Message is e.g. "expected number" and becomes part of the final error.
659  /// This overwrites any previously written error message in the root.
660  void report(llvm::StringLiteral Message);
661
662  /// The root may be treated as a Path.
663  Path(Root &R) : Parent(nullptr), Seg(&R) {}
664  /// Derives a path for an array element: this[Index]
665  Path index(unsigned Index) const { return Path(this, Segment(Index)); }
666  /// Derives a path for an object field: this.Field
667  Path field(StringRef Field) const { return Path(this, Segment(Field)); }
668
669private:
670  /// One element in a JSON path: an object field (.foo) or array index [27].
671  /// Exception: the root Path encodes a pointer to the Path::Root.
672  class Segment {
673    uintptr_t Pointer;
674    unsigned Offset;
675
676  public:
677    Segment() = default;
678    Segment(Root *R) : Pointer(reinterpret_cast<uintptr_t>(R)) {}
679    Segment(llvm::StringRef Field)
680        : Pointer(reinterpret_cast<uintptr_t>(Field.data())),
681          Offset(static_cast<unsigned>(Field.size())) {}
682    Segment(unsigned Index) : Pointer(0), Offset(Index) {}
683
684    bool isField() const { return Pointer != 0; }
685    StringRef field() const {
686      return StringRef(reinterpret_cast<const char *>(Pointer), Offset);
687    }
688    unsigned index() const { return Offset; }
689    Root *root() const { return reinterpret_cast<Root *>(Pointer); }
690  };
691
692  const Path *Parent;
693  Segment Seg;
694
695  Path(const Path *Parent, Segment S) : Parent(Parent), Seg(S) {}
696};
697
698/// The root is the trivial Path to the root value.
699/// It also stores the latest reported error and the path where it occurred.
700class Path::Root {
701  llvm::StringRef Name;
702  llvm::StringLiteral ErrorMessage;
703  std::vector<Path::Segment> ErrorPath; // Only valid in error state. Reversed.
704
705  friend void Path::report(llvm::StringLiteral Message);
706
707public:
708  Root(llvm::StringRef Name = "") : Name(Name), ErrorMessage("") {}
709  // No copy/move allowed as there are incoming pointers.
710  Root(Root &&) = delete;
711  Root &operator=(Root &&) = delete;
712  Root(const Root &) = delete;
713  Root &operator=(const Root &) = delete;
714
715  /// Returns the last error reported, or else a generic error.
716  Error getError() const;
717  /// Print the root value with the error shown inline as a comment.
718  /// Unrelated parts of the value are elided for brevity, e.g.
719  ///   {
720  ///      "id": 42,
721  ///      "name": /* expected string */ null,
722  ///      "properties": { ... }
723  ///   }
724  void printErrorContext(const Value &, llvm::raw_ostream &) const;
725};
726
727// Standard deserializers are provided for primitive types.
728// See comments on Value.
729inline bool fromJSON(const Value &E, std::string &Out, Path P) {
730  if (auto S = E.getAsString()) {
731    Out = std::string(*S);
732    return true;
733  }
734  P.report("expected string");
735  return false;
736}
737inline bool fromJSON(const Value &E, int &Out, Path P) {
738  if (auto S = E.getAsInteger()) {
739    Out = *S;
740    return true;
741  }
742  P.report("expected integer");
743  return false;
744}
745inline bool fromJSON(const Value &E, int64_t &Out, Path P) {
746  if (auto S = E.getAsInteger()) {
747    Out = *S;
748    return true;
749  }
750  P.report("expected integer");
751  return false;
752}
753inline bool fromJSON(const Value &E, double &Out, Path P) {
754  if (auto S = E.getAsNumber()) {
755    Out = *S;
756    return true;
757  }
758  P.report("expected number");
759  return false;
760}
761inline bool fromJSON(const Value &E, bool &Out, Path P) {
762  if (auto S = E.getAsBoolean()) {
763    Out = *S;
764    return true;
765  }
766  P.report("expected boolean");
767  return false;
768}
769inline bool fromJSON(const Value &E, uint64_t &Out, Path P) {
770  if (auto S = E.getAsUINT64()) {
771    Out = *S;
772    return true;
773  }
774  P.report("expected uint64_t");
775  return false;
776}
777inline bool fromJSON(const Value &E, std::nullptr_t &Out, Path P) {
778  if (auto S = E.getAsNull()) {
779    Out = *S;
780    return true;
781  }
782  P.report("expected null");
783  return false;
784}
785template <typename T>
786bool fromJSON(const Value &E, std::optional<T> &Out, Path P) {
787  if (E.getAsNull()) {
788    Out = std::nullopt;
789    return true;
790  }
791  T Result = {};
792  if (!fromJSON(E, Result, P))
793    return false;
794  Out = std::move(Result);
795  return true;
796}
797template <typename T>
798bool fromJSON(const Value &E, std::vector<T> &Out, Path P) {
799  if (auto *A = E.getAsArray()) {
800    Out.clear();
801    Out.resize(A->size());
802    for (size_t I = 0; I < A->size(); ++I)
803      if (!fromJSON((*A)[I], Out[I], P.index(I)))
804        return false;
805    return true;
806  }
807  P.report("expected array");
808  return false;
809}
810template <typename T>
811bool fromJSON(const Value &E, std::map<std::string, T> &Out, Path P) {
812  if (auto *O = E.getAsObject()) {
813    Out.clear();
814    for (const auto &KV : *O)
815      if (!fromJSON(KV.second, Out[std::string(llvm::StringRef(KV.first))],
816                    P.field(KV.first)))
817        return false;
818    return true;
819  }
820  P.report("expected object");
821  return false;
822}
823
824// Allow serialization of std::optional<T> for supported T.
825template <typename T> Value toJSON(const std::optional<T> &Opt) {
826  return Opt ? Value(*Opt) : Value(nullptr);
827}
828
829/// Helper for mapping JSON objects onto protocol structs.
830///
831/// Example:
832/// \code
833///   bool fromJSON(const Value &E, MyStruct &R, Path P) {
834///     ObjectMapper O(E, P);
835///     // When returning false, error details were already reported.
836///     return O && O.map("mandatory_field", R.MandatoryField) &&
837///         O.mapOptional("optional_field", R.OptionalField);
838///   }
839/// \endcode
840class ObjectMapper {
841public:
842  /// If O is not an object, this mapper is invalid and an error is reported.
843  ObjectMapper(const Value &E, Path P) : O(E.getAsObject()), P(P) {
844    if (!O)
845      P.report("expected object");
846  }
847
848  /// True if the expression is an object.
849  /// Must be checked before calling map().
850  operator bool() const { return O; }
851
852  /// Maps a property to a field.
853  /// If the property is missing or invalid, reports an error.
854  template <typename T> bool map(StringLiteral Prop, T &Out) {
855    assert(*this && "Must check this is an object before calling map()");
856    if (const Value *E = O->get(Prop))
857      return fromJSON(*E, Out, P.field(Prop));
858    P.field(Prop).report("missing value");
859    return false;
860  }
861
862  /// Maps a property to a field, if it exists.
863  /// If the property exists and is invalid, reports an error.
864  /// (Optional requires special handling, because missing keys are OK).
865  template <typename T> bool map(StringLiteral Prop, std::optional<T> &Out) {
866    assert(*this && "Must check this is an object before calling map()");
867    if (const Value *E = O->get(Prop))
868      return fromJSON(*E, Out, P.field(Prop));
869    Out = std::nullopt;
870    return true;
871  }
872
873  /// Maps a property to a field, if it exists.
874  /// If the property exists and is invalid, reports an error.
875  /// If the property does not exist, Out is unchanged.
876  template <typename T> bool mapOptional(StringLiteral Prop, T &Out) {
877    assert(*this && "Must check this is an object before calling map()");
878    if (const Value *E = O->get(Prop))
879      return fromJSON(*E, Out, P.field(Prop));
880    return true;
881  }
882
883private:
884  const Object *O;
885  Path P;
886};
887
/// Parses the provided JSON source, or returns a ParseError.
///
/// \param JSON The JSON source text to parse.
/// \returns The parsed Value on success, or a ParseError on failure.
///
/// The returned Value is self-contained and owns its strings (they do not refer
/// to the original source).
llvm::Expected<Value> parse(llvm::StringRef JSON);
892
893class ParseError : public llvm::ErrorInfo<ParseError> {
894  const char *Msg;
895  unsigned Line, Column, Offset;
896
897public:
898  static char ID;
899  ParseError(const char *Msg, unsigned Line, unsigned Column, unsigned Offset)
900      : Msg(Msg), Line(Line), Column(Column), Offset(Offset) {}
901  void log(llvm::raw_ostream &OS) const override {
902    OS << llvm::formatv("[{0}:{1}, byte={2}]: {3}", Line, Column, Offset, Msg);
903  }
904  std::error_code convertToErrorCode() const override {
905    return llvm::inconvertibleErrorCode();
906  }
907};
908
909/// Version of parse() that converts the parsed value to the type T.
910/// RootName describes the root object and is used in error messages.
911template <typename T>
912Expected<T> parse(const llvm::StringRef &JSON, const char *RootName = "") {
913  auto V = parse(JSON);
914  if (!V)
915    return V.takeError();
916  Path::Root R(RootName);
917  T Result;
918  if (fromJSON(*V, Result, R))
919    return std::move(Result);
920  return R.getError();
921}
922
923/// json::OStream allows writing well-formed JSON without materializing
924/// all structures as json::Value ahead of time.
925/// It's faster, lower-level, and less safe than OS << json::Value.
926/// It also allows emitting more constructs, such as comments.
927///
928/// Only one "top-level" object can be written to a stream.
929/// Simplest usage involves passing lambdas (Blocks) to fill in containers:
930///
931///   json::OStream J(OS);
932///   J.array([&]{
933///     for (const Event &E : Events)
934///       J.object([&] {
935///         J.attribute("timestamp", int64_t(E.Time));
936///         J.attributeArray("participants", [&] {
937///           for (const Participant &P : E.Participants)
938///             J.value(P.toString());
939///         });
940///       });
941///   });
942///
943/// This would produce JSON like:
944///
945///   [
946///     {
947///       "timestamp": 19287398741,
948///       "participants": [
949///         "King Kong",
950///         "Miley Cyrus",
951///         "Cleopatra"
952///       ]
953///     },
954///     ...
955///   ]
956///
957/// The lower level begin/end methods (arrayBegin()) are more flexible but
958/// care must be taken to pair them correctly:
959///
960///   json::OStream J(OS);
///   J.arrayBegin();
962///   for (const Event &E : Events) {
963///     J.objectBegin();
964///     J.attribute("timestamp", int64_t(E.Time));
965///     J.attributeBegin("participants");
966///     for (const Participant &P : E.Participants)
967///       J.value(P.toString());
968///     J.attributeEnd();
969///     J.objectEnd();
970///   }
971///   J.arrayEnd();
972///
973/// If the call sequence isn't valid JSON, asserts will fire in debug mode.
974/// This can be mismatched begin()/end() pairs, trying to emit attributes inside
975/// an array, and so on.
976/// With asserts disabled, this is undefined behavior.
977class OStream {
978 public:
979  using Block = llvm::function_ref<void()>;
980  // If IndentSize is nonzero, output is pretty-printed.
981  explicit OStream(llvm::raw_ostream &OS, unsigned IndentSize = 0)
982      : OS(OS), IndentSize(IndentSize) {
983    Stack.emplace_back();
984  }
985  ~OStream() {
986    assert(Stack.size() == 1 && "Unmatched begin()/end()");
987    assert(Stack.back().Ctx == Singleton);
988    assert(Stack.back().HasValue && "Did not write top-level value");
989  }
990
991  /// Flushes the underlying ostream. OStream does not buffer internally.
992  void flush() { OS.flush(); }
993
994  // High level functions to output a value.
995  // Valid at top-level (exactly once), in an attribute value (exactly once),
996  // or in an array (any number of times).
997
998  /// Emit a self-contained value (number, string, vector<string> etc).
999  void value(const Value &V);
1000  /// Emit an array whose elements are emitted in the provided Block.
1001  void array(Block Contents) {
1002    arrayBegin();
1003    Contents();
1004    arrayEnd();
1005  }
1006  /// Emit an object whose elements are emitted in the provided Block.
1007  void object(Block Contents) {
1008    objectBegin();
1009    Contents();
1010    objectEnd();
1011  }
1012  /// Emit an externally-serialized value.
1013  /// The caller must write exactly one valid JSON value to the provided stream.
1014  /// No validation or formatting of this value occurs.
1015  void rawValue(llvm::function_ref<void(raw_ostream &)> Contents) {
1016    rawValueBegin();
1017    Contents(OS);
1018    rawValueEnd();
1019  }
1020  void rawValue(llvm::StringRef Contents) {
1021    rawValue([&](raw_ostream &OS) { OS << Contents; });
1022  }
1023  /// Emit a JavaScript comment associated with the next printed value.
1024  /// The string must be valid until the next attribute or value is emitted.
1025  /// Comments are not part of standard JSON, and many parsers reject them!
1026  void comment(llvm::StringRef);
1027
1028  // High level functions to output object attributes.
1029  // Valid only within an object (any number of times).
1030
1031  /// Emit an attribute whose value is self-contained (number, vector<int> etc).
1032  void attribute(llvm::StringRef Key, const Value& Contents) {
1033    attributeImpl(Key, [&] { value(Contents); });
1034  }
1035  /// Emit an attribute whose value is an array with elements from the Block.
1036  void attributeArray(llvm::StringRef Key, Block Contents) {
1037    attributeImpl(Key, [&] { array(Contents); });
1038  }
1039  /// Emit an attribute whose value is an object with attributes from the Block.
1040  void attributeObject(llvm::StringRef Key, Block Contents) {
1041    attributeImpl(Key, [&] { object(Contents); });
1042  }
1043
1044  // Low-level begin/end functions to output arrays, objects, and attributes.
1045  // Must be correctly paired. Allowed contexts are as above.
1046
1047  void arrayBegin();
1048  void arrayEnd();
1049  void objectBegin();
1050  void objectEnd();
1051  void attributeBegin(llvm::StringRef Key);
1052  void attributeEnd();
1053  raw_ostream &rawValueBegin();
1054  void rawValueEnd();
1055
1056private:
1057  void attributeImpl(llvm::StringRef Key, Block Contents) {
1058    attributeBegin(Key);
1059    Contents();
1060    attributeEnd();
1061  }
1062
1063  void valueBegin();
1064  void flushComment();
1065  void newline();
1066
1067  enum Context {
1068    Singleton, // Top level, or object attribute.
1069    Array,
1070    Object,
1071    RawValue, // External code writing a value to OS directly.
1072  };
1073  struct State {
1074    Context Ctx = Singleton;
1075    bool HasValue = false;
1076  };
1077  llvm::SmallVector<State, 16> Stack; // Never empty.
1078  llvm::StringRef PendingComment;
1079  llvm::raw_ostream &OS;
1080  unsigned IndentSize;
1081  unsigned Indent = 0;
1082};
1083
1084/// Serializes this Value to JSON, writing it to the provided stream.
1085/// The formatting is compact (no extra whitespace) and deterministic.
1086/// For pretty-printing, use the formatv() format_provider below.
1087inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Value &V) {
1088  OStream(OS).value(V);
1089  return OS;
1090}
1091} // namespace json
1092
/// Allow printing json::Value with formatv().
/// The default style is basic/compact formatting, like operator<<.
/// A format string like formatv("{0:2}", Value) pretty-prints with indent 2.
template <> struct format_provider<llvm::json::Value> {
  /// Formatting hook; only declared here, the definition is not in this part
  /// of the file. The arguments are the value to print, the destination
  /// stream, and presumably the style text from the format string (e.g. the
  /// "2" in "{0:2}") - confirm against the definition.
  static void format(const llvm::json::Value &, raw_ostream &, StringRef);
};
1099} // namespace llvm
1100
1101#endif
1102