1336809Sdim//===--- JSON.h - JSON values, parsing and serialization -------*- C++ -*-===// 2336809Sdim// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6336809Sdim// 7336809Sdim//===---------------------------------------------------------------------===// 8336809Sdim/// 9336809Sdim/// \file 10336809Sdim/// This file supports working with JSON data. 11336809Sdim/// 12336809Sdim/// It comprises: 13336809Sdim/// 14336809Sdim/// - classes which hold dynamically-typed parsed JSON structures 15336809Sdim/// These are value types that can be composed, inspected, and modified. 16336809Sdim/// See json::Value, and the related types json::Object and json::Array. 17336809Sdim/// 18336809Sdim/// - functions to parse JSON text into Values, and to serialize Values to text. 19336809Sdim/// See parse(), operator<<, and format_provider. 20336809Sdim/// 21336809Sdim/// - a convention and helpers for mapping between json::Value and user-defined 22336809Sdim/// types. See fromJSON(), ObjectMapper, and the class comment on Value. 23336809Sdim/// 24353358Sdim/// - an output API json::OStream which can emit JSON without materializing 25353358Sdim/// all structures as json::Value. 26353358Sdim/// 27336809Sdim/// Typically, JSON data would be read from an external source, parsed into 28336809Sdim/// a Value, and then converted into some native data structure before doing 29336809Sdim/// real work on it. (And vice versa when writing). 30336809Sdim/// 31336809Sdim/// Other serialization mechanisms you may consider: 32336809Sdim/// 33336809Sdim/// - YAML is also text-based, and more human-readable than JSON. It's a more 34336809Sdim/// complex format and data model, and YAML parsers aren't ubiquitous. 
35336809Sdim/// YAMLParser.h is a streaming parser suitable for parsing large documents 36336809Sdim/// (including JSON, as YAML is a superset). It can be awkward to use 37336809Sdim/// directly. YAML I/O (YAMLTraits.h) provides data mapping that is more 38336809Sdim/// declarative than the toJSON/fromJSON conventions here. 39336809Sdim/// 40336809Sdim/// - LLVM bitstream is a space- and CPU- efficient binary format. Typically it 41336809Sdim/// encodes LLVM IR ("bitcode"), but it can be a container for other data. 42353358Sdim/// Low-level reader/writer libraries are in Bitstream/Bitstream*.h 43336809Sdim/// 44336809Sdim//===---------------------------------------------------------------------===// 45336809Sdim 46336809Sdim#ifndef LLVM_SUPPORT_JSON_H 47336809Sdim#define LLVM_SUPPORT_JSON_H 48336809Sdim 49336809Sdim#include "llvm/ADT/DenseMap.h" 50336809Sdim#include "llvm/ADT/SmallVector.h" 51336809Sdim#include "llvm/ADT/StringRef.h" 52336809Sdim#include "llvm/Support/Error.h" 53336809Sdim#include "llvm/Support/FormatVariadic.h" 54336809Sdim#include "llvm/Support/raw_ostream.h" 55336809Sdim#include <map> 56336809Sdim 57336809Sdimnamespace llvm { 58336809Sdimnamespace json { 59336809Sdim 60336809Sdim// === String encodings === 61336809Sdim// 62336809Sdim// JSON strings are character sequences (not byte sequences like std::string). 63336809Sdim// We need to know the encoding, and for simplicity only support UTF-8. 64336809Sdim// 65336809Sdim// - When parsing, invalid UTF-8 is a syntax error like any other 66336809Sdim// 67336809Sdim// - When creating Values from strings, callers must ensure they are UTF-8. 68336809Sdim// with asserts on, invalid UTF-8 will crash the program 69336809Sdim// with asserts off, we'll substitute the replacement character (U+FFFD) 70336809Sdim// Callers can use json::isUTF8() and json::fixUTF8() for validation. 71336809Sdim// 72336809Sdim// - When retrieving strings from Values (e.g. 
asString()), the result will 73336809Sdim// always be valid UTF-8. 74336809Sdim 75336809Sdim/// Returns true if \p S is valid UTF-8, which is required for use as JSON. 76336809Sdim/// If it returns false, \p Offset is set to a byte offset near the first error. 77336809Sdimbool isUTF8(llvm::StringRef S, size_t *ErrOffset = nullptr); 78336809Sdim/// Replaces invalid UTF-8 sequences in \p S with the replacement character 79336809Sdim/// (U+FFFD). The returned string is valid UTF-8. 80336809Sdim/// This is much slower than isUTF8, so test that first. 81336809Sdimstd::string fixUTF8(llvm::StringRef S); 82336809Sdim 83336809Sdimclass Array; 84336809Sdimclass ObjectKey; 85336809Sdimclass Value; 86336809Sdimtemplate <typename T> Value toJSON(const llvm::Optional<T> &Opt); 87336809Sdim 88336809Sdim/// An Object is a JSON object, which maps strings to heterogenous JSON values. 89336809Sdim/// It simulates DenseMap<ObjectKey, Value>. ObjectKey is a maybe-owned string. 90336809Sdimclass Object { 91336809Sdim using Storage = DenseMap<ObjectKey, Value, llvm::DenseMapInfo<StringRef>>; 92336809Sdim Storage M; 93336809Sdim 94336809Sdimpublic: 95336809Sdim using key_type = ObjectKey; 96336809Sdim using mapped_type = Value; 97336809Sdim using value_type = Storage::value_type; 98336809Sdim using iterator = Storage::iterator; 99336809Sdim using const_iterator = Storage::const_iterator; 100336809Sdim 101353358Sdim Object() = default; 102336809Sdim // KV is a trivial key-value struct for list-initialization. 103336809Sdim // (using std::pair forces extra copies). 
104336809Sdim struct KV; 105336809Sdim explicit Object(std::initializer_list<KV> Properties); 106336809Sdim 107336809Sdim iterator begin() { return M.begin(); } 108336809Sdim const_iterator begin() const { return M.begin(); } 109336809Sdim iterator end() { return M.end(); } 110336809Sdim const_iterator end() const { return M.end(); } 111336809Sdim 112336809Sdim bool empty() const { return M.empty(); } 113336809Sdim size_t size() const { return M.size(); } 114336809Sdim 115336809Sdim void clear() { M.clear(); } 116336809Sdim std::pair<iterator, bool> insert(KV E); 117336809Sdim template <typename... Ts> 118336809Sdim std::pair<iterator, bool> try_emplace(const ObjectKey &K, Ts &&... Args) { 119336809Sdim return M.try_emplace(K, std::forward<Ts>(Args)...); 120336809Sdim } 121336809Sdim template <typename... Ts> 122336809Sdim std::pair<iterator, bool> try_emplace(ObjectKey &&K, Ts &&... Args) { 123336809Sdim return M.try_emplace(std::move(K), std::forward<Ts>(Args)...); 124336809Sdim } 125360784Sdim bool erase(StringRef K); 126360784Sdim void erase(iterator I) { M.erase(I); } 127336809Sdim 128336809Sdim iterator find(StringRef K) { return M.find_as(K); } 129336809Sdim const_iterator find(StringRef K) const { return M.find_as(K); } 130336809Sdim // operator[] acts as if Value was default-constructible as null. 131336809Sdim Value &operator[](const ObjectKey &K); 132336809Sdim Value &operator[](ObjectKey &&K); 133336809Sdim // Look up a property, returning nullptr if it doesn't exist. 
134336809Sdim Value *get(StringRef K); 135336809Sdim const Value *get(StringRef K) const; 136336809Sdim // Typed accessors return None/nullptr if 137336809Sdim // - the property doesn't exist 138336809Sdim // - or it has the wrong type 139336809Sdim llvm::Optional<std::nullptr_t> getNull(StringRef K) const; 140336809Sdim llvm::Optional<bool> getBoolean(StringRef K) const; 141336809Sdim llvm::Optional<double> getNumber(StringRef K) const; 142336809Sdim llvm::Optional<int64_t> getInteger(StringRef K) const; 143336809Sdim llvm::Optional<llvm::StringRef> getString(StringRef K) const; 144336809Sdim const json::Object *getObject(StringRef K) const; 145336809Sdim json::Object *getObject(StringRef K); 146336809Sdim const json::Array *getArray(StringRef K) const; 147336809Sdim json::Array *getArray(StringRef K); 148336809Sdim}; 149336809Sdimbool operator==(const Object &LHS, const Object &RHS); 150336809Sdiminline bool operator!=(const Object &LHS, const Object &RHS) { 151336809Sdim return !(LHS == RHS); 152336809Sdim} 153336809Sdim 154336809Sdim/// An Array is a JSON array, which contains heterogeneous JSON values. 155336809Sdim/// It simulates std::vector<Value>. 
156336809Sdimclass Array { 157336809Sdim std::vector<Value> V; 158336809Sdim 159336809Sdimpublic: 160336809Sdim using value_type = Value; 161336809Sdim using iterator = std::vector<Value>::iterator; 162336809Sdim using const_iterator = std::vector<Value>::const_iterator; 163336809Sdim 164353358Sdim Array() = default; 165336809Sdim explicit Array(std::initializer_list<Value> Elements); 166336809Sdim template <typename Collection> explicit Array(const Collection &C) { 167336809Sdim for (const auto &V : C) 168336809Sdim emplace_back(V); 169336809Sdim } 170336809Sdim 171336809Sdim Value &operator[](size_t I) { return V[I]; } 172336809Sdim const Value &operator[](size_t I) const { return V[I]; } 173336809Sdim Value &front() { return V.front(); } 174336809Sdim const Value &front() const { return V.front(); } 175336809Sdim Value &back() { return V.back(); } 176336809Sdim const Value &back() const { return V.back(); } 177336809Sdim Value *data() { return V.data(); } 178336809Sdim const Value *data() const { return V.data(); } 179336809Sdim 180336809Sdim iterator begin() { return V.begin(); } 181336809Sdim const_iterator begin() const { return V.begin(); } 182336809Sdim iterator end() { return V.end(); } 183336809Sdim const_iterator end() const { return V.end(); } 184336809Sdim 185336809Sdim bool empty() const { return V.empty(); } 186336809Sdim size_t size() const { return V.size(); } 187353358Sdim void reserve(size_t S) { V.reserve(S); } 188336809Sdim 189336809Sdim void clear() { V.clear(); } 190336809Sdim void push_back(const Value &E) { V.push_back(E); } 191336809Sdim void push_back(Value &&E) { V.push_back(std::move(E)); } 192336809Sdim template <typename... Args> void emplace_back(Args &&... A) { 193336809Sdim V.emplace_back(std::forward<Args>(A)...); 194336809Sdim } 195336809Sdim void pop_back() { V.pop_back(); } 196336809Sdim // FIXME: insert() takes const_iterator since C++11, old libstdc++ disagrees. 
197336809Sdim iterator insert(iterator P, const Value &E) { return V.insert(P, E); } 198336809Sdim iterator insert(iterator P, Value &&E) { 199336809Sdim return V.insert(P, std::move(E)); 200336809Sdim } 201336809Sdim template <typename It> iterator insert(iterator P, It A, It Z) { 202336809Sdim return V.insert(P, A, Z); 203336809Sdim } 204336809Sdim template <typename... Args> iterator emplace(const_iterator P, Args &&... A) { 205336809Sdim return V.emplace(P, std::forward<Args>(A)...); 206336809Sdim } 207336809Sdim 208336809Sdim friend bool operator==(const Array &L, const Array &R) { return L.V == R.V; } 209336809Sdim}; 210336809Sdiminline bool operator!=(const Array &L, const Array &R) { return !(L == R); } 211336809Sdim 212336809Sdim/// A Value is an JSON value of unknown type. 213336809Sdim/// They can be copied, but should generally be moved. 214336809Sdim/// 215336809Sdim/// === Composing values === 216336809Sdim/// 217336809Sdim/// You can implicitly construct Values from: 218336809Sdim/// - strings: std::string, SmallString, formatv, StringRef, char* 219336809Sdim/// (char*, and StringRef are references, not copies!) 220336809Sdim/// - numbers 221336809Sdim/// - booleans 222336809Sdim/// - null: nullptr 223336809Sdim/// - arrays: {"foo", 42.0, false} 224336809Sdim/// - serializable things: types with toJSON(const T&)->Value, found by ADL 225336809Sdim/// 226336809Sdim/// They can also be constructed from object/array helpers: 227336809Sdim/// - json::Object is a type like map<ObjectKey, Value> 228336809Sdim/// - json::Array is a type like vector<Value> 229336809Sdim/// These can be list-initialized, or used to build up collections in a loop. 230336809Sdim/// json::ary(Collection) converts all items in a collection to Values. 
///
/// === Inspecting values ===
///
/// Each Value is one of the JSON kinds:
///   null    (nullptr_t)
///   boolean (bool)
///   number  (double or int64)
///   string  (StringRef)
///   array   (json::Array)
///   object  (json::Object)
///
/// The kind can be queried directly, or implicitly via the typed accessors:
///   if (Optional<StringRef> S = E.getAsString())
///     assert(E.kind() == Value::String);
///
/// Array and Object also have typed indexing accessors for easy traversal:
///   Expected<Value> E = parse(R"( {"options": {"font": "sans-serif"}} )");
///   if (Object* O = E->getAsObject())
///     if (Object* Opts = O->getObject("options"))
///       if (Optional<StringRef> Font = Opts->getString("font"))
///         assert(Opts->get("font")->kind() == Value::String);
///
/// === Converting JSON values to C++ types ===
///
/// The convention is to have a deserializer function findable via ADL:
///     fromJSON(const json::Value&, T&)->bool
/// Deserializers are provided for:
///   - bool
///   - int and int64_t
///   - double
///   - std::string
///   - vector<T>, where T is deserializable
///   - map<string, T>, where T is deserializable
///   - Optional<T>, where T is deserializable
/// ObjectMapper can help writing fromJSON() functions for object types.
///
/// For conversion in the other direction, the serializer function is:
///    toJSON(const T&) -> json::Value
/// If this exists, then it also allows constructing Value from T, and can
/// be used to serialize vector<T>, map<string, T>, and Optional<T>.
271336809Sdim/// 272336809Sdim/// === Serialization === 273336809Sdim/// 274336809Sdim/// Values can be serialized to JSON: 275336809Sdim/// 1) raw_ostream << Value // Basic formatting. 276336809Sdim/// 2) raw_ostream << formatv("{0}", Value) // Basic formatting. 277336809Sdim/// 3) raw_ostream << formatv("{0:2}", Value) // Pretty-print with indent 2. 278336809Sdim/// 279336809Sdim/// And parsed: 280336809Sdim/// Expected<Value> E = json::parse("[1, 2, null]"); 281336809Sdim/// assert(E && E->kind() == Value::Array); 282336809Sdimclass Value { 283336809Sdimpublic: 284336809Sdim enum Kind { 285336809Sdim Null, 286336809Sdim Boolean, 287336809Sdim /// Number values can store both int64s and doubles at full precision, 288336809Sdim /// depending on what they were constructed/parsed from. 289336809Sdim Number, 290336809Sdim String, 291336809Sdim Array, 292336809Sdim Object, 293336809Sdim }; 294336809Sdim 295336809Sdim // It would be nice to have Value() be null. But that would make {} null too. 296336809Sdim Value(const Value &M) { copyFrom(M); } 297336809Sdim Value(Value &&M) { moveFrom(std::move(M)); } 298336809Sdim Value(std::initializer_list<Value> Elements); 299336809Sdim Value(json::Array &&Elements) : Type(T_Array) { 300336809Sdim create<json::Array>(std::move(Elements)); 301336809Sdim } 302344779Sdim template <typename Elt> 303344779Sdim Value(const std::vector<Elt> &C) : Value(json::Array(C)) {} 304336809Sdim Value(json::Object &&Properties) : Type(T_Object) { 305336809Sdim create<json::Object>(std::move(Properties)); 306336809Sdim } 307344779Sdim template <typename Elt> 308344779Sdim Value(const std::map<std::string, Elt> &C) : Value(json::Object(C)) {} 309336809Sdim // Strings: types with value semantics. Must be valid UTF-8. 
310336809Sdim Value(std::string V) : Type(T_String) { 311336809Sdim if (LLVM_UNLIKELY(!isUTF8(V))) { 312336809Sdim assert(false && "Invalid UTF-8 in value used as JSON"); 313336809Sdim V = fixUTF8(std::move(V)); 314336809Sdim } 315336809Sdim create<std::string>(std::move(V)); 316336809Sdim } 317336809Sdim Value(const llvm::SmallVectorImpl<char> &V) 318353358Sdim : Value(std::string(V.begin(), V.end())) {} 319353358Sdim Value(const llvm::formatv_object_base &V) : Value(V.str()) {} 320336809Sdim // Strings: types with reference semantics. Must be valid UTF-8. 321336809Sdim Value(StringRef V) : Type(T_StringRef) { 322336809Sdim create<llvm::StringRef>(V); 323336809Sdim if (LLVM_UNLIKELY(!isUTF8(V))) { 324336809Sdim assert(false && "Invalid UTF-8 in value used as JSON"); 325336809Sdim *this = Value(fixUTF8(V)); 326336809Sdim } 327336809Sdim } 328336809Sdim Value(const char *V) : Value(StringRef(V)) {} 329336809Sdim Value(std::nullptr_t) : Type(T_Null) {} 330336809Sdim // Boolean (disallow implicit conversions). 331336809Sdim // (The last template parameter is a dummy to keep templates distinct.) 332336809Sdim template < 333336809Sdim typename T, 334336809Sdim typename = typename std::enable_if<std::is_same<T, bool>::value>::type, 335336809Sdim bool = false> 336336809Sdim Value(T B) : Type(T_Boolean) { 337336809Sdim create<bool>(B); 338336809Sdim } 339336809Sdim // Integers (except boolean). Must be non-narrowing convertible to int64_t. 340336809Sdim template < 341336809Sdim typename T, 342336809Sdim typename = typename std::enable_if<std::is_integral<T>::value>::type, 343336809Sdim typename = typename std::enable_if<!std::is_same<T, bool>::value>::type> 344336809Sdim Value(T I) : Type(T_Integer) { 345336809Sdim create<int64_t>(int64_t{I}); 346336809Sdim } 347336809Sdim // Floating point. Must be non-narrowing convertible to double. 
348336809Sdim template <typename T, 349336809Sdim typename = 350336809Sdim typename std::enable_if<std::is_floating_point<T>::value>::type, 351336809Sdim double * = nullptr> 352336809Sdim Value(T D) : Type(T_Double) { 353336809Sdim create<double>(double{D}); 354336809Sdim } 355336809Sdim // Serializable types: with a toJSON(const T&)->Value function, found by ADL. 356336809Sdim template <typename T, 357336809Sdim typename = typename std::enable_if<std::is_same< 358336809Sdim Value, decltype(toJSON(*(const T *)nullptr))>::value>, 359336809Sdim Value * = nullptr> 360336809Sdim Value(const T &V) : Value(toJSON(V)) {} 361336809Sdim 362336809Sdim Value &operator=(const Value &M) { 363336809Sdim destroy(); 364336809Sdim copyFrom(M); 365336809Sdim return *this; 366336809Sdim } 367336809Sdim Value &operator=(Value &&M) { 368336809Sdim destroy(); 369336809Sdim moveFrom(std::move(M)); 370336809Sdim return *this; 371336809Sdim } 372336809Sdim ~Value() { destroy(); } 373336809Sdim 374336809Sdim Kind kind() const { 375336809Sdim switch (Type) { 376336809Sdim case T_Null: 377336809Sdim return Null; 378336809Sdim case T_Boolean: 379336809Sdim return Boolean; 380336809Sdim case T_Double: 381336809Sdim case T_Integer: 382336809Sdim return Number; 383336809Sdim case T_String: 384336809Sdim case T_StringRef: 385336809Sdim return String; 386336809Sdim case T_Object: 387336809Sdim return Object; 388336809Sdim case T_Array: 389336809Sdim return Array; 390336809Sdim } 391336809Sdim llvm_unreachable("Unknown kind"); 392336809Sdim } 393336809Sdim 394336809Sdim // Typed accessors return None/nullptr if the Value is not of this type. 
395336809Sdim llvm::Optional<std::nullptr_t> getAsNull() const { 396336809Sdim if (LLVM_LIKELY(Type == T_Null)) 397336809Sdim return nullptr; 398336809Sdim return llvm::None; 399336809Sdim } 400336809Sdim llvm::Optional<bool> getAsBoolean() const { 401336809Sdim if (LLVM_LIKELY(Type == T_Boolean)) 402336809Sdim return as<bool>(); 403336809Sdim return llvm::None; 404336809Sdim } 405336809Sdim llvm::Optional<double> getAsNumber() const { 406336809Sdim if (LLVM_LIKELY(Type == T_Double)) 407336809Sdim return as<double>(); 408336809Sdim if (LLVM_LIKELY(Type == T_Integer)) 409336809Sdim return as<int64_t>(); 410336809Sdim return llvm::None; 411336809Sdim } 412336809Sdim // Succeeds if the Value is a Number, and exactly representable as int64_t. 413336809Sdim llvm::Optional<int64_t> getAsInteger() const { 414336809Sdim if (LLVM_LIKELY(Type == T_Integer)) 415336809Sdim return as<int64_t>(); 416336809Sdim if (LLVM_LIKELY(Type == T_Double)) { 417336809Sdim double D = as<double>(); 418336809Sdim if (LLVM_LIKELY(std::modf(D, &D) == 0.0 && 419336809Sdim D >= double(std::numeric_limits<int64_t>::min()) && 420336809Sdim D <= double(std::numeric_limits<int64_t>::max()))) 421336809Sdim return D; 422336809Sdim } 423336809Sdim return llvm::None; 424336809Sdim } 425336809Sdim llvm::Optional<llvm::StringRef> getAsString() const { 426336809Sdim if (Type == T_String) 427336809Sdim return llvm::StringRef(as<std::string>()); 428336809Sdim if (LLVM_LIKELY(Type == T_StringRef)) 429336809Sdim return as<llvm::StringRef>(); 430336809Sdim return llvm::None; 431336809Sdim } 432336809Sdim const json::Object *getAsObject() const { 433336809Sdim return LLVM_LIKELY(Type == T_Object) ? &as<json::Object>() : nullptr; 434336809Sdim } 435336809Sdim json::Object *getAsObject() { 436336809Sdim return LLVM_LIKELY(Type == T_Object) ? &as<json::Object>() : nullptr; 437336809Sdim } 438336809Sdim const json::Array *getAsArray() const { 439336809Sdim return LLVM_LIKELY(Type == T_Array) ? 
&as<json::Array>() : nullptr; 440336809Sdim } 441336809Sdim json::Array *getAsArray() { 442336809Sdim return LLVM_LIKELY(Type == T_Array) ? &as<json::Array>() : nullptr; 443336809Sdim } 444336809Sdim 445336809Sdimprivate: 446336809Sdim void destroy(); 447336809Sdim void copyFrom(const Value &M); 448336809Sdim // We allow moving from *const* Values, by marking all members as mutable! 449336809Sdim // This hack is needed to support initializer-list syntax efficiently. 450336809Sdim // (std::initializer_list<T> is a container of const T). 451336809Sdim void moveFrom(const Value &&M); 452336809Sdim friend class Array; 453336809Sdim friend class Object; 454336809Sdim 455336809Sdim template <typename T, typename... U> void create(U &&... V) { 456336809Sdim new (reinterpret_cast<T *>(Union.buffer)) T(std::forward<U>(V)...); 457336809Sdim } 458336809Sdim template <typename T> T &as() const { 459344779Sdim // Using this two-step static_cast via void * instead of reinterpret_cast 460344779Sdim // silences a -Wstrict-aliasing false positive from GCC6 and earlier. 461344779Sdim void *Storage = static_cast<void *>(Union.buffer); 462344779Sdim return *static_cast<T *>(Storage); 463336809Sdim } 464336809Sdim 465353358Sdim friend class OStream; 466336809Sdim 467336809Sdim enum ValueType : char { 468336809Sdim T_Null, 469336809Sdim T_Boolean, 470336809Sdim T_Double, 471336809Sdim T_Integer, 472336809Sdim T_StringRef, 473336809Sdim T_String, 474336809Sdim T_Object, 475336809Sdim T_Array, 476336809Sdim }; 477336809Sdim // All members mutable, see moveFrom(). 
478336809Sdim mutable ValueType Type; 479336809Sdim mutable llvm::AlignedCharArrayUnion<bool, double, int64_t, llvm::StringRef, 480336809Sdim std::string, json::Array, json::Object> 481336809Sdim Union; 482344779Sdim friend bool operator==(const Value &, const Value &); 483336809Sdim}; 484336809Sdim 485336809Sdimbool operator==(const Value &, const Value &); 486336809Sdiminline bool operator!=(const Value &L, const Value &R) { return !(L == R); } 487336809Sdim 488336809Sdim/// ObjectKey is a used to capture keys in Object. Like Value but: 489336809Sdim/// - only strings are allowed 490336809Sdim/// - it's optimized for the string literal case (Owned == nullptr) 491336809Sdim/// Like Value, strings must be UTF-8. See isUTF8 documentation for details. 492336809Sdimclass ObjectKey { 493336809Sdimpublic: 494336809Sdim ObjectKey(const char *S) : ObjectKey(StringRef(S)) {} 495336809Sdim ObjectKey(std::string S) : Owned(new std::string(std::move(S))) { 496336809Sdim if (LLVM_UNLIKELY(!isUTF8(*Owned))) { 497336809Sdim assert(false && "Invalid UTF-8 in value used as JSON"); 498336809Sdim *Owned = fixUTF8(std::move(*Owned)); 499336809Sdim } 500336809Sdim Data = *Owned; 501336809Sdim } 502336809Sdim ObjectKey(llvm::StringRef S) : Data(S) { 503336809Sdim if (LLVM_UNLIKELY(!isUTF8(Data))) { 504336809Sdim assert(false && "Invalid UTF-8 in value used as JSON"); 505336809Sdim *this = ObjectKey(fixUTF8(S)); 506336809Sdim } 507336809Sdim } 508336809Sdim ObjectKey(const llvm::SmallVectorImpl<char> &V) 509336809Sdim : ObjectKey(std::string(V.begin(), V.end())) {} 510336809Sdim ObjectKey(const llvm::formatv_object_base &V) : ObjectKey(V.str()) {} 511336809Sdim 512336809Sdim ObjectKey(const ObjectKey &C) { *this = C; } 513336809Sdim ObjectKey(ObjectKey &&C) : ObjectKey(static_cast<const ObjectKey &&>(C)) {} 514336809Sdim ObjectKey &operator=(const ObjectKey &C) { 515336809Sdim if (C.Owned) { 516336809Sdim Owned.reset(new std::string(*C.Owned)); 517336809Sdim Data = *Owned; 518336809Sdim 
} else { 519336809Sdim Data = C.Data; 520336809Sdim } 521336809Sdim return *this; 522336809Sdim } 523336809Sdim ObjectKey &operator=(ObjectKey &&) = default; 524336809Sdim 525336809Sdim operator llvm::StringRef() const { return Data; } 526336809Sdim std::string str() const { return Data.str(); } 527336809Sdim 528336809Sdimprivate: 529336809Sdim // FIXME: this is unneccesarily large (3 pointers). Pointer + length + owned 530336809Sdim // could be 2 pointers at most. 531336809Sdim std::unique_ptr<std::string> Owned; 532336809Sdim llvm::StringRef Data; 533336809Sdim}; 534336809Sdim 535336809Sdiminline bool operator==(const ObjectKey &L, const ObjectKey &R) { 536336809Sdim return llvm::StringRef(L) == llvm::StringRef(R); 537336809Sdim} 538336809Sdiminline bool operator!=(const ObjectKey &L, const ObjectKey &R) { 539336809Sdim return !(L == R); 540336809Sdim} 541336809Sdiminline bool operator<(const ObjectKey &L, const ObjectKey &R) { 542336809Sdim return StringRef(L) < StringRef(R); 543336809Sdim} 544336809Sdim 545336809Sdimstruct Object::KV { 546336809Sdim ObjectKey K; 547336809Sdim Value V; 548336809Sdim}; 549336809Sdim 550336809Sdiminline Object::Object(std::initializer_list<KV> Properties) { 551336809Sdim for (const auto &P : Properties) { 552336809Sdim auto R = try_emplace(P.K, nullptr); 553336809Sdim if (R.second) 554336809Sdim R.first->getSecond().moveFrom(std::move(P.V)); 555336809Sdim } 556336809Sdim} 557336809Sdiminline std::pair<Object::iterator, bool> Object::insert(KV E) { 558336809Sdim return try_emplace(std::move(E.K), std::move(E.V)); 559336809Sdim} 560360784Sdiminline bool Object::erase(StringRef K) { 561360784Sdim return M.erase(ObjectKey(K)); 562360784Sdim} 563336809Sdim 564336809Sdim// Standard deserializers are provided for primitive types. 565336809Sdim// See comments on Value. 
566336809Sdiminline bool fromJSON(const Value &E, std::string &Out) { 567336809Sdim if (auto S = E.getAsString()) { 568336809Sdim Out = *S; 569336809Sdim return true; 570336809Sdim } 571336809Sdim return false; 572336809Sdim} 573336809Sdiminline bool fromJSON(const Value &E, int &Out) { 574336809Sdim if (auto S = E.getAsInteger()) { 575336809Sdim Out = *S; 576336809Sdim return true; 577336809Sdim } 578336809Sdim return false; 579336809Sdim} 580336809Sdiminline bool fromJSON(const Value &E, int64_t &Out) { 581336809Sdim if (auto S = E.getAsInteger()) { 582336809Sdim Out = *S; 583336809Sdim return true; 584336809Sdim } 585336809Sdim return false; 586336809Sdim} 587336809Sdiminline bool fromJSON(const Value &E, double &Out) { 588336809Sdim if (auto S = E.getAsNumber()) { 589336809Sdim Out = *S; 590336809Sdim return true; 591336809Sdim } 592336809Sdim return false; 593336809Sdim} 594336809Sdiminline bool fromJSON(const Value &E, bool &Out) { 595336809Sdim if (auto S = E.getAsBoolean()) { 596336809Sdim Out = *S; 597336809Sdim return true; 598336809Sdim } 599336809Sdim return false; 600336809Sdim} 601336809Sdimtemplate <typename T> bool fromJSON(const Value &E, llvm::Optional<T> &Out) { 602336809Sdim if (E.getAsNull()) { 603336809Sdim Out = llvm::None; 604336809Sdim return true; 605336809Sdim } 606336809Sdim T Result; 607336809Sdim if (!fromJSON(E, Result)) 608336809Sdim return false; 609336809Sdim Out = std::move(Result); 610336809Sdim return true; 611336809Sdim} 612336809Sdimtemplate <typename T> bool fromJSON(const Value &E, std::vector<T> &Out) { 613336809Sdim if (auto *A = E.getAsArray()) { 614336809Sdim Out.clear(); 615336809Sdim Out.resize(A->size()); 616336809Sdim for (size_t I = 0; I < A->size(); ++I) 617336809Sdim if (!fromJSON((*A)[I], Out[I])) 618336809Sdim return false; 619336809Sdim return true; 620336809Sdim } 621336809Sdim return false; 622336809Sdim} 623336809Sdimtemplate <typename T> 624336809Sdimbool fromJSON(const Value &E, std::map<std::string, T> 
&Out) { 625336809Sdim if (auto *O = E.getAsObject()) { 626336809Sdim Out.clear(); 627336809Sdim for (const auto &KV : *O) 628336809Sdim if (!fromJSON(KV.second, Out[llvm::StringRef(KV.first)])) 629336809Sdim return false; 630336809Sdim return true; 631336809Sdim } 632336809Sdim return false; 633336809Sdim} 634336809Sdim 635336809Sdim// Allow serialization of Optional<T> for supported T. 636336809Sdimtemplate <typename T> Value toJSON(const llvm::Optional<T> &Opt) { 637336809Sdim return Opt ? Value(*Opt) : Value(nullptr); 638336809Sdim} 639336809Sdim 640336809Sdim/// Helper for mapping JSON objects onto protocol structs. 641336809Sdim/// 642336809Sdim/// Example: 643336809Sdim/// \code 644336809Sdim/// bool fromJSON(const Value &E, MyStruct &R) { 645336809Sdim/// ObjectMapper O(E); 646336809Sdim/// if (!O || !O.map("mandatory_field", R.MandatoryField)) 647336809Sdim/// return false; 648336809Sdim/// O.map("optional_field", R.OptionalField); 649336809Sdim/// return true; 650336809Sdim/// } 651336809Sdim/// \endcode 652336809Sdimclass ObjectMapper { 653336809Sdimpublic: 654336809Sdim ObjectMapper(const Value &E) : O(E.getAsObject()) {} 655336809Sdim 656336809Sdim /// True if the expression is an object. 657336809Sdim /// Must be checked before calling map(). 658336809Sdim operator bool() { return O; } 659336809Sdim 660336809Sdim /// Maps a property to a field, if it exists. 661336809Sdim template <typename T> bool map(StringRef Prop, T &Out) { 662336809Sdim assert(*this && "Must check this is an object before calling map()"); 663336809Sdim if (const Value *E = O->get(Prop)) 664336809Sdim return fromJSON(*E, Out); 665336809Sdim return false; 666336809Sdim } 667336809Sdim 668336809Sdim /// Maps a property to a field, if it exists. 669336809Sdim /// (Optional requires special handling, because missing keys are OK). 
670336809Sdim template <typename T> bool map(StringRef Prop, llvm::Optional<T> &Out) { 671336809Sdim assert(*this && "Must check this is an object before calling map()"); 672336809Sdim if (const Value *E = O->get(Prop)) 673336809Sdim return fromJSON(*E, Out); 674336809Sdim Out = llvm::None; 675336809Sdim return true; 676336809Sdim } 677336809Sdim 678336809Sdimprivate: 679336809Sdim const Object *O; 680336809Sdim}; 681336809Sdim 682336809Sdim/// Parses the provided JSON source, or returns a ParseError. 683336809Sdim/// The returned Value is self-contained and owns its strings (they do not refer 684336809Sdim/// to the original source). 685336809Sdimllvm::Expected<Value> parse(llvm::StringRef JSON); 686336809Sdim 687336809Sdimclass ParseError : public llvm::ErrorInfo<ParseError> { 688336809Sdim const char *Msg; 689336809Sdim unsigned Line, Column, Offset; 690336809Sdim 691336809Sdimpublic: 692336809Sdim static char ID; 693336809Sdim ParseError(const char *Msg, unsigned Line, unsigned Column, unsigned Offset) 694336809Sdim : Msg(Msg), Line(Line), Column(Column), Offset(Offset) {} 695336809Sdim void log(llvm::raw_ostream &OS) const override { 696336809Sdim OS << llvm::formatv("[{0}:{1}, byte={2}]: {3}", Line, Column, Offset, Msg); 697336809Sdim } 698336809Sdim std::error_code convertToErrorCode() const override { 699336809Sdim return llvm::inconvertibleErrorCode(); 700336809Sdim } 701336809Sdim}; 702353358Sdim 703353358Sdim/// json::OStream allows writing well-formed JSON without materializing 704353358Sdim/// all structures as json::Value ahead of time. 705353358Sdim/// It's faster, lower-level, and less safe than OS << json::Value. 706353358Sdim/// 707353358Sdim/// Only one "top-level" object can be written to a stream. 
/// Simplest usage involves passing lambdas (Blocks) to fill in containers:
///
///   json::OStream J(OS);
///   J.array([&]{
///     for (const Event &E : Events)
///       J.object([&] {
///         J.attribute("timestamp", int64_t(E.Time));
///         J.attributeArray("participants", [&] {
///           for (const Participant &P : E.Participants)
///             J.value(P.toString());
///         });
///       });
///   });
///
/// This would produce JSON like:
///
///   [
///     {
///       "timestamp": 19287398741,
///       "participants": [
///         "King Kong",
///         "Miley Cyrus",
///         "Cleopatra"
///       ]
///     },
///     ...
///   ]
///
/// The lower level begin/end methods (arrayBegin()) are more flexible but
/// care must be taken to pair them correctly:
///
///   json::OStream J(OS);
///   J.arrayBegin();
///   for (const Event &E : Events) {
///     J.objectBegin();
///     J.attribute("timestamp", int64_t(E.Time));
///     J.attributeBegin("participants");
///     J.arrayBegin();
///     for (const Participant &P : E.Participants)
///       J.value(P.toString());
///     J.arrayEnd();
///     J.attributeEnd();
///     J.objectEnd();
///   }
///   J.arrayEnd();
///
/// If the call sequence isn't valid JSON, asserts will fire in debug mode.
/// This can be mismatched begin()/end() pairs, trying to emit attributes inside
/// an array, and so on.
/// With asserts disabled, this is undefined behavior.
756353358Sdimclass OStream { 757353358Sdim public: 758353358Sdim using Block = llvm::function_ref<void()>; 759353358Sdim // If IndentSize is nonzero, output is pretty-printed. 760353358Sdim explicit OStream(llvm::raw_ostream &OS, unsigned IndentSize = 0) 761353358Sdim : OS(OS), IndentSize(IndentSize) { 762353358Sdim Stack.emplace_back(); 763353358Sdim } 764353358Sdim ~OStream() { 765353358Sdim assert(Stack.size() == 1 && "Unmatched begin()/end()"); 766353358Sdim assert(Stack.back().Ctx == Singleton); 767353358Sdim assert(Stack.back().HasValue && "Did not write top-level value"); 768353358Sdim } 769353358Sdim 770353358Sdim /// Flushes the underlying ostream. OStream does not buffer internally. 771353358Sdim void flush() { OS.flush(); } 772353358Sdim 773353358Sdim // High level functions to output a value. 774353358Sdim // Valid at top-level (exactly once), in an attribute value (exactly once), 775353358Sdim // or in an array (any number of times). 776353358Sdim 777353358Sdim /// Emit a self-contained value (number, string, vector<string> etc). 778353358Sdim void value(const Value &V); 779353358Sdim /// Emit an array whose elements are emitted in the provided Block. 780353358Sdim void array(Block Contents) { 781353358Sdim arrayBegin(); 782353358Sdim Contents(); 783353358Sdim arrayEnd(); 784353358Sdim } 785353358Sdim /// Emit an object whose elements are emitted in the provided Block. 786353358Sdim void object(Block Contents) { 787353358Sdim objectBegin(); 788353358Sdim Contents(); 789353358Sdim objectEnd(); 790353358Sdim } 791353358Sdim 792353358Sdim // High level functions to output object attributes. 793353358Sdim // Valid only within an object (any number of times). 794353358Sdim 795353358Sdim /// Emit an attribute whose value is self-contained (number, vector<int> etc). 
796353358Sdim void attribute(llvm::StringRef Key, const Value& Contents) { 797353358Sdim attributeImpl(Key, [&] { value(Contents); }); 798353358Sdim } 799353358Sdim /// Emit an attribute whose value is an array with elements from the Block. 800353358Sdim void attributeArray(llvm::StringRef Key, Block Contents) { 801353358Sdim attributeImpl(Key, [&] { array(Contents); }); 802353358Sdim } 803353358Sdim /// Emit an attribute whose value is an object with attributes from the Block. 804353358Sdim void attributeObject(llvm::StringRef Key, Block Contents) { 805353358Sdim attributeImpl(Key, [&] { object(Contents); }); 806353358Sdim } 807353358Sdim 808353358Sdim // Low-level begin/end functions to output arrays, objects, and attributes. 809353358Sdim // Must be correctly paired. Allowed contexts are as above. 810353358Sdim 811353358Sdim void arrayBegin(); 812353358Sdim void arrayEnd(); 813353358Sdim void objectBegin(); 814353358Sdim void objectEnd(); 815353358Sdim void attributeBegin(llvm::StringRef Key); 816353358Sdim void attributeEnd(); 817353358Sdim 818353358Sdim private: 819353358Sdim void attributeImpl(llvm::StringRef Key, Block Contents) { 820353358Sdim attributeBegin(Key); 821353358Sdim Contents(); 822353358Sdim attributeEnd(); 823353358Sdim } 824353358Sdim 825353358Sdim void valueBegin(); 826353358Sdim void newline(); 827353358Sdim 828353358Sdim enum Context { 829353358Sdim Singleton, // Top level, or object attribute. 830353358Sdim Array, 831353358Sdim Object, 832353358Sdim }; 833353358Sdim struct State { 834353358Sdim Context Ctx = Singleton; 835353358Sdim bool HasValue = false; 836353358Sdim }; 837353358Sdim llvm::SmallVector<State, 16> Stack; // Never empty. 838353358Sdim llvm::raw_ostream &OS; 839353358Sdim unsigned IndentSize; 840353358Sdim unsigned Indent = 0; 841353358Sdim}; 842353358Sdim 843353358Sdim/// Serializes this Value to JSON, writing it to the provided stream. 844353358Sdim/// The formatting is compact (no extra whitespace) and deterministic. 
845353358Sdim/// For pretty-printing, use the formatv() format_provider below. 846353358Sdiminline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Value &V) { 847353358Sdim OStream(OS).value(V); 848353358Sdim return OS; 849353358Sdim} 850336809Sdim} // namespace json 851336809Sdim 852336809Sdim/// Allow printing json::Value with formatv(). 853336809Sdim/// The default style is basic/compact formatting, like operator<<. 854336809Sdim/// A format string like formatv("{0:2}", Value) pretty-prints with indent 2. 855336809Sdimtemplate <> struct format_provider<llvm::json::Value> { 856336809Sdim static void format(const llvm::json::Value &, raw_ostream &, StringRef); 857336809Sdim}; 858336809Sdim} // namespace llvm 859336809Sdim 860336809Sdim#endif 861