1/*
2 * Copyright (c) 2010 Kungliga Tekniska Högskolan
3 * (Royal Institute of Technology, Stockholm, Sweden).
4 * All rights reserved.
5 *
6 * Portions Copyright (c) 2010 Apple Inc. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 *
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * 3. Neither the name of the Institute nor the names of its contributors
20 *    may be used to endorse or promote products derived from this software
21 *    without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
#include "baselocl.h"
#include <ctype.h>
#include <limits.h>
#include <base64.h>
39
/* Guards the one-time initialization performed by json_init_once(). */
static heim_base_once_t heim_json_once = HEIM_BASE_ONCE_INIT;
/*
 * Magic dict key under which binary data is stored as a base64 string;
 * assigned by json_init_once().
 */
static heim_string_t heim_tid_data_uuid_key = NULL;
/*
 * Characters accepted by the "might this string be base64?" check in
 * parse_string(); note that the '=' padding character is not listed.
 */
static const char base64_chars[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
44
45static void
46json_init_once(void *arg)
47{
48    heim_tid_data_uuid_key = __heim_string_constant("heimdal-type-data-76d7fca2-d0da-4b20-a126-1a10f8a0eae6");
49}
50
/*
 * Serializer state shared by base2json() and the array/dict iterator
 * callbacks.
 */
struct twojson {
    /* Opaque argument passed as the first parameter of out(). */
    void *ctx;
    /*
     * Output sink.  A NULL string is a request to drop a trailing '\n'
     * if the sink is able to (see strbuf_add()).
     */
    void (*out)(void *, const char *);
    /* Current nesting level; indent() emits this many tabs. */
    size_t indent;
    /* HEIM_JSON_F_* flags controlling the output. */
    heim_json_flags_t flags;
    /*
     * Result of the latest base2json() call made by an iterator
     * callback; once non-zero the callbacks do nothing.
     */
    int ret;
    /* Non-zero until the first element of the current container is seen. */
    int first;
};
59
/* Growable string buffer used as the output sink by strbuf_add(). */
struct strbuf {
    /* Heap buffer holding the text written so far. */
    char *str;
    /* Number of bytes of str currently in use (not counting the NUL). */
    size_t len;
    /* Allocated size of str in bytes. */
    size_t alloced;
    /* Set to 1 when growing the buffer failed; further writes are ignored. */
    int	enomem;
    /* HEIM_JSON_F_* flags; strbuf_add() consults HEIM_JSON_F_ONE_LINE. */
    heim_json_flags_t flags;
};
67
/* Forward declaration: array2json() and dict2json() recurse into it. */
static int
base2json(heim_object_t, struct twojson *);
70
71static void
72indent(struct twojson *j)
73{
74    size_t i = j->indent;
75    if (j->flags & HEIM_JSON_F_ONE_LINE)
76	return;
77    while (i--)
78	j->out(j->ctx, "\t");
79}
80
81static void
82array2json(heim_object_t value, int *stop, void *ctx)
83{
84    struct twojson *j = ctx;
85    if (j->ret)
86	return;
87    if (j->first) {
88	j->first = 0;
89    } else {
90	j->out(j->ctx, NULL); /* eat previous '\n' if possible */
91	j->out(j->ctx, ",\n");
92    }
93    j->ret = base2json(value, j);
94}
95
96static void
97dict2json(heim_object_t key, heim_object_t value, void *ctx)
98{
99    struct twojson *j = ctx;
100    if (j->ret)
101	return;
102    if (j->first) {
103	j->first = 0;
104    } else {
105	j->out(j->ctx, NULL); /* eat previous '\n' if possible */
106	j->out(j->ctx, ",\n");
107    }
108    j->ret = base2json(key, j);
109    if (j->ret)
110	return;
111    j->out(j->ctx, " : \n");
112    j->indent++;
113    j->ret = base2json(value, j);
114    if (j->ret)
115	return;
116    j->indent--;
117}
118
119static int
120base2json(heim_object_t obj, struct twojson *j)
121{
122    heim_tid_t type;
123    int first = 0;
124    char *str;
125
126    if (obj == NULL) {
127	if (j->flags & HEIM_JSON_F_CNULL2JSNULL) {
128	    obj = heim_null_create();
129	} else if (j->flags & HEIM_JSON_F_NO_C_NULL) {
130	    return EINVAL;
131	} else {
132	    indent(j);
133	    j->out(j->ctx, "<NULL>\n"); /* This is NOT valid JSON! */
134	    return 0;
135	}
136    }
137
138    type = heim_get_tid(obj);
139    switch (type) {
140    case HEIM_TID_ARRAY:
141	indent(j);
142	j->out(j->ctx, "[\n");
143	j->indent++;
144	first = j->first;
145	j->first = 1;
146	heim_array_iterate_f(obj, j, array2json);
147	j->indent--;
148	if (!j->first)
149	    j->out(j->ctx, "\n");
150	indent(j);
151	j->out(j->ctx, "]\n");
152	j->first = first;
153	break;
154
155    case HEIM_TID_DICT:
156	indent(j);
157	j->out(j->ctx, "{\n");
158	j->indent++;
159	first = j->first;
160	j->first = 1;
161	heim_dict_iterate_f(obj, j, dict2json);
162	j->indent--;
163	if (!j->first)
164	    j->out(j->ctx, "\n");
165	indent(j);
166	j->out(j->ctx, "}\n");
167	j->first = first;
168	break;
169
170    case HEIM_TID_STRING:
171	indent(j);
172	j->out(j->ctx, "\"");
173	str = heim_string_copy_utf8(obj);
174	j->out(j->ctx, str);
175	free(str);
176	j->out(j->ctx, "\"");
177	break;
178
179    case HEIM_TID_DATA: {
180	heim_dict_t d;
181	heim_string_t v;
182	char *b64 = NULL;
183	int ret;
184
185	if (j->flags & HEIM_JSON_F_NO_DATA)
186	    return EINVAL; /* JSON doesn't do binary */
187
188	ret = base64_encode(heim_data_get_bytes(obj), (int)heim_data_get_length(obj), &b64);
189	if (ret < 0 || b64 == NULL)
190	    return ENOMEM;
191
192	if (j->flags & HEIM_JSON_F_NO_DATA_DICT) {
193	    indent(j);
194	    j->out(j->ctx, "\"");
195	    j->out(j->ctx, b64); /* base64-encode; hope there's no aliasing */
196	    j->out(j->ctx, "\"");
197	    free(b64);
198	} else {
199	    /*
200	     * JSON has no way to represent binary data, therefore the
201	     * following is a Heimdal-specific convention.
202	     *
203	     * We encode binary data as a dict with a single very magic
204	     * key with a base64-encoded value.  The magic key includes
205	     * a uuid, so we're not likely to alias accidentally.
206	     */
207	    d = heim_dict_create(2);
208	    if (d == NULL) {
209		free(b64);
210		return ENOMEM;
211	    }
212	    v = heim_string_create(b64);
213	    free(b64);
214	    if (v == NULL) {
215		heim_release(d);
216		return ENOMEM;
217	    }
218	    ret = heim_dict_set_value(d, heim_tid_data_uuid_key, v);
219	    heim_release(v);
220	    if (ret) {
221		heim_release(d);
222		return ENOMEM;
223	    }
224	    ret = base2json(d, j);
225	    heim_release(d);
226	    if (ret)
227		return ret;
228	}
229	break;
230    }
231
232    case HEIM_TID_NUMBER: {
233	char num[32];
234	indent(j);
235	snprintf(num, sizeof (num), "%d", heim_number_get_int(obj));
236	j->out(j->ctx, num);
237	break;
238    }
239    case HEIM_TID_NULL:
240	indent(j);
241	j->out(j->ctx, "null");
242	break;
243    case HEIM_TID_BOOL:
244	indent(j);
245	j->out(j->ctx, heim_bool_val(obj) ? "true" : "false");
246	break;
247    default:
248	return 1;
249    }
250    return 0;
251}
252
253static int
254heim_base2json(heim_object_t obj, void *ctx, heim_json_flags_t flags,
255	       void (*out)(void *, const char *))
256{
257    struct twojson j;
258
259    if (flags & HEIM_JSON_F_STRICT_STRINGS)
260	return ENOTSUP; /* Sorry, not yet! */
261
262    heim_base_once_f(&heim_json_once, NULL, json_init_once);
263
264    j.indent = 0;
265    j.ctx = ctx;
266    j.out = out;
267    j.flags = flags;
268    j.ret = 0;
269    j.first = 1;
270
271    return base2json(obj, &j);
272}
273
274
/*
 * JSON parsing
 */
278
/* Parser state shared by all parse_*() functions. */
struct parse_ctx {
    /* Current line, incremented for every '\n' consumed (used in errors). */
    unsigned long lineno;
    /* Next byte to be parsed. */
    const uint8_t *p;
    /* First byte of the input; used to report offsets in errors. */
    const uint8_t *pstart;
    /* One past the last byte of the input. */
    const uint8_t *pend;
    /* Error describing why parsing failed, or NULL. */
    heim_error_t error;
    /*
     * Remaining nesting budget: decremented on entering a dict or array
     * and incremented again after it has been parsed.
     */
    size_t depth;
    /* HEIM_JSON_F_* flags controlling the parse. */
    heim_json_flags_t flags;
};
288
289
/* Forward declaration: parse_pair() and parse_item() recurse into it. */
static heim_object_t
parse_value(struct parse_ctx *ctx);
292
293/*
294 * This function eats whitespace, but, critically, it also succeeds
295 * only if there's anything left to parse.
296 */
297static int
298white_spaces(struct parse_ctx *ctx)
299{
300    while (ctx->p < ctx->pend) {
301	uint8_t c = *ctx->p;
302	if (c == ' ' || c == '\t' || c == '\r') {
303
304	} else if (c == '\n') {
305	    ctx->lineno++;
306	} else
307	    return 0;
308	(ctx->p)++;
309    }
310    return -1;
311}
312
/*
 * Return 1 if n is an ASCII decimal digit, 0 otherwise.
 */
static int
is_number(uint8_t n)
{
    /* Anything below '0' wraps to a large unsigned value and fails the test */
    return (unsigned int)(n - '0') < 10U;
}
318
319static heim_number_t
320parse_number(struct parse_ctx *ctx)
321{
322    int number = 0, neg = 1;
323
324    if (ctx->p >= ctx->pend)
325	return NULL;
326
327    if (*ctx->p == '-') {
328	if (ctx->p + 1 >= ctx->pend)
329	    return NULL;
330	neg = -1;
331	ctx->p += 1;
332    }
333
334    while (ctx->p < ctx->pend) {
335	if (is_number(*ctx->p)) {
336	    number = (number * 10) + (*ctx->p - '0');
337	} else {
338	    break;
339	}
340	ctx->p += 1;
341    }
342
343    return heim_number_create(number * neg);
344}
345
346static heim_string_t
347parse_string(struct parse_ctx *ctx)
348{
349    const uint8_t *start;
350    int quote = 0;
351
352    if (ctx->flags & HEIM_JSON_F_STRICT_STRINGS) {
353	ctx->error = heim_error_create(EINVAL, "Strict JSON string encoding "
354				       "not yet supported");
355	return NULL;
356    }
357
358    if (*ctx->p != '"') {
359	ctx->error = heim_error_create(EINVAL, "Expected a JSON string but "
360				       "found something else at line %lu",
361				       ctx->lineno);
362	return NULL;
363    }
364    start = ++ctx->p;
365
366    while (ctx->p < ctx->pend) {
367	if (*ctx->p == '\n') {
368	    ctx->lineno++;
369	} else if (*ctx->p == '\\') {
370	    if (ctx->p + 1 == ctx->pend)
371		goto out;
372	    ctx->p++;
373	    quote = 1;
374	} else if (*ctx->p == '"') {
375	    heim_object_t o;
376
377	    if (quote) {
378		char *p0, *p;
379		p = p0 = malloc(ctx->p - start);
380		if (p == NULL)
381		    goto out;
382		while (start < ctx->p) {
383		    if (*start == '\\') {
384			start++;
385			/* XXX validate quoted char */
386		    }
387		    *p++ = *start++;
388		}
389		o = heim_string_create_with_bytes(p0, p - p0);
390		free(p0);
391	    } else {
392		o = heim_string_create_with_bytes(start, ctx->p - start);
393		if (o == NULL) {
394		    ctx->error = heim_error_create_enomem();
395		    return NULL;
396		}
397
398		/* If we can decode as base64, then let's */
399		if (ctx->flags & HEIM_JSON_F_TRY_DECODE_DATA) {
400		    void *buf;
401		    size_t len;
402		    char *s;
403
404		    s = heim_string_copy_utf8(o);
405		    len = strlen(s);
406
407		    if (len >= 4 && strspn(s, base64_chars) >= len - 2) {
408			buf = malloc(len);
409			if (buf == NULL) {
410			    free(s);
411			    heim_release(o);
412			    ctx->error = heim_error_create_enomem();
413			    return NULL;
414			}
415			len = base64_decode(s, buf);
416			if (len == (size_t)-1) {
417			    free(s);
418			    free(buf);
419			    return o;
420			}
421			heim_release(o);
422			o = heim_data_create(buf, len);
423			free(buf);
424		    }
425		    free(s);
426		}
427	    }
428	    ctx->p += 1;
429
430	    return o;
431	}
432	ctx->p += 1;
433    }
434    out:
435    ctx->error = heim_error_create(EINVAL, "ran out of string");
436    return NULL;
437}
438
439static int
440parse_pair(heim_dict_t dict, struct parse_ctx *ctx)
441{
442    heim_string_t key;
443    heim_object_t value;
444
445    if (white_spaces(ctx))
446	return -1;
447
448    if (*ctx->p == '}') {
449	ctx->p++;
450	return 0;
451    }
452
453    if (ctx->flags & HEIM_JSON_F_STRICT_DICT)
454	/* JSON allows only string keys */
455	key = parse_string(ctx);
456    else
457	/* heim_dict_t allows any heim_object_t as key */
458	key = parse_value(ctx);
459    if (key == NULL)
460	/* Even heim_dict_t does not allow C NULLs as keys though! */
461	return -1;
462
463    if (white_spaces(ctx)) {
464	heim_release(key);
465	return -1;
466    }
467
468    if (*ctx->p != ':') {
469	heim_release(key);
470	return -1;
471    }
472
473    ctx->p += 1; /* safe because we call white_spaces() next */
474
475    if (white_spaces(ctx)) {
476	heim_release(key);
477	return -1;
478    }
479
480    value = parse_value(ctx);
481    if (value == NULL &&
482	(ctx->error != NULL || (ctx->flags & HEIM_JSON_F_NO_C_NULL))) {
483	if (ctx->error == NULL)
484	    ctx->error = heim_error_create(EINVAL, "Invalid JSON encoding");
485	heim_release(key);
486	return -1;
487    }
488    heim_dict_set_value(dict, key, value);
489    heim_release(key);
490    heim_release(value);
491
492    if (white_spaces(ctx))
493	return -1;
494
495    if (*ctx->p == '}') {
496	/*
497	 * Return 1 but don't consume the '}' so we can count the one
498	 * pair in a one-pair dict
499	 */
500	return 1;
501    } else if (*ctx->p == ',') {
502	ctx->p++;
503	return 1;
504    }
505    return -1;
506}
507
508static heim_dict_t
509parse_dict(struct parse_ctx *ctx)
510{
511    heim_dict_t dict;
512    size_t count = 0;
513    int ret;
514
515    heim_assert(*ctx->p == '{', "string doesn't start with {");
516
517    dict = heim_dict_create(11);
518    if (dict == NULL) {
519	ctx->error = heim_error_create_enomem();
520	return NULL;
521    }
522
523    ctx->p += 1; /* safe because parse_pair() calls white_spaces() first */
524
525    while ((ret = parse_pair(dict, ctx)) > 0)
526	count++;
527    if (ret < 0) {
528	heim_release(dict);
529	return NULL;
530    }
531    if (count == 1 && !(ctx->flags & HEIM_JSON_F_NO_DATA_DICT)) {
532	heim_object_t v = heim_dict_copy_value(dict, heim_tid_data_uuid_key);
533
534	/*
535	 * Binary data encoded as a dict with a single magic key with
536	 * base64-encoded value?  Decode as heim_data_t.
537	 */
538	if (v != NULL && heim_get_tid(v) == HEIM_TID_STRING) {
539	    char *str;
540	    void *buf;
541	    int len;
542
543	    str = heim_string_copy_utf8(v);
544	    if (str == NULL) {
545		heim_release(dict);
546		heim_release(v);
547		ctx->error = heim_error_create_enomem();
548		return NULL;
549	    }
550	    buf = malloc(strlen(str));
551	    if (buf == NULL) {
552		free(str);
553		heim_release(dict);
554		heim_release(v);
555		ctx->error = heim_error_create_enomem();
556		return NULL;
557	    }
558	    len = base64_decode(str, buf);
559	    free(str);
560	    heim_release(v);
561	    if (len == -1) {
562		free(buf);
563		return dict; /* assume aliasing accident */
564	    }
565	    heim_release(dict);
566	    dict = (heim_dict_t)heim_data_create(buf, len);
567	    free(buf);
568	    return dict;
569	}
570    }
571    return dict;
572}
573
574static int
575parse_item(heim_array_t array, struct parse_ctx *ctx)
576{
577    heim_object_t value;
578
579    if (white_spaces(ctx))
580	return -1;
581
582    if (*ctx->p == ']') {
583	ctx->p++; /* safe because parse_value() calls white_spaces() first */
584	return 0;
585    }
586
587    value = parse_value(ctx);
588    if (value == NULL &&
589	(ctx->error || (ctx->flags & HEIM_JSON_F_NO_C_NULL)))
590	return -1;
591
592    heim_array_append_value(array, value);
593    heim_release(value);
594
595    if (white_spaces(ctx))
596	return -1;
597
598    if (*ctx->p == ']') {
599	ctx->p++;
600	return 0;
601    } else if (*ctx->p == ',') {
602	ctx->p++;
603	return 1;
604    }
605    return -1;
606}
607
608static heim_array_t
609parse_array(struct parse_ctx *ctx)
610{
611    heim_array_t array = heim_array_create();
612    int ret;
613
614    heim_assert(*ctx->p == '[', "array doesn't start with [");
615    ctx->p += 1;
616
617    while ((ret = parse_item(array, ctx)) > 0)
618	;
619    if (ret < 0) {
620	heim_release(array);
621	return NULL;
622    }
623    return array;
624}
625
626static heim_object_t
627parse_value(struct parse_ctx *ctx)
628{
629    size_t len;
630    heim_object_t o;
631
632    if (white_spaces(ctx))
633	return NULL;
634
635    if (*ctx->p == '"') {
636	return parse_string(ctx);
637    } else if (*ctx->p == '{') {
638	if (ctx->depth-- == 1) {
639	    ctx->error = heim_error_create(EINVAL, "JSON object too deep");
640	    return NULL;
641	}
642	o = parse_dict(ctx);
643	ctx->depth++;
644	return o;
645    } else if (*ctx->p == '[') {
646	if (ctx->depth-- == 1) {
647	    ctx->error = heim_error_create(EINVAL, "JSON object too deep");
648	    return NULL;
649	}
650	o = parse_array(ctx);
651	ctx->depth++;
652	return o;
653    } else if (is_number(*ctx->p) || *ctx->p == '-') {
654	return parse_number(ctx);
655    }
656
657    len = ctx->pend - ctx->p;
658
659    if ((ctx->flags & HEIM_JSON_F_NO_C_NULL) == 0 &&
660	len >= 6 && memcmp(ctx->p, "<NULL>", 6) == 0) {
661	ctx->p += 6;
662	return heim_null_create();
663    } else if (len >= 4 && memcmp(ctx->p, "null", 4) == 0) {
664	ctx->p += 4;
665	return heim_null_create();
666    } else if (len >= 4 && strncasecmp((char *)ctx->p, "true", 4) == 0) {
667	ctx->p += 4;
668	return heim_bool_create(1);
669    } else if (len >= 5 && strncasecmp((char *)ctx->p, "false", 5) == 0) {
670	ctx->p += 5;
671	return heim_bool_create(0);
672    }
673
674    ctx->error = heim_error_create(EINVAL, "unknown char %c at %lu line %lu",
675				   (char)*ctx->p,
676				   (unsigned long)(ctx->p - ctx->pstart),
677				   ctx->lineno);
678    return NULL;
679}
680
681
682heim_object_t
683heim_json_create(const char *string, size_t max_depth, heim_json_flags_t flags,
684		 heim_error_t *error)
685{
686    return heim_json_create_with_bytes(string, strlen(string), max_depth, flags,
687				       error);
688}
689
690heim_object_t
691heim_json_create_with_bytes(const void *data, size_t length, size_t max_depth,
692			    heim_json_flags_t flags, heim_error_t *error)
693{
694    struct parse_ctx ctx;
695    heim_object_t o;
696
697    heim_base_once_f(&heim_json_once, NULL, json_init_once);
698
699    ctx.lineno = 1;
700    ctx.p = data;
701    ctx.pstart = data;
702    ctx.pend = ((uint8_t *)data) + length;
703    ctx.error = NULL;
704    ctx.flags = flags;
705    ctx.depth = max_depth;
706
707    o = parse_value(&ctx);
708
709    if (o == NULL && error) {
710	*error = ctx.error;
711    } else if (ctx.error) {
712	heim_release(ctx.error);
713    }
714
715    return o;
716}
717
718
/*
 * Output sink writing to a stdio stream: ctx is the FILE * to write to.
 * A NULL string (the "eat the last newline" request) is ignored, since
 * output already written to a stream cannot be taken back.
 */
static void
show_printf(void *ctx, const char *str)
{
    FILE *stream = ctx;

    if (str != NULL)
	fputs(str, stream);
}
726
727/**
728 * Dump a heimbase object to stderr (useful from the debugger!)
729 *
730 * @param obj object to dump using JSON or JSON-like format
731 *
732 * @addtogroup heimbase
733 */
734void
735heim_show(heim_object_t obj)
736{
737    heim_base2json(obj, stderr, HEIM_JSON_F_NO_DATA_DICT, show_printf);
738}
739
740static void
741strbuf_add(void *ctx, const char *str)
742{
743    struct strbuf *strbuf = ctx;
744    size_t len;
745
746    if (strbuf->enomem)
747	return;
748
749    if (str == NULL) {
750	/*
751	 * Eat the last '\n'; this is used when formatting dict pairs
752	 * and array items so that the ',' separating them is never
753	 * preceded by a '\n'.
754	 */
755	if (strbuf->len > 0 && strbuf->str[strbuf->len - 1] == '\n')
756	    strbuf->len--;
757	return;
758    }
759
760    len = strlen(str);
761    if ((len + 1) > (strbuf->alloced - strbuf->len)) {
762	size_t new_len = strbuf->alloced + (strbuf->alloced >> 2) + len + 1;
763	char *s;
764
765	s = realloc(strbuf->str, new_len);
766	if (s == NULL) {
767	    strbuf->enomem = 1;
768	    return;
769	}
770	strbuf->str = s;
771	strbuf->alloced = new_len;
772    }
773    /* +1 so we copy the NUL */
774    (void) memcpy(strbuf->str + strbuf->len, str, len + 1);
775    strbuf->len += len;
776    if (strbuf->str[strbuf->len - 1] == '\n' &&
777	strbuf->flags & HEIM_JSON_F_ONE_LINE)
778	strbuf->len--;
779}
780
/* Initial allocation, in bytes, for the serialization buffer. */
#define STRBUF_INIT_SZ 64
782
783heim_string_t
784heim_json_copy_serialize(heim_object_t obj, heim_json_flags_t flags, heim_error_t *error)
785{
786    heim_string_t str;
787    struct strbuf strbuf;
788    int ret;
789
790    if (error)
791	*error = NULL;
792
793    memset(&strbuf, 0, sizeof (strbuf));
794    strbuf.str = malloc(STRBUF_INIT_SZ);
795    if (strbuf.str == NULL) {
796	if (error)
797	    *error = heim_error_create_enomem();
798	return NULL;
799    }
800    strbuf.len = 0;
801    strbuf.alloced = STRBUF_INIT_SZ;
802    strbuf.str[0] = '\0';
803    strbuf.flags = flags;
804
805    ret = heim_base2json(obj, &strbuf, flags, strbuf_add);
806    if (ret || strbuf.enomem) {
807	if (error) {
808	    if (strbuf.enomem || ret == ENOMEM)
809		*error = heim_error_create_enomem();
810	    else
811		*error = heim_error_create(1, "Impossible to JSON-encode "
812					   "object");
813	}
814	free(strbuf.str);
815	return NULL;
816    }
817    if (flags & HEIM_JSON_F_ONE_LINE) {
818	strbuf.flags &= ~HEIM_JSON_F_ONE_LINE;
819	strbuf_add(&strbuf, "\n");
820    }
821    str = heim_string_create(strbuf.str);
822    free(strbuf.str);
823    if (str == NULL) {
824	if (error)
825	    *error = heim_error_create_enomem();
826    }
827    return str;
828}
829