1/*-
2 * Copyright (c) 2010 Alistair Crooks <agc@NetBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25#include <sys/types.h>
26
27#include <inttypes.h>
28#include <regex.h>
29#include <stdarg.h>
30#include <stdio.h>
31#include <stdlib.h>
32#include <string.h>
33#include <unistd.h>
34
35#include "mj.h"
36#include "defs.h"
37
38/* save 'n' chars of 's' in malloc'd memory */
39static char *
40strnsave(const char *s, int n, unsigned encoded)
41{
42	char	*newc;
43	char	*cp;
44	int	 i;
45
46	if (n < 0) {
47		n = (int)strlen(s);
48	}
49	NEWARRAY(char, cp, n + n + 1, "strnsave", return NULL);
50	if (encoded) {
51		newc = cp;
52		for (i = 0 ; i < n ; i++) {
53			if ((uint8_t)*s == 0xac) {
54				*newc++ = (char)0xac;
55				*newc++ = '1';
56				s += 1;
57			} else if (*s == '"') {
58				*newc++ = (char)0xac;
59				*newc++ = '2';
60				s += 1;
61			} else if (*s == 0x0) {
62				*newc++ = (char)0xac;
63				*newc++ = '0';
64				s += 1;
65			} else {
66				*newc++ = *s++;
67			}
68		}
69		*newc = 0x0;
70	} else {
71		(void) memcpy(cp, s, (unsigned)n);
72		cp[n] = 0x0;
73	}
74	return cp;
75}
76
77/* look in an object for the item */
78static int
79findentry(mj_t *atom, const char *name, const unsigned from, const unsigned incr)
80{
81	unsigned	i;
82
83	for (i = from ; i < atom->c ; i += incr) {
84		if (strcmp(name, atom->value.v[i].value.s) == 0) {
85			return i;
86		}
87	}
88	return -1;
89}
90
91/* create a real number */
92static void
93create_number(mj_t *atom, double d)
94{
95	char	number[128];
96
97	atom->type = MJ_NUMBER;
98	atom->c = snprintf(number, sizeof(number), "%g", d);
99	atom->value.s = strnsave(number, (int)atom->c, MJ_HUMAN);
100}
101
102/* create an integer */
103static void
104create_integer(mj_t *atom, int64_t i)
105{
106	char	number[128];
107
108	atom->type = MJ_NUMBER;
109	atom->c = snprintf(number, sizeof(number), "%" PRIi64, i);
110	atom->value.s = strnsave(number, (int)atom->c, MJ_HUMAN);
111}
112
113/* create a string */
114static void
115create_string(mj_t *atom, const char *s, ssize_t len)
116{
117	atom->type = MJ_STRING;
118	atom->value.s = strnsave(s, (int)len, MJ_JSON_ENCODE);
119	atom->c = (unsigned)strlen(atom->value.s);
120}
121
/*
 * Token types for JSON punctuation, used only by the tokenizer and parser
 * in this file.  They are numbered consecutively after MJ_OBJECT so they
 * do not collide with the atom types.  The values in the trailing comments
 * presumably assume MJ_OBJECT is 7 - confirm against mj.h.
 */
#define MJ_OPEN_BRACKET		(MJ_OBJECT + 1)		/* 8 */
#define MJ_CLOSE_BRACKET	(MJ_OPEN_BRACKET + 1)	/* 9 */
#define MJ_OPEN_BRACE		(MJ_CLOSE_BRACKET + 1)	/* 10 */
#define MJ_CLOSE_BRACE		(MJ_OPEN_BRACE + 1)	/* 11 */
#define MJ_COLON		(MJ_CLOSE_BRACE + 1)	/* 12 */
#define MJ_COMMA		(MJ_COLON + 1)		/* 13 */
128
129/* return the token type, and start and finish locations in string */
130static int
131gettok(const char *s, int *from, int *to, int *tok)
132{
133	static regex_t	tokregex;
134	regmatch_t	matches[15];
135	static int	compiled;
136
137	if (!compiled) {
138		compiled = 1;
139		(void) regcomp(&tokregex,
140			"[ \t\r\n]*(([+-]?[0-9]{1,21}(\\.[0-9]*)?([eE][-+][0-9]+)?)|"
141			"(\"([^\"]|\\\\.)*\")|(null)|(false)|(true)|([][{}:,]))",
142			REG_EXTENDED);
143	}
144	if (regexec(&tokregex, &s[*from = *to], 15, matches, 0) != 0) {
145		return *tok = -1;
146	}
147	*to = *from + (int)(matches[1].rm_eo);
148	*tok = (matches[2].rm_so >= 0) ? MJ_NUMBER :
149		(matches[5].rm_so >= 0) ? MJ_STRING :
150		(matches[7].rm_so >= 0) ? MJ_NULL :
151		(matches[8].rm_so >= 0) ? MJ_FALSE :
152		(matches[9].rm_so >= 0) ? MJ_TRUE :
153		(matches[10].rm_so < 0) ? -1 :
154			(s[*from + (int)(matches[10].rm_so)] == '[') ? MJ_OPEN_BRACKET :
155			(s[*from + (int)(matches[10].rm_so)] == ']') ? MJ_CLOSE_BRACKET :
156			(s[*from + (int)(matches[10].rm_so)] == '{') ? MJ_OPEN_BRACE :
157			(s[*from + (int)(matches[10].rm_so)] == '}') ? MJ_CLOSE_BRACE :
158			(s[*from + (int)(matches[10].rm_so)] == ':') ? MJ_COLON :
159				MJ_COMMA;
160	*from += (int)(matches[1].rm_so);
161	return *tok;
162}
163
/*
 * Write 'depth' levels of indentation (four spaces each) to 'fp',
 * followed by 'trailer' when it is not NULL.
 */
static void
indent(FILE *fp, unsigned depth, const char *trailer)
{
	unsigned	level;

	for (level = depth ; level > 0 ; level--) {
		(void) fprintf(fp, "    ");
	}
	if (trailer != NULL) {
		(void) fprintf(fp, "%s", trailer);
	}
}
177
178/***************************************************************************/
179
180/* return the number of entries in the array */
181int
182mj_arraycount(mj_t *atom)
183{
184	return atom->c;
185}
186
187/* create a new JSON node */
188int
189mj_create(mj_t *atom, const char *type, ...)
190{
191	va_list	 args;
192	ssize_t	 len;
193	char	*s;
194
195	if (strcmp(type, "false") == 0) {
196		atom->type = MJ_FALSE;
197		atom->c = 0;
198	} else if (strcmp(type, "true") == 0) {
199		atom->type = MJ_TRUE;
200		atom->c = 1;
201	} else if (strcmp(type, "null") == 0) {
202		atom->type = MJ_NULL;
203	} else if (strcmp(type, "number") == 0) {
204		va_start(args, type);
205		create_number(atom, (double)va_arg(args, double));
206		va_end(args);
207	} else if (strcmp(type, "integer") == 0) {
208		va_start(args, type);
209		create_integer(atom, (int64_t)va_arg(args, int64_t));
210		va_end(args);
211	} else if (strcmp(type, "string") == 0) {
212		va_start(args, type);
213		s = (char *)va_arg(args, char *);
214		len = (size_t)va_arg(args, size_t);
215		va_end(args);
216		create_string(atom, s, len);
217	} else if (strcmp(type, "array") == 0) {
218		atom->type = MJ_ARRAY;
219	} else if (strcmp(type, "object") == 0) {
220		atom->type = MJ_OBJECT;
221	} else {
222		(void) fprintf(stderr, "weird type '%s'\n", type);
223		return 0;
224	}
225	return 1;
226}
227
228/* put a JSON tree into a text string */
229int
230mj_snprint(char *buf, size_t size, mj_t *atom, int encoded)
231{
232	unsigned	 i;
233	char		*s;
234	char		*bp;
235	int		 cc;
236
237	switch(atom->type) {
238	case MJ_NULL:
239		return snprintf(buf, size, "null");
240	case MJ_FALSE:
241		return snprintf(buf, size, "false");
242	case MJ_TRUE:
243		return snprintf(buf, size, "true");
244	case MJ_NUMBER:
245		return snprintf(buf, size, "%s", atom->value.s);
246	case MJ_STRING:
247		if (encoded) {
248			return snprintf(buf, size, "\"%s\"", atom->value.s);
249		}
250		for (bp = buf, *bp++ = '"', s = atom->value.s ;
251		     (size_t)(bp - buf) < size && (unsigned)(s - atom->value.s) < atom->c ; ) {
252			if ((uint8_t)*s == 0xac) {
253				switch(s[1]) {
254				case '0':
255					*bp++ = 0x0;
256					s += 2;
257					break;
258				case '1':
259					*bp++ = (char)0xac;
260					s += 2;
261					break;
262				case '2':
263					*bp++ = '"';
264					s += 2;
265					break;
266				default:
267					(void) fprintf(stderr, "unrecognised character '%02x'\n", (uint8_t)s[1]);
268					s += 1;
269					break;
270				}
271			} else {
272				*bp++ = *s++;
273			}
274		}
275		*bp++ = '"';
276		*bp = 0x0;
277		return (int)(bp - buf) - 1;
278	case MJ_ARRAY:
279		cc = snprintf(buf, size, "[ ");
280		for (i = 0 ; i < atom->c ; i++) {
281			cc += mj_snprint(&buf[cc], size - cc, &atom->value.v[i], encoded);
282			if (i < atom->c - 1) {
283				cc += snprintf(&buf[cc], size - cc, ", ");
284			}
285		}
286		return cc + snprintf(&buf[cc], size - cc, "]\n");
287	case MJ_OBJECT:
288		cc = snprintf(buf, size, "{ ");
289		for (i = 0 ; i < atom->c ; i += 2) {
290			cc += mj_snprint(&buf[cc], size - cc, &atom->value.v[i], encoded);
291			cc += snprintf(&buf[cc], size - cc, ":");
292			cc += mj_snprint(&buf[cc], size - cc, &atom->value.v[i + 1], encoded);
293			if (i + 1 < atom->c - 1) {
294				cc += snprintf(&buf[cc], size - cc, ", ");
295			}
296		}
297		return cc + snprintf(&buf[cc], size - cc, "}\n");
298	default:
299		(void) fprintf(stderr, "mj_snprint: weird type %d\n", atom->type);
300		return 0;
301	}
302}
303
304/* allocate and print the atom */
305int
306mj_asprint(char **buf, mj_t *atom, int encoded)
307{
308	int	 size;
309
310	size = mj_string_size(atom);
311	if ((*buf = calloc(1, (unsigned)(size + 1))) == NULL) {
312		return -1;
313	}
314	return mj_snprint(*buf, (unsigned)(size + 1), atom, encoded) + 1;
315}
316
/*
 * Parse one JSON value from the text 's' into 'atom', recursing for
 * arrays and objects.
 *
 * 'from' and 'to' are offsets into 's' maintained by gettok(): each call
 * starts scanning at *to and leaves *from/*to at the start/end of the
 * token it found.  'tok' receives the type of the most recent token.
 *
 * Returns the type of the token that follows the parsed value (the result
 * of one further gettok() call), the offending token type if the first
 * token does not start a value (-1 when gettok() finds nothing), or 0 if
 * an ALLOC() call takes its failure action.
 *
 * NOTE(review): the results of strnsave() are passed to strlen() without a
 * NULL check.
 */
int
mj_parse(mj_t *atom, const char *s, int *from, int *to, int *tok)
{
	int	i;

	/* the first token decides what kind of value this is */
	switch(atom->type = *tok = gettok(s, from, to, tok)) {
	case MJ_NUMBER:
		/* keep the number as its source text */
		atom->value.s = strnsave(&s[*from], *to - *from, MJ_JSON_ENCODE);
		atom->c = atom->size = (unsigned)strlen(atom->value.s);
		return gettok(s, from, to, tok);
	case MJ_STRING:
		/* copy the text between the quotes, without re-escaping it */
		atom->value.s = strnsave(&s[*from + 1], *to - *from - 2, MJ_HUMAN);
		atom->c = atom->size = (unsigned)strlen(atom->value.s);
		return gettok(s, from, to, tok);
	case MJ_NULL:
	case MJ_FALSE:
	case MJ_TRUE:
		/* NOTE(review): c is set to the token's end offset here - confirm intent */
		atom->c = (unsigned)*to;
		return gettok(s, from, to, tok);
	case MJ_OPEN_BRACKET:
		mj_create(atom, "array");
		ALLOC(mj_t, atom->value.v, atom->size, atom->c, 10, 10, "mj_parse()", return 0);
		/*
		 * Parse elements until the token after one of them is ']'.
		 * The slot is counted (atom->c++) before it is parsed, so
		 * NOTE(review): for "[]" the ']' itself appears to be stored
		 * as the type of a first element - confirm.
		 */
		while (mj_parse(&atom->value.v[atom->c++], s, from, to, tok) >= 0 && *tok != MJ_CLOSE_BRACKET) {
			if (*tok != MJ_COMMA) {
				(void) fprintf(stderr, "1. expected comma (got %d) at '%s'\n", *tok, &s[*from]);
				break;
			}
			/* make room for the next element */
			ALLOC(mj_t, atom->value.v, atom->size, atom->c, 10, 10, "mj_parse()", return 0);
		}
		/* hand the token after the closing bracket back to the caller */
		return gettok(s, from, to, tok);
	case MJ_OPEN_BRACE:
		mj_create(atom, "object");
		ALLOC(mj_t, atom->value.v, atom->size, atom->c, 10, 10, "mj_parse()", return 0);
		/*
		 * Keys and values are stored as alternating entries; 'i'
		 * counts the entries parsed so far, so an even 'i' has just
		 * read a key and an odd 'i' a value.
		 */
		for (i = 0 ; mj_parse(&atom->value.v[atom->c++], s, from, to, tok) >= 0 && *tok != MJ_CLOSE_BRACE ; i++) {
			/* a key must be followed by ':' and a value by ',' (the message says "comma" in both cases) */
			if (((i % 2) == 0 && *tok != MJ_COLON) || ((i % 2) == 1 && *tok != MJ_COMMA)) {
				(void) fprintf(stderr, "2. expected comma (got %d) at '%s'\n", *tok, &s[*from]);
				break;
			}
			/* make room for the next key or value */
			ALLOC(mj_t, atom->value.v, atom->size, atom->c, 10, 10, "mj_parse()", return 0);
		}
		/* hand the token after the closing brace back to the caller */
		return gettok(s, from, to, tok);
	default:
		/* not the start of a value: punctuation, or -1 from gettok() */
		return *tok;
	}
}
363
364/* return the index of the item which corresponds to the name in the array */
365int
366mj_object_find(mj_t *atom, const char *name, const unsigned from, const unsigned incr)
367{
368	return findentry(atom, name, from, incr);
369}
370
371/* find an atom in a composite mj JSON node */
372mj_t *
373mj_get_atom(mj_t *atom, ...)
374{
375	unsigned	 i;
376	va_list		 args;
377	char		*name;
378	int		 n;
379
380	switch(atom->type) {
381	case MJ_ARRAY:
382		va_start(args, atom);
383		i = va_arg(args, int);
384		va_end(args);
385		return (i < atom->c) ? &atom->value.v[i] : NULL;
386	case MJ_OBJECT:
387		va_start(args, atom);
388		name = va_arg(args, char *);
389		va_end(args);
390		return ((n = findentry(atom, name, 0, 2)) >= 0) ? &atom->value.v[n + 1] : NULL;
391	default:
392		return NULL;
393	}
394}
395
396/* perform a deep copy on an mj JSON atom */
397int
398mj_deepcopy(mj_t *dst, mj_t *src)
399{
400	unsigned	i;
401
402	switch(src->type) {
403	case MJ_FALSE:
404	case MJ_TRUE:
405	case MJ_NULL:
406		(void) memcpy(dst, src, sizeof(*dst));
407		return 1;
408	case MJ_STRING:
409	case MJ_NUMBER:
410		(void) memcpy(dst, src, sizeof(*dst));
411		dst->value.s = strnsave(src->value.s, -1, MJ_HUMAN);
412		dst->c = dst->size = (unsigned)strlen(dst->value.s);
413		return 1;
414	case MJ_ARRAY:
415	case MJ_OBJECT:
416		(void) memcpy(dst, src, sizeof(*dst));
417		NEWARRAY(mj_t, dst->value.v, dst->size, "mj_deepcopy()", return 0);
418		for (i = 0 ; i < src->c ; i++) {
419			if (!mj_deepcopy(&dst->value.v[i], &src->value.v[i])) {
420				return 0;
421			}
422		}
423		return 1;
424	default:
425		(void) fprintf(stderr, "weird type '%d'\n", src->type);
426		return 0;
427	}
428}
429
430/* do a deep delete on the object */
431void
432mj_delete(mj_t *atom)
433{
434	unsigned	i;
435
436	switch(atom->type) {
437	case MJ_STRING:
438	case MJ_NUMBER:
439		free(atom->value.s);
440		break;
441	case MJ_ARRAY:
442	case MJ_OBJECT:
443		for (i = 0 ; i < atom->c ; i++) {
444			mj_delete(&atom->value.v[i]);
445		}
446		/* XXX - agc - causing problems? free(atom->value.v); */
447		break;
448	default:
449		break;
450	}
451}
452
453/* return the string size needed for the textual output of the JSON node */
454int
455mj_string_size(mj_t *atom)
456{
457	unsigned	i;
458	int		cc;
459
460	switch(atom->type) {
461	case MJ_NULL:
462	case MJ_TRUE:
463		return 4;
464	case MJ_FALSE:
465		return 5;
466	case MJ_NUMBER:
467		return atom->c;
468	case MJ_STRING:
469		return atom->c + 2;
470	case MJ_ARRAY:
471		for (cc = 2, i = 0 ; i < atom->c ; i++) {
472			cc += mj_string_size(&atom->value.v[i]);
473			if (i < atom->c - 1) {
474				cc += 2;
475			}
476		}
477		return cc + 1 + 1;
478	case MJ_OBJECT:
479		for (cc = 2, i = 0 ; i < atom->c ; i += 2) {
480			cc += mj_string_size(&atom->value.v[i]) + 1 + mj_string_size(&atom->value.v[i + 1]);
481			if (i + 1 < atom->c - 1) {
482				cc += 2;
483			}
484		}
485		return cc + 1 + 1;
486	default:
487		(void) fprintf(stderr, "mj_string_size: weird type %d\n", atom->type);
488		return 0;
489	}
490}
491
492/* create a new atom, and append it to the array or object */
493int
494mj_append(mj_t *atom, const char *type, ...)
495{
496	va_list	 args;
497	ssize_t	 len;
498	char	*s;
499
500	if (atom->type != MJ_ARRAY && atom->type != MJ_OBJECT) {
501		return 0;
502	}
503	ALLOC(mj_t, atom->value.v, atom->size, atom->c, 10, 10, "mj_append()", return 0);
504	va_start(args, type);
505	if (strcmp(type, "string") == 0) {
506		s = (char *)va_arg(args, char *);
507		len = (ssize_t)va_arg(args, ssize_t);
508		create_string(&atom->value.v[atom->c++], s, len);
509	} else if (strcmp(type, "integer") == 0) {
510		create_integer(&atom->value.v[atom->c++], (int64_t)va_arg(args, int64_t));
511	} else if (strcmp(type, "object") == 0 || strcmp(type, "array") == 0) {
512		mj_deepcopy(&atom->value.v[atom->c++], (mj_t *)va_arg(args, mj_t *));
513	} else {
514		(void) fprintf(stderr, "mj_append: weird type '%s'\n", type);
515	}
516	va_end(args);
517	return 1;
518}
519
520/* append a field to an object */
521int
522mj_append_field(mj_t *atom, const char *name, const char *type, ...)
523{
524	va_list	 args;
525	ssize_t	 len;
526	char	*s;
527
528	if (atom->type != MJ_OBJECT) {
529		return 0;
530	}
531	mj_append(atom, "string", name, -1);
532	ALLOC(mj_t, atom->value.v, atom->size, atom->c, 10, 10, "mj_append_field()", return 0);
533	va_start(args, type);
534	if (strcmp(type, "string") == 0) {
535		s = (char *)va_arg(args, char *);
536		len = (ssize_t)va_arg(args, ssize_t);
537		create_string(&atom->value.v[atom->c++], s, len);
538	} else if (strcmp(type, "integer") == 0) {
539		create_integer(&atom->value.v[atom->c++], (int64_t)va_arg(args, int64_t));
540	} else if (strcmp(type, "object") == 0 || strcmp(type, "array") == 0) {
541		mj_deepcopy(&atom->value.v[atom->c++], (mj_t *)va_arg(args, mj_t *));
542	} else {
543		(void) fprintf(stderr, "mj_append_field: weird type '%s'\n", type);
544	}
545	va_end(args);
546	return 1;
547}
548
549/* make sure a JSON object is politically correct */
550int
551mj_lint(mj_t *obj)
552{
553	unsigned	i;
554	int		ret;
555
556	switch(obj->type) {
557	case MJ_NULL:
558	case MJ_FALSE:
559	case MJ_TRUE:
560		if (obj->value.s != NULL) {
561			(void) fprintf(stderr, "null/false/true: non zero string\n");
562			return 0;
563		}
564		return 1;
565	case MJ_NUMBER:
566	case MJ_STRING:
567		if (obj->c > obj->size) {
568			(void) fprintf(stderr, "string/number lint c (%u) > size (%u)\n", obj->c, obj->size);
569			return 0;
570		}
571		return 1;
572	case MJ_ARRAY:
573	case MJ_OBJECT:
574		if (obj->c > obj->size) {
575			(void) fprintf(stderr, "array/object lint c (%u) > size (%u)\n", obj->c, obj->size);
576			return 0;
577		}
578		for (ret = 1, i = 0 ; i < obj->c ; i++) {
579			if (!mj_lint(&obj->value.v[i])) {
580				(void) fprintf(stderr, "array/object lint found at %d of %p\n", i, obj);
581				ret = 0;
582			}
583		}
584		return ret;
585	default:
586		(void) fprintf(stderr, "problem type %d in %p\n", obj->type, obj);
587		return 0;
588	}
589}
590
591/* pretty-print a JSON struct - can be called recursively */
592int
593mj_pretty(mj_t *mj, void *vp, unsigned depth, const char *trailer)
594{
595	unsigned	 i;
596	FILE		*fp;
597	char		*s;
598
599	fp = (FILE *)vp;
600	switch(mj->type) {
601	case MJ_NUMBER:
602	case MJ_TRUE:
603	case MJ_FALSE:
604	case MJ_NULL:
605		indent(fp, depth, mj->value.s);
606		break;
607	case MJ_STRING:
608		indent(fp, depth, NULL);
609		mj_asprint(&s, mj, MJ_HUMAN);
610		(void) fprintf(fp, "\"%s\"", s);
611		free(s);
612		break;
613	case MJ_ARRAY:
614		indent(fp, depth, "[\n");
615		for (i = 0 ; i < mj->c ; i++) {
616			mj_pretty(&mj->value.v[i], fp, depth + 1, (i < mj->c - 1) ? ",\n" : "\n");
617		}
618		indent(fp, depth, "]");
619		break;
620	case MJ_OBJECT:
621		indent(fp, depth, "{\n");
622		for (i = 0 ; i < mj->c ; i += 2) {
623			mj_pretty(&mj->value.v[i], fp, depth + 1, " : ");
624			mj_pretty(&mj->value.v[i + 1], fp, 0, (i < mj->c - 2) ? ",\n" : "\n");
625		}
626		indent(fp, depth, "}");
627		break;
628	}
629	indent(fp, 0, trailer);
630	return 1;
631}
632
633/* show the contents of the simple atom as a string representation */
634const char *
635mj_string_rep(mj_t *atom)
636{
637	if (atom == NULL) {
638		return 0;
639	}
640	switch(atom->type) {
641	case MJ_STRING:
642	case MJ_NUMBER:
643		return atom->value.s;
644	case MJ_NULL:
645		return "null";
646	case MJ_FALSE:
647		return "false";
648	case MJ_TRUE:
649		return "true";
650	default:
651		return NULL;
652	}
653}
654