1/*-
2 * Copyright (c) 2018 Christos Zoulas
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
15 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
18 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 * POSSIBILITY OF SUCH DAMAGE.
25 */
26
27/*
28 * Parse JSON object serialization format (RFC-7159)
29 */
30
#ifndef TEST
#include "file.h"

#ifndef lint
FILE_RCSID("@(#)$File: is_json.c,v 1.15 2020/06/07 19:05:47 christos Exp $")
#endif

#include <string.h>
#include "magic.h"
#else
#include <stddef.h>
#include <string.h>
#endif
41
/*
 * Debug tracing.  When DEBUG is defined, DPRINTF(a, b, c) prints the label
 * `a', the byte at `b' (once as two hex digits, once as a character) and up
 * to 20 characters starting at `c'.  `b' is dereferenced twice, so it must
 * not have side effects.  Without DEBUG the macro expands to an empty
 * statement and none of its arguments are evaluated.
 */
#ifdef DEBUG
#include <stdio.h>
#define DPRINTF(a, b, c)	\
    printf("%s [%.2x/%c] %.20s\n", (a), *(b), *(b), (const char *)(c))
#else
#define DPRINTF(a, b, c)	do { } while (/*CONSTCOND*/0)
#endif
49
/*
 * Indices into the statistics array (`st') that is threaded through the
 * parser.  json_parse() increments the slot matching the kind of value it
 * has just parsed successfully.
 */
#define JSON_ARRAY	0	/* array values */
#define JSON_CONSTANT	1	/* true / false / null */
#define JSON_NUMBER	2	/* numbers */
#define JSON_OBJECT	3	/* object values */
#define JSON_STRING	4	/* string values; object keys are not counted */
#define JSON_ARRAYN	5	/* bumped by json_parse_array() on every ']';
				   reported as ">1array" */
#define JSON_MAX	6	/* number of slots in the array */
57
/*
 * JSON_COUNT is a compile-time switch (default 0):
 *
 * if JSON_COUNT != 0:
 *	count all the objects, require that we have the whole data file
 * otherwise:
 *	stop if we find an object or an array
 *
 * NOTE(review): the early return in json_parse() is compiled under
 * `#if JSON_COUNT', i.e. only when counting is enabled, which looks like
 * the opposite of what is described above -- confirm which is intended.
 */
#ifndef JSON_COUNT
#define JSON_COUNT 0
#endif
67
/*
 * Forward declaration: json_parse() and the array/object parsers below call
 * each other recursively.
 */
static int json_parse(const unsigned char **, const unsigned char *, size_t *,
	size_t);
70
/*
 * Return 1 if `uc' is one of the four JSON whitespace characters
 * (space, line feed, carriage return, horizontal tab), 0 otherwise.
 */
static int
json_isspace(const unsigned char uc)
{
	return uc == ' ' || uc == '\n' || uc == '\r' || uc == '\t';
}
84
/*
 * Return 1 if `uc' is a decimal digit '0'..'9', 0 otherwise.
 * Locale independent, unlike isdigit(3).
 */
static int
json_isdigit(unsigned char uc)
{
	/* C guarantees that '0'..'9' are contiguous and ascending. */
	return uc >= '0' && uc <= '9';
}
96
/*
 * Return 1 if `uc' is a hexadecimal digit (0-9, a-f, A-F), 0 otherwise.
 */
static int
json_isxdigit(unsigned char uc)
{
	if (json_isdigit(uc))
		return 1;
	if (uc >= 'a' && uc <= 'f')
		return 1;
	return uc >= 'A' && uc <= 'F';
}
110
/*
 * Advance `uc' past any JSON whitespace, never going beyond `ue'.
 * Returns the first non-whitespace position, or `ue' if the rest of the
 * buffer is blank.
 */
static const unsigned char *
json_skip_space(const unsigned char *uc, const unsigned char *ue)
{
	for (; uc < ue; uc++) {
		if (!json_isspace(*uc))
			break;
	}
	return uc;
}
118
/*
 * Parse the remainder of a JSON string; the caller has already consumed the
 * opening '"'.
 *
 * On success returns 1 and leaves *ucp just past the closing '"'.
 * On failure returns 0 and leaves *ucp where scanning stopped.  Failure
 * happens on an embedded NUL byte, an unknown or truncated escape sequence,
 * a "\u" not followed by four hex digits, or when the buffer ends before
 * the closing quote.
 */
static int
json_parse_string(const unsigned char **ucp, const unsigned char *ue)
{
	const unsigned char *p = *ucp;
	unsigned char c;
	size_t k;

	DPRINTF("Parse string: ", p, *ucp);
	while (p < ue) {
		c = *p++;
		if (c == '"') {
			*ucp = p;
			DPRINTF("Good string: ", p, *ucp);
			return 1;
		}
		if (c == '\0')
			break;
		if (c != '\\')
			continue;

		/* Escape sequence: need at least one more byte. */
		if (p == ue)
			break;
		c = *p++;
		switch (c) {
		case '"':
		case '\\':
		case '/':
		case 'b':
		case 'f':
		case 'n':
		case 'r':
		case 't':
			/* single-character escape, resume scanning */
			continue;
		case 'u':
			break;
		default:
			/* includes NUL */
			goto bad;
		}

		/* "\u" must be followed by exactly four hex digits. */
		if (ue - p < 4) {
			p = ue;
			break;
		}
		for (k = 0; k < 4; k++) {
			if (!json_isxdigit(*p++))
				goto bad;
		}
	}
bad:
	DPRINTF("Bad string: ", p, *ucp);
	*ucp = p;
	return 0;
}
170
171static int
172json_parse_array(const unsigned char **ucp, const unsigned char *ue,
173	size_t *st, size_t lvl)
174{
175	const unsigned char *uc = *ucp;
176
177	DPRINTF("Parse array: ", uc, *ucp);
178	while (uc < ue) {
179		if (*uc == ']')
180			goto done;
181		if (!json_parse(&uc, ue, st, lvl + 1))
182			goto out;
183		if (uc == ue)
184			goto out;
185		switch (*uc) {
186		case ',':
187			uc++;
188			continue;
189		case ']':
190		done:
191			st[JSON_ARRAYN]++;
192			*ucp = uc + 1;
193			DPRINTF("Good array: ", uc, *ucp);
194			return 1;
195		default:
196			goto out;
197		}
198	}
199out:
200	DPRINTF("Bad array: ", uc,  *ucp);
201	*ucp = uc;
202	return 0;
203}
204
/*
 * Parse the remainder of a JSON object; the caller has already consumed the
 * opening brace.
 *
 * Each member must be a quoted key, a ':' and a value parsed by
 * json_parse(); members are separated by ','.  A closing brace is accepted
 * wherever a key may start, so the empty object and a trailing comma are
 * both tolerated.
 *
 * On success returns 1 and leaves *ucp just past the closing brace.
 * On failure returns 0 and leaves *ucp where parsing stopped.
 */
static int
json_parse_object(const unsigned char **ucp, const unsigned char *ue,
	size_t *st, size_t lvl)
{
	const unsigned char *p = *ucp;
	unsigned char sep;

	DPRINTF("Parse object: ", p, *ucp);
	while (p < ue) {
		p = json_skip_space(p, ue);
		if (p == ue)
			break;
		if (*p == '}') {
			p++;
			goto good;
		}

		/* Member name: a quoted string. */
		if (*p++ != '"') {
			DPRINTF("not string", p, *ucp);
			break;
		}
		DPRINTF("next field", p, *ucp);
		if (!json_parse_string(&p, ue)) {
			DPRINTF("not string", p, *ucp);
			break;
		}

		/* Name/value separator. */
		p = json_skip_space(p, ue);
		if (p == ue)
			break;
		if (*p++ != ':') {
			DPRINTF("not colon", p, *ucp);
			break;
		}

		/* Member value; json_parse() also eats trailing whitespace. */
		if (!json_parse(&p, ue, st, lvl + 1)) {
			DPRINTF("not json", p, *ucp);
			break;
		}
		if (p == ue)
			break;

		sep = *p++;
		if (sep == ',')
			continue;
		if (sep == '}') /* { */
			goto good;

		*ucp = p - 1;
		DPRINTF("not more", p, *ucp);
		break;
	}

	DPRINTF("Bad object: ", p, *ucp);
	*ucp = p;
	return 0;

good:
	*ucp = p;
	DPRINTF("Good object: ", p, *ucp);
	return 1;
}
260
/*
 * Parse a JSON number starting at *ucp.
 *
 * Accepts an optional '-', a run of digits, an optional '.' followed by a
 * run of digits, and -- only if at least one digit was seen -- an exponent
 * ('e' or 'E', optional sign, digits).  The scan is lenient: either digit
 * run may be empty as long as the other is not.
 *
 * Returns 1 if a number was recognised, 0 otherwise.  Except when the
 * buffer is already empty on entry, *ucp is advanced to where scanning
 * stopped.
 */
static int
json_parse_number(const unsigned char **ucp, const unsigned char *ue)
{
	const unsigned char *p = *ucp;
	int seen = 0;

	DPRINTF("Parse number: ", p, *ucp);
	if (p == ue)
		return 0;

	/* optional leading minus */
	if (*p == '-')
		p++;

	/* integer part */
	while (p < ue && json_isdigit(*p)) {
		seen = 1;
		p++;
	}
	if (p == ue)
		goto done;

	/* optional fraction */
	if (*p == '.')
		p++;
	while (p < ue && json_isdigit(*p)) {
		seen = 1;
		p++;
	}
	if (p == ue)
		goto done;

	/* optional exponent; it needs digits of its own to count */
	if (seen && (*p == 'e' || *p == 'E')) {
		p++;
		seen = 0;
		if (p == ue)
			goto done;
		if (*p == '+' || *p == '-')
			p++;
		while (p < ue && json_isdigit(*p)) {
			seen = 1;
			p++;
		}
	}
done:
	if (seen) {
		DPRINTF("Good number: ", p, *ucp);
	} else {
		DPRINTF("Bad number: ", p, *ucp);
	}
	*ucp = p;
	return seen;
}
310
/*
 * Match the tail of a literal name ("true", "false" or "null").
 *
 * The caller has already consumed the first character, so *ucp points at
 * what should be str[1].  `len' is sizeof() of the literal, i.e. it includes
 * the terminating NUL; the bytes compared are therefore str[1..len-2].
 *
 * Returns 1 and leaves *ucp just past the literal when every remaining
 * character matches.  Returns 0 and leaves *ucp at the first mismatching
 * byte (or at `ue' if the buffer is too short) otherwise.
 */
static int
json_parse_const(const unsigned char **ucp, const unsigned char *ue,
    const char *str, size_t len)
{
	const unsigned char *uc = *ucp;
	size_t i;

	DPRINTF("Parse const: ", uc, *ucp);
	for (i = 1; i + 1 < len; i++) {
		/* Fail on truncated input or on the first differing byte. */
		if (uc == ue || *uc != (unsigned char)str[i]) {
			DPRINTF("Bad const: ", uc, *ucp);
			*ucp = uc;
			return 0;
		}
		uc++;
	}
	DPRINTF("Good const: ", uc, *ucp);
	*ucp = uc;
	return 1;
}
327
328static int
329json_parse(const unsigned char **ucp, const unsigned char *ue,
330    size_t *st, size_t lvl)
331{
332	const unsigned char *uc;
333	int rv = 0;
334	int t;
335
336	uc = json_skip_space(*ucp, ue);
337	if (uc == ue)
338		goto out;
339
340	// Avoid recursion
341	if (lvl > 20)
342		return 0;
343#if JSON_COUNT
344	/* bail quickly if not counting */
345	if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAYN]))
346		return 1;
347#endif
348
349	DPRINTF("Parse general: ", uc, *ucp);
350	switch (*uc++) {
351	case '"':
352		rv = json_parse_string(&uc, ue);
353		t = JSON_STRING;
354		break;
355	case '[':
356		rv = json_parse_array(&uc, ue, st, lvl + 1);
357		t = JSON_ARRAY;
358		break;
359	case '{': /* '}' */
360		rv = json_parse_object(&uc, ue, st, lvl + 1);
361		t = JSON_OBJECT;
362		break;
363	case 't':
364		rv = json_parse_const(&uc, ue, "true", sizeof("true"));
365		t = JSON_CONSTANT;
366		break;
367	case 'f':
368		rv = json_parse_const(&uc, ue, "false", sizeof("false"));
369		t = JSON_CONSTANT;
370		break;
371	case 'n':
372		rv = json_parse_const(&uc, ue, "null", sizeof("null"));
373		t = JSON_CONSTANT;
374		break;
375	default:
376		--uc;
377		rv = json_parse_number(&uc, ue);
378		t = JSON_NUMBER;
379		break;
380	}
381	if (rv)
382		st[t]++;
383	uc = json_skip_space(uc, ue);
384out:
385	*ucp = uc;
386	DPRINTF("End general: ", uc, *ucp);
387	if (lvl == 0)
388		return rv && (st[JSON_ARRAYN] || st[JSON_OBJECT]);
389	return rv;
390}
391
392#ifndef TEST
393int
394file_is_json(struct magic_set *ms, const struct buffer *b)
395{
396	const unsigned char *uc = CAST(const unsigned char *, b->fbuf);
397	const unsigned char *ue = uc + b->flen;
398	size_t st[JSON_MAX];
399	int mime = ms->flags & MAGIC_MIME;
400
401
402	if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0)
403		return 0;
404
405	memset(st, 0, sizeof(st));
406
407	if (!json_parse(&uc, ue, st, 0))
408		return 0;
409
410	if (mime == MAGIC_MIME_ENCODING)
411		return 1;
412	if (mime) {
413		if (file_printf(ms, "application/json") == -1)
414			return -1;
415		return 1;
416	}
417	if (file_printf(ms, "JSON data") == -1)
418		return -1;
419#if JSON_COUNT
420#define P(n) st[n], st[n] > 1 ? "s" : ""
421	if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT
422	    "u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT
423	    "u constant%s, %" SIZE_T_FORMAT "u number%s, %" SIZE_T_FORMAT
424	    "u >1array%s)",
425	    P(JSON_OBJECT), P(JSON_ARRAY), P(JSON_STRING), P(JSON_CONSTANT),
426	    P(JSON_NUMBER), P(JSON_ARRAYN))
427	    == -1)
428		return -1;
429#endif
430	return 1;
431}
432
433#else
434
435#include <sys/types.h>
436#include <sys/stat.h>
437#include <stdio.h>
438#include <fcntl.h>
439#include <unistd.h>
440#include <stdlib.h>
441#include <stdint.h>
442#include <err.h>
443
444int
445main(int argc, char *argv[])
446{
447	int fd, rv;
448	struct stat st;
449	unsigned char *p;
450	size_t stats[JSON_MAX];
451
452	if ((fd = open(argv[1], O_RDONLY)) == -1)
453		err(EXIT_FAILURE, "Can't open `%s'", argv[1]);
454
455	if (fstat(fd, &st) == -1)
456		err(EXIT_FAILURE, "Can't stat `%s'", argv[1]);
457
458	if ((p = malloc(st.st_size)) == NULL)
459		err(EXIT_FAILURE, "Can't allocate %jd bytes",
460		    (intmax_t)st.st_size);
461	if (read(fd, p, st.st_size) != st.st_size)
462		err(EXIT_FAILURE, "Can't read %jd bytes",
463		    (intmax_t)st.st_size);
464	memset(stats, 0, sizeof(stats));
465	printf("is json %d\n", json_parse((const unsigned char **)&p,
466	    p + st.st_size, stats, 0));
467	return 0;
468}
469#endif
470