is_json.c revision 1.3
1/*	$NetBSD: is_json.c,v 1.3 2019/05/22 17:26:05 christos Exp $	*/
2
3/*-
4 * Copyright (c) 2018 Christos Zoulas
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/*
30 * Parse JSON object serialization format (RFC-7159)
31 */
32
33#ifndef TEST
34#include "file.h"
35
36#ifndef lint
37#if 0
38FILE_RCSID("@(#)$File: is_json.c,v 1.13 2019/03/02 01:08:10 christos Exp $")
39#else
40__RCSID("$NetBSD: is_json.c,v 1.3 2019/05/22 17:26:05 christos Exp $");
41#endif
42#endif
43
44#include <string.h>
45#include "magic.h"
46#endif
47
/*
 * Debug tracing helper.
 *
 * With DEBUG defined, DPRINTF(a, b, c) prints the label a, the byte that
 * b points at (once in hex, once as a character) and at most the first
 * 20 characters of c.  b is dereferenced twice, so it must point at a
 * readable byte and should not have side effects.
 *
 * Without DEBUG the macro expands to an empty statement and none of its
 * arguments are evaluated.
 */
#ifdef DEBUG
#include <stdio.h>
#define DPRINTF(a, b, c)	\
    printf("%s [%.2x/%c] %.20s\n", (a), *(b), *(b), (const char *)(c))
#else
#define DPRINTF(a, b, c)	do { } while (/*CONSTCOND*/0)
#endif
55
/*
 * Indices into the per-type counter array (size_t st[JSON_MAX]) that is
 * threaded through the parser; json_parse() bumps st[type] each time a
 * value of that type has been parsed successfully.
 */
#define JSON_ARRAY	0	/* arrays */
#define JSON_CONSTANT	1	/* true / false / null */
#define JSON_NUMBER	2	/* numbers */
#define JSON_OBJECT	3	/* objects */
#define JSON_STRING	4	/* strings that appear as values */
#define JSON_ARRAYN	5	/* arrays in which a ',' separator was seen */
#define JSON_MAX	6	/* number of counters, not a type */
63
/*
 * if JSON_COUNT != 0:
 *	count all the objects, require that we have the whole data file
 * otherwise:
 *	stop if we find an object or an array
 *
 * JSON_COUNT defaults to 0 unless the build defines it.  When it is
 * non-zero, file_is_json() appends the per-type counters to its output.
 *
 * NOTE(review): the only early exit in json_parse() is compiled under
 * "#if JSON_COUNT", i.e. in the counting configuration, which looks like
 * the opposite of the description above -- confirm the intended polarity.
 */
#ifndef JSON_COUNT
#define JSON_COUNT 0
#endif
73
74static int json_parse(const unsigned char **, const unsigned char *, size_t *,
75	size_t);
76
/*
 * Return 1 if uc is one of the four bytes treated as JSON whitespace
 * (space, line feed, carriage return, horizontal tab), 0 otherwise.
 * Deliberately locale-independent, unlike isspace(3).
 */
static int
json_isspace(const unsigned char uc)
{
	return uc == ' ' || uc == '\n' || uc == '\r' || uc == '\t';
}
90
/*
 * Return 1 if uc is an ASCII decimal digit, 0 otherwise.
 * The C standard guarantees '0'..'9' are contiguous, so a range test is
 * equivalent to enumerating the ten characters.
 */
static int
json_isdigit(unsigned char uc)
{
	return uc >= '0' && uc <= '9';
}
102
/*
 * Return 1 if uc is a hexadecimal digit (0-9, a-f, A-F), 0 otherwise.
 * Used to validate the four digits of a \uXXXX string escape.
 */
static int
json_isxdigit(unsigned char uc)
{
	if (uc >= 'a' && uc <= 'f')
		return 1;
	if (uc >= 'A' && uc <= 'F')
		return 1;
	return json_isdigit(uc);
}
116
/*
 * Advance uc over any run of JSON whitespace, never moving past ue.
 * Returns the first position that is either not whitespace or equal
 * to ue.
 */
static const unsigned char *
json_skip_space(const unsigned char *uc, const unsigned char *ue)
{
	for (; uc < ue; uc++) {
		if (!json_isspace(*uc))
			break;
	}
	return uc;
}
124
/*
 * Scan the remainder of a JSON string; the callers have already consumed
 * the opening '"'.
 *
 * On success returns 1 and leaves *ucp just past the closing quote.
 * Returns 0 (with *ucp at the point where scanning stopped) if the buffer
 * ends first, a NUL byte is met, or a backslash is followed by anything
 * other than one of  " \ / b f n r t  or 'u' plus four hex digits.
 */
static int
json_parse_string(const unsigned char **ucp, const unsigned char *ue)
{
	const unsigned char *p = *ucp;
	unsigned char c;
	size_t left;

	DPRINTF("Parse string: ", p, *ucp);
	while (p < ue) {
		c = *p++;
		if (c == '"') {
			*ucp = p;
			return 1;
		}
		if (c == '\0')
			goto bad;
		if (c != '\\')
			continue;

		/* Escape sequence: need at least one more byte. */
		if (p == ue)
			goto bad;
		c = *p++;
		if (c == 'u') {
			/* \uXXXX: all four hex digits must be in the buffer */
			if (ue - p < 4) {
				p = ue;
				goto bad;
			}
			for (left = 4; left > 0; left--) {
				if (!json_isxdigit(*p++))
					goto bad;
			}
			continue;
		}
		switch (c) {
		case '"':
		case '\\':
		case '/':
		case 'b':
		case 'f':
		case 'n':
		case 'r':
		case 't':
			continue;
		default:
			/* includes an embedded NUL after the backslash */
			goto bad;
		}
	}
bad:
	DPRINTF("Bad string: ", p, *ucp);
	*ucp = p;
	return 0;
}
175
176static int
177json_parse_array(const unsigned char **ucp, const unsigned char *ue,
178	size_t *st, size_t lvl)
179{
180	const unsigned char *uc = *ucp;
181	int more = 0;	/* Array has more than 1 element */
182
183	DPRINTF("Parse array: ", uc, *ucp);
184	while (uc < ue) {
185		if (!json_parse(&uc, ue, st, lvl + 1))
186			goto out;
187		if (uc == ue)
188			goto out;
189		switch (*uc) {
190		case ',':
191			more++;
192			uc++;
193			continue;
194		case ']':
195			if (more)
196				st[JSON_ARRAYN]++;
197			*ucp = uc + 1;
198			return 1;
199		default:
200			goto out;
201		}
202	}
203out:
204	DPRINTF("Bad array: ", uc,  *ucp);
205	*ucp = uc;
206	return 0;
207}
208
/*
 * Parse the remainder of a JSON object; the caller has already consumed
 * the opening brace.
 *
 * ucp	in: first byte after the opening brace; out: first byte after the
 *	closing brace on success, otherwise the position where parsing
 *	stopped (the offending byte when a ',' or closing brace was
 *	expected).
 * ue	one past the last byte of the buffer.
 * st	per-type counters, passed through to json_parse() for the member
 *	values.  Member names are not counted as strings.
 * lvl	current nesting level, passed on (+1) to json_parse().
 *
 * Returns 1 for a well-formed object, 0 otherwise.  The empty object
 * (optionally with whitespace between the braces) is accepted; a
 * trailing comma after the last member is not.
 */
static int
json_parse_object(const unsigned char **ucp, const unsigned char *ue,
	size_t *st, size_t lvl)
{
	const unsigned char *uc = *ucp;

	DPRINTF("Parse object: ", uc, *ucp);

	/*
	 * RFC 8259 allows zero members.  Check before the member loop,
	 * which insists on a '"' starting a member name.
	 */
	uc = json_skip_space(uc, ue);
	if (uc < ue && *uc == '}') { /* { */
		uc++;
		*ucp = uc;
		DPRINTF("Good object: ", uc, *ucp);
		return 1;
	}

	while (uc < ue) {
		/* member name */
		uc = json_skip_space(uc, ue);
		if (uc == ue)
			goto out;
		if (*uc++ != '"') {
			DPRINTF("not string", uc, *ucp);
			goto out;
		}
		DPRINTF("next field", uc, *ucp);
		if (!json_parse_string(&uc, ue)) {
			DPRINTF("not string", uc, *ucp);
			goto out;
		}
		/* name/value separator */
		uc = json_skip_space(uc, ue);
		if (uc == ue)
			goto out;
		if (*uc++ != ':') {
			DPRINTF("not colon", uc, *ucp);
			goto out;
		}
		/* member value; json_parse() also eats trailing whitespace */
		if (!json_parse(&uc, ue, st, lvl + 1)) {
			DPRINTF("not json", uc, *ucp);
			goto out;
		}
		if (uc == ue)
			goto out;
		switch (*uc++) {
		case ',':
			continue;
		case '}': /* { */
			*ucp = uc;
			DPRINTF("Good object: ", uc, *ucp);
			return 1;
		default:
			/*
			 * Step back so that the error position reported
			 * through *ucp is the unexpected byte itself.
			 */
			uc--;
			DPRINTF("not more", uc, *ucp);
			goto out;
		}
	}
out:
	DPRINTF("Bad object: ", uc, *ucp);
	*ucp = uc;
	return 0;
}
259
/*
 * Leniently scan a JSON number starting at *ucp:
 *	[-] digits* [.] digits* [ (e|E) [+|-] digits* ]
 * At least one digit must be seen before the exponent marker, and if an
 * exponent marker is consumed at least one digit must follow it.
 *
 * Returns 1 if the text qualifies as a number and 0 otherwise.  Unless
 * the buffer is already empty on entry, *ucp is advanced to where
 * scanning stopped.
 */
static int
json_parse_number(const unsigned char **ucp, const unsigned char *ue)
{
	const unsigned char *p = *ucp;
	int seen = 0;	/* saw a digit in the part that requires one */

	DPRINTF("Parse number: ", p, *ucp);
	if (p == ue)
		return 0;
	if (*p == '-')
		p++;

	/* integer part */
	while (p < ue && json_isdigit(*p)) {
		seen = 1;
		p++;
	}
	if (p == ue)
		goto done;

	/* optional fraction */
	if (*p == '.')
		p++;
	while (p < ue && json_isdigit(*p)) {
		seen = 1;
		p++;
	}
	if (p == ue)
		goto done;

	/* optional exponent, only after a mantissa with digits */
	if (seen && (*p == 'e' || *p == 'E')) {
		p++;
		seen = 0;	/* the exponent needs digits of its own */
		if (p == ue)
			goto done;
		if (*p == '+' || *p == '-')
			p++;
		while (p < ue && json_isdigit(*p)) {
			seen = 1;
			p++;
		}
	}
done:
	if (seen)
		DPRINTF("Good number: ", p, *ucp);
	else
		DPRINTF("Bad number: ", p, *ucp);
	*ucp = p;
	return seen;
}
309
/*
 * Match the tail of one of the literal names "true", "false" or "null".
 *
 * ucp	in: the byte following the literal's first character (which the
 *	caller has already matched and consumed); out: where matching
 *	stopped.
 * ue	one past the last byte of the buffer.
 * str	the complete literal, including its first character.
 * len	sizeof(literal), i.e. its length plus the terminating NUL; must be
 *	at least 2.
 *
 * Returns 1 only if every remaining character of str is present in the
 * buffer, 0 on a mismatch or if the buffer ends early.
 */
static int
json_parse_const(const unsigned char **ucp, const unsigned char *ue,
    const char *str, size_t len)
{
	const unsigned char *uc = *ucp;

	DPRINTF("Parse const: ", uc, *ucp);
	/*
	 * The initial len-- drops the NUL from the count and the --len in
	 * the condition accounts for the already-consumed first character,
	 * so len reaches 0 exactly when the whole literal has matched.
	 */
	for (len--; uc < ue && --len;) {
		/*
		 * Stop at the first difference.  Without this, any bytes of
		 * the right length (e.g. "txyz") were accepted.
		 */
		if (*uc++ != (unsigned char)*++str)
			break;
	}
	if (len)
		DPRINTF("Bad const: ", uc, *ucp);
	*ucp = uc;
	return len == 0;
}
326
327static int
328json_parse(const unsigned char **ucp, const unsigned char *ue,
329    size_t *st, size_t lvl)
330{
331	const unsigned char *uc;
332	int rv = 0;
333	int t;
334
335	uc = json_skip_space(*ucp, ue);
336	if (uc == ue)
337		goto out;
338
339	// Avoid recursion
340	if (lvl > 20)
341		return 0;
342#if JSON_COUNT
343	/* bail quickly if not counting */
344	if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAYN]))
345		return 1;
346#endif
347
348	DPRINTF("Parse general: ", uc, *ucp);
349	switch (*uc++) {
350	case '"':
351		rv = json_parse_string(&uc, ue);
352		t = JSON_STRING;
353		break;
354	case '[':
355		rv = json_parse_array(&uc, ue, st, lvl + 1);
356		t = JSON_ARRAY;
357		break;
358	case '{': /* '}' */
359		rv = json_parse_object(&uc, ue, st, lvl + 1);
360		t = JSON_OBJECT;
361		break;
362	case 't':
363		rv = json_parse_const(&uc, ue, "true", sizeof("true"));
364		t = JSON_CONSTANT;
365		break;
366	case 'f':
367		rv = json_parse_const(&uc, ue, "false", sizeof("false"));
368		t = JSON_CONSTANT;
369		break;
370	case 'n':
371		rv = json_parse_const(&uc, ue, "null", sizeof("null"));
372		t = JSON_CONSTANT;
373		break;
374	default:
375		--uc;
376		rv = json_parse_number(&uc, ue);
377		t = JSON_NUMBER;
378		break;
379	}
380	if (rv)
381		st[t]++;
382	uc = json_skip_space(uc, ue);
383out:
384	*ucp = uc;
385	DPRINTF("End general: ", uc, *ucp);
386	if (lvl == 0)
387		return rv && (st[JSON_ARRAYN] || st[JSON_OBJECT]);
388	return rv;
389}
390
391#ifndef TEST
392int
393file_is_json(struct magic_set *ms, const struct buffer *b)
394{
395	const unsigned char *uc = CAST(const unsigned char *, b->fbuf);
396	const unsigned char *ue = uc + b->flen;
397	size_t st[JSON_MAX];
398	int mime = ms->flags & MAGIC_MIME;
399
400
401	if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0)
402		return 0;
403
404	memset(st, 0, sizeof(st));
405
406	if (!json_parse(&uc, ue, st, 0))
407		return 0;
408
409	if (mime == MAGIC_MIME_ENCODING)
410		return 1;
411	if (mime) {
412		if (file_printf(ms, "application/json") == -1)
413			return -1;
414		return 1;
415	}
416	if (file_printf(ms, "JSON data") == -1)
417		return -1;
418#if JSON_COUNT
419#define P(n) st[n], st[n] > 1 ? "s" : ""
420	if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT
421	    "u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT
422	    "u constant%s, %" SIZE_T_FORMAT "u number%s, %" SIZE_T_FORMAT
423	    "u >1array%s)",
424	    P(JSON_OBJECT), P(JSON_ARRAY), P(JSON_STRING), P(JSON_CONSTANT),
425	    P(JSON_NUMBER), P(JSON_ARRAYN))
426	    == -1)
427		return -1;
428#endif
429	return 1;
430}
431
432#else
433
434#include <sys/types.h>
435#include <sys/stat.h>
436#include <stdio.h>
437#include <fcntl.h>
438#include <unistd.h>
439#include <stdlib.h>
440#include <stdint.h>
441#include <err.h>
442
443int
444main(int argc, char *argv[])
445{
446	int fd, rv;
447	struct stat st;
448	unsigned char *p;
449	size_t stats[JSON_MAX];
450
451	if ((fd = open(argv[1], O_RDONLY)) == -1)
452		err(EXIT_FAILURE, "Can't open `%s'", argv[1]);
453
454	if (fstat(fd, &st) == -1)
455		err(EXIT_FAILURE, "Can't stat `%s'", argv[1]);
456
457	if ((p = malloc(st.st_size)) == NULL)
458		err(EXIT_FAILURE, "Can't allocate %jd bytes",
459		    (intmax_t)st.st_size);
460	if (read(fd, p, st.st_size) != st.st_size)
461		err(EXIT_FAILURE, "Can't read %jd bytes",
462		    (intmax_t)st.st_size);
463	memset(stats, 0, sizeof(stats));
464	printf("is json %d\n", json_parse((const unsigned char **)&p,
465	    p + st.st_size, stats, 0));
466	return 0;
467}
468#endif
469