is_json.c revision 354939
1/*-
2 * Copyright (c) 2018 Christos Zoulas
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
15 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
18 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 * POSSIBILITY OF SUCH DAMAGE.
25 */
26
27/*
28 * Parse JSON object serialization format (RFC-7159)
29 */
30
31#ifndef TEST
32#include "file.h"
33
34#ifndef lint
35FILE_RCSID("@(#)$File: is_json.c,v 1.13 2019/03/02 01:08:10 christos Exp $")
36#endif
37
38#include <string.h>
39#include "magic.h"
40#endif
41
#ifdef DEBUG
#include <stdio.h>
/*
 * Debug trace.  Prints the tag string (a), the byte that (b) points at
 * both in hex and as a character, and at most 20 bytes of text starting
 * at (c).
 * NOTE(review): (b) is dereferenced unconditionally; several callers pass
 * a position that can be equal to the end of the buffer (for example the
 * "End general" trace in json_parse), so a DEBUG build may read one byte
 * past the data -- confirm before relying on DEBUG output.
 */
#define DPRINTF(a, b, c)	\
    printf("%s [%.2x/%c] %.20s\n", (a), *(b), *(b), (const char *)(c))
#else
/* Tracing disabled: expands to an empty statement, arguments unevaluated. */
#define DPRINTF(a, b, c)	do { } while (/*CONSTCOND*/0)
#endif
49
/*
 * Indices into the size_t counter array ("st") that the parser functions
 * pass around; json_parse bumps st[kind] for every value it parses
 * successfully.
 */
#define JSON_ARRAY	0	/* arrays */
#define JSON_CONSTANT	1	/* true / false / null */
#define JSON_NUMBER	2	/* numbers */
#define JSON_OBJECT	3	/* objects */
#define JSON_STRING	4	/* strings */
#define JSON_ARRAYN	5	/* arrays holding more than one element */
#define JSON_MAX	6	/* number of counters, i.e. the array length */
57
58/*
59 * if JSON_COUNT != 0:
60 *	count all the objects, require that we have the whole data file
61 * otherwise:
62 *	stop if we find an object or an array
63 */
64#ifndef JSON_COUNT
65#define JSON_COUNT 0
66#endif
67
/*
 * Forward declaration: json_parse_array() and json_parse_object() call
 * json_parse() for nested values before its definition appears below.
 */
static int json_parse(const unsigned char **, const unsigned char *, size_t *,
	size_t);
70
/*
 * Return 1 when uc is one of the four whitespace bytes recognised here
 * (space, line feed, carriage return, horizontal tab), otherwise 0.
 */
static int
json_isspace(const unsigned char uc)
{
	return uc == ' ' || uc == '\n' || uc == '\r' || uc == '\t';
}
84
/*
 * Return 1 when uc is an ASCII decimal digit, otherwise 0.
 * Locale independent (does not use <ctype.h>).
 */
static int
json_isdigit(unsigned char uc)
{
	/* C guarantees that '0'..'9' are consecutive in the execution set. */
	return uc >= '0' && uc <= '9';
}
96
/*
 * Return 1 when uc is a hexadecimal digit (0-9, a-f, A-F), otherwise 0.
 */
static int
json_isxdigit(unsigned char uc)
{
	return json_isdigit(uc) ||
	    (uc >= 'a' && uc <= 'f') ||
	    (uc >= 'A' && uc <= 'F');
}
110
/*
 * Advance past any run of JSON whitespace starting at uc, never going
 * beyond ue.  Returns the first non-whitespace position, or ue when only
 * whitespace remains.
 */
static const unsigned char *
json_skip_space(const unsigned char *uc, const unsigned char *ue)
{
	for (; uc < ue; uc++) {
		if (!json_isspace(*uc))
			break;
	}
	return uc;
}
118
/*
 * Scan the remainder of a string; the opening quote has already been
 * consumed by the caller.
 *
 * ucp	in: first byte after the opening quote
 *	out: byte after the closing quote on success, otherwise the
 *	position where scanning stopped
 * ue	end of the buffer (one past the last byte)
 *
 * Returns 1 when a closing quote is found, 0 when the buffer ends first,
 * a NUL byte is seen, or a backslash escape is malformed.
 */
static int
json_parse_string(const unsigned char **ucp, const unsigned char *ue)
{
	const unsigned char *p = *ucp;
	unsigned char c;
	size_t left;

	DPRINTF("Parse string: ", p, *ucp);
	while (p < ue) {
		c = *p++;
		if (c == '"') {
			/* Closing quote: report the position just past it. */
			*ucp = p;
			return 1;
		}
		if (c == '\0')
			goto bad;
		if (c != '\\')
			continue;

		/* Backslash: the escape character itself must be present. */
		if (p == ue)
			goto bad;
		c = *p++;
		switch (c) {
		case '"':
		case '\\':
		case '/':
		case 'b':
		case 'f':
		case 'n':
		case 'r':
		case 't':
			break;
		case 'u':
			/* \u needs exactly four hex digits inside the buffer. */
			if (ue - p < 4) {
				p = ue;
				goto bad;
			}
			for (left = 4; left != 0; left--) {
				if (!json_isxdigit(*p++))
					goto bad;
			}
			break;
		default:
			/* Unknown escape, including an embedded NUL. */
			goto bad;
		}
	}
bad:
	DPRINTF("Bad string: ", p, *ucp);
	*ucp = p;
	return 0;
}
169
170static int
171json_parse_array(const unsigned char **ucp, const unsigned char *ue,
172	size_t *st, size_t lvl)
173{
174	const unsigned char *uc = *ucp;
175	int more = 0;	/* Array has more than 1 element */
176
177	DPRINTF("Parse array: ", uc, *ucp);
178	while (uc < ue) {
179		if (!json_parse(&uc, ue, st, lvl + 1))
180			goto out;
181		if (uc == ue)
182			goto out;
183		switch (*uc) {
184		case ',':
185			more++;
186			uc++;
187			continue;
188		case ']':
189			if (more)
190				st[JSON_ARRAYN]++;
191			*ucp = uc + 1;
192			return 1;
193		default:
194			goto out;
195		}
196	}
197out:
198	DPRINTF("Bad array: ", uc,  *ucp);
199	*ucp = uc;
200	return 0;
201}
202
/*
 * Parse the remainder of an object; the opening brace has already been
 * consumed by the caller.
 *
 * ucp	in: first byte after the opening brace
 *	out: byte after the closing brace on success, otherwise the
 *	position where parsing stopped
 * ue	end of the buffer (one past the last byte)
 * st	counter array handed through to json_parse()
 * lvl	current nesting level, passed on (plus one) to json_parse()
 *
 * Returns 1 for a complete object, 0 otherwise.
 */
static int
json_parse_object(const unsigned char **ucp, const unsigned char *ue,
	size_t *st, size_t lvl)
{
	const unsigned char *uc = *ucp;

	DPRINTF("Parse object: ", uc, *ucp);

	/*
	 * An object may have no members ("{}", possibly with whitespace
	 * inside).  The loop below always expects a quoted name first, so
	 * handle the empty case here.  Checking only before the first member
	 * keeps a trailing comma invalid.
	 */
	uc = json_skip_space(uc, ue);
	if (uc < ue && *uc == '}') { /* { */
		DPRINTF("Good object: ", uc, *ucp);
		*ucp = uc + 1;
		return 1;
	}

	while (uc < ue) {
		/* Member name: a quoted string. */
		uc = json_skip_space(uc, ue);
		if (uc == ue)
			goto out;
		if (*uc++ != '"') {
			DPRINTF("not string", uc, *ucp);
			goto out;
		}
		DPRINTF("next field", uc, *ucp);
		if (!json_parse_string(&uc, ue)) {
			DPRINTF("not string", uc, *ucp);
			goto out;
		}

		/* Name/value separator. */
		uc = json_skip_space(uc, ue);
		if (uc == ue)
			goto out;
		if (*uc++ != ':') {
			DPRINTF("not colon", uc, *ucp);
			goto out;
		}

		/* Member value; json_parse() also eats trailing whitespace. */
		if (!json_parse(&uc, ue, st, lvl + 1)) {
			DPRINTF("not json", uc, *ucp);
			goto out;
		}
		if (uc == ue)
			goto out;
		switch (*uc++) {
		case ',':
			continue;
		case '}': /* { */
			*ucp = uc;
			DPRINTF("Good object: ", uc, *ucp);
			return 1;
		default:
			DPRINTF("not more", uc, *ucp);
			goto out;
		}
	}
out:
	DPRINTF("Bad object: ", uc, *ucp);
	*ucp = uc;
	return 0;
}
253
/*
 * Scan a number: optional '-', digits, optional '.', digits, and an
 * optional exponent ('e' or 'E', optional sign, digits).
 *
 * ucp	in: first byte of the candidate number
 *	out: first byte that was not consumed (left untouched when the
 *	buffer is already exhausted on entry)
 * ue	end of the buffer (one past the last byte)
 *
 * Returns 1 when at least one digit was seen in the mantissa and, if an
 * exponent marker is present, at least one digit in the exponent;
 * otherwise 0.  The check is deliberately loose: forms such as ".5" or
 * "1." are accepted.
 */
static int
json_parse_number(const unsigned char **ucp, const unsigned char *ue)
{
	const unsigned char *p = *ucp;
	int seen = 0;	/* a digit was seen in the part being scanned */

	DPRINTF("Parse number: ", p, *ucp);
	if (p == ue)
		return 0;

	/* Optional sign followed by the integer digits. */
	if (*p == '-')
		p++;
	while (p < ue && json_isdigit(*p)) {
		seen = 1;
		p++;
	}

	/* Optional fraction. */
	if (p < ue && *p == '.')
		p++;
	while (p < ue && json_isdigit(*p)) {
		seen = 1;
		p++;
	}

	/* Optional exponent, only considered after a mantissa digit. */
	if (p < ue && seen && (*p == 'e' || *p == 'E')) {
		p++;
		seen = 0;	/* the exponent needs digits of its own */
		if (p < ue && (*p == '+' || *p == '-'))
			p++;
		while (p < ue && json_isdigit(*p)) {
			seen = 1;
			p++;
		}
	}

	DPRINTF(seen ? "Good number: " : "Bad number: ", p, *ucp);
	*ucp = p;
	return seen;
}
303
/*
 * Match the tail of a literal constant ("true", "false", "null").
 *
 * ucp	in: first byte after the literal's initial letter, which the
 *	caller has already consumed
 *	out: byte after the literal on success, otherwise the byte that
 *	failed to match (or ue if the buffer ran out)
 * ue	end of the buffer (one past the last byte)
 * str	the full literal, including its first letter
 * len	sizeof the literal, i.e. its length including the terminating NUL
 *
 * Returns 1 when every remaining character of the literal is present in
 * the buffer, 0 on the first mismatch or when the buffer is too short.
 */
static int
json_parse_const(const unsigned char **ucp, const unsigned char *ue,
    const char *str, size_t len)
{
	const unsigned char *uc = *ucp;
	size_t i;
	int ok = 1;

	DPRINTF("Parse const: ", uc, *ucp);
	/*
	 * str[0] was matched by the caller and str[len - 1] is the NUL, so
	 * the characters left to compare are str[1] .. str[len - 2].
	 * Written as "i + 1 < len" so a len below 2 cannot wrap around.
	 */
	for (i = 1; i + 1 < len; i++) {
		if (uc == ue || *uc != (unsigned char)str[i]) {
			ok = 0;
			break;
		}
		uc++;
	}
	if (!ok)
		DPRINTF("Bad const: ", uc, *ucp);
	*ucp = uc;
	return ok;
}
320
321static int
322json_parse(const unsigned char **ucp, const unsigned char *ue,
323    size_t *st, size_t lvl)
324{
325	const unsigned char *uc;
326	int rv = 0;
327	int t;
328
329	uc = json_skip_space(*ucp, ue);
330	if (uc == ue)
331		goto out;
332
333	// Avoid recursion
334	if (lvl > 20)
335		return 0;
336#if JSON_COUNT
337	/* bail quickly if not counting */
338	if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAYN]))
339		return 1;
340#endif
341
342	DPRINTF("Parse general: ", uc, *ucp);
343	switch (*uc++) {
344	case '"':
345		rv = json_parse_string(&uc, ue);
346		t = JSON_STRING;
347		break;
348	case '[':
349		rv = json_parse_array(&uc, ue, st, lvl + 1);
350		t = JSON_ARRAY;
351		break;
352	case '{': /* '}' */
353		rv = json_parse_object(&uc, ue, st, lvl + 1);
354		t = JSON_OBJECT;
355		break;
356	case 't':
357		rv = json_parse_const(&uc, ue, "true", sizeof("true"));
358		t = JSON_CONSTANT;
359		break;
360	case 'f':
361		rv = json_parse_const(&uc, ue, "false", sizeof("false"));
362		t = JSON_CONSTANT;
363		break;
364	case 'n':
365		rv = json_parse_const(&uc, ue, "null", sizeof("null"));
366		t = JSON_CONSTANT;
367		break;
368	default:
369		--uc;
370		rv = json_parse_number(&uc, ue);
371		t = JSON_NUMBER;
372		break;
373	}
374	if (rv)
375		st[t]++;
376	uc = json_skip_space(uc, ue);
377out:
378	*ucp = uc;
379	DPRINTF("End general: ", uc, *ucp);
380	if (lvl == 0)
381		return rv && (st[JSON_ARRAYN] || st[JSON_OBJECT]);
382	return rv;
383}
384
385#ifndef TEST
386int
387file_is_json(struct magic_set *ms, const struct buffer *b)
388{
389	const unsigned char *uc = CAST(const unsigned char *, b->fbuf);
390	const unsigned char *ue = uc + b->flen;
391	size_t st[JSON_MAX];
392	int mime = ms->flags & MAGIC_MIME;
393
394
395	if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0)
396		return 0;
397
398	memset(st, 0, sizeof(st));
399
400	if (!json_parse(&uc, ue, st, 0))
401		return 0;
402
403	if (mime == MAGIC_MIME_ENCODING)
404		return 1;
405	if (mime) {
406		if (file_printf(ms, "application/json") == -1)
407			return -1;
408		return 1;
409	}
410	if (file_printf(ms, "JSON data") == -1)
411		return -1;
412#if JSON_COUNT
413#define P(n) st[n], st[n] > 1 ? "s" : ""
414	if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT
415	    "u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT
416	    "u constant%s, %" SIZE_T_FORMAT "u number%s, %" SIZE_T_FORMAT
417	    "u >1array%s)",
418	    P(JSON_OBJECT), P(JSON_ARRAY), P(JSON_STRING), P(JSON_CONSTANT),
419	    P(JSON_NUMBER), P(JSON_ARRAYN))
420	    == -1)
421		return -1;
422#endif
423	return 1;
424}
425
426#else
427
428#include <sys/types.h>
429#include <sys/stat.h>
430#include <stdio.h>
431#include <fcntl.h>
432#include <unistd.h>
433#include <stdlib.h>
434#include <stdint.h>
435#include <err.h>
436
437int
438main(int argc, char *argv[])
439{
440	int fd, rv;
441	struct stat st;
442	unsigned char *p;
443	size_t stats[JSON_MAX];
444
445	if ((fd = open(argv[1], O_RDONLY)) == -1)
446		err(EXIT_FAILURE, "Can't open `%s'", argv[1]);
447
448	if (fstat(fd, &st) == -1)
449		err(EXIT_FAILURE, "Can't stat `%s'", argv[1]);
450
451	if ((p = malloc(st.st_size)) == NULL)
452		err(EXIT_FAILURE, "Can't allocate %jd bytes",
453		    (intmax_t)st.st_size);
454	if (read(fd, p, st.st_size) != st.st_size)
455		err(EXIT_FAILURE, "Can't read %jd bytes",
456		    (intmax_t)st.st_size);
457	memset(stats, 0, sizeof(stats));
458	printf("is json %d\n", json_parse((const unsigned char **)&p,
459	    p + st.st_size, stats, 0));
460	return 0;
461}
462#endif
463