is_json.c revision 354582
/*-
 * Copyright (c) 2018 Christos Zoulas
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
26354582Sdelphij
/*
 * Parse JSON object serialization format (RFC-7159)
 */
30354582Sdelphij
#ifndef TEST
#include "file.h"

#ifndef lint
FILE_RCSID("@(#)$File: is_json.c,v 1.13 2019/03/02 01:08:10 christos Exp $")
#endif

#include <string.h>
#include "magic.h"
#else
#include <stddef.h>
#endif
41354582Sdelphij
/*
 * Debug tracing.
 *
 * With DEBUG defined, DPRINTF(a, b, c) prints the tag `a', the byte at
 * cursor `b' (in hex and as a character) and up to 20 bytes of text
 * starting at `c'.  Without DEBUG it expands to an empty statement and
 * evaluates none of its arguments.
 *
 * NOTE(review): the DEBUG form dereferences `b' and reads `c' as a string.
 * Some callers pass a cursor that equals the end pointer, so a DEBUG build
 * may read past the parsed range -- confirm the buffer is padded before
 * relying on the output.
 */
#ifdef DEBUG
#include <stdio.h>
#define DPRINTF(a, b, c)	\
    printf("%s [%.2x/%c] %.20s\n", (a), *(b), *(b), (const char *)(c))
#else
#define DPRINTF(a, b, c)	do { } while (/*CONSTCOND*/0)
#endif
49354582Sdelphij
/*
 * Indices into the per-type statistics array (`st') that the parser fills
 * in; JSON_MAX is the number of slots.
 */
#define JSON_ARRAY	0	/* arrays parsed successfully */
#define JSON_CONSTANT	1	/* true / false / null literals */
#define JSON_NUMBER	2	/* numbers */
#define JSON_OBJECT	3	/* objects */
#define JSON_STRING	4	/* string values (object keys are not counted) */
#define JSON_ARRAYN	5	/* arrays holding more than one element */
#define JSON_MAX	6	/* size of the statistics array */
57354582Sdelphij
/*
 * JSON_COUNT is a compile-time switch that defaults to 0.
 *
 * if JSON_COUNT != 0:
 *	file_is_json() appends the per-type counters to its output and an
 *	extra early-return check is compiled into json_parse()
 * otherwise:
 *	only the plain "JSON data" description is printed
 */
#ifndef JSON_COUNT
#define JSON_COUNT 0
#endif
67354582Sdelphij
/*
 * Forward declaration: the array and object parsers recurse into
 * json_parse() for each nested value.
 */
static int json_parse(const unsigned char **ucp, const unsigned char *ue,
	size_t *st, size_t lvl);
70354582Sdelphij
/*
 * Return 1 if `uc' is one of the four JSON whitespace bytes (space,
 * line feed, carriage return, horizontal tab), 0 otherwise.  This is
 * deliberately narrower than isspace(3): vertical tab and form feed are
 * not accepted.
 */
static int
json_isspace(const unsigned char uc)
{
	switch (uc) {
	case ' ':
	case '\n':
	case '\r':
	case '\t':
		return 1;
	default:
		return 0;
	}
}
84354582Sdelphij
/*
 * Return 1 if `uc' is an ASCII decimal digit '0'..'9', 0 otherwise.
 * Independent of the current locale.
 */
static int
json_isdigit(unsigned char uc)
{
	switch (uc) {
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
		return 1;
	default:
		return 0;
	}
}
96354582Sdelphij
/*
 * Return 1 if `uc' is an ASCII hexadecimal digit (0-9, a-f, A-F),
 * 0 otherwise.  Used to validate the four digits of a \uXXXX escape.
 */
static int
json_isxdigit(unsigned char uc)
{
	if (json_isdigit(uc))
		return 1;
	switch (uc) {
	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
		return 1;
	default:
		return 0;
	}
}
110354582Sdelphij
/*
 * Advance `uc' past any JSON whitespace without going beyond `ue'.
 * Returns the first non-whitespace position, or `ue' if only whitespace
 * remained.
 */
static const unsigned char *
json_skip_space(const unsigned char *uc, const unsigned char *ue)
{
	while (uc < ue && json_isspace(*uc))
		uc++;
	return uc;
}
118354582Sdelphij
/*
 * Scan the body of a JSON string.  The caller has already consumed the
 * opening quote; *ucp points at the first byte after it.
 *
 * Accepts any byte except NUL, the two-character escapes
 * \" \\ \/ \b \f \n \r \t, and \u followed by four hex digits.
 *
 * Returns 1 and leaves *ucp just past the closing quote on success.
 * Returns 0 on a NUL byte, an unknown or truncated escape, or when the
 * buffer ends before the closing quote; *ucp is then left where scanning
 * stopped.
 */
static int
json_parse_string(const unsigned char **ucp, const unsigned char *ue)
{
	const unsigned char *uc = *ucp;
	size_t i;

	DPRINTF("Parse string: ", uc, *ucp);
	while (uc < ue) {
		switch (*uc++) {
		case '\0':
			goto out;
		case '\\':
			/* A backslash must be followed by an escape code. */
			if (uc == ue)
				goto out;
			switch (*uc++) {
			case '\0':
				goto out;
			case '"':
			case '\\':
			case '/':
			case 'b':
			case 'f':
			case 'n':
			case 'r':
			case 't':
				continue;
			case 'u':
				/* \uXXXX needs four more bytes in the buffer. */
				if (ue - uc < 4) {
					uc = ue;
					goto out;
				}
				for (i = 0; i < 4; i++)
					if (!json_isxdigit(*uc++))
						goto out;
				continue;
			default:
				goto out;
			}
		case '"':
			*ucp = uc;
			return 1;
		default:
			continue;
		}
	}
out:
	DPRINTF("Bad string: ", uc, *ucp);
	*ucp = uc;
	return 0;
}
169354582Sdelphij
170354582Sdelphijstatic int
171354582Sdelphijjson_parse_array(const unsigned char **ucp, const unsigned char *ue,
172354582Sdelphij	size_t *st, size_t lvl)
173354582Sdelphij{
174354582Sdelphij	const unsigned char *uc = *ucp;
175354582Sdelphij	int more = 0;	/* Array has more than 1 element */
176354582Sdelphij
177354582Sdelphij	DPRINTF("Parse array: ", uc, *ucp);
178354582Sdelphij	while (uc < ue) {
179354582Sdelphij		if (!json_parse(&uc, ue, st, lvl + 1))
180354582Sdelphij			goto out;
181354582Sdelphij		if (uc == ue)
182354582Sdelphij			goto out;
183354582Sdelphij		switch (*uc) {
184354582Sdelphij		case ',':
185354582Sdelphij			more++;
186354582Sdelphij			uc++;
187354582Sdelphij			continue;
188354582Sdelphij		case ']':
189354582Sdelphij			if (more)
190354582Sdelphij				st[JSON_ARRAYN]++;
191354582Sdelphij			*ucp = uc + 1;
192354582Sdelphij			return 1;
193354582Sdelphij		default:
194354582Sdelphij			goto out;
195354582Sdelphij		}
196354582Sdelphij	}
197354582Sdelphijout:
198354582Sdelphij	DPRINTF("Bad array: ", uc,  *ucp);
199354582Sdelphij	*ucp = uc;
200354582Sdelphij	return 0;
201354582Sdelphij}
202354582Sdelphij
/*
 * Parse the remainder of a JSON object.  The caller has already consumed
 * the opening brace.
 *
 * Each member must be a quoted string, a ':', and a value parsed by
 * json_parse(); members are separated by ','.
 *
 * ucp	in: first byte after the opening brace; out: just past the closing
 *	brace on success, or the position where parsing stopped on failure
 * ue	end of the buffer (one past the last byte)
 * st	statistics array handed through to json_parse()
 * lvl	nesting level, passed on (plus one) to json_parse()
 *
 * Returns 1 for a well-formed object, including the empty object, and 0
 * otherwise.
 */
static int
json_parse_object(const unsigned char **ucp, const unsigned char *ue,
	size_t *st, size_t lvl)
{
	const unsigned char *uc = *ucp;
	DPRINTF("Parse object: ", uc, *ucp);
	/*
	 * An object with no members is valid; without this check the loop
	 * below would demand a member name and reject it.
	 */
	uc = json_skip_space(uc, ue);
	if (uc < ue && *uc == '}') { /* { */
		*ucp = uc + 1;
		DPRINTF("Good object: ", uc, *ucp);
		return 1;
	}
	while (uc < ue) {
		uc = json_skip_space(uc, ue);
		if (uc == ue)
			goto out;
		/* Member name: must be a string. */
		if (*uc++ != '"') {
			DPRINTF("not string", uc, *ucp);
			goto out;
		}
		DPRINTF("next field", uc, *ucp);
		if (!json_parse_string(&uc, ue)) {
			DPRINTF("not string", uc, *ucp);
			goto out;
		}
		uc = json_skip_space(uc, ue);
		if (uc == ue)
			goto out;
		if (*uc++ != ':') {
			DPRINTF("not colon", uc, *ucp);
			goto out;
		}
		/* Member value: any JSON value. */
		if (!json_parse(&uc, ue, st, lvl + 1)) {
			DPRINTF("not json", uc, *ucp);
			goto out;
		}
		if (uc == ue)
			goto out;
		switch (*uc++) {
		case ',':
			continue;
		case '}': /* { */
			*ucp = uc;
			DPRINTF("Good object: ", uc, *ucp);
			return 1;
		default:
			/*
			 * Only affects the DEBUG trace below; the store is
			 * overwritten at the `out' label.
			 */
			*ucp = uc - 1;
			DPRINTF("not more", uc, *ucp);
			goto out;
		}
	}
out:
	DPRINTF("Bad object: ", uc, *ucp);
	*ucp = uc;
	return 0;
}
253354582Sdelphij
/*
 * Scan a JSON number starting at *ucp.
 *
 * Accepts an optional '-', a run of digits, an optional '.', another run
 * of digits and, if at least one digit was seen, an optional exponent
 * ('e' or 'E', optional sign, digits).  The check is intentionally loose:
 * a single "digit seen" flag is shared by the integer and fraction parts,
 * so forms such as "1." and ".5" are accepted.  An exponent with no
 * digits makes the number invalid.
 *
 * Returns 1 if a number was recognised and 0 otherwise; *ucp is advanced
 * to where scanning stopped in both cases (except when the buffer is
 * already empty, where it is left untouched).
 */
static int
json_parse_number(const unsigned char **ucp, const unsigned char *ue)
{
	const unsigned char *uc = *ucp;
	int got = 0;	/* saw at least one digit in the current part */

	DPRINTF("Parse number: ", uc, *ucp);
	if (uc == ue)
		return 0;
	if (*uc == '-')
		uc++;

	/* Integer part. */
	for (; uc < ue; uc++) {
		if (!json_isdigit(*uc))
			break;
		got = 1;
	}
	if (uc == ue)
		goto out;
	/* Fraction part. */
	if (*uc == '.')
		uc++;
	for (; uc < ue; uc++) {
		if (!json_isdigit(*uc))
			break;
		got = 1;
	}
	if (uc == ue)
		goto out;
	/* Exponent: requires its own digits, so reset the flag. */
	if (got && (*uc == 'e' || *uc == 'E')) {
		uc++;
		got = 0;
		if (uc == ue)
			goto out;
		if (*uc == '+' || *uc == '-')
			uc++;
		for (; uc < ue; uc++) {
			if (!json_isdigit(*uc))
				break;
			got = 1;
		}
	}
out:
	if (!got)
		DPRINTF("Bad number: ", uc, *ucp);
	else
		DPRINTF("Good number: ", uc, *ucp);
	*ucp = uc;
	return got;
}
303354582Sdelphij
/*
 * Match the tail of a JSON literal ("true", "false" or "null").
 *
 * ucp	in: first byte after the literal's initial letter, which the
 *	caller has already matched; out: just past the literal on success,
 *	or the first byte that did not match on failure
 * ue	end of the buffer (one past the last byte)
 * str	the complete literal, including its first letter
 * len	sizeof the literal, i.e. its length plus the terminating NUL
 *
 * Returns 1 only if every remaining character of `str' is present in
 * the buffer, 0 on a mismatch or if the buffer ends first.
 */
static int
json_parse_const(const unsigned char **ucp, const unsigned char *ue,
    const char *str, size_t len)
{
	const unsigned char *uc = *ucp;
	size_t i;
	int ok = 1;

	DPRINTF("Parse const: ", uc, *ucp);
	/* str[0] was matched by the caller; str[len - 1] is the NUL. */
	for (i = 1; i + 1 < len; i++) {
		if (uc == ue || *uc != (unsigned char)str[i]) {
			ok = 0;
			break;
		}
		uc++;
	}
	if (!ok)
		DPRINTF("Bad const: ", uc, *ucp);
	*ucp = uc;
	return ok;
}
320354582Sdelphij
321354582Sdelphijstatic int
322354582Sdelphijjson_parse(const unsigned char **ucp, const unsigned char *ue,
323354582Sdelphij    size_t *st, size_t lvl)
324354582Sdelphij{
325354582Sdelphij	const unsigned char *uc;
326354582Sdelphij	int rv = 0;
327354582Sdelphij	int t;
328354582Sdelphij
329354582Sdelphij	uc = json_skip_space(*ucp, ue);
330354582Sdelphij	if (uc == ue)
331354582Sdelphij		goto out;
332354582Sdelphij
333354582Sdelphij	// Avoid recursion
334354582Sdelphij	if (lvl > 20)
335354582Sdelphij		return 0;
336354582Sdelphij#if JSON_COUNT
337354582Sdelphij	/* bail quickly if not counting */
338354582Sdelphij	if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAYN]))
339354582Sdelphij		return 1;
340354582Sdelphij#endif
341354582Sdelphij
342354582Sdelphij	DPRINTF("Parse general: ", uc, *ucp);
343354582Sdelphij	switch (*uc++) {
344354582Sdelphij	case '"':
345354582Sdelphij		rv = json_parse_string(&uc, ue);
346354582Sdelphij		t = JSON_STRING;
347354582Sdelphij		break;
348354582Sdelphij	case '[':
349354582Sdelphij		rv = json_parse_array(&uc, ue, st, lvl + 1);
350354582Sdelphij		t = JSON_ARRAY;
351354582Sdelphij		break;
352354582Sdelphij	case '{': /* '}' */
353354582Sdelphij		rv = json_parse_object(&uc, ue, st, lvl + 1);
354354582Sdelphij		t = JSON_OBJECT;
355354582Sdelphij		break;
356354582Sdelphij	case 't':
357354582Sdelphij		rv = json_parse_const(&uc, ue, "true", sizeof("true"));
358354582Sdelphij		t = JSON_CONSTANT;
359354582Sdelphij		break;
360354582Sdelphij	case 'f':
361354582Sdelphij		rv = json_parse_const(&uc, ue, "false", sizeof("false"));
362354582Sdelphij		t = JSON_CONSTANT;
363354582Sdelphij		break;
364354582Sdelphij	case 'n':
365354582Sdelphij		rv = json_parse_const(&uc, ue, "null", sizeof("null"));
366354582Sdelphij		t = JSON_CONSTANT;
367354582Sdelphij		break;
368354582Sdelphij	default:
369354582Sdelphij		--uc;
370354582Sdelphij		rv = json_parse_number(&uc, ue);
371354582Sdelphij		t = JSON_NUMBER;
372354582Sdelphij		break;
373354582Sdelphij	}
374354582Sdelphij	if (rv)
375354582Sdelphij		st[t]++;
376354582Sdelphij	uc = json_skip_space(uc, ue);
377354582Sdelphijout:
378354582Sdelphij	*ucp = uc;
379354582Sdelphij	DPRINTF("End general: ", uc, *ucp);
380354582Sdelphij	if (lvl == 0)
381354582Sdelphij		return rv && (st[JSON_ARRAYN] || st[JSON_OBJECT]);
382354582Sdelphij	return rv;
383354582Sdelphij}
384354582Sdelphij
385354582Sdelphij#ifndef TEST
386354582Sdelphijint
387354582Sdelphijfile_is_json(struct magic_set *ms, const struct buffer *b)
388354582Sdelphij{
389354582Sdelphij	const unsigned char *uc = CAST(const unsigned char *, b->fbuf);
390354582Sdelphij	const unsigned char *ue = uc + b->flen;
391354582Sdelphij	size_t st[JSON_MAX];
392354582Sdelphij	int mime = ms->flags & MAGIC_MIME;
393354582Sdelphij
394354582Sdelphij
395354582Sdelphij	if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0)
396354582Sdelphij		return 0;
397354582Sdelphij
398354582Sdelphij	memset(st, 0, sizeof(st));
399354582Sdelphij
400354582Sdelphij	if (!json_parse(&uc, ue, st, 0))
401354582Sdelphij		return 0;
402354582Sdelphij
403354582Sdelphij	if (mime == MAGIC_MIME_ENCODING)
404354582Sdelphij		return 1;
405354582Sdelphij	if (mime) {
406354582Sdelphij		if (file_printf(ms, "application/json") == -1)
407354582Sdelphij			return -1;
408354582Sdelphij		return 1;
409354582Sdelphij	}
410354582Sdelphij	if (file_printf(ms, "JSON data") == -1)
411354582Sdelphij		return -1;
412354582Sdelphij#if JSON_COUNT
413354582Sdelphij#define P(n) st[n], st[n] > 1 ? "s" : ""
414354582Sdelphij	if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT
415354582Sdelphij	    "u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT
416354582Sdelphij	    "u constant%s, %" SIZE_T_FORMAT "u number%s, %" SIZE_T_FORMAT
417354582Sdelphij	    "u >1array%s)",
418354582Sdelphij	    P(JSON_OBJECT), P(JSON_ARRAY), P(JSON_STRING), P(JSON_CONSTANT),
419354582Sdelphij	    P(JSON_NUMBER), P(JSON_ARRAYN))
420354582Sdelphij	    == -1)
421354582Sdelphij		return -1;
422354582Sdelphij#endif
423354582Sdelphij	return 1;
424354582Sdelphij}
425354582Sdelphij
426354582Sdelphij#else
427354582Sdelphij
428354582Sdelphij#include <sys/types.h>
429354582Sdelphij#include <sys/stat.h>
430354582Sdelphij#include <stdio.h>
431354582Sdelphij#include <fcntl.h>
432354582Sdelphij#include <unistd.h>
433354582Sdelphij#include <stdlib.h>
434354582Sdelphij#include <stdint.h>
435354582Sdelphij#include <err.h>
436354582Sdelphij
437354582Sdelphijint
438354582Sdelphijmain(int argc, char *argv[])
439354582Sdelphij{
440354582Sdelphij	int fd, rv;
441354582Sdelphij	struct stat st;
442354582Sdelphij	unsigned char *p;
443354582Sdelphij	size_t stats[JSON_MAX];
444354582Sdelphij
445354582Sdelphij	if ((fd = open(argv[1], O_RDONLY)) == -1)
446354582Sdelphij		err(EXIT_FAILURE, "Can't open `%s'", argv[1]);
447354582Sdelphij
448354582Sdelphij	if (fstat(fd, &st) == -1)
449354582Sdelphij		err(EXIT_FAILURE, "Can't stat `%s'", argv[1]);
450354582Sdelphij
451354582Sdelphij	if ((p = malloc(st.st_size)) == NULL)
452354582Sdelphij		err(EXIT_FAILURE, "Can't allocate %jd bytes",
453354582Sdelphij		    (intmax_t)st.st_size);
454354582Sdelphij	if (read(fd, p, st.st_size) != st.st_size)
455354582Sdelphij		err(EXIT_FAILURE, "Can't read %jd bytes",
456354582Sdelphij		    (intmax_t)st.st_size);
457354582Sdelphij	memset(stats, 0, sizeof(stats));
458354582Sdelphij	printf("is json %d\n", json_parse((const unsigned char **)&p,
459354582Sdelphij	    p + st.st_size, stats, 0));
460354582Sdelphij	return 0;
461354582Sdelphij}
462354582Sdelphij#endif
463