1354582Sdelphij/*-
2354582Sdelphij * Copyright (c) 2018 Christos Zoulas
3354582Sdelphij * All rights reserved.
4354582Sdelphij *
5354582Sdelphij * Redistribution and use in source and binary forms, with or without
6354582Sdelphij * modification, are permitted provided that the following conditions
7354582Sdelphij * are met:
8354582Sdelphij * 1. Redistributions of source code must retain the above copyright
9354582Sdelphij *    notice, this list of conditions and the following disclaimer.
10354582Sdelphij * 2. Redistributions in binary form must reproduce the above copyright
11354582Sdelphij *    notice, this list of conditions and the following disclaimer in the
12354582Sdelphij *    documentation and/or other materials provided with the distribution.
13354582Sdelphij *
14354582Sdelphij * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
15354582Sdelphij * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16354582Sdelphij * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17354582Sdelphij * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
18354582Sdelphij * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19354582Sdelphij * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20354582Sdelphij * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21354582Sdelphij * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22354582Sdelphij * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23354582Sdelphij * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24354582Sdelphij * POSSIBILITY OF SUCH DAMAGE.
25354582Sdelphij */
26354582Sdelphij
27354582Sdelphij/*
28354582Sdelphij * Parse JSON object serialization format (RFC-7159)
29354582Sdelphij */
30354582Sdelphij
31354582Sdelphij#ifndef TEST
32354582Sdelphij#include "file.h"
33354582Sdelphij
34354582Sdelphij#ifndef lint
35362844SdelphijFILE_RCSID("@(#)$File: is_json.c,v 1.15 2020/06/07 19:05:47 christos Exp $")
36354582Sdelphij#endif
37354582Sdelphij
38354582Sdelphij#include <string.h>
39354582Sdelphij#include "magic.h"
40354582Sdelphij#endif
41354582Sdelphij
#ifdef DEBUG
#include <stdio.h>
/*
 * Debug trace: print the label, the byte that pos points at (once as two
 * hex digits, once as a character) and at most 20 bytes of the text that
 * begins at start.
 */
#define DPRINTF(label, pos, start)	\
    printf("%s [%.2x/%c] %.20s\n", (label), *(pos), *(pos), \
	(const char *)(start))
#else
/* Tracing disabled: the macro expands to an empty statement. */
#define DPRINTF(label, pos, start)	do { } while (/*CONSTCOND*/0)
#endif
49354582Sdelphij
/*
 * Slots of the per-type counter array that the parsers update;
 * JSON_MAX is the number of slots.
 */
enum {
	JSON_ARRAY = 0,
	JSON_CONSTANT = 1,
	JSON_NUMBER = 2,
	JSON_OBJECT = 3,
	JSON_STRING = 4,
	JSON_ARRAYN = 5,
	JSON_MAX = 6
};
57354582Sdelphij
58354582Sdelphij/*
59354582Sdelphij * if JSON_COUNT != 0:
60354582Sdelphij *	count all the objects, require that we have the whole data file
61354582Sdelphij * otherwise:
62354582Sdelphij *	stop if we find an object or an array
63354582Sdelphij */
64354582Sdelphij#ifndef JSON_COUNT
65354582Sdelphij#define JSON_COUNT 0
66354582Sdelphij#endif
67354582Sdelphij
68354582Sdelphijstatic int json_parse(const unsigned char **, const unsigned char *, size_t *,
69354582Sdelphij	size_t);
70354582Sdelphij
/*
 * Return 1 if uc is one of the four bytes this parser treats as
 * whitespace (space, newline, carriage return, horizontal tab) and
 * 0 for every other byte.
 */
static int
json_isspace(const unsigned char uc)
{
	return uc == ' ' || uc == '\n' || uc == '\r' || uc == '\t';
}
84354582Sdelphij
/*
 * Return 1 if uc is a decimal digit '0'..'9', 0 otherwise.
 * Independent of the current locale.
 */
static int
json_isdigit(unsigned char uc)
{
	/* C guarantees that '0'..'9' are consecutive code points. */
	return uc >= '0' && uc <= '9';
}
96354582Sdelphij
/*
 * Return 1 if uc is a hexadecimal digit (0-9, a-f or A-F), 0 otherwise.
 */
static int
json_isxdigit(unsigned char uc)
{
	return json_isdigit(uc) ||
	    (uc >= 'a' && uc <= 'f') ||
	    (uc >= 'A' && uc <= 'F');
}
110354582Sdelphij
/*
 * Advance uc over any run of JSON whitespace without going past ue.
 * Returns the first position that is not whitespace, or ue if the rest
 * of the buffer is blank.
 */
static const unsigned char *
json_skip_space(const unsigned char *uc, const unsigned char *ue)
{
	for (; uc < ue; uc++) {
		if (!json_isspace(*uc))
			break;
	}
	return uc;
}
118354582Sdelphij
/*
 * Parse the remainder of a JSON string; *ucp must point just past the
 * opening quote and ue one past the last byte of the buffer.
 *
 * Returns 1 and leaves *ucp just past the closing quote on success.
 * Returns 0 if the buffer ends first, a NUL byte is seen, or a backslash
 * is followed by anything other than one of  " \ / b f n r t  or
 * 'u' plus four hex digits; *ucp is then left where scanning stopped.
 */
static int
json_parse_string(const unsigned char **ucp, const unsigned char *ue)
{
	const unsigned char *cur = *ucp;
	size_t n;

	DPRINTF("Parse string: ", cur, *ucp);
	while (cur < ue) {
		const unsigned char ch = *cur++;

		if (ch == '"') {
			*ucp = cur;
			DPRINTF("Good string: ", cur, *ucp);
			return 1;
		}
		if (ch == '\0')
			break;
		if (ch != '\\')
			continue;

		/* A backslash must be followed by at least one more byte. */
		if (cur == ue)
			break;
		switch (*cur++) {
		case '"':
		case '\\':
		case '/':
		case 'b':
		case 'f':
		case 'n':
		case 'r':
		case 't':
			continue;
		case 'u':
			/* \uXXXX: all four hex digits must be present. */
			if (ue - cur < 4) {
				cur = ue;
				goto bad;
			}
			for (n = 0; n < 4; n++)
				if (!json_isxdigit(*cur++))
					goto bad;
			continue;
		default:
			/* Unknown escape, including an escaped NUL. */
			goto bad;
		}
	}
bad:
	DPRINTF("Bad string: ", cur, *ucp);
	*ucp = cur;
	return 0;
}
170354582Sdelphij
171354582Sdelphijstatic int
172354582Sdelphijjson_parse_array(const unsigned char **ucp, const unsigned char *ue,
173354582Sdelphij	size_t *st, size_t lvl)
174354582Sdelphij{
175354582Sdelphij	const unsigned char *uc = *ucp;
176354582Sdelphij
177354582Sdelphij	DPRINTF("Parse array: ", uc, *ucp);
178354582Sdelphij	while (uc < ue) {
179362844Sdelphij		if (*uc == ']')
180362844Sdelphij			goto done;
181354582Sdelphij		if (!json_parse(&uc, ue, st, lvl + 1))
182354582Sdelphij			goto out;
183354582Sdelphij		if (uc == ue)
184354582Sdelphij			goto out;
185354582Sdelphij		switch (*uc) {
186354582Sdelphij		case ',':
187354582Sdelphij			uc++;
188354582Sdelphij			continue;
189354582Sdelphij		case ']':
190362844Sdelphij		done:
191362844Sdelphij			st[JSON_ARRAYN]++;
192354582Sdelphij			*ucp = uc + 1;
193362844Sdelphij			DPRINTF("Good array: ", uc, *ucp);
194354582Sdelphij			return 1;
195354582Sdelphij		default:
196354582Sdelphij			goto out;
197354582Sdelphij		}
198354582Sdelphij	}
199354582Sdelphijout:
200354582Sdelphij	DPRINTF("Bad array: ", uc,  *ucp);
201354582Sdelphij	*ucp = uc;
202354582Sdelphij	return 0;
203354582Sdelphij}
204354582Sdelphij
/*
 * Parse the remainder of a JSON object; *ucp must point just past the
 * opening brace and ue one past the last byte of the buffer.  st is the
 * counter array indexed by the JSON_* constants; lvl is the current
 * nesting level and is passed on (plus one) to json_parse() for every
 * member value.
 *
 * Each member must be a quoted name, a ':' and a value; members are
 * separated by ','.  Because the closing brace is looked for at the top
 * of every iteration, an empty object and a trailing comma are both
 * accepted.
 *
 * Returns 1 with *ucp just past the closing brace on success, 0 with
 * *ucp at the point where parsing stopped otherwise.
 */
static int
json_parse_object(const unsigned char **ucp, const unsigned char *ue,
	size_t *st, size_t lvl)
{
	const unsigned char *cur = *ucp;
	unsigned char sep;

	DPRINTF("Parse object: ", cur, *ucp);
	while (cur < ue) {
		cur = json_skip_space(cur, ue);
		if (cur == ue)
			break;
		if (*cur == '}') {
			cur++;
			goto good;
		}

		/* Member name. */
		if (*cur++ != '"') {
			DPRINTF("not string", cur, *ucp);
			break;
		}
		DPRINTF("next field", cur, *ucp);
		if (!json_parse_string(&cur, ue)) {
			DPRINTF("not string", cur, *ucp);
			break;
		}

		/* Name/value separator. */
		cur = json_skip_space(cur, ue);
		if (cur == ue)
			break;
		if (*cur++ != ':') {
			DPRINTF("not colon", cur, *ucp);
			break;
		}

		/* Member value. */
		if (!json_parse(&cur, ue, st, lvl + 1)) {
			DPRINTF("not json", cur, *ucp);
			break;
		}
		if (cur == ue)
			break;

		sep = *cur++;
		if (sep == ',')
			continue;
		if (sep == '}') /* { */
			goto good;
		*ucp = cur - 1;
		DPRINTF("not more", cur, *ucp);
		break;
	}
	DPRINTF("Bad object: ", cur, *ucp);
	*ucp = cur;
	return 0;
good:
	*ucp = cur;
	DPRINTF("Good object: ", cur, *ucp);
	return 1;
}
260354582Sdelphij
/*
 * Scan a number starting at *ucp: an optional '-', digits, an optional
 * '.', more digits and, if at least one digit was seen, an optional
 * exponent ('e' or 'E', optional sign, digits).
 *
 * Returns 1 if at least one digit was seen and, when an exponent marker
 * is present, at least one exponent digit; 0 otherwise.  *ucp is moved to
 * where scanning stopped, except when the buffer is already empty on
 * entry, in which case 0 is returned with *ucp untouched.
 */
static int
json_parse_number(const unsigned char **ucp, const unsigned char *ue)
{
	const unsigned char *cur = *ucp;
	int seen = 0;

	DPRINTF("Parse number: ", cur, *ucp);
	if (cur == ue)
		return 0;
	if (*cur == '-')
		cur++;

	/* Integer part. */
	while (cur < ue && json_isdigit(*cur)) {
		seen = 1;
		cur++;
	}
	if (cur == ue)
		goto finish;

	/* Fraction. */
	if (*cur == '.')
		cur++;
	while (cur < ue && json_isdigit(*cur)) {
		seen = 1;
		cur++;
	}
	if (cur == ue)
		goto finish;

	/* Exponent: only considered after at least one mantissa digit. */
	if (!seen || (*cur != 'e' && *cur != 'E'))
		goto finish;
	cur++;
	seen = 0;
	if (cur == ue)
		goto finish;
	if (*cur == '+' || *cur == '-')
		cur++;
	while (cur < ue && json_isdigit(*cur)) {
		seen = 1;
		cur++;
	}
finish:
	if (seen)
		DPRINTF("Good number: ", cur, *ucp);
	else
		DPRINTF("Bad number: ", cur, *ucp);
	*ucp = cur;
	return seen;
}
310354582Sdelphij
/*
 * Match the tail of one of the literal names (true, false, null).
 *
 * str is the complete literal and len is sizeof(literal), i.e. it counts
 * the terminating NUL.  The caller has already consumed the first byte,
 * so *ucp points at what should be str[1] and len - 2 bytes remain to be
 * compared.  ue is one past the last byte of the buffer.
 *
 * Returns 1 only if every remaining byte is present and equal to the
 * corresponding byte of str; *ucp is then just past the literal.
 * Returns 0 on the first mismatch or if the buffer ends early, with *ucp
 * at the point where matching stopped.
 */
static int
json_parse_const(const unsigned char **ucp, const unsigned char *ue,
    const char *str, size_t len)
{
	const unsigned char *uc = *ucp;
	/* Guard the subtraction so a degenerate len cannot wrap around. */
	size_t left = len > 2 ? len - 2 : 0;

	DPRINTF("Parse const: ", uc, *ucp);
	for (; left != 0; left--) {
		if (uc == ue)
			break;
		/* Stop at the first differing byte instead of ignoring it. */
		if (*uc++ != (unsigned char)*++str)
			break;
	}
	if (left != 0)
		DPRINTF("Bad const: ", uc, *ucp);
	*ucp = uc;
	return left == 0;
}
327354582Sdelphij
328354582Sdelphijstatic int
329354582Sdelphijjson_parse(const unsigned char **ucp, const unsigned char *ue,
330354582Sdelphij    size_t *st, size_t lvl)
331354582Sdelphij{
332354582Sdelphij	const unsigned char *uc;
333354582Sdelphij	int rv = 0;
334354582Sdelphij	int t;
335354582Sdelphij
336354582Sdelphij	uc = json_skip_space(*ucp, ue);
337354582Sdelphij	if (uc == ue)
338354582Sdelphij		goto out;
339354582Sdelphij
340354582Sdelphij	// Avoid recursion
341354582Sdelphij	if (lvl > 20)
342354582Sdelphij		return 0;
343354582Sdelphij#if JSON_COUNT
344354582Sdelphij	/* bail quickly if not counting */
345354582Sdelphij	if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAYN]))
346354582Sdelphij		return 1;
347354582Sdelphij#endif
348354582Sdelphij
349354582Sdelphij	DPRINTF("Parse general: ", uc, *ucp);
350354582Sdelphij	switch (*uc++) {
351354582Sdelphij	case '"':
352354582Sdelphij		rv = json_parse_string(&uc, ue);
353354582Sdelphij		t = JSON_STRING;
354354582Sdelphij		break;
355354582Sdelphij	case '[':
356354582Sdelphij		rv = json_parse_array(&uc, ue, st, lvl + 1);
357354582Sdelphij		t = JSON_ARRAY;
358354582Sdelphij		break;
359354582Sdelphij	case '{': /* '}' */
360354582Sdelphij		rv = json_parse_object(&uc, ue, st, lvl + 1);
361354582Sdelphij		t = JSON_OBJECT;
362354582Sdelphij		break;
363354582Sdelphij	case 't':
364354582Sdelphij		rv = json_parse_const(&uc, ue, "true", sizeof("true"));
365354582Sdelphij		t = JSON_CONSTANT;
366354582Sdelphij		break;
367354582Sdelphij	case 'f':
368354582Sdelphij		rv = json_parse_const(&uc, ue, "false", sizeof("false"));
369354582Sdelphij		t = JSON_CONSTANT;
370354582Sdelphij		break;
371354582Sdelphij	case 'n':
372354582Sdelphij		rv = json_parse_const(&uc, ue, "null", sizeof("null"));
373354582Sdelphij		t = JSON_CONSTANT;
374354582Sdelphij		break;
375354582Sdelphij	default:
376354582Sdelphij		--uc;
377354582Sdelphij		rv = json_parse_number(&uc, ue);
378354582Sdelphij		t = JSON_NUMBER;
379354582Sdelphij		break;
380354582Sdelphij	}
381354582Sdelphij	if (rv)
382354582Sdelphij		st[t]++;
383354582Sdelphij	uc = json_skip_space(uc, ue);
384354582Sdelphijout:
385354582Sdelphij	*ucp = uc;
386354582Sdelphij	DPRINTF("End general: ", uc, *ucp);
387354582Sdelphij	if (lvl == 0)
388354582Sdelphij		return rv && (st[JSON_ARRAYN] || st[JSON_OBJECT]);
389354582Sdelphij	return rv;
390354582Sdelphij}
391354582Sdelphij
392354582Sdelphij#ifndef TEST
393354582Sdelphijint
394354582Sdelphijfile_is_json(struct magic_set *ms, const struct buffer *b)
395354582Sdelphij{
396354582Sdelphij	const unsigned char *uc = CAST(const unsigned char *, b->fbuf);
397354582Sdelphij	const unsigned char *ue = uc + b->flen;
398354582Sdelphij	size_t st[JSON_MAX];
399354582Sdelphij	int mime = ms->flags & MAGIC_MIME;
400354582Sdelphij
401354582Sdelphij
402354582Sdelphij	if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0)
403354582Sdelphij		return 0;
404354582Sdelphij
405354582Sdelphij	memset(st, 0, sizeof(st));
406354582Sdelphij
407354582Sdelphij	if (!json_parse(&uc, ue, st, 0))
408354582Sdelphij		return 0;
409354582Sdelphij
410354582Sdelphij	if (mime == MAGIC_MIME_ENCODING)
411354582Sdelphij		return 1;
412354582Sdelphij	if (mime) {
413354582Sdelphij		if (file_printf(ms, "application/json") == -1)
414354582Sdelphij			return -1;
415354582Sdelphij		return 1;
416354582Sdelphij	}
417354582Sdelphij	if (file_printf(ms, "JSON data") == -1)
418354582Sdelphij		return -1;
419354582Sdelphij#if JSON_COUNT
420354582Sdelphij#define P(n) st[n], st[n] > 1 ? "s" : ""
421354582Sdelphij	if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT
422354582Sdelphij	    "u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT
423354582Sdelphij	    "u constant%s, %" SIZE_T_FORMAT "u number%s, %" SIZE_T_FORMAT
424354582Sdelphij	    "u >1array%s)",
425354582Sdelphij	    P(JSON_OBJECT), P(JSON_ARRAY), P(JSON_STRING), P(JSON_CONSTANT),
426354582Sdelphij	    P(JSON_NUMBER), P(JSON_ARRAYN))
427354582Sdelphij	    == -1)
428354582Sdelphij		return -1;
429354582Sdelphij#endif
430354582Sdelphij	return 1;
431354582Sdelphij}
432354582Sdelphij
433354582Sdelphij#else
434354582Sdelphij
435354582Sdelphij#include <sys/types.h>
436354582Sdelphij#include <sys/stat.h>
437354582Sdelphij#include <stdio.h>
438354582Sdelphij#include <fcntl.h>
439354582Sdelphij#include <unistd.h>
440354582Sdelphij#include <stdlib.h>
441354582Sdelphij#include <stdint.h>
442354582Sdelphij#include <err.h>
443354582Sdelphij
444354582Sdelphijint
445354582Sdelphijmain(int argc, char *argv[])
446354582Sdelphij{
447354582Sdelphij	int fd, rv;
448354582Sdelphij	struct stat st;
449354582Sdelphij	unsigned char *p;
450354582Sdelphij	size_t stats[JSON_MAX];
451354582Sdelphij
452354582Sdelphij	if ((fd = open(argv[1], O_RDONLY)) == -1)
453354582Sdelphij		err(EXIT_FAILURE, "Can't open `%s'", argv[1]);
454354582Sdelphij
455354582Sdelphij	if (fstat(fd, &st) == -1)
456354582Sdelphij		err(EXIT_FAILURE, "Can't stat `%s'", argv[1]);
457354582Sdelphij
458354582Sdelphij	if ((p = malloc(st.st_size)) == NULL)
459354582Sdelphij		err(EXIT_FAILURE, "Can't allocate %jd bytes",
460354582Sdelphij		    (intmax_t)st.st_size);
461354582Sdelphij	if (read(fd, p, st.st_size) != st.st_size)
462354582Sdelphij		err(EXIT_FAILURE, "Can't read %jd bytes",
463354582Sdelphij		    (intmax_t)st.st_size);
464354582Sdelphij	memset(stats, 0, sizeof(stats));
465354582Sdelphij	printf("is json %d\n", json_parse((const unsigned char **)&p,
466354582Sdelphij	    p + st.st_size, stats, 0));
467354582Sdelphij	return 0;
468354582Sdelphij}
469354582Sdelphij#endif
470