1/*- 2 * Copyright (c) 2018 Christos Zoulas 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 15 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 16 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 18 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 * POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27/* 28 * Parse JSON object serialization format (RFC-7159) 29 */ 30 31#ifndef TEST 32#include "file.h" 33 34#ifndef lint 35FILE_RCSID("@(#)$File: is_json.c,v 1.15 2020/06/07 19:05:47 christos Exp $") 36#endif 37 38#include <string.h> 39#include "magic.h" 40#endif 41 42#ifdef DEBUG 43#include <stdio.h> 44#define DPRINTF(a, b, c) \ 45 printf("%s [%.2x/%c] %.20s\n", (a), *(b), *(b), (const char *)(c)) 46#else 47#define DPRINTF(a, b, c) do { } while (/*CONSTCOND*/0) 48#endif 49 50#define JSON_ARRAY 0 51#define JSON_CONSTANT 1 52#define JSON_NUMBER 2 53#define JSON_OBJECT 3 54#define JSON_STRING 4 55#define JSON_ARRAYN 5 56#define JSON_MAX 6 57 58/* 59 * if JSON_COUNT != 0: 60 * count all the objects, require that we have the whole data file 61 * otherwise: 62 * stop if we find an object or an array 63 */ 64#ifndef JSON_COUNT 65#define JSON_COUNT 0 66#endif 67 68static int json_parse(const unsigned char **, const unsigned char *, size_t *, 69 size_t); 70 71static int 72json_isspace(const unsigned char uc) 73{ 74 switch (uc) { 75 case ' ': 76 case '\n': 77 case '\r': 78 case '\t': 79 return 1; 80 default: 81 return 0; 82 } 83} 84 85static int 86json_isdigit(unsigned char uc) 87{ 88 switch (uc) { 89 case '0': case '1': case '2': case '3': case '4': 90 case '5': case '6': case '7': case '8': case '9': 91 return 1; 92 default: 93 return 0; 94 } 95} 96 97static int 98json_isxdigit(unsigned char uc) 99{ 100 if (json_isdigit(uc)) 101 return 1; 102 switch (uc) { 103 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 104 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 105 return 1; 106 default: 107 return 0; 108 } 109} 110 111static const unsigned char * 112json_skip_space(const unsigned char *uc, const unsigned char *ue) 113{ 114 while (uc < ue && json_isspace(*uc)) 115 uc++; 116 return uc; 117} 118 119static int 120json_parse_string(const unsigned char **ucp, const unsigned char *ue) 121{ 122 const unsigned char *uc = *ucp; 123 size_t i; 124 125 DPRINTF("Parse string: ", uc, *ucp); 126 while (uc < ue) { 127 switch (*uc++) { 128 case '\0': 129 goto out; 130 case '\\': 131 if (uc == ue) 132 goto out; 133 switch (*uc++) { 134 case '\0': 135 goto out; 136 case '"': 137 case '\\': 138 case '/': 139 case 'b': 140 case 'f': 141 case 'n': 142 case 'r': 143 case 't': 144 continue; 145 case 'u': 146 if (ue - uc < 4) { 147 uc = ue; 148 goto out; 149 } 150 for (i = 0; i < 4; i++) 151 if (!json_isxdigit(*uc++)) 152 goto out; 153 continue; 154 default: 155 goto out; 156 } 157 case '"': 158 *ucp = uc; 159 DPRINTF("Good string: ", uc, *ucp); 160 return 1; 161 default: 162 continue; 163 } 164 } 165out: 166 DPRINTF("Bad string: ", uc, *ucp); 167 *ucp = uc; 168 return 0; 169} 170 171static int 172json_parse_array(const unsigned char **ucp, const unsigned char *ue, 173 size_t *st, size_t lvl) 174{ 175 const unsigned char *uc = *ucp; 176 177 DPRINTF("Parse array: ", uc, *ucp); 178 while (uc < ue) { 179 if (*uc == ']') 180 goto done; 181 if (!json_parse(&uc, ue, st, lvl + 1)) 182 goto out; 183 if (uc == ue) 184 goto out; 185 switch (*uc) { 186 case ',': 187 uc++; 188 continue; 189 case ']': 190 done: 191 st[JSON_ARRAYN]++; 192 *ucp = uc + 1; 193 DPRINTF("Good array: ", uc, *ucp); 194 return 1; 195 default: 196 goto out; 197 } 198 } 199out: 200 DPRINTF("Bad array: ", uc, *ucp); 201 *ucp = uc; 202 return 0; 203} 204 205static int 206json_parse_object(const unsigned char **ucp, const unsigned char *ue, 207 size_t *st, size_t lvl) 208{ 209 const unsigned char *uc = *ucp; 210 DPRINTF("Parse object: ", uc, *ucp); 211 while (uc < ue) { 212 uc = json_skip_space(uc, ue); 213 if (uc == ue) 214 goto out; 215 if (*uc == '}') { 216 uc++; 217 goto done; 218 } 219 if (*uc++ != '"') { 220 DPRINTF("not string", uc, *ucp); 221 goto out; 222 } 223 DPRINTF("next field", uc, *ucp); 224 if (!json_parse_string(&uc, ue)) { 225 DPRINTF("not string", uc, *ucp); 226 goto out; 227 } 228 uc = json_skip_space(uc, ue); 229 if (uc == ue) 230 goto out; 231 if (*uc++ != ':') { 232 DPRINTF("not colon", uc, *ucp); 233 goto out; 234 } 235 if (!json_parse(&uc, ue, st, lvl + 1)) { 236 DPRINTF("not json", uc, *ucp); 237 goto out; 238 } 239 if (uc == ue) 240 goto out; 241 switch (*uc++) { 242 case ',': 243 continue; 244 case '}': /* { */ 245 done: 246 *ucp = uc; 247 DPRINTF("Good object: ", uc, *ucp); 248 return 1; 249 default: 250 *ucp = uc - 1; 251 DPRINTF("not more", uc, *ucp); 252 goto out; 253 } 254 } 255out: 256 DPRINTF("Bad object: ", uc, *ucp); 257 *ucp = uc; 258 return 0; 259} 260 261static int 262json_parse_number(const unsigned char **ucp, const unsigned char *ue) 263{ 264 const unsigned char *uc = *ucp; 265 int got = 0; 266 267 DPRINTF("Parse number: ", uc, *ucp); 268 if (uc == ue) 269 return 0; 270 if (*uc == '-') 271 uc++; 272 273 for (; uc < ue; uc++) { 274 if (!json_isdigit(*uc)) 275 break; 276 got = 1; 277 } 278 if (uc == ue) 279 goto out; 280 if (*uc == '.') 281 uc++; 282 for (; uc < ue; uc++) { 283 if (!json_isdigit(*uc)) 284 break; 285 got = 1; 286 } 287 if (uc == ue) 288 goto out; 289 if (got && (*uc == 'e' || *uc == 'E')) { 290 uc++; 291 got = 0; 292 if (uc == ue) 293 goto out; 294 if (*uc == '+' || *uc == '-') 295 uc++; 296 for (; uc < ue; uc++) { 297 if (!json_isdigit(*uc)) 298 break; 299 got = 1; 300 } 301 } 302out: 303 if (!got) 304 DPRINTF("Bad number: ", uc, *ucp); 305 else 306 DPRINTF("Good number: ", uc, *ucp); 307 *ucp = uc; 308 return got; 309} 310 311static int 312json_parse_const(const unsigned char **ucp, const unsigned char *ue, 313 const char *str, size_t len) 314{ 315 const unsigned char *uc = *ucp; 316 317 DPRINTF("Parse const: ", uc, *ucp); 318 for (len--; uc < ue && --len;) { 319 if (*uc++ == *++str) 320 continue; 321 } 322 if (len) 323 DPRINTF("Bad const: ", uc, *ucp); 324 *ucp = uc; 325 return len == 0; 326} 327 328static int 329json_parse(const unsigned char **ucp, const unsigned char *ue, 330 size_t *st, size_t lvl) 331{ 332 const unsigned char *uc; 333 int rv = 0; 334 int t; 335 336 uc = json_skip_space(*ucp, ue); 337 if (uc == ue) 338 goto out; 339 340 // Avoid recursion 341 if (lvl > 20) 342 return 0; 343#if JSON_COUNT 344 /* bail quickly if not counting */ 345 if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAYN])) 346 return 1; 347#endif 348 349 DPRINTF("Parse general: ", uc, *ucp); 350 switch (*uc++) { 351 case '"': 352 rv = json_parse_string(&uc, ue); 353 t = JSON_STRING; 354 break; 355 case '[': 356 rv = json_parse_array(&uc, ue, st, lvl + 1); 357 t = JSON_ARRAY; 358 break; 359 case '{': /* '}' */ 360 rv = json_parse_object(&uc, ue, st, lvl + 1); 361 t = JSON_OBJECT; 362 break; 363 case 't': 364 rv = json_parse_const(&uc, ue, "true", sizeof("true")); 365 t = JSON_CONSTANT; 366 break; 367 case 'f': 368 rv = json_parse_const(&uc, ue, "false", sizeof("false")); 369 t = JSON_CONSTANT; 370 break; 371 case 'n': 372 rv = json_parse_const(&uc, ue, "null", sizeof("null")); 373 t = JSON_CONSTANT; 374 break; 375 default: 376 --uc; 377 rv = json_parse_number(&uc, ue); 378 t = JSON_NUMBER; 379 break; 380 } 381 if (rv) 382 st[t]++; 383 uc = json_skip_space(uc, ue); 384out: 385 *ucp = uc; 386 DPRINTF("End general: ", uc, *ucp); 387 if (lvl == 0) 388 return rv && (st[JSON_ARRAYN] || st[JSON_OBJECT]); 389 return rv; 390} 391 392#ifndef TEST 393int 394file_is_json(struct magic_set *ms, const struct buffer *b) 395{ 396 const unsigned char *uc = CAST(const unsigned char *, b->fbuf); 397 const unsigned char *ue = uc + b->flen; 398 size_t st[JSON_MAX]; 399 int mime = ms->flags & MAGIC_MIME; 400 401 402 if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0) 403 return 0; 404 405 memset(st, 0, sizeof(st)); 406 407 if (!json_parse(&uc, ue, st, 0)) 408 return 0; 409 410 if (mime == MAGIC_MIME_ENCODING) 411 return 1; 412 if (mime) { 413 if (file_printf(ms, "application/json") == -1) 414 return -1; 415 return 1; 416 } 417 if (file_printf(ms, "JSON data") == -1) 418 return -1; 419#if JSON_COUNT 420#define P(n) st[n], st[n] > 1 ? "s" : "" 421 if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT 422 "u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT 423 "u constant%s, %" SIZE_T_FORMAT "u number%s, %" SIZE_T_FORMAT 424 "u >1array%s)", 425 P(JSON_OBJECT), P(JSON_ARRAY), P(JSON_STRING), P(JSON_CONSTANT), 426 P(JSON_NUMBER), P(JSON_ARRAYN)) 427 == -1) 428 return -1; 429#endif 430 return 1; 431} 432 433#else 434 435#include <sys/types.h> 436#include <sys/stat.h> 437#include <stdio.h> 438#include <fcntl.h> 439#include <unistd.h> 440#include <stdlib.h> 441#include <stdint.h> 442#include <err.h> 443 444int 445main(int argc, char *argv[]) 446{ 447 int fd, rv; 448 struct stat st; 449 unsigned char *p; 450 size_t stats[JSON_MAX]; 451 452 if ((fd = open(argv[1], O_RDONLY)) == -1) 453 err(EXIT_FAILURE, "Can't open `%s'", argv[1]); 454 455 if (fstat(fd, &st) == -1) 456 err(EXIT_FAILURE, "Can't stat `%s'", argv[1]); 457 458 if ((p = malloc(st.st_size)) == NULL) 459 err(EXIT_FAILURE, "Can't allocate %jd bytes", 460 (intmax_t)st.st_size); 461 if (read(fd, p, st.st_size) != st.st_size) 462 err(EXIT_FAILURE, "Can't read %jd bytes", 463 (intmax_t)st.st_size); 464 memset(stats, 0, sizeof(stats)); 465 printf("is json %d\n", json_parse((const unsigned char **)&p, 466 p + st.st_size, stats, 0)); 467 return 0; 468} 469#endif 470