/* is_json.c revision 1.3 */
1/* $NetBSD: is_json.c,v 1.3 2019/05/22 17:26:05 christos Exp $ */ 2 3/*- 4 * Copyright (c) 2018 Christos Zoulas 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 
27 */ 28 29/* 30 * Parse JSON object serialization format (RFC-7159) 31 */ 32 33#ifndef TEST 34#include "file.h" 35 36#ifndef lint 37#if 0 38FILE_RCSID("@(#)$File: is_json.c,v 1.13 2019/03/02 01:08:10 christos Exp $") 39#else 40__RCSID("$NetBSD: is_json.c,v 1.3 2019/05/22 17:26:05 christos Exp $"); 41#endif 42#endif 43 44#include <string.h> 45#include "magic.h" 46#endif 47 48#ifdef DEBUG 49#include <stdio.h> 50#define DPRINTF(a, b, c) \ 51 printf("%s [%.2x/%c] %.20s\n", (a), *(b), *(b), (const char *)(c)) 52#else 53#define DPRINTF(a, b, c) do { } while (/*CONSTCOND*/0) 54#endif 55 56#define JSON_ARRAY 0 57#define JSON_CONSTANT 1 58#define JSON_NUMBER 2 59#define JSON_OBJECT 3 60#define JSON_STRING 4 61#define JSON_ARRAYN 5 62#define JSON_MAX 6 63 64/* 65 * if JSON_COUNT != 0: 66 * count all the objects, require that we have the whole data file 67 * otherwise: 68 * stop if we find an object or an array 69 */ 70#ifndef JSON_COUNT 71#define JSON_COUNT 0 72#endif 73 74static int json_parse(const unsigned char **, const unsigned char *, size_t *, 75 size_t); 76 77static int 78json_isspace(const unsigned char uc) 79{ 80 switch (uc) { 81 case ' ': 82 case '\n': 83 case '\r': 84 case '\t': 85 return 1; 86 default: 87 return 0; 88 } 89} 90 91static int 92json_isdigit(unsigned char uc) 93{ 94 switch (uc) { 95 case '0': case '1': case '2': case '3': case '4': 96 case '5': case '6': case '7': case '8': case '9': 97 return 1; 98 default: 99 return 0; 100 } 101} 102 103static int 104json_isxdigit(unsigned char uc) 105{ 106 if (json_isdigit(uc)) 107 return 1; 108 switch (uc) { 109 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 110 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 111 return 1; 112 default: 113 return 0; 114 } 115} 116 117static const unsigned char * 118json_skip_space(const unsigned char *uc, const unsigned char *ue) 119{ 120 while (uc < ue && json_isspace(*uc)) 121 uc++; 122 return uc; 123} 124 125static int 126json_parse_string(const 
unsigned char **ucp, const unsigned char *ue) 127{ 128 const unsigned char *uc = *ucp; 129 size_t i; 130 131 DPRINTF("Parse string: ", uc, *ucp); 132 while (uc < ue) { 133 switch (*uc++) { 134 case '\0': 135 goto out; 136 case '\\': 137 if (uc == ue) 138 goto out; 139 switch (*uc++) { 140 case '\0': 141 goto out; 142 case '"': 143 case '\\': 144 case '/': 145 case 'b': 146 case 'f': 147 case 'n': 148 case 'r': 149 case 't': 150 continue; 151 case 'u': 152 if (ue - uc < 4) { 153 uc = ue; 154 goto out; 155 } 156 for (i = 0; i < 4; i++) 157 if (!json_isxdigit(*uc++)) 158 goto out; 159 continue; 160 default: 161 goto out; 162 } 163 case '"': 164 *ucp = uc; 165 return 1; 166 default: 167 continue; 168 } 169 } 170out: 171 DPRINTF("Bad string: ", uc, *ucp); 172 *ucp = uc; 173 return 0; 174} 175 176static int 177json_parse_array(const unsigned char **ucp, const unsigned char *ue, 178 size_t *st, size_t lvl) 179{ 180 const unsigned char *uc = *ucp; 181 int more = 0; /* Array has more than 1 element */ 182 183 DPRINTF("Parse array: ", uc, *ucp); 184 while (uc < ue) { 185 if (!json_parse(&uc, ue, st, lvl + 1)) 186 goto out; 187 if (uc == ue) 188 goto out; 189 switch (*uc) { 190 case ',': 191 more++; 192 uc++; 193 continue; 194 case ']': 195 if (more) 196 st[JSON_ARRAYN]++; 197 *ucp = uc + 1; 198 return 1; 199 default: 200 goto out; 201 } 202 } 203out: 204 DPRINTF("Bad array: ", uc, *ucp); 205 *ucp = uc; 206 return 0; 207} 208 209static int 210json_parse_object(const unsigned char **ucp, const unsigned char *ue, 211 size_t *st, size_t lvl) 212{ 213 const unsigned char *uc = *ucp; 214 DPRINTF("Parse object: ", uc, *ucp); 215 while (uc < ue) { 216 uc = json_skip_space(uc, ue); 217 if (uc == ue) 218 goto out; 219 if (*uc++ != '"') { 220 DPRINTF("not string", uc, *ucp); 221 goto out; 222 } 223 DPRINTF("next field", uc, *ucp); 224 if (!json_parse_string(&uc, ue)) { 225 DPRINTF("not string", uc, *ucp); 226 goto out; 227 } 228 uc = json_skip_space(uc, ue); 229 if (uc == ue) 230 goto 
out; 231 if (*uc++ != ':') { 232 DPRINTF("not colon", uc, *ucp); 233 goto out; 234 } 235 if (!json_parse(&uc, ue, st, lvl + 1)) { 236 DPRINTF("not json", uc, *ucp); 237 goto out; 238 } 239 if (uc == ue) 240 goto out; 241 switch (*uc++) { 242 case ',': 243 continue; 244 case '}': /* { */ 245 *ucp = uc; 246 DPRINTF("Good object: ", uc, *ucp); 247 return 1; 248 default: 249 *ucp = uc - 1; 250 DPRINTF("not more", uc, *ucp); 251 goto out; 252 } 253 } 254out: 255 DPRINTF("Bad object: ", uc, *ucp); 256 *ucp = uc; 257 return 0; 258} 259 260static int 261json_parse_number(const unsigned char **ucp, const unsigned char *ue) 262{ 263 const unsigned char *uc = *ucp; 264 int got = 0; 265 266 DPRINTF("Parse number: ", uc, *ucp); 267 if (uc == ue) 268 return 0; 269 if (*uc == '-') 270 uc++; 271 272 for (; uc < ue; uc++) { 273 if (!json_isdigit(*uc)) 274 break; 275 got = 1; 276 } 277 if (uc == ue) 278 goto out; 279 if (*uc == '.') 280 uc++; 281 for (; uc < ue; uc++) { 282 if (!json_isdigit(*uc)) 283 break; 284 got = 1; 285 } 286 if (uc == ue) 287 goto out; 288 if (got && (*uc == 'e' || *uc == 'E')) { 289 uc++; 290 got = 0; 291 if (uc == ue) 292 goto out; 293 if (*uc == '+' || *uc == '-') 294 uc++; 295 for (; uc < ue; uc++) { 296 if (!json_isdigit(*uc)) 297 break; 298 got = 1; 299 } 300 } 301out: 302 if (!got) 303 DPRINTF("Bad number: ", uc, *ucp); 304 else 305 DPRINTF("Good number: ", uc, *ucp); 306 *ucp = uc; 307 return got; 308} 309 310static int 311json_parse_const(const unsigned char **ucp, const unsigned char *ue, 312 const char *str, size_t len) 313{ 314 const unsigned char *uc = *ucp; 315 316 DPRINTF("Parse const: ", uc, *ucp); 317 for (len--; uc < ue && --len;) { 318 if (*uc++ == *++str) 319 continue; 320 } 321 if (len) 322 DPRINTF("Bad const: ", uc, *ucp); 323 *ucp = uc; 324 return len == 0; 325} 326 327static int 328json_parse(const unsigned char **ucp, const unsigned char *ue, 329 size_t *st, size_t lvl) 330{ 331 const unsigned char *uc; 332 int rv = 0; 333 int t; 334 
335 uc = json_skip_space(*ucp, ue); 336 if (uc == ue) 337 goto out; 338 339 // Avoid recursion 340 if (lvl > 20) 341 return 0; 342#if JSON_COUNT 343 /* bail quickly if not counting */ 344 if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAYN])) 345 return 1; 346#endif 347 348 DPRINTF("Parse general: ", uc, *ucp); 349 switch (*uc++) { 350 case '"': 351 rv = json_parse_string(&uc, ue); 352 t = JSON_STRING; 353 break; 354 case '[': 355 rv = json_parse_array(&uc, ue, st, lvl + 1); 356 t = JSON_ARRAY; 357 break; 358 case '{': /* '}' */ 359 rv = json_parse_object(&uc, ue, st, lvl + 1); 360 t = JSON_OBJECT; 361 break; 362 case 't': 363 rv = json_parse_const(&uc, ue, "true", sizeof("true")); 364 t = JSON_CONSTANT; 365 break; 366 case 'f': 367 rv = json_parse_const(&uc, ue, "false", sizeof("false")); 368 t = JSON_CONSTANT; 369 break; 370 case 'n': 371 rv = json_parse_const(&uc, ue, "null", sizeof("null")); 372 t = JSON_CONSTANT; 373 break; 374 default: 375 --uc; 376 rv = json_parse_number(&uc, ue); 377 t = JSON_NUMBER; 378 break; 379 } 380 if (rv) 381 st[t]++; 382 uc = json_skip_space(uc, ue); 383out: 384 *ucp = uc; 385 DPRINTF("End general: ", uc, *ucp); 386 if (lvl == 0) 387 return rv && (st[JSON_ARRAYN] || st[JSON_OBJECT]); 388 return rv; 389} 390 391#ifndef TEST 392int 393file_is_json(struct magic_set *ms, const struct buffer *b) 394{ 395 const unsigned char *uc = CAST(const unsigned char *, b->fbuf); 396 const unsigned char *ue = uc + b->flen; 397 size_t st[JSON_MAX]; 398 int mime = ms->flags & MAGIC_MIME; 399 400 401 if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0) 402 return 0; 403 404 memset(st, 0, sizeof(st)); 405 406 if (!json_parse(&uc, ue, st, 0)) 407 return 0; 408 409 if (mime == MAGIC_MIME_ENCODING) 410 return 1; 411 if (mime) { 412 if (file_printf(ms, "application/json") == -1) 413 return -1; 414 return 1; 415 } 416 if (file_printf(ms, "JSON data") == -1) 417 return -1; 418#if JSON_COUNT 419#define P(n) st[n], st[n] > 1 ? 
"s" : "" 420 if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT 421 "u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT 422 "u constant%s, %" SIZE_T_FORMAT "u number%s, %" SIZE_T_FORMAT 423 "u >1array%s)", 424 P(JSON_OBJECT), P(JSON_ARRAY), P(JSON_STRING), P(JSON_CONSTANT), 425 P(JSON_NUMBER), P(JSON_ARRAYN)) 426 == -1) 427 return -1; 428#endif 429 return 1; 430} 431 432#else 433 434#include <sys/types.h> 435#include <sys/stat.h> 436#include <stdio.h> 437#include <fcntl.h> 438#include <unistd.h> 439#include <stdlib.h> 440#include <stdint.h> 441#include <err.h> 442 443int 444main(int argc, char *argv[]) 445{ 446 int fd, rv; 447 struct stat st; 448 unsigned char *p; 449 size_t stats[JSON_MAX]; 450 451 if ((fd = open(argv[1], O_RDONLY)) == -1) 452 err(EXIT_FAILURE, "Can't open `%s'", argv[1]); 453 454 if (fstat(fd, &st) == -1) 455 err(EXIT_FAILURE, "Can't stat `%s'", argv[1]); 456 457 if ((p = malloc(st.st_size)) == NULL) 458 err(EXIT_FAILURE, "Can't allocate %jd bytes", 459 (intmax_t)st.st_size); 460 if (read(fd, p, st.st_size) != st.st_size) 461 err(EXIT_FAILURE, "Can't read %jd bytes", 462 (intmax_t)st.st_size); 463 memset(stats, 0, sizeof(stats)); 464 printf("is json %d\n", json_parse((const unsigned char **)&p, 465 p + st.st_size, stats, 0)); 466 return 0; 467} 468#endif 469