is_json.c revision 354939
/*-
 * Copyright (c) 2018 Christos Zoulas
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Parse JSON object serialization format (RFC-7159)
 */

#ifndef TEST
#include "file.h"

#ifndef lint
FILE_RCSID("@(#)$File: is_json.c,v 1.13 2019/03/02 01:08:10 christos Exp $")
#endif

#include <string.h>
#include "magic.h"
#endif

#ifdef DEBUG
#include <stdio.h>
#define DPRINTF(a, b, c)	\
    printf("%s [%.2x/%c] %.20s\n", (a), *(b), *(b), (const char *)(c))
#else
#define DPRINTF(a, b, c)	do { } while (/*CONSTCOND*/0)
#endif

/* Indices into the statistics array that is threaded through the parser */
#define JSON_ARRAY	0
#define JSON_CONSTANT	1
#define JSON_NUMBER	2
#define JSON_OBJECT	3
#define JSON_STRING	4
#define JSON_ARRAYN	5	/* arrays with more than one element */
#define JSON_MAX	6

/*
 * if JSON_COUNT != 0:
 *	count all the objects, require that we have the whole data file
 * otherwise:
 *	stop if we find an object or an array
 */
#ifndef JSON_COUNT
#define JSON_COUNT 0
#endif

static int json_parse(const unsigned char **, const unsigned char *, size_t *,
    size_t);

/*
 * Return 1 if uc is one of the four JSON whitespace characters
 * (space, newline, carriage return, tab), 0 otherwise.
 */
static int
json_isspace(const unsigned char uc)
{
	switch (uc) {
	case ' ':
	case '\n':
	case '\r':
	case '\t':
		return 1;
	default:
		return 0;
	}
}

/*
 * Return 1 if uc is an ASCII decimal digit, 0 otherwise.
 */
static int
json_isdigit(unsigned char uc)
{
	switch (uc) {
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
		return 1;
	default:
		return 0;
	}
}

/*
 * Return 1 if uc is an ASCII hexadecimal digit (either case), 0 otherwise.
 */
static int
json_isxdigit(unsigned char uc)
{
	if (json_isdigit(uc))
		return 1;
	switch (uc) {
	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
		return 1;
	default:
		return 0;
	}
}

/*
 * Advance uc past any JSON whitespace, never going beyond ue, and
 * return the new position.
 */
static const unsigned char *
json_skip_space(const unsigned char *uc, const unsigned char *ue)
{
	while (uc < ue && json_isspace(*uc))
		uc++;
	return uc;
}

/*
 * Parse the remainder of a string; the opening quote has already been
 * consumed by the caller.  On success *ucp points just past the closing
 * quote and 1 is returned.  On failure (NUL byte, invalid escape, or the
 * buffer ending before the closing quote) *ucp is set to where parsing
 * stopped and 0 is returned.
 */
static int
json_parse_string(const unsigned char **ucp, const unsigned char *ue)
{
	const unsigned char *uc = *ucp;
	size_t i;

	DPRINTF("Parse string: ", uc, *ucp);
	while (uc < ue) {
		switch (*uc++) {
		case '\0':
			goto out;
		case '\\':
			if (uc == ue)
				goto out;
			switch (*uc++) {
			case '\0':
				goto out;
			case '"':
			case '\\':
			case '/':
			case 'b':
			case 'f':
			case 'n':
			case 'r':
			case 't':
				continue;
			case 'u':
				/* \u must be followed by 4 hex digits */
				if (ue - uc < 4) {
					uc = ue;
					goto out;
				}
				for (i = 0; i < 4; i++)
					if (!json_isxdigit(*uc++))
						goto out;
				continue;
			default:
				goto out;
			}
		case '"':
			*ucp = uc;
			return 1;
		default:
			continue;
		}
	}
out:
	DPRINTF("Bad string: ", uc, *ucp);
	*ucp = uc;
	return 0;
}

/*
 * Parse the remainder of an array; the opening bracket has already been
 * consumed by the caller.  Elements are parsed with json_parse() and must
 * be separated by commas.  On success *ucp points just past the closing
 * bracket and 1 is returned; st[JSON_ARRAYN] is incremented when the array
 * held more than one element.  On failure *ucp is set to where parsing
 * stopped and 0 is returned.
 */
static int
json_parse_array(const unsigned char **ucp, const unsigned char *ue,
    size_t *st, size_t lvl)
{
	const unsigned char *uc = *ucp;
	int more = 0;	/* Array has more than 1 element */

	DPRINTF("Parse array: ", uc, *ucp);
	/*
	 * The empty array is valid JSON.  It is only accepted here, before
	 * the first element, so a trailing comma is still rejected.
	 */
	uc = json_skip_space(uc, ue);
	if (uc < ue && *uc == ']') {
		*ucp = uc + 1;
		return 1;
	}
	while (uc < ue) {
		if (!json_parse(&uc, ue, st, lvl + 1))
			goto out;
		if (uc == ue)
			goto out;
		switch (*uc) {
		case ',':
			more++;
			uc++;
			continue;
		case ']':
			if (more)
				st[JSON_ARRAYN]++;
			*ucp = uc + 1;
			return 1;
		default:
			goto out;
		}
	}
out:
	DPRINTF("Bad array: ", uc, *ucp);
	*ucp = uc;
	return 0;
}

/*
 * Parse the remainder of an object; the opening brace has already been
 * consumed by the caller.  Each member is a string, a colon and a value
 * parsed with json_parse(); members are separated by commas.  On success
 * *ucp points just past the closing brace and 1 is returned.  On failure
 * *ucp is set to where parsing stopped and 0 is returned.
 */
static int
json_parse_object(const unsigned char **ucp, const unsigned char *ue,
    size_t *st, size_t lvl)
{
	const unsigned char *uc = *ucp;

	DPRINTF("Parse object: ", uc, *ucp);
	/*
	 * The empty object is valid JSON.  It is only accepted here, before
	 * the first member, so a trailing comma is still rejected.
	 */
	uc = json_skip_space(uc, ue);
	if (uc < ue && *uc == '}') {	/* { */
		*ucp = uc + 1;
		DPRINTF("Good object: ", uc, *ucp);
		return 1;
	}
	while (uc < ue) {
		uc = json_skip_space(uc, ue);
		if (uc == ue)
			goto out;
		if (*uc++ != '"') {
			DPRINTF("not string", uc, *ucp);
			goto out;
		}
		DPRINTF("next field", uc, *ucp);
		if (!json_parse_string(&uc, ue)) {
			DPRINTF("not string", uc, *ucp);
			goto out;
		}
		uc = json_skip_space(uc, ue);
		if (uc == ue)
			goto out;
		if (*uc++ != ':') {
			DPRINTF("not colon", uc, *ucp);
			goto out;
		}
		if (!json_parse(&uc, ue, st, lvl + 1)) {
			DPRINTF("not json", uc, *ucp);
			goto out;
		}
		if (uc == ue)
			goto out;
		switch (*uc++) {
		case ',':
			continue;
		case '}': /* { */
			*ucp = uc;
			DPRINTF("Good object: ", uc, *ucp);
			return 1;
		default:
			*ucp = uc - 1;
			DPRINTF("not more", uc, *ucp);
			goto out;
		}
	}
out:
	DPRINTF("Bad object: ", uc, *ucp);
	*ucp = uc;
	return 0;
}

/*
 * Parse a number starting at *ucp: an optional '-', digits, an optional
 * '.' followed by digits, and an optional exponent ('e' or 'E', optional
 * sign, digits).  Returns 1 if at least one digit was seen (and, when an
 * exponent is present, at least one exponent digit), 0 otherwise.  *ucp is
 * set to where parsing stopped in either case.
 */
static int
json_parse_number(const unsigned char **ucp, const unsigned char *ue)
{
	const unsigned char *uc = *ucp;
	int got = 0;

	DPRINTF("Parse number: ", uc, *ucp);
	if (uc == ue)
		return 0;
	if (*uc == '-')
		uc++;

	for (; uc < ue; uc++) {
		if (!json_isdigit(*uc))
			break;
		got = 1;
	}
	if (uc == ue)
		goto out;
	if (*uc == '.')
		uc++;
	for (; uc < ue; uc++) {
		if (!json_isdigit(*uc))
			break;
		got = 1;
	}
	if (uc == ue)
		goto out;
	if (got && (*uc == 'e' || *uc == 'E')) {
		uc++;
		/* an exponent needs digits of its own */
		got = 0;
		if (uc == ue)
			goto out;
		if (*uc == '+' || *uc == '-')
			uc++;
		for (; uc < ue; uc++) {
			if (!json_isdigit(*uc))
				break;
			got = 1;
		}
	}
out:
	if (!got)
		DPRINTF("Bad number: ", uc, *ucp);
	else
		DPRINTF("Good number: ", uc, *ucp);
	*ucp = uc;
	return got;
}

/*
 * Match the tail of a literal name ("true", "false" or "null").  The first
 * character has already been consumed by the caller, and len is
 * sizeof(str), i.e. it includes the terminating NUL.  Returns 1 and leaves
 * *ucp just past the literal when every remaining character matches;
 * returns 0 with *ucp at the offending position on a mismatch or when the
 * buffer ends before the literal is complete.
 */
static int
json_parse_const(const unsigned char **ucp, const unsigned char *ue,
    const char *str, size_t len)
{
	const unsigned char *uc = *ucp;
	size_t i;

	DPRINTF("Parse const: ", uc, *ucp);
	/* str[0] is already matched; str[len - 1] is the NUL terminator */
	for (i = 1; i + 1 < len; i++) {
		if (uc == ue || *uc != (unsigned char)str[i]) {
			DPRINTF("Bad const: ", uc, *ucp);
			*ucp = uc;
			return 0;
		}
		uc++;
	}
	*ucp = uc;
	return 1;
}

/*
 * Parse one JSON value starting at *ucp, skipping whitespace before and
 * after it, and dispatch on its first character.  On success the counter
 * for the value's type in st[] is incremented.  *ucp is updated to where
 * parsing stopped.  lvl is the nesting level; parsing is refused beyond
 * level 20.  At level 0 the result is additionally required to contain an
 * object or an array with more than one element; nested levels simply
 * report whether the value parsed.
 */
static int
json_parse(const unsigned char **ucp, const unsigned char *ue,
    size_t *st, size_t lvl)
{
	const unsigned char *uc;
	int rv = 0;
	int t;

	uc = json_skip_space(*ucp, ue);
	if (uc == ue)
		goto out;

	// Avoid recursion
	if (lvl > 20)
		return 0;
#if JSON_COUNT
	/*
	 * NOTE(review): this shortcut is compiled only when JSON_COUNT is
	 * non-zero, which seems to contradict both the comment below and the
	 * description next to the JSON_COUNT definition; it also returns
	 * without updating *ucp.  Confirm which behavior is intended.
	 */
	/* bail quickly if not counting */
	if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAYN]))
		return 1;
#endif

	DPRINTF("Parse general: ", uc, *ucp);
	switch (*uc++) {
	case '"':
		rv = json_parse_string(&uc, ue);
		t = JSON_STRING;
		break;
	case '[':
		rv = json_parse_array(&uc, ue, st, lvl + 1);
		t = JSON_ARRAY;
		break;
	case '{': /* '}' */
		rv = json_parse_object(&uc, ue, st, lvl + 1);
		t = JSON_OBJECT;
		break;
	case 't':
		rv = json_parse_const(&uc, ue, "true", sizeof("true"));
		t = JSON_CONSTANT;
		break;
	case 'f':
		rv = json_parse_const(&uc, ue, "false", sizeof("false"));
		t = JSON_CONSTANT;
		break;
	case 'n':
		rv = json_parse_const(&uc, ue, "null", sizeof("null"));
		t = JSON_CONSTANT;
		break;
	default:
		/* anything else has to be a number; re-read its first byte */
		--uc;
		rv = json_parse_number(&uc, ue);
		t = JSON_NUMBER;
		break;
	}
	if (rv)
		st[t]++;
	uc = json_skip_space(uc, ue);
out:
	*ucp = uc;
	DPRINTF("End general: ", uc, *ucp);
	if (lvl == 0)
		return rv && (st[JSON_ARRAYN] || st[JSON_OBJECT]);
	return rv;
}

#ifndef TEST
/*
 * Decide whether the buffer b holds JSON and, if so, print a description
 * through file_printf().  Returns 0 if the data is not recognized (or the
 * MAGIC_APPLE / MAGIC_EXTENSION flags are set), 1 if it is JSON, and -1 if
 * file_printf() fails.
 */
int
file_is_json(struct magic_set *ms, const struct buffer *b)
{
	const unsigned char *uc = CAST(const unsigned char *, b->fbuf);
	const unsigned char *ue = uc + b->flen;
	size_t st[JSON_MAX];
	int mime = ms->flags & MAGIC_MIME;


	if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0)
		return 0;

	memset(st, 0, sizeof(st));

	if (!json_parse(&uc, ue, st, 0))
		return 0;

	if (mime == MAGIC_MIME_ENCODING)
		return 1;
	if (mime) {
		if (file_printf(ms, "application/json") == -1)
			return -1;
		return 1;
	}
	if (file_printf(ms, "JSON data") == -1)
		return -1;
#if JSON_COUNT
#define P(n) st[n], st[n] > 1 ? "s" : ""
	if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT
	    "u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT
	    "u constant%s, %" SIZE_T_FORMAT "u number%s, %" SIZE_T_FORMAT
	    "u >1array%s)",
	    P(JSON_OBJECT), P(JSON_ARRAY), P(JSON_STRING), P(JSON_CONSTANT),
	    P(JSON_NUMBER), P(JSON_ARRAYN))
	    == -1)
		return -1;
#endif
	return 1;
}

#else

#include <sys/types.h>
#include <sys/stat.h>
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <err.h>

/*
 * Standalone test driver: read the file named by argv[1] into memory, run
 * json_parse() over it and print the result.
 */
int
main(int argc, char *argv[])
{
	int fd;
	struct stat st;
	unsigned char *p;
	const unsigned char *uc;
	size_t len;
	size_t stats[JSON_MAX];

	if (argc != 2)
		errx(EXIT_FAILURE, "Usage: is_json <file>");

	if ((fd = open(argv[1], O_RDONLY)) == -1)
		err(EXIT_FAILURE, "Can't open `%s'", argv[1]);

	if (fstat(fd, &st) == -1)
		err(EXIT_FAILURE, "Can't stat `%s'", argv[1]);

	len = (size_t)st.st_size;
	/* malloc(0) may legitimately return NULL; always ask for a byte */
	if ((p = malloc(len ? len : 1)) == NULL)
		err(EXIT_FAILURE, "Can't allocate %jd bytes",
		    (intmax_t)st.st_size);
	if (read(fd, p, len) != st.st_size)
		err(EXIT_FAILURE, "Can't read %jd bytes",
		    (intmax_t)st.st_size);
	memset(stats, 0, sizeof(stats));
	/* parse through a separate cursor so p can still be freed */
	uc = p;
	printf("is json %d\n", json_parse(&uc, p + len, stats, 0));
	free(p);
	(void)close(fd);
	return 0;
}
#endif