1354582Sdelphij/*- 2354582Sdelphij * Copyright (c) 2018 Christos Zoulas 3354582Sdelphij * All rights reserved. 4354582Sdelphij * 5354582Sdelphij * Redistribution and use in source and binary forms, with or without 6354582Sdelphij * modification, are permitted provided that the following conditions 7354582Sdelphij * are met: 8354582Sdelphij * 1. Redistributions of source code must retain the above copyright 9354582Sdelphij * notice, this list of conditions and the following disclaimer. 10354582Sdelphij * 2. Redistributions in binary form must reproduce the above copyright 11354582Sdelphij * notice, this list of conditions and the following disclaimer in the 12354582Sdelphij * documentation and/or other materials provided with the distribution. 13354582Sdelphij * 14354582Sdelphij * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 15354582Sdelphij * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 16354582Sdelphij * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 17354582Sdelphij * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 18354582Sdelphij * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19354582Sdelphij * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20354582Sdelphij * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21354582Sdelphij * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22354582Sdelphij * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23354582Sdelphij * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24354582Sdelphij * POSSIBILITY OF SUCH DAMAGE. 25354582Sdelphij */ 26354582Sdelphij 27354582Sdelphij/* 28354582Sdelphij * Parse JSON object serialization format (RFC-7159) 29354582Sdelphij */ 30354582Sdelphij 31354582Sdelphij#ifndef TEST 32354582Sdelphij#include "file.h" 33354582Sdelphij 34354582Sdelphij#ifndef lint 35362844SdelphijFILE_RCSID("@(#)$File: is_json.c,v 1.15 2020/06/07 19:05:47 christos Exp $") 36354582Sdelphij#endif 37354582Sdelphij 38354582Sdelphij#include <string.h> 39354582Sdelphij#include "magic.h" 40354582Sdelphij#endif 41354582Sdelphij 42354582Sdelphij#ifdef DEBUG 43354582Sdelphij#include <stdio.h> 44354582Sdelphij#define DPRINTF(a, b, c) \ 45354582Sdelphij printf("%s [%.2x/%c] %.20s\n", (a), *(b), *(b), (const char *)(c)) 46354582Sdelphij#else 47354582Sdelphij#define DPRINTF(a, b, c) do { } while (/*CONSTCOND*/0) 48354582Sdelphij#endif 49354582Sdelphij 50354582Sdelphij#define JSON_ARRAY 0 51354582Sdelphij#define JSON_CONSTANT 1 52354582Sdelphij#define JSON_NUMBER 2 53354582Sdelphij#define JSON_OBJECT 3 54354582Sdelphij#define JSON_STRING 4 55354582Sdelphij#define JSON_ARRAYN 5 56354582Sdelphij#define JSON_MAX 6 57354582Sdelphij 58354582Sdelphij/* 59354582Sdelphij * if JSON_COUNT != 0: 60354582Sdelphij * count all the objects, require that we have the whole data file 61354582Sdelphij * otherwise: 62354582Sdelphij * stop if we find an object or an array 63354582Sdelphij */ 64354582Sdelphij#ifndef JSON_COUNT 65354582Sdelphij#define JSON_COUNT 0 66354582Sdelphij#endif 67354582Sdelphij 68354582Sdelphijstatic int json_parse(const unsigned char **, const unsigned char *, size_t *, 69354582Sdelphij size_t); 70354582Sdelphij 71354582Sdelphijstatic int 72354582Sdelphijjson_isspace(const unsigned char uc) 73354582Sdelphij{ 74354582Sdelphij switch (uc) { 75354582Sdelphij case ' ': 76354582Sdelphij case '\n': 77354582Sdelphij case '\r': 78354582Sdelphij case '\t': 79354582Sdelphij return 1; 80354582Sdelphij default: 81354582Sdelphij return 0; 82354582Sdelphij } 83354582Sdelphij} 84354582Sdelphij 85354582Sdelphijstatic int 86354582Sdelphijjson_isdigit(unsigned char uc) 87354582Sdelphij{ 88354582Sdelphij switch (uc) { 89354582Sdelphij case '0': case '1': case '2': case '3': case '4': 90354582Sdelphij case '5': case '6': case '7': case '8': case '9': 91354582Sdelphij return 1; 92354582Sdelphij default: 93354582Sdelphij return 0; 94354582Sdelphij } 95354582Sdelphij} 96354582Sdelphij 97354582Sdelphijstatic int 98354582Sdelphijjson_isxdigit(unsigned char uc) 99354582Sdelphij{ 100354582Sdelphij if (json_isdigit(uc)) 101354582Sdelphij return 1; 102354582Sdelphij switch (uc) { 103354582Sdelphij case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 104354582Sdelphij case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 105354582Sdelphij return 1; 106354582Sdelphij default: 107354582Sdelphij return 0; 108354582Sdelphij } 109354582Sdelphij} 110354582Sdelphij 111354582Sdelphijstatic const unsigned char * 112354582Sdelphijjson_skip_space(const unsigned char *uc, const unsigned char *ue) 113354582Sdelphij{ 114354582Sdelphij while (uc < ue && json_isspace(*uc)) 115354582Sdelphij uc++; 116354582Sdelphij return uc; 117354582Sdelphij} 118354582Sdelphij 119354582Sdelphijstatic int 120354582Sdelphijjson_parse_string(const unsigned char **ucp, const unsigned char *ue) 121354582Sdelphij{ 122354582Sdelphij const unsigned char *uc = *ucp; 123354582Sdelphij size_t i; 124354582Sdelphij 125354582Sdelphij DPRINTF("Parse string: ", uc, *ucp); 126354582Sdelphij while (uc < ue) { 127354582Sdelphij switch (*uc++) { 128354582Sdelphij case '\0': 129354582Sdelphij goto out; 130354582Sdelphij case '\\': 131354582Sdelphij if (uc == ue) 132354582Sdelphij goto out; 133354582Sdelphij switch (*uc++) { 134354582Sdelphij case '\0': 135354582Sdelphij goto out; 136354582Sdelphij case '"': 137354582Sdelphij case '\\': 138354582Sdelphij case '/': 139354582Sdelphij case 'b': 140354582Sdelphij case 'f': 141354582Sdelphij case 'n': 142354582Sdelphij case 'r': 143354582Sdelphij case 't': 144354582Sdelphij continue; 145354582Sdelphij case 'u': 146354582Sdelphij if (ue - uc < 4) { 147354582Sdelphij uc = ue; 148354582Sdelphij goto out; 149354582Sdelphij } 150354582Sdelphij for (i = 0; i < 4; i++) 151354582Sdelphij if (!json_isxdigit(*uc++)) 152354582Sdelphij goto out; 153354582Sdelphij continue; 154354582Sdelphij default: 155354582Sdelphij goto out; 156354582Sdelphij } 157354582Sdelphij case '"': 158354582Sdelphij *ucp = uc; 159362844Sdelphij DPRINTF("Good string: ", uc, *ucp); 160354582Sdelphij return 1; 161354582Sdelphij default: 162354582Sdelphij continue; 163354582Sdelphij } 164354582Sdelphij } 165354582Sdelphijout: 166354582Sdelphij DPRINTF("Bad string: ", uc, *ucp); 167354582Sdelphij *ucp = uc; 168354582Sdelphij return 0; 169354582Sdelphij} 170354582Sdelphij 171354582Sdelphijstatic int 172354582Sdelphijjson_parse_array(const unsigned char **ucp, const unsigned char *ue, 173354582Sdelphij size_t *st, size_t lvl) 174354582Sdelphij{ 175354582Sdelphij const unsigned char *uc = *ucp; 176354582Sdelphij 177354582Sdelphij DPRINTF("Parse array: ", uc, *ucp); 178354582Sdelphij while (uc < ue) { 179362844Sdelphij if (*uc == ']') 180362844Sdelphij goto done; 181354582Sdelphij if (!json_parse(&uc, ue, st, lvl + 1)) 182354582Sdelphij goto out; 183354582Sdelphij if (uc == ue) 184354582Sdelphij goto out; 185354582Sdelphij switch (*uc) { 186354582Sdelphij case ',': 187354582Sdelphij uc++; 188354582Sdelphij continue; 189354582Sdelphij case ']': 190362844Sdelphij done: 191362844Sdelphij st[JSON_ARRAYN]++; 192354582Sdelphij *ucp = uc + 1; 193362844Sdelphij DPRINTF("Good array: ", uc, *ucp); 194354582Sdelphij return 1; 195354582Sdelphij default: 196354582Sdelphij goto out; 197354582Sdelphij } 198354582Sdelphij } 199354582Sdelphijout: 200354582Sdelphij DPRINTF("Bad array: ", uc, *ucp); 201354582Sdelphij *ucp = uc; 202354582Sdelphij return 0; 203354582Sdelphij} 204354582Sdelphij 205354582Sdelphijstatic int 206354582Sdelphijjson_parse_object(const unsigned char **ucp, const unsigned char *ue, 207354582Sdelphij size_t *st, size_t lvl) 208354582Sdelphij{ 209354582Sdelphij const unsigned char *uc = *ucp; 210354582Sdelphij DPRINTF("Parse object: ", uc, *ucp); 211354582Sdelphij while (uc < ue) { 212354582Sdelphij uc = json_skip_space(uc, ue); 213354582Sdelphij if (uc == ue) 214354582Sdelphij goto out; 215362844Sdelphij if (*uc == '}') { 216362844Sdelphij uc++; 217362844Sdelphij goto done; 218362844Sdelphij } 219354582Sdelphij if (*uc++ != '"') { 220354582Sdelphij DPRINTF("not string", uc, *ucp); 221354582Sdelphij goto out; 222354582Sdelphij } 223354582Sdelphij DPRINTF("next field", uc, *ucp); 224354582Sdelphij if (!json_parse_string(&uc, ue)) { 225354582Sdelphij DPRINTF("not string", uc, *ucp); 226354582Sdelphij goto out; 227354582Sdelphij } 228354582Sdelphij uc = json_skip_space(uc, ue); 229354582Sdelphij if (uc == ue) 230354582Sdelphij goto out; 231354582Sdelphij if (*uc++ != ':') { 232354582Sdelphij DPRINTF("not colon", uc, *ucp); 233354582Sdelphij goto out; 234354582Sdelphij } 235354582Sdelphij if (!json_parse(&uc, ue, st, lvl + 1)) { 236354582Sdelphij DPRINTF("not json", uc, *ucp); 237354582Sdelphij goto out; 238354582Sdelphij } 239354582Sdelphij if (uc == ue) 240354582Sdelphij goto out; 241354582Sdelphij switch (*uc++) { 242354582Sdelphij case ',': 243354582Sdelphij continue; 244354582Sdelphij case '}': /* { */ 245362844Sdelphij done: 246354582Sdelphij *ucp = uc; 247354582Sdelphij DPRINTF("Good object: ", uc, *ucp); 248354582Sdelphij return 1; 249354582Sdelphij default: 250354582Sdelphij *ucp = uc - 1; 251354582Sdelphij DPRINTF("not more", uc, *ucp); 252354582Sdelphij goto out; 253354582Sdelphij } 254354582Sdelphij } 255354582Sdelphijout: 256354582Sdelphij DPRINTF("Bad object: ", uc, *ucp); 257354582Sdelphij *ucp = uc; 258354582Sdelphij return 0; 259354582Sdelphij} 260354582Sdelphij 261354582Sdelphijstatic int 262354582Sdelphijjson_parse_number(const unsigned char **ucp, const unsigned char *ue) 263354582Sdelphij{ 264354582Sdelphij const unsigned char *uc = *ucp; 265354582Sdelphij int got = 0; 266354582Sdelphij 267354582Sdelphij DPRINTF("Parse number: ", uc, *ucp); 268354582Sdelphij if (uc == ue) 269354582Sdelphij return 0; 270354582Sdelphij if (*uc == '-') 271354582Sdelphij uc++; 272354582Sdelphij 273354582Sdelphij for (; uc < ue; uc++) { 274354582Sdelphij if (!json_isdigit(*uc)) 275354582Sdelphij break; 276354582Sdelphij got = 1; 277354582Sdelphij } 278354582Sdelphij if (uc == ue) 279354582Sdelphij goto out; 280354582Sdelphij if (*uc == '.') 281354582Sdelphij uc++; 282354582Sdelphij for (; uc < ue; uc++) { 283354582Sdelphij if (!json_isdigit(*uc)) 284354582Sdelphij break; 285354582Sdelphij got = 1; 286354582Sdelphij } 287354582Sdelphij if (uc == ue) 288354582Sdelphij goto out; 289354582Sdelphij if (got && (*uc == 'e' || *uc == 'E')) { 290354582Sdelphij uc++; 291354582Sdelphij got = 0; 292354582Sdelphij if (uc == ue) 293354582Sdelphij goto out; 294354582Sdelphij if (*uc == '+' || *uc == '-') 295354582Sdelphij uc++; 296354582Sdelphij for (; uc < ue; uc++) { 297354582Sdelphij if (!json_isdigit(*uc)) 298354582Sdelphij break; 299354582Sdelphij got = 1; 300354582Sdelphij } 301354582Sdelphij } 302354582Sdelphijout: 303354582Sdelphij if (!got) 304354582Sdelphij DPRINTF("Bad number: ", uc, *ucp); 305354582Sdelphij else 306354582Sdelphij DPRINTF("Good number: ", uc, *ucp); 307354582Sdelphij *ucp = uc; 308354582Sdelphij return got; 309354582Sdelphij} 310354582Sdelphij 311354582Sdelphijstatic int 312354582Sdelphijjson_parse_const(const unsigned char **ucp, const unsigned char *ue, 313354582Sdelphij const char *str, size_t len) 314354582Sdelphij{ 315354582Sdelphij const unsigned char *uc = *ucp; 316354582Sdelphij 317354582Sdelphij DPRINTF("Parse const: ", uc, *ucp); 318354582Sdelphij for (len--; uc < ue && --len;) { 319354582Sdelphij if (*uc++ == *++str) 320354582Sdelphij continue; 321354582Sdelphij } 322354582Sdelphij if (len) 323354582Sdelphij DPRINTF("Bad const: ", uc, *ucp); 324354582Sdelphij *ucp = uc; 325354582Sdelphij return len == 0; 326354582Sdelphij} 327354582Sdelphij 328354582Sdelphijstatic int 329354582Sdelphijjson_parse(const unsigned char **ucp, const unsigned char *ue, 330354582Sdelphij size_t *st, size_t lvl) 331354582Sdelphij{ 332354582Sdelphij const unsigned char *uc; 333354582Sdelphij int rv = 0; 334354582Sdelphij int t; 335354582Sdelphij 336354582Sdelphij uc = json_skip_space(*ucp, ue); 337354582Sdelphij if (uc == ue) 338354582Sdelphij goto out; 339354582Sdelphij 340354582Sdelphij // Avoid recursion 341354582Sdelphij if (lvl > 20) 342354582Sdelphij return 0; 343354582Sdelphij#if JSON_COUNT 344354582Sdelphij /* bail quickly if not counting */ 345354582Sdelphij if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAYN])) 346354582Sdelphij return 1; 347354582Sdelphij#endif 348354582Sdelphij 349354582Sdelphij DPRINTF("Parse general: ", uc, *ucp); 350354582Sdelphij switch (*uc++) { 351354582Sdelphij case '"': 352354582Sdelphij rv = json_parse_string(&uc, ue); 353354582Sdelphij t = JSON_STRING; 354354582Sdelphij break; 355354582Sdelphij case '[': 356354582Sdelphij rv = json_parse_array(&uc, ue, st, lvl + 1); 357354582Sdelphij t = JSON_ARRAY; 358354582Sdelphij break; 359354582Sdelphij case '{': /* '}' */ 360354582Sdelphij rv = json_parse_object(&uc, ue, st, lvl + 1); 361354582Sdelphij t = JSON_OBJECT; 362354582Sdelphij break; 363354582Sdelphij case 't': 364354582Sdelphij rv = json_parse_const(&uc, ue, "true", sizeof("true")); 365354582Sdelphij t = JSON_CONSTANT; 366354582Sdelphij break; 367354582Sdelphij case 'f': 368354582Sdelphij rv = json_parse_const(&uc, ue, "false", sizeof("false")); 369354582Sdelphij t = JSON_CONSTANT; 370354582Sdelphij break; 371354582Sdelphij case 'n': 372354582Sdelphij rv = json_parse_const(&uc, ue, "null", sizeof("null")); 373354582Sdelphij t = JSON_CONSTANT; 374354582Sdelphij break; 375354582Sdelphij default: 376354582Sdelphij --uc; 377354582Sdelphij rv = json_parse_number(&uc, ue); 378354582Sdelphij t = JSON_NUMBER; 379354582Sdelphij break; 380354582Sdelphij } 381354582Sdelphij if (rv) 382354582Sdelphij st[t]++; 383354582Sdelphij uc = json_skip_space(uc, ue); 384354582Sdelphijout: 385354582Sdelphij *ucp = uc; 386354582Sdelphij DPRINTF("End general: ", uc, *ucp); 387354582Sdelphij if (lvl == 0) 388354582Sdelphij return rv && (st[JSON_ARRAYN] || st[JSON_OBJECT]); 389354582Sdelphij return rv; 390354582Sdelphij} 391354582Sdelphij 392354582Sdelphij#ifndef TEST 393354582Sdelphijint 394354582Sdelphijfile_is_json(struct magic_set *ms, const struct buffer *b) 395354582Sdelphij{ 396354582Sdelphij const unsigned char *uc = CAST(const unsigned char *, b->fbuf); 397354582Sdelphij const unsigned char *ue = uc + b->flen; 398354582Sdelphij size_t st[JSON_MAX]; 399354582Sdelphij int mime = ms->flags & MAGIC_MIME; 400354582Sdelphij 401354582Sdelphij 402354582Sdelphij if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0) 403354582Sdelphij return 0; 404354582Sdelphij 405354582Sdelphij memset(st, 0, sizeof(st)); 406354582Sdelphij 407354582Sdelphij if (!json_parse(&uc, ue, st, 0)) 408354582Sdelphij return 0; 409354582Sdelphij 410354582Sdelphij if (mime == MAGIC_MIME_ENCODING) 411354582Sdelphij return 1; 412354582Sdelphij if (mime) { 413354582Sdelphij if (file_printf(ms, "application/json") == -1) 414354582Sdelphij return -1; 415354582Sdelphij return 1; 416354582Sdelphij } 417354582Sdelphij if (file_printf(ms, "JSON data") == -1) 418354582Sdelphij return -1; 419354582Sdelphij#if JSON_COUNT 420354582Sdelphij#define P(n) st[n], st[n] > 1 ? "s" : "" 421354582Sdelphij if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT 422354582Sdelphij "u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT 423354582Sdelphij "u constant%s, %" SIZE_T_FORMAT "u number%s, %" SIZE_T_FORMAT 424354582Sdelphij "u >1array%s)", 425354582Sdelphij P(JSON_OBJECT), P(JSON_ARRAY), P(JSON_STRING), P(JSON_CONSTANT), 426354582Sdelphij P(JSON_NUMBER), P(JSON_ARRAYN)) 427354582Sdelphij == -1) 428354582Sdelphij return -1; 429354582Sdelphij#endif 430354582Sdelphij return 1; 431354582Sdelphij} 432354582Sdelphij 433354582Sdelphij#else 434354582Sdelphij 435354582Sdelphij#include <sys/types.h> 436354582Sdelphij#include <sys/stat.h> 437354582Sdelphij#include <stdio.h> 438354582Sdelphij#include <fcntl.h> 439354582Sdelphij#include <unistd.h> 440354582Sdelphij#include <stdlib.h> 441354582Sdelphij#include <stdint.h> 442354582Sdelphij#include <err.h> 443354582Sdelphij 444354582Sdelphijint 445354582Sdelphijmain(int argc, char *argv[]) 446354582Sdelphij{ 447354582Sdelphij int fd, rv; 448354582Sdelphij struct stat st; 449354582Sdelphij unsigned char *p; 450354582Sdelphij size_t stats[JSON_MAX]; 451354582Sdelphij 452354582Sdelphij if ((fd = open(argv[1], O_RDONLY)) == -1) 453354582Sdelphij err(EXIT_FAILURE, "Can't open `%s'", argv[1]); 454354582Sdelphij 455354582Sdelphij if (fstat(fd, &st) == -1) 456354582Sdelphij err(EXIT_FAILURE, "Can't stat `%s'", argv[1]); 457354582Sdelphij 458354582Sdelphij if ((p = malloc(st.st_size)) == NULL) 459354582Sdelphij err(EXIT_FAILURE, "Can't allocate %jd bytes", 460354582Sdelphij (intmax_t)st.st_size); 461354582Sdelphij if (read(fd, p, st.st_size) != st.st_size) 462354582Sdelphij err(EXIT_FAILURE, "Can't read %jd bytes", 463354582Sdelphij (intmax_t)st.st_size); 464354582Sdelphij memset(stats, 0, sizeof(stats)); 465354582Sdelphij printf("is json %d\n", json_parse((const unsigned char **)&p, 466354582Sdelphij p + st.st_size, stats, 0)); 467354582Sdelphij return 0; 468354582Sdelphij} 469354582Sdelphij#endif 470