is_json.c revision 354582
1354582Sdelphij/*- 2354582Sdelphij * Copyright (c) 2018 Christos Zoulas 3354582Sdelphij * All rights reserved. 4354582Sdelphij * 5354582Sdelphij * Redistribution and use in source and binary forms, with or without 6354582Sdelphij * modification, are permitted provided that the following conditions 7354582Sdelphij * are met: 8354582Sdelphij * 1. Redistributions of source code must retain the above copyright 9354582Sdelphij * notice, this list of conditions and the following disclaimer. 10354582Sdelphij * 2. Redistributions in binary form must reproduce the above copyright 11354582Sdelphij * notice, this list of conditions and the following disclaimer in the 12354582Sdelphij * documentation and/or other materials provided with the distribution. 13354582Sdelphij * 14354582Sdelphij * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 15354582Sdelphij * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 16354582Sdelphij * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 17354582Sdelphij * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 18354582Sdelphij * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19354582Sdelphij * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20354582Sdelphij * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21354582Sdelphij * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22354582Sdelphij * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23354582Sdelphij * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24354582Sdelphij * POSSIBILITY OF SUCH DAMAGE. 25354582Sdelphij */ 26354582Sdelphij 27354582Sdelphij/* 28354582Sdelphij * Parse JSON object serialization format (RFC-7159) 29354582Sdelphij */ 30354582Sdelphij 31354582Sdelphij#ifndef TEST 32354582Sdelphij#include "file.h" 33354582Sdelphij 34354582Sdelphij#ifndef lint 35354582SdelphijFILE_RCSID("@(#)$File: is_json.c,v 1.13 2019/03/02 01:08:10 christos Exp $") 36354582Sdelphij#endif 37354582Sdelphij 38354582Sdelphij#include <string.h> 39354582Sdelphij#include "magic.h" 40354582Sdelphij#endif 41354582Sdelphij 42354582Sdelphij#ifdef DEBUG 43354582Sdelphij#include <stdio.h> 44354582Sdelphij#define DPRINTF(a, b, c) \ 45354582Sdelphij printf("%s [%.2x/%c] %.20s\n", (a), *(b), *(b), (const char *)(c)) 46354582Sdelphij#else 47354582Sdelphij#define DPRINTF(a, b, c) do { } while (/*CONSTCOND*/0) 48354582Sdelphij#endif 49354582Sdelphij 50354582Sdelphij#define JSON_ARRAY 0 51354582Sdelphij#define JSON_CONSTANT 1 52354582Sdelphij#define JSON_NUMBER 2 53354582Sdelphij#define JSON_OBJECT 3 54354582Sdelphij#define JSON_STRING 4 55354582Sdelphij#define JSON_ARRAYN 5 56354582Sdelphij#define JSON_MAX 6 57354582Sdelphij 58354582Sdelphij/* 59354582Sdelphij * if JSON_COUNT != 0: 60354582Sdelphij * count all the objects, require that we have the whole data file 61354582Sdelphij * otherwise: 62354582Sdelphij * stop if we find an object or an array 63354582Sdelphij */ 64354582Sdelphij#ifndef JSON_COUNT 65354582Sdelphij#define JSON_COUNT 0 66354582Sdelphij#endif 67354582Sdelphij 68354582Sdelphijstatic int json_parse(const unsigned char **, const unsigned char *, size_t *, 69354582Sdelphij size_t); 70354582Sdelphij 71354582Sdelphijstatic int 72354582Sdelphijjson_isspace(const unsigned char uc) 73354582Sdelphij{ 74354582Sdelphij switch (uc) { 75354582Sdelphij case ' ': 76354582Sdelphij case '\n': 77354582Sdelphij case '\r': 78354582Sdelphij case '\t': 79354582Sdelphij return 1; 80354582Sdelphij default: 81354582Sdelphij return 0; 82354582Sdelphij } 83354582Sdelphij} 84354582Sdelphij 85354582Sdelphijstatic int 86354582Sdelphijjson_isdigit(unsigned char uc) 87354582Sdelphij{ 88354582Sdelphij switch (uc) { 89354582Sdelphij case '0': case '1': case '2': case '3': case '4': 90354582Sdelphij case '5': case '6': case '7': case '8': case '9': 91354582Sdelphij return 1; 92354582Sdelphij default: 93354582Sdelphij return 0; 94354582Sdelphij } 95354582Sdelphij} 96354582Sdelphij 97354582Sdelphijstatic int 98354582Sdelphijjson_isxdigit(unsigned char uc) 99354582Sdelphij{ 100354582Sdelphij if (json_isdigit(uc)) 101354582Sdelphij return 1; 102354582Sdelphij switch (uc) { 103354582Sdelphij case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 104354582Sdelphij case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 105354582Sdelphij return 1; 106354582Sdelphij default: 107354582Sdelphij return 0; 108354582Sdelphij } 109354582Sdelphij} 110354582Sdelphij 111354582Sdelphijstatic const unsigned char * 112354582Sdelphijjson_skip_space(const unsigned char *uc, const unsigned char *ue) 113354582Sdelphij{ 114354582Sdelphij while (uc < ue && json_isspace(*uc)) 115354582Sdelphij uc++; 116354582Sdelphij return uc; 117354582Sdelphij} 118354582Sdelphij 119354582Sdelphijstatic int 120354582Sdelphijjson_parse_string(const unsigned char **ucp, const unsigned char *ue) 121354582Sdelphij{ 122354582Sdelphij const unsigned char *uc = *ucp; 123354582Sdelphij size_t i; 124354582Sdelphij 125354582Sdelphij DPRINTF("Parse string: ", uc, *ucp); 126354582Sdelphij while (uc < ue) { 127354582Sdelphij switch (*uc++) { 128354582Sdelphij case '\0': 129354582Sdelphij goto out; 130354582Sdelphij case '\\': 131354582Sdelphij if (uc == ue) 132354582Sdelphij goto out; 133354582Sdelphij switch (*uc++) { 134354582Sdelphij case '\0': 135354582Sdelphij goto out; 136354582Sdelphij case '"': 137354582Sdelphij case '\\': 138354582Sdelphij case '/': 139354582Sdelphij case 'b': 140354582Sdelphij case 'f': 141354582Sdelphij case 'n': 142354582Sdelphij case 'r': 143354582Sdelphij case 't': 144354582Sdelphij continue; 145354582Sdelphij case 'u': 146354582Sdelphij if (ue - uc < 4) { 147354582Sdelphij uc = ue; 148354582Sdelphij goto out; 149354582Sdelphij } 150354582Sdelphij for (i = 0; i < 4; i++) 151354582Sdelphij if (!json_isxdigit(*uc++)) 152354582Sdelphij goto out; 153354582Sdelphij continue; 154354582Sdelphij default: 155354582Sdelphij goto out; 156354582Sdelphij } 157354582Sdelphij case '"': 158354582Sdelphij *ucp = uc; 159354582Sdelphij return 1; 160354582Sdelphij default: 161354582Sdelphij continue; 162354582Sdelphij } 163354582Sdelphij } 164354582Sdelphijout: 165354582Sdelphij DPRINTF("Bad string: ", uc, *ucp); 166354582Sdelphij *ucp = uc; 167354582Sdelphij return 0; 168354582Sdelphij} 169354582Sdelphij 170354582Sdelphijstatic int 171354582Sdelphijjson_parse_array(const unsigned char **ucp, const unsigned char *ue, 172354582Sdelphij size_t *st, size_t lvl) 173354582Sdelphij{ 174354582Sdelphij const unsigned char *uc = *ucp; 175354582Sdelphij int more = 0; /* Array has more than 1 element */ 176354582Sdelphij 177354582Sdelphij DPRINTF("Parse array: ", uc, *ucp); 178354582Sdelphij while (uc < ue) { 179354582Sdelphij if (!json_parse(&uc, ue, st, lvl + 1)) 180354582Sdelphij goto out; 181354582Sdelphij if (uc == ue) 182354582Sdelphij goto out; 183354582Sdelphij switch (*uc) { 184354582Sdelphij case ',': 185354582Sdelphij more++; 186354582Sdelphij uc++; 187354582Sdelphij continue; 188354582Sdelphij case ']': 189354582Sdelphij if (more) 190354582Sdelphij st[JSON_ARRAYN]++; 191354582Sdelphij *ucp = uc + 1; 192354582Sdelphij return 1; 193354582Sdelphij default: 194354582Sdelphij goto out; 195354582Sdelphij } 196354582Sdelphij } 197354582Sdelphijout: 198354582Sdelphij DPRINTF("Bad array: ", uc, *ucp); 199354582Sdelphij *ucp = uc; 200354582Sdelphij return 0; 201354582Sdelphij} 202354582Sdelphij 203354582Sdelphijstatic int 204354582Sdelphijjson_parse_object(const unsigned char **ucp, const unsigned char *ue, 205354582Sdelphij size_t *st, size_t lvl) 206354582Sdelphij{ 207354582Sdelphij const unsigned char *uc = *ucp; 208354582Sdelphij DPRINTF("Parse object: ", uc, *ucp); 209354582Sdelphij while (uc < ue) { 210354582Sdelphij uc = json_skip_space(uc, ue); 211354582Sdelphij if (uc == ue) 212354582Sdelphij goto out; 213354582Sdelphij if (*uc++ != '"') { 214354582Sdelphij DPRINTF("not string", uc, *ucp); 215354582Sdelphij goto out; 216354582Sdelphij } 217354582Sdelphij DPRINTF("next field", uc, *ucp); 218354582Sdelphij if (!json_parse_string(&uc, ue)) { 219354582Sdelphij DPRINTF("not string", uc, *ucp); 220354582Sdelphij goto out; 221354582Sdelphij } 222354582Sdelphij uc = json_skip_space(uc, ue); 223354582Sdelphij if (uc == ue) 224354582Sdelphij goto out; 225354582Sdelphij if (*uc++ != ':') { 226354582Sdelphij DPRINTF("not colon", uc, *ucp); 227354582Sdelphij goto out; 228354582Sdelphij } 229354582Sdelphij if (!json_parse(&uc, ue, st, lvl + 1)) { 230354582Sdelphij DPRINTF("not json", uc, *ucp); 231354582Sdelphij goto out; 232354582Sdelphij } 233354582Sdelphij if (uc == ue) 234354582Sdelphij goto out; 235354582Sdelphij switch (*uc++) { 236354582Sdelphij case ',': 237354582Sdelphij continue; 238354582Sdelphij case '}': /* { */ 239354582Sdelphij *ucp = uc; 240354582Sdelphij DPRINTF("Good object: ", uc, *ucp); 241354582Sdelphij return 1; 242354582Sdelphij default: 243354582Sdelphij *ucp = uc - 1; 244354582Sdelphij DPRINTF("not more", uc, *ucp); 245354582Sdelphij goto out; 246354582Sdelphij } 247354582Sdelphij } 248354582Sdelphijout: 249354582Sdelphij DPRINTF("Bad object: ", uc, *ucp); 250354582Sdelphij *ucp = uc; 251354582Sdelphij return 0; 252354582Sdelphij} 253354582Sdelphij 254354582Sdelphijstatic int 255354582Sdelphijjson_parse_number(const unsigned char **ucp, const unsigned char *ue) 256354582Sdelphij{ 257354582Sdelphij const unsigned char *uc = *ucp; 258354582Sdelphij int got = 0; 259354582Sdelphij 260354582Sdelphij DPRINTF("Parse number: ", uc, *ucp); 261354582Sdelphij if (uc == ue) 262354582Sdelphij return 0; 263354582Sdelphij if (*uc == '-') 264354582Sdelphij uc++; 265354582Sdelphij 266354582Sdelphij for (; uc < ue; uc++) { 267354582Sdelphij if (!json_isdigit(*uc)) 268354582Sdelphij break; 269354582Sdelphij got = 1; 270354582Sdelphij } 271354582Sdelphij if (uc == ue) 272354582Sdelphij goto out; 273354582Sdelphij if (*uc == '.') 274354582Sdelphij uc++; 275354582Sdelphij for (; uc < ue; uc++) { 276354582Sdelphij if (!json_isdigit(*uc)) 277354582Sdelphij break; 278354582Sdelphij got = 1; 279354582Sdelphij } 280354582Sdelphij if (uc == ue) 281354582Sdelphij goto out; 282354582Sdelphij if (got && (*uc == 'e' || *uc == 'E')) { 283354582Sdelphij uc++; 284354582Sdelphij got = 0; 285354582Sdelphij if (uc == ue) 286354582Sdelphij goto out; 287354582Sdelphij if (*uc == '+' || *uc == '-') 288354582Sdelphij uc++; 289354582Sdelphij for (; uc < ue; uc++) { 290354582Sdelphij if (!json_isdigit(*uc)) 291354582Sdelphij break; 292354582Sdelphij got = 1; 293354582Sdelphij } 294354582Sdelphij } 295354582Sdelphijout: 296354582Sdelphij if (!got) 297354582Sdelphij DPRINTF("Bad number: ", uc, *ucp); 298354582Sdelphij else 299354582Sdelphij DPRINTF("Good number: ", uc, *ucp); 300354582Sdelphij *ucp = uc; 301354582Sdelphij return got; 302354582Sdelphij} 303354582Sdelphij 304354582Sdelphijstatic int 305354582Sdelphijjson_parse_const(const unsigned char **ucp, const unsigned char *ue, 306354582Sdelphij const char *str, size_t len) 307354582Sdelphij{ 308354582Sdelphij const unsigned char *uc = *ucp; 309354582Sdelphij 310354582Sdelphij DPRINTF("Parse const: ", uc, *ucp); 311354582Sdelphij for (len--; uc < ue && --len;) { 312354582Sdelphij if (*uc++ == *++str) 313354582Sdelphij continue; 314354582Sdelphij } 315354582Sdelphij if (len) 316354582Sdelphij DPRINTF("Bad const: ", uc, *ucp); 317354582Sdelphij *ucp = uc; 318354582Sdelphij return len == 0; 319354582Sdelphij} 320354582Sdelphij 321354582Sdelphijstatic int 322354582Sdelphijjson_parse(const unsigned char **ucp, const unsigned char *ue, 323354582Sdelphij size_t *st, size_t lvl) 324354582Sdelphij{ 325354582Sdelphij const unsigned char *uc; 326354582Sdelphij int rv = 0; 327354582Sdelphij int t; 328354582Sdelphij 329354582Sdelphij uc = json_skip_space(*ucp, ue); 330354582Sdelphij if (uc == ue) 331354582Sdelphij goto out; 332354582Sdelphij 333354582Sdelphij // Avoid recursion 334354582Sdelphij if (lvl > 20) 335354582Sdelphij return 0; 336354582Sdelphij#if JSON_COUNT 337354582Sdelphij /* bail quickly if not counting */ 338354582Sdelphij if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAYN])) 339354582Sdelphij return 1; 340354582Sdelphij#endif 341354582Sdelphij 342354582Sdelphij DPRINTF("Parse general: ", uc, *ucp); 343354582Sdelphij switch (*uc++) { 344354582Sdelphij case '"': 345354582Sdelphij rv = json_parse_string(&uc, ue); 346354582Sdelphij t = JSON_STRING; 347354582Sdelphij break; 348354582Sdelphij case '[': 349354582Sdelphij rv = json_parse_array(&uc, ue, st, lvl + 1); 350354582Sdelphij t = JSON_ARRAY; 351354582Sdelphij break; 352354582Sdelphij case '{': /* '}' */ 353354582Sdelphij rv = json_parse_object(&uc, ue, st, lvl + 1); 354354582Sdelphij t = JSON_OBJECT; 355354582Sdelphij break; 356354582Sdelphij case 't': 357354582Sdelphij rv = json_parse_const(&uc, ue, "true", sizeof("true")); 358354582Sdelphij t = JSON_CONSTANT; 359354582Sdelphij break; 360354582Sdelphij case 'f': 361354582Sdelphij rv = json_parse_const(&uc, ue, "false", sizeof("false")); 362354582Sdelphij t = JSON_CONSTANT; 363354582Sdelphij break; 364354582Sdelphij case 'n': 365354582Sdelphij rv = json_parse_const(&uc, ue, "null", sizeof("null")); 366354582Sdelphij t = JSON_CONSTANT; 367354582Sdelphij break; 368354582Sdelphij default: 369354582Sdelphij --uc; 370354582Sdelphij rv = json_parse_number(&uc, ue); 371354582Sdelphij t = JSON_NUMBER; 372354582Sdelphij break; 373354582Sdelphij } 374354582Sdelphij if (rv) 375354582Sdelphij st[t]++; 376354582Sdelphij uc = json_skip_space(uc, ue); 377354582Sdelphijout: 378354582Sdelphij *ucp = uc; 379354582Sdelphij DPRINTF("End general: ", uc, *ucp); 380354582Sdelphij if (lvl == 0) 381354582Sdelphij return rv && (st[JSON_ARRAYN] || st[JSON_OBJECT]); 382354582Sdelphij return rv; 383354582Sdelphij} 384354582Sdelphij 385354582Sdelphij#ifndef TEST 386354582Sdelphijint 387354582Sdelphijfile_is_json(struct magic_set *ms, const struct buffer *b) 388354582Sdelphij{ 389354582Sdelphij const unsigned char *uc = CAST(const unsigned char *, b->fbuf); 390354582Sdelphij const unsigned char *ue = uc + b->flen; 391354582Sdelphij size_t st[JSON_MAX]; 392354582Sdelphij int mime = ms->flags & MAGIC_MIME; 393354582Sdelphij 394354582Sdelphij 395354582Sdelphij if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0) 396354582Sdelphij return 0; 397354582Sdelphij 398354582Sdelphij memset(st, 0, sizeof(st)); 399354582Sdelphij 400354582Sdelphij if (!json_parse(&uc, ue, st, 0)) 401354582Sdelphij return 0; 402354582Sdelphij 403354582Sdelphij if (mime == MAGIC_MIME_ENCODING) 404354582Sdelphij return 1; 405354582Sdelphij if (mime) { 406354582Sdelphij if (file_printf(ms, "application/json") == -1) 407354582Sdelphij return -1; 408354582Sdelphij return 1; 409354582Sdelphij } 410354582Sdelphij if (file_printf(ms, "JSON data") == -1) 411354582Sdelphij return -1; 412354582Sdelphij#if JSON_COUNT 413354582Sdelphij#define P(n) st[n], st[n] > 1 ? "s" : "" 414354582Sdelphij if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT 415354582Sdelphij "u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT 416354582Sdelphij "u constant%s, %" SIZE_T_FORMAT "u number%s, %" SIZE_T_FORMAT 417354582Sdelphij "u >1array%s)", 418354582Sdelphij P(JSON_OBJECT), P(JSON_ARRAY), P(JSON_STRING), P(JSON_CONSTANT), 419354582Sdelphij P(JSON_NUMBER), P(JSON_ARRAYN)) 420354582Sdelphij == -1) 421354582Sdelphij return -1; 422354582Sdelphij#endif 423354582Sdelphij return 1; 424354582Sdelphij} 425354582Sdelphij 426354582Sdelphij#else 427354582Sdelphij 428354582Sdelphij#include <sys/types.h> 429354582Sdelphij#include <sys/stat.h> 430354582Sdelphij#include <stdio.h> 431354582Sdelphij#include <fcntl.h> 432354582Sdelphij#include <unistd.h> 433354582Sdelphij#include <stdlib.h> 434354582Sdelphij#include <stdint.h> 435354582Sdelphij#include <err.h> 436354582Sdelphij 437354582Sdelphijint 438354582Sdelphijmain(int argc, char *argv[]) 439354582Sdelphij{ 440354582Sdelphij int fd, rv; 441354582Sdelphij struct stat st; 442354582Sdelphij unsigned char *p; 443354582Sdelphij size_t stats[JSON_MAX]; 444354582Sdelphij 445354582Sdelphij if ((fd = open(argv[1], O_RDONLY)) == -1) 446354582Sdelphij err(EXIT_FAILURE, "Can't open `%s'", argv[1]); 447354582Sdelphij 448354582Sdelphij if (fstat(fd, &st) == -1) 449354582Sdelphij err(EXIT_FAILURE, "Can't stat `%s'", argv[1]); 450354582Sdelphij 451354582Sdelphij if ((p = malloc(st.st_size)) == NULL) 452354582Sdelphij err(EXIT_FAILURE, "Can't allocate %jd bytes", 453354582Sdelphij (intmax_t)st.st_size); 454354582Sdelphij if (read(fd, p, st.st_size) != st.st_size) 455354582Sdelphij err(EXIT_FAILURE, "Can't read %jd bytes", 456354582Sdelphij (intmax_t)st.st_size); 457354582Sdelphij memset(stats, 0, sizeof(stats)); 458354582Sdelphij printf("is json %d\n", json_parse((const unsigned char **)&p, 459354582Sdelphij p + st.st_size, stats, 0)); 460354582Sdelphij return 0; 461354582Sdelphij} 462354582Sdelphij#endif 463