1#include "EXTERN.h" 2#include "perl.h" 3#include "XSUB.h" 4 5#include <assert.h> 6#include <string.h> 7#include <stdlib.h> 8#include <stdio.h> 9#include <limits.h> 10#include <float.h> 11 12#if defined(__BORLANDC__) || defined(_MSC_VER) 13# define snprintf _snprintf // C compilers have this in stdio.h 14#endif 15 16// some old perls do not have this, try to make it work, no 17// guarantees, though. if it breaks, you get to keep the pieces. 18#ifndef UTF8_MAXBYTES 19# define UTF8_MAXBYTES 13 20#endif 21 22// compatibility with perl <5.18 23#ifndef HvNAMELEN_get 24# define HvNAMELEN_get(hv) strlen (HvNAME (hv)) 25#endif 26#ifndef HvNAMELEN 27# define HvNAMELEN(hv) HvNAMELEN_get (hv) 28#endif 29#ifndef HvNAMEUTF8 30# define HvNAMEUTF8(hv) 0 31#endif 32 33// three extra for rounding, sign, and end of string 34#define IVUV_MAXCHARS (sizeof (UV) * CHAR_BIT * 28 / 93 + 3) 35 36#define F_ASCII 0x00000001UL 37#define F_LATIN1 0x00000002UL 38#define F_UTF8 0x00000004UL 39#define F_INDENT 0x00000008UL 40#define F_CANONICAL 0x00000010UL 41#define F_SPACE_BEFORE 0x00000020UL 42#define F_SPACE_AFTER 0x00000040UL 43#define F_ALLOW_NONREF 0x00000100UL 44#define F_SHRINK 0x00000200UL 45#define F_ALLOW_BLESSED 0x00000400UL 46#define F_CONV_BLESSED 0x00000800UL 47#define F_RELAXED 0x00001000UL 48#define F_ALLOW_UNKNOWN 0x00002000UL 49#define F_ALLOW_TAGS 0x00004000UL 50#define F_HOOK 0x00080000UL // some hooks exist, so slow-path processing 51 52#define F_PRETTY F_INDENT | F_SPACE_BEFORE | F_SPACE_AFTER 53 54#define INIT_SIZE 32 // initial scalar size to be allocated 55#define INDENT_STEP 3 // spaces per indentation level 56 57#define SHORT_STRING_LEN 16384 // special-case strings of up to this size 58 59#define DECODE_WANTS_OCTETS(json) ((json)->flags & F_UTF8) 60 61#define SB do { 62#define SE } while (0) 63 64#if __GNUC__ >= 3 65# define expect(expr,value) __builtin_expect ((expr), (value)) 66# define INLINE static inline 67#else 68# define expect(expr,value) (expr) 69# define INLINE static 70#endif 71 72#define expect_false(expr) expect ((expr) != 0, 0) 73#define expect_true(expr) expect ((expr) != 0, 1) 74 75#define IN_RANGE_INC(type,val,beg,end) \ 76 ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \ 77 <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg))) 78 79#define ERR_NESTING_EXCEEDED "json text or perl structure exceeds maximum nesting level (max_depth set too low?)" 80 81#ifdef USE_ITHREADS 82# define JSON_SLOW 1 83# define JSON_STASH (json_stash ? json_stash : gv_stashpv ("JSON::XS", 1)) 84#else 85# define JSON_SLOW 0 86# define JSON_STASH json_stash 87#endif 88 89// the amount of HEs to allocate on the stack, when sorting keys 90#define STACK_HES 64 91 92static HV *json_stash, *types_boolean_stash; // JSON::XS:: 93static SV *types_true, *types_false, *sv_json; 94 95enum { 96 INCR_M_WS = 0, // initial whitespace skipping, must be 0 97 INCR_M_STR, // inside string 98 INCR_M_BS, // inside backslash 99 INCR_M_C0, // inside comment in initial whitespace sequence 100 INCR_M_C1, // inside comment in other places 101 INCR_M_JSON // outside anything, count nesting 102}; 103 104#define INCR_DONE(json) ((json)->incr_nest <= 0 && (json)->incr_mode == INCR_M_JSON) 105 106typedef struct { 107 U32 flags; 108 U32 max_depth; 109 STRLEN max_size; 110 111 SV *cb_object; 112 HV *cb_sk_object; 113 114 // for the incremental parser 115 SV *incr_text; // the source text so far 116 STRLEN incr_pos; // the current offset into the text 117 int incr_nest; // {[]}-nesting level 118 unsigned char incr_mode; 119} JSON; 120 121INLINE void 122json_init (JSON *json) 123{ 124 Zero (json, 1, JSON); 125 json->max_depth = 512; 126} 127 128///////////////////////////////////////////////////////////////////////////// 129// utility functions 130 131INLINE SV * 132get_bool (const char *name) 133{ 134 SV *sv = get_sv (name, 1); 135 136 SvREADONLY_on (sv); 137 SvREADONLY_on (SvRV (sv)); 138 139 return sv; 140} 141 142INLINE void 143shrink (SV *sv) 144{ 145 sv_utf8_downgrade (sv, 1); 146 147 if (SvLEN (sv) > SvCUR (sv) + 1) 148 { 149#ifdef SvPV_shrink_to_cur 150 SvPV_shrink_to_cur (sv); 151#elif defined (SvPV_renew) 152 SvPV_renew (sv, SvCUR (sv) + 1); 153#endif 154 } 155} 156 157// decode an utf-8 character and return it, or (UV)-1 in 158// case of an error. 159// we special-case "safe" characters from U+80 .. U+7FF, 160// but use the very good perl function to parse anything else. 161// note that we never call this function for a ascii codepoints 162INLINE UV 163decode_utf8 (unsigned char *s, STRLEN len, STRLEN *clen) 164{ 165 if (expect_true (len >= 2 166 && IN_RANGE_INC (char, s[0], 0xc2, 0xdf) 167 && IN_RANGE_INC (char, s[1], 0x80, 0xbf))) 168 { 169 *clen = 2; 170 return ((s[0] & 0x1f) << 6) | (s[1] & 0x3f); 171 } 172 else 173 return utf8n_to_uvuni (s, len, clen, UTF8_CHECK_ONLY); 174} 175 176// likewise for encoding, also never called for ascii codepoints 177// this function takes advantage of this fact, although current gccs 178// seem to optimise the check for >= 0x80 away anyways 179INLINE unsigned char * 180encode_utf8 (unsigned char *s, UV ch) 181{ 182 if (expect_false (ch < 0x000080)) 183 *s++ = ch; 184 else if (expect_true (ch < 0x000800)) 185 *s++ = 0xc0 | ( ch >> 6), 186 *s++ = 0x80 | ( ch & 0x3f); 187 else if ( ch < 0x010000) 188 *s++ = 0xe0 | ( ch >> 12), 189 *s++ = 0x80 | ((ch >> 6) & 0x3f), 190 *s++ = 0x80 | ( ch & 0x3f); 191 else if ( ch < 0x110000) 192 *s++ = 0xf0 | ( ch >> 18), 193 *s++ = 0x80 | ((ch >> 12) & 0x3f), 194 *s++ = 0x80 | ((ch >> 6) & 0x3f), 195 *s++ = 0x80 | ( ch & 0x3f); 196 197 return s; 198} 199 200// convert offset pointer to character index, sv must be string 201static STRLEN 202ptr_to_index (SV *sv, char *offset) 203{ 204 return SvUTF8 (sv) 205 ? utf8_distance (offset, SvPVX (sv)) 206 : offset - SvPVX (sv); 207} 208 209///////////////////////////////////////////////////////////////////////////// 210// fp hell 211 212// scan a group of digits, and a trailing exponent 213static void 214json_atof_scan1 (const char *s, NV *accum, int *expo, int postdp, int maxdepth) 215{ 216 UV uaccum = 0; 217 int eaccum = 0; 218 219 // if we recurse too deep, skip all remaining digits 220 // to avoid a stack overflow attack 221 if (expect_false (--maxdepth <= 0)) 222 while (((U8)*s - '0') < 10) 223 ++s; 224 225 for (;;) 226 { 227 U8 dig = (U8)*s - '0'; 228 229 if (expect_false (dig >= 10)) 230 { 231 if (dig == (U8)((U8)'.' - (U8)'0')) 232 { 233 ++s; 234 json_atof_scan1 (s, accum, expo, 1, maxdepth); 235 } 236 else if ((dig | ' ') == 'e' - '0') 237 { 238 int exp2 = 0; 239 int neg = 0; 240 241 ++s; 242 243 if (*s == '-') 244 { 245 ++s; 246 neg = 1; 247 } 248 else if (*s == '+') 249 ++s; 250 251 while ((dig = (U8)*s - '0') < 10) 252 exp2 = exp2 * 10 + *s++ - '0'; 253 254 *expo += neg ? -exp2 : exp2; 255 } 256 257 break; 258 } 259 260 ++s; 261 262 uaccum = uaccum * 10 + dig; 263 ++eaccum; 264 265 // if we have too many digits, then recurse for more 266 // we actually do this for rather few digits 267 if (uaccum >= (UV_MAX - 9) / 10) 268 { 269 if (postdp) *expo -= eaccum; 270 json_atof_scan1 (s, accum, expo, postdp, maxdepth); 271 if (postdp) *expo += eaccum; 272 273 break; 274 } 275 } 276 277 // this relies greatly on the quality of the pow () 278 // implementation of the platform, but a good 279 // implementation is hard to beat. 280 // (IEEE 754 conformant ones are required to be exact) 281 if (postdp) *expo -= eaccum; 282 *accum += uaccum * Perl_pow (10., *expo); 283 *expo += eaccum; 284} 285 286static NV 287json_atof (const char *s) 288{ 289 NV accum = 0.; 290 int expo = 0; 291 int neg = 0; 292 293 if (*s == '-') 294 { 295 ++s; 296 neg = 1; 297 } 298 299 // a recursion depth of ten gives us >>500 bits 300 json_atof_scan1 (s, &accum, &expo, 0, 10); 301 302 return neg ? -accum : accum; 303} 304///////////////////////////////////////////////////////////////////////////// 305// encoder 306 307// structure used for encoding JSON 308typedef struct 309{ 310 char *cur; // SvPVX (sv) + current output position 311 char *end; // SvEND (sv) 312 SV *sv; // result scalar 313 JSON json; 314 U32 indent; // indentation level 315 UV limit; // escape character values >= this value when encoding 316} enc_t; 317 318INLINE void 319need (enc_t *enc, STRLEN len) 320{ 321 if (expect_false (enc->cur + len >= enc->end)) 322 { 323 STRLEN cur = enc->cur - (char *)SvPVX (enc->sv); 324 SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1); 325 enc->cur = SvPVX (enc->sv) + cur; 326 enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1; 327 } 328} 329 330INLINE void 331encode_ch (enc_t *enc, char ch) 332{ 333 need (enc, 1); 334 *enc->cur++ = ch; 335} 336 337static void 338encode_str (enc_t *enc, char *str, STRLEN len, int is_utf8) 339{ 340 char *end = str + len; 341 342 need (enc, len); 343 344 while (str < end) 345 { 346 unsigned char ch = *(unsigned char *)str; 347 348 if (expect_true (ch >= 0x20 && ch < 0x80)) // most common case 349 { 350 if (expect_false (ch == '"')) // but with slow exceptions 351 { 352 need (enc, len += 1); 353 *enc->cur++ = '\\'; 354 *enc->cur++ = '"'; 355 } 356 else if (expect_false (ch == '\\')) 357 { 358 need (enc, len += 1); 359 *enc->cur++ = '\\'; 360 *enc->cur++ = '\\'; 361 } 362 else 363 *enc->cur++ = ch; 364 365 ++str; 366 } 367 else 368 { 369 switch (ch) 370 { 371 case '\010': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'b'; ++str; break; 372 case '\011': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 't'; ++str; break; 373 case '\012': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'n'; ++str; break; 374 case '\014': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'f'; ++str; break; 375 case '\015': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'r'; ++str; break; 376 377 default: 378 { 379 STRLEN clen; 380 UV uch; 381 382 if (is_utf8) 383 { 384 uch = decode_utf8 (str, end - str, &clen); 385 if (clen == (STRLEN)-1) 386 croak ("malformed or illegal unicode character in string [%.11s], cannot convert to JSON", str); 387 } 388 else 389 { 390 uch = ch; 391 clen = 1; 392 } 393 394 if (uch < 0x80/*0x20*/ || uch >= enc->limit) 395 { 396 if (uch >= 0x10000UL) 397 { 398 if (uch >= 0x110000UL) 399 croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch); 400 401 need (enc, len += 11); 402 sprintf (enc->cur, "\\u%04x\\u%04x", 403 (int)((uch - 0x10000) / 0x400 + 0xD800), 404 (int)((uch - 0x10000) % 0x400 + 0xDC00)); 405 enc->cur += 12; 406 } 407 else 408 { 409 need (enc, len += 5); 410 *enc->cur++ = '\\'; 411 *enc->cur++ = 'u'; 412 *enc->cur++ = PL_hexdigit [ uch >> 12 ]; 413 *enc->cur++ = PL_hexdigit [(uch >> 8) & 15]; 414 *enc->cur++ = PL_hexdigit [(uch >> 4) & 15]; 415 *enc->cur++ = PL_hexdigit [(uch >> 0) & 15]; 416 } 417 418 str += clen; 419 } 420 else if (enc->json.flags & F_LATIN1) 421 { 422 *enc->cur++ = uch; 423 str += clen; 424 } 425 else if (is_utf8) 426 { 427 need (enc, len += clen); 428 do 429 { 430 *enc->cur++ = *str++; 431 } 432 while (--clen); 433 } 434 else 435 { 436 need (enc, len += UTF8_MAXBYTES - 1); // never more than 11 bytes needed 437 enc->cur = encode_utf8 (enc->cur, uch); 438 ++str; 439 } 440 } 441 } 442 } 443 444 --len; 445 } 446} 447 448INLINE void 449encode_indent (enc_t *enc) 450{ 451 if (enc->json.flags & F_INDENT) 452 { 453 int spaces = enc->indent * INDENT_STEP; 454 455 need (enc, spaces); 456 memset (enc->cur, ' ', spaces); 457 enc->cur += spaces; 458 } 459} 460 461INLINE void 462encode_space (enc_t *enc) 463{ 464 need (enc, 1); 465 encode_ch (enc, ' '); 466} 467 468INLINE void 469encode_nl (enc_t *enc) 470{ 471 if (enc->json.flags & F_INDENT) 472 { 473 need (enc, 1); 474 encode_ch (enc, '\n'); 475 } 476} 477 478INLINE void 479encode_comma (enc_t *enc) 480{ 481 encode_ch (enc, ','); 482 483 if (enc->json.flags & F_INDENT) 484 encode_nl (enc); 485 else if (enc->json.flags & F_SPACE_AFTER) 486 encode_space (enc); 487} 488 489static void encode_sv (enc_t *enc, SV *sv); 490 491static void 492encode_av (enc_t *enc, AV *av) 493{ 494 int i, len = av_len (av); 495 496 if (enc->indent >= enc->json.max_depth) 497 croak (ERR_NESTING_EXCEEDED); 498 499 encode_ch (enc, '['); 500 501 if (len >= 0) 502 { 503 encode_nl (enc); ++enc->indent; 504 505 for (i = 0; i <= len; ++i) 506 { 507 SV **svp = av_fetch (av, i, 0); 508 509 encode_indent (enc); 510 511 if (svp) 512 encode_sv (enc, *svp); 513 else 514 encode_str (enc, "null", 4, 0); 515 516 if (i < len) 517 encode_comma (enc); 518 } 519 520 encode_nl (enc); --enc->indent; encode_indent (enc); 521 } 522 523 encode_ch (enc, ']'); 524} 525 526static void 527encode_hk (enc_t *enc, HE *he) 528{ 529 encode_ch (enc, '"'); 530 531 if (HeKLEN (he) == HEf_SVKEY) 532 { 533 SV *sv = HeSVKEY (he); 534 STRLEN len; 535 char *str; 536 537 SvGETMAGIC (sv); 538 str = SvPV (sv, len); 539 540 encode_str (enc, str, len, SvUTF8 (sv)); 541 } 542 else 543 encode_str (enc, HeKEY (he), HeKLEN (he), HeKUTF8 (he)); 544 545 encode_ch (enc, '"'); 546 547 if (enc->json.flags & F_SPACE_BEFORE) encode_space (enc); 548 encode_ch (enc, ':'); 549 if (enc->json.flags & F_SPACE_AFTER ) encode_space (enc); 550} 551 552// compare hash entries, used when all keys are bytestrings 553static int 554he_cmp_fast (const void *a_, const void *b_) 555{ 556 int cmp; 557 558 HE *a = *(HE **)a_; 559 HE *b = *(HE **)b_; 560 561 STRLEN la = HeKLEN (a); 562 STRLEN lb = HeKLEN (b); 563 564 if (!(cmp = memcmp (HeKEY (b), HeKEY (a), lb < la ? lb : la))) 565 cmp = lb - la; 566 567 return cmp; 568} 569 570// compare hash entries, used when some keys are sv's or utf-x 571static int 572he_cmp_slow (const void *a, const void *b) 573{ 574 return sv_cmp (HeSVKEY_force (*(HE **)b), HeSVKEY_force (*(HE **)a)); 575} 576 577static void 578encode_hv (enc_t *enc, HV *hv) 579{ 580 HE *he; 581 582 if (enc->indent >= enc->json.max_depth) 583 croak (ERR_NESTING_EXCEEDED); 584 585 encode_ch (enc, '{'); 586 587 // for canonical output we have to sort by keys first 588 // actually, this is mostly due to the stupid so-called 589 // security workaround added somewhere in 5.8.x 590 // that randomises hash orderings 591 if (enc->json.flags & F_CANONICAL && !SvRMAGICAL (hv)) 592 { 593 int count = hv_iterinit (hv); 594 595 if (SvMAGICAL (hv)) 596 { 597 // need to count by iterating. could improve by dynamically building the vector below 598 // but I don't care for the speed of this special case. 599 // note also that we will run into undefined behaviour when the two iterations 600 // do not result in the same count, something I might care for in some later release. 601 602 count = 0; 603 while (hv_iternext (hv)) 604 ++count; 605 606 hv_iterinit (hv); 607 } 608 609 if (count) 610 { 611 int i, fast = 1; 612 HE *hes_stack [STACK_HES]; 613 HE **hes = hes_stack; 614 615 // allocate larger arrays on the heap 616 if (count > STACK_HES) 617 { 618 SV *sv = sv_2mortal (NEWSV (0, count * sizeof (*hes))); 619 hes = (HE **)SvPVX (sv); 620 } 621 622 i = 0; 623 while ((he = hv_iternext (hv))) 624 { 625 hes [i++] = he; 626 if (HeKLEN (he) < 0 || HeKUTF8 (he)) 627 fast = 0; 628 } 629 630 assert (i == count); 631 632 if (fast) 633 qsort (hes, count, sizeof (HE *), he_cmp_fast); 634 else 635 { 636 // hack to forcefully disable "use bytes" 637 COP cop = *PL_curcop; 638 cop.op_private = 0; 639 640 ENTER; 641 SAVETMPS; 642 643 SAVEVPTR (PL_curcop); 644 PL_curcop = &cop; 645 646 qsort (hes, count, sizeof (HE *), he_cmp_slow); 647 648 FREETMPS; 649 LEAVE; 650 } 651 652 encode_nl (enc); ++enc->indent; 653 654 while (count--) 655 { 656 encode_indent (enc); 657 he = hes [count]; 658 encode_hk (enc, he); 659 encode_sv (enc, expect_false (SvMAGICAL (hv)) ? hv_iterval (hv, he) : HeVAL (he)); 660 661 if (count) 662 encode_comma (enc); 663 } 664 665 encode_nl (enc); --enc->indent; encode_indent (enc); 666 } 667 } 668 else 669 { 670 if (hv_iterinit (hv) || SvMAGICAL (hv)) 671 if ((he = hv_iternext (hv))) 672 { 673 encode_nl (enc); ++enc->indent; 674 675 for (;;) 676 { 677 encode_indent (enc); 678 encode_hk (enc, he); 679 encode_sv (enc, expect_false (SvMAGICAL (hv)) ? hv_iterval (hv, he) : HeVAL (he)); 680 681 if (!(he = hv_iternext (hv))) 682 break; 683 684 encode_comma (enc); 685 } 686 687 encode_nl (enc); --enc->indent; encode_indent (enc); 688 } 689 } 690 691 encode_ch (enc, '}'); 692} 693 694// encode objects, arrays and special \0=false and \1=true values. 695static void 696encode_rv (enc_t *enc, SV *sv) 697{ 698 svtype svt; 699 GV *method; 700 701 SvGETMAGIC (sv); 702 svt = SvTYPE (sv); 703 704 if (expect_false (SvOBJECT (sv))) 705 { 706 HV *boolean_stash = !JSON_SLOW || types_boolean_stash 707 ? types_boolean_stash 708 : gv_stashpv ("Types::Serialiser::Boolean", 1); 709 HV *stash = SvSTASH (sv); 710 711 if (stash == boolean_stash) 712 { 713 if (SvIV (sv)) 714 encode_str (enc, "true", 4, 0); 715 else 716 encode_str (enc, "false", 5, 0); 717 } 718 else if ((enc->json.flags & F_ALLOW_TAGS) && (method = gv_fetchmethod_autoload (stash, "FREEZE", 0))) 719 { 720 int count; 721 dSP; 722 723 ENTER; SAVETMPS; PUSHMARK (SP); 724 EXTEND (SP, 2); 725 // we re-bless the reference to get overload and other niceties right 726 PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash)); 727 PUSHs (sv_json); 728 729 PUTBACK; 730 count = call_sv ((SV *)GvCV (method), G_ARRAY); 731 SPAGAIN; 732 733 // catch this surprisingly common error 734 if (SvROK (TOPs) && SvRV (TOPs) == sv) 735 croak ("%s::TO_JSON method returned same object as was passed instead of a new one", HvNAME (SvSTASH (sv))); 736 737 encode_ch (enc, '('); 738 encode_ch (enc, '"'); 739 encode_str (enc, HvNAME (stash), HvNAMELEN (stash), HvNAMEUTF8 (stash)); 740 encode_ch (enc, '"'); 741 encode_ch (enc, ')'); 742 encode_ch (enc, '['); 743 744 while (count) 745 { 746 encode_sv (enc, SP[1 - count--]); 747 748 if (count) 749 encode_ch (enc, ','); 750 } 751 752 encode_ch (enc, ']'); 753 754 PUTBACK; 755 756 FREETMPS; LEAVE; 757 } 758 else if ((enc->json.flags & F_CONV_BLESSED) && (method = gv_fetchmethod_autoload (stash, "TO_JSON", 0))) 759 { 760 dSP; 761 762 ENTER; SAVETMPS; PUSHMARK (SP); 763 // we re-bless the reference to get overload and other niceties right 764 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash)); 765 766 // calling with G_SCALAR ensures that we always get a 1 return value 767 PUTBACK; 768 call_sv ((SV *)GvCV (method), G_SCALAR); 769 SPAGAIN; 770 771 // catch this surprisingly common error 772 if (SvROK (TOPs) && SvRV (TOPs) == sv) 773 croak ("%s::TO_JSON method returned same object as was passed instead of a new one", HvNAME (SvSTASH (sv))); 774 775 sv = POPs; 776 PUTBACK; 777 778 encode_sv (enc, sv); 779 780 FREETMPS; LEAVE; 781 } 782 else if (enc->json.flags & F_ALLOW_BLESSED) 783 encode_str (enc, "null", 4, 0); 784 else 785 croak ("encountered object '%s', but neither allow_blessed, convert_blessed nor allow_tags settings are enabled (or TO_JSON/FREEZE method missing)", 786 SvPV_nolen (sv_2mortal (newRV_inc (sv)))); 787 } 788 else if (svt == SVt_PVHV) 789 encode_hv (enc, (HV *)sv); 790 else if (svt == SVt_PVAV) 791 encode_av (enc, (AV *)sv); 792 else if (svt < SVt_PVAV) 793 { 794 STRLEN len = 0; 795 char *pv = svt ? SvPV (sv, len) : 0; 796 797 if (len == 1 && *pv == '1') 798 encode_str (enc, "true", 4, 0); 799 else if (len == 1 && *pv == '0') 800 encode_str (enc, "false", 5, 0); 801 else if (enc->json.flags & F_ALLOW_UNKNOWN) 802 encode_str (enc, "null", 4, 0); 803 else 804 croak ("cannot encode reference to scalar '%s' unless the scalar is 0 or 1", 805 SvPV_nolen (sv_2mortal (newRV_inc (sv)))); 806 } 807 else if (enc->json.flags & F_ALLOW_UNKNOWN) 808 encode_str (enc, "null", 4, 0); 809 else 810 croak ("encountered %s, but JSON can only represent references to arrays or hashes", 811 SvPV_nolen (sv_2mortal (newRV_inc (sv)))); 812} 813 814static void 815encode_sv (enc_t *enc, SV *sv) 816{ 817 SvGETMAGIC (sv); 818 819 if (SvPOKp (sv)) 820 { 821 STRLEN len; 822 char *str = SvPV (sv, len); 823 encode_ch (enc, '"'); 824 encode_str (enc, str, len, SvUTF8 (sv)); 825 encode_ch (enc, '"'); 826 } 827 else if (SvNOKp (sv)) 828 { 829 // trust that perl will do the right thing w.r.t. JSON syntax. 830 need (enc, NV_DIG + 32); 831 Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur); 832 enc->cur += strlen (enc->cur); 833 } 834 else if (SvIOKp (sv)) 835 { 836 // we assume we can always read an IV as a UV and vice versa 837 // we assume two's complement 838 // we assume no aliasing issues in the union 839 if (SvIsUV (sv) ? SvUVX (sv) <= 59000 840 : SvIVX (sv) <= 59000 && SvIVX (sv) >= -59000) 841 { 842 // optimise the "small number case" 843 // code will likely be branchless and use only a single multiplication 844 // works for numbers up to 59074 845 I32 i = SvIVX (sv); 846 U32 u; 847 char digit, nz = 0; 848 849 need (enc, 6); 850 851 *enc->cur = '-'; enc->cur += i < 0 ? 1 : 0; 852 u = i < 0 ? -i : i; 853 854 // convert to 4.28 fixed-point representation 855 u = u * ((0xfffffff + 10000) / 10000); // 10**5, 5 fractional digits 856 857 // now output digit by digit, each time masking out the integer part 858 // and multiplying by 5 while moving the decimal point one to the right, 859 // resulting in a net multiplication by 10. 860 // we always write the digit to memory but conditionally increment 861 // the pointer, to enable the use of conditional move instructions. 862 digit = u >> 28; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0xfffffffUL) * 5; 863 digit = u >> 27; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x7ffffffUL) * 5; 864 digit = u >> 26; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x3ffffffUL) * 5; 865 digit = u >> 25; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x1ffffffUL) * 5; 866 digit = u >> 24; *enc->cur = digit + '0'; enc->cur += 1; // correctly generate '0' 867 } 868 else 869 { 870 // large integer, use the (rather slow) snprintf way. 871 need (enc, IVUV_MAXCHARS); 872 enc->cur += 873 SvIsUV(sv) 874 ? snprintf (enc->cur, IVUV_MAXCHARS, "%"UVuf, (UV)SvUVX (sv)) 875 : snprintf (enc->cur, IVUV_MAXCHARS, "%"IVdf, (IV)SvIVX (sv)); 876 } 877 } 878 else if (SvROK (sv)) 879 encode_rv (enc, SvRV (sv)); 880 else if (!SvOK (sv) || enc->json.flags & F_ALLOW_UNKNOWN) 881 encode_str (enc, "null", 4, 0); 882 else 883 croak ("encountered perl type (%s,0x%x) that JSON cannot handle, check your input data", 884 SvPV_nolen (sv), (unsigned int)SvFLAGS (sv)); 885} 886 887static SV * 888encode_json (SV *scalar, JSON *json) 889{ 890 enc_t enc; 891 892 if (!(json->flags & F_ALLOW_NONREF) && !SvROK (scalar)) 893 croak ("hash- or arrayref expected (not a simple scalar, use allow_nonref to allow this)"); 894 895 enc.json = *json; 896 enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE)); 897 enc.cur = SvPVX (enc.sv); 898 enc.end = SvEND (enc.sv); 899 enc.indent = 0; 900 enc.limit = enc.json.flags & F_ASCII ? 0x000080UL 901 : enc.json.flags & F_LATIN1 ? 0x000100UL 902 : 0x110000UL; 903 904 SvPOK_only (enc.sv); 905 encode_sv (&enc, scalar); 906 encode_nl (&enc); 907 908 SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv)); 909 *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings 910 911 if (!(enc.json.flags & (F_ASCII | F_LATIN1 | F_UTF8))) 912 SvUTF8_on (enc.sv); 913 914 if (enc.json.flags & F_SHRINK) 915 shrink (enc.sv); 916 917 return enc.sv; 918} 919 920///////////////////////////////////////////////////////////////////////////// 921// decoder 922 923// structure used for decoding JSON 924typedef struct 925{ 926 char *cur; // current parser pointer 927 char *end; // end of input string 928 const char *err; // parse error, if != 0 929 JSON json; 930 U32 depth; // recursion depth 931 U32 maxdepth; // recursion depth limit 932} dec_t; 933 934INLINE void 935decode_comment (dec_t *dec) 936{ 937 // only '#'-style comments allowed a.t.m. 938 939 while (*dec->cur && *dec->cur != 0x0a && *dec->cur != 0x0d) 940 ++dec->cur; 941} 942 943INLINE void 944decode_ws (dec_t *dec) 945{ 946 for (;;) 947 { 948 char ch = *dec->cur; 949 950 if (ch > 0x20) 951 { 952 if (expect_false (ch == '#')) 953 { 954 if (dec->json.flags & F_RELAXED) 955 decode_comment (dec); 956 else 957 break; 958 } 959 else 960 break; 961 } 962 else if (ch != 0x20 && ch != 0x0a && ch != 0x0d && ch != 0x09) 963 break; // parse error, but let higher level handle it, gives better error messages 964 965 ++dec->cur; 966 } 967} 968 969#define ERR(reason) SB dec->err = reason; goto fail; SE 970 971#define EXPECT_CH(ch) SB \ 972 if (*dec->cur != ch) \ 973 ERR (# ch " expected"); \ 974 ++dec->cur; \ 975 SE 976 977#define DEC_INC_DEPTH if (++dec->depth > dec->json.max_depth) ERR (ERR_NESTING_EXCEEDED) 978#define DEC_DEC_DEPTH --dec->depth 979 980static SV *decode_sv (dec_t *dec); 981 982static signed char decode_hexdigit[256]; 983 984static UV 985decode_4hex (dec_t *dec) 986{ 987 signed char d1, d2, d3, d4; 988 unsigned char *cur = (unsigned char *)dec->cur; 989 990 d1 = decode_hexdigit [cur [0]]; if (expect_false (d1 < 0)) ERR ("exactly four hexadecimal digits expected"); 991 d2 = decode_hexdigit [cur [1]]; if (expect_false (d2 < 0)) ERR ("exactly four hexadecimal digits expected"); 992 d3 = decode_hexdigit [cur [2]]; if (expect_false (d3 < 0)) ERR ("exactly four hexadecimal digits expected"); 993 d4 = decode_hexdigit [cur [3]]; if (expect_false (d4 < 0)) ERR ("exactly four hexadecimal digits expected"); 994 995 dec->cur += 4; 996 997 return ((UV)d1) << 12 998 | ((UV)d2) << 8 999 | ((UV)d3) << 4 1000 | ((UV)d4); 1001 1002fail: 1003 return (UV)-1; 1004} 1005 1006static SV * 1007decode_str (dec_t *dec) 1008{ 1009 SV *sv = 0; 1010 int utf8 = 0; 1011 char *dec_cur = dec->cur; 1012 1013 do 1014 { 1015 char buf [SHORT_STRING_LEN + UTF8_MAXBYTES]; 1016 char *cur = buf; 1017 1018 do 1019 { 1020 unsigned char ch = *(unsigned char *)dec_cur++; 1021 1022 if (expect_false (ch == '"')) 1023 { 1024 --dec_cur; 1025 break; 1026 } 1027 else if (expect_false (ch == '\\')) 1028 { 1029 switch (*dec_cur) 1030 { 1031 case '\\': 1032 case '/': 1033 case '"': *cur++ = *dec_cur++; break; 1034 1035 case 'b': ++dec_cur; *cur++ = '\010'; break; 1036 case 't': ++dec_cur; *cur++ = '\011'; break; 1037 case 'n': ++dec_cur; *cur++ = '\012'; break; 1038 case 'f': ++dec_cur; *cur++ = '\014'; break; 1039 case 'r': ++dec_cur; *cur++ = '\015'; break; 1040 1041 case 'u': 1042 { 1043 UV lo, hi; 1044 ++dec_cur; 1045 1046 dec->cur = dec_cur; 1047 hi = decode_4hex (dec); 1048 dec_cur = dec->cur; 1049 if (hi == (UV)-1) 1050 goto fail; 1051 1052 // possibly a surrogate pair 1053 if (hi >= 0xd800) 1054 if (hi < 0xdc00) 1055 { 1056 if (dec_cur [0] != '\\' || dec_cur [1] != 'u') 1057 ERR ("missing low surrogate character in surrogate pair"); 1058 1059 dec_cur += 2; 1060 1061 dec->cur = dec_cur; 1062 lo = decode_4hex (dec); 1063 dec_cur = dec->cur; 1064 if (lo == (UV)-1) 1065 goto fail; 1066 1067 if (lo < 0xdc00 || lo >= 0xe000) 1068 ERR ("surrogate pair expected"); 1069 1070 hi = (hi - 0xD800) * 0x400 + (lo - 0xDC00) + 0x10000; 1071 } 1072 else if (hi < 0xe000) 1073 ERR ("missing high surrogate character in surrogate pair"); 1074 1075 if (hi >= 0x80) 1076 { 1077 utf8 = 1; 1078 1079 cur = encode_utf8 (cur, hi); 1080 } 1081 else 1082 *cur++ = hi; 1083 } 1084 break; 1085 1086 default: 1087 --dec_cur; 1088 ERR ("illegal backslash escape sequence in string"); 1089 } 1090 } 1091 else if (expect_true (ch >= 0x20 && ch < 0x80)) 1092 *cur++ = ch; 1093 else if (ch >= 0x80) 1094 { 1095 STRLEN clen; 1096 1097 --dec_cur; 1098 1099 decode_utf8 (dec_cur, dec->end - dec_cur, &clen); 1100 if (clen == (STRLEN)-1) 1101 ERR ("malformed UTF-8 character in JSON string"); 1102 1103 do 1104 *cur++ = *dec_cur++; 1105 while (--clen); 1106 1107 utf8 = 1; 1108 } 1109 else 1110 { 1111 --dec_cur; 1112 1113 if (!ch) 1114 ERR ("unexpected end of string while parsing JSON string"); 1115 else 1116 ERR ("invalid character encountered while parsing JSON string"); 1117 } 1118 } 1119 while (cur < buf + SHORT_STRING_LEN); 1120 1121 { 1122 STRLEN len = cur - buf; 1123 1124 if (sv) 1125 { 1126 STRLEN cur = SvCUR (sv); 1127 1128 if (SvLEN (sv) <= cur + len) 1129 SvGROW (sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1); 1130 1131 memcpy (SvPVX (sv) + SvCUR (sv), buf, len); 1132 SvCUR_set (sv, SvCUR (sv) + len); 1133 } 1134 else 1135 sv = newSVpvn (buf, len); 1136 } 1137 } 1138 while (*dec_cur != '"'); 1139 1140 ++dec_cur; 1141 1142 if (sv) 1143 { 1144 SvPOK_only (sv); 1145 *SvEND (sv) = 0; 1146 1147 if (utf8) 1148 SvUTF8_on (sv); 1149 } 1150 else 1151 sv = newSVpvn ("", 0); 1152 1153 dec->cur = dec_cur; 1154 return sv; 1155 1156fail: 1157 dec->cur = dec_cur; 1158 return 0; 1159} 1160 1161static SV * 1162decode_num (dec_t *dec) 1163{ 1164 int is_nv = 0; 1165 char *start = dec->cur; 1166 1167 // [minus] 1168 if (*dec->cur == '-') 1169 ++dec->cur; 1170 1171 if (*dec->cur == '0') 1172 { 1173 ++dec->cur; 1174 if (*dec->cur >= '0' && *dec->cur <= '9') 1175 ERR ("malformed number (leading zero must not be followed by another digit)"); 1176 } 1177 else if (*dec->cur < '0' || *dec->cur > '9') 1178 ERR ("malformed number (no digits after initial minus)"); 1179 else 1180 do 1181 { 1182 ++dec->cur; 1183 } 1184 while (*dec->cur >= '0' && *dec->cur <= '9'); 1185 1186 // [frac] 1187 if (*dec->cur == '.') 1188 { 1189 ++dec->cur; 1190 1191 if (*dec->cur < '0' || *dec->cur > '9') 1192 ERR ("malformed number (no digits after decimal point)"); 1193 1194 do 1195 { 1196 ++dec->cur; 1197 } 1198 while (*dec->cur >= '0' && *dec->cur <= '9'); 1199 1200 is_nv = 1; 1201 } 1202 1203 // [exp] 1204 if (*dec->cur == 'e' || *dec->cur == 'E') 1205 { 1206 ++dec->cur; 1207 1208 if (*dec->cur == '-' || *dec->cur == '+') 1209 ++dec->cur; 1210 1211 if (*dec->cur < '0' || *dec->cur > '9') 1212 ERR ("malformed number (no digits after exp sign)"); 1213 1214 do 1215 { 1216 ++dec->cur; 1217 } 1218 while (*dec->cur >= '0' && *dec->cur <= '9'); 1219 1220 is_nv = 1; 1221 } 1222 1223 if (!is_nv) 1224 { 1225 int len = dec->cur - start; 1226 1227 // special case the rather common 1..5-digit-int case 1228 if (*start == '-') 1229 switch (len) 1230 { 1231 case 2: return newSViv (-(IV)( start [1] - '0' * 1)); 1232 case 3: return newSViv (-(IV)( start [1] * 10 + start [2] - '0' * 11)); 1233 case 4: return newSViv (-(IV)( start [1] * 100 + start [2] * 10 + start [3] - '0' * 111)); 1234 case 5: return newSViv (-(IV)( start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 1111)); 1235 case 6: return newSViv (-(IV)(start [1] * 10000 + start [2] * 1000 + start [3] * 100 + start [4] * 10 + start [5] - '0' * 11111)); 1236 } 1237 else 1238 switch (len) 1239 { 1240 case 1: return newSViv ( start [0] - '0' * 1); 1241 case 2: return newSViv ( start [0] * 10 + start [1] - '0' * 11); 1242 case 3: return newSViv ( start [0] * 100 + start [1] * 10 + start [2] - '0' * 111); 1243 case 4: return newSViv ( start [0] * 1000 + start [1] * 100 + start [2] * 10 + start [3] - '0' * 1111); 1244 case 5: return newSViv ( start [0] * 10000 + start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 11111); 1245 } 1246 1247 { 1248 UV uv; 1249 int numtype = grok_number (start, len, &uv); 1250 if (numtype & IS_NUMBER_IN_UV) 1251 if (numtype & IS_NUMBER_NEG) 1252 { 1253 if (uv < (UV)IV_MIN) 1254 return newSViv (-(IV)uv); 1255 } 1256 else 1257 return newSVuv (uv); 1258 } 1259 1260 len -= *start == '-' ? 1 : 0; 1261 1262 // does not fit into IV or UV, try NV 1263 if (len <= NV_DIG) 1264 // fits into NV without loss of precision 1265 return newSVnv (json_atof (start)); 1266 1267 // everything else fails, convert it to a string 1268 return newSVpvn (start, dec->cur - start); 1269 } 1270 1271 // loss of precision here 1272 return newSVnv (json_atof (start)); 1273 1274fail: 1275 return 0; 1276} 1277 1278static SV * 1279decode_av (dec_t *dec) 1280{ 1281 AV *av = newAV (); 1282 1283 DEC_INC_DEPTH; 1284 decode_ws (dec); 1285 1286 if (*dec->cur == ']') 1287 ++dec->cur; 1288 else 1289 for (;;) 1290 { 1291 SV *value; 1292 1293 value = decode_sv (dec); 1294 if (!value) 1295 goto fail; 1296 1297 av_push (av, value); 1298 1299 decode_ws (dec); 1300 1301 if (*dec->cur == ']') 1302 { 1303 ++dec->cur; 1304 break; 1305 } 1306 1307 if (*dec->cur != ',') 1308 ERR (", or ] expected while parsing array"); 1309 1310 ++dec->cur; 1311 1312 decode_ws (dec); 1313 1314 if (*dec->cur == ']' && dec->json.flags & F_RELAXED) 1315 { 1316 ++dec->cur; 1317 break; 1318 } 1319 } 1320 1321 DEC_DEC_DEPTH; 1322 return newRV_noinc ((SV *)av); 1323 1324fail: 1325 SvREFCNT_dec (av); 1326 DEC_DEC_DEPTH; 1327 return 0; 1328} 1329 1330static SV * 1331decode_hv (dec_t *dec) 1332{ 1333 SV *sv; 1334 HV *hv = newHV (); 1335 1336 DEC_INC_DEPTH; 1337 decode_ws (dec); 1338 1339 if (*dec->cur == '}') 1340 ++dec->cur; 1341 else 1342 for (;;) 1343 { 1344 EXPECT_CH ('"'); 1345 1346 // heuristic: assume that 1347 // a) decode_str + hv_store_ent are abysmally slow. 1348 // b) most hash keys are short, simple ascii text. 1349 // => try to "fast-match" such strings to avoid 1350 // the overhead of decode_str + hv_store_ent. 1351 { 1352 SV *value; 1353 char *p = dec->cur; 1354 char *e = p + 24; // only try up to 24 bytes 1355 1356 for (;;) 1357 { 1358 // the >= 0x80 is false on most architectures 1359 if (p == e || *p < 0x20 || *p >= 0x80 || *p == '\\') 1360 { 1361 // slow path, back up and use decode_str 1362 SV *key = decode_str (dec); 1363 if (!key) 1364 goto fail; 1365 1366 decode_ws (dec); EXPECT_CH (':'); 1367 1368 decode_ws (dec); 1369 value = decode_sv (dec); 1370 if (!value) 1371 { 1372 SvREFCNT_dec (key); 1373 goto fail; 1374 } 1375 1376 hv_store_ent (hv, key, value, 0); 1377 SvREFCNT_dec (key); 1378 1379 break; 1380 } 1381 else if (*p == '"') 1382 { 1383 // fast path, got a simple key 1384 char *key = dec->cur; 1385 int len = p - key; 1386 dec->cur = p + 1; 1387 1388 decode_ws (dec); EXPECT_CH (':'); 1389 1390 decode_ws (dec); 1391 value = decode_sv (dec); 1392 if (!value) 1393 goto fail; 1394 1395 hv_store (hv, key, len, value, 0); 1396 1397 break; 1398 } 1399 1400 ++p; 1401 } 1402 } 1403 1404 decode_ws (dec); 1405 1406 if (*dec->cur == '}') 1407 { 1408 ++dec->cur; 1409 break; 1410 } 1411 1412 if (*dec->cur != ',') 1413 ERR (", or } expected while parsing object/hash"); 1414 1415 ++dec->cur; 1416 1417 decode_ws (dec); 1418 1419 if (*dec->cur == '}' && dec->json.flags & F_RELAXED) 1420 { 1421 ++dec->cur; 1422 break; 1423 } 1424 } 1425 1426 DEC_DEC_DEPTH; 1427 sv = newRV_noinc ((SV *)hv); 1428 1429 // check filter callbacks 1430 if (dec->json.flags & F_HOOK) 1431 { 1432 if (dec->json.cb_sk_object && HvKEYS (hv) == 1) 1433 { 1434 HE *cb, *he; 1435 1436 hv_iterinit (hv); 1437 he = hv_iternext (hv); 1438 hv_iterinit (hv); 1439 1440 // the next line creates a mortal sv each time its called. 1441 // might want to optimise this for common cases. 1442 cb = hv_fetch_ent (dec->json.cb_sk_object, hv_iterkeysv (he), 0, 0); 1443 1444 if (cb) 1445 { 1446 dSP; 1447 int count; 1448 1449 ENTER; SAVETMPS; PUSHMARK (SP); 1450 XPUSHs (HeVAL (he)); 1451 sv_2mortal (sv); 1452 1453 PUTBACK; count = call_sv (HeVAL (cb), G_ARRAY); SPAGAIN; 1454 1455 if (count == 1) 1456 { 1457 sv = newSVsv (POPs); 1458 FREETMPS; LEAVE; 1459 return sv; 1460 } 1461 1462 SvREFCNT_inc (sv); 1463 FREETMPS; LEAVE; 1464 } 1465 } 1466 1467 if (dec->json.cb_object) 1468 { 1469 dSP; 1470 int count; 1471 1472 ENTER; SAVETMPS; PUSHMARK (SP); 1473 XPUSHs (sv_2mortal (sv)); 1474 1475 PUTBACK; count = call_sv (dec->json.cb_object, G_ARRAY); SPAGAIN; 1476 1477 if (count == 1) 1478 { 1479 sv = newSVsv (POPs); 1480 FREETMPS; LEAVE; 1481 return sv; 1482 } 1483 1484 SvREFCNT_inc (sv); 1485 FREETMPS; LEAVE; 1486 } 1487 } 1488 1489 return sv; 1490 1491fail: 1492 SvREFCNT_dec (hv); 1493 DEC_DEC_DEPTH; 1494 return 0; 1495} 1496 1497static SV * 1498decode_tag (dec_t *dec) 1499{ 1500 SV *tag = 0; 1501 SV *val = 0; 1502 1503 if (!(dec->json.flags & F_ALLOW_TAGS)) 1504 ERR ("malformed JSON string, neither array, object, number, string or atom"); 1505 1506 ++dec->cur; 1507 1508 decode_ws (dec); 1509 1510 tag = decode_sv (dec); 1511 if (!tag) 1512 goto fail; 1513 1514 if (!SvPOK (tag)) 1515 ERR ("malformed JSON string, (tag) must be a string"); 1516 1517 decode_ws (dec); 1518 1519 if (*dec->cur != ')') 1520 ERR (") expected after tag"); 1521 1522 ++dec->cur; 1523 1524 decode_ws (dec); 1525 1526 val = decode_sv (dec); 1527 if (!val) 1528 goto fail; 1529 1530 if (!SvROK (val) || SvTYPE (SvRV (val)) != SVt_PVAV) 1531 ERR ("malformed JSON string, tag value must be an array"); 1532 1533 { 1534 AV *av = (AV *)SvRV (val); 1535 int i, len = av_len (av) + 1; 1536 HV *stash = gv_stashsv (tag, 0); 1537 SV *sv; 1538 1539 if (!stash) 1540 ERR ("cannot decode perl-object (package does not exist)"); 1541 1542 GV *method = gv_fetchmethod_autoload (stash, "THAW", 0); 1543 1544 if (!method) 1545 ERR ("cannot decode perl-object (package does not have a THAW method)"); 1546 1547 dSP; 1548 1549 ENTER; SAVETMPS; PUSHMARK (SP); 1550 EXTEND (SP, len + 2); 1551 // we re-bless the reference to get overload and other niceties right 1552 PUSHs (tag); 1553 PUSHs (sv_json); 1554 1555 for (i = 0; i < len; ++i) 1556 PUSHs (*av_fetch (av, i, 1)); 1557 1558 PUTBACK; 1559 call_sv ((SV *)GvCV (method), G_SCALAR); 1560 SPAGAIN; 1561 1562 SvREFCNT_dec (tag); 1563 SvREFCNT_dec (val); 1564 sv = SvREFCNT_inc (POPs); 1565 1566 PUTBACK; 1567 1568 FREETMPS; LEAVE; 1569 1570 return sv; 1571 } 1572 1573fail: 1574 SvREFCNT_dec (tag); 1575 SvREFCNT_dec (val); 1576 return 0; 1577} 1578 1579static SV * 1580decode_sv (dec_t *dec) 1581{ 1582 // the beauty of JSON: you need exactly one character lookahead 1583 // to parse everything. 1584 switch (*dec->cur) 1585 { 1586 case '"': ++dec->cur; return decode_str (dec); 1587 case '[': ++dec->cur; return decode_av (dec); 1588 case '{': ++dec->cur; return decode_hv (dec); 1589 case '(': return decode_tag (dec); 1590 1591 case '-': 1592 case '0': case '1': case '2': case '3': case '4': 1593 case '5': case '6': case '7': case '8': case '9': 1594 return decode_num (dec); 1595 1596 case 't': 1597 if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "true", 4)) 1598 { 1599 dec->cur += 4; 1600#if JSON_SLOW 1601 types_true = get_bool ("Types::Serialiser::true"); 1602#endif 1603 return newSVsv (types_true); 1604 } 1605 else 1606 ERR ("'true' expected"); 1607 1608 break; 1609 1610 case 'f': 1611 if (dec->end - dec->cur >= 5 && !memcmp (dec->cur, "false", 5)) 1612 { 1613 dec->cur += 5; 1614#if JSON_SLOW 1615 types_false = get_bool ("Types::Serialiser::false"); 1616#endif 1617 return newSVsv (types_false); 1618 } 1619 else 1620 ERR ("'false' expected"); 1621 1622 break; 1623 1624 case 'n': 1625 if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "null", 4)) 1626 { 1627 dec->cur += 4; 1628 return newSVsv (&PL_sv_undef); 1629 } 1630 else 1631 ERR ("'null' expected"); 1632 1633 break; 1634 1635 default: 1636 ERR ("malformed JSON string, neither tag, array, object, number, string or atom"); 1637 break; 1638 } 1639 1640fail: 1641 return 0; 1642} 1643 1644static SV * 1645decode_json (SV *string, JSON *json, char **offset_return) 1646{ 1647 dec_t dec; 1648 SV *sv; 1649 1650 /* work around bugs in 5.10 where manipulating magic values 1651 * makes perl ignore the magic in subsequent accesses. 1652 * also make a copy of non-PV values, to get them into a clean 1653 * state (SvPV should do that, but it's buggy, see below). 1654 */ 1655 /*SvGETMAGIC (string);*/ 1656 if (SvMAGICAL (string) || !SvPOK (string)) 1657 string = sv_2mortal (newSVsv (string)); 1658 1659 SvUPGRADE (string, SVt_PV); 1660 1661 /* work around a bug in perl 5.10, which causes SvCUR to fail an 1662 * assertion with -DDEBUGGING, although SvCUR is documented to 1663 * return the xpv_cur field which certainly exists after upgrading. 1664 * according to nicholas clark, calling SvPOK fixes this. 1665 * But it doesn't fix it, so try another workaround, call SvPV_nolen 1666 * and hope for the best. 1667 * Damnit, SvPV_nolen still trips over yet another assertion. This 1668 * assertion business is seriously broken, try yet another workaround 1669 * for the broken -DDEBUGGING. 1670 */ 1671 { 1672#ifdef DEBUGGING 1673 STRLEN offset = SvOK (string) ? sv_len (string) : 0; 1674#else 1675 STRLEN offset = SvCUR (string); 1676#endif 1677 1678 if (offset > json->max_size && json->max_size) 1679 croak ("attempted decode of JSON text of %lu bytes size, but max_size is set to %lu", 1680 (unsigned long)SvCUR (string), (unsigned long)json->max_size); 1681 } 1682 1683 if (DECODE_WANTS_OCTETS (json)) 1684 sv_utf8_downgrade (string, 0); 1685 else 1686 sv_utf8_upgrade (string); 1687 1688 SvGROW (string, SvCUR (string) + 1); // should basically be a NOP 1689 1690 dec.json = *json; 1691 dec.cur = SvPVX (string); 1692 dec.end = SvEND (string); 1693 dec.err = 0; 1694 dec.depth = 0; 1695 1696 if (dec.json.cb_object || dec.json.cb_sk_object) 1697 dec.json.flags |= F_HOOK; 1698 1699 *dec.end = 0; // this should basically be a nop, too, but make sure it's there 1700 1701 decode_ws (&dec); 1702 sv = decode_sv (&dec); 1703 1704 if (offset_return) 1705 *offset_return = dec.cur; 1706 1707 if (!(offset_return || !sv)) 1708 { 1709 // check for trailing garbage 1710 decode_ws (&dec); 1711 1712 if (*dec.cur) 1713 { 1714 dec.err = "garbage after JSON object"; 1715 SvREFCNT_dec (sv); 1716 sv = 0; 1717 } 1718 } 1719 1720 if (!sv) 1721 { 1722 SV *uni = sv_newmortal (); 1723 1724 // horrible hack to silence warning inside pv_uni_display 1725 COP cop = *PL_curcop; 1726 cop.cop_warnings = pWARN_NONE; 1727 ENTER; 1728 SAVEVPTR (PL_curcop); 1729 PL_curcop = &cop; 1730 pv_uni_display (uni, dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ); 1731 LEAVE; 1732 1733 croak ("%s, at character offset %d (before \"%s\")", 1734 dec.err, 1735 (int)ptr_to_index (string, dec.cur), 1736 dec.cur != dec.end ? SvPV_nolen (uni) : "(end of string)"); 1737 } 1738 1739 sv = sv_2mortal (sv); 1740 1741 if (!(dec.json.flags & F_ALLOW_NONREF) && !SvROK (sv)) 1742 croak ("JSON text must be an object or array (but found number, string, true, false or null, use allow_nonref to allow this)"); 1743 1744 return sv; 1745} 1746 1747///////////////////////////////////////////////////////////////////////////// 1748// incremental parser 1749 1750static void 1751incr_parse (JSON *self) 1752{ 1753 const char *p = SvPVX (self->incr_text) + self->incr_pos; 1754 1755 // the state machine here is a bit convoluted and could be simplified a lot 1756 // but this would make it slower, so... 1757 1758 for (;;) 1759 { 1760 //printf ("loop pod %d *p<%c><%s>, mode %d nest %d\n", p - SvPVX (self->incr_text), *p, p, self->incr_mode, self->incr_nest);//D 1761 switch (self->incr_mode) 1762 { 1763 // only used for initial whitespace skipping 1764 case INCR_M_WS: 1765 for (;;) 1766 { 1767 if (*p > 0x20) 1768 { 1769 if (*p == '#') 1770 { 1771 self->incr_mode = INCR_M_C0; 1772 goto incr_m_c; 1773 } 1774 else 1775 { 1776 self->incr_mode = INCR_M_JSON; 1777 goto incr_m_json; 1778 } 1779 } 1780 else if (!*p) 1781 goto interrupt; 1782 1783 ++p; 1784 } 1785 1786 // skip a single char inside a string (for \\-processing) 1787 case INCR_M_BS: 1788 if (!*p) 1789 goto interrupt; 1790 1791 ++p; 1792 self->incr_mode = INCR_M_STR; 1793 goto incr_m_str; 1794 1795 // inside #-style comments 1796 case INCR_M_C0: 1797 case INCR_M_C1: 1798 incr_m_c: 1799 for (;;) 1800 { 1801 if (*p == '\n') 1802 { 1803 self->incr_mode = self->incr_mode == INCR_M_C0 ? INCR_M_WS : INCR_M_JSON; 1804 break; 1805 } 1806 else if (!*p) 1807 goto interrupt; 1808 1809 ++p; 1810 } 1811 1812 break; 1813 1814 // inside a string 1815 case INCR_M_STR: 1816 incr_m_str: 1817 for (;;) 1818 { 1819 if (*p == '"') 1820 { 1821 ++p; 1822 self->incr_mode = INCR_M_JSON; 1823 1824 if (!self->incr_nest) 1825 goto interrupt; 1826 1827 goto incr_m_json; 1828 } 1829 else if (*p == '\\') 1830 { 1831 ++p; // "virtually" consumes character after \ 1832 1833 if (!*p) // if at end of string we have to switch modes 1834 { 1835 self->incr_mode = INCR_M_BS; 1836 goto interrupt; 1837 } 1838 } 1839 else if (!*p) 1840 goto interrupt; 1841 1842 ++p; 1843 } 1844 1845 // after initial ws, outside string 1846 case INCR_M_JSON: 1847 incr_m_json: 1848 for (;;) 1849 { 1850 switch (*p++) 1851 { 1852 case 0: 1853 --p; 1854 goto interrupt; 1855 1856 case 0x09: 1857 case 0x0a: 1858 case 0x0d: 1859 case 0x20: 1860 if (!self->incr_nest) 1861 { 1862 --p; // do not eat the whitespace, let the next round do it 1863 goto interrupt; 1864 } 1865 break; 1866 1867 case '"': 1868 self->incr_mode = INCR_M_STR; 1869 goto incr_m_str; 1870 1871 case '[': 1872 case '{': 1873 case '(': 1874 if (++self->incr_nest > self->max_depth) 1875 croak (ERR_NESTING_EXCEEDED); 1876 break; 1877 1878 case ']': 1879 case '}': 1880 if (--self->incr_nest <= 0) 1881 goto interrupt; 1882 break; 1883 1884 case ')': 1885 --self->incr_nest; 1886 break; 1887 1888 case '#': 1889 self->incr_mode = INCR_M_C1; 1890 goto incr_m_c; 1891 } 1892 } 1893 } 1894 1895 modechange: 1896 ; 1897 } 1898 1899interrupt: 1900 self->incr_pos = p - SvPVX (self->incr_text); 1901 //printf ("interrupt<%.*s>\n", self->incr_pos, SvPVX(self->incr_text));//D 1902 //printf ("return pos %d mode %d nest %d\n", self->incr_pos, self->incr_mode, self->incr_nest);//D 1903} 1904 1905///////////////////////////////////////////////////////////////////////////// 1906// XS interface functions 1907 1908MODULE = JSON::XS PACKAGE = JSON::XS 1909 1910BOOT: 1911{ 1912 int i; 1913 1914 for (i = 0; i < 256; ++i) 1915 decode_hexdigit [i] = 1916 i >= '0' && i <= '9' ? i - '0' 1917 : i >= 'a' && i <= 'f' ? i - 'a' + 10 1918 : i >= 'A' && i <= 'F' ? i - 'A' + 10 1919 : -1; 1920 1921 json_stash = gv_stashpv ("JSON::XS" , 1); 1922 types_boolean_stash = gv_stashpv ("Types::Serialiser::Boolean", 1); 1923 1924 types_true = get_bool ("Types::Serialiser::true"); 1925 types_false = get_bool ("Types::Serialiser::false"); 1926 1927 sv_json = newSVpv ("JSON", 0); 1928 SvREADONLY_on (sv_json); 1929 1930 CvNODEBUG_on (get_cv ("JSON::XS::incr_text", 0)); /* the debugger completely breaks lvalue subs */ 1931} 1932 1933PROTOTYPES: DISABLE 1934 1935void CLONE (...) 1936 CODE: 1937 json_stash = 0; 1938 types_boolean_stash = 0; 1939 1940void new (char *klass) 1941 PPCODE: 1942{ 1943 SV *pv = NEWSV (0, sizeof (JSON)); 1944 SvPOK_only (pv); 1945 json_init ((JSON *)SvPVX (pv)); 1946 XPUSHs (sv_2mortal (sv_bless ( 1947 newRV_noinc (pv), 1948 strEQ (klass, "JSON::XS") ? JSON_STASH : gv_stashpv (klass, 1) 1949 ))); 1950} 1951 1952void ascii (JSON *self, int enable = 1) 1953 ALIAS: 1954 ascii = F_ASCII 1955 latin1 = F_LATIN1 1956 utf8 = F_UTF8 1957 indent = F_INDENT 1958 canonical = F_CANONICAL 1959 space_before = F_SPACE_BEFORE 1960 space_after = F_SPACE_AFTER 1961 pretty = F_PRETTY 1962 allow_nonref = F_ALLOW_NONREF 1963 shrink = F_SHRINK 1964 allow_blessed = F_ALLOW_BLESSED 1965 convert_blessed = F_CONV_BLESSED 1966 relaxed = F_RELAXED 1967 allow_unknown = F_ALLOW_UNKNOWN 1968 allow_tags = F_ALLOW_TAGS 1969 PPCODE: 1970{ 1971 if (enable) 1972 self->flags |= ix; 1973 else 1974 self->flags &= ~ix; 1975 1976 XPUSHs (ST (0)); 1977} 1978 1979void get_ascii (JSON *self) 1980 ALIAS: 1981 get_ascii = F_ASCII 1982 get_latin1 = F_LATIN1 1983 get_utf8 = F_UTF8 1984 get_indent = F_INDENT 1985 get_canonical = F_CANONICAL 1986 get_space_before = F_SPACE_BEFORE 1987 get_space_after = F_SPACE_AFTER 1988 get_allow_nonref = F_ALLOW_NONREF 1989 get_shrink = F_SHRINK 1990 get_allow_blessed = F_ALLOW_BLESSED 1991 get_convert_blessed = F_CONV_BLESSED 1992 get_relaxed = F_RELAXED 1993 get_allow_unknown = F_ALLOW_UNKNOWN 1994 get_allow_tags = F_ALLOW_TAGS 1995 PPCODE: 1996 XPUSHs (boolSV (self->flags & ix)); 1997 1998void max_depth (JSON *self, U32 max_depth = 0x80000000UL) 1999 PPCODE: 2000 self->max_depth = max_depth; 2001 XPUSHs (ST (0)); 2002 2003U32 get_max_depth (JSON *self) 2004 CODE: 2005 RETVAL = self->max_depth; 2006 OUTPUT: 2007 RETVAL 2008 2009void max_size (JSON *self, U32 max_size = 0) 2010 PPCODE: 2011 self->max_size = max_size; 2012 XPUSHs (ST (0)); 2013 2014int get_max_size (JSON *self) 2015 CODE: 2016 RETVAL = self->max_size; 2017 OUTPUT: 2018 RETVAL 2019 2020void filter_json_object (JSON *self, SV *cb = &PL_sv_undef) 2021 PPCODE: 2022{ 2023 SvREFCNT_dec (self->cb_object); 2024 self->cb_object = SvOK (cb) ? newSVsv (cb) : 0; 2025 2026 XPUSHs (ST (0)); 2027} 2028 2029void filter_json_single_key_object (JSON *self, SV *key, SV *cb = &PL_sv_undef) 2030 PPCODE: 2031{ 2032 if (!self->cb_sk_object) 2033 self->cb_sk_object = newHV (); 2034 2035 if (SvOK (cb)) 2036 hv_store_ent (self->cb_sk_object, key, newSVsv (cb), 0); 2037 else 2038 { 2039 hv_delete_ent (self->cb_sk_object, key, G_DISCARD, 0); 2040 2041 if (!HvKEYS (self->cb_sk_object)) 2042 { 2043 SvREFCNT_dec (self->cb_sk_object); 2044 self->cb_sk_object = 0; 2045 } 2046 } 2047 2048 XPUSHs (ST (0)); 2049} 2050 2051void encode (JSON *self, SV *scalar) 2052 PPCODE: 2053 PUTBACK; scalar = encode_json (scalar, self); SPAGAIN; 2054 XPUSHs (scalar); 2055 2056void decode (JSON *self, SV *jsonstr) 2057 PPCODE: 2058 PUTBACK; jsonstr = decode_json (jsonstr, self, 0); SPAGAIN; 2059 XPUSHs (jsonstr); 2060 2061void decode_prefix (JSON *self, SV *jsonstr) 2062 PPCODE: 2063{ 2064 SV *sv; 2065 char *offset; 2066 PUTBACK; sv = decode_json (jsonstr, self, &offset); SPAGAIN; 2067 EXTEND (SP, 2); 2068 PUSHs (sv); 2069 PUSHs (sv_2mortal (newSVuv (ptr_to_index (jsonstr, offset)))); 2070} 2071 2072void incr_parse (JSON *self, SV *jsonstr = 0) 2073 PPCODE: 2074{ 2075 if (!self->incr_text) 2076 self->incr_text = newSVpvn ("", 0); 2077 2078 /* if utf8-ness doesn't match the decoder, need to upgrade/downgrade */ 2079 if (!DECODE_WANTS_OCTETS (self) == !SvUTF8 (self->incr_text)) 2080 if (DECODE_WANTS_OCTETS (self)) 2081 { 2082 if (self->incr_pos) 2083 self->incr_pos = utf8_length ((U8 *)SvPVX (self->incr_text), 2084 (U8 *)SvPVX (self->incr_text) + self->incr_pos); 2085 2086 sv_utf8_downgrade (self->incr_text, 0); 2087 } 2088 else 2089 { 2090 sv_utf8_upgrade (self->incr_text); 2091 2092 if (self->incr_pos) 2093 self->incr_pos = utf8_hop ((U8 *)SvPVX (self->incr_text), self->incr_pos) 2094 - (U8 *)SvPVX (self->incr_text); 2095 } 2096 2097 // append data, if any 2098 if (jsonstr) 2099 { 2100 /* make sure both strings have same encoding */ 2101 if (SvUTF8 (jsonstr) != SvUTF8 (self->incr_text)) 2102 if (SvUTF8 (jsonstr)) 2103 sv_utf8_downgrade (jsonstr, 0); 2104 else 2105 sv_utf8_upgrade (jsonstr); 2106 2107 /* and then just blindly append */ 2108 { 2109 STRLEN len; 2110 const char *str = SvPV (jsonstr, len); 2111 STRLEN cur = SvCUR (self->incr_text); 2112 2113 if (SvLEN (self->incr_text) <= cur + len) 2114 SvGROW (self->incr_text, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1); 2115 2116 Move (str, SvEND (self->incr_text), len, char); 2117 SvCUR_set (self->incr_text, SvCUR (self->incr_text) + len); 2118 *SvEND (self->incr_text) = 0; // this should basically be a nop, too, but make sure it's there 2119 } 2120 } 2121 2122 if (GIMME_V != G_VOID) 2123 do 2124 { 2125 SV *sv; 2126 char *offset; 2127 2128 if (!INCR_DONE (self)) 2129 { 2130 incr_parse (self); 2131 2132 if (self->incr_pos > self->max_size && self->max_size) 2133 croak ("attempted decode of JSON text of %lu bytes size, but max_size is set to %lu", 2134 (unsigned long)self->incr_pos, (unsigned long)self->max_size); 2135 2136 if (!INCR_DONE (self)) 2137 { 2138 // as an optimisation, do not accumulate white space in the incr buffer 2139 if (self->incr_mode == INCR_M_WS && self->incr_pos) 2140 { 2141 self->incr_pos = 0; 2142 SvCUR_set (self->incr_text, 0); 2143 } 2144 2145 break; 2146 } 2147 } 2148 2149 PUTBACK; sv = decode_json (self->incr_text, self, &offset); SPAGAIN; 2150 XPUSHs (sv); 2151 2152 self->incr_pos -= offset - SvPVX (self->incr_text); 2153 self->incr_nest = 0; 2154 self->incr_mode = 0; 2155 2156 sv_chop (self->incr_text, offset); 2157 } 2158 while (GIMME_V == G_ARRAY); 2159} 2160 2161SV *incr_text (JSON *self) 2162 ATTRS: lvalue 2163 CODE: 2164{ 2165 if (self->incr_pos) 2166 croak ("incr_text can not be called when the incremental parser already started parsing"); 2167 2168 RETVAL = self->incr_text ? SvREFCNT_inc (self->incr_text) : &PL_sv_undef; 2169} 2170 OUTPUT: 2171 RETVAL 2172 2173void incr_skip (JSON *self) 2174 CODE: 2175{ 2176 if (self->incr_pos) 2177 { 2178 sv_chop (self->incr_text, SvPV_nolen (self->incr_text) + self->incr_pos); 2179 self->incr_pos = 0; 2180 self->incr_nest = 0; 2181 self->incr_mode = 0; 2182 } 2183} 2184 2185void incr_reset (JSON *self) 2186 CODE: 2187{ 2188 SvREFCNT_dec (self->incr_text); 2189 self->incr_text = 0; 2190 self->incr_pos = 0; 2191 self->incr_nest = 0; 2192 self->incr_mode = 0; 2193} 2194 2195void DESTROY (JSON *self) 2196 CODE: 2197 SvREFCNT_dec (self->cb_sk_object); 2198 SvREFCNT_dec (self->cb_object); 2199 SvREFCNT_dec (self->incr_text); 2200 2201PROTOTYPES: ENABLE 2202 2203void encode_json (SV *scalar) 2204 PPCODE: 2205{ 2206 JSON json; 2207 json_init (&json); 2208 json.flags |= F_UTF8; 2209 PUTBACK; scalar = encode_json (scalar, &json); SPAGAIN; 2210 XPUSHs (scalar); 2211} 2212 2213void decode_json (SV *jsonstr) 2214 PPCODE: 2215{ 2216 JSON json; 2217 json_init (&json); 2218 json.flags |= F_UTF8; 2219 PUTBACK; jsonstr = decode_json (jsonstr, &json, 0); SPAGAIN; 2220 XPUSHs (jsonstr); 2221} 2222 2223