1262395Sbapt/* Copyright (c) 2013, Vsevolod Stakhov 2262395Sbapt * All rights reserved. 3262395Sbapt * 4262395Sbapt * Redistribution and use in source and binary forms, with or without 5262395Sbapt * modification, are permitted provided that the following conditions are met: 6262395Sbapt * * Redistributions of source code must retain the above copyright 7262395Sbapt * notice, this list of conditions and the following disclaimer. 8262395Sbapt * * Redistributions in binary form must reproduce the above copyright 9262395Sbapt * notice, this list of conditions and the following disclaimer in the 10262395Sbapt * documentation and/or other materials provided with the distribution. 11262395Sbapt * 12262395Sbapt * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY 13262395Sbapt * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 14262395Sbapt * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 15262395Sbapt * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY 16262395Sbapt * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 17262395Sbapt * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 18262395Sbapt * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 19262395Sbapt * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 20262395Sbapt * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 21262395Sbapt * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22262395Sbapt */ 23262395Sbapt 24262395Sbapt#include "ucl.h" 25262395Sbapt#include "ucl_internal.h" 26262395Sbapt#include "ucl_chartable.h" 27262395Sbapt 28262395Sbapt/** 29262395Sbapt * @file rcl_parser.c 30262395Sbapt * The implementation of rcl parser 31262395Sbapt */ 32262395Sbapt 33262395Sbaptstruct ucl_parser_saved_state { 34262395Sbapt unsigned int line; 35262395Sbapt unsigned int column; 36262395Sbapt size_t remain; 37262395Sbapt const unsigned char *pos; 38262395Sbapt}; 39262395Sbapt 40262395Sbapt/** 41262395Sbapt * Move up to len characters 42262395Sbapt * @param parser 43262395Sbapt * @param begin 44262395Sbapt * @param len 45262395Sbapt * @return new position in chunk 46262395Sbapt */ 47262395Sbapt#define ucl_chunk_skipc(chunk, p) do{ \ 48262395Sbapt if (*(p) == '\n') { \ 49262395Sbapt (chunk)->line ++; \ 50262395Sbapt (chunk)->column = 0; \ 51262395Sbapt } \ 52262395Sbapt else (chunk)->column ++; \ 53262395Sbapt (p++); \ 54262395Sbapt (chunk)->pos ++; \ 55262395Sbapt (chunk)->remain --; \ 56262395Sbapt } while (0) 57262395Sbapt 58262395Sbaptstatic inline void 59262395Sbaptucl_set_err (struct ucl_chunk *chunk, int code, const char *str, UT_string **err) 60262395Sbapt{ 61262395Sbapt if (chunk->pos < chunk->end) { 62262395Sbapt if (isgraph (*chunk->pos)) { 63262395Sbapt ucl_create_err (err, "error on line %d at column %d: '%s', character: '%c'", 64262395Sbapt chunk->line, chunk->column, str, *chunk->pos); 65262395Sbapt } 66262395Sbapt else { 67262395Sbapt ucl_create_err (err, "error on line %d at column %d: '%s', character: '0x%02x'", 68262395Sbapt chunk->line, chunk->column, str, (int)*chunk->pos); 69262395Sbapt } 70262395Sbapt } 71262395Sbapt else { 72262395Sbapt ucl_create_err (err, "error at the end of chunk: %s", str); 73262395Sbapt } 74262395Sbapt} 75262395Sbapt 76262395Sbapt/** 77262395Sbapt * Skip all comments from the current pos resolving nested and multiline comments 78262395Sbapt * @param parser 79262395Sbapt * @return 80262395Sbapt */ 81262395Sbaptstatic bool 82262395Sbaptucl_skip_comments (struct ucl_parser *parser) 83262395Sbapt{ 84262395Sbapt struct ucl_chunk *chunk = parser->chunks; 85262395Sbapt const unsigned char *p; 86262395Sbapt int comments_nested = 0; 87262395Sbapt 88262395Sbapt p = chunk->pos; 89262395Sbapt 90262395Sbaptstart: 91262395Sbapt if (*p == '#') { 92262395Sbapt if (parser->state != UCL_STATE_SCOMMENT && 93262395Sbapt parser->state != UCL_STATE_MCOMMENT) { 94262395Sbapt while (p < chunk->end) { 95262395Sbapt if (*p == '\n') { 96262395Sbapt ucl_chunk_skipc (chunk, p); 97262395Sbapt goto start; 98262395Sbapt } 99262395Sbapt ucl_chunk_skipc (chunk, p); 100262395Sbapt } 101262395Sbapt } 102262395Sbapt } 103262395Sbapt else if (*p == '/' && chunk->remain >= 2) { 104262395Sbapt if (p[1] == '*') { 105262395Sbapt ucl_chunk_skipc (chunk, p); 106262395Sbapt comments_nested ++; 107262395Sbapt ucl_chunk_skipc (chunk, p); 108262395Sbapt 109262395Sbapt while (p < chunk->end) { 110262395Sbapt if (*p == '*') { 111262395Sbapt ucl_chunk_skipc (chunk, p); 112262395Sbapt if (*p == '/') { 113262395Sbapt comments_nested --; 114262395Sbapt if (comments_nested == 0) { 115262395Sbapt ucl_chunk_skipc (chunk, p); 116262395Sbapt goto start; 117262395Sbapt } 118262395Sbapt } 119262395Sbapt ucl_chunk_skipc (chunk, p); 120262395Sbapt } 121262395Sbapt else if (p[0] == '/' && chunk->remain >= 2 && p[1] == '*') { 122262395Sbapt comments_nested ++; 123262395Sbapt ucl_chunk_skipc (chunk, p); 124262395Sbapt ucl_chunk_skipc (chunk, p); 125262395Sbapt continue; 126262395Sbapt } 127262395Sbapt ucl_chunk_skipc (chunk, p); 128262395Sbapt } 129262395Sbapt if (comments_nested != 0) { 130262395Sbapt ucl_set_err (chunk, UCL_ENESTED, "unfinished multiline comment", &parser->err); 131262395Sbapt return false; 132262395Sbapt } 133262395Sbapt } 134262395Sbapt } 135262395Sbapt 136262395Sbapt return true; 137262395Sbapt} 138262395Sbapt 139262395Sbapt/** 140262395Sbapt * Return multiplier for a character 141262395Sbapt * @param c multiplier character 142262395Sbapt * @param is_bytes if true use 1024 multiplier 143262395Sbapt * @return multiplier 144262395Sbapt */ 145262395Sbaptstatic inline unsigned long 146262395Sbaptucl_lex_num_multiplier (const unsigned char c, bool is_bytes) { 147262395Sbapt const struct { 148262395Sbapt char c; 149262395Sbapt long mult_normal; 150262395Sbapt long mult_bytes; 151262395Sbapt } multipliers[] = { 152262395Sbapt {'m', 1000 * 1000, 1024 * 1024}, 153262395Sbapt {'k', 1000, 1024}, 154262395Sbapt {'g', 1000 * 1000 * 1000, 1024 * 1024 * 1024} 155262395Sbapt }; 156262395Sbapt int i; 157262395Sbapt 158262395Sbapt for (i = 0; i < 3; i ++) { 159262395Sbapt if (tolower (c) == multipliers[i].c) { 160262395Sbapt if (is_bytes) { 161262395Sbapt return multipliers[i].mult_bytes; 162262395Sbapt } 163262395Sbapt return multipliers[i].mult_normal; 164262395Sbapt } 165262395Sbapt } 166262395Sbapt 167262395Sbapt return 1; 168262395Sbapt} 169262395Sbapt 170262395Sbapt 171262395Sbapt/** 172262395Sbapt * Return multiplier for time scaling 173262395Sbapt * @param c 174262395Sbapt * @return 175262395Sbapt */ 176262395Sbaptstatic inline double 177262395Sbaptucl_lex_time_multiplier (const unsigned char c) { 178262395Sbapt const struct { 179262395Sbapt char c; 180262395Sbapt double mult; 181262395Sbapt } multipliers[] = { 182262395Sbapt {'m', 60}, 183262395Sbapt {'h', 60 * 60}, 184262395Sbapt {'d', 60 * 60 * 24}, 185262395Sbapt {'w', 60 * 60 * 24 * 7}, 186262395Sbapt {'y', 60 * 60 * 24 * 7 * 365} 187262395Sbapt }; 188262395Sbapt int i; 189262395Sbapt 190262395Sbapt for (i = 0; i < 5; i ++) { 191262395Sbapt if (tolower (c) == multipliers[i].c) { 192262395Sbapt return multipliers[i].mult; 193262395Sbapt } 194262395Sbapt } 195262395Sbapt 196262395Sbapt return 1; 197262395Sbapt} 198262395Sbapt 199262395Sbapt/** 200262395Sbapt * Return true if a character is a end of an atom 201262395Sbapt * @param c 202262395Sbapt * @return 203262395Sbapt */ 204262395Sbaptstatic inline bool 205262395Sbaptucl_lex_is_atom_end (const unsigned char c) 206262395Sbapt{ 207262395Sbapt return ucl_test_character (c, UCL_CHARACTER_VALUE_END); 208262395Sbapt} 209262395Sbapt 210262395Sbaptstatic inline bool 211262395Sbaptucl_lex_is_comment (const unsigned char c1, const unsigned char c2) 212262395Sbapt{ 213262395Sbapt if (c1 == '/') { 214262395Sbapt if (c2 == '*') { 215262395Sbapt return true; 216262395Sbapt } 217262395Sbapt } 218262395Sbapt else if (c1 == '#') { 219262395Sbapt return true; 220262395Sbapt } 221262395Sbapt return false; 222262395Sbapt} 223262395Sbapt 224262395Sbapt/** 225262395Sbapt * Check variable found 226262395Sbapt * @param parser 227262395Sbapt * @param ptr 228262395Sbapt * @param remain 229262395Sbapt * @param out_len 230262395Sbapt * @param strict 231262395Sbapt * @param found 232262395Sbapt * @return 233262395Sbapt */ 234262395Sbaptstatic inline const char * 235262395Sbaptucl_check_variable_safe (struct ucl_parser *parser, const char *ptr, size_t remain, 236262395Sbapt size_t *out_len, bool strict, bool *found) 237262395Sbapt{ 238262395Sbapt struct ucl_variable *var; 239268896Sbapt unsigned char *dst; 240268896Sbapt size_t dstlen; 241268896Sbapt bool need_free = false; 242262395Sbapt 243262395Sbapt LL_FOREACH (parser->variables, var) { 244262395Sbapt if (strict) { 245262395Sbapt if (remain == var->var_len) { 246262395Sbapt if (memcmp (ptr, var->var, var->var_len) == 0) { 247262395Sbapt *out_len += var->value_len; 248262395Sbapt *found = true; 249262395Sbapt return (ptr + var->var_len); 250262395Sbapt } 251262395Sbapt } 252262395Sbapt } 253262395Sbapt else { 254262395Sbapt if (remain >= var->var_len) { 255262395Sbapt if (memcmp (ptr, var->var, var->var_len) == 0) { 256262395Sbapt *out_len += var->value_len; 257262395Sbapt *found = true; 258262395Sbapt return (ptr + var->var_len); 259262395Sbapt } 260262395Sbapt } 261262395Sbapt } 262262395Sbapt } 263262395Sbapt 264268896Sbapt /* XXX: can only handle ${VAR} */ 265268896Sbapt if (!(*found) && parser->var_handler != NULL && strict) { 266268896Sbapt /* Call generic handler */ 267268896Sbapt if (parser->var_handler (ptr, remain, &dst, &dstlen, &need_free, 268268896Sbapt parser->var_data)) { 269268896Sbapt *found = true; 270268896Sbapt if (need_free) { 271268896Sbapt free (dst); 272268896Sbapt } 273268896Sbapt return (ptr + remain); 274268896Sbapt } 275268896Sbapt } 276268896Sbapt 277262395Sbapt return ptr; 278262395Sbapt} 279262395Sbapt 280262395Sbapt/** 281262395Sbapt * Check for a variable in a given string 282262395Sbapt * @param parser 283262395Sbapt * @param ptr 284262395Sbapt * @param remain 285262395Sbapt * @param out_len 286262395Sbapt * @param vars_found 287262395Sbapt * @return 288262395Sbapt */ 289262395Sbaptstatic const char * 290268896Sbaptucl_check_variable (struct ucl_parser *parser, const char *ptr, 291268896Sbapt size_t remain, size_t *out_len, bool *vars_found) 292262395Sbapt{ 293262395Sbapt const char *p, *end, *ret = ptr; 294262395Sbapt bool found = false; 295262395Sbapt 296262395Sbapt if (*ptr == '{') { 297262395Sbapt /* We need to match the variable enclosed in braces */ 298262395Sbapt p = ptr + 1; 299262395Sbapt end = ptr + remain; 300262395Sbapt while (p < end) { 301262395Sbapt if (*p == '}') { 302268896Sbapt ret = ucl_check_variable_safe (parser, ptr + 1, p - ptr - 1, 303268896Sbapt out_len, true, &found); 304262395Sbapt if (found) { 305262395Sbapt /* {} must be excluded actually */ 306262395Sbapt ret ++; 307262395Sbapt if (!*vars_found) { 308262395Sbapt *vars_found = true; 309262395Sbapt } 310262395Sbapt } 311262395Sbapt else { 312262395Sbapt *out_len += 2; 313262395Sbapt } 314262395Sbapt break; 315262395Sbapt } 316262395Sbapt p ++; 317262395Sbapt } 318262395Sbapt } 319262395Sbapt else if (*ptr != '$') { 320262395Sbapt /* Not count escaped dollar sign */ 321262395Sbapt ret = ucl_check_variable_safe (parser, ptr, remain, out_len, false, &found); 322262395Sbapt if (found && !*vars_found) { 323262395Sbapt *vars_found = true; 324262395Sbapt } 325262395Sbapt if (!found) { 326262395Sbapt (*out_len) ++; 327262395Sbapt } 328262395Sbapt } 329262395Sbapt else { 330262395Sbapt ret ++; 331262395Sbapt (*out_len) ++; 332262395Sbapt } 333262395Sbapt 334262395Sbapt return ret; 335262395Sbapt} 336262395Sbapt 337262395Sbapt/** 338262395Sbapt * Expand a single variable 339262395Sbapt * @param parser 340262395Sbapt * @param ptr 341262395Sbapt * @param remain 342262395Sbapt * @param dest 343262395Sbapt * @return 344262395Sbapt */ 345262395Sbaptstatic const char * 346262395Sbaptucl_expand_single_variable (struct ucl_parser *parser, const char *ptr, 347262395Sbapt size_t remain, unsigned char **dest) 348262395Sbapt{ 349268896Sbapt unsigned char *d = *dest, *dst; 350262395Sbapt const char *p = ptr + 1, *ret; 351262395Sbapt struct ucl_variable *var; 352268896Sbapt size_t dstlen; 353268896Sbapt bool need_free = false; 354262395Sbapt bool found = false; 355268896Sbapt bool strict = false; 356262395Sbapt 357262395Sbapt ret = ptr + 1; 358262395Sbapt remain --; 359262395Sbapt 360262395Sbapt if (*p == '$') { 361262395Sbapt *d++ = *p++; 362262395Sbapt *dest = d; 363262395Sbapt return p; 364262395Sbapt } 365262395Sbapt else if (*p == '{') { 366262395Sbapt p ++; 367268896Sbapt strict = true; 368262395Sbapt ret += 2; 369262395Sbapt remain -= 2; 370262395Sbapt } 371262395Sbapt 372262395Sbapt LL_FOREACH (parser->variables, var) { 373262395Sbapt if (remain >= var->var_len) { 374262395Sbapt if (memcmp (p, var->var, var->var_len) == 0) { 375262395Sbapt memcpy (d, var->value, var->value_len); 376262395Sbapt ret += var->var_len; 377262395Sbapt d += var->value_len; 378262395Sbapt found = true; 379262395Sbapt break; 380262395Sbapt } 381262395Sbapt } 382262395Sbapt } 383262395Sbapt if (!found) { 384268896Sbapt if (strict && parser->var_handler != NULL) { 385268896Sbapt if (parser->var_handler (ptr, remain, &dst, &dstlen, &need_free, 386268896Sbapt parser->var_data)) { 387268896Sbapt memcpy (d, dst, dstlen); 388268896Sbapt ret += dstlen; 389268896Sbapt d += remain; 390268896Sbapt found = true; 391268896Sbapt } 392268896Sbapt } 393268896Sbapt 394268896Sbapt /* Leave variable as is */ 395268896Sbapt if (!found) { 396268896Sbapt memcpy (d, ptr, 2); 397268896Sbapt d += 2; 398268896Sbapt ret --; 399268896Sbapt } 400262395Sbapt } 401262395Sbapt 402262395Sbapt *dest = d; 403262395Sbapt return ret; 404262395Sbapt} 405262395Sbapt 406262395Sbapt/** 407262395Sbapt * Expand variables in string 408262395Sbapt * @param parser 409262395Sbapt * @param dst 410262395Sbapt * @param src 411262395Sbapt * @param in_len 412262395Sbapt * @return 413262395Sbapt */ 414262395Sbaptstatic ssize_t 415262395Sbaptucl_expand_variable (struct ucl_parser *parser, unsigned char **dst, 416262395Sbapt const char *src, size_t in_len) 417262395Sbapt{ 418262395Sbapt const char *p, *end = src + in_len; 419262395Sbapt unsigned char *d; 420262395Sbapt size_t out_len = 0; 421262395Sbapt bool vars_found = false; 422262395Sbapt 423262395Sbapt p = src; 424262395Sbapt while (p != end) { 425262395Sbapt if (*p == '$') { 426262395Sbapt p = ucl_check_variable (parser, p + 1, end - p - 1, &out_len, &vars_found); 427262395Sbapt } 428262395Sbapt else { 429262395Sbapt p ++; 430262395Sbapt out_len ++; 431262395Sbapt } 432262395Sbapt } 433262395Sbapt 434262395Sbapt if (!vars_found) { 435262395Sbapt /* Trivial case */ 436262395Sbapt *dst = NULL; 437262395Sbapt return in_len; 438262395Sbapt } 439262395Sbapt 440262395Sbapt *dst = UCL_ALLOC (out_len + 1); 441262395Sbapt if (*dst == NULL) { 442262395Sbapt return in_len; 443262395Sbapt } 444262395Sbapt 445262395Sbapt d = *dst; 446262395Sbapt p = src; 447262395Sbapt while (p != end) { 448262395Sbapt if (*p == '$') { 449262395Sbapt p = ucl_expand_single_variable (parser, p, end - p, &d); 450262395Sbapt } 451262395Sbapt else { 452262395Sbapt *d++ = *p++; 453262395Sbapt } 454262395Sbapt } 455262395Sbapt 456262395Sbapt *d = '\0'; 457262395Sbapt 458262395Sbapt return out_len; 459262395Sbapt} 460262395Sbapt 461262395Sbapt/** 462262395Sbapt * Store or copy pointer to the trash stack 463262395Sbapt * @param parser parser object 464262395Sbapt * @param src src string 465262395Sbapt * @param dst destination buffer (trash stack pointer) 466262395Sbapt * @param dst_const const destination pointer (e.g. value of object) 467262395Sbapt * @param in_len input length 468262395Sbapt * @param need_unescape need to unescape source (and copy it) 469262395Sbapt * @param need_lowercase need to lowercase value (and copy) 470262395Sbapt * @param need_expand need to expand variables (and copy as well) 471262395Sbapt * @return output length (excluding \0 symbol) 472262395Sbapt */ 473262395Sbaptstatic inline ssize_t 474262395Sbaptucl_copy_or_store_ptr (struct ucl_parser *parser, 475262395Sbapt const unsigned char *src, unsigned char **dst, 476262395Sbapt const char **dst_const, size_t in_len, 477262395Sbapt bool need_unescape, bool need_lowercase, bool need_expand) 478262395Sbapt{ 479262395Sbapt ssize_t ret = -1, tret; 480262395Sbapt unsigned char *tmp; 481262395Sbapt 482262395Sbapt if (need_unescape || need_lowercase || 483262395Sbapt (need_expand && parser->variables != NULL) || 484262395Sbapt !(parser->flags & UCL_PARSER_ZEROCOPY)) { 485262395Sbapt /* Copy string */ 486262395Sbapt *dst = UCL_ALLOC (in_len + 1); 487262395Sbapt if (*dst == NULL) { 488262395Sbapt ucl_set_err (parser->chunks, 0, "cannot allocate memory for a string", &parser->err); 489262395Sbapt return false; 490262395Sbapt } 491262395Sbapt if (need_lowercase) { 492262395Sbapt ret = ucl_strlcpy_tolower (*dst, src, in_len + 1); 493262395Sbapt } 494262395Sbapt else { 495262395Sbapt ret = ucl_strlcpy_unsafe (*dst, src, in_len + 1); 496262395Sbapt } 497262395Sbapt 498262395Sbapt if (need_unescape) { 499262395Sbapt ret = ucl_unescape_json_string (*dst, ret); 500262395Sbapt } 501262395Sbapt if (need_expand) { 502262395Sbapt tmp = *dst; 503262395Sbapt tret = ret; 504262395Sbapt ret = ucl_expand_variable (parser, dst, tmp, ret); 505262395Sbapt if (*dst == NULL) { 506262395Sbapt /* Nothing to expand */ 507262395Sbapt *dst = tmp; 508262395Sbapt ret = tret; 509262395Sbapt } 510262395Sbapt } 511262395Sbapt *dst_const = *dst; 512262395Sbapt } 513262395Sbapt else { 514262395Sbapt *dst_const = src; 515262395Sbapt ret = in_len; 516262395Sbapt } 517262395Sbapt 518262395Sbapt return ret; 519262395Sbapt} 520262395Sbapt 521262395Sbapt/** 522262395Sbapt * Create and append an object at the specified level 523262395Sbapt * @param parser 524262395Sbapt * @param is_array 525262395Sbapt * @param level 526262395Sbapt * @return 527262395Sbapt */ 528262395Sbaptstatic inline ucl_object_t * 529262395Sbaptucl_add_parser_stack (ucl_object_t *obj, struct ucl_parser *parser, bool is_array, int level) 530262395Sbapt{ 531262395Sbapt struct ucl_stack *st; 532262395Sbapt 533262395Sbapt if (!is_array) { 534262395Sbapt if (obj == NULL) { 535262395Sbapt obj = ucl_object_typed_new (UCL_OBJECT); 536262395Sbapt } 537262395Sbapt else { 538262395Sbapt obj->type = UCL_OBJECT; 539262395Sbapt } 540262395Sbapt obj->value.ov = ucl_hash_create (); 541262395Sbapt parser->state = UCL_STATE_KEY; 542262395Sbapt } 543262395Sbapt else { 544262395Sbapt if (obj == NULL) { 545262395Sbapt obj = ucl_object_typed_new (UCL_ARRAY); 546262395Sbapt } 547262395Sbapt else { 548262395Sbapt obj->type = UCL_ARRAY; 549262395Sbapt } 550262395Sbapt parser->state = UCL_STATE_VALUE; 551262395Sbapt } 552262395Sbapt 553262395Sbapt st = UCL_ALLOC (sizeof (struct ucl_stack)); 554268896Sbapt if (st == NULL) { 555268896Sbapt ucl_set_err (parser->chunks, 0, "cannot allocate memory for an object", &parser->err); 556268896Sbapt return NULL; 557268896Sbapt } 558262395Sbapt st->obj = obj; 559262395Sbapt st->level = level; 560262395Sbapt LL_PREPEND (parser->stack, st); 561262395Sbapt parser->cur_obj = obj; 562262395Sbapt 563262395Sbapt return obj; 564262395Sbapt} 565262395Sbapt 566262395Sbaptint 567262395Sbaptucl_maybe_parse_number (ucl_object_t *obj, 568268896Sbapt const char *start, const char *end, const char **pos, 569268896Sbapt bool allow_double, bool number_bytes, bool allow_time) 570262395Sbapt{ 571262395Sbapt const char *p = start, *c = start; 572262395Sbapt char *endptr; 573262395Sbapt bool got_dot = false, got_exp = false, need_double = false, 574268896Sbapt is_time = false, valid_start = false, is_hex = false, 575262395Sbapt is_neg = false; 576262395Sbapt double dv = 0; 577262395Sbapt int64_t lv = 0; 578262395Sbapt 579262395Sbapt if (*p == '-') { 580262395Sbapt is_neg = true; 581262395Sbapt c ++; 582262395Sbapt p ++; 583262395Sbapt } 584262395Sbapt while (p < end) { 585262395Sbapt if (is_hex && isxdigit (*p)) { 586262395Sbapt p ++; 587262395Sbapt } 588262395Sbapt else if (isdigit (*p)) { 589262395Sbapt valid_start = true; 590262395Sbapt p ++; 591262395Sbapt } 592262395Sbapt else if (!is_hex && (*p == 'x' || *p == 'X')) { 593262395Sbapt is_hex = true; 594262395Sbapt allow_double = false; 595262395Sbapt c = p + 1; 596262395Sbapt } 597262395Sbapt else if (allow_double) { 598262395Sbapt if (p == c) { 599262395Sbapt /* Empty digits sequence, not a number */ 600262395Sbapt *pos = start; 601262395Sbapt return EINVAL; 602262395Sbapt } 603262395Sbapt else if (*p == '.') { 604262395Sbapt if (got_dot) { 605262395Sbapt /* Double dots, not a number */ 606262395Sbapt *pos = start; 607262395Sbapt return EINVAL; 608262395Sbapt } 609262395Sbapt else { 610262395Sbapt got_dot = true; 611262395Sbapt need_double = true; 612262395Sbapt p ++; 613262395Sbapt } 614262395Sbapt } 615262395Sbapt else if (*p == 'e' || *p == 'E') { 616262395Sbapt if (got_exp) { 617262395Sbapt /* Double exp, not a number */ 618262395Sbapt *pos = start; 619262395Sbapt return EINVAL; 620262395Sbapt } 621262395Sbapt else { 622262395Sbapt got_exp = true; 623262395Sbapt need_double = true; 624262395Sbapt p ++; 625262395Sbapt if (p >= end) { 626262395Sbapt *pos = start; 627262395Sbapt return EINVAL; 628262395Sbapt } 629262395Sbapt if (!isdigit (*p) && *p != '+' && *p != '-') { 630262395Sbapt /* Wrong exponent sign */ 631262395Sbapt *pos = start; 632262395Sbapt return EINVAL; 633262395Sbapt } 634262395Sbapt else { 635262395Sbapt p ++; 636262395Sbapt } 637262395Sbapt } 638262395Sbapt } 639262395Sbapt else { 640262395Sbapt /* Got the end of the number, need to check */ 641262395Sbapt break; 642262395Sbapt } 643262395Sbapt } 644262395Sbapt else { 645262395Sbapt break; 646262395Sbapt } 647262395Sbapt } 648262395Sbapt 649262395Sbapt if (!valid_start) { 650262395Sbapt *pos = start; 651262395Sbapt return EINVAL; 652262395Sbapt } 653262395Sbapt 654262395Sbapt errno = 0; 655262395Sbapt if (need_double) { 656262395Sbapt dv = strtod (c, &endptr); 657262395Sbapt } 658262395Sbapt else { 659262395Sbapt if (is_hex) { 660262395Sbapt lv = strtoimax (c, &endptr, 16); 661262395Sbapt } 662262395Sbapt else { 663262395Sbapt lv = strtoimax (c, &endptr, 10); 664262395Sbapt } 665262395Sbapt } 666262395Sbapt if (errno == ERANGE) { 667262395Sbapt *pos = start; 668262395Sbapt return ERANGE; 669262395Sbapt } 670262395Sbapt 671262395Sbapt /* Now check endptr */ 672268896Sbapt if (endptr == NULL || ucl_lex_is_atom_end (*endptr) || *endptr == '\0' || 673268896Sbapt ucl_test_character (*endptr, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 674262395Sbapt p = endptr; 675262395Sbapt goto set_obj; 676262395Sbapt } 677262395Sbapt 678262395Sbapt if (endptr < end && endptr != start) { 679262395Sbapt p = endptr; 680262395Sbapt switch (*p) { 681262395Sbapt case 'm': 682262395Sbapt case 'M': 683262395Sbapt case 'g': 684262395Sbapt case 'G': 685262395Sbapt case 'k': 686262395Sbapt case 'K': 687262395Sbapt if (end - p >= 2) { 688262395Sbapt if (p[1] == 's' || p[1] == 'S') { 689262395Sbapt /* Milliseconds */ 690262395Sbapt if (!need_double) { 691262395Sbapt need_double = true; 692262395Sbapt dv = lv; 693262395Sbapt } 694268896Sbapt is_time = true; 695262395Sbapt if (p[0] == 'm' || p[0] == 'M') { 696262395Sbapt dv /= 1000.; 697262395Sbapt } 698262395Sbapt else { 699262395Sbapt dv *= ucl_lex_num_multiplier (*p, false); 700262395Sbapt } 701262395Sbapt p += 2; 702262395Sbapt goto set_obj; 703262395Sbapt } 704262395Sbapt else if (number_bytes || (p[1] == 'b' || p[1] == 'B')) { 705262395Sbapt /* Bytes */ 706262395Sbapt if (need_double) { 707262395Sbapt need_double = false; 708262395Sbapt lv = dv; 709262395Sbapt } 710262395Sbapt lv *= ucl_lex_num_multiplier (*p, true); 711262395Sbapt p += 2; 712262395Sbapt goto set_obj; 713262395Sbapt } 714262395Sbapt else if (ucl_lex_is_atom_end (p[1])) { 715262395Sbapt if (need_double) { 716262395Sbapt dv *= ucl_lex_num_multiplier (*p, false); 717262395Sbapt } 718262395Sbapt else { 719262395Sbapt lv *= ucl_lex_num_multiplier (*p, number_bytes); 720262395Sbapt } 721262395Sbapt p ++; 722262395Sbapt goto set_obj; 723262395Sbapt } 724268896Sbapt else if (allow_time && end - p >= 3) { 725262395Sbapt if (tolower (p[0]) == 'm' && 726262395Sbapt tolower (p[1]) == 'i' && 727262395Sbapt tolower (p[2]) == 'n') { 728262395Sbapt /* Minutes */ 729262395Sbapt if (!need_double) { 730262395Sbapt need_double = true; 731262395Sbapt dv = lv; 732262395Sbapt } 733268896Sbapt is_time = true; 734262395Sbapt dv *= 60.; 735262395Sbapt p += 3; 736262395Sbapt goto set_obj; 737262395Sbapt } 738262395Sbapt } 739262395Sbapt } 740262395Sbapt else { 741262395Sbapt if (need_double) { 742262395Sbapt dv *= ucl_lex_num_multiplier (*p, false); 743262395Sbapt } 744262395Sbapt else { 745262395Sbapt lv *= ucl_lex_num_multiplier (*p, number_bytes); 746262395Sbapt } 747262395Sbapt p ++; 748262395Sbapt goto set_obj; 749262395Sbapt } 750262395Sbapt break; 751262395Sbapt case 'S': 752262395Sbapt case 's': 753268896Sbapt if (allow_time && 754268896Sbapt (p == end - 1 || ucl_lex_is_atom_end (p[1]))) { 755262395Sbapt if (!need_double) { 756262395Sbapt need_double = true; 757262395Sbapt dv = lv; 758262395Sbapt } 759262395Sbapt p ++; 760268896Sbapt is_time = true; 761262395Sbapt goto set_obj; 762262395Sbapt } 763262395Sbapt break; 764262395Sbapt case 'h': 765262395Sbapt case 'H': 766262395Sbapt case 'd': 767262395Sbapt case 'D': 768262395Sbapt case 'w': 769262395Sbapt case 'W': 770262395Sbapt case 'Y': 771262395Sbapt case 'y': 772268896Sbapt if (allow_time && 773268896Sbapt (p == end - 1 || ucl_lex_is_atom_end (p[1]))) { 774262395Sbapt if (!need_double) { 775262395Sbapt need_double = true; 776262395Sbapt dv = lv; 777262395Sbapt } 778268896Sbapt is_time = true; 779262395Sbapt dv *= ucl_lex_time_multiplier (*p); 780262395Sbapt p ++; 781262395Sbapt goto set_obj; 782262395Sbapt } 783262395Sbapt break; 784262395Sbapt } 785262395Sbapt } 786262395Sbapt 787262395Sbapt *pos = c; 788262395Sbapt return EINVAL; 789262395Sbapt 790262395Sbapt set_obj: 791268896Sbapt if (allow_double && (need_double || is_time)) { 792268896Sbapt if (!is_time) { 793262395Sbapt obj->type = UCL_FLOAT; 794262395Sbapt } 795262395Sbapt else { 796262395Sbapt obj->type = UCL_TIME; 797262395Sbapt } 798262395Sbapt obj->value.dv = is_neg ? (-dv) : dv; 799262395Sbapt } 800262395Sbapt else { 801262395Sbapt obj->type = UCL_INT; 802262395Sbapt obj->value.iv = is_neg ? (-lv) : lv; 803262395Sbapt } 804262395Sbapt *pos = p; 805262395Sbapt return 0; 806262395Sbapt} 807262395Sbapt 808262395Sbapt/** 809262395Sbapt * Parse possible number 810262395Sbapt * @param parser 811262395Sbapt * @param chunk 812262395Sbapt * @return true if a number has been parsed 813262395Sbapt */ 814262395Sbaptstatic bool 815262395Sbaptucl_lex_number (struct ucl_parser *parser, 816262395Sbapt struct ucl_chunk *chunk, ucl_object_t *obj) 817262395Sbapt{ 818262395Sbapt const unsigned char *pos; 819262395Sbapt int ret; 820262395Sbapt 821268896Sbapt ret = ucl_maybe_parse_number (obj, chunk->pos, chunk->end, (const char **)&pos, 822268896Sbapt true, false, ((parser->flags & UCL_PARSER_NO_TIME) == 0)); 823262395Sbapt 824262395Sbapt if (ret == 0) { 825262395Sbapt chunk->remain -= pos - chunk->pos; 826262395Sbapt chunk->column += pos - chunk->pos; 827262395Sbapt chunk->pos = pos; 828262395Sbapt return true; 829262395Sbapt } 830262395Sbapt else if (ret == ERANGE) { 831262395Sbapt ucl_set_err (chunk, ERANGE, "numeric value out of range", &parser->err); 832262395Sbapt } 833262395Sbapt 834262395Sbapt return false; 835262395Sbapt} 836262395Sbapt 837262395Sbapt/** 838262395Sbapt * Parse quoted string with possible escapes 839262395Sbapt * @param parser 840262395Sbapt * @param chunk 841262395Sbapt * @return true if a string has been parsed 842262395Sbapt */ 843262395Sbaptstatic bool 844262395Sbaptucl_lex_json_string (struct ucl_parser *parser, 845262395Sbapt struct ucl_chunk *chunk, bool *need_unescape, bool *ucl_escape, bool *var_expand) 846262395Sbapt{ 847262395Sbapt const unsigned char *p = chunk->pos; 848262395Sbapt unsigned char c; 849262395Sbapt int i; 850262395Sbapt 851262395Sbapt while (p < chunk->end) { 852262395Sbapt c = *p; 853262395Sbapt if (c < 0x1F) { 854262395Sbapt /* Unmasked control character */ 855262395Sbapt if (c == '\n') { 856262395Sbapt ucl_set_err (chunk, UCL_ESYNTAX, "unexpected newline", &parser->err); 857262395Sbapt } 858262395Sbapt else { 859262395Sbapt ucl_set_err (chunk, UCL_ESYNTAX, "unexpected control character", &parser->err); 860262395Sbapt } 861262395Sbapt return false; 862262395Sbapt } 863262395Sbapt else if (c == '\\') { 864262395Sbapt ucl_chunk_skipc (chunk, p); 865262395Sbapt c = *p; 866262395Sbapt if (p >= chunk->end) { 867262395Sbapt ucl_set_err (chunk, UCL_ESYNTAX, "unfinished escape character", &parser->err); 868262395Sbapt return false; 869262395Sbapt } 870262395Sbapt else if (ucl_test_character (c, UCL_CHARACTER_ESCAPE)) { 871262395Sbapt if (c == 'u') { 872262395Sbapt ucl_chunk_skipc (chunk, p); 873262395Sbapt for (i = 0; i < 4 && p < chunk->end; i ++) { 874262395Sbapt if (!isxdigit (*p)) { 875262395Sbapt ucl_set_err (chunk, UCL_ESYNTAX, "invalid utf escape", &parser->err); 876262395Sbapt return false; 877262395Sbapt } 878262395Sbapt ucl_chunk_skipc (chunk, p); 879262395Sbapt } 880262395Sbapt if (p >= chunk->end) { 881262395Sbapt ucl_set_err (chunk, UCL_ESYNTAX, "unfinished escape character", &parser->err); 882262395Sbapt return false; 883262395Sbapt } 884262395Sbapt } 885262395Sbapt else { 886262395Sbapt ucl_chunk_skipc (chunk, p); 887262395Sbapt } 888262395Sbapt } 889262395Sbapt *need_unescape = true; 890262395Sbapt *ucl_escape = true; 891262395Sbapt continue; 892262395Sbapt } 893262395Sbapt else if (c == '"') { 894262395Sbapt ucl_chunk_skipc (chunk, p); 895262395Sbapt return true; 896262395Sbapt } 897262395Sbapt else if (ucl_test_character (c, UCL_CHARACTER_UCL_UNSAFE)) { 898262395Sbapt *ucl_escape = true; 899262395Sbapt } 900262395Sbapt else if (c == '$') { 901262395Sbapt *var_expand = true; 902262395Sbapt } 903262395Sbapt ucl_chunk_skipc (chunk, p); 904262395Sbapt } 905262395Sbapt 906262395Sbapt ucl_set_err (chunk, UCL_ESYNTAX, "no quote at the end of json string", &parser->err); 907262395Sbapt return false; 908262395Sbapt} 909262395Sbapt 910262395Sbapt/** 911262395Sbapt * Parse a key in an object 912262395Sbapt * @param parser 913262395Sbapt * @param chunk 914262395Sbapt * @return true if a key has been parsed 915262395Sbapt */ 916262395Sbaptstatic bool 917262395Sbaptucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk, bool *next_key, bool *end_of_object) 918262395Sbapt{ 919262395Sbapt const unsigned char *p, *c = NULL, *end, *t; 920262395Sbapt const char *key = NULL; 921262395Sbapt bool got_quote = false, got_eq = false, got_semicolon = false, 922262395Sbapt need_unescape = false, ucl_escape = false, var_expand = false, 923262395Sbapt got_content = false, got_sep = false; 924262395Sbapt ucl_object_t *nobj, *tobj; 925262395Sbapt ucl_hash_t *container; 926262395Sbapt ssize_t keylen; 927262395Sbapt 928262395Sbapt p = chunk->pos; 929262395Sbapt 930262395Sbapt if (*p == '.') { 931262395Sbapt /* It is macro actually */ 932262395Sbapt ucl_chunk_skipc (chunk, p); 933262395Sbapt parser->prev_state = parser->state; 934262395Sbapt parser->state = UCL_STATE_MACRO_NAME; 935262395Sbapt return true; 936262395Sbapt } 937262395Sbapt while (p < chunk->end) { 938262395Sbapt /* 939262395Sbapt * A key must start with alpha, number, '/' or '_' and end with space character 940262395Sbapt */ 941262395Sbapt if (c == NULL) { 942262395Sbapt if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) { 943262395Sbapt if (!ucl_skip_comments (parser)) { 944262395Sbapt return false; 945262395Sbapt } 946262395Sbapt p = chunk->pos; 947262395Sbapt } 948262395Sbapt else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 949262395Sbapt ucl_chunk_skipc (chunk, p); 950262395Sbapt } 951262395Sbapt else if (ucl_test_character (*p, UCL_CHARACTER_KEY_START)) { 952262395Sbapt /* The first symbol */ 953262395Sbapt c = p; 954262395Sbapt ucl_chunk_skipc (chunk, p); 955262395Sbapt got_content = true; 956262395Sbapt } 957262395Sbapt else if (*p == '"') { 958262395Sbapt /* JSON style key */ 959262395Sbapt c = p + 1; 960262395Sbapt got_quote = true; 961262395Sbapt got_content = true; 962262395Sbapt ucl_chunk_skipc (chunk, p); 963262395Sbapt } 964262395Sbapt else if (*p == '}') { 965262395Sbapt /* We have actually end of an object */ 966262395Sbapt *end_of_object = true; 967262395Sbapt return true; 968262395Sbapt } 969262395Sbapt else if (*p == '.') { 970262395Sbapt ucl_chunk_skipc (chunk, p); 971262395Sbapt parser->prev_state = parser->state; 972262395Sbapt parser->state = UCL_STATE_MACRO_NAME; 973262395Sbapt return true; 974262395Sbapt } 975262395Sbapt else { 976262395Sbapt /* Invalid identifier */ 977262395Sbapt ucl_set_err (chunk, UCL_ESYNTAX, "key must begin with a letter", &parser->err); 978262395Sbapt return false; 979262395Sbapt } 980262395Sbapt } 981262395Sbapt else { 982262395Sbapt /* Parse the body of a key */ 983262395Sbapt if (!got_quote) { 984262395Sbapt if (ucl_test_character (*p, UCL_CHARACTER_KEY)) { 985262395Sbapt got_content = true; 986262395Sbapt ucl_chunk_skipc (chunk, p); 987262395Sbapt } 988262395Sbapt else if (ucl_test_character (*p, UCL_CHARACTER_KEY_SEP)) { 989262395Sbapt end = p; 990262395Sbapt break; 991262395Sbapt } 992262395Sbapt else { 993262395Sbapt ucl_set_err (chunk, UCL_ESYNTAX, "invalid character in a key", &parser->err); 994262395Sbapt return false; 995262395Sbapt } 996262395Sbapt } 997262395Sbapt else { 998262395Sbapt /* We need to parse json like quoted string */ 999262395Sbapt if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) { 1000262395Sbapt return false; 1001262395Sbapt } 1002262395Sbapt /* Always escape keys obtained via json */ 1003262395Sbapt end = chunk->pos - 1; 1004262395Sbapt p = chunk->pos; 1005262395Sbapt break; 1006262395Sbapt } 1007262395Sbapt } 1008262395Sbapt } 1009262395Sbapt 1010262395Sbapt if (p >= chunk->end && got_content) { 1011262395Sbapt ucl_set_err (chunk, UCL_ESYNTAX, "unfinished key", &parser->err); 1012262395Sbapt return false; 1013262395Sbapt } 1014262395Sbapt else if (!got_content) { 1015262395Sbapt return true; 1016262395Sbapt } 1017262395Sbapt *end_of_object = false; 1018262395Sbapt /* We are now at the end of the key, need to parse the rest */ 1019262395Sbapt while (p < chunk->end) { 1020262395Sbapt if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) { 1021262395Sbapt ucl_chunk_skipc (chunk, p); 1022262395Sbapt } 1023262395Sbapt else if (*p == '=') { 1024262395Sbapt if (!got_eq && !got_semicolon) { 1025262395Sbapt ucl_chunk_skipc (chunk, p); 1026262395Sbapt got_eq = true; 1027262395Sbapt } 1028262395Sbapt else { 1029262395Sbapt ucl_set_err (chunk, UCL_ESYNTAX, "unexpected '=' character", &parser->err); 1030262395Sbapt return false; 1031262395Sbapt } 1032262395Sbapt } 1033262395Sbapt else if (*p == ':') { 1034262395Sbapt if (!got_eq && !got_semicolon) { 1035262395Sbapt ucl_chunk_skipc (chunk, p); 1036262395Sbapt got_semicolon = true; 1037262395Sbapt } 1038262395Sbapt else { 1039262395Sbapt ucl_set_err (chunk, UCL_ESYNTAX, "unexpected ':' character", &parser->err); 1040262395Sbapt return false; 1041262395Sbapt } 1042262395Sbapt } 1043262395Sbapt else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) { 1044262395Sbapt /* Check for comment */ 1045262395Sbapt if (!ucl_skip_comments (parser)) { 1046262395Sbapt return false; 1047262395Sbapt } 1048262395Sbapt p = chunk->pos; 1049262395Sbapt } 1050262395Sbapt else { 1051262395Sbapt /* Start value */ 1052262395Sbapt break; 1053262395Sbapt } 1054262395Sbapt } 1055262395Sbapt 1056262395Sbapt if (p >= chunk->end && got_content) { 1057262395Sbapt ucl_set_err (chunk, UCL_ESYNTAX, "unfinished key", &parser->err); 1058262395Sbapt return false; 1059262395Sbapt } 1060262395Sbapt 1061262395Sbapt got_sep = got_semicolon || got_eq; 1062262395Sbapt 1063262395Sbapt if (!got_sep) { 1064262395Sbapt /* 1065262395Sbapt * Maybe we have more keys nested, so search for termination character. 1066262395Sbapt * Possible choices: 1067262395Sbapt * 1) key1 key2 ... keyN [:=] value <- we treat that as error 1068262395Sbapt * 2) key1 ... keyN {} or [] <- we treat that as nested objects 1069262395Sbapt * 3) key1 value[;,\n] <- we treat that as linear object 1070262395Sbapt */ 1071262395Sbapt t = p; 1072262395Sbapt *next_key = false; 1073262395Sbapt while (ucl_test_character (*t, UCL_CHARACTER_WHITESPACE)) { 1074262395Sbapt t ++; 1075262395Sbapt } 1076262395Sbapt /* Check first non-space character after a key */ 1077262395Sbapt if (*t != '{' && *t != '[') { 1078262395Sbapt while (t < chunk->end) { 1079262395Sbapt if (*t == ',' || *t == ';' || *t == '\n' || *t == '\r') { 1080262395Sbapt break; 1081262395Sbapt } 1082262395Sbapt else if (*t == '{' || *t == '[') { 1083262395Sbapt *next_key = true; 1084262395Sbapt break; 1085262395Sbapt } 1086262395Sbapt t ++; 1087262395Sbapt } 1088262395Sbapt } 1089262395Sbapt } 1090262395Sbapt 1091262395Sbapt /* Create a new object */ 1092262395Sbapt nobj = ucl_object_new (); 1093262395Sbapt keylen = ucl_copy_or_store_ptr (parser, c, &nobj->trash_stack[UCL_TRASH_KEY], 1094262395Sbapt &key, end - c, need_unescape, parser->flags & UCL_PARSER_KEY_LOWERCASE, false); 1095262395Sbapt if (keylen == -1) { 1096268896Sbapt ucl_object_unref (nobj); 1097262395Sbapt return false; 1098262395Sbapt } 1099262395Sbapt else if (keylen == 0) { 1100262395Sbapt ucl_set_err (chunk, UCL_ESYNTAX, "empty keys are not allowed", &parser->err); 1101268896Sbapt ucl_object_unref (nobj); 1102262395Sbapt return false; 1103262395Sbapt } 1104262395Sbapt 1105262395Sbapt container = parser->stack->obj->value.ov; 1106262395Sbapt nobj->key = key; 1107262395Sbapt nobj->keylen = keylen; 1108268896Sbapt tobj = __DECONST (ucl_object_t *, ucl_hash_search_obj (container, nobj)); 1109262395Sbapt if (tobj == NULL) { 1110262395Sbapt container = ucl_hash_insert_object (container, nobj); 1111262395Sbapt nobj->prev = nobj; 1112262395Sbapt nobj->next = NULL; 1113262395Sbapt parser->stack->obj->len ++; 1114262395Sbapt } 1115262395Sbapt else { 1116262395Sbapt DL_APPEND (tobj, nobj); 1117262395Sbapt } 1118262395Sbapt 1119262395Sbapt if (ucl_escape) { 1120262395Sbapt nobj->flags |= UCL_OBJECT_NEED_KEY_ESCAPE; 1121262395Sbapt } 1122262395Sbapt parser->stack->obj->value.ov = container; 1123262395Sbapt 1124262395Sbapt parser->cur_obj = nobj; 1125262395Sbapt 1126262395Sbapt return true; 1127262395Sbapt} 1128262395Sbapt 1129262395Sbapt/** 1130262395Sbapt * Parse a cl string 1131262395Sbapt * @param parser 1132262395Sbapt * @param chunk 1133262395Sbapt * @return true if a key has been parsed 1134262395Sbapt */ 1135262395Sbaptstatic bool 1136262395Sbaptucl_parse_string_value (struct ucl_parser *parser, 1137262395Sbapt struct ucl_chunk *chunk, bool *var_expand, bool *need_unescape) 1138262395Sbapt{ 1139262395Sbapt const unsigned char *p; 1140262395Sbapt enum { 1141262395Sbapt UCL_BRACE_ROUND = 0, 1142262395Sbapt UCL_BRACE_SQUARE, 1143262395Sbapt UCL_BRACE_FIGURE 1144262395Sbapt }; 1145262395Sbapt int braces[3][2] = {{0, 0}, {0, 0}, {0, 0}}; 1146262395Sbapt 1147262395Sbapt p = chunk->pos; 1148262395Sbapt 1149262395Sbapt while (p < chunk->end) { 1150262395Sbapt 1151262395Sbapt /* Skip pairs of figure braces */ 1152262395Sbapt if (*p == '{') { 1153262395Sbapt braces[UCL_BRACE_FIGURE][0] ++; 1154262395Sbapt } 1155262395Sbapt else if (*p == '}') { 1156262395Sbapt braces[UCL_BRACE_FIGURE][1] ++; 1157262395Sbapt if (braces[UCL_BRACE_FIGURE][1] <= braces[UCL_BRACE_FIGURE][0]) { 1158262395Sbapt /* This is not a termination symbol, continue */ 1159262395Sbapt ucl_chunk_skipc (chunk, p); 1160262395Sbapt continue; 1161262395Sbapt } 1162262395Sbapt } 1163262395Sbapt /* Skip pairs of square braces */ 1164262395Sbapt else if (*p == '[') { 1165262395Sbapt braces[UCL_BRACE_SQUARE][0] ++; 1166262395Sbapt } 1167262395Sbapt else if (*p == ']') { 1168262395Sbapt braces[UCL_BRACE_SQUARE][1] ++; 1169262395Sbapt if (braces[UCL_BRACE_SQUARE][1] <= braces[UCL_BRACE_SQUARE][0]) { 1170262395Sbapt /* This is not a termination symbol, continue */ 1171262395Sbapt ucl_chunk_skipc (chunk, p); 1172262395Sbapt continue; 1173262395Sbapt } 1174262395Sbapt } 1175262395Sbapt else if (*p == '$') { 1176262395Sbapt *var_expand = true; 1177262395Sbapt } 1178262395Sbapt else if (*p == '\\') { 1179262395Sbapt *need_unescape = true; 1180262395Sbapt ucl_chunk_skipc (chunk, p); 1181262395Sbapt if (p < chunk->end) { 1182262395Sbapt ucl_chunk_skipc (chunk, p); 1183262395Sbapt } 1184262395Sbapt continue; 1185262395Sbapt } 1186262395Sbapt 1187262395Sbapt if (ucl_lex_is_atom_end (*p) || (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) { 1188262395Sbapt break; 1189262395Sbapt } 1190262395Sbapt ucl_chunk_skipc (chunk, p); 1191262395Sbapt } 1192262395Sbapt 1193262395Sbapt if (p >= chunk->end) { 1194262395Sbapt ucl_set_err (chunk, UCL_ESYNTAX, "unfinished value", &parser->err); 1195262395Sbapt return false; 1196262395Sbapt } 1197262395Sbapt 1198262395Sbapt return true; 1199262395Sbapt} 1200262395Sbapt 1201262395Sbapt/** 1202262395Sbapt * Parse multiline string ending with \n{term}\n 1203262395Sbapt * @param parser 1204262395Sbapt * @param chunk 1205262395Sbapt * @param term 1206262395Sbapt * @param term_len 1207262395Sbapt * @return size of multiline string or 0 in case of error 1208262395Sbapt */ 1209262395Sbaptstatic int 1210262395Sbaptucl_parse_multiline_string (struct ucl_parser *parser, 1211262395Sbapt struct ucl_chunk *chunk, const unsigned char *term, 1212262395Sbapt int term_len, unsigned char const **beg, 1213262395Sbapt bool *var_expand) 1214262395Sbapt{ 1215262395Sbapt const unsigned char *p, *c; 1216262395Sbapt bool newline = false; 1217262395Sbapt int len = 0; 1218262395Sbapt 1219262395Sbapt p = chunk->pos; 1220262395Sbapt 1221262395Sbapt c = p; 1222262395Sbapt 1223262395Sbapt while (p < chunk->end) { 1224262395Sbapt if (newline) { 1225262395Sbapt if (chunk->end - p < term_len) { 1226262395Sbapt return 0; 1227262395Sbapt } 1228262395Sbapt else if (memcmp (p, term, term_len) == 0 && (p[term_len] == '\n' || p[term_len] == '\r')) { 1229262395Sbapt len = p - c; 1230262395Sbapt chunk->remain -= term_len; 1231262395Sbapt chunk->pos = p + term_len; 1232262395Sbapt chunk->column = term_len; 1233262395Sbapt *beg = c; 1234262395Sbapt break; 1235262395Sbapt } 1236262395Sbapt } 1237262395Sbapt if (*p == '\n') { 1238262395Sbapt newline = true; 1239262395Sbapt } 1240262395Sbapt else { 1241262395Sbapt if (*p == '$') { 1242262395Sbapt *var_expand = true; 1243262395Sbapt } 1244262395Sbapt newline = false; 1245262395Sbapt } 1246262395Sbapt ucl_chunk_skipc (chunk, p); 1247262395Sbapt } 1248262395Sbapt 1249262395Sbapt return len; 1250262395Sbapt} 1251262395Sbapt 1252263019Sbaptstatic ucl_object_t* 1253263019Sbaptucl_get_value_object (struct ucl_parser *parser) 1254263019Sbapt{ 1255263019Sbapt ucl_object_t *t, *obj = NULL; 1256263019Sbapt 1257263019Sbapt if (parser->stack->obj->type == UCL_ARRAY) { 1258263019Sbapt /* Object must be allocated */ 1259263019Sbapt obj = ucl_object_new (); 1260263019Sbapt t = parser->stack->obj->value.av; 1261263019Sbapt DL_APPEND (t, obj); 1262263019Sbapt parser->cur_obj = obj; 1263263019Sbapt parser->stack->obj->value.av = t; 1264263019Sbapt parser->stack->obj->len ++; 1265263019Sbapt } 1266263019Sbapt else { 1267263019Sbapt /* Object has been already allocated */ 1268263019Sbapt obj = parser->cur_obj; 1269263019Sbapt } 1270263019Sbapt 1271263019Sbapt return obj; 1272263019Sbapt} 1273263019Sbapt 1274262395Sbapt/** 1275262395Sbapt * Handle value data 1276262395Sbapt * @param parser 1277262395Sbapt * @param chunk 1278262395Sbapt * @return 1279262395Sbapt */ 1280262395Sbaptstatic bool 1281262395Sbaptucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk) 1282262395Sbapt{ 1283262395Sbapt const unsigned char *p, *c; 1284263019Sbapt ucl_object_t *obj = NULL; 1285262395Sbapt unsigned int stripped_spaces; 1286262395Sbapt int str_len; 1287262395Sbapt bool need_unescape = false, ucl_escape = false, var_expand = false; 1288262395Sbapt 1289262395Sbapt p = chunk->pos; 1290262395Sbapt 1291263019Sbapt /* Skip any spaces and comments */ 1292263019Sbapt if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) || 1293263019Sbapt (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) { 1294263019Sbapt while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 1295263019Sbapt ucl_chunk_skipc (chunk, p); 1296263019Sbapt } 1297263019Sbapt if (!ucl_skip_comments (parser)) { 1298263019Sbapt return false; 1299263019Sbapt } 1300263019Sbapt p = chunk->pos; 1301263019Sbapt } 1302263019Sbapt 1303262395Sbapt while (p < chunk->end) { 1304262395Sbapt c = p; 1305262395Sbapt switch (*p) { 1306262395Sbapt case '"': 1307263019Sbapt obj = ucl_get_value_object (parser); 1308262395Sbapt ucl_chunk_skipc (chunk, p); 1309262395Sbapt if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) { 1310262395Sbapt return false; 1311262395Sbapt } 1312262395Sbapt str_len = chunk->pos - c - 2; 1313262395Sbapt obj->type = UCL_STRING; 1314262395Sbapt if ((str_len = ucl_copy_or_store_ptr (parser, c + 1, &obj->trash_stack[UCL_TRASH_VALUE], 1315262395Sbapt &obj->value.sv, str_len, need_unescape, false, var_expand)) == -1) { 1316262395Sbapt return false; 1317262395Sbapt } 1318262395Sbapt obj->len = str_len; 1319262395Sbapt parser->state = UCL_STATE_AFTER_VALUE; 1320262395Sbapt p = chunk->pos; 1321262395Sbapt return true; 1322262395Sbapt break; 1323262395Sbapt case '{': 1324263019Sbapt obj = ucl_get_value_object (parser); 1325262395Sbapt /* We have a new object */ 1326262395Sbapt obj = ucl_add_parser_stack (obj, parser, false, parser->stack->level); 1327268896Sbapt if (obj == NULL) { 1328268896Sbapt return false; 1329268896Sbapt } 1330262395Sbapt 1331262395Sbapt ucl_chunk_skipc (chunk, p); 1332262395Sbapt return true; 1333262395Sbapt break; 1334262395Sbapt case '[': 1335263019Sbapt obj = ucl_get_value_object (parser); 1336262395Sbapt /* We have a new array */ 1337262395Sbapt obj = ucl_add_parser_stack (obj, parser, true, parser->stack->level); 1338268896Sbapt if (obj == NULL) { 1339268896Sbapt return false; 1340268896Sbapt } 1341262395Sbapt 1342262395Sbapt ucl_chunk_skipc (chunk, p); 1343262395Sbapt return true; 1344262395Sbapt break; 1345263019Sbapt case ']': 1346263019Sbapt /* We have the array ending */ 1347263019Sbapt if (parser->stack && parser->stack->obj->type == UCL_ARRAY) { 1348263019Sbapt parser->state = UCL_STATE_AFTER_VALUE; 1349263019Sbapt return true; 1350263019Sbapt } 1351263019Sbapt else { 1352263019Sbapt goto parse_string; 1353263019Sbapt } 1354263019Sbapt break; 1355262395Sbapt case '<': 1356263019Sbapt obj = ucl_get_value_object (parser); 1357262395Sbapt /* We have something like multiline value, which must be <<[A-Z]+\n */ 1358262395Sbapt if (chunk->end - p > 3) { 1359262395Sbapt if (memcmp (p, "<<", 2) == 0) { 1360262395Sbapt p += 2; 1361262395Sbapt /* We allow only uppercase characters in multiline definitions */ 1362262395Sbapt while (p < chunk->end && *p >= 'A' && *p <= 'Z') { 1363262395Sbapt p ++; 1364262395Sbapt } 1365262395Sbapt if (*p =='\n') { 1366262395Sbapt /* Set chunk positions and start multiline parsing */ 1367262395Sbapt c += 2; 1368262395Sbapt chunk->remain -= p - c; 1369262395Sbapt chunk->pos = p + 1; 1370262395Sbapt chunk->column = 0; 1371262395Sbapt chunk->line ++; 1372262395Sbapt if ((str_len = ucl_parse_multiline_string (parser, chunk, c, 1373262395Sbapt p - c, &c, &var_expand)) == 0) { 1374262395Sbapt ucl_set_err (chunk, UCL_ESYNTAX, "unterminated multiline value", &parser->err); 1375262395Sbapt return false; 1376262395Sbapt } 1377262395Sbapt obj->type = UCL_STRING; 1378262395Sbapt if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE], 1379262395Sbapt &obj->value.sv, str_len - 1, false, false, var_expand)) == -1) { 1380262395Sbapt return false; 1381262395Sbapt } 1382262395Sbapt obj->len = str_len; 1383262395Sbapt parser->state = UCL_STATE_AFTER_VALUE; 1384262395Sbapt return true; 1385262395Sbapt } 1386262395Sbapt } 1387262395Sbapt } 1388262395Sbapt /* Fallback to ordinary strings */ 1389262395Sbapt default: 1390263019Sbaptparse_string: 1391263019Sbapt if (obj == NULL) { 1392263019Sbapt obj = ucl_get_value_object (parser); 1393262395Sbapt } 1394262395Sbapt /* Parse atom */ 1395262395Sbapt if (ucl_test_character (*p, UCL_CHARACTER_VALUE_DIGIT_START)) { 1396262395Sbapt if (!ucl_lex_number (parser, chunk, obj)) { 1397262395Sbapt if (parser->state == UCL_STATE_ERROR) { 1398262395Sbapt return false; 1399262395Sbapt } 1400262395Sbapt } 1401262395Sbapt else { 1402262395Sbapt parser->state = UCL_STATE_AFTER_VALUE; 1403262395Sbapt return true; 1404262395Sbapt } 1405262395Sbapt /* Fallback to normal string */ 1406262395Sbapt } 1407262395Sbapt 1408262395Sbapt if (!ucl_parse_string_value (parser, chunk, &var_expand, &need_unescape)) { 1409262395Sbapt return false; 1410262395Sbapt } 1411262395Sbapt /* Cut trailing spaces */ 1412262395Sbapt stripped_spaces = 0; 1413262395Sbapt while (ucl_test_character (*(chunk->pos - 1 - stripped_spaces), 1414262395Sbapt UCL_CHARACTER_WHITESPACE)) { 1415262395Sbapt stripped_spaces ++; 1416262395Sbapt } 1417262395Sbapt str_len = chunk->pos - c - stripped_spaces; 1418262395Sbapt if (str_len <= 0) { 1419262395Sbapt ucl_set_err (chunk, 0, "string value must not be empty", &parser->err); 1420262395Sbapt return false; 1421262395Sbapt } 1422262395Sbapt else if (str_len == 4 && memcmp (c, "null", 4) == 0) { 1423262395Sbapt obj->len = 0; 1424262395Sbapt obj->type = UCL_NULL; 1425262395Sbapt } 1426262395Sbapt else if (!ucl_maybe_parse_boolean (obj, c, str_len)) { 1427262395Sbapt obj->type = UCL_STRING; 1428262395Sbapt if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE], 1429262395Sbapt &obj->value.sv, str_len, need_unescape, 1430262395Sbapt false, var_expand)) == -1) { 1431262395Sbapt return false; 1432262395Sbapt } 1433262395Sbapt obj->len = str_len; 1434262395Sbapt } 1435262395Sbapt parser->state = UCL_STATE_AFTER_VALUE; 1436262395Sbapt p = chunk->pos; 1437262395Sbapt 1438262395Sbapt return true; 1439262395Sbapt break; 1440262395Sbapt } 1441262395Sbapt } 1442262395Sbapt 1443262395Sbapt return true; 1444262395Sbapt} 1445262395Sbapt 1446262395Sbapt/** 1447262395Sbapt * Handle after value data 1448262395Sbapt * @param parser 1449262395Sbapt * @param chunk 1450262395Sbapt * @return 1451262395Sbapt */ 1452262395Sbaptstatic bool 1453262395Sbaptucl_parse_after_value (struct ucl_parser *parser, struct ucl_chunk *chunk) 1454262395Sbapt{ 1455262395Sbapt const unsigned char *p; 1456262395Sbapt bool got_sep = false; 1457262395Sbapt struct ucl_stack *st; 1458262395Sbapt 1459262395Sbapt p = chunk->pos; 1460262395Sbapt 1461262395Sbapt while (p < chunk->end) { 1462262395Sbapt if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) { 1463262395Sbapt /* Skip whitespaces */ 1464262395Sbapt ucl_chunk_skipc (chunk, p); 1465262395Sbapt } 1466262395Sbapt else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) { 1467262395Sbapt /* Skip comment */ 1468262395Sbapt if (!ucl_skip_comments (parser)) { 1469262395Sbapt return false; 1470262395Sbapt } 1471262395Sbapt /* Treat comment as a separator */ 1472262395Sbapt got_sep = true; 1473262395Sbapt p = chunk->pos; 1474262395Sbapt } 1475262395Sbapt else if (ucl_test_character (*p, UCL_CHARACTER_VALUE_END)) { 1476262395Sbapt if (*p == '}' || *p == ']') { 1477262395Sbapt if (parser->stack == NULL) { 1478262395Sbapt ucl_set_err (chunk, UCL_ESYNTAX, "end of array or object detected without corresponding start", &parser->err); 1479262395Sbapt return false; 1480262395Sbapt } 1481262395Sbapt if ((*p == '}' && parser->stack->obj->type == UCL_OBJECT) || 1482262395Sbapt (*p == ']' && parser->stack->obj->type == UCL_ARRAY)) { 1483262395Sbapt 1484262395Sbapt /* Pop all nested objects from a stack */ 1485262395Sbapt st = parser->stack; 1486262395Sbapt parser->stack = st->next; 1487262395Sbapt UCL_FREE (sizeof (struct ucl_stack), st); 1488262395Sbapt 1489262395Sbapt while (parser->stack != NULL) { 1490262395Sbapt st = parser->stack; 1491262395Sbapt if (st->next == NULL || st->next->level == st->level) { 1492262395Sbapt break; 1493262395Sbapt } 1494262395Sbapt parser->stack = st->next; 1495262395Sbapt UCL_FREE (sizeof (struct ucl_stack), st); 1496262395Sbapt } 1497262395Sbapt } 1498262395Sbapt else { 1499262395Sbapt ucl_set_err (chunk, UCL_ESYNTAX, "unexpected terminating symbol detected", &parser->err); 1500262395Sbapt return false; 1501262395Sbapt } 1502262395Sbapt 1503262395Sbapt if (parser->stack == NULL) { 1504262395Sbapt /* Ignore everything after a top object */ 1505262395Sbapt return true; 1506262395Sbapt } 1507262395Sbapt else { 1508262395Sbapt ucl_chunk_skipc (chunk, p); 1509262395Sbapt } 1510262395Sbapt got_sep = true; 1511262395Sbapt } 1512262395Sbapt else { 1513262395Sbapt /* Got a separator */ 1514262395Sbapt got_sep = true; 1515262395Sbapt ucl_chunk_skipc (chunk, p); 1516262395Sbapt } 1517262395Sbapt } 1518262395Sbapt else { 1519262395Sbapt /* Anything else */ 1520262395Sbapt if (!got_sep) { 1521262395Sbapt ucl_set_err (chunk, UCL_ESYNTAX, "delimiter is missing", &parser->err); 1522262395Sbapt return false; 1523262395Sbapt } 1524262395Sbapt return true; 1525262395Sbapt } 1526262395Sbapt } 1527262395Sbapt 1528262395Sbapt return true; 1529262395Sbapt} 1530262395Sbapt 1531262395Sbapt/** 1532262395Sbapt * Handle macro data 1533262395Sbapt * @param parser 1534262395Sbapt * @param chunk 1535262395Sbapt * @return 1536262395Sbapt */ 1537262395Sbaptstatic bool 1538262395Sbaptucl_parse_macro_value (struct ucl_parser *parser, 1539262395Sbapt struct ucl_chunk *chunk, struct ucl_macro *macro, 1540262395Sbapt unsigned char const **macro_start, size_t *macro_len) 1541262395Sbapt{ 1542262395Sbapt const unsigned char *p, *c; 1543262395Sbapt bool need_unescape = false, ucl_escape = false, var_expand = false; 1544262395Sbapt 1545262395Sbapt p = chunk->pos; 1546262395Sbapt 1547262395Sbapt switch (*p) { 1548262395Sbapt case '"': 1549262395Sbapt /* We have macro value encoded in quotes */ 1550262395Sbapt c = p; 1551262395Sbapt ucl_chunk_skipc (chunk, p); 1552262395Sbapt if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) { 1553262395Sbapt return false; 1554262395Sbapt } 1555262395Sbapt 1556262395Sbapt *macro_start = c + 1; 1557262395Sbapt *macro_len = chunk->pos - c - 2; 1558262395Sbapt p = chunk->pos; 1559262395Sbapt break; 1560262395Sbapt case '{': 1561262395Sbapt /* We got a multiline macro body */ 1562262395Sbapt ucl_chunk_skipc (chunk, p); 1563262395Sbapt /* Skip spaces at the beginning */ 1564262395Sbapt while (p < chunk->end) { 1565262395Sbapt if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 1566262395Sbapt ucl_chunk_skipc (chunk, p); 1567262395Sbapt } 1568262395Sbapt else { 1569262395Sbapt break; 1570262395Sbapt } 1571262395Sbapt } 1572262395Sbapt c = p; 1573262395Sbapt while (p < chunk->end) { 1574262395Sbapt if (*p == '}') { 1575262395Sbapt break; 1576262395Sbapt } 1577262395Sbapt ucl_chunk_skipc (chunk, p); 1578262395Sbapt } 1579262395Sbapt *macro_start = c; 1580262395Sbapt *macro_len = p - c; 1581262395Sbapt ucl_chunk_skipc (chunk, p); 1582262395Sbapt break; 1583262395Sbapt default: 1584262395Sbapt /* Macro is not enclosed in quotes or braces */ 1585262395Sbapt c = p; 1586262395Sbapt while (p < chunk->end) { 1587262395Sbapt if (ucl_lex_is_atom_end (*p)) { 1588262395Sbapt break; 1589262395Sbapt } 1590262395Sbapt ucl_chunk_skipc (chunk, p); 1591262395Sbapt } 1592262395Sbapt *macro_start = c; 1593262395Sbapt *macro_len = p - c; 1594262395Sbapt break; 1595262395Sbapt } 1596262395Sbapt 1597262395Sbapt /* We are at the end of a macro */ 1598262395Sbapt /* Skip ';' and space characters and return to previous state */ 1599262395Sbapt while (p < chunk->end) { 1600262395Sbapt if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) && *p != ';') { 1601262395Sbapt break; 1602262395Sbapt } 1603262395Sbapt ucl_chunk_skipc (chunk, p); 1604262395Sbapt } 1605262395Sbapt return true; 1606262395Sbapt} 1607262395Sbapt 1608262395Sbapt/** 1609262395Sbapt * Handle the main states of rcl parser 1610262395Sbapt * @param parser parser structure 1611262395Sbapt * @param data the pointer to the beginning of a chunk 1612262395Sbapt * @param len the length of a chunk 1613262395Sbapt * @return true if chunk has been parsed and false in case of error 1614262395Sbapt */ 1615262395Sbaptstatic bool 1616262395Sbaptucl_state_machine (struct ucl_parser *parser) 1617262395Sbapt{ 1618262395Sbapt ucl_object_t *obj; 1619262395Sbapt struct ucl_chunk *chunk = parser->chunks; 1620262395Sbapt const unsigned char *p, *c = NULL, *macro_start = NULL; 1621262395Sbapt unsigned char *macro_escaped; 1622262395Sbapt size_t macro_len = 0; 1623262395Sbapt struct ucl_macro *macro = NULL; 1624262395Sbapt bool next_key = false, end_of_object = false; 1625262395Sbapt 1626262395Sbapt if (parser->top_obj == NULL) { 1627262395Sbapt if (*chunk->pos == '[') { 1628262395Sbapt obj = ucl_add_parser_stack (NULL, parser, true, 0); 1629262395Sbapt } 1630262395Sbapt else { 1631262395Sbapt obj = ucl_add_parser_stack (NULL, parser, false, 0); 1632262395Sbapt } 1633268896Sbapt if (obj == NULL) { 1634268896Sbapt return false; 1635268896Sbapt } 1636262395Sbapt parser->top_obj = obj; 1637262395Sbapt parser->cur_obj = obj; 1638262395Sbapt parser->state = UCL_STATE_INIT; 1639262395Sbapt } 1640262395Sbapt 1641262395Sbapt p = chunk->pos; 1642262395Sbapt while (chunk->pos < chunk->end) { 1643262395Sbapt switch (parser->state) { 1644262395Sbapt case UCL_STATE_INIT: 1645262395Sbapt /* 1646262395Sbapt * At the init state we can either go to the parse array or object 1647262395Sbapt * if we got [ or { correspondingly or can just treat new data as 1648262395Sbapt * a key of newly created object 1649262395Sbapt */ 1650262395Sbapt obj = parser->cur_obj; 1651262395Sbapt if (!ucl_skip_comments (parser)) { 1652262395Sbapt parser->prev_state = parser->state; 1653262395Sbapt parser->state = UCL_STATE_ERROR; 1654262395Sbapt return false; 1655262395Sbapt } 1656262395Sbapt else { 1657262395Sbapt p = chunk->pos; 1658262395Sbapt if (*p == '[') { 1659262395Sbapt parser->state = UCL_STATE_VALUE; 1660262395Sbapt ucl_chunk_skipc (chunk, p); 1661262395Sbapt } 1662262395Sbapt else { 1663262395Sbapt parser->state = UCL_STATE_KEY; 1664262395Sbapt if (*p == '{') { 1665262395Sbapt ucl_chunk_skipc (chunk, p); 1666262395Sbapt } 1667262395Sbapt } 1668262395Sbapt } 1669262395Sbapt break; 1670262395Sbapt case UCL_STATE_KEY: 1671262395Sbapt /* Skip any spaces */ 1672262395Sbapt while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 1673262395Sbapt ucl_chunk_skipc (chunk, p); 1674262395Sbapt } 1675262395Sbapt if (*p == '}') { 1676262395Sbapt /* We have the end of an object */ 1677262395Sbapt parser->state = UCL_STATE_AFTER_VALUE; 1678262395Sbapt continue; 1679262395Sbapt } 1680262395Sbapt if (parser->stack == NULL) { 1681262395Sbapt /* No objects are on stack, but we want to parse a key */ 1682262395Sbapt ucl_set_err (chunk, UCL_ESYNTAX, "top object is finished but the parser " 1683262395Sbapt "expects a key", &parser->err); 1684262395Sbapt parser->prev_state = parser->state; 1685262395Sbapt parser->state = UCL_STATE_ERROR; 1686262395Sbapt return false; 1687262395Sbapt } 1688262395Sbapt if (!ucl_parse_key (parser, chunk, &next_key, &end_of_object)) { 1689262395Sbapt parser->prev_state = parser->state; 1690262395Sbapt parser->state = UCL_STATE_ERROR; 1691262395Sbapt return false; 1692262395Sbapt } 1693262395Sbapt if (end_of_object) { 1694262395Sbapt p = chunk->pos; 1695262395Sbapt parser->state = UCL_STATE_AFTER_VALUE; 1696262395Sbapt continue; 1697262395Sbapt } 1698262395Sbapt else if (parser->state != UCL_STATE_MACRO_NAME) { 1699262395Sbapt if (next_key && parser->stack->obj->type == UCL_OBJECT) { 1700262395Sbapt /* Parse more keys and nest objects accordingly */ 1701268896Sbapt obj = ucl_add_parser_stack (parser->cur_obj, parser, false, 1702268896Sbapt parser->stack->level + 1); 1703268896Sbapt if (obj == NULL) { 1704268896Sbapt return false; 1705268896Sbapt } 1706262395Sbapt } 1707262395Sbapt else { 1708262395Sbapt parser->state = UCL_STATE_VALUE; 1709262395Sbapt } 1710262395Sbapt } 1711262395Sbapt else { 1712262395Sbapt c = chunk->pos; 1713262395Sbapt } 1714262395Sbapt p = chunk->pos; 1715262395Sbapt break; 1716262395Sbapt case UCL_STATE_VALUE: 1717262395Sbapt /* We need to check what we do have */ 1718262395Sbapt if (!ucl_parse_value (parser, chunk)) { 1719262395Sbapt parser->prev_state = parser->state; 1720262395Sbapt parser->state = UCL_STATE_ERROR; 1721262395Sbapt return false; 1722262395Sbapt } 1723262395Sbapt /* State is set in ucl_parse_value call */ 1724262395Sbapt p = chunk->pos; 1725262395Sbapt break; 1726262395Sbapt case UCL_STATE_AFTER_VALUE: 1727262395Sbapt if (!ucl_parse_after_value (parser, chunk)) { 1728262395Sbapt parser->prev_state = parser->state; 1729262395Sbapt parser->state = UCL_STATE_ERROR; 1730262395Sbapt return false; 1731262395Sbapt } 1732262395Sbapt if (parser->stack != NULL) { 1733262395Sbapt if (parser->stack->obj->type == UCL_OBJECT) { 1734262395Sbapt parser->state = UCL_STATE_KEY; 1735262395Sbapt } 1736262395Sbapt else { 1737262395Sbapt /* Array */ 1738262395Sbapt parser->state = UCL_STATE_VALUE; 1739262395Sbapt } 1740262395Sbapt } 1741262395Sbapt else { 1742262395Sbapt /* Skip everything at the end */ 1743262395Sbapt return true; 1744262395Sbapt } 1745262395Sbapt p = chunk->pos; 1746262395Sbapt break; 1747262395Sbapt case UCL_STATE_MACRO_NAME: 1748262395Sbapt if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 1749262395Sbapt ucl_chunk_skipc (chunk, p); 1750262395Sbapt } 1751262395Sbapt else if (p - c > 0) { 1752262395Sbapt /* We got macro name */ 1753262395Sbapt macro_len = (size_t)(p - c); 1754262395Sbapt HASH_FIND (hh, parser->macroes, c, macro_len, macro); 1755262395Sbapt if (macro == NULL) { 1756262395Sbapt ucl_create_err (&parser->err, "error on line %d at column %d: " 1757262395Sbapt "unknown macro: '%.*s', character: '%c'", 1758262395Sbapt chunk->line, chunk->column, (int)(p - c), c, *chunk->pos); 1759262395Sbapt parser->state = UCL_STATE_ERROR; 1760262395Sbapt return false; 1761262395Sbapt } 1762262395Sbapt /* Now we need to skip all spaces */ 1763262395Sbapt while (p < chunk->end) { 1764262395Sbapt if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 1765262395Sbapt if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) { 1766262395Sbapt /* Skip comment */ 1767262395Sbapt if (!ucl_skip_comments (parser)) { 1768262395Sbapt return false; 1769262395Sbapt } 1770262395Sbapt p = chunk->pos; 1771262395Sbapt } 1772262395Sbapt break; 1773262395Sbapt } 1774262395Sbapt ucl_chunk_skipc (chunk, p); 1775262395Sbapt } 1776262395Sbapt parser->state = UCL_STATE_MACRO; 1777262395Sbapt } 1778262395Sbapt break; 1779262395Sbapt case UCL_STATE_MACRO: 1780262395Sbapt if (!ucl_parse_macro_value (parser, chunk, macro, 1781262395Sbapt ¯o_start, ¯o_len)) { 1782262395Sbapt parser->prev_state = parser->state; 1783262395Sbapt parser->state = UCL_STATE_ERROR; 1784262395Sbapt return false; 1785262395Sbapt } 1786262395Sbapt macro_len = ucl_expand_variable (parser, ¯o_escaped, macro_start, macro_len); 1787262395Sbapt parser->state = parser->prev_state; 1788262395Sbapt if (macro_escaped == NULL) { 1789262395Sbapt if (!macro->handler (macro_start, macro_len, macro->ud)) { 1790262395Sbapt return false; 1791262395Sbapt } 1792262395Sbapt } 1793262395Sbapt else { 1794262395Sbapt if (!macro->handler (macro_escaped, macro_len, macro->ud)) { 1795262395Sbapt UCL_FREE (macro_len + 1, macro_escaped); 1796262395Sbapt return false; 1797262395Sbapt } 1798262395Sbapt UCL_FREE (macro_len + 1, macro_escaped); 1799262395Sbapt } 1800262395Sbapt p = chunk->pos; 1801262395Sbapt break; 1802262395Sbapt default: 1803262395Sbapt /* TODO: add all states */ 1804262395Sbapt ucl_set_err (chunk, UCL_EINTERNAL, "internal error: parser is in an unknown state", &parser->err); 1805262395Sbapt parser->state = UCL_STATE_ERROR; 1806262395Sbapt return false; 1807262395Sbapt } 1808262395Sbapt } 1809262395Sbapt 1810262395Sbapt return true; 1811262395Sbapt} 1812262395Sbapt 1813262395Sbaptstruct ucl_parser* 1814262395Sbaptucl_parser_new (int flags) 1815262395Sbapt{ 1816262395Sbapt struct ucl_parser *new; 1817262395Sbapt 1818262395Sbapt new = UCL_ALLOC (sizeof (struct ucl_parser)); 1819268896Sbapt if (new == NULL) { 1820268896Sbapt return NULL; 1821268896Sbapt } 1822262395Sbapt memset (new, 0, sizeof (struct ucl_parser)); 1823262395Sbapt 1824262395Sbapt ucl_parser_register_macro (new, "include", ucl_include_handler, new); 1825262395Sbapt ucl_parser_register_macro (new, "try_include", ucl_try_include_handler, new); 1826262395Sbapt ucl_parser_register_macro (new, "includes", ucl_includes_handler, new); 1827262395Sbapt 1828262395Sbapt new->flags = flags; 1829262395Sbapt 1830262395Sbapt /* Initial assumption about filevars */ 1831262395Sbapt ucl_parser_set_filevars (new, NULL, false); 1832262395Sbapt 1833262395Sbapt return new; 1834262395Sbapt} 1835262395Sbapt 1836262395Sbapt 1837262395Sbaptvoid 1838262395Sbaptucl_parser_register_macro (struct ucl_parser *parser, const char *macro, 1839262395Sbapt ucl_macro_handler handler, void* ud) 1840262395Sbapt{ 1841262395Sbapt struct ucl_macro *new; 1842262395Sbapt 1843268896Sbapt if (macro == NULL || handler == NULL) { 1844268896Sbapt return; 1845268896Sbapt } 1846262395Sbapt new = UCL_ALLOC (sizeof (struct ucl_macro)); 1847268896Sbapt if (new == NULL) { 1848268896Sbapt return; 1849268896Sbapt } 1850262395Sbapt memset (new, 0, sizeof (struct ucl_macro)); 1851262395Sbapt new->handler = handler; 1852262395Sbapt new->name = strdup (macro); 1853262395Sbapt new->ud = ud; 1854262395Sbapt HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new); 1855262395Sbapt} 1856262395Sbapt 1857262395Sbaptvoid 1858262395Sbaptucl_parser_register_variable (struct ucl_parser *parser, const char *var, 1859262395Sbapt const char *value) 1860262395Sbapt{ 1861262395Sbapt struct ucl_variable *new = NULL, *cur; 1862262395Sbapt 1863262395Sbapt if (var == NULL) { 1864262395Sbapt return; 1865262395Sbapt } 1866262395Sbapt 1867262395Sbapt /* Find whether a variable already exists */ 1868262395Sbapt LL_FOREACH (parser->variables, cur) { 1869262395Sbapt if (strcmp (cur->var, var) == 0) { 1870262395Sbapt new = cur; 1871262395Sbapt break; 1872262395Sbapt } 1873262395Sbapt } 1874262395Sbapt 1875262395Sbapt if (value == NULL) { 1876262395Sbapt 1877262395Sbapt if (new != NULL) { 1878262395Sbapt /* Remove variable */ 1879262395Sbapt LL_DELETE (parser->variables, new); 1880262395Sbapt free (new->var); 1881262395Sbapt free (new->value); 1882262395Sbapt UCL_FREE (sizeof (struct ucl_variable), new); 1883262395Sbapt } 1884262395Sbapt else { 1885262395Sbapt /* Do nothing */ 1886262395Sbapt return; 1887262395Sbapt } 1888262395Sbapt } 1889262395Sbapt else { 1890262395Sbapt if (new == NULL) { 1891262395Sbapt new = UCL_ALLOC (sizeof (struct ucl_variable)); 1892268896Sbapt if (new == NULL) { 1893268896Sbapt return; 1894268896Sbapt } 1895262395Sbapt memset (new, 0, sizeof (struct ucl_variable)); 1896262395Sbapt new->var = strdup (var); 1897262395Sbapt new->var_len = strlen (var); 1898262395Sbapt new->value = strdup (value); 1899262395Sbapt new->value_len = strlen (value); 1900262395Sbapt 1901262395Sbapt LL_PREPEND (parser->variables, new); 1902262395Sbapt } 1903262395Sbapt else { 1904262395Sbapt free (new->value); 1905262395Sbapt new->value = strdup (value); 1906262395Sbapt new->value_len = strlen (value); 1907262395Sbapt } 1908262395Sbapt } 1909262395Sbapt} 1910262395Sbapt 1911268896Sbaptvoid 1912268896Sbaptucl_parser_set_variables_handler (struct ucl_parser *parser, 1913268896Sbapt ucl_variable_handler handler, void *ud) 1914268896Sbapt{ 1915268896Sbapt parser->var_handler = handler; 1916268896Sbapt parser->var_data = ud; 1917268896Sbapt} 1918268896Sbapt 1919262395Sbaptbool 1920262395Sbaptucl_parser_add_chunk (struct ucl_parser *parser, const unsigned char *data, 1921262395Sbapt size_t len) 1922262395Sbapt{ 1923262395Sbapt struct ucl_chunk *chunk; 1924262395Sbapt 1925268896Sbapt if (data == NULL || len == 0) { 1926268896Sbapt ucl_create_err (&parser->err, "invalid chunk added"); 1927268896Sbapt return false; 1928268896Sbapt } 1929262395Sbapt if (parser->state != UCL_STATE_ERROR) { 1930262395Sbapt chunk = UCL_ALLOC (sizeof (struct ucl_chunk)); 1931268896Sbapt if (chunk == NULL) { 1932268896Sbapt ucl_create_err (&parser->err, "cannot allocate chunk structure"); 1933268896Sbapt return false; 1934268896Sbapt } 1935262395Sbapt chunk->begin = data; 1936262395Sbapt chunk->remain = len; 1937262395Sbapt chunk->pos = chunk->begin; 1938262395Sbapt chunk->end = chunk->begin + len; 1939262395Sbapt chunk->line = 1; 1940262395Sbapt chunk->column = 0; 1941262395Sbapt LL_PREPEND (parser->chunks, chunk); 1942262395Sbapt parser->recursion ++; 1943262395Sbapt if (parser->recursion > UCL_MAX_RECURSION) { 1944262395Sbapt ucl_create_err (&parser->err, "maximum include nesting limit is reached: %d", 1945262395Sbapt parser->recursion); 1946262395Sbapt return false; 1947262395Sbapt } 1948262395Sbapt return ucl_state_machine (parser); 1949262395Sbapt } 1950262395Sbapt 1951262395Sbapt ucl_create_err (&parser->err, "a parser is in an invalid state"); 1952262395Sbapt 1953262395Sbapt return false; 1954262395Sbapt} 1955268896Sbapt 1956268896Sbaptbool 1957268896Sbaptucl_parser_add_string (struct ucl_parser *parser, const char *data, 1958268896Sbapt size_t len) 1959268896Sbapt{ 1960268896Sbapt if (data == NULL) { 1961268896Sbapt ucl_create_err (&parser->err, "invalid string added"); 1962268896Sbapt return false; 1963268896Sbapt } 1964268896Sbapt if (len == 0) { 1965268896Sbapt len = strlen (data); 1966268896Sbapt } 1967268896Sbapt 1968268896Sbapt return ucl_parser_add_chunk (parser, (const unsigned char *)data, len); 1969268896Sbapt} 1970