Deleted Added
full compact
ucl_parser.c (262975) ucl_parser.c (263648)
1/* Copyright (c) 2013, Vsevolod Stakhov
2 * All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 * * Redistributions of source code must retain the above copyright
7 * notice, this list of conditions and the following disclaimer.
8 * * Redistributions in binary form must reproduce the above copyright
9 * notice, this list of conditions and the following disclaimer in the
10 * documentation and/or other materials provided with the distribution.
11 *
12 * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
13 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
14 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
15 * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
16 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
18 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
19 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
20 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
21 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
22 */
23
24#include "ucl.h"
25#include "ucl_internal.h"
26#include "ucl_chartable.h"
27
28/**
29 * @file ucl_parser.c
30 * The implementation of the UCL parser
31 */
32
/**
 * Snapshot of a chunk's read cursor, filled by ucl_chunk_save_state and
 * consumed by ucl_chunk_restore_state
 */
struct ucl_parser_saved_state {
	unsigned int line;		/* saved chunk->line */
	unsigned int column;		/* saved chunk->column */
	size_t remain;			/* saved chunk->remain */
	const unsigned char *pos;	/* saved chunk->pos */
};
39
/**
 * Advance the cursor by exactly one character, keeping the chunk's line,
 * column, position and remaining-length counters in step with it.
 * A newline bumps the line counter and resets the column to zero; any other
 * character bumps the column.
 * @param chunk chunk whose line/column/pos/remain fields are updated
 * @param p modifiable cursor expression; it is dereferenced and incremented
 */
#define ucl_chunk_skipc(chunk, p) do {					\
	if (*(p) == '\n') {						\
		(chunk)->line ++;					\
		(chunk)->column = 0;					\
	}								\
	else {								\
		(chunk)->column ++;					\
	}								\
	/* Parenthesised so that arguments such as *pp work too */	\
	(p) ++;								\
	(chunk)->pos ++;						\
	(chunk)->remain --;						\
} while (0)
57
58/**
59 * Save parser state
60 * @param chunk
61 * @param s
62 */
63static inline void
64ucl_chunk_save_state (struct ucl_chunk *chunk, struct ucl_parser_saved_state *s)
65{
66 s->column = chunk->column;
67 s->pos = chunk->pos;
68 s->line = chunk->line;
69 s->remain = chunk->remain;
70}
71
72/**
73 * Restore parser state
74 * @param chunk
75 * @param s
76 */
77static inline void
78ucl_chunk_restore_state (struct ucl_chunk *chunk, struct ucl_parser_saved_state *s)
79{
80 chunk->column = s->column;
81 chunk->pos = s->pos;
82 chunk->line = s->line;
83 chunk->remain = s->remain;
84}
85
86static inline void
87ucl_set_err (struct ucl_chunk *chunk, int code, const char *str, UT_string **err)
88{
89 if (chunk->pos < chunk->end) {
90 if (isgraph (*chunk->pos)) {
91 ucl_create_err (err, "error on line %d at column %d: '%s', character: '%c'",
92 chunk->line, chunk->column, str, *chunk->pos);
93 }
94 else {
95 ucl_create_err (err, "error on line %d at column %d: '%s', character: '0x%02x'",
96 chunk->line, chunk->column, str, (int)*chunk->pos);
97 }
98 }
99 else {
100 ucl_create_err (err, "error at the end of chunk: %s", str);
101 }
102}
103
104/**
105 * Skip all comments from the current pos resolving nested and multiline comments
106 * @param parser
107 * @return
108 */
109static bool
110ucl_skip_comments (struct ucl_parser *parser)
111{
112 struct ucl_chunk *chunk = parser->chunks;
113 const unsigned char *p;
114 int comments_nested = 0;
115
116 p = chunk->pos;
117
118start:
119 if (*p == '#') {
120 if (parser->state != UCL_STATE_SCOMMENT &&
121 parser->state != UCL_STATE_MCOMMENT) {
122 while (p < chunk->end) {
123 if (*p == '\n') {
124 ucl_chunk_skipc (chunk, p);
125 goto start;
126 }
127 ucl_chunk_skipc (chunk, p);
128 }
129 }
130 }
131 else if (*p == '/' && chunk->remain >= 2) {
132 if (p[1] == '*') {
133 ucl_chunk_skipc (chunk, p);
134 comments_nested ++;
135 ucl_chunk_skipc (chunk, p);
136
137 while (p < chunk->end) {
138 if (*p == '*') {
139 ucl_chunk_skipc (chunk, p);
140 if (*p == '/') {
141 comments_nested --;
142 if (comments_nested == 0) {
143 ucl_chunk_skipc (chunk, p);
144 goto start;
145 }
146 }
147 ucl_chunk_skipc (chunk, p);
148 }
149 else if (p[0] == '/' && chunk->remain >= 2 && p[1] == '*') {
150 comments_nested ++;
151 ucl_chunk_skipc (chunk, p);
152 ucl_chunk_skipc (chunk, p);
153 continue;
154 }
155 ucl_chunk_skipc (chunk, p);
156 }
157 if (comments_nested != 0) {
158 ucl_set_err (chunk, UCL_ENESTED, "unfinished multiline comment", &parser->err);
159 return false;
160 }
161 }
162 }
163
164 return true;
165}
166
/**
 * Map a size suffix character to its numeric multiplier
 * @param c suffix character, matched case-insensitively ('k', 'm' or 'g')
 * @param is_bytes if true use powers of 1024, otherwise powers of 1000
 * @return the multiplier, or 1 for an unrecognised character
 */
static inline unsigned long
ucl_lex_num_multiplier (const unsigned char c, bool is_bytes) {
	long decimal, binary;

	switch (tolower (c)) {
	case 'm':
		decimal = 1000 * 1000;
		binary = 1024 * 1024;
		break;
	case 'k':
		decimal = 1000;
		binary = 1024;
		break;
	case 'g':
		decimal = 1000 * 1000 * 1000;
		binary = 1024 * 1024 * 1024;
		break;
	default:
		return 1;
	}

	return is_bytes ? binary : decimal;
}
197
198
/**
 * Map a time suffix character to the number of seconds it stands for
 * @param c suffix character, matched case-insensitively:
 * 'm' minutes, 'h' hours, 'd' days, 'w' weeks, 'y' years (365 days)
 * @return the multiplier in seconds, or 1 for an unrecognised character
 */
static inline double
ucl_lex_time_multiplier (const unsigned char c) {
	static const struct {
		char c;
		double mult;
	} multipliers[] = {
		{'m', 60},
		{'h', 60 * 60},
		{'d', 60 * 60 * 24},
		{'w', 60 * 60 * 24 * 7},
		/* A year is 365 days, not 365 weeks */
		{'y', 60 * 60 * 24 * 365}
	};
	const int lc = tolower (c);
	size_t i;

	for (i = 0; i < sizeof (multipliers) / sizeof (multipliers[0]); i ++) {
		if (lc == multipliers[i].c) {
			return multipliers[i].mult;
		}
	}

	return 1;
}
226
227/**
228 * Return true if a character is a end of an atom
229 * @param c
230 * @return
231 */
232static inline bool
233ucl_lex_is_atom_end (const unsigned char c)
234{
235 return ucl_test_character (c, UCL_CHARACTER_VALUE_END);
236}
237
/**
 * Tell whether two consecutive characters open a comment
 * @param c1 first character
 * @param c2 character following c1 (only inspected when c1 is '/')
 * @return true for '#' or for '/' followed by '*'
 */
static inline bool
ucl_lex_is_comment (const unsigned char c1, const unsigned char c2)
{
	switch (c1) {
	case '#':
		return true;
	case '/':
		return c2 == '*';
	default:
		return false;
	}
}
251
252/**
253 * Check variable found
254 * @param parser
255 * @param ptr
256 * @param remain
257 * @param out_len
258 * @param strict
259 * @param found
260 * @return
261 */
262static inline const char *
263ucl_check_variable_safe (struct ucl_parser *parser, const char *ptr, size_t remain,
264 size_t *out_len, bool strict, bool *found)
265{
266 struct ucl_variable *var;
267
268 LL_FOREACH (parser->variables, var) {
269 if (strict) {
270 if (remain == var->var_len) {
271 if (memcmp (ptr, var->var, var->var_len) == 0) {
272 *out_len += var->value_len;
273 *found = true;
274 return (ptr + var->var_len);
275 }
276 }
277 }
278 else {
279 if (remain >= var->var_len) {
280 if (memcmp (ptr, var->var, var->var_len) == 0) {
281 *out_len += var->value_len;
282 *found = true;
283 return (ptr + var->var_len);
284 }
285 }
286 }
287 }
288
289 return ptr;
290}
291
/**
 * Size one '$' construct during the measuring pass of variable expansion
 * @param parser parser whose variables list is consulted
 * @param ptr first character after the '$'
 * @param remain number of bytes available at ptr
 * @param out_len running output length, increased by what this construct
 * will occupy after expansion
 * @param vars_found set to true when a known variable is recognised
 * @return position from which the caller should continue scanning
 */
static const char *
ucl_check_variable (struct ucl_parser *parser, const char *ptr, size_t remain, size_t *out_len, bool *vars_found)
{
	const char *p, *end, *ret = ptr;
	bool found = false, closed = false;

	if (remain == 0) {
		/* A '$' at the very end of input is kept literally */
		(*out_len) ++;
		return ret;
	}

	if (*ptr == '{') {
		/* We need to match the variable enclosed in braces */
		p = ptr + 1;
		end = ptr + remain;
		while (p < end) {
			if (*p == '}') {
				closed = true;
				ret = ucl_check_variable_safe (parser, ptr + 1, p - ptr - 1, out_len, true, &found);
				if (found) {
					/* Step over the closing brace as well */
					ret ++;
					*vars_found = true;
				}
				else {
					/* Unknown name: "${" stays in the output */
					*out_len += 2;
				}
				break;
			}
			p ++;
		}
		if (!closed) {
			/*
			 * Unterminated "${": the '$' is still emitted, so count it
			 * here; the caller resumes at '{' and counts the rest
			 */
			(*out_len) ++;
		}
	}
	else if (*ptr != '$') {
		ret = ucl_check_variable_safe (parser, ptr, remain, out_len, false, &found);
		if (found) {
			*vars_found = true;
		}
		else {
			/* Unknown name: the '$' stays in the output */
			(*out_len) ++;
		}
	}
	else {
		/* "$$" is an escaped dollar sign producing a single '$' */
		ret ++;
		(*out_len) ++;
	}

	return ret;
}
346
347/**
348 * Expand a single variable
349 * @param parser
350 * @param ptr
351 * @param remain
352 * @param dest
353 * @return
354 */
355static const char *
356ucl_expand_single_variable (struct ucl_parser *parser, const char *ptr,
357 size_t remain, unsigned char **dest)
358{
359 unsigned char *d = *dest;
360 const char *p = ptr + 1, *ret;
361 struct ucl_variable *var;
362 bool found = false;
363
364 ret = ptr + 1;
365 remain --;
366
367 if (*p == '$') {
368 *d++ = *p++;
369 *dest = d;
370 return p;
371 }
372 else if (*p == '{') {
373 p ++;
374 ret += 2;
375 remain -= 2;
376 }
377
378 LL_FOREACH (parser->variables, var) {
379 if (remain >= var->var_len) {
380 if (memcmp (p, var->var, var->var_len) == 0) {
381 memcpy (d, var->value, var->value_len);
382 ret += var->var_len;
383 d += var->value_len;
384 found = true;
385 break;
386 }
387 }
388 }
389 if (!found) {
390 memcpy (d, ptr, 2);
391 d += 2;
392 ret --;
393 }
394
395 *dest = d;
396 return ret;
397}
398
/**
 * Expand variables in a string.
 * Runs two passes over src: the first sizes the result with
 * ucl_check_variable, the second writes it with ucl_expand_single_variable.
 * @param parser parser whose variables list is consulted
 * @param dst receives a newly allocated NUL-terminated buffer, or NULL when
 * no variable was found or the allocation failed
 * @param src input string
 * @param in_len length of src in bytes
 * @return length of the expanded string, or in_len when *dst is NULL
 */
static ssize_t
ucl_expand_variable (struct ucl_parser *parser, unsigned char **dst,
		const char *src, size_t in_len)
{
	const char *p, *end = src + in_len;
	unsigned char *d;
	size_t out_len = 0;
	bool vars_found = false;

	/* Pass 1: measure. '<' keeps the loop finite even on an overshoot */
	p = src;
	while (p < end) {
		if (*p == '$') {
			p = ucl_check_variable (parser, p + 1, end - p - 1, &out_len, &vars_found);
		}
		else {
			p ++;
			out_len ++;
		}
	}

	if (!vars_found) {
		/* Trivial case */
		*dst = NULL;
		return in_len;
	}

	*dst = UCL_ALLOC (out_len + 1);
	if (*dst == NULL) {
		return in_len;
	}

	/* Pass 2: write the expanded text */
	d = *dst;
	p = src;
	while (p < end) {
		if (*p == '$') {
			p = ucl_expand_single_variable (parser, p, end - p, &d);
		}
		else {
			*d++ = *p++;
		}
	}

	*d = '\0';

	return out_len;
}
453
454/**
455 * Store or copy pointer to the trash stack
456 * @param parser parser object
457 * @param src src string
458 * @param dst destination buffer (trash stack pointer)
459 * @param dst_const const destination pointer (e.g. value of object)
460 * @param in_len input length
461 * @param need_unescape need to unescape source (and copy it)
462 * @param need_lowercase need to lowercase value (and copy)
463 * @param need_expand need to expand variables (and copy as well)
464 * @return output length (excluding \0 symbol)
465 */
466static inline ssize_t
467ucl_copy_or_store_ptr (struct ucl_parser *parser,
468 const unsigned char *src, unsigned char **dst,
469 const char **dst_const, size_t in_len,
470 bool need_unescape, bool need_lowercase, bool need_expand)
471{
472 ssize_t ret = -1, tret;
473 unsigned char *tmp;
474
475 if (need_unescape || need_lowercase ||
476 (need_expand && parser->variables != NULL) ||
477 !(parser->flags & UCL_PARSER_ZEROCOPY)) {
478 /* Copy string */
479 *dst = UCL_ALLOC (in_len + 1);
480 if (*dst == NULL) {
481 ucl_set_err (parser->chunks, 0, "cannot allocate memory for a string", &parser->err);
482 return false;
483 }
484 if (need_lowercase) {
485 ret = ucl_strlcpy_tolower (*dst, src, in_len + 1);
486 }
487 else {
488 ret = ucl_strlcpy_unsafe (*dst, src, in_len + 1);
489 }
490
491 if (need_unescape) {
492 ret = ucl_unescape_json_string (*dst, ret);
493 }
494 if (need_expand) {
495 tmp = *dst;
496 tret = ret;
497 ret = ucl_expand_variable (parser, dst, tmp, ret);
498 if (*dst == NULL) {
499 /* Nothing to expand */
500 *dst = tmp;
501 ret = tret;
502 }
503 }
504 *dst_const = *dst;
505 }
506 else {
507 *dst_const = src;
508 ret = in_len;
509 }
510
511 return ret;
512}
513
514/**
515 * Create and append an object at the specified level
516 * @param parser
517 * @param is_array
518 * @param level
519 * @return
520 */
521static inline ucl_object_t *
522ucl_add_parser_stack (ucl_object_t *obj, struct ucl_parser *parser, bool is_array, int level)
523{
524 struct ucl_stack *st;
525
526 if (!is_array) {
527 if (obj == NULL) {
528 obj = ucl_object_typed_new (UCL_OBJECT);
529 }
530 else {
531 obj->type = UCL_OBJECT;
532 }
533 obj->value.ov = ucl_hash_create ();
534 parser->state = UCL_STATE_KEY;
535 }
536 else {
537 if (obj == NULL) {
538 obj = ucl_object_typed_new (UCL_ARRAY);
539 }
540 else {
541 obj->type = UCL_ARRAY;
542 }
543 parser->state = UCL_STATE_VALUE;
544 }
545
546 st = UCL_ALLOC (sizeof (struct ucl_stack));
1/* Copyright (c) 2013, Vsevolod Stakhov
2 * All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 * * Redistributions of source code must retain the above copyright
7 * notice, this list of conditions and the following disclaimer.
8 * * Redistributions in binary form must reproduce the above copyright
9 * notice, this list of conditions and the following disclaimer in the
10 * documentation and/or other materials provided with the distribution.
11 *
12 * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
13 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
14 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
15 * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
16 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
18 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
19 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
20 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
21 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
22 */
23
24#include "ucl.h"
25#include "ucl_internal.h"
26#include "ucl_chartable.h"
27
28/**
29 * @file ucl_parser.c
30 * The implementation of the UCL parser
31 */
32
/**
 * Snapshot of a chunk's read cursor, filled by ucl_chunk_save_state and
 * consumed by ucl_chunk_restore_state
 */
struct ucl_parser_saved_state {
	unsigned int line;		/* saved chunk->line */
	unsigned int column;		/* saved chunk->column */
	size_t remain;			/* saved chunk->remain */
	const unsigned char *pos;	/* saved chunk->pos */
};
39
/**
 * Advance the cursor by exactly one character, keeping the chunk's line,
 * column, position and remaining-length counters in step with it.
 * A newline bumps the line counter and resets the column to zero; any other
 * character bumps the column.
 * @param chunk chunk whose line/column/pos/remain fields are updated
 * @param p modifiable cursor expression; it is dereferenced and incremented
 */
#define ucl_chunk_skipc(chunk, p) do {					\
	if (*(p) == '\n') {						\
		(chunk)->line ++;					\
		(chunk)->column = 0;					\
	}								\
	else {								\
		(chunk)->column ++;					\
	}								\
	/* Parenthesised so that arguments such as *pp work too */	\
	(p) ++;								\
	(chunk)->pos ++;						\
	(chunk)->remain --;						\
} while (0)
57
58/**
59 * Save parser state
60 * @param chunk
61 * @param s
62 */
63static inline void
64ucl_chunk_save_state (struct ucl_chunk *chunk, struct ucl_parser_saved_state *s)
65{
66 s->column = chunk->column;
67 s->pos = chunk->pos;
68 s->line = chunk->line;
69 s->remain = chunk->remain;
70}
71
72/**
73 * Restore parser state
74 * @param chunk
75 * @param s
76 */
77static inline void
78ucl_chunk_restore_state (struct ucl_chunk *chunk, struct ucl_parser_saved_state *s)
79{
80 chunk->column = s->column;
81 chunk->pos = s->pos;
82 chunk->line = s->line;
83 chunk->remain = s->remain;
84}
85
86static inline void
87ucl_set_err (struct ucl_chunk *chunk, int code, const char *str, UT_string **err)
88{
89 if (chunk->pos < chunk->end) {
90 if (isgraph (*chunk->pos)) {
91 ucl_create_err (err, "error on line %d at column %d: '%s', character: '%c'",
92 chunk->line, chunk->column, str, *chunk->pos);
93 }
94 else {
95 ucl_create_err (err, "error on line %d at column %d: '%s', character: '0x%02x'",
96 chunk->line, chunk->column, str, (int)*chunk->pos);
97 }
98 }
99 else {
100 ucl_create_err (err, "error at the end of chunk: %s", str);
101 }
102}
103
104/**
105 * Skip all comments from the current pos resolving nested and multiline comments
106 * @param parser
107 * @return
108 */
109static bool
110ucl_skip_comments (struct ucl_parser *parser)
111{
112 struct ucl_chunk *chunk = parser->chunks;
113 const unsigned char *p;
114 int comments_nested = 0;
115
116 p = chunk->pos;
117
118start:
119 if (*p == '#') {
120 if (parser->state != UCL_STATE_SCOMMENT &&
121 parser->state != UCL_STATE_MCOMMENT) {
122 while (p < chunk->end) {
123 if (*p == '\n') {
124 ucl_chunk_skipc (chunk, p);
125 goto start;
126 }
127 ucl_chunk_skipc (chunk, p);
128 }
129 }
130 }
131 else if (*p == '/' && chunk->remain >= 2) {
132 if (p[1] == '*') {
133 ucl_chunk_skipc (chunk, p);
134 comments_nested ++;
135 ucl_chunk_skipc (chunk, p);
136
137 while (p < chunk->end) {
138 if (*p == '*') {
139 ucl_chunk_skipc (chunk, p);
140 if (*p == '/') {
141 comments_nested --;
142 if (comments_nested == 0) {
143 ucl_chunk_skipc (chunk, p);
144 goto start;
145 }
146 }
147 ucl_chunk_skipc (chunk, p);
148 }
149 else if (p[0] == '/' && chunk->remain >= 2 && p[1] == '*') {
150 comments_nested ++;
151 ucl_chunk_skipc (chunk, p);
152 ucl_chunk_skipc (chunk, p);
153 continue;
154 }
155 ucl_chunk_skipc (chunk, p);
156 }
157 if (comments_nested != 0) {
158 ucl_set_err (chunk, UCL_ENESTED, "unfinished multiline comment", &parser->err);
159 return false;
160 }
161 }
162 }
163
164 return true;
165}
166
/**
 * Map a size suffix character to its numeric multiplier
 * @param c suffix character, matched case-insensitively ('k', 'm' or 'g')
 * @param is_bytes if true use powers of 1024, otherwise powers of 1000
 * @return the multiplier, or 1 for an unrecognised character
 */
static inline unsigned long
ucl_lex_num_multiplier (const unsigned char c, bool is_bytes) {
	long decimal, binary;

	switch (tolower (c)) {
	case 'm':
		decimal = 1000 * 1000;
		binary = 1024 * 1024;
		break;
	case 'k':
		decimal = 1000;
		binary = 1024;
		break;
	case 'g':
		decimal = 1000 * 1000 * 1000;
		binary = 1024 * 1024 * 1024;
		break;
	default:
		return 1;
	}

	return is_bytes ? binary : decimal;
}
197
198
/**
 * Map a time suffix character to the number of seconds it stands for
 * @param c suffix character, matched case-insensitively:
 * 'm' minutes, 'h' hours, 'd' days, 'w' weeks, 'y' years (365 days)
 * @return the multiplier in seconds, or 1 for an unrecognised character
 */
static inline double
ucl_lex_time_multiplier (const unsigned char c) {
	static const struct {
		char c;
		double mult;
	} multipliers[] = {
		{'m', 60},
		{'h', 60 * 60},
		{'d', 60 * 60 * 24},
		{'w', 60 * 60 * 24 * 7},
		/* A year is 365 days, not 365 weeks */
		{'y', 60 * 60 * 24 * 365}
	};
	const int lc = tolower (c);
	size_t i;

	for (i = 0; i < sizeof (multipliers) / sizeof (multipliers[0]); i ++) {
		if (lc == multipliers[i].c) {
			return multipliers[i].mult;
		}
	}

	return 1;
}
226
227/**
228 * Return true if a character is a end of an atom
229 * @param c
230 * @return
231 */
232static inline bool
233ucl_lex_is_atom_end (const unsigned char c)
234{
235 return ucl_test_character (c, UCL_CHARACTER_VALUE_END);
236}
237
/**
 * Tell whether two consecutive characters open a comment
 * @param c1 first character
 * @param c2 character following c1 (only inspected when c1 is '/')
 * @return true for '#' or for '/' followed by '*'
 */
static inline bool
ucl_lex_is_comment (const unsigned char c1, const unsigned char c2)
{
	switch (c1) {
	case '#':
		return true;
	case '/':
		return c2 == '*';
	default:
		return false;
	}
}
251
252/**
253 * Check variable found
254 * @param parser
255 * @param ptr
256 * @param remain
257 * @param out_len
258 * @param strict
259 * @param found
260 * @return
261 */
262static inline const char *
263ucl_check_variable_safe (struct ucl_parser *parser, const char *ptr, size_t remain,
264 size_t *out_len, bool strict, bool *found)
265{
266 struct ucl_variable *var;
267
268 LL_FOREACH (parser->variables, var) {
269 if (strict) {
270 if (remain == var->var_len) {
271 if (memcmp (ptr, var->var, var->var_len) == 0) {
272 *out_len += var->value_len;
273 *found = true;
274 return (ptr + var->var_len);
275 }
276 }
277 }
278 else {
279 if (remain >= var->var_len) {
280 if (memcmp (ptr, var->var, var->var_len) == 0) {
281 *out_len += var->value_len;
282 *found = true;
283 return (ptr + var->var_len);
284 }
285 }
286 }
287 }
288
289 return ptr;
290}
291
/**
 * Size one '$' construct during the measuring pass of variable expansion
 * @param parser parser whose variables list is consulted
 * @param ptr first character after the '$'
 * @param remain number of bytes available at ptr
 * @param out_len running output length, increased by what this construct
 * will occupy after expansion
 * @param vars_found set to true when a known variable is recognised
 * @return position from which the caller should continue scanning
 */
static const char *
ucl_check_variable (struct ucl_parser *parser, const char *ptr, size_t remain, size_t *out_len, bool *vars_found)
{
	const char *p, *end, *ret = ptr;
	bool found = false, closed = false;

	if (remain == 0) {
		/* A '$' at the very end of input is kept literally */
		(*out_len) ++;
		return ret;
	}

	if (*ptr == '{') {
		/* We need to match the variable enclosed in braces */
		p = ptr + 1;
		end = ptr + remain;
		while (p < end) {
			if (*p == '}') {
				closed = true;
				ret = ucl_check_variable_safe (parser, ptr + 1, p - ptr - 1, out_len, true, &found);
				if (found) {
					/* Step over the closing brace as well */
					ret ++;
					*vars_found = true;
				}
				else {
					/* Unknown name: "${" stays in the output */
					*out_len += 2;
				}
				break;
			}
			p ++;
		}
		if (!closed) {
			/*
			 * Unterminated "${": the '$' is still emitted, so count it
			 * here; the caller resumes at '{' and counts the rest
			 */
			(*out_len) ++;
		}
	}
	else if (*ptr != '$') {
		ret = ucl_check_variable_safe (parser, ptr, remain, out_len, false, &found);
		if (found) {
			*vars_found = true;
		}
		else {
			/* Unknown name: the '$' stays in the output */
			(*out_len) ++;
		}
	}
	else {
		/* "$$" is an escaped dollar sign producing a single '$' */
		ret ++;
		(*out_len) ++;
	}

	return ret;
}
346
347/**
348 * Expand a single variable
349 * @param parser
350 * @param ptr
351 * @param remain
352 * @param dest
353 * @return
354 */
355static const char *
356ucl_expand_single_variable (struct ucl_parser *parser, const char *ptr,
357 size_t remain, unsigned char **dest)
358{
359 unsigned char *d = *dest;
360 const char *p = ptr + 1, *ret;
361 struct ucl_variable *var;
362 bool found = false;
363
364 ret = ptr + 1;
365 remain --;
366
367 if (*p == '$') {
368 *d++ = *p++;
369 *dest = d;
370 return p;
371 }
372 else if (*p == '{') {
373 p ++;
374 ret += 2;
375 remain -= 2;
376 }
377
378 LL_FOREACH (parser->variables, var) {
379 if (remain >= var->var_len) {
380 if (memcmp (p, var->var, var->var_len) == 0) {
381 memcpy (d, var->value, var->value_len);
382 ret += var->var_len;
383 d += var->value_len;
384 found = true;
385 break;
386 }
387 }
388 }
389 if (!found) {
390 memcpy (d, ptr, 2);
391 d += 2;
392 ret --;
393 }
394
395 *dest = d;
396 return ret;
397}
398
/**
 * Expand variables in a string.
 * Runs two passes over src: the first sizes the result with
 * ucl_check_variable, the second writes it with ucl_expand_single_variable.
 * @param parser parser whose variables list is consulted
 * @param dst receives a newly allocated NUL-terminated buffer, or NULL when
 * no variable was found or the allocation failed
 * @param src input string
 * @param in_len length of src in bytes
 * @return length of the expanded string, or in_len when *dst is NULL
 */
static ssize_t
ucl_expand_variable (struct ucl_parser *parser, unsigned char **dst,
		const char *src, size_t in_len)
{
	const char *p, *end = src + in_len;
	unsigned char *d;
	size_t out_len = 0;
	bool vars_found = false;

	/* Pass 1: measure. '<' keeps the loop finite even on an overshoot */
	p = src;
	while (p < end) {
		if (*p == '$') {
			p = ucl_check_variable (parser, p + 1, end - p - 1, &out_len, &vars_found);
		}
		else {
			p ++;
			out_len ++;
		}
	}

	if (!vars_found) {
		/* Trivial case */
		*dst = NULL;
		return in_len;
	}

	*dst = UCL_ALLOC (out_len + 1);
	if (*dst == NULL) {
		return in_len;
	}

	/* Pass 2: write the expanded text */
	d = *dst;
	p = src;
	while (p < end) {
		if (*p == '$') {
			p = ucl_expand_single_variable (parser, p, end - p, &d);
		}
		else {
			*d++ = *p++;
		}
	}

	*d = '\0';

	return out_len;
}
453
454/**
455 * Store or copy pointer to the trash stack
456 * @param parser parser object
457 * @param src src string
458 * @param dst destination buffer (trash stack pointer)
459 * @param dst_const const destination pointer (e.g. value of object)
460 * @param in_len input length
461 * @param need_unescape need to unescape source (and copy it)
462 * @param need_lowercase need to lowercase value (and copy)
463 * @param need_expand need to expand variables (and copy as well)
464 * @return output length (excluding \0 symbol)
465 */
466static inline ssize_t
467ucl_copy_or_store_ptr (struct ucl_parser *parser,
468 const unsigned char *src, unsigned char **dst,
469 const char **dst_const, size_t in_len,
470 bool need_unescape, bool need_lowercase, bool need_expand)
471{
472 ssize_t ret = -1, tret;
473 unsigned char *tmp;
474
475 if (need_unescape || need_lowercase ||
476 (need_expand && parser->variables != NULL) ||
477 !(parser->flags & UCL_PARSER_ZEROCOPY)) {
478 /* Copy string */
479 *dst = UCL_ALLOC (in_len + 1);
480 if (*dst == NULL) {
481 ucl_set_err (parser->chunks, 0, "cannot allocate memory for a string", &parser->err);
482 return false;
483 }
484 if (need_lowercase) {
485 ret = ucl_strlcpy_tolower (*dst, src, in_len + 1);
486 }
487 else {
488 ret = ucl_strlcpy_unsafe (*dst, src, in_len + 1);
489 }
490
491 if (need_unescape) {
492 ret = ucl_unescape_json_string (*dst, ret);
493 }
494 if (need_expand) {
495 tmp = *dst;
496 tret = ret;
497 ret = ucl_expand_variable (parser, dst, tmp, ret);
498 if (*dst == NULL) {
499 /* Nothing to expand */
500 *dst = tmp;
501 ret = tret;
502 }
503 }
504 *dst_const = *dst;
505 }
506 else {
507 *dst_const = src;
508 ret = in_len;
509 }
510
511 return ret;
512}
513
514/**
515 * Create and append an object at the specified level
516 * @param parser
517 * @param is_array
518 * @param level
519 * @return
520 */
521static inline ucl_object_t *
522ucl_add_parser_stack (ucl_object_t *obj, struct ucl_parser *parser, bool is_array, int level)
523{
524 struct ucl_stack *st;
525
526 if (!is_array) {
527 if (obj == NULL) {
528 obj = ucl_object_typed_new (UCL_OBJECT);
529 }
530 else {
531 obj->type = UCL_OBJECT;
532 }
533 obj->value.ov = ucl_hash_create ();
534 parser->state = UCL_STATE_KEY;
535 }
536 else {
537 if (obj == NULL) {
538 obj = ucl_object_typed_new (UCL_ARRAY);
539 }
540 else {
541 obj->type = UCL_ARRAY;
542 }
543 parser->state = UCL_STATE_VALUE;
544 }
545
546 st = UCL_ALLOC (sizeof (struct ucl_stack));
547 if (st == NULL) {
548 ucl_set_err (parser->chunks, 0, "cannot allocate memory for an object", &parser->err);
549 return NULL;
550 }
547 st->obj = obj;
548 st->level = level;
549 LL_PREPEND (parser->stack, st);
550 parser->cur_obj = obj;
551
552 return obj;
553}
554
555int
556ucl_maybe_parse_number (ucl_object_t *obj,
551 st->obj = obj;
552 st->level = level;
553 LL_PREPEND (parser->stack, st);
554 parser->cur_obj = obj;
555
556 return obj;
557}
558
559int
560ucl_maybe_parse_number (ucl_object_t *obj,
557 const char *start, const char *end, const char **pos, bool allow_double, bool number_bytes)
561 const char *start, const char *end, const char **pos,
562 bool allow_double, bool number_bytes, bool allow_time)
558{
559 const char *p = start, *c = start;
560 char *endptr;
561 bool got_dot = false, got_exp = false, need_double = false,
563{
564 const char *p = start, *c = start;
565 char *endptr;
566 bool got_dot = false, got_exp = false, need_double = false,
562 is_date = false, valid_start = false, is_hex = false,
567 is_time = false, valid_start = false, is_hex = false,
563 is_neg = false;
564 double dv = 0;
565 int64_t lv = 0;
566
567 if (*p == '-') {
568 is_neg = true;
569 c ++;
570 p ++;
571 }
572 while (p < end) {
573 if (is_hex && isxdigit (*p)) {
574 p ++;
575 }
576 else if (isdigit (*p)) {
577 valid_start = true;
578 p ++;
579 }
580 else if (!is_hex && (*p == 'x' || *p == 'X')) {
581 is_hex = true;
582 allow_double = false;
583 c = p + 1;
584 }
585 else if (allow_double) {
586 if (p == c) {
587 /* Empty digits sequence, not a number */
588 *pos = start;
589 return EINVAL;
590 }
591 else if (*p == '.') {
592 if (got_dot) {
593 /* Double dots, not a number */
594 *pos = start;
595 return EINVAL;
596 }
597 else {
598 got_dot = true;
599 need_double = true;
600 p ++;
601 }
602 }
603 else if (*p == 'e' || *p == 'E') {
604 if (got_exp) {
605 /* Double exp, not a number */
606 *pos = start;
607 return EINVAL;
608 }
609 else {
610 got_exp = true;
611 need_double = true;
612 p ++;
613 if (p >= end) {
614 *pos = start;
615 return EINVAL;
616 }
617 if (!isdigit (*p) && *p != '+' && *p != '-') {
618 /* Wrong exponent sign */
619 *pos = start;
620 return EINVAL;
621 }
622 else {
623 p ++;
624 }
625 }
626 }
627 else {
628 /* Got the end of the number, need to check */
629 break;
630 }
631 }
632 else {
633 break;
634 }
635 }
636
637 if (!valid_start) {
638 *pos = start;
639 return EINVAL;
640 }
641
642 errno = 0;
643 if (need_double) {
644 dv = strtod (c, &endptr);
645 }
646 else {
647 if (is_hex) {
648 lv = strtoimax (c, &endptr, 16);
649 }
650 else {
651 lv = strtoimax (c, &endptr, 10);
652 }
653 }
654 if (errno == ERANGE) {
655 *pos = start;
656 return ERANGE;
657 }
658
659 /* Now check endptr */
568 is_neg = false;
569 double dv = 0;
570 int64_t lv = 0;
571
572 if (*p == '-') {
573 is_neg = true;
574 c ++;
575 p ++;
576 }
577 while (p < end) {
578 if (is_hex && isxdigit (*p)) {
579 p ++;
580 }
581 else if (isdigit (*p)) {
582 valid_start = true;
583 p ++;
584 }
585 else if (!is_hex && (*p == 'x' || *p == 'X')) {
586 is_hex = true;
587 allow_double = false;
588 c = p + 1;
589 }
590 else if (allow_double) {
591 if (p == c) {
592 /* Empty digits sequence, not a number */
593 *pos = start;
594 return EINVAL;
595 }
596 else if (*p == '.') {
597 if (got_dot) {
598 /* Double dots, not a number */
599 *pos = start;
600 return EINVAL;
601 }
602 else {
603 got_dot = true;
604 need_double = true;
605 p ++;
606 }
607 }
608 else if (*p == 'e' || *p == 'E') {
609 if (got_exp) {
610 /* Double exp, not a number */
611 *pos = start;
612 return EINVAL;
613 }
614 else {
615 got_exp = true;
616 need_double = true;
617 p ++;
618 if (p >= end) {
619 *pos = start;
620 return EINVAL;
621 }
622 if (!isdigit (*p) && *p != '+' && *p != '-') {
623 /* Wrong exponent sign */
624 *pos = start;
625 return EINVAL;
626 }
627 else {
628 p ++;
629 }
630 }
631 }
632 else {
633 /* Got the end of the number, need to check */
634 break;
635 }
636 }
637 else {
638 break;
639 }
640 }
641
642 if (!valid_start) {
643 *pos = start;
644 return EINVAL;
645 }
646
647 errno = 0;
648 if (need_double) {
649 dv = strtod (c, &endptr);
650 }
651 else {
652 if (is_hex) {
653 lv = strtoimax (c, &endptr, 16);
654 }
655 else {
656 lv = strtoimax (c, &endptr, 10);
657 }
658 }
659 if (errno == ERANGE) {
660 *pos = start;
661 return ERANGE;
662 }
663
664 /* Now check endptr */
660 if (endptr == NULL || ucl_lex_is_atom_end (*endptr) || *endptr == '\0') {
665 if (endptr == NULL || ucl_lex_is_atom_end (*endptr) || *endptr == '\0' ||
666 ucl_test_character (*endptr, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
661 p = endptr;
662 goto set_obj;
663 }
664
665 if (endptr < end && endptr != start) {
666 p = endptr;
667 switch (*p) {
668 case 'm':
669 case 'M':
670 case 'g':
671 case 'G':
672 case 'k':
673 case 'K':
674 if (end - p >= 2) {
675 if (p[1] == 's' || p[1] == 'S') {
676 /* Milliseconds */
677 if (!need_double) {
678 need_double = true;
679 dv = lv;
680 }
667 p = endptr;
668 goto set_obj;
669 }
670
671 if (endptr < end && endptr != start) {
672 p = endptr;
673 switch (*p) {
674 case 'm':
675 case 'M':
676 case 'g':
677 case 'G':
678 case 'k':
679 case 'K':
680 if (end - p >= 2) {
681 if (p[1] == 's' || p[1] == 'S') {
682 /* Milliseconds */
683 if (!need_double) {
684 need_double = true;
685 dv = lv;
686 }
681 is_date = true;
687 is_time = true;
682 if (p[0] == 'm' || p[0] == 'M') {
683 dv /= 1000.;
684 }
685 else {
686 dv *= ucl_lex_num_multiplier (*p, false);
687 }
688 p += 2;
689 goto set_obj;
690 }
691 else if (number_bytes || (p[1] == 'b' || p[1] == 'B')) {
692 /* Bytes */
693 if (need_double) {
694 need_double = false;
695 lv = dv;
696 }
697 lv *= ucl_lex_num_multiplier (*p, true);
698 p += 2;
699 goto set_obj;
700 }
701 else if (ucl_lex_is_atom_end (p[1])) {
702 if (need_double) {
703 dv *= ucl_lex_num_multiplier (*p, false);
704 }
705 else {
706 lv *= ucl_lex_num_multiplier (*p, number_bytes);
707 }
708 p ++;
709 goto set_obj;
710 }
688 if (p[0] == 'm' || p[0] == 'M') {
689 dv /= 1000.;
690 }
691 else {
692 dv *= ucl_lex_num_multiplier (*p, false);
693 }
694 p += 2;
695 goto set_obj;
696 }
697 else if (number_bytes || (p[1] == 'b' || p[1] == 'B')) {
698 /* Bytes */
699 if (need_double) {
700 need_double = false;
701 lv = dv;
702 }
703 lv *= ucl_lex_num_multiplier (*p, true);
704 p += 2;
705 goto set_obj;
706 }
707 else if (ucl_lex_is_atom_end (p[1])) {
708 if (need_double) {
709 dv *= ucl_lex_num_multiplier (*p, false);
710 }
711 else {
712 lv *= ucl_lex_num_multiplier (*p, number_bytes);
713 }
714 p ++;
715 goto set_obj;
716 }
711 else if (end - p >= 3) {
717 else if (allow_time && end - p >= 3) {
712 if (tolower (p[0]) == 'm' &&
713 tolower (p[1]) == 'i' &&
714 tolower (p[2]) == 'n') {
715 /* Minutes */
716 if (!need_double) {
717 need_double = true;
718 dv = lv;
719 }
718 if (tolower (p[0]) == 'm' &&
719 tolower (p[1]) == 'i' &&
720 tolower (p[2]) == 'n') {
721 /* Minutes */
722 if (!need_double) {
723 need_double = true;
724 dv = lv;
725 }
720 is_date = true;
726 is_time = true;
721 dv *= 60.;
722 p += 3;
723 goto set_obj;
724 }
725 }
726 }
727 else {
728 if (need_double) {
729 dv *= ucl_lex_num_multiplier (*p, false);
730 }
731 else {
732 lv *= ucl_lex_num_multiplier (*p, number_bytes);
733 }
734 p ++;
735 goto set_obj;
736 }
737 break;
738 case 'S':
739 case 's':
727 dv *= 60.;
728 p += 3;
729 goto set_obj;
730 }
731 }
732 }
733 else {
734 if (need_double) {
735 dv *= ucl_lex_num_multiplier (*p, false);
736 }
737 else {
738 lv *= ucl_lex_num_multiplier (*p, number_bytes);
739 }
740 p ++;
741 goto set_obj;
742 }
743 break;
744 case 'S':
745 case 's':
740 if (p == end - 1 || ucl_lex_is_atom_end (p[1])) {
746 if (allow_time &&
747 (p == end - 1 || ucl_lex_is_atom_end (p[1]))) {
741 if (!need_double) {
742 need_double = true;
743 dv = lv;
744 }
745 p ++;
748 if (!need_double) {
749 need_double = true;
750 dv = lv;
751 }
752 p ++;
746 is_date = true;
753 is_time = true;
747 goto set_obj;
748 }
749 break;
750 case 'h':
751 case 'H':
752 case 'd':
753 case 'D':
754 case 'w':
755 case 'W':
756 case 'Y':
757 case 'y':
754 goto set_obj;
755 }
756 break;
757 case 'h':
758 case 'H':
759 case 'd':
760 case 'D':
761 case 'w':
762 case 'W':
763 case 'Y':
764 case 'y':
758 if (p == end - 1 || ucl_lex_is_atom_end (p[1])) {
765 if (allow_time &&
766 (p == end - 1 || ucl_lex_is_atom_end (p[1]))) {
759 if (!need_double) {
760 need_double = true;
761 dv = lv;
762 }
767 if (!need_double) {
768 need_double = true;
769 dv = lv;
770 }
763 is_date = true;
771 is_time = true;
764 dv *= ucl_lex_time_multiplier (*p);
765 p ++;
766 goto set_obj;
767 }
768 break;
769 }
770 }
771
772 *pos = c;
773 return EINVAL;
774
775 set_obj:
772 dv *= ucl_lex_time_multiplier (*p);
773 p ++;
774 goto set_obj;
775 }
776 break;
777 }
778 }
779
780 *pos = c;
781 return EINVAL;
782
783 set_obj:
776 if (allow_double && (need_double || is_date)) {
777 if (!is_date) {
784 if (allow_double && (need_double || is_time)) {
785 if (!is_time) {
778 obj->type = UCL_FLOAT;
779 }
780 else {
781 obj->type = UCL_TIME;
782 }
783 obj->value.dv = is_neg ? (-dv) : dv;
784 }
785 else {
786 obj->type = UCL_INT;
787 obj->value.iv = is_neg ? (-lv) : lv;
788 }
789 *pos = p;
790 return 0;
791}
792
793/**
794 * Parse possible number
795 * @param parser
796 * @param chunk
797 * @return true if a number has been parsed
798 */
799static bool
800ucl_lex_number (struct ucl_parser *parser,
801 struct ucl_chunk *chunk, ucl_object_t *obj)
802{
803 const unsigned char *pos;
804 int ret;
805
786 obj->type = UCL_FLOAT;
787 }
788 else {
789 obj->type = UCL_TIME;
790 }
791 obj->value.dv = is_neg ? (-dv) : dv;
792 }
793 else {
794 obj->type = UCL_INT;
795 obj->value.iv = is_neg ? (-lv) : lv;
796 }
797 *pos = p;
798 return 0;
799}
800
801/**
802 * Parse possible number
803 * @param parser
804 * @param chunk
805 * @return true if a number has been parsed
806 */
807static bool
808ucl_lex_number (struct ucl_parser *parser,
809 struct ucl_chunk *chunk, ucl_object_t *obj)
810{
811 const unsigned char *pos;
812 int ret;
813
806 ret = ucl_maybe_parse_number (obj, chunk->pos, chunk->end, (const char **)&pos, true, false);
814 ret = ucl_maybe_parse_number (obj, chunk->pos, chunk->end, (const char **)&pos,
815 true, false, ((parser->flags & UCL_PARSER_NO_TIME) == 0));
807
808 if (ret == 0) {
809 chunk->remain -= pos - chunk->pos;
810 chunk->column += pos - chunk->pos;
811 chunk->pos = pos;
812 return true;
813 }
814 else if (ret == ERANGE) {
815 ucl_set_err (chunk, ERANGE, "numeric value out of range", &parser->err);
816 }
817
818 return false;
819}
820
821/**
822 * Parse quoted string with possible escapes
823 * @param parser
824 * @param chunk
825 * @return true if a string has been parsed
826 */
827static bool
828ucl_lex_json_string (struct ucl_parser *parser,
829 struct ucl_chunk *chunk, bool *need_unescape, bool *ucl_escape, bool *var_expand)
830{
831 const unsigned char *p = chunk->pos;
832 unsigned char c;
833 int i;
834
835 while (p < chunk->end) {
836 c = *p;
837 if (c < 0x1F) {
838 /* Unmasked control character */
839 if (c == '\n') {
840 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected newline", &parser->err);
841 }
842 else {
843 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected control character", &parser->err);
844 }
845 return false;
846 }
847 else if (c == '\\') {
848 ucl_chunk_skipc (chunk, p);
849 c = *p;
850 if (p >= chunk->end) {
851 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished escape character", &parser->err);
852 return false;
853 }
854 else if (ucl_test_character (c, UCL_CHARACTER_ESCAPE)) {
855 if (c == 'u') {
856 ucl_chunk_skipc (chunk, p);
857 for (i = 0; i < 4 && p < chunk->end; i ++) {
858 if (!isxdigit (*p)) {
859 ucl_set_err (chunk, UCL_ESYNTAX, "invalid utf escape", &parser->err);
860 return false;
861 }
862 ucl_chunk_skipc (chunk, p);
863 }
864 if (p >= chunk->end) {
865 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished escape character", &parser->err);
866 return false;
867 }
868 }
869 else {
870 ucl_chunk_skipc (chunk, p);
871 }
872 }
873 *need_unescape = true;
874 *ucl_escape = true;
875 continue;
876 }
877 else if (c == '"') {
878 ucl_chunk_skipc (chunk, p);
879 return true;
880 }
881 else if (ucl_test_character (c, UCL_CHARACTER_UCL_UNSAFE)) {
882 *ucl_escape = true;
883 }
884 else if (c == '$') {
885 *var_expand = true;
886 }
887 ucl_chunk_skipc (chunk, p);
888 }
889
890 ucl_set_err (chunk, UCL_ESYNTAX, "no quote at the end of json string", &parser->err);
891 return false;
892}
893
894/**
895 * Parse a key in an object
896 * @param parser
897 * @param chunk
898 * @return true if a key has been parsed
899 */
900static bool
901ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk, bool *next_key, bool *end_of_object)
902{
903 const unsigned char *p, *c = NULL, *end, *t;
904 const char *key = NULL;
905 bool got_quote = false, got_eq = false, got_semicolon = false,
906 need_unescape = false, ucl_escape = false, var_expand = false,
907 got_content = false, got_sep = false;
908 ucl_object_t *nobj, *tobj;
909 ucl_hash_t *container;
910 ssize_t keylen;
911
912 p = chunk->pos;
913
914 if (*p == '.') {
915 /* It is macro actually */
916 ucl_chunk_skipc (chunk, p);
917 parser->prev_state = parser->state;
918 parser->state = UCL_STATE_MACRO_NAME;
919 return true;
920 }
921 while (p < chunk->end) {
922 /*
923 * A key must start with alpha, number, '/' or '_' and end with space character
924 */
925 if (c == NULL) {
926 if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
927 if (!ucl_skip_comments (parser)) {
928 return false;
929 }
930 p = chunk->pos;
931 }
932 else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
933 ucl_chunk_skipc (chunk, p);
934 }
935 else if (ucl_test_character (*p, UCL_CHARACTER_KEY_START)) {
936 /* The first symbol */
937 c = p;
938 ucl_chunk_skipc (chunk, p);
939 got_content = true;
940 }
941 else if (*p == '"') {
942 /* JSON style key */
943 c = p + 1;
944 got_quote = true;
945 got_content = true;
946 ucl_chunk_skipc (chunk, p);
947 }
948 else if (*p == '}') {
949 /* We have actually end of an object */
950 *end_of_object = true;
951 return true;
952 }
953 else if (*p == '.') {
954 ucl_chunk_skipc (chunk, p);
955 parser->prev_state = parser->state;
956 parser->state = UCL_STATE_MACRO_NAME;
957 return true;
958 }
959 else {
960 /* Invalid identifier */
961 ucl_set_err (chunk, UCL_ESYNTAX, "key must begin with a letter", &parser->err);
962 return false;
963 }
964 }
965 else {
966 /* Parse the body of a key */
967 if (!got_quote) {
968 if (ucl_test_character (*p, UCL_CHARACTER_KEY)) {
969 got_content = true;
970 ucl_chunk_skipc (chunk, p);
971 }
972 else if (ucl_test_character (*p, UCL_CHARACTER_KEY_SEP)) {
973 end = p;
974 break;
975 }
976 else {
977 ucl_set_err (chunk, UCL_ESYNTAX, "invalid character in a key", &parser->err);
978 return false;
979 }
980 }
981 else {
982 /* We need to parse json like quoted string */
983 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
984 return false;
985 }
986 /* Always escape keys obtained via json */
987 end = chunk->pos - 1;
988 p = chunk->pos;
989 break;
990 }
991 }
992 }
993
994 if (p >= chunk->end && got_content) {
995 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished key", &parser->err);
996 return false;
997 }
998 else if (!got_content) {
999 return true;
1000 }
1001 *end_of_object = false;
1002 /* We are now at the end of the key, need to parse the rest */
1003 while (p < chunk->end) {
1004 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
1005 ucl_chunk_skipc (chunk, p);
1006 }
1007 else if (*p == '=') {
1008 if (!got_eq && !got_semicolon) {
1009 ucl_chunk_skipc (chunk, p);
1010 got_eq = true;
1011 }
1012 else {
1013 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected '=' character", &parser->err);
1014 return false;
1015 }
1016 }
1017 else if (*p == ':') {
1018 if (!got_eq && !got_semicolon) {
1019 ucl_chunk_skipc (chunk, p);
1020 got_semicolon = true;
1021 }
1022 else {
1023 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected ':' character", &parser->err);
1024 return false;
1025 }
1026 }
1027 else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1028 /* Check for comment */
1029 if (!ucl_skip_comments (parser)) {
1030 return false;
1031 }
1032 p = chunk->pos;
1033 }
1034 else {
1035 /* Start value */
1036 break;
1037 }
1038 }
1039
1040 if (p >= chunk->end && got_content) {
1041 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished key", &parser->err);
1042 return false;
1043 }
1044
1045 got_sep = got_semicolon || got_eq;
1046
1047 if (!got_sep) {
1048 /*
1049 * Maybe we have more keys nested, so search for termination character.
1050 * Possible choices:
1051 * 1) key1 key2 ... keyN [:=] value <- we treat that as error
1052 * 2) key1 ... keyN {} or [] <- we treat that as nested objects
1053 * 3) key1 value[;,\n] <- we treat that as linear object
1054 */
1055 t = p;
1056 *next_key = false;
1057 while (ucl_test_character (*t, UCL_CHARACTER_WHITESPACE)) {
1058 t ++;
1059 }
1060 /* Check first non-space character after a key */
1061 if (*t != '{' && *t != '[') {
1062 while (t < chunk->end) {
1063 if (*t == ',' || *t == ';' || *t == '\n' || *t == '\r') {
1064 break;
1065 }
1066 else if (*t == '{' || *t == '[') {
1067 *next_key = true;
1068 break;
1069 }
1070 t ++;
1071 }
1072 }
1073 }
1074
1075 /* Create a new object */
1076 nobj = ucl_object_new ();
1077 keylen = ucl_copy_or_store_ptr (parser, c, &nobj->trash_stack[UCL_TRASH_KEY],
1078 &key, end - c, need_unescape, parser->flags & UCL_PARSER_KEY_LOWERCASE, false);
1079 if (keylen == -1) {
1080 ucl_object_free(nobj);
1081 return false;
1082 }
1083 else if (keylen == 0) {
1084 ucl_set_err (chunk, UCL_ESYNTAX, "empty keys are not allowed", &parser->err);
1085 ucl_object_free(nobj);
1086 return false;
1087 }
1088
1089 container = parser->stack->obj->value.ov;
1090 nobj->key = key;
1091 nobj->keylen = keylen;
1092 tobj = ucl_hash_search_obj (container, nobj);
1093 if (tobj == NULL) {
1094 container = ucl_hash_insert_object (container, nobj);
1095 nobj->prev = nobj;
1096 nobj->next = NULL;
1097 parser->stack->obj->len ++;
1098 }
1099 else {
1100 DL_APPEND (tobj, nobj);
1101 }
1102
1103 if (ucl_escape) {
1104 nobj->flags |= UCL_OBJECT_NEED_KEY_ESCAPE;
1105 }
1106 parser->stack->obj->value.ov = container;
1107
1108 parser->cur_obj = nobj;
1109
1110 return true;
1111}
1112
1113/**
1114 * Parse a cl string
1115 * @param parser
1116 * @param chunk
1117 * @return true if a key has been parsed
1118 */
1119static bool
1120ucl_parse_string_value (struct ucl_parser *parser,
1121 struct ucl_chunk *chunk, bool *var_expand, bool *need_unescape)
1122{
1123 const unsigned char *p;
1124 enum {
1125 UCL_BRACE_ROUND = 0,
1126 UCL_BRACE_SQUARE,
1127 UCL_BRACE_FIGURE
1128 };
1129 int braces[3][2] = {{0, 0}, {0, 0}, {0, 0}};
1130
1131 p = chunk->pos;
1132
1133 while (p < chunk->end) {
1134
1135 /* Skip pairs of figure braces */
1136 if (*p == '{') {
1137 braces[UCL_BRACE_FIGURE][0] ++;
1138 }
1139 else if (*p == '}') {
1140 braces[UCL_BRACE_FIGURE][1] ++;
1141 if (braces[UCL_BRACE_FIGURE][1] <= braces[UCL_BRACE_FIGURE][0]) {
1142 /* This is not a termination symbol, continue */
1143 ucl_chunk_skipc (chunk, p);
1144 continue;
1145 }
1146 }
1147 /* Skip pairs of square braces */
1148 else if (*p == '[') {
1149 braces[UCL_BRACE_SQUARE][0] ++;
1150 }
1151 else if (*p == ']') {
1152 braces[UCL_BRACE_SQUARE][1] ++;
1153 if (braces[UCL_BRACE_SQUARE][1] <= braces[UCL_BRACE_SQUARE][0]) {
1154 /* This is not a termination symbol, continue */
1155 ucl_chunk_skipc (chunk, p);
1156 continue;
1157 }
1158 }
1159 else if (*p == '$') {
1160 *var_expand = true;
1161 }
1162 else if (*p == '\\') {
1163 *need_unescape = true;
1164 ucl_chunk_skipc (chunk, p);
1165 if (p < chunk->end) {
1166 ucl_chunk_skipc (chunk, p);
1167 }
1168 continue;
1169 }
1170
1171 if (ucl_lex_is_atom_end (*p) || (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1172 break;
1173 }
1174 ucl_chunk_skipc (chunk, p);
1175 }
1176
1177 if (p >= chunk->end) {
1178 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished value", &parser->err);
1179 return false;
1180 }
1181
1182 return true;
1183}
1184
1185/**
1186 * Parse multiline string ending with \n{term}\n
1187 * @param parser
1188 * @param chunk
1189 * @param term
1190 * @param term_len
1191 * @return size of multiline string or 0 in case of error
1192 */
1193static int
1194ucl_parse_multiline_string (struct ucl_parser *parser,
1195 struct ucl_chunk *chunk, const unsigned char *term,
1196 int term_len, unsigned char const **beg,
1197 bool *var_expand)
1198{
1199 const unsigned char *p, *c;
1200 bool newline = false;
1201 int len = 0;
1202
1203 p = chunk->pos;
1204
1205 c = p;
1206
1207 while (p < chunk->end) {
1208 if (newline) {
1209 if (chunk->end - p < term_len) {
1210 return 0;
1211 }
1212 else if (memcmp (p, term, term_len) == 0 && (p[term_len] == '\n' || p[term_len] == '\r')) {
1213 len = p - c;
1214 chunk->remain -= term_len;
1215 chunk->pos = p + term_len;
1216 chunk->column = term_len;
1217 *beg = c;
1218 break;
1219 }
1220 }
1221 if (*p == '\n') {
1222 newline = true;
1223 }
1224 else {
1225 if (*p == '$') {
1226 *var_expand = true;
1227 }
1228 newline = false;
1229 }
1230 ucl_chunk_skipc (chunk, p);
1231 }
1232
1233 return len;
1234}
1235
1236static ucl_object_t*
1237ucl_get_value_object (struct ucl_parser *parser)
1238{
1239 ucl_object_t *t, *obj = NULL;
1240
1241 if (parser->stack->obj->type == UCL_ARRAY) {
1242 /* Object must be allocated */
1243 obj = ucl_object_new ();
1244 t = parser->stack->obj->value.av;
1245 DL_APPEND (t, obj);
1246 parser->cur_obj = obj;
1247 parser->stack->obj->value.av = t;
1248 parser->stack->obj->len ++;
1249 }
1250 else {
1251 /* Object has been already allocated */
1252 obj = parser->cur_obj;
1253 }
1254
1255 return obj;
1256}
1257
1258/**
1259 * Handle value data
1260 * @param parser
1261 * @param chunk
1262 * @return
1263 */
1264static bool
1265ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1266{
1267 const unsigned char *p, *c;
1268 ucl_object_t *obj = NULL;
1269 unsigned int stripped_spaces;
1270 int str_len;
1271 bool need_unescape = false, ucl_escape = false, var_expand = false;
1272
1273 p = chunk->pos;
1274
1275 /* Skip any spaces and comments */
1276 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) ||
1277 (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1278 while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1279 ucl_chunk_skipc (chunk, p);
1280 }
1281 if (!ucl_skip_comments (parser)) {
1282 return false;
1283 }
1284 p = chunk->pos;
1285 }
1286
1287 while (p < chunk->end) {
1288 c = p;
1289 switch (*p) {
1290 case '"':
1291 obj = ucl_get_value_object (parser);
1292 ucl_chunk_skipc (chunk, p);
1293 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1294 return false;
1295 }
1296 str_len = chunk->pos - c - 2;
1297 obj->type = UCL_STRING;
1298 if ((str_len = ucl_copy_or_store_ptr (parser, c + 1, &obj->trash_stack[UCL_TRASH_VALUE],
1299 &obj->value.sv, str_len, need_unescape, false, var_expand)) == -1) {
1300 return false;
1301 }
1302 obj->len = str_len;
1303 parser->state = UCL_STATE_AFTER_VALUE;
1304 p = chunk->pos;
1305 return true;
1306 break;
1307 case '{':
1308 obj = ucl_get_value_object (parser);
1309 /* We have a new object */
1310 obj = ucl_add_parser_stack (obj, parser, false, parser->stack->level);
816
817 if (ret == 0) {
818 chunk->remain -= pos - chunk->pos;
819 chunk->column += pos - chunk->pos;
820 chunk->pos = pos;
821 return true;
822 }
823 else if (ret == ERANGE) {
824 ucl_set_err (chunk, ERANGE, "numeric value out of range", &parser->err);
825 }
826
827 return false;
828}
829
830/**
831 * Parse quoted string with possible escapes
832 * @param parser
833 * @param chunk
834 * @return true if a string has been parsed
835 */
836static bool
837ucl_lex_json_string (struct ucl_parser *parser,
838 struct ucl_chunk *chunk, bool *need_unescape, bool *ucl_escape, bool *var_expand)
839{
840 const unsigned char *p = chunk->pos;
841 unsigned char c;
842 int i;
843
844 while (p < chunk->end) {
845 c = *p;
846 if (c < 0x1F) {
847 /* Unmasked control character */
848 if (c == '\n') {
849 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected newline", &parser->err);
850 }
851 else {
852 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected control character", &parser->err);
853 }
854 return false;
855 }
856 else if (c == '\\') {
857 ucl_chunk_skipc (chunk, p);
858 c = *p;
859 if (p >= chunk->end) {
860 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished escape character", &parser->err);
861 return false;
862 }
863 else if (ucl_test_character (c, UCL_CHARACTER_ESCAPE)) {
864 if (c == 'u') {
865 ucl_chunk_skipc (chunk, p);
866 for (i = 0; i < 4 && p < chunk->end; i ++) {
867 if (!isxdigit (*p)) {
868 ucl_set_err (chunk, UCL_ESYNTAX, "invalid utf escape", &parser->err);
869 return false;
870 }
871 ucl_chunk_skipc (chunk, p);
872 }
873 if (p >= chunk->end) {
874 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished escape character", &parser->err);
875 return false;
876 }
877 }
878 else {
879 ucl_chunk_skipc (chunk, p);
880 }
881 }
882 *need_unescape = true;
883 *ucl_escape = true;
884 continue;
885 }
886 else if (c == '"') {
887 ucl_chunk_skipc (chunk, p);
888 return true;
889 }
890 else if (ucl_test_character (c, UCL_CHARACTER_UCL_UNSAFE)) {
891 *ucl_escape = true;
892 }
893 else if (c == '$') {
894 *var_expand = true;
895 }
896 ucl_chunk_skipc (chunk, p);
897 }
898
899 ucl_set_err (chunk, UCL_ESYNTAX, "no quote at the end of json string", &parser->err);
900 return false;
901}
902
903/**
904 * Parse a key in an object
905 * @param parser
906 * @param chunk
907 * @return true if a key has been parsed
908 */
909static bool
910ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk, bool *next_key, bool *end_of_object)
911{
912 const unsigned char *p, *c = NULL, *end, *t;
913 const char *key = NULL;
914 bool got_quote = false, got_eq = false, got_semicolon = false,
915 need_unescape = false, ucl_escape = false, var_expand = false,
916 got_content = false, got_sep = false;
917 ucl_object_t *nobj, *tobj;
918 ucl_hash_t *container;
919 ssize_t keylen;
920
921 p = chunk->pos;
922
923 if (*p == '.') {
924 /* It is macro actually */
925 ucl_chunk_skipc (chunk, p);
926 parser->prev_state = parser->state;
927 parser->state = UCL_STATE_MACRO_NAME;
928 return true;
929 }
930 while (p < chunk->end) {
931 /*
932 * A key must start with alpha, number, '/' or '_' and end with space character
933 */
934 if (c == NULL) {
935 if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
936 if (!ucl_skip_comments (parser)) {
937 return false;
938 }
939 p = chunk->pos;
940 }
941 else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
942 ucl_chunk_skipc (chunk, p);
943 }
944 else if (ucl_test_character (*p, UCL_CHARACTER_KEY_START)) {
945 /* The first symbol */
946 c = p;
947 ucl_chunk_skipc (chunk, p);
948 got_content = true;
949 }
950 else if (*p == '"') {
951 /* JSON style key */
952 c = p + 1;
953 got_quote = true;
954 got_content = true;
955 ucl_chunk_skipc (chunk, p);
956 }
957 else if (*p == '}') {
958 /* We have actually end of an object */
959 *end_of_object = true;
960 return true;
961 }
962 else if (*p == '.') {
963 ucl_chunk_skipc (chunk, p);
964 parser->prev_state = parser->state;
965 parser->state = UCL_STATE_MACRO_NAME;
966 return true;
967 }
968 else {
969 /* Invalid identifier */
970 ucl_set_err (chunk, UCL_ESYNTAX, "key must begin with a letter", &parser->err);
971 return false;
972 }
973 }
974 else {
975 /* Parse the body of a key */
976 if (!got_quote) {
977 if (ucl_test_character (*p, UCL_CHARACTER_KEY)) {
978 got_content = true;
979 ucl_chunk_skipc (chunk, p);
980 }
981 else if (ucl_test_character (*p, UCL_CHARACTER_KEY_SEP)) {
982 end = p;
983 break;
984 }
985 else {
986 ucl_set_err (chunk, UCL_ESYNTAX, "invalid character in a key", &parser->err);
987 return false;
988 }
989 }
990 else {
991 /* We need to parse json like quoted string */
992 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
993 return false;
994 }
995 /* Always escape keys obtained via json */
996 end = chunk->pos - 1;
997 p = chunk->pos;
998 break;
999 }
1000 }
1001 }
1002
1003 if (p >= chunk->end && got_content) {
1004 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished key", &parser->err);
1005 return false;
1006 }
1007 else if (!got_content) {
1008 return true;
1009 }
1010 *end_of_object = false;
1011 /* We are now at the end of the key, need to parse the rest */
1012 while (p < chunk->end) {
1013 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
1014 ucl_chunk_skipc (chunk, p);
1015 }
1016 else if (*p == '=') {
1017 if (!got_eq && !got_semicolon) {
1018 ucl_chunk_skipc (chunk, p);
1019 got_eq = true;
1020 }
1021 else {
1022 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected '=' character", &parser->err);
1023 return false;
1024 }
1025 }
1026 else if (*p == ':') {
1027 if (!got_eq && !got_semicolon) {
1028 ucl_chunk_skipc (chunk, p);
1029 got_semicolon = true;
1030 }
1031 else {
1032 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected ':' character", &parser->err);
1033 return false;
1034 }
1035 }
1036 else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1037 /* Check for comment */
1038 if (!ucl_skip_comments (parser)) {
1039 return false;
1040 }
1041 p = chunk->pos;
1042 }
1043 else {
1044 /* Start value */
1045 break;
1046 }
1047 }
1048
1049 if (p >= chunk->end && got_content) {
1050 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished key", &parser->err);
1051 return false;
1052 }
1053
1054 got_sep = got_semicolon || got_eq;
1055
1056 if (!got_sep) {
1057 /*
1058 * Maybe we have more keys nested, so search for termination character.
1059 * Possible choices:
1060 * 1) key1 key2 ... keyN [:=] value <- we treat that as error
1061 * 2) key1 ... keyN {} or [] <- we treat that as nested objects
1062 * 3) key1 value[;,\n] <- we treat that as linear object
1063 */
1064 t = p;
1065 *next_key = false;
1066 while (ucl_test_character (*t, UCL_CHARACTER_WHITESPACE)) {
1067 t ++;
1068 }
1069 /* Check first non-space character after a key */
1070 if (*t != '{' && *t != '[') {
1071 while (t < chunk->end) {
1072 if (*t == ',' || *t == ';' || *t == '\n' || *t == '\r') {
1073 break;
1074 }
1075 else if (*t == '{' || *t == '[') {
1076 *next_key = true;
1077 break;
1078 }
1079 t ++;
1080 }
1081 }
1082 }
1083
1084 /* Create a new object */
1085 nobj = ucl_object_new ();
1086 keylen = ucl_copy_or_store_ptr (parser, c, &nobj->trash_stack[UCL_TRASH_KEY],
1087 &key, end - c, need_unescape, parser->flags & UCL_PARSER_KEY_LOWERCASE, false);
1088 if (keylen == -1) {
1089 ucl_object_free(nobj);
1090 return false;
1091 }
1092 else if (keylen == 0) {
1093 ucl_set_err (chunk, UCL_ESYNTAX, "empty keys are not allowed", &parser->err);
1094 ucl_object_free(nobj);
1095 return false;
1096 }
1097
1098 container = parser->stack->obj->value.ov;
1099 nobj->key = key;
1100 nobj->keylen = keylen;
1101 tobj = ucl_hash_search_obj (container, nobj);
1102 if (tobj == NULL) {
1103 container = ucl_hash_insert_object (container, nobj);
1104 nobj->prev = nobj;
1105 nobj->next = NULL;
1106 parser->stack->obj->len ++;
1107 }
1108 else {
1109 DL_APPEND (tobj, nobj);
1110 }
1111
1112 if (ucl_escape) {
1113 nobj->flags |= UCL_OBJECT_NEED_KEY_ESCAPE;
1114 }
1115 parser->stack->obj->value.ov = container;
1116
1117 parser->cur_obj = nobj;
1118
1119 return true;
1120}
1121
1122/**
1123 * Parse a cl string
1124 * @param parser
1125 * @param chunk
1126 * @return true if a key has been parsed
1127 */
1128static bool
1129ucl_parse_string_value (struct ucl_parser *parser,
1130 struct ucl_chunk *chunk, bool *var_expand, bool *need_unescape)
1131{
1132 const unsigned char *p;
1133 enum {
1134 UCL_BRACE_ROUND = 0,
1135 UCL_BRACE_SQUARE,
1136 UCL_BRACE_FIGURE
1137 };
1138 int braces[3][2] = {{0, 0}, {0, 0}, {0, 0}};
1139
1140 p = chunk->pos;
1141
1142 while (p < chunk->end) {
1143
1144 /* Skip pairs of figure braces */
1145 if (*p == '{') {
1146 braces[UCL_BRACE_FIGURE][0] ++;
1147 }
1148 else if (*p == '}') {
1149 braces[UCL_BRACE_FIGURE][1] ++;
1150 if (braces[UCL_BRACE_FIGURE][1] <= braces[UCL_BRACE_FIGURE][0]) {
1151 /* This is not a termination symbol, continue */
1152 ucl_chunk_skipc (chunk, p);
1153 continue;
1154 }
1155 }
1156 /* Skip pairs of square braces */
1157 else if (*p == '[') {
1158 braces[UCL_BRACE_SQUARE][0] ++;
1159 }
1160 else if (*p == ']') {
1161 braces[UCL_BRACE_SQUARE][1] ++;
1162 if (braces[UCL_BRACE_SQUARE][1] <= braces[UCL_BRACE_SQUARE][0]) {
1163 /* This is not a termination symbol, continue */
1164 ucl_chunk_skipc (chunk, p);
1165 continue;
1166 }
1167 }
1168 else if (*p == '$') {
1169 *var_expand = true;
1170 }
1171 else if (*p == '\\') {
1172 *need_unescape = true;
1173 ucl_chunk_skipc (chunk, p);
1174 if (p < chunk->end) {
1175 ucl_chunk_skipc (chunk, p);
1176 }
1177 continue;
1178 }
1179
1180 if (ucl_lex_is_atom_end (*p) || (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1181 break;
1182 }
1183 ucl_chunk_skipc (chunk, p);
1184 }
1185
1186 if (p >= chunk->end) {
1187 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished value", &parser->err);
1188 return false;
1189 }
1190
1191 return true;
1192}
1193
1194/**
1195 * Parse multiline string ending with \n{term}\n
1196 * @param parser
1197 * @param chunk
1198 * @param term
1199 * @param term_len
1200 * @return size of multiline string or 0 in case of error
1201 */
1202static int
1203ucl_parse_multiline_string (struct ucl_parser *parser,
1204 struct ucl_chunk *chunk, const unsigned char *term,
1205 int term_len, unsigned char const **beg,
1206 bool *var_expand)
1207{
1208 const unsigned char *p, *c;
1209 bool newline = false;
1210 int len = 0;
1211
1212 p = chunk->pos;
1213
1214 c = p;
1215
1216 while (p < chunk->end) {
1217 if (newline) {
1218 if (chunk->end - p < term_len) {
1219 return 0;
1220 }
1221 else if (memcmp (p, term, term_len) == 0 && (p[term_len] == '\n' || p[term_len] == '\r')) {
1222 len = p - c;
1223 chunk->remain -= term_len;
1224 chunk->pos = p + term_len;
1225 chunk->column = term_len;
1226 *beg = c;
1227 break;
1228 }
1229 }
1230 if (*p == '\n') {
1231 newline = true;
1232 }
1233 else {
1234 if (*p == '$') {
1235 *var_expand = true;
1236 }
1237 newline = false;
1238 }
1239 ucl_chunk_skipc (chunk, p);
1240 }
1241
1242 return len;
1243}
1244
1245static ucl_object_t*
1246ucl_get_value_object (struct ucl_parser *parser)
1247{
1248 ucl_object_t *t, *obj = NULL;
1249
1250 if (parser->stack->obj->type == UCL_ARRAY) {
1251 /* Object must be allocated */
1252 obj = ucl_object_new ();
1253 t = parser->stack->obj->value.av;
1254 DL_APPEND (t, obj);
1255 parser->cur_obj = obj;
1256 parser->stack->obj->value.av = t;
1257 parser->stack->obj->len ++;
1258 }
1259 else {
1260 /* Object has been already allocated */
1261 obj = parser->cur_obj;
1262 }
1263
1264 return obj;
1265}
1266
1267/**
1268 * Handle value data
1269 * @param parser
1270 * @param chunk
1271 * @return
1272 */
1273static bool
1274ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1275{
1276 const unsigned char *p, *c;
1277 ucl_object_t *obj = NULL;
1278 unsigned int stripped_spaces;
1279 int str_len;
1280 bool need_unescape = false, ucl_escape = false, var_expand = false;
1281
1282 p = chunk->pos;
1283
1284 /* Skip any spaces and comments */
1285 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) ||
1286 (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1287 while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1288 ucl_chunk_skipc (chunk, p);
1289 }
1290 if (!ucl_skip_comments (parser)) {
1291 return false;
1292 }
1293 p = chunk->pos;
1294 }
1295
1296 while (p < chunk->end) {
1297 c = p;
1298 switch (*p) {
1299 case '"':
1300 obj = ucl_get_value_object (parser);
1301 ucl_chunk_skipc (chunk, p);
1302 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1303 return false;
1304 }
1305 str_len = chunk->pos - c - 2;
1306 obj->type = UCL_STRING;
1307 if ((str_len = ucl_copy_or_store_ptr (parser, c + 1, &obj->trash_stack[UCL_TRASH_VALUE],
1308 &obj->value.sv, str_len, need_unescape, false, var_expand)) == -1) {
1309 return false;
1310 }
1311 obj->len = str_len;
1312 parser->state = UCL_STATE_AFTER_VALUE;
1313 p = chunk->pos;
1314 return true;
1315 break;
1316 case '{':
1317 obj = ucl_get_value_object (parser);
1318 /* We have a new object */
1319 obj = ucl_add_parser_stack (obj, parser, false, parser->stack->level);
1320 if (obj == NULL) {
1321 return false;
1322 }
1311
1312 ucl_chunk_skipc (chunk, p);
1313 return true;
1314 break;
1315 case '[':
1316 obj = ucl_get_value_object (parser);
1317 /* We have a new array */
1318 obj = ucl_add_parser_stack (obj, parser, true, parser->stack->level);
1323
1324 ucl_chunk_skipc (chunk, p);
1325 return true;
1326 break;
1327 case '[':
1328 obj = ucl_get_value_object (parser);
1329 /* We have a new array */
1330 obj = ucl_add_parser_stack (obj, parser, true, parser->stack->level);
1331 if (obj == NULL) {
1332 return false;
1333 }
1319
1320 ucl_chunk_skipc (chunk, p);
1321 return true;
1322 break;
1323 case ']':
1324 /* We have the array ending */
1325 if (parser->stack && parser->stack->obj->type == UCL_ARRAY) {
1326 parser->state = UCL_STATE_AFTER_VALUE;
1327 return true;
1328 }
1329 else {
1330 goto parse_string;
1331 }
1332 break;
1333 case '<':
1334 obj = ucl_get_value_object (parser);
1335 /* We have something like multiline value, which must be <<[A-Z]+\n */
1336 if (chunk->end - p > 3) {
1337 if (memcmp (p, "<<", 2) == 0) {
1338 p += 2;
1339 /* We allow only uppercase characters in multiline definitions */
1340 while (p < chunk->end && *p >= 'A' && *p <= 'Z') {
1341 p ++;
1342 }
1343 if (*p =='\n') {
1344 /* Set chunk positions and start multiline parsing */
1345 c += 2;
1346 chunk->remain -= p - c;
1347 chunk->pos = p + 1;
1348 chunk->column = 0;
1349 chunk->line ++;
1350 if ((str_len = ucl_parse_multiline_string (parser, chunk, c,
1351 p - c, &c, &var_expand)) == 0) {
1352 ucl_set_err (chunk, UCL_ESYNTAX, "unterminated multiline value", &parser->err);
1353 return false;
1354 }
1355 obj->type = UCL_STRING;
1356 if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE],
1357 &obj->value.sv, str_len - 1, false, false, var_expand)) == -1) {
1358 return false;
1359 }
1360 obj->len = str_len;
1361 parser->state = UCL_STATE_AFTER_VALUE;
1362 return true;
1363 }
1364 }
1365 }
1366 /* Fallback to ordinary strings */
1367 default:
1368parse_string:
1369 if (obj == NULL) {
1370 obj = ucl_get_value_object (parser);
1371 }
1372 /* Parse atom */
1373 if (ucl_test_character (*p, UCL_CHARACTER_VALUE_DIGIT_START)) {
1374 if (!ucl_lex_number (parser, chunk, obj)) {
1375 if (parser->state == UCL_STATE_ERROR) {
1376 return false;
1377 }
1378 }
1379 else {
1380 parser->state = UCL_STATE_AFTER_VALUE;
1381 return true;
1382 }
1383 /* Fallback to normal string */
1384 }
1385
1386 if (!ucl_parse_string_value (parser, chunk, &var_expand, &need_unescape)) {
1387 return false;
1388 }
1389 /* Cut trailing spaces */
1390 stripped_spaces = 0;
1391 while (ucl_test_character (*(chunk->pos - 1 - stripped_spaces),
1392 UCL_CHARACTER_WHITESPACE)) {
1393 stripped_spaces ++;
1394 }
1395 str_len = chunk->pos - c - stripped_spaces;
1396 if (str_len <= 0) {
1397 ucl_set_err (chunk, 0, "string value must not be empty", &parser->err);
1398 return false;
1399 }
1400 else if (str_len == 4 && memcmp (c, "null", 4) == 0) {
1401 obj->len = 0;
1402 obj->type = UCL_NULL;
1403 }
1404 else if (!ucl_maybe_parse_boolean (obj, c, str_len)) {
1405 obj->type = UCL_STRING;
1406 if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE],
1407 &obj->value.sv, str_len, need_unescape,
1408 false, var_expand)) == -1) {
1409 return false;
1410 }
1411 obj->len = str_len;
1412 }
1413 parser->state = UCL_STATE_AFTER_VALUE;
1414 p = chunk->pos;
1415
1416 return true;
1417 break;
1418 }
1419 }
1420
1421 return true;
1422}
1423
1424/**
1425 * Handle after value data
1426 * @param parser
1427 * @param chunk
1428 * @return
1429 */
1430static bool
1431ucl_parse_after_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1432{
1433 const unsigned char *p;
1434 bool got_sep = false;
1435 struct ucl_stack *st;
1436
1437 p = chunk->pos;
1438
1439 while (p < chunk->end) {
1440 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
1441 /* Skip whitespaces */
1442 ucl_chunk_skipc (chunk, p);
1443 }
1444 else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1445 /* Skip comment */
1446 if (!ucl_skip_comments (parser)) {
1447 return false;
1448 }
1449 /* Treat comment as a separator */
1450 got_sep = true;
1451 p = chunk->pos;
1452 }
1453 else if (ucl_test_character (*p, UCL_CHARACTER_VALUE_END)) {
1454 if (*p == '}' || *p == ']') {
1455 if (parser->stack == NULL) {
1456 ucl_set_err (chunk, UCL_ESYNTAX, "end of array or object detected without corresponding start", &parser->err);
1457 return false;
1458 }
1459 if ((*p == '}' && parser->stack->obj->type == UCL_OBJECT) ||
1460 (*p == ']' && parser->stack->obj->type == UCL_ARRAY)) {
1461
1462 /* Pop all nested objects from a stack */
1463 st = parser->stack;
1464 parser->stack = st->next;
1465 UCL_FREE (sizeof (struct ucl_stack), st);
1466
1467 while (parser->stack != NULL) {
1468 st = parser->stack;
1469 if (st->next == NULL || st->next->level == st->level) {
1470 break;
1471 }
1472 parser->stack = st->next;
1473 UCL_FREE (sizeof (struct ucl_stack), st);
1474 }
1475 }
1476 else {
1477 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected terminating symbol detected", &parser->err);
1478 return false;
1479 }
1480
1481 if (parser->stack == NULL) {
1482 /* Ignore everything after a top object */
1483 return true;
1484 }
1485 else {
1486 ucl_chunk_skipc (chunk, p);
1487 }
1488 got_sep = true;
1489 }
1490 else {
1491 /* Got a separator */
1492 got_sep = true;
1493 ucl_chunk_skipc (chunk, p);
1494 }
1495 }
1496 else {
1497 /* Anything else */
1498 if (!got_sep) {
1499 ucl_set_err (chunk, UCL_ESYNTAX, "delimiter is missing", &parser->err);
1500 return false;
1501 }
1502 return true;
1503 }
1504 }
1505
1506 return true;
1507}
1508
1509/**
1510 * Handle macro data
1511 * @param parser
1512 * @param chunk
1513 * @return
1514 */
1515static bool
1516ucl_parse_macro_value (struct ucl_parser *parser,
1517 struct ucl_chunk *chunk, struct ucl_macro *macro,
1518 unsigned char const **macro_start, size_t *macro_len)
1519{
1520 const unsigned char *p, *c;
1521 bool need_unescape = false, ucl_escape = false, var_expand = false;
1522
1523 p = chunk->pos;
1524
1525 switch (*p) {
1526 case '"':
1527 /* We have macro value encoded in quotes */
1528 c = p;
1529 ucl_chunk_skipc (chunk, p);
1530 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1531 return false;
1532 }
1533
1534 *macro_start = c + 1;
1535 *macro_len = chunk->pos - c - 2;
1536 p = chunk->pos;
1537 break;
1538 case '{':
1539 /* We got a multiline macro body */
1540 ucl_chunk_skipc (chunk, p);
1541 /* Skip spaces at the beginning */
1542 while (p < chunk->end) {
1543 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1544 ucl_chunk_skipc (chunk, p);
1545 }
1546 else {
1547 break;
1548 }
1549 }
1550 c = p;
1551 while (p < chunk->end) {
1552 if (*p == '}') {
1553 break;
1554 }
1555 ucl_chunk_skipc (chunk, p);
1556 }
1557 *macro_start = c;
1558 *macro_len = p - c;
1559 ucl_chunk_skipc (chunk, p);
1560 break;
1561 default:
1562 /* Macro is not enclosed in quotes or braces */
1563 c = p;
1564 while (p < chunk->end) {
1565 if (ucl_lex_is_atom_end (*p)) {
1566 break;
1567 }
1568 ucl_chunk_skipc (chunk, p);
1569 }
1570 *macro_start = c;
1571 *macro_len = p - c;
1572 break;
1573 }
1574
1575 /* We are at the end of a macro */
1576 /* Skip ';' and space characters and return to previous state */
1577 while (p < chunk->end) {
1578 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) && *p != ';') {
1579 break;
1580 }
1581 ucl_chunk_skipc (chunk, p);
1582 }
1583 return true;
1584}
1585
1586/**
1587 * Handle the main states of ucl parser
1588 * @param parser parser structure
1589 * @param data the pointer to the beginning of a chunk
1590 * @param len the length of a chunk
1591 * @return true if chunk has been parsed and false in case of error
1592 */
1593static bool
1594ucl_state_machine (struct ucl_parser *parser)
1595{
1596 ucl_object_t *obj;
1597 struct ucl_chunk *chunk = parser->chunks;
1598 const unsigned char *p, *c = NULL, *macro_start = NULL;
1599 unsigned char *macro_escaped;
1600 size_t macro_len = 0;
1601 struct ucl_macro *macro = NULL;
1602 bool next_key = false, end_of_object = false;
1603
1604 if (parser->top_obj == NULL) {
1605 if (*chunk->pos == '[') {
1606 obj = ucl_add_parser_stack (NULL, parser, true, 0);
1607 }
1608 else {
1609 obj = ucl_add_parser_stack (NULL, parser, false, 0);
1610 }
1334
1335 ucl_chunk_skipc (chunk, p);
1336 return true;
1337 break;
1338 case ']':
1339 /* We have the array ending */
1340 if (parser->stack && parser->stack->obj->type == UCL_ARRAY) {
1341 parser->state = UCL_STATE_AFTER_VALUE;
1342 return true;
1343 }
1344 else {
1345 goto parse_string;
1346 }
1347 break;
1348 case '<':
1349 obj = ucl_get_value_object (parser);
1350 /* We have something like multiline value, which must be <<[A-Z]+\n */
1351 if (chunk->end - p > 3) {
1352 if (memcmp (p, "<<", 2) == 0) {
1353 p += 2;
1354 /* We allow only uppercase characters in multiline definitions */
1355 while (p < chunk->end && *p >= 'A' && *p <= 'Z') {
1356 p ++;
1357 }
1358 if (*p =='\n') {
1359 /* Set chunk positions and start multiline parsing */
1360 c += 2;
1361 chunk->remain -= p - c;
1362 chunk->pos = p + 1;
1363 chunk->column = 0;
1364 chunk->line ++;
1365 if ((str_len = ucl_parse_multiline_string (parser, chunk, c,
1366 p - c, &c, &var_expand)) == 0) {
1367 ucl_set_err (chunk, UCL_ESYNTAX, "unterminated multiline value", &parser->err);
1368 return false;
1369 }
1370 obj->type = UCL_STRING;
1371 if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE],
1372 &obj->value.sv, str_len - 1, false, false, var_expand)) == -1) {
1373 return false;
1374 }
1375 obj->len = str_len;
1376 parser->state = UCL_STATE_AFTER_VALUE;
1377 return true;
1378 }
1379 }
1380 }
1381 /* Fallback to ordinary strings */
1382 default:
1383parse_string:
1384 if (obj == NULL) {
1385 obj = ucl_get_value_object (parser);
1386 }
1387 /* Parse atom */
1388 if (ucl_test_character (*p, UCL_CHARACTER_VALUE_DIGIT_START)) {
1389 if (!ucl_lex_number (parser, chunk, obj)) {
1390 if (parser->state == UCL_STATE_ERROR) {
1391 return false;
1392 }
1393 }
1394 else {
1395 parser->state = UCL_STATE_AFTER_VALUE;
1396 return true;
1397 }
1398 /* Fallback to normal string */
1399 }
1400
1401 if (!ucl_parse_string_value (parser, chunk, &var_expand, &need_unescape)) {
1402 return false;
1403 }
1404 /* Cut trailing spaces */
1405 stripped_spaces = 0;
1406 while (ucl_test_character (*(chunk->pos - 1 - stripped_spaces),
1407 UCL_CHARACTER_WHITESPACE)) {
1408 stripped_spaces ++;
1409 }
1410 str_len = chunk->pos - c - stripped_spaces;
1411 if (str_len <= 0) {
1412 ucl_set_err (chunk, 0, "string value must not be empty", &parser->err);
1413 return false;
1414 }
1415 else if (str_len == 4 && memcmp (c, "null", 4) == 0) {
1416 obj->len = 0;
1417 obj->type = UCL_NULL;
1418 }
1419 else if (!ucl_maybe_parse_boolean (obj, c, str_len)) {
1420 obj->type = UCL_STRING;
1421 if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE],
1422 &obj->value.sv, str_len, need_unescape,
1423 false, var_expand)) == -1) {
1424 return false;
1425 }
1426 obj->len = str_len;
1427 }
1428 parser->state = UCL_STATE_AFTER_VALUE;
1429 p = chunk->pos;
1430
1431 return true;
1432 break;
1433 }
1434 }
1435
1436 return true;
1437}
1438
1439/**
1440 * Handle after value data
1441 * @param parser
1442 * @param chunk
1443 * @return
1444 */
1445static bool
1446ucl_parse_after_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1447{
1448 const unsigned char *p;
1449 bool got_sep = false;
1450 struct ucl_stack *st;
1451
1452 p = chunk->pos;
1453
1454 while (p < chunk->end) {
1455 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
1456 /* Skip whitespaces */
1457 ucl_chunk_skipc (chunk, p);
1458 }
1459 else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1460 /* Skip comment */
1461 if (!ucl_skip_comments (parser)) {
1462 return false;
1463 }
1464 /* Treat comment as a separator */
1465 got_sep = true;
1466 p = chunk->pos;
1467 }
1468 else if (ucl_test_character (*p, UCL_CHARACTER_VALUE_END)) {
1469 if (*p == '}' || *p == ']') {
1470 if (parser->stack == NULL) {
1471 ucl_set_err (chunk, UCL_ESYNTAX, "end of array or object detected without corresponding start", &parser->err);
1472 return false;
1473 }
1474 if ((*p == '}' && parser->stack->obj->type == UCL_OBJECT) ||
1475 (*p == ']' && parser->stack->obj->type == UCL_ARRAY)) {
1476
1477 /* Pop all nested objects from a stack */
1478 st = parser->stack;
1479 parser->stack = st->next;
1480 UCL_FREE (sizeof (struct ucl_stack), st);
1481
1482 while (parser->stack != NULL) {
1483 st = parser->stack;
1484 if (st->next == NULL || st->next->level == st->level) {
1485 break;
1486 }
1487 parser->stack = st->next;
1488 UCL_FREE (sizeof (struct ucl_stack), st);
1489 }
1490 }
1491 else {
1492 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected terminating symbol detected", &parser->err);
1493 return false;
1494 }
1495
1496 if (parser->stack == NULL) {
1497 /* Ignore everything after a top object */
1498 return true;
1499 }
1500 else {
1501 ucl_chunk_skipc (chunk, p);
1502 }
1503 got_sep = true;
1504 }
1505 else {
1506 /* Got a separator */
1507 got_sep = true;
1508 ucl_chunk_skipc (chunk, p);
1509 }
1510 }
1511 else {
1512 /* Anything else */
1513 if (!got_sep) {
1514 ucl_set_err (chunk, UCL_ESYNTAX, "delimiter is missing", &parser->err);
1515 return false;
1516 }
1517 return true;
1518 }
1519 }
1520
1521 return true;
1522}
1523
1524/**
1525 * Handle macro data
1526 * @param parser
1527 * @param chunk
1528 * @return
1529 */
1530static bool
1531ucl_parse_macro_value (struct ucl_parser *parser,
1532 struct ucl_chunk *chunk, struct ucl_macro *macro,
1533 unsigned char const **macro_start, size_t *macro_len)
1534{
1535 const unsigned char *p, *c;
1536 bool need_unescape = false, ucl_escape = false, var_expand = false;
1537
1538 p = chunk->pos;
1539
1540 switch (*p) {
1541 case '"':
1542 /* We have macro value encoded in quotes */
1543 c = p;
1544 ucl_chunk_skipc (chunk, p);
1545 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1546 return false;
1547 }
1548
1549 *macro_start = c + 1;
1550 *macro_len = chunk->pos - c - 2;
1551 p = chunk->pos;
1552 break;
1553 case '{':
1554 /* We got a multiline macro body */
1555 ucl_chunk_skipc (chunk, p);
1556 /* Skip spaces at the beginning */
1557 while (p < chunk->end) {
1558 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1559 ucl_chunk_skipc (chunk, p);
1560 }
1561 else {
1562 break;
1563 }
1564 }
1565 c = p;
1566 while (p < chunk->end) {
1567 if (*p == '}') {
1568 break;
1569 }
1570 ucl_chunk_skipc (chunk, p);
1571 }
1572 *macro_start = c;
1573 *macro_len = p - c;
1574 ucl_chunk_skipc (chunk, p);
1575 break;
1576 default:
1577 /* Macro is not enclosed in quotes or braces */
1578 c = p;
1579 while (p < chunk->end) {
1580 if (ucl_lex_is_atom_end (*p)) {
1581 break;
1582 }
1583 ucl_chunk_skipc (chunk, p);
1584 }
1585 *macro_start = c;
1586 *macro_len = p - c;
1587 break;
1588 }
1589
1590 /* We are at the end of a macro */
1591 /* Skip ';' and space characters and return to previous state */
1592 while (p < chunk->end) {
1593 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) && *p != ';') {
1594 break;
1595 }
1596 ucl_chunk_skipc (chunk, p);
1597 }
1598 return true;
1599}
1600
1601/**
1602 * Handle the main states of ucl parser
1603 * @param parser parser structure
1604 * @param data the pointer to the beginning of a chunk
1605 * @param len the length of a chunk
1606 * @return true if chunk has been parsed and false in case of error
1607 */
1608static bool
1609ucl_state_machine (struct ucl_parser *parser)
1610{
1611 ucl_object_t *obj;
1612 struct ucl_chunk *chunk = parser->chunks;
1613 const unsigned char *p, *c = NULL, *macro_start = NULL;
1614 unsigned char *macro_escaped;
1615 size_t macro_len = 0;
1616 struct ucl_macro *macro = NULL;
1617 bool next_key = false, end_of_object = false;
1618
1619 if (parser->top_obj == NULL) {
1620 if (*chunk->pos == '[') {
1621 obj = ucl_add_parser_stack (NULL, parser, true, 0);
1622 }
1623 else {
1624 obj = ucl_add_parser_stack (NULL, parser, false, 0);
1625 }
1626 if (obj == NULL) {
1627 return false;
1628 }
1611 parser->top_obj = obj;
1612 parser->cur_obj = obj;
1613 parser->state = UCL_STATE_INIT;
1614 }
1615
1616 p = chunk->pos;
1617 while (chunk->pos < chunk->end) {
1618 switch (parser->state) {
1619 case UCL_STATE_INIT:
1620 /*
1621 * At the init state we can either go to the parse array or object
1622 * if we got [ or { correspondingly or can just treat new data as
1623 * a key of newly created object
1624 */
1625 obj = parser->cur_obj;
1626 if (!ucl_skip_comments (parser)) {
1627 parser->prev_state = parser->state;
1628 parser->state = UCL_STATE_ERROR;
1629 return false;
1630 }
1631 else {
1632 p = chunk->pos;
1633 if (*p == '[') {
1634 parser->state = UCL_STATE_VALUE;
1635 ucl_chunk_skipc (chunk, p);
1636 }
1637 else {
1638 parser->state = UCL_STATE_KEY;
1639 if (*p == '{') {
1640 ucl_chunk_skipc (chunk, p);
1641 }
1642 }
1643 }
1644 break;
1645 case UCL_STATE_KEY:
1646 /* Skip any spaces */
1647 while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1648 ucl_chunk_skipc (chunk, p);
1649 }
1650 if (*p == '}') {
1651 /* We have the end of an object */
1652 parser->state = UCL_STATE_AFTER_VALUE;
1653 continue;
1654 }
1655 if (parser->stack == NULL) {
1656 /* No objects are on stack, but we want to parse a key */
1657 ucl_set_err (chunk, UCL_ESYNTAX, "top object is finished but the parser "
1658 "expects a key", &parser->err);
1659 parser->prev_state = parser->state;
1660 parser->state = UCL_STATE_ERROR;
1661 return false;
1662 }
1663 if (!ucl_parse_key (parser, chunk, &next_key, &end_of_object)) {
1664 parser->prev_state = parser->state;
1665 parser->state = UCL_STATE_ERROR;
1666 return false;
1667 }
1668 if (end_of_object) {
1669 p = chunk->pos;
1670 parser->state = UCL_STATE_AFTER_VALUE;
1671 continue;
1672 }
1673 else if (parser->state != UCL_STATE_MACRO_NAME) {
1674 if (next_key && parser->stack->obj->type == UCL_OBJECT) {
1675 /* Parse more keys and nest objects accordingly */
1629 parser->top_obj = obj;
1630 parser->cur_obj = obj;
1631 parser->state = UCL_STATE_INIT;
1632 }
1633
1634 p = chunk->pos;
1635 while (chunk->pos < chunk->end) {
1636 switch (parser->state) {
1637 case UCL_STATE_INIT:
1638 /*
1639 * At the init state we can either go to the parse array or object
1640 * if we got [ or { correspondingly or can just treat new data as
1641 * a key of newly created object
1642 */
1643 obj = parser->cur_obj;
1644 if (!ucl_skip_comments (parser)) {
1645 parser->prev_state = parser->state;
1646 parser->state = UCL_STATE_ERROR;
1647 return false;
1648 }
1649 else {
1650 p = chunk->pos;
1651 if (*p == '[') {
1652 parser->state = UCL_STATE_VALUE;
1653 ucl_chunk_skipc (chunk, p);
1654 }
1655 else {
1656 parser->state = UCL_STATE_KEY;
1657 if (*p == '{') {
1658 ucl_chunk_skipc (chunk, p);
1659 }
1660 }
1661 }
1662 break;
1663 case UCL_STATE_KEY:
1664 /* Skip any spaces */
1665 while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1666 ucl_chunk_skipc (chunk, p);
1667 }
1668 if (*p == '}') {
1669 /* We have the end of an object */
1670 parser->state = UCL_STATE_AFTER_VALUE;
1671 continue;
1672 }
1673 if (parser->stack == NULL) {
1674 /* No objects are on stack, but we want to parse a key */
1675 ucl_set_err (chunk, UCL_ESYNTAX, "top object is finished but the parser "
1676 "expects a key", &parser->err);
1677 parser->prev_state = parser->state;
1678 parser->state = UCL_STATE_ERROR;
1679 return false;
1680 }
1681 if (!ucl_parse_key (parser, chunk, &next_key, &end_of_object)) {
1682 parser->prev_state = parser->state;
1683 parser->state = UCL_STATE_ERROR;
1684 return false;
1685 }
1686 if (end_of_object) {
1687 p = chunk->pos;
1688 parser->state = UCL_STATE_AFTER_VALUE;
1689 continue;
1690 }
1691 else if (parser->state != UCL_STATE_MACRO_NAME) {
1692 if (next_key && parser->stack->obj->type == UCL_OBJECT) {
1693 /* Parse more keys and nest objects accordingly */
1676 obj = ucl_add_parser_stack (parser->cur_obj, parser, false, parser->stack->level + 1);
1694 obj = ucl_add_parser_stack (parser->cur_obj, parser, false,
1695 parser->stack->level + 1);
1696 if (obj == NULL) {
1697 return false;
1698 }
1677 }
1678 else {
1679 parser->state = UCL_STATE_VALUE;
1680 }
1681 }
1682 else {
1683 c = chunk->pos;
1684 }
1685 p = chunk->pos;
1686 break;
1687 case UCL_STATE_VALUE:
1688 /* We need to check what we do have */
1689 if (!ucl_parse_value (parser, chunk)) {
1690 parser->prev_state = parser->state;
1691 parser->state = UCL_STATE_ERROR;
1692 return false;
1693 }
1694 /* State is set in ucl_parse_value call */
1695 p = chunk->pos;
1696 break;
1697 case UCL_STATE_AFTER_VALUE:
1698 if (!ucl_parse_after_value (parser, chunk)) {
1699 parser->prev_state = parser->state;
1700 parser->state = UCL_STATE_ERROR;
1701 return false;
1702 }
1703 if (parser->stack != NULL) {
1704 if (parser->stack->obj->type == UCL_OBJECT) {
1705 parser->state = UCL_STATE_KEY;
1706 }
1707 else {
1708 /* Array */
1709 parser->state = UCL_STATE_VALUE;
1710 }
1711 }
1712 else {
1713 /* Skip everything at the end */
1714 return true;
1715 }
1716 p = chunk->pos;
1717 break;
1718 case UCL_STATE_MACRO_NAME:
1719 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1720 ucl_chunk_skipc (chunk, p);
1721 }
1722 else if (p - c > 0) {
1723 /* We got macro name */
1724 macro_len = (size_t)(p - c);
1725 HASH_FIND (hh, parser->macroes, c, macro_len, macro);
1726 if (macro == NULL) {
1727 ucl_create_err (&parser->err, "error on line %d at column %d: "
1728 "unknown macro: '%.*s', character: '%c'",
1729 chunk->line, chunk->column, (int)(p - c), c, *chunk->pos);
1730 parser->state = UCL_STATE_ERROR;
1731 return false;
1732 }
1733 /* Now we need to skip all spaces */
1734 while (p < chunk->end) {
1735 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1736 if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1737 /* Skip comment */
1738 if (!ucl_skip_comments (parser)) {
1739 return false;
1740 }
1741 p = chunk->pos;
1742 }
1743 break;
1744 }
1745 ucl_chunk_skipc (chunk, p);
1746 }
1747 parser->state = UCL_STATE_MACRO;
1748 }
1749 break;
1750 case UCL_STATE_MACRO:
1751 if (!ucl_parse_macro_value (parser, chunk, macro,
1752 &macro_start, &macro_len)) {
1753 parser->prev_state = parser->state;
1754 parser->state = UCL_STATE_ERROR;
1755 return false;
1756 }
1757 macro_len = ucl_expand_variable (parser, &macro_escaped, macro_start, macro_len);
1758 parser->state = parser->prev_state;
1759 if (macro_escaped == NULL) {
1760 if (!macro->handler (macro_start, macro_len, macro->ud)) {
1761 return false;
1762 }
1763 }
1764 else {
1765 if (!macro->handler (macro_escaped, macro_len, macro->ud)) {
1766 UCL_FREE (macro_len + 1, macro_escaped);
1767 return false;
1768 }
1769 UCL_FREE (macro_len + 1, macro_escaped);
1770 }
1771 p = chunk->pos;
1772 break;
1773 default:
1774 /* TODO: add all states */
1775 ucl_set_err (chunk, UCL_EINTERNAL, "internal error: parser is in an unknown state", &parser->err);
1776 parser->state = UCL_STATE_ERROR;
1777 return false;
1778 }
1779 }
1780
1781 return true;
1782}
1783
1784struct ucl_parser*
1785ucl_parser_new (int flags)
1786{
1787 struct ucl_parser *new;
1788
1789 new = UCL_ALLOC (sizeof (struct ucl_parser));
1699 }
1700 else {
1701 parser->state = UCL_STATE_VALUE;
1702 }
1703 }
1704 else {
1705 c = chunk->pos;
1706 }
1707 p = chunk->pos;
1708 break;
1709 case UCL_STATE_VALUE:
1710 /* We need to check what we do have */
1711 if (!ucl_parse_value (parser, chunk)) {
1712 parser->prev_state = parser->state;
1713 parser->state = UCL_STATE_ERROR;
1714 return false;
1715 }
1716 /* State is set in ucl_parse_value call */
1717 p = chunk->pos;
1718 break;
1719 case UCL_STATE_AFTER_VALUE:
1720 if (!ucl_parse_after_value (parser, chunk)) {
1721 parser->prev_state = parser->state;
1722 parser->state = UCL_STATE_ERROR;
1723 return false;
1724 }
1725 if (parser->stack != NULL) {
1726 if (parser->stack->obj->type == UCL_OBJECT) {
1727 parser->state = UCL_STATE_KEY;
1728 }
1729 else {
1730 /* Array */
1731 parser->state = UCL_STATE_VALUE;
1732 }
1733 }
1734 else {
1735 /* Skip everything at the end */
1736 return true;
1737 }
1738 p = chunk->pos;
1739 break;
1740 case UCL_STATE_MACRO_NAME:
1741 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1742 ucl_chunk_skipc (chunk, p);
1743 }
1744 else if (p - c > 0) {
1745 /* We got macro name */
1746 macro_len = (size_t)(p - c);
1747 HASH_FIND (hh, parser->macroes, c, macro_len, macro);
1748 if (macro == NULL) {
1749 ucl_create_err (&parser->err, "error on line %d at column %d: "
1750 "unknown macro: '%.*s', character: '%c'",
1751 chunk->line, chunk->column, (int)(p - c), c, *chunk->pos);
1752 parser->state = UCL_STATE_ERROR;
1753 return false;
1754 }
1755 /* Now we need to skip all spaces */
1756 while (p < chunk->end) {
1757 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1758 if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1759 /* Skip comment */
1760 if (!ucl_skip_comments (parser)) {
1761 return false;
1762 }
1763 p = chunk->pos;
1764 }
1765 break;
1766 }
1767 ucl_chunk_skipc (chunk, p);
1768 }
1769 parser->state = UCL_STATE_MACRO;
1770 }
1771 break;
1772 case UCL_STATE_MACRO:
1773 if (!ucl_parse_macro_value (parser, chunk, macro,
1774 &macro_start, &macro_len)) {
1775 parser->prev_state = parser->state;
1776 parser->state = UCL_STATE_ERROR;
1777 return false;
1778 }
1779 macro_len = ucl_expand_variable (parser, &macro_escaped, macro_start, macro_len);
1780 parser->state = parser->prev_state;
1781 if (macro_escaped == NULL) {
1782 if (!macro->handler (macro_start, macro_len, macro->ud)) {
1783 return false;
1784 }
1785 }
1786 else {
1787 if (!macro->handler (macro_escaped, macro_len, macro->ud)) {
1788 UCL_FREE (macro_len + 1, macro_escaped);
1789 return false;
1790 }
1791 UCL_FREE (macro_len + 1, macro_escaped);
1792 }
1793 p = chunk->pos;
1794 break;
1795 default:
1796 /* TODO: add all states */
1797 ucl_set_err (chunk, UCL_EINTERNAL, "internal error: parser is in an unknown state", &parser->err);
1798 parser->state = UCL_STATE_ERROR;
1799 return false;
1800 }
1801 }
1802
1803 return true;
1804}
1805
1806struct ucl_parser*
1807ucl_parser_new (int flags)
1808{
1809 struct ucl_parser *new;
1810
1811 new = UCL_ALLOC (sizeof (struct ucl_parser));
1812 if (new == NULL) {
1813 return NULL;
1814 }
1790 memset (new, 0, sizeof (struct ucl_parser));
1791
1792 ucl_parser_register_macro (new, "include", ucl_include_handler, new);
1793 ucl_parser_register_macro (new, "try_include", ucl_try_include_handler, new);
1794 ucl_parser_register_macro (new, "includes", ucl_includes_handler, new);
1795
1796 new->flags = flags;
1797
1798 /* Initial assumption about filevars */
1799 ucl_parser_set_filevars (new, NULL, false);
1800
1801 return new;
1802}
1803
1804
1805void
1806ucl_parser_register_macro (struct ucl_parser *parser, const char *macro,
1807 ucl_macro_handler handler, void* ud)
1808{
1809 struct ucl_macro *new;
1810
1815 memset (new, 0, sizeof (struct ucl_parser));
1816
1817 ucl_parser_register_macro (new, "include", ucl_include_handler, new);
1818 ucl_parser_register_macro (new, "try_include", ucl_try_include_handler, new);
1819 ucl_parser_register_macro (new, "includes", ucl_includes_handler, new);
1820
1821 new->flags = flags;
1822
1823 /* Initial assumption about filevars */
1824 ucl_parser_set_filevars (new, NULL, false);
1825
1826 return new;
1827}
1828
1829
1830void
1831ucl_parser_register_macro (struct ucl_parser *parser, const char *macro,
1832 ucl_macro_handler handler, void* ud)
1833{
1834 struct ucl_macro *new;
1835
1836 if (macro == NULL || handler == NULL) {
1837 return;
1838 }
1811 new = UCL_ALLOC (sizeof (struct ucl_macro));
1839 new = UCL_ALLOC (sizeof (struct ucl_macro));
1840 if (new == NULL) {
1841 return;
1842 }
1812 memset (new, 0, sizeof (struct ucl_macro));
1813 new->handler = handler;
1814 new->name = strdup (macro);
1815 new->ud = ud;
1816 HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new);
1817}
1818
1819void
1820ucl_parser_register_variable (struct ucl_parser *parser, const char *var,
1821 const char *value)
1822{
1823 struct ucl_variable *new = NULL, *cur;
1824
1825 if (var == NULL) {
1826 return;
1827 }
1828
1829 /* Find whether a variable already exists */
1830 LL_FOREACH (parser->variables, cur) {
1831 if (strcmp (cur->var, var) == 0) {
1832 new = cur;
1833 break;
1834 }
1835 }
1836
1837 if (value == NULL) {
1838
1839 if (new != NULL) {
1840 /* Remove variable */
1841 LL_DELETE (parser->variables, new);
1842 free (new->var);
1843 free (new->value);
1844 UCL_FREE (sizeof (struct ucl_variable), new);
1845 }
1846 else {
1847 /* Do nothing */
1848 return;
1849 }
1850 }
1851 else {
1852 if (new == NULL) {
1853 new = UCL_ALLOC (sizeof (struct ucl_variable));
1843 memset (new, 0, sizeof (struct ucl_macro));
1844 new->handler = handler;
1845 new->name = strdup (macro);
1846 new->ud = ud;
1847 HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new);
1848}
1849
1850void
1851ucl_parser_register_variable (struct ucl_parser *parser, const char *var,
1852 const char *value)
1853{
1854 struct ucl_variable *new = NULL, *cur;
1855
1856 if (var == NULL) {
1857 return;
1858 }
1859
1860 /* Find whether a variable already exists */
1861 LL_FOREACH (parser->variables, cur) {
1862 if (strcmp (cur->var, var) == 0) {
1863 new = cur;
1864 break;
1865 }
1866 }
1867
1868 if (value == NULL) {
1869
1870 if (new != NULL) {
1871 /* Remove variable */
1872 LL_DELETE (parser->variables, new);
1873 free (new->var);
1874 free (new->value);
1875 UCL_FREE (sizeof (struct ucl_variable), new);
1876 }
1877 else {
1878 /* Do nothing */
1879 return;
1880 }
1881 }
1882 else {
1883 if (new == NULL) {
1884 new = UCL_ALLOC (sizeof (struct ucl_variable));
1885 if (new == NULL) {
1886 return;
1887 }
1854 memset (new, 0, sizeof (struct ucl_variable));
1855 new->var = strdup (var);
1856 new->var_len = strlen (var);
1857 new->value = strdup (value);
1858 new->value_len = strlen (value);
1859
1860 LL_PREPEND (parser->variables, new);
1861 }
1862 else {
1863 free (new->value);
1864 new->value = strdup (value);
1865 new->value_len = strlen (value);
1866 }
1867 }
1868}
1869
1870bool
1871ucl_parser_add_chunk (struct ucl_parser *parser, const unsigned char *data,
1872 size_t len)
1873{
1874 struct ucl_chunk *chunk;
1875
1888 memset (new, 0, sizeof (struct ucl_variable));
1889 new->var = strdup (var);
1890 new->var_len = strlen (var);
1891 new->value = strdup (value);
1892 new->value_len = strlen (value);
1893
1894 LL_PREPEND (parser->variables, new);
1895 }
1896 else {
1897 free (new->value);
1898 new->value = strdup (value);
1899 new->value_len = strlen (value);
1900 }
1901 }
1902}
1903
1904bool
1905ucl_parser_add_chunk (struct ucl_parser *parser, const unsigned char *data,
1906 size_t len)
1907{
1908 struct ucl_chunk *chunk;
1909
1910 if (data == NULL || len == 0) {
1911 ucl_create_err (&parser->err, "invalid chunk added");
1912 return false;
1913 }
1876 if (parser->state != UCL_STATE_ERROR) {
1877 chunk = UCL_ALLOC (sizeof (struct ucl_chunk));
1914 if (parser->state != UCL_STATE_ERROR) {
1915 chunk = UCL_ALLOC (sizeof (struct ucl_chunk));
1916 if (chunk == NULL) {
1917 ucl_create_err (&parser->err, "cannot allocate chunk structure");
1918 return false;
1919 }
1878 chunk->begin = data;
1879 chunk->remain = len;
1880 chunk->pos = chunk->begin;
1881 chunk->end = chunk->begin + len;
1882 chunk->line = 1;
1883 chunk->column = 0;
1884 LL_PREPEND (parser->chunks, chunk);
1885 parser->recursion ++;
1886 if (parser->recursion > UCL_MAX_RECURSION) {
1887 ucl_create_err (&parser->err, "maximum include nesting limit is reached: %d",
1888 parser->recursion);
1889 return false;
1890 }
1891 return ucl_state_machine (parser);
1892 }
1893
1894 ucl_create_err (&parser->err, "a parser is in an invalid state");
1895
1896 return false;
1897}
1920 chunk->begin = data;
1921 chunk->remain = len;
1922 chunk->pos = chunk->begin;
1923 chunk->end = chunk->begin + len;
1924 chunk->line = 1;
1925 chunk->column = 0;
1926 LL_PREPEND (parser->chunks, chunk);
1927 parser->recursion ++;
1928 if (parser->recursion > UCL_MAX_RECURSION) {
1929 ucl_create_err (&parser->err, "maximum include nesting limit is reached: %d",
1930 parser->recursion);
1931 return false;
1932 }
1933 return ucl_state_machine (parser);
1934 }
1935
1936 ucl_create_err (&parser->err, "a parser is in an invalid state");
1937
1938 return false;
1939}
1940
1941bool
1942ucl_parser_add_string (struct ucl_parser *parser, const char *data,
1943 size_t len)
1944{
1945 if (data == NULL) {
1946 ucl_create_err (&parser->err, "invalid string added");
1947 return false;
1948 }
1949 if (len == 0) {
1950 len = strlen (data);
1951 }
1952
1953 return ucl_parser_add_chunk (parser, (const unsigned char *)data, len);
1954}