1/* Copyright (c) 2013, Vsevolod Stakhov
2 * All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *       * Redistributions of source code must retain the above copyright
7 *         notice, this list of conditions and the following disclaimer.
8 *       * Redistributions in binary form must reproduce the above copyright
9 *         notice, this list of conditions and the following disclaimer in the
10 *         documentation and/or other materials provided with the distribution.
11 *
12 * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
13 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
14 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
15 * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
16 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
18 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
19 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
20 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
21 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
22 */
23
24#include "ucl.h"
25#include "ucl_internal.h"
26#include "ucl_chartable.h"
27
28/**
 * @file
 * Implementation of the UCL parser
31 */
32
/**
 * Snapshot of a chunk cursor. It is filled by ucl_chunk_save_state () and
 * written back by ucl_chunk_restore_state ().
 */
struct ucl_parser_saved_state {
	unsigned line;				/* copy of chunk->line */
	unsigned column;			/* copy of chunk->column */
	size_t remain;				/* copy of chunk->remain */
	const unsigned char *pos;	/* copy of chunk->pos */
};
39
/**
 * Advance the cursor by exactly one character and keep the chunk
 * bookkeeping in sync.
 *
 * If the character under the cursor is '\n' the chunk line counter is
 * incremented and the column is reset to 0, otherwise the column is
 * incremented. After that both `p` and chunk->pos move one character forward
 * and chunk->remain is decremented.
 *
 * The macro itself performs no bounds checks, so callers test the cursor
 * against the end of the chunk before using it.
 *
 * @param chunk pointer to the chunk that is being scanned
 * @param p modifiable cursor lvalue (it is evaluated more than once)
 */
#define ucl_chunk_skipc(chunk, p)    do{					\
    if (*(p) == '\n') {										\
        (chunk)->line ++;									\
        (chunk)->column = 0;								\
    }														\
    else (chunk)->column ++;								\
    (p)++;													\
    (chunk)->pos ++;										\
    (chunk)->remain --;										\
    } while (0)
57
58/**
59 * Save parser state
60 * @param chunk
61 * @param s
62 */
63static inline void
64ucl_chunk_save_state (struct ucl_chunk *chunk, struct ucl_parser_saved_state *s)
65{
66	s->column = chunk->column;
67	s->pos = chunk->pos;
68	s->line = chunk->line;
69	s->remain = chunk->remain;
70}
71
72/**
73 * Restore parser state
74 * @param chunk
75 * @param s
76 */
77static inline void
78ucl_chunk_restore_state (struct ucl_chunk *chunk, struct ucl_parser_saved_state *s)
79{
80	chunk->column = s->column;
81	chunk->pos = s->pos;
82	chunk->line = s->line;
83	chunk->remain = s->remain;
84}
85
86static inline void
87ucl_set_err (struct ucl_chunk *chunk, int code, const char *str, UT_string **err)
88{
89	if (chunk->pos < chunk->end) {
90		if (isgraph (*chunk->pos)) {
91			ucl_create_err (err, "error on line %d at column %d: '%s', character: '%c'",
92					chunk->line, chunk->column, str, *chunk->pos);
93		}
94		else {
95			ucl_create_err (err, "error on line %d at column %d: '%s', character: '0x%02x'",
96					chunk->line, chunk->column, str, (int)*chunk->pos);
97		}
98	}
99	else {
100		ucl_create_err (err, "error at the end of chunk: %s", str);
101	}
102}
103
104/**
105 * Skip all comments from the current pos resolving nested and multiline comments
106 * @param parser
107 * @return
108 */
109static bool
110ucl_skip_comments (struct ucl_parser *parser)
111{
112	struct ucl_chunk *chunk = parser->chunks;
113	const unsigned char *p;
114	int comments_nested = 0;
115
116	p = chunk->pos;
117
118start:
119	if (*p == '#') {
120		if (parser->state != UCL_STATE_SCOMMENT &&
121				parser->state != UCL_STATE_MCOMMENT) {
122			while (p < chunk->end) {
123				if (*p == '\n') {
124					ucl_chunk_skipc (chunk, p);
125					goto start;
126				}
127				ucl_chunk_skipc (chunk, p);
128			}
129		}
130	}
131	else if (*p == '/' && chunk->remain >= 2) {
132		if (p[1] == '*') {
133			ucl_chunk_skipc (chunk, p);
134			comments_nested ++;
135			ucl_chunk_skipc (chunk, p);
136
137			while (p < chunk->end) {
138				if (*p == '*') {
139					ucl_chunk_skipc (chunk, p);
140					if (*p == '/') {
141						comments_nested --;
142						if (comments_nested == 0) {
143							ucl_chunk_skipc (chunk, p);
144							goto start;
145						}
146					}
147					ucl_chunk_skipc (chunk, p);
148				}
149				else if (p[0] == '/' && chunk->remain >= 2 && p[1] == '*') {
150					comments_nested ++;
151					ucl_chunk_skipc (chunk, p);
152					ucl_chunk_skipc (chunk, p);
153					continue;
154				}
155				ucl_chunk_skipc (chunk, p);
156			}
157			if (comments_nested != 0) {
158				ucl_set_err (chunk, UCL_ENESTED, "unfinished multiline comment", &parser->err);
159				return false;
160			}
161		}
162	}
163
164	return true;
165}
166
/**
 * Translate a size suffix into its numeric multiplier
 *
 * The suffix is matched case-insensitively: 'k', 'm' and 'g' are known.
 *
 * @param c suffix character
 * @param is_bytes if true powers of 1024 are used, powers of 1000 otherwise
 * @return multiplier for the suffix or 1 if the character is not a suffix
 */
static inline unsigned long
ucl_lex_num_multiplier (const unsigned char c, bool is_bytes) {
	long decimal, binary;

	switch (tolower (c)) {
	case 'k':
		decimal = 1000;
		binary = 1024;
		break;
	case 'm':
		decimal = 1000 * 1000;
		binary = 1024 * 1024;
		break;
	case 'g':
		decimal = 1000 * 1000 * 1000;
		binary = 1024 * 1024 * 1024;
		break;
	default:
		/* Not a known suffix */
		return 1;
	}

	return is_bytes ? binary : decimal;
}
197
198
/**
 * Return the number of seconds in the time unit selected by a suffix
 *
 * The suffix is matched case-insensitively:
 *  'm' - minute, 'h' - hour, 'd' - day, 'w' - week, 'y' - year (365 days).
 *
 * @param c suffix character
 * @return number of seconds in the unit or 1 if the character is not a
 * known suffix
 */
static inline double
ucl_lex_time_multiplier (const unsigned char c) {
	static const struct {
		char c;
		double mult;
	} multipliers[] = {
			{'m', 60},
			{'h', 60 * 60},
			{'d', 60 * 60 * 24},
			{'w', 60 * 60 * 24 * 7},
			/* A year is 365 days, not 365 weeks */
			{'y', 60 * 60 * 24 * 365}
	};
	size_t i;

	for (i = 0; i < sizeof (multipliers) / sizeof (multipliers[0]); i ++) {
		if (tolower (c) == multipliers[i].c) {
			return multipliers[i].mult;
		}
	}

	return 1;
}
226
227/**
228 * Return true if a character is a end of an atom
229 * @param c
230 * @return
231 */
232static inline bool
233ucl_lex_is_atom_end (const unsigned char c)
234{
235	return ucl_test_character (c, UCL_CHARACTER_VALUE_END);
236}
237
/**
 * Check whether two consecutive characters open a comment
 * @param c1 first character
 * @param c2 character that follows c1 (only inspected when c1 is '/')
 * @return true for '#' and for the sequence '/' '*'
 */
static inline bool
ucl_lex_is_comment (const unsigned char c1, const unsigned char c2)
{
	switch (c1) {
	case '#':
		return true;
	case '/':
		return c2 == '*';
	default:
		return false;
	}
}
251
252/**
253 * Check variable found
254 * @param parser
255 * @param ptr
256 * @param remain
257 * @param out_len
258 * @param strict
259 * @param found
260 * @return
261 */
262static inline const char *
263ucl_check_variable_safe (struct ucl_parser *parser, const char *ptr, size_t remain,
264		size_t *out_len, bool strict, bool *found)
265{
266	struct ucl_variable *var;
267
268	LL_FOREACH (parser->variables, var) {
269		if (strict) {
270			if (remain == var->var_len) {
271				if (memcmp (ptr, var->var, var->var_len) == 0) {
272					*out_len += var->value_len;
273					*found = true;
274					return (ptr + var->var_len);
275				}
276			}
277		}
278		else {
279			if (remain >= var->var_len) {
280				if (memcmp (ptr, var->var, var->var_len) == 0) {
281					*out_len += var->value_len;
282					*found = true;
283					return (ptr + var->var_len);
284				}
285			}
286		}
287	}
288
289	return ptr;
290}
291
/**
 * Sizing pass for a single '$' occurrence
 *
 * The function is called with ptr pointing at the character that follows a
 * '$' sign. It works out how many input characters belong to the construct
 * and how many bytes it contributes to the expanded output:
 *  - "${name}" with a known name: the value length, input up to and
 *    including '}' is consumed;
 *  - "${" with a closing brace but an unknown name: 2 bytes ("${"), the '{'
 *    is consumed and the rest is counted by the caller as plain text;
 *  - "${" without a closing brace: 1 byte ('$'), nothing is consumed;
 *  - "$$": 1 byte, the second '$' is consumed;
 *  - "$name" where a known name is a prefix of the text: the value length,
 *    the name is consumed;
 *  - anything else: 1 byte ('$'), nothing is consumed.
 *
 * @param parser parser object that owns the list of variables
 * @param ptr character that follows the '$' sign
 * @param remain number of characters available at ptr
 * @param out_len running length of the expanded output, updated in place
 * @param vars_found set to true when a known variable is met
 * @return position from which the caller continues the scan
 */
static const char *
ucl_check_variable (struct ucl_parser *parser, const char *ptr, size_t remain, size_t *out_len, bool *vars_found)
{
	const char *p, *end, *ret = ptr;
	bool found = false, closed = false;

	if (*ptr == '{') {
		/* We need to match the variable enclosed in braces */
		p = ptr + 1;
		end = ptr + remain;
		while (p < end) {
			if (*p == '}') {
				closed = true;
				ret = ucl_check_variable_safe (parser, ptr + 1, p - ptr - 1, out_len, true, &found);
				if (found) {
					/* {} must be excluded actually */
					ret ++;
					if (!*vars_found) {
						*vars_found = true;
					}
				}
				else {
					*out_len += 2;
				}
				break;
			}
			p ++;
		}
		if (!closed) {
			/*
			 * The brace is never closed: only the '$' sign is consumed here,
			 * so it has to be counted, otherwise the output buffer is one
			 * byte too short.
			 */
			(*out_len) ++;
		}
	}
	else if (*ptr != '$') {
		/* Not count escaped dollar sign */
		ret = ucl_check_variable_safe (parser, ptr, remain, out_len, false, &found);
		if (found && !*vars_found) {
			*vars_found = true;
		}
		if (!found) {
			(*out_len) ++;
		}
	}
	else {
		/* "$$" is an escaped dollar sign */
		ret ++;
		(*out_len) ++;
	}

	return ret;
}
346
347/**
348 * Expand a single variable
349 * @param parser
350 * @param ptr
351 * @param remain
352 * @param dest
353 * @return
354 */
355static const char *
356ucl_expand_single_variable (struct ucl_parser *parser, const char *ptr,
357		size_t remain, unsigned char **dest)
358{
359	unsigned char *d = *dest;
360	const char *p = ptr + 1, *ret;
361	struct ucl_variable *var;
362	bool found = false;
363
364	ret = ptr + 1;
365	remain --;
366
367	if (*p == '$') {
368		*d++ = *p++;
369		*dest = d;
370		return p;
371	}
372	else if (*p == '{') {
373		p ++;
374		ret += 2;
375		remain -= 2;
376	}
377
378	LL_FOREACH (parser->variables, var) {
379		if (remain >= var->var_len) {
380			if (memcmp (p, var->var, var->var_len) == 0) {
381				memcpy (d, var->value, var->value_len);
382				ret += var->var_len;
383				d += var->value_len;
384				found = true;
385				break;
386			}
387		}
388	}
389	if (!found) {
390		memcpy (d, ptr, 2);
391		d += 2;
392		ret --;
393	}
394
395	*dest = d;
396	return ret;
397}
398
/**
 * Expand variables in a string
 *
 * The work is done in two passes. The first pass (ucl_check_variable ())
 * computes the length of the expanded string and detects whether any known
 * variable is present. If none is, nothing is allocated. Otherwise a buffer
 * of that length plus a terminating '\0' is allocated with UCL_ALLOC and the
 * second pass (ucl_expand_single_variable ()) fills it.
 *
 * @param parser parser object that owns the list of variables
 * @param dst receives the newly allocated expanded string, or NULL if there
 * is nothing to expand or the allocation fails
 * @param src source string
 * @param in_len length of the source string
 * @return length of the expanded string (excluding '\0') if *dst has been
 * set, in_len otherwise
 */
static ssize_t
ucl_expand_variable (struct ucl_parser *parser, unsigned char **dst,
		const char *src, size_t in_len)
{
	const char *p, *end = src + in_len;
	unsigned char *d;
	size_t out_len = 0;
	bool vars_found = false;

	/* First pass: compute the length of the output */
	p = src;
	while (p < end) {
		if (*p == '$') {
			p = ucl_check_variable (parser, p + 1, end - p - 1, &out_len, &vars_found);
		}
		else {
			p ++;
			out_len ++;
		}
	}

	if (!vars_found) {
		/* Trivial case */
		*dst = NULL;
		return in_len;
	}

	*dst = UCL_ALLOC (out_len + 1);
	if (*dst == NULL) {
		/* The caller treats a NULL destination as "nothing to expand" */
		return in_len;
	}

	/*
	 * Second pass: produce the output. The loop condition is an ordering
	 * test rather than an equality test so that the scan stops even if a
	 * helper steps past the end of the input.
	 */
	d = *dst;
	p = src;
	while (p < end) {
		if (*p == '$') {
			p = ucl_expand_single_variable (parser, p, end - p, &d);
		}
		else {
			*d++ = *p++;
		}
	}

	*d = '\0';

	return out_len;
}
453
454/**
455 * Store or copy pointer to the trash stack
456 * @param parser parser object
457 * @param src src string
458 * @param dst destination buffer (trash stack pointer)
459 * @param dst_const const destination pointer (e.g. value of object)
460 * @param in_len input length
461 * @param need_unescape need to unescape source (and copy it)
462 * @param need_lowercase need to lowercase value (and copy)
463 * @param need_expand need to expand variables (and copy as well)
464 * @return output length (excluding \0 symbol)
465 */
466static inline ssize_t
467ucl_copy_or_store_ptr (struct ucl_parser *parser,
468		const unsigned char *src, unsigned char **dst,
469		const char **dst_const, size_t in_len,
470		bool need_unescape, bool need_lowercase, bool need_expand)
471{
472	ssize_t ret = -1, tret;
473	unsigned char *tmp;
474
475	if (need_unescape || need_lowercase ||
476			(need_expand && parser->variables != NULL) ||
477			!(parser->flags & UCL_PARSER_ZEROCOPY)) {
478		/* Copy string */
479		*dst = UCL_ALLOC (in_len + 1);
480		if (*dst == NULL) {
481			ucl_set_err (parser->chunks, 0, "cannot allocate memory for a string", &parser->err);
482			return false;
483		}
484		if (need_lowercase) {
485			ret = ucl_strlcpy_tolower (*dst, src, in_len + 1);
486		}
487		else {
488			ret = ucl_strlcpy_unsafe (*dst, src, in_len + 1);
489		}
490
491		if (need_unescape) {
492			ret = ucl_unescape_json_string (*dst, ret);
493		}
494		if (need_expand) {
495			tmp = *dst;
496			tret = ret;
497			ret = ucl_expand_variable (parser, dst, tmp, ret);
498			if (*dst == NULL) {
499				/* Nothing to expand */
500				*dst = tmp;
501				ret = tret;
502			}
503		}
504		*dst_const = *dst;
505	}
506	else {
507		*dst_const = src;
508		ret = in_len;
509	}
510
511	return ret;
512}
513
514/**
515 * Create and append an object at the specified level
516 * @param parser
517 * @param is_array
518 * @param level
519 * @return
520 */
521static inline ucl_object_t *
522ucl_add_parser_stack (ucl_object_t *obj, struct ucl_parser *parser, bool is_array, int level)
523{
524	struct ucl_stack *st;
525
526	if (!is_array) {
527		if (obj == NULL) {
528			obj = ucl_object_typed_new (UCL_OBJECT);
529		}
530		else {
531			obj->type = UCL_OBJECT;
532		}
533		obj->value.ov = ucl_hash_create ();
534		parser->state = UCL_STATE_KEY;
535	}
536	else {
537		if (obj == NULL) {
538			obj = ucl_object_typed_new (UCL_ARRAY);
539		}
540		else {
541			obj->type = UCL_ARRAY;
542		}
543		parser->state = UCL_STATE_VALUE;
544	}
545
546	st = UCL_ALLOC (sizeof (struct ucl_stack));
547	st->obj = obj;
548	st->level = level;
549	LL_PREPEND (parser->stack, st);
550	parser->cur_obj = obj;
551
552	return obj;
553}
554
555int
556ucl_maybe_parse_number (ucl_object_t *obj,
557		const char *start, const char *end, const char **pos, bool allow_double, bool number_bytes)
558{
559	const char *p = start, *c = start;
560	char *endptr;
561	bool got_dot = false, got_exp = false, need_double = false,
562			is_date = false, valid_start = false, is_hex = false,
563			is_neg = false;
564	double dv = 0;
565	int64_t lv = 0;
566
567	if (*p == '-') {
568		is_neg = true;
569		c ++;
570		p ++;
571	}
572	while (p < end) {
573		if (is_hex && isxdigit (*p)) {
574			p ++;
575		}
576		else if (isdigit (*p)) {
577			valid_start = true;
578			p ++;
579		}
580		else if (!is_hex && (*p == 'x' || *p == 'X')) {
581			is_hex = true;
582			allow_double = false;
583			c = p + 1;
584		}
585		else if (allow_double) {
586			if (p == c) {
587				/* Empty digits sequence, not a number */
588				*pos = start;
589				return EINVAL;
590			}
591			else if (*p == '.') {
592				if (got_dot) {
593					/* Double dots, not a number */
594					*pos = start;
595					return EINVAL;
596				}
597				else {
598					got_dot = true;
599					need_double = true;
600					p ++;
601				}
602			}
603			else if (*p == 'e' || *p == 'E') {
604				if (got_exp) {
605					/* Double exp, not a number */
606					*pos = start;
607					return EINVAL;
608				}
609				else {
610					got_exp = true;
611					need_double = true;
612					p ++;
613					if (p >= end) {
614						*pos = start;
615						return EINVAL;
616					}
617					if (!isdigit (*p) && *p != '+' && *p != '-') {
618						/* Wrong exponent sign */
619						*pos = start;
620						return EINVAL;
621					}
622					else {
623						p ++;
624					}
625				}
626			}
627			else {
628				/* Got the end of the number, need to check */
629				break;
630			}
631		}
632		else {
633			break;
634		}
635	}
636
637	if (!valid_start) {
638		*pos = start;
639		return EINVAL;
640	}
641
642	errno = 0;
643	if (need_double) {
644		dv = strtod (c, &endptr);
645	}
646	else {
647		if (is_hex) {
648			lv = strtoimax (c, &endptr, 16);
649		}
650		else {
651			lv = strtoimax (c, &endptr, 10);
652		}
653	}
654	if (errno == ERANGE) {
655		*pos = start;
656		return ERANGE;
657	}
658
659	/* Now check endptr */
660	if (endptr == NULL || ucl_lex_is_atom_end (*endptr) || *endptr == '\0') {
661		p = endptr;
662		goto set_obj;
663	}
664
665	if (endptr < end && endptr != start) {
666		p = endptr;
667		switch (*p) {
668		case 'm':
669		case 'M':
670		case 'g':
671		case 'G':
672		case 'k':
673		case 'K':
674			if (end - p >= 2) {
675				if (p[1] == 's' || p[1] == 'S') {
676					/* Milliseconds */
677					if (!need_double) {
678						need_double = true;
679						dv = lv;
680					}
681					is_date = true;
682					if (p[0] == 'm' || p[0] == 'M') {
683						dv /= 1000.;
684					}
685					else {
686						dv *= ucl_lex_num_multiplier (*p, false);
687					}
688					p += 2;
689					goto set_obj;
690				}
691				else if (number_bytes || (p[1] == 'b' || p[1] == 'B')) {
692					/* Bytes */
693					if (need_double) {
694						need_double = false;
695						lv = dv;
696					}
697					lv *= ucl_lex_num_multiplier (*p, true);
698					p += 2;
699					goto set_obj;
700				}
701				else if (ucl_lex_is_atom_end (p[1])) {
702					if (need_double) {
703						dv *= ucl_lex_num_multiplier (*p, false);
704					}
705					else {
706						lv *= ucl_lex_num_multiplier (*p, number_bytes);
707					}
708					p ++;
709					goto set_obj;
710				}
711				else if (end - p >= 3) {
712					if (tolower (p[0]) == 'm' &&
713							tolower (p[1]) == 'i' &&
714							tolower (p[2]) == 'n') {
715						/* Minutes */
716						if (!need_double) {
717							need_double = true;
718							dv = lv;
719						}
720						is_date = true;
721						dv *= 60.;
722						p += 3;
723						goto set_obj;
724					}
725				}
726			}
727			else {
728				if (need_double) {
729					dv *= ucl_lex_num_multiplier (*p, false);
730				}
731				else {
732					lv *= ucl_lex_num_multiplier (*p, number_bytes);
733				}
734				p ++;
735				goto set_obj;
736			}
737			break;
738		case 'S':
739		case 's':
740			if (p == end - 1 || ucl_lex_is_atom_end (p[1])) {
741				if (!need_double) {
742					need_double = true;
743					dv = lv;
744				}
745				p ++;
746				is_date = true;
747				goto set_obj;
748			}
749			break;
750		case 'h':
751		case 'H':
752		case 'd':
753		case 'D':
754		case 'w':
755		case 'W':
756		case 'Y':
757		case 'y':
758			if (p == end - 1 || ucl_lex_is_atom_end (p[1])) {
759				if (!need_double) {
760					need_double = true;
761					dv = lv;
762				}
763				is_date = true;
764				dv *= ucl_lex_time_multiplier (*p);
765				p ++;
766				goto set_obj;
767			}
768			break;
769		}
770	}
771
772	*pos = c;
773	return EINVAL;
774
775	set_obj:
776	if (allow_double && (need_double || is_date)) {
777		if (!is_date) {
778			obj->type = UCL_FLOAT;
779		}
780		else {
781			obj->type = UCL_TIME;
782		}
783		obj->value.dv = is_neg ? (-dv) : dv;
784	}
785	else {
786		obj->type = UCL_INT;
787		obj->value.iv = is_neg ? (-lv) : lv;
788	}
789	*pos = p;
790	return 0;
791}
792
793/**
794 * Parse possible number
795 * @param parser
796 * @param chunk
797 * @return true if a number has been parsed
798 */
799static bool
800ucl_lex_number (struct ucl_parser *parser,
801		struct ucl_chunk *chunk, ucl_object_t *obj)
802{
803	const unsigned char *pos;
804	int ret;
805
806	ret = ucl_maybe_parse_number (obj, chunk->pos, chunk->end, (const char **)&pos, true, false);
807
808	if (ret == 0) {
809		chunk->remain -= pos - chunk->pos;
810		chunk->column += pos - chunk->pos;
811		chunk->pos = pos;
812		return true;
813	}
814	else if (ret == ERANGE) {
815		ucl_set_err (chunk, ERANGE, "numeric value out of range", &parser->err);
816	}
817
818	return false;
819}
820
821/**
822 * Parse quoted string with possible escapes
823 * @param parser
824 * @param chunk
825 * @return true if a string has been parsed
826 */
827static bool
828ucl_lex_json_string (struct ucl_parser *parser,
829		struct ucl_chunk *chunk, bool *need_unescape, bool *ucl_escape, bool *var_expand)
830{
831	const unsigned char *p = chunk->pos;
832	unsigned char c;
833	int i;
834
835	while (p < chunk->end) {
836		c = *p;
837		if (c < 0x1F) {
838			/* Unmasked control character */
839			if (c == '\n') {
840				ucl_set_err (chunk, UCL_ESYNTAX, "unexpected newline", &parser->err);
841			}
842			else {
843				ucl_set_err (chunk, UCL_ESYNTAX, "unexpected control character", &parser->err);
844			}
845			return false;
846		}
847		else if (c == '\\') {
848			ucl_chunk_skipc (chunk, p);
849			c = *p;
850			if (p >= chunk->end) {
851				ucl_set_err (chunk, UCL_ESYNTAX, "unfinished escape character", &parser->err);
852				return false;
853			}
854			else if (ucl_test_character (c, UCL_CHARACTER_ESCAPE)) {
855				if (c == 'u') {
856					ucl_chunk_skipc (chunk, p);
857					for (i = 0; i < 4 && p < chunk->end; i ++) {
858						if (!isxdigit (*p)) {
859							ucl_set_err (chunk, UCL_ESYNTAX, "invalid utf escape", &parser->err);
860							return false;
861						}
862						ucl_chunk_skipc (chunk, p);
863					}
864					if (p >= chunk->end) {
865						ucl_set_err (chunk, UCL_ESYNTAX, "unfinished escape character", &parser->err);
866						return false;
867					}
868				}
869				else {
870					ucl_chunk_skipc (chunk, p);
871				}
872			}
873			*need_unescape = true;
874			*ucl_escape = true;
875			continue;
876		}
877		else if (c == '"') {
878			ucl_chunk_skipc (chunk, p);
879			return true;
880		}
881		else if (ucl_test_character (c, UCL_CHARACTER_UCL_UNSAFE)) {
882			*ucl_escape = true;
883		}
884		else if (c == '$') {
885			*var_expand = true;
886		}
887		ucl_chunk_skipc (chunk, p);
888	}
889
890	ucl_set_err (chunk, UCL_ESYNTAX, "no quote at the end of json string", &parser->err);
891	return false;
892}
893
894/**
895 * Parse a key in an object
896 * @param parser
897 * @param chunk
898 * @return true if a key has been parsed
899 */
900static bool
901ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk, bool *next_key, bool *end_of_object)
902{
903	const unsigned char *p, *c = NULL, *end, *t;
904	const char *key = NULL;
905	bool got_quote = false, got_eq = false, got_semicolon = false,
906			need_unescape = false, ucl_escape = false, var_expand = false,
907			got_content = false, got_sep = false;
908	ucl_object_t *nobj, *tobj;
909	ucl_hash_t *container;
910	ssize_t keylen;
911
912	p = chunk->pos;
913
914	if (*p == '.') {
915		/* It is macro actually */
916		ucl_chunk_skipc (chunk, p);
917		parser->prev_state = parser->state;
918		parser->state = UCL_STATE_MACRO_NAME;
919		return true;
920	}
921	while (p < chunk->end) {
922		/*
923		 * A key must start with alpha, number, '/' or '_' and end with space character
924		 */
925		if (c == NULL) {
926			if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
927				if (!ucl_skip_comments (parser)) {
928					return false;
929				}
930				p = chunk->pos;
931			}
932			else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
933				ucl_chunk_skipc (chunk, p);
934			}
935			else if (ucl_test_character (*p, UCL_CHARACTER_KEY_START)) {
936				/* The first symbol */
937				c = p;
938				ucl_chunk_skipc (chunk, p);
939				got_content = true;
940			}
941			else if (*p == '"') {
942				/* JSON style key */
943				c = p + 1;
944				got_quote = true;
945				got_content = true;
946				ucl_chunk_skipc (chunk, p);
947			}
948			else if (*p == '}') {
949				/* We have actually end of an object */
950				*end_of_object = true;
951				return true;
952			}
953			else if (*p == '.') {
954				ucl_chunk_skipc (chunk, p);
955				parser->prev_state = parser->state;
956				parser->state = UCL_STATE_MACRO_NAME;
957				return true;
958			}
959			else {
960				/* Invalid identifier */
961				ucl_set_err (chunk, UCL_ESYNTAX, "key must begin with a letter", &parser->err);
962				return false;
963			}
964		}
965		else {
966			/* Parse the body of a key */
967			if (!got_quote) {
968				if (ucl_test_character (*p, UCL_CHARACTER_KEY)) {
969					got_content = true;
970					ucl_chunk_skipc (chunk, p);
971				}
972				else if (ucl_test_character (*p, UCL_CHARACTER_KEY_SEP)) {
973					end = p;
974					break;
975				}
976				else {
977					ucl_set_err (chunk, UCL_ESYNTAX, "invalid character in a key", &parser->err);
978					return false;
979				}
980			}
981			else {
982				/* We need to parse json like quoted string */
983				if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
984					return false;
985				}
986				/* Always escape keys obtained via json */
987				end = chunk->pos - 1;
988				p = chunk->pos;
989				break;
990			}
991		}
992	}
993
994	if (p >= chunk->end && got_content) {
995		ucl_set_err (chunk, UCL_ESYNTAX, "unfinished key", &parser->err);
996		return false;
997	}
998	else if (!got_content) {
999		return true;
1000	}
1001	*end_of_object = false;
1002	/* We are now at the end of the key, need to parse the rest */
1003	while (p < chunk->end) {
1004		if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
1005			ucl_chunk_skipc (chunk, p);
1006		}
1007		else if (*p == '=') {
1008			if (!got_eq && !got_semicolon) {
1009				ucl_chunk_skipc (chunk, p);
1010				got_eq = true;
1011			}
1012			else {
1013				ucl_set_err (chunk, UCL_ESYNTAX, "unexpected '=' character", &parser->err);
1014				return false;
1015			}
1016		}
1017		else if (*p == ':') {
1018			if (!got_eq && !got_semicolon) {
1019				ucl_chunk_skipc (chunk, p);
1020				got_semicolon = true;
1021			}
1022			else {
1023				ucl_set_err (chunk, UCL_ESYNTAX, "unexpected ':' character", &parser->err);
1024				return false;
1025			}
1026		}
1027		else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1028			/* Check for comment */
1029			if (!ucl_skip_comments (parser)) {
1030				return false;
1031			}
1032			p = chunk->pos;
1033		}
1034		else {
1035			/* Start value */
1036			break;
1037		}
1038	}
1039
1040	if (p >= chunk->end && got_content) {
1041		ucl_set_err (chunk, UCL_ESYNTAX, "unfinished key", &parser->err);
1042		return false;
1043	}
1044
1045	got_sep = got_semicolon || got_eq;
1046
1047	if (!got_sep) {
1048		/*
1049		 * Maybe we have more keys nested, so search for termination character.
1050		 * Possible choices:
1051		 * 1) key1 key2 ... keyN [:=] value <- we treat that as error
1052		 * 2) key1 ... keyN {} or [] <- we treat that as nested objects
1053		 * 3) key1 value[;,\n] <- we treat that as linear object
1054		 */
1055		t = p;
1056		*next_key = false;
1057		while (ucl_test_character (*t, UCL_CHARACTER_WHITESPACE)) {
1058			t ++;
1059		}
1060		/* Check first non-space character after a key */
1061		if (*t != '{' && *t != '[') {
1062			while (t < chunk->end) {
1063				if (*t == ',' || *t == ';' || *t == '\n' || *t == '\r') {
1064					break;
1065				}
1066				else if (*t == '{' || *t == '[') {
1067					*next_key = true;
1068					break;
1069				}
1070				t ++;
1071			}
1072		}
1073	}
1074
1075	/* Create a new object */
1076	nobj = ucl_object_new ();
1077	keylen = ucl_copy_or_store_ptr (parser, c, &nobj->trash_stack[UCL_TRASH_KEY],
1078			&key, end - c, need_unescape, parser->flags & UCL_PARSER_KEY_LOWERCASE, false);
1079	if (keylen == -1) {
1080		ucl_object_free(nobj);
1081		return false;
1082	}
1083	else if (keylen == 0) {
1084		ucl_set_err (chunk, UCL_ESYNTAX, "empty keys are not allowed", &parser->err);
1085		ucl_object_free(nobj);
1086		return false;
1087	}
1088
1089	container = parser->stack->obj->value.ov;
1090	nobj->key = key;
1091	nobj->keylen = keylen;
1092	tobj = ucl_hash_search_obj (container, nobj);
1093	if (tobj == NULL) {
1094		container = ucl_hash_insert_object (container, nobj);
1095		nobj->prev = nobj;
1096		nobj->next = NULL;
1097		parser->stack->obj->len ++;
1098	}
1099	else {
1100		DL_APPEND (tobj, nobj);
1101	}
1102
1103	if (ucl_escape) {
1104		nobj->flags |= UCL_OBJECT_NEED_KEY_ESCAPE;
1105	}
1106	parser->stack->obj->value.ov = container;
1107
1108	parser->cur_obj = nobj;
1109
1110	return true;
1111}
1112
1113/**
1114 * Parse a cl string
1115 * @param parser
1116 * @param chunk
1117 * @return true if a key has been parsed
1118 */
1119static bool
1120ucl_parse_string_value (struct ucl_parser *parser,
1121		struct ucl_chunk *chunk, bool *var_expand, bool *need_unescape)
1122{
1123	const unsigned char *p;
1124	enum {
1125		UCL_BRACE_ROUND = 0,
1126		UCL_BRACE_SQUARE,
1127		UCL_BRACE_FIGURE
1128	};
1129	int braces[3][2] = {{0, 0}, {0, 0}, {0, 0}};
1130
1131	p = chunk->pos;
1132
1133	while (p < chunk->end) {
1134
1135		/* Skip pairs of figure braces */
1136		if (*p == '{') {
1137			braces[UCL_BRACE_FIGURE][0] ++;
1138		}
1139		else if (*p == '}') {
1140			braces[UCL_BRACE_FIGURE][1] ++;
1141			if (braces[UCL_BRACE_FIGURE][1] <= braces[UCL_BRACE_FIGURE][0]) {
1142				/* This is not a termination symbol, continue */
1143				ucl_chunk_skipc (chunk, p);
1144				continue;
1145			}
1146		}
1147		/* Skip pairs of square braces */
1148		else if (*p == '[') {
1149			braces[UCL_BRACE_SQUARE][0] ++;
1150		}
1151		else if (*p == ']') {
1152			braces[UCL_BRACE_SQUARE][1] ++;
1153			if (braces[UCL_BRACE_SQUARE][1] <= braces[UCL_BRACE_SQUARE][0]) {
1154				/* This is not a termination symbol, continue */
1155				ucl_chunk_skipc (chunk, p);
1156				continue;
1157			}
1158		}
1159		else if (*p == '$') {
1160			*var_expand = true;
1161		}
1162		else if (*p == '\\') {
1163			*need_unescape = true;
1164			ucl_chunk_skipc (chunk, p);
1165			if (p < chunk->end) {
1166				ucl_chunk_skipc (chunk, p);
1167			}
1168			continue;
1169		}
1170
1171		if (ucl_lex_is_atom_end (*p) || (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1172			break;
1173		}
1174		ucl_chunk_skipc (chunk, p);
1175	}
1176
1177	if (p >= chunk->end) {
1178		ucl_set_err (chunk, UCL_ESYNTAX, "unfinished value", &parser->err);
1179		return false;
1180	}
1181
1182	return true;
1183}
1184
1185/**
1186 * Parse multiline string ending with \n{term}\n
1187 * @param parser
1188 * @param chunk
1189 * @param term
1190 * @param term_len
1191 * @return size of multiline string or 0 in case of error
1192 */
1193static int
1194ucl_parse_multiline_string (struct ucl_parser *parser,
1195		struct ucl_chunk *chunk, const unsigned char *term,
1196		int term_len, unsigned char const **beg,
1197		bool *var_expand)
1198{
1199	const unsigned char *p, *c;
1200	bool newline = false;
1201	int len = 0;
1202
1203	p = chunk->pos;
1204
1205	c = p;
1206
1207	while (p < chunk->end) {
1208		if (newline) {
1209			if (chunk->end - p < term_len) {
1210				return 0;
1211			}
1212			else if (memcmp (p, term, term_len) == 0 && (p[term_len] == '\n' || p[term_len] == '\r')) {
1213				len = p - c;
1214				chunk->remain -= term_len;
1215				chunk->pos = p + term_len;
1216				chunk->column = term_len;
1217				*beg = c;
1218				break;
1219			}
1220		}
1221		if (*p == '\n') {
1222			newline = true;
1223		}
1224		else {
1225			if (*p == '$') {
1226				*var_expand = true;
1227			}
1228			newline = false;
1229		}
1230		ucl_chunk_skipc (chunk, p);
1231	}
1232
1233	return len;
1234}
1235
1236static ucl_object_t*
1237ucl_get_value_object (struct ucl_parser *parser)
1238{
1239	ucl_object_t *t, *obj = NULL;
1240
1241	if (parser->stack->obj->type == UCL_ARRAY) {
1242		/* Object must be allocated */
1243		obj = ucl_object_new ();
1244		t = parser->stack->obj->value.av;
1245		DL_APPEND (t, obj);
1246		parser->cur_obj = obj;
1247		parser->stack->obj->value.av = t;
1248		parser->stack->obj->len ++;
1249	}
1250	else {
1251		/* Object has been already allocated */
1252		obj = parser->cur_obj;
1253	}
1254
1255	return obj;
1256}
1257
1258/**
1259 * Handle value data
1260 * @param parser
1261 * @param chunk
1262 * @return
1263 */
1264static bool
1265ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1266{
1267	const unsigned char *p, *c;
1268	ucl_object_t *obj = NULL;
1269	unsigned int stripped_spaces;
1270	int str_len;
1271	bool need_unescape = false, ucl_escape = false, var_expand = false;
1272
1273	p = chunk->pos;
1274
1275	/* Skip any spaces and comments */
1276	if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) ||
1277			(chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1278		while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1279			ucl_chunk_skipc (chunk, p);
1280		}
1281		if (!ucl_skip_comments (parser)) {
1282			return false;
1283		}
1284		p = chunk->pos;
1285	}
1286
1287	while (p < chunk->end) {
1288		c = p;
1289		switch (*p) {
1290		case '"':
1291			obj = ucl_get_value_object (parser);
1292			ucl_chunk_skipc (chunk, p);
1293			if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1294				return false;
1295			}
1296			str_len = chunk->pos - c - 2;
1297			obj->type = UCL_STRING;
1298			if ((str_len = ucl_copy_or_store_ptr (parser, c + 1, &obj->trash_stack[UCL_TRASH_VALUE],
1299					&obj->value.sv, str_len, need_unescape, false, var_expand)) == -1) {
1300				return false;
1301			}
1302			obj->len = str_len;
1303			parser->state = UCL_STATE_AFTER_VALUE;
1304			p = chunk->pos;
1305			return true;
1306			break;
1307		case '{':
1308			obj = ucl_get_value_object (parser);
1309			/* We have a new object */
1310			obj = ucl_add_parser_stack (obj, parser, false, parser->stack->level);
1311
1312			ucl_chunk_skipc (chunk, p);
1313			return true;
1314			break;
1315		case '[':
1316			obj = ucl_get_value_object (parser);
1317			/* We have a new array */
1318			obj = ucl_add_parser_stack (obj, parser, true, parser->stack->level);
1319
1320			ucl_chunk_skipc (chunk, p);
1321			return true;
1322			break;
1323		case ']':
1324			/* We have the array ending */
1325			if (parser->stack && parser->stack->obj->type == UCL_ARRAY) {
1326				parser->state = UCL_STATE_AFTER_VALUE;
1327				return true;
1328			}
1329			else {
1330				goto parse_string;
1331			}
1332			break;
1333		case '<':
1334			obj = ucl_get_value_object (parser);
1335			/* We have something like multiline value, which must be <<[A-Z]+\n */
1336			if (chunk->end - p > 3) {
1337				if (memcmp (p, "<<", 2) == 0) {
1338					p += 2;
1339					/* We allow only uppercase characters in multiline definitions */
1340					while (p < chunk->end && *p >= 'A' && *p <= 'Z') {
1341						p ++;
1342					}
1343					if (*p =='\n') {
1344						/* Set chunk positions and start multiline parsing */
1345						c += 2;
1346						chunk->remain -= p - c;
1347						chunk->pos = p + 1;
1348						chunk->column = 0;
1349						chunk->line ++;
1350						if ((str_len = ucl_parse_multiline_string (parser, chunk, c,
1351								p - c, &c, &var_expand)) == 0) {
1352							ucl_set_err (chunk, UCL_ESYNTAX, "unterminated multiline value", &parser->err);
1353							return false;
1354						}
1355						obj->type = UCL_STRING;
1356						if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE],
1357							&obj->value.sv, str_len - 1, false, false, var_expand)) == -1) {
1358							return false;
1359						}
1360						obj->len = str_len;
1361						parser->state = UCL_STATE_AFTER_VALUE;
1362						return true;
1363					}
1364				}
1365			}
1366			/* Fallback to ordinary strings */
1367		default:
1368parse_string:
1369			if (obj == NULL) {
1370				obj = ucl_get_value_object (parser);
1371			}
1372			/* Parse atom */
1373			if (ucl_test_character (*p, UCL_CHARACTER_VALUE_DIGIT_START)) {
1374				if (!ucl_lex_number (parser, chunk, obj)) {
1375					if (parser->state == UCL_STATE_ERROR) {
1376						return false;
1377					}
1378				}
1379				else {
1380					parser->state = UCL_STATE_AFTER_VALUE;
1381					return true;
1382				}
1383				/* Fallback to normal string */
1384			}
1385
1386			if (!ucl_parse_string_value (parser, chunk, &var_expand, &need_unescape)) {
1387				return false;
1388			}
1389			/* Cut trailing spaces */
1390			stripped_spaces = 0;
1391			while (ucl_test_character (*(chunk->pos - 1 - stripped_spaces),
1392					UCL_CHARACTER_WHITESPACE)) {
1393				stripped_spaces ++;
1394			}
1395			str_len = chunk->pos - c - stripped_spaces;
1396			if (str_len <= 0) {
1397				ucl_set_err (chunk, 0, "string value must not be empty", &parser->err);
1398				return false;
1399			}
1400			else if (str_len == 4 && memcmp (c, "null", 4) == 0) {
1401				obj->len = 0;
1402				obj->type = UCL_NULL;
1403			}
1404			else if (!ucl_maybe_parse_boolean (obj, c, str_len)) {
1405				obj->type = UCL_STRING;
1406				if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE],
1407						&obj->value.sv, str_len, need_unescape,
1408						false, var_expand)) == -1) {
1409					return false;
1410				}
1411				obj->len = str_len;
1412			}
1413			parser->state = UCL_STATE_AFTER_VALUE;
1414			p = chunk->pos;
1415
1416			return true;
1417			break;
1418		}
1419	}
1420
1421	return true;
1422}
1423
1424/**
1425 * Handle after value data
1426 * @param parser
1427 * @param chunk
1428 * @return
1429 */
1430static bool
1431ucl_parse_after_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1432{
1433	const unsigned char *p;
1434	bool got_sep = false;
1435	struct ucl_stack *st;
1436
1437	p = chunk->pos;
1438
1439	while (p < chunk->end) {
1440		if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
1441			/* Skip whitespaces */
1442			ucl_chunk_skipc (chunk, p);
1443		}
1444		else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1445			/* Skip comment */
1446			if (!ucl_skip_comments (parser)) {
1447				return false;
1448			}
1449			/* Treat comment as a separator */
1450			got_sep = true;
1451			p = chunk->pos;
1452		}
1453		else if (ucl_test_character (*p, UCL_CHARACTER_VALUE_END)) {
1454			if (*p == '}' || *p == ']') {
1455				if (parser->stack == NULL) {
1456					ucl_set_err (chunk, UCL_ESYNTAX, "end of array or object detected without corresponding start", &parser->err);
1457					return false;
1458				}
1459				if ((*p == '}' && parser->stack->obj->type == UCL_OBJECT) ||
1460						(*p == ']' && parser->stack->obj->type == UCL_ARRAY)) {
1461
1462					/* Pop all nested objects from a stack */
1463					st = parser->stack;
1464					parser->stack = st->next;
1465					UCL_FREE (sizeof (struct ucl_stack), st);
1466
1467					while (parser->stack != NULL) {
1468						st = parser->stack;
1469						if (st->next == NULL || st->next->level == st->level) {
1470							break;
1471						}
1472						parser->stack = st->next;
1473						UCL_FREE (sizeof (struct ucl_stack), st);
1474					}
1475				}
1476				else {
1477					ucl_set_err (chunk, UCL_ESYNTAX, "unexpected terminating symbol detected", &parser->err);
1478					return false;
1479				}
1480
1481				if (parser->stack == NULL) {
1482					/* Ignore everything after a top object */
1483					return true;
1484				}
1485				else {
1486					ucl_chunk_skipc (chunk, p);
1487				}
1488				got_sep = true;
1489			}
1490			else {
1491				/* Got a separator */
1492				got_sep = true;
1493				ucl_chunk_skipc (chunk, p);
1494			}
1495		}
1496		else {
1497			/* Anything else */
1498			if (!got_sep) {
1499				ucl_set_err (chunk, UCL_ESYNTAX, "delimiter is missing", &parser->err);
1500				return false;
1501			}
1502			return true;
1503		}
1504	}
1505
1506	return true;
1507}
1508
1509/**
1510 * Handle macro data
1511 * @param parser
1512 * @param chunk
1513 * @return
1514 */
1515static bool
1516ucl_parse_macro_value (struct ucl_parser *parser,
1517		struct ucl_chunk *chunk, struct ucl_macro *macro,
1518		unsigned char const **macro_start, size_t *macro_len)
1519{
1520	const unsigned char *p, *c;
1521	bool need_unescape = false, ucl_escape = false, var_expand = false;
1522
1523	p = chunk->pos;
1524
1525	switch (*p) {
1526	case '"':
1527		/* We have macro value encoded in quotes */
1528		c = p;
1529		ucl_chunk_skipc (chunk, p);
1530		if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1531			return false;
1532		}
1533
1534		*macro_start = c + 1;
1535		*macro_len = chunk->pos - c - 2;
1536		p = chunk->pos;
1537		break;
1538	case '{':
1539		/* We got a multiline macro body */
1540		ucl_chunk_skipc (chunk, p);
1541		/* Skip spaces at the beginning */
1542		while (p < chunk->end) {
1543			if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1544				ucl_chunk_skipc (chunk, p);
1545			}
1546			else {
1547				break;
1548			}
1549		}
1550		c = p;
1551		while (p < chunk->end) {
1552			if (*p == '}') {
1553				break;
1554			}
1555			ucl_chunk_skipc (chunk, p);
1556		}
1557		*macro_start = c;
1558		*macro_len = p - c;
1559		ucl_chunk_skipc (chunk, p);
1560		break;
1561	default:
1562		/* Macro is not enclosed in quotes or braces */
1563		c = p;
1564		while (p < chunk->end) {
1565			if (ucl_lex_is_atom_end (*p)) {
1566				break;
1567			}
1568			ucl_chunk_skipc (chunk, p);
1569		}
1570		*macro_start = c;
1571		*macro_len = p - c;
1572		break;
1573	}
1574
1575	/* We are at the end of a macro */
1576	/* Skip ';' and space characters and return to previous state */
1577	while (p < chunk->end) {
1578		if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) && *p != ';') {
1579			break;
1580		}
1581		ucl_chunk_skipc (chunk, p);
1582	}
1583	return true;
1584}
1585
1586/**
1587 * Handle the main states of rcl parser
1588 * @param parser parser structure
1589 * @param data the pointer to the beginning of a chunk
1590 * @param len the length of a chunk
1591 * @return true if chunk has been parsed and false in case of error
1592 */
1593static bool
1594ucl_state_machine (struct ucl_parser *parser)
1595{
1596	ucl_object_t *obj;
1597	struct ucl_chunk *chunk = parser->chunks;
1598	const unsigned char *p, *c = NULL, *macro_start = NULL;
1599	unsigned char *macro_escaped;
1600	size_t macro_len = 0;
1601	struct ucl_macro *macro = NULL;
1602	bool next_key = false, end_of_object = false;
1603
1604	if (parser->top_obj == NULL) {
1605		if (*chunk->pos == '[') {
1606			obj = ucl_add_parser_stack (NULL, parser, true, 0);
1607		}
1608		else {
1609			obj = ucl_add_parser_stack (NULL, parser, false, 0);
1610		}
1611		parser->top_obj = obj;
1612		parser->cur_obj = obj;
1613		parser->state = UCL_STATE_INIT;
1614	}
1615
1616	p = chunk->pos;
1617	while (chunk->pos < chunk->end) {
1618		switch (parser->state) {
1619		case UCL_STATE_INIT:
1620			/*
1621			 * At the init state we can either go to the parse array or object
1622			 * if we got [ or { correspondingly or can just treat new data as
1623			 * a key of newly created object
1624			 */
1625			obj = parser->cur_obj;
1626			if (!ucl_skip_comments (parser)) {
1627				parser->prev_state = parser->state;
1628				parser->state = UCL_STATE_ERROR;
1629				return false;
1630			}
1631			else {
1632				p = chunk->pos;
1633				if (*p == '[') {
1634					parser->state = UCL_STATE_VALUE;
1635					ucl_chunk_skipc (chunk, p);
1636				}
1637				else {
1638					parser->state = UCL_STATE_KEY;
1639					if (*p == '{') {
1640						ucl_chunk_skipc (chunk, p);
1641					}
1642				}
1643			}
1644			break;
1645		case UCL_STATE_KEY:
1646			/* Skip any spaces */
1647			while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1648				ucl_chunk_skipc (chunk, p);
1649			}
1650			if (*p == '}') {
1651				/* We have the end of an object */
1652				parser->state = UCL_STATE_AFTER_VALUE;
1653				continue;
1654			}
1655			if (parser->stack == NULL) {
1656				/* No objects are on stack, but we want to parse a key */
1657				ucl_set_err (chunk, UCL_ESYNTAX, "top object is finished but the parser "
1658						"expects a key", &parser->err);
1659				parser->prev_state = parser->state;
1660				parser->state = UCL_STATE_ERROR;
1661				return false;
1662			}
1663			if (!ucl_parse_key (parser, chunk, &next_key, &end_of_object)) {
1664				parser->prev_state = parser->state;
1665				parser->state = UCL_STATE_ERROR;
1666				return false;
1667			}
1668			if (end_of_object) {
1669				p = chunk->pos;
1670				parser->state = UCL_STATE_AFTER_VALUE;
1671				continue;
1672			}
1673			else if (parser->state != UCL_STATE_MACRO_NAME) {
1674				if (next_key && parser->stack->obj->type == UCL_OBJECT) {
1675					/* Parse more keys and nest objects accordingly */
1676					obj = ucl_add_parser_stack (parser->cur_obj, parser, false, parser->stack->level + 1);
1677				}
1678				else {
1679					parser->state = UCL_STATE_VALUE;
1680				}
1681			}
1682			else {
1683				c = chunk->pos;
1684			}
1685			p = chunk->pos;
1686			break;
1687		case UCL_STATE_VALUE:
1688			/* We need to check what we do have */
1689			if (!ucl_parse_value (parser, chunk)) {
1690				parser->prev_state = parser->state;
1691				parser->state = UCL_STATE_ERROR;
1692				return false;
1693			}
1694			/* State is set in ucl_parse_value call */
1695			p = chunk->pos;
1696			break;
1697		case UCL_STATE_AFTER_VALUE:
1698			if (!ucl_parse_after_value (parser, chunk)) {
1699				parser->prev_state = parser->state;
1700				parser->state = UCL_STATE_ERROR;
1701				return false;
1702			}
1703			if (parser->stack != NULL) {
1704				if (parser->stack->obj->type == UCL_OBJECT) {
1705					parser->state = UCL_STATE_KEY;
1706				}
1707				else {
1708					/* Array */
1709					parser->state = UCL_STATE_VALUE;
1710				}
1711			}
1712			else {
1713				/* Skip everything at the end */
1714				return true;
1715			}
1716			p = chunk->pos;
1717			break;
1718		case UCL_STATE_MACRO_NAME:
1719			if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1720				ucl_chunk_skipc (chunk, p);
1721			}
1722			else if (p - c > 0) {
1723				/* We got macro name */
1724				macro_len = (size_t)(p - c);
1725				HASH_FIND (hh, parser->macroes, c, macro_len, macro);
1726				if (macro == NULL) {
1727					ucl_create_err (&parser->err, "error on line %d at column %d: "
1728							"unknown macro: '%.*s', character: '%c'",
1729								chunk->line, chunk->column, (int)(p - c), c, *chunk->pos);
1730					parser->state = UCL_STATE_ERROR;
1731					return false;
1732				}
1733				/* Now we need to skip all spaces */
1734				while (p < chunk->end) {
1735					if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1736						if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1737							/* Skip comment */
1738							if (!ucl_skip_comments (parser)) {
1739								return false;
1740							}
1741							p = chunk->pos;
1742						}
1743						break;
1744					}
1745					ucl_chunk_skipc (chunk, p);
1746				}
1747				parser->state = UCL_STATE_MACRO;
1748			}
1749			break;
1750		case UCL_STATE_MACRO:
1751			if (!ucl_parse_macro_value (parser, chunk, macro,
1752					&macro_start, &macro_len)) {
1753				parser->prev_state = parser->state;
1754				parser->state = UCL_STATE_ERROR;
1755				return false;
1756			}
1757			macro_len = ucl_expand_variable (parser, &macro_escaped, macro_start, macro_len);
1758			parser->state = parser->prev_state;
1759			if (macro_escaped == NULL) {
1760				if (!macro->handler (macro_start, macro_len, macro->ud)) {
1761					return false;
1762				}
1763			}
1764			else {
1765				if (!macro->handler (macro_escaped, macro_len, macro->ud)) {
1766					UCL_FREE (macro_len + 1, macro_escaped);
1767					return false;
1768				}
1769				UCL_FREE (macro_len + 1, macro_escaped);
1770			}
1771			p = chunk->pos;
1772			break;
1773		default:
1774			/* TODO: add all states */
1775			ucl_set_err (chunk, UCL_EINTERNAL, "internal error: parser is in an unknown state", &parser->err);
1776			parser->state = UCL_STATE_ERROR;
1777			return false;
1778		}
1779	}
1780
1781	return true;
1782}
1783
1784struct ucl_parser*
1785ucl_parser_new (int flags)
1786{
1787	struct ucl_parser *new;
1788
1789	new = UCL_ALLOC (sizeof (struct ucl_parser));
1790	memset (new, 0, sizeof (struct ucl_parser));
1791
1792	ucl_parser_register_macro (new, "include", ucl_include_handler, new);
1793	ucl_parser_register_macro (new, "try_include", ucl_try_include_handler, new);
1794	ucl_parser_register_macro (new, "includes", ucl_includes_handler, new);
1795
1796	new->flags = flags;
1797
1798	/* Initial assumption about filevars */
1799	ucl_parser_set_filevars (new, NULL, false);
1800
1801	return new;
1802}
1803
1804
1805void
1806ucl_parser_register_macro (struct ucl_parser *parser, const char *macro,
1807		ucl_macro_handler handler, void* ud)
1808{
1809	struct ucl_macro *new;
1810
1811	new = UCL_ALLOC (sizeof (struct ucl_macro));
1812	memset (new, 0, sizeof (struct ucl_macro));
1813	new->handler = handler;
1814	new->name = strdup (macro);
1815	new->ud = ud;
1816	HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new);
1817}
1818
1819void
1820ucl_parser_register_variable (struct ucl_parser *parser, const char *var,
1821		const char *value)
1822{
1823	struct ucl_variable *new = NULL, *cur;
1824
1825	if (var == NULL) {
1826		return;
1827	}
1828
1829	/* Find whether a variable already exists */
1830	LL_FOREACH (parser->variables, cur) {
1831		if (strcmp (cur->var, var) == 0) {
1832			new = cur;
1833			break;
1834		}
1835	}
1836
1837	if (value == NULL) {
1838
1839		if (new != NULL) {
1840			/* Remove variable */
1841			LL_DELETE (parser->variables, new);
1842			free (new->var);
1843			free (new->value);
1844			UCL_FREE (sizeof (struct ucl_variable), new);
1845		}
1846		else {
1847			/* Do nothing */
1848			return;
1849		}
1850	}
1851	else {
1852		if (new == NULL) {
1853			new = UCL_ALLOC (sizeof (struct ucl_variable));
1854			memset (new, 0, sizeof (struct ucl_variable));
1855			new->var = strdup (var);
1856			new->var_len = strlen (var);
1857			new->value = strdup (value);
1858			new->value_len = strlen (value);
1859
1860			LL_PREPEND (parser->variables, new);
1861		}
1862		else {
1863			free (new->value);
1864			new->value = strdup (value);
1865			new->value_len = strlen (value);
1866		}
1867	}
1868}
1869
1870bool
1871ucl_parser_add_chunk (struct ucl_parser *parser, const unsigned char *data,
1872		size_t len)
1873{
1874	struct ucl_chunk *chunk;
1875
1876	if (parser->state != UCL_STATE_ERROR) {
1877		chunk = UCL_ALLOC (sizeof (struct ucl_chunk));
1878		chunk->begin = data;
1879		chunk->remain = len;
1880		chunk->pos = chunk->begin;
1881		chunk->end = chunk->begin + len;
1882		chunk->line = 1;
1883		chunk->column = 0;
1884		LL_PREPEND (parser->chunks, chunk);
1885		parser->recursion ++;
1886		if (parser->recursion > UCL_MAX_RECURSION) {
1887			ucl_create_err (&parser->err, "maximum include nesting limit is reached: %d",
1888					parser->recursion);
1889			return false;
1890		}
1891		return ucl_state_machine (parser);
1892	}
1893
1894	ucl_create_err (&parser->err, "a parser is in an invalid state");
1895
1896	return false;
1897}
1898