ucl_parser.c revision 268831
1/* Copyright (c) 2013, Vsevolod Stakhov
2 * All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *       * Redistributions of source code must retain the above copyright
7 *         notice, this list of conditions and the following disclaimer.
8 *       * Redistributions in binary form must reproduce the above copyright
9 *         notice, this list of conditions and the following disclaimer in the
10 *         documentation and/or other materials provided with the distribution.
11 *
12 * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
13 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
14 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
15 * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
16 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
18 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
19 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
20 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
21 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
22 */
23
24#include "ucl.h"
25#include "ucl_internal.h"
26#include "ucl_chartable.h"
27
/**
 * @file ucl_parser.c
 * The implementation of the UCL parser
 */
32
/*
 * Snapshot of a read cursor. The fields carry the same names as the chunk
 * members that ucl_chunk_skipc() advances (line, column, remain, pos).
 * NOTE(review): the structure is not referenced in this part of the file;
 * presumably it is used to save and restore a chunk position - confirm.
 */
struct ucl_parser_saved_state {
	unsigned line;				/* line counter */
	unsigned column;			/* column counter within the line */
	size_t remain;				/* number of bytes left to read */
	const unsigned char *pos;	/* current read position */
};
39
/**
 * Advance a chunk cursor by exactly one character.
 *
 * Updates the line/column counters of the chunk (a newline bumps the line
 * and resets the column to 0, any other character bumps the column), then
 * increments both the caller's pointer and chunk->pos and decrements
 * chunk->remain.
 *
 * No bounds check is performed here: the caller must make sure `p` still
 * points inside the chunk. Both arguments are evaluated more than once, so
 * they must be free of side effects.
 *
 * @param chunk pointer to the chunk whose line, column, pos and remain are updated
 * @param p modifiable pointer lvalue that is advanced together with chunk->pos
 */
#define ucl_chunk_skipc(chunk, p)    do {					\
	if (*(p) == '\n') {										\
		(chunk)->line ++;									\
		(chunk)->column = 0;								\
	}														\
	else {													\
		(chunk)->column ++;									\
	}														\
	(p) ++;													\
	(chunk)->pos ++;										\
	(chunk)->remain --;										\
} while (0)
57
58static inline void
59ucl_set_err (struct ucl_chunk *chunk, int code, const char *str, UT_string **err)
60{
61	if (chunk->pos < chunk->end) {
62		if (isgraph (*chunk->pos)) {
63			ucl_create_err (err, "error on line %d at column %d: '%s', character: '%c'",
64					chunk->line, chunk->column, str, *chunk->pos);
65		}
66		else {
67			ucl_create_err (err, "error on line %d at column %d: '%s', character: '0x%02x'",
68					chunk->line, chunk->column, str, (int)*chunk->pos);
69		}
70	}
71	else {
72		ucl_create_err (err, "error at the end of chunk: %s", str);
73	}
74}
75
76/**
77 * Skip all comments from the current pos resolving nested and multiline comments
78 * @param parser
79 * @return
80 */
81static bool
82ucl_skip_comments (struct ucl_parser *parser)
83{
84	struct ucl_chunk *chunk = parser->chunks;
85	const unsigned char *p;
86	int comments_nested = 0;
87
88	p = chunk->pos;
89
90start:
91	if (*p == '#') {
92		if (parser->state != UCL_STATE_SCOMMENT &&
93				parser->state != UCL_STATE_MCOMMENT) {
94			while (p < chunk->end) {
95				if (*p == '\n') {
96					ucl_chunk_skipc (chunk, p);
97					goto start;
98				}
99				ucl_chunk_skipc (chunk, p);
100			}
101		}
102	}
103	else if (*p == '/' && chunk->remain >= 2) {
104		if (p[1] == '*') {
105			ucl_chunk_skipc (chunk, p);
106			comments_nested ++;
107			ucl_chunk_skipc (chunk, p);
108
109			while (p < chunk->end) {
110				if (*p == '*') {
111					ucl_chunk_skipc (chunk, p);
112					if (*p == '/') {
113						comments_nested --;
114						if (comments_nested == 0) {
115							ucl_chunk_skipc (chunk, p);
116							goto start;
117						}
118					}
119					ucl_chunk_skipc (chunk, p);
120				}
121				else if (p[0] == '/' && chunk->remain >= 2 && p[1] == '*') {
122					comments_nested ++;
123					ucl_chunk_skipc (chunk, p);
124					ucl_chunk_skipc (chunk, p);
125					continue;
126				}
127				ucl_chunk_skipc (chunk, p);
128			}
129			if (comments_nested != 0) {
130				ucl_set_err (chunk, UCL_ENESTED, "unfinished multiline comment", &parser->err);
131				return false;
132			}
133		}
134	}
135
136	return true;
137}
138
/**
 * Translate a size suffix character into its numeric multiplier.
 *
 * The suffix is matched case-insensitively: 'k', 'm' and 'g' give the first,
 * second and third power of the base, where the base is 1024 when is_bytes is
 * set and 1000 otherwise.
 *
 * @param c suffix character
 * @param is_bytes select the 1024 base instead of 1000
 * @return the multiplier, or 1 for any other character
 */
static inline unsigned long
ucl_lex_num_multiplier (const unsigned char c, bool is_bytes) {
	const long base = is_bytes ? 1024 : 1000;

	switch (tolower (c)) {
	case 'k':
		return base;
	case 'm':
		return base * base;
	case 'g':
		return base * base * base;
	default:
		return 1;
	}
}
169
170
/**
 * Translate a time suffix character into the number of seconds it stands for.
 *
 * The suffix is matched case-insensitively: 'm' minute, 'h' hour, 'd' day,
 * 'w' week, 'y' year (365 days).
 *
 * @param c suffix character
 * @return seconds per unit, or 1 for any other character
 */
static inline double
ucl_lex_time_multiplier (const unsigned char c) {
	static const struct {
		char c;
		double mult;
	} multipliers[] = {
			{'m', 60},
			{'h', 60 * 60},
			{'d', 60 * 60 * 24},
			{'w', 60 * 60 * 24 * 7},
			/* A year is 365 days, not 365 weeks */
			{'y', 60 * 60 * 24 * 365}
	};
	size_t i;

	for (i = 0; i < sizeof (multipliers) / sizeof (multipliers[0]); i ++) {
		if (tolower (c) == multipliers[i].c) {
			return multipliers[i].mult;
		}
	}

	return 1;
}
198
199/**
200 * Return true if a character is a end of an atom
201 * @param c
202 * @return
203 */
204static inline bool
205ucl_lex_is_atom_end (const unsigned char c)
206{
207	return ucl_test_character (c, UCL_CHARACTER_VALUE_END);
208}
209
/**
 * Tell whether two consecutive characters open a comment: a '#' on its own
 * (the second character is then irrelevant) or a slash followed by an
 * asterisk.
 * @param c1 first character
 * @param c2 character that follows c1
 * @return true if a comment starts at c1
 */
static inline bool
ucl_lex_is_comment (const unsigned char c1, const unsigned char c2)
{
	return c1 == '#' || (c1 == '/' && c2 == '*');
}
223
224/**
225 * Check variable found
226 * @param parser
227 * @param ptr
228 * @param remain
229 * @param out_len
230 * @param strict
231 * @param found
232 * @return
233 */
234static inline const char *
235ucl_check_variable_safe (struct ucl_parser *parser, const char *ptr, size_t remain,
236		size_t *out_len, bool strict, bool *found)
237{
238	struct ucl_variable *var;
239	unsigned char *dst;
240	size_t dstlen;
241	bool need_free = false;
242
243	LL_FOREACH (parser->variables, var) {
244		if (strict) {
245			if (remain == var->var_len) {
246				if (memcmp (ptr, var->var, var->var_len) == 0) {
247					*out_len += var->value_len;
248					*found = true;
249					return (ptr + var->var_len);
250				}
251			}
252		}
253		else {
254			if (remain >= var->var_len) {
255				if (memcmp (ptr, var->var, var->var_len) == 0) {
256					*out_len += var->value_len;
257					*found = true;
258					return (ptr + var->var_len);
259				}
260			}
261		}
262	}
263
264	/* XXX: can only handle ${VAR} */
265	if (!(*found) && parser->var_handler != NULL && strict) {
266		/* Call generic handler */
267		if (parser->var_handler (ptr, remain, &dst, &dstlen, &need_free,
268				parser->var_data)) {
269			*found = true;
270			if (need_free) {
271				free (dst);
272			}
273			return (ptr + remain);
274		}
275	}
276
277	return ptr;
278}
279
/**
 * Sizing pass for one '$' occurrence: work out how many output bytes the
 * construct that follows the dollar sign will produce and how many input
 * bytes it consumes.
 *
 * Handled forms (ptr points just past the '$'):
 *  - "{NAME}": strict lookup of NAME; if unknown, "${" is counted literally
 *    and scanning resumes right after the opening brace;
 *  - "{..." without a closing brace: only the '$' is counted, the caller
 *    goes on from the brace;
 *  - "$": an escaped dollar sign, one byte of output, both consumed;
 *  - anything else: relaxed (prefix) lookup; if unknown the '$' is counted
 *    literally.
 *
 * @param parser parser object
 * @param ptr first character after the '$'
 * @param remain number of bytes available at ptr
 * @param out_len running output length, incremented here
 * @param vars_found set to true when a variable was resolved
 * @return position from which the caller must continue scanning
 */
static const char *
ucl_check_variable (struct ucl_parser *parser, const char *ptr,
		size_t remain, size_t *out_len, bool *vars_found)
{
	const char *p, *end, *ret = ptr;
	bool found = false;

	if (remain == 0) {
		/* '$' is the very last character: it is emitted as is */
		(*out_len) ++;
		return ret;
	}

	if (*ptr == '{') {
		/* We need to match the variable enclosed in braces */
		p = ptr + 1;
		end = ptr + remain;
		while (p < end && *p != '}') {
			p ++;
		}

		if (p == end) {
			/*
			 * No closing brace: '$' is literal. The brace itself is counted
			 * by the caller, which continues from `ptr`.
			 */
			(*out_len) ++;
		}
		else {
			ret = ucl_check_variable_safe (parser, ptr + 1, p - ptr - 1,
					out_len, true, &found);
			if (found) {
				/* {} must be excluded actually */
				ret ++;
				*vars_found = true;
			}
			else {
				/* Unknown name: "${" is kept verbatim */
				*out_len += 2;
			}
		}
	}
	else if (*ptr != '$') {
		/* Not count escaped dollar sign */
		ret = ucl_check_variable_safe (parser, ptr, remain, out_len, false, &found);
		if (found) {
			*vars_found = true;
		}
		else {
			(*out_len) ++;
		}
	}
	else {
		/* "$$" collapses into a single '$' */
		ret ++;
		(*out_len) ++;
	}

	return ret;
}
336
337/**
338 * Expand a single variable
339 * @param parser
340 * @param ptr
341 * @param remain
342 * @param dest
343 * @return
344 */
345static const char *
346ucl_expand_single_variable (struct ucl_parser *parser, const char *ptr,
347		size_t remain, unsigned char **dest)
348{
349	unsigned char *d = *dest, *dst;
350	const char *p = ptr + 1, *ret;
351	struct ucl_variable *var;
352	size_t dstlen;
353	bool need_free = false;
354	bool found = false;
355	bool strict = false;
356
357	ret = ptr + 1;
358	remain --;
359
360	if (*p == '$') {
361		*d++ = *p++;
362		*dest = d;
363		return p;
364	}
365	else if (*p == '{') {
366		p ++;
367		strict = true;
368		ret += 2;
369		remain -= 2;
370	}
371
372	LL_FOREACH (parser->variables, var) {
373		if (remain >= var->var_len) {
374			if (memcmp (p, var->var, var->var_len) == 0) {
375				memcpy (d, var->value, var->value_len);
376				ret += var->var_len;
377				d += var->value_len;
378				found = true;
379				break;
380			}
381		}
382	}
383	if (!found) {
384		if (strict && parser->var_handler != NULL) {
385			if (parser->var_handler (ptr, remain, &dst, &dstlen, &need_free,
386							parser->var_data)) {
387				memcpy (d, dst, dstlen);
388				ret += dstlen;
389				d += remain;
390				found = true;
391			}
392		}
393
394		/* Leave variable as is */
395		if (!found) {
396			if (strict) {
397				/* Copy '${' */
398				memcpy (d, ptr, 2);
399				d += 2;
400				ret --;
401			}
402			else {
403				memcpy (d, ptr, 1);
404				d ++;
405			}
406		}
407	}
408
409	*dest = d;
410	return ret;
411}
412
/**
 * Expand every variable reference found in a string.
 *
 * Works in two passes over the input: the first one computes the length of
 * the expanded text and detects whether any variable is actually present,
 * the second one writes the expanded text into a freshly allocated,
 * NUL-terminated buffer.
 *
 * @param parser parser object
 * @param dst receives the new buffer, or NULL when the input holds no known
 * variable or the allocation failed
 * @param src input text
 * @param in_len length of the input text
 * @return length of the expanded text, or in_len when *dst is NULL
 */
static ssize_t
ucl_expand_variable (struct ucl_parser *parser, unsigned char **dst,
		const char *src, size_t in_len)
{
	const char *const stop = src + in_len;
	const char *cur;
	unsigned char *out;
	size_t out_len = 0;
	bool vars_found = false;

	/* Pass 1: measure */
	for (cur = src; cur != stop; ) {
		if (*cur != '$') {
			cur ++;
			out_len ++;
			continue;
		}
		cur = ucl_check_variable (parser, cur + 1, stop - cur - 1, &out_len, &vars_found);
	}

	if (!vars_found) {
		/* Trivial case */
		*dst = NULL;
		return in_len;
	}

	*dst = UCL_ALLOC (out_len + 1);
	if (*dst == NULL) {
		return in_len;
	}

	/* Pass 2: emit */
	out = *dst;
	for (cur = src; cur != stop; ) {
		if (*cur == '$') {
			cur = ucl_expand_single_variable (parser, cur, stop - cur, &out);
		}
		else {
			*out++ = *cur++;
		}
	}

	*out = '\0';

	return out_len;
}
467
468/**
469 * Store or copy pointer to the trash stack
470 * @param parser parser object
471 * @param src src string
472 * @param dst destination buffer (trash stack pointer)
473 * @param dst_const const destination pointer (e.g. value of object)
474 * @param in_len input length
475 * @param need_unescape need to unescape source (and copy it)
476 * @param need_lowercase need to lowercase value (and copy)
477 * @param need_expand need to expand variables (and copy as well)
478 * @return output length (excluding \0 symbol)
479 */
480static inline ssize_t
481ucl_copy_or_store_ptr (struct ucl_parser *parser,
482		const unsigned char *src, unsigned char **dst,
483		const char **dst_const, size_t in_len,
484		bool need_unescape, bool need_lowercase, bool need_expand)
485{
486	ssize_t ret = -1, tret;
487	unsigned char *tmp;
488
489	if (need_unescape || need_lowercase ||
490			(need_expand && parser->variables != NULL) ||
491			!(parser->flags & UCL_PARSER_ZEROCOPY)) {
492		/* Copy string */
493		*dst = UCL_ALLOC (in_len + 1);
494		if (*dst == NULL) {
495			ucl_set_err (parser->chunks, 0, "cannot allocate memory for a string", &parser->err);
496			return false;
497		}
498		if (need_lowercase) {
499			ret = ucl_strlcpy_tolower (*dst, src, in_len + 1);
500		}
501		else {
502			ret = ucl_strlcpy_unsafe (*dst, src, in_len + 1);
503		}
504
505		if (need_unescape) {
506			ret = ucl_unescape_json_string (*dst, ret);
507		}
508		if (need_expand) {
509			tmp = *dst;
510			tret = ret;
511			ret = ucl_expand_variable (parser, dst, tmp, ret);
512			if (*dst == NULL) {
513				/* Nothing to expand */
514				*dst = tmp;
515				ret = tret;
516			}
517		}
518		*dst_const = *dst;
519	}
520	else {
521		*dst_const = src;
522		ret = in_len;
523	}
524
525	return ret;
526}
527
528/**
529 * Create and append an object at the specified level
530 * @param parser
531 * @param is_array
532 * @param level
533 * @return
534 */
535static inline ucl_object_t *
536ucl_add_parser_stack (ucl_object_t *obj, struct ucl_parser *parser, bool is_array, int level)
537{
538	struct ucl_stack *st;
539
540	if (!is_array) {
541		if (obj == NULL) {
542			obj = ucl_object_typed_new (UCL_OBJECT);
543		}
544		else {
545			obj->type = UCL_OBJECT;
546		}
547		obj->value.ov = ucl_hash_create ();
548		parser->state = UCL_STATE_KEY;
549	}
550	else {
551		if (obj == NULL) {
552			obj = ucl_object_typed_new (UCL_ARRAY);
553		}
554		else {
555			obj->type = UCL_ARRAY;
556		}
557		parser->state = UCL_STATE_VALUE;
558	}
559
560	st = UCL_ALLOC (sizeof (struct ucl_stack));
561	if (st == NULL) {
562		ucl_set_err (parser->chunks, 0, "cannot allocate memory for an object", &parser->err);
563		return NULL;
564	}
565	st->obj = obj;
566	st->level = level;
567	LL_PREPEND (parser->stack, st);
568	parser->cur_obj = obj;
569
570	return obj;
571}
572
573int
574ucl_maybe_parse_number (ucl_object_t *obj,
575		const char *start, const char *end, const char **pos,
576		bool allow_double, bool number_bytes, bool allow_time)
577{
578	const char *p = start, *c = start;
579	char *endptr;
580	bool got_dot = false, got_exp = false, need_double = false,
581			is_time = false, valid_start = false, is_hex = false,
582			is_neg = false;
583	double dv = 0;
584	int64_t lv = 0;
585
586	if (*p == '-') {
587		is_neg = true;
588		c ++;
589		p ++;
590	}
591	while (p < end) {
592		if (is_hex && isxdigit (*p)) {
593			p ++;
594		}
595		else if (isdigit (*p)) {
596			valid_start = true;
597			p ++;
598		}
599		else if (!is_hex && (*p == 'x' || *p == 'X')) {
600			is_hex = true;
601			allow_double = false;
602			c = p + 1;
603		}
604		else if (allow_double) {
605			if (p == c) {
606				/* Empty digits sequence, not a number */
607				*pos = start;
608				return EINVAL;
609			}
610			else if (*p == '.') {
611				if (got_dot) {
612					/* Double dots, not a number */
613					*pos = start;
614					return EINVAL;
615				}
616				else {
617					got_dot = true;
618					need_double = true;
619					p ++;
620				}
621			}
622			else if (*p == 'e' || *p == 'E') {
623				if (got_exp) {
624					/* Double exp, not a number */
625					*pos = start;
626					return EINVAL;
627				}
628				else {
629					got_exp = true;
630					need_double = true;
631					p ++;
632					if (p >= end) {
633						*pos = start;
634						return EINVAL;
635					}
636					if (!isdigit (*p) && *p != '+' && *p != '-') {
637						/* Wrong exponent sign */
638						*pos = start;
639						return EINVAL;
640					}
641					else {
642						p ++;
643					}
644				}
645			}
646			else {
647				/* Got the end of the number, need to check */
648				break;
649			}
650		}
651		else {
652			break;
653		}
654	}
655
656	if (!valid_start) {
657		*pos = start;
658		return EINVAL;
659	}
660
661	errno = 0;
662	if (need_double) {
663		dv = strtod (c, &endptr);
664	}
665	else {
666		if (is_hex) {
667			lv = strtoimax (c, &endptr, 16);
668		}
669		else {
670			lv = strtoimax (c, &endptr, 10);
671		}
672	}
673	if (errno == ERANGE) {
674		*pos = start;
675		return ERANGE;
676	}
677
678	/* Now check endptr */
679	if (endptr == NULL || ucl_lex_is_atom_end (*endptr) || *endptr == '\0' ||
680			ucl_test_character (*endptr, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
681		p = endptr;
682		goto set_obj;
683	}
684
685	if (endptr < end && endptr != start) {
686		p = endptr;
687		switch (*p) {
688		case 'm':
689		case 'M':
690		case 'g':
691		case 'G':
692		case 'k':
693		case 'K':
694			if (end - p >= 2) {
695				if (p[1] == 's' || p[1] == 'S') {
696					/* Milliseconds */
697					if (!need_double) {
698						need_double = true;
699						dv = lv;
700					}
701					is_time = true;
702					if (p[0] == 'm' || p[0] == 'M') {
703						dv /= 1000.;
704					}
705					else {
706						dv *= ucl_lex_num_multiplier (*p, false);
707					}
708					p += 2;
709					goto set_obj;
710				}
711				else if (number_bytes || (p[1] == 'b' || p[1] == 'B')) {
712					/* Bytes */
713					if (need_double) {
714						need_double = false;
715						lv = dv;
716					}
717					lv *= ucl_lex_num_multiplier (*p, true);
718					p += 2;
719					goto set_obj;
720				}
721				else if (ucl_lex_is_atom_end (p[1])) {
722					if (need_double) {
723						dv *= ucl_lex_num_multiplier (*p, false);
724					}
725					else {
726						lv *= ucl_lex_num_multiplier (*p, number_bytes);
727					}
728					p ++;
729					goto set_obj;
730				}
731				else if (allow_time && end - p >= 3) {
732					if (tolower (p[0]) == 'm' &&
733							tolower (p[1]) == 'i' &&
734							tolower (p[2]) == 'n') {
735						/* Minutes */
736						if (!need_double) {
737							need_double = true;
738							dv = lv;
739						}
740						is_time = true;
741						dv *= 60.;
742						p += 3;
743						goto set_obj;
744					}
745				}
746			}
747			else {
748				if (need_double) {
749					dv *= ucl_lex_num_multiplier (*p, false);
750				}
751				else {
752					lv *= ucl_lex_num_multiplier (*p, number_bytes);
753				}
754				p ++;
755				goto set_obj;
756			}
757			break;
758		case 'S':
759		case 's':
760			if (allow_time &&
761					(p == end - 1 || ucl_lex_is_atom_end (p[1]))) {
762				if (!need_double) {
763					need_double = true;
764					dv = lv;
765				}
766				p ++;
767				is_time = true;
768				goto set_obj;
769			}
770			break;
771		case 'h':
772		case 'H':
773		case 'd':
774		case 'D':
775		case 'w':
776		case 'W':
777		case 'Y':
778		case 'y':
779			if (allow_time &&
780					(p == end - 1 || ucl_lex_is_atom_end (p[1]))) {
781				if (!need_double) {
782					need_double = true;
783					dv = lv;
784				}
785				is_time = true;
786				dv *= ucl_lex_time_multiplier (*p);
787				p ++;
788				goto set_obj;
789			}
790			break;
791		}
792	}
793
794	*pos = c;
795	return EINVAL;
796
797	set_obj:
798	if (allow_double && (need_double || is_time)) {
799		if (!is_time) {
800			obj->type = UCL_FLOAT;
801		}
802		else {
803			obj->type = UCL_TIME;
804		}
805		obj->value.dv = is_neg ? (-dv) : dv;
806	}
807	else {
808		obj->type = UCL_INT;
809		obj->value.iv = is_neg ? (-lv) : lv;
810	}
811	*pos = p;
812	return 0;
813}
814
815/**
816 * Parse possible number
817 * @param parser
818 * @param chunk
819 * @return true if a number has been parsed
820 */
821static bool
822ucl_lex_number (struct ucl_parser *parser,
823		struct ucl_chunk *chunk, ucl_object_t *obj)
824{
825	const unsigned char *pos;
826	int ret;
827
828	ret = ucl_maybe_parse_number (obj, chunk->pos, chunk->end, (const char **)&pos,
829			true, false, ((parser->flags & UCL_PARSER_NO_TIME) == 0));
830
831	if (ret == 0) {
832		chunk->remain -= pos - chunk->pos;
833		chunk->column += pos - chunk->pos;
834		chunk->pos = pos;
835		return true;
836	}
837	else if (ret == ERANGE) {
838		ucl_set_err (chunk, ERANGE, "numeric value out of range", &parser->err);
839	}
840
841	return false;
842}
843
844/**
845 * Parse quoted string with possible escapes
846 * @param parser
847 * @param chunk
848 * @return true if a string has been parsed
849 */
850static bool
851ucl_lex_json_string (struct ucl_parser *parser,
852		struct ucl_chunk *chunk, bool *need_unescape, bool *ucl_escape, bool *var_expand)
853{
854	const unsigned char *p = chunk->pos;
855	unsigned char c;
856	int i;
857
858	while (p < chunk->end) {
859		c = *p;
860		if (c < 0x1F) {
861			/* Unmasked control character */
862			if (c == '\n') {
863				ucl_set_err (chunk, UCL_ESYNTAX, "unexpected newline", &parser->err);
864			}
865			else {
866				ucl_set_err (chunk, UCL_ESYNTAX, "unexpected control character", &parser->err);
867			}
868			return false;
869		}
870		else if (c == '\\') {
871			ucl_chunk_skipc (chunk, p);
872			c = *p;
873			if (p >= chunk->end) {
874				ucl_set_err (chunk, UCL_ESYNTAX, "unfinished escape character", &parser->err);
875				return false;
876			}
877			else if (ucl_test_character (c, UCL_CHARACTER_ESCAPE)) {
878				if (c == 'u') {
879					ucl_chunk_skipc (chunk, p);
880					for (i = 0; i < 4 && p < chunk->end; i ++) {
881						if (!isxdigit (*p)) {
882							ucl_set_err (chunk, UCL_ESYNTAX, "invalid utf escape", &parser->err);
883							return false;
884						}
885						ucl_chunk_skipc (chunk, p);
886					}
887					if (p >= chunk->end) {
888						ucl_set_err (chunk, UCL_ESYNTAX, "unfinished escape character", &parser->err);
889						return false;
890					}
891				}
892				else {
893					ucl_chunk_skipc (chunk, p);
894				}
895			}
896			*need_unescape = true;
897			*ucl_escape = true;
898			continue;
899		}
900		else if (c == '"') {
901			ucl_chunk_skipc (chunk, p);
902			return true;
903		}
904		else if (ucl_test_character (c, UCL_CHARACTER_UCL_UNSAFE)) {
905			*ucl_escape = true;
906		}
907		else if (c == '$') {
908			*var_expand = true;
909		}
910		ucl_chunk_skipc (chunk, p);
911	}
912
913	ucl_set_err (chunk, UCL_ESYNTAX, "no quote at the end of json string", &parser->err);
914	return false;
915}
916
917/**
918 * Parse a key in an object
919 * @param parser
920 * @param chunk
921 * @return true if a key has been parsed
922 */
923static bool
924ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk, bool *next_key, bool *end_of_object)
925{
926	const unsigned char *p, *c = NULL, *end, *t;
927	const char *key = NULL;
928	bool got_quote = false, got_eq = false, got_semicolon = false,
929			need_unescape = false, ucl_escape = false, var_expand = false,
930			got_content = false, got_sep = false;
931	ucl_object_t *nobj, *tobj;
932	ucl_hash_t *container;
933	ssize_t keylen;
934
935	p = chunk->pos;
936
937	if (*p == '.') {
938		/* It is macro actually */
939		ucl_chunk_skipc (chunk, p);
940		parser->prev_state = parser->state;
941		parser->state = UCL_STATE_MACRO_NAME;
942		return true;
943	}
944	while (p < chunk->end) {
945		/*
946		 * A key must start with alpha, number, '/' or '_' and end with space character
947		 */
948		if (c == NULL) {
949			if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
950				if (!ucl_skip_comments (parser)) {
951					return false;
952				}
953				p = chunk->pos;
954			}
955			else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
956				ucl_chunk_skipc (chunk, p);
957			}
958			else if (ucl_test_character (*p, UCL_CHARACTER_KEY_START)) {
959				/* The first symbol */
960				c = p;
961				ucl_chunk_skipc (chunk, p);
962				got_content = true;
963			}
964			else if (*p == '"') {
965				/* JSON style key */
966				c = p + 1;
967				got_quote = true;
968				got_content = true;
969				ucl_chunk_skipc (chunk, p);
970			}
971			else if (*p == '}') {
972				/* We have actually end of an object */
973				*end_of_object = true;
974				return true;
975			}
976			else if (*p == '.') {
977				ucl_chunk_skipc (chunk, p);
978				parser->prev_state = parser->state;
979				parser->state = UCL_STATE_MACRO_NAME;
980				return true;
981			}
982			else {
983				/* Invalid identifier */
984				ucl_set_err (chunk, UCL_ESYNTAX, "key must begin with a letter", &parser->err);
985				return false;
986			}
987		}
988		else {
989			/* Parse the body of a key */
990			if (!got_quote) {
991				if (ucl_test_character (*p, UCL_CHARACTER_KEY)) {
992					got_content = true;
993					ucl_chunk_skipc (chunk, p);
994				}
995				else if (ucl_test_character (*p, UCL_CHARACTER_KEY_SEP)) {
996					end = p;
997					break;
998				}
999				else {
1000					ucl_set_err (chunk, UCL_ESYNTAX, "invalid character in a key", &parser->err);
1001					return false;
1002				}
1003			}
1004			else {
1005				/* We need to parse json like quoted string */
1006				if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1007					return false;
1008				}
1009				/* Always escape keys obtained via json */
1010				end = chunk->pos - 1;
1011				p = chunk->pos;
1012				break;
1013			}
1014		}
1015	}
1016
1017	if (p >= chunk->end && got_content) {
1018		ucl_set_err (chunk, UCL_ESYNTAX, "unfinished key", &parser->err);
1019		return false;
1020	}
1021	else if (!got_content) {
1022		return true;
1023	}
1024	*end_of_object = false;
1025	/* We are now at the end of the key, need to parse the rest */
1026	while (p < chunk->end) {
1027		if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
1028			ucl_chunk_skipc (chunk, p);
1029		}
1030		else if (*p == '=') {
1031			if (!got_eq && !got_semicolon) {
1032				ucl_chunk_skipc (chunk, p);
1033				got_eq = true;
1034			}
1035			else {
1036				ucl_set_err (chunk, UCL_ESYNTAX, "unexpected '=' character", &parser->err);
1037				return false;
1038			}
1039		}
1040		else if (*p == ':') {
1041			if (!got_eq && !got_semicolon) {
1042				ucl_chunk_skipc (chunk, p);
1043				got_semicolon = true;
1044			}
1045			else {
1046				ucl_set_err (chunk, UCL_ESYNTAX, "unexpected ':' character", &parser->err);
1047				return false;
1048			}
1049		}
1050		else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1051			/* Check for comment */
1052			if (!ucl_skip_comments (parser)) {
1053				return false;
1054			}
1055			p = chunk->pos;
1056		}
1057		else {
1058			/* Start value */
1059			break;
1060		}
1061	}
1062
1063	if (p >= chunk->end && got_content) {
1064		ucl_set_err (chunk, UCL_ESYNTAX, "unfinished key", &parser->err);
1065		return false;
1066	}
1067
1068	got_sep = got_semicolon || got_eq;
1069
1070	if (!got_sep) {
1071		/*
1072		 * Maybe we have more keys nested, so search for termination character.
1073		 * Possible choices:
1074		 * 1) key1 key2 ... keyN [:=] value <- we treat that as error
1075		 * 2) key1 ... keyN {} or [] <- we treat that as nested objects
1076		 * 3) key1 value[;,\n] <- we treat that as linear object
1077		 */
1078		t = p;
1079		*next_key = false;
1080		while (ucl_test_character (*t, UCL_CHARACTER_WHITESPACE)) {
1081			t ++;
1082		}
1083		/* Check first non-space character after a key */
1084		if (*t != '{' && *t != '[') {
1085			while (t < chunk->end) {
1086				if (*t == ',' || *t == ';' || *t == '\n' || *t == '\r') {
1087					break;
1088				}
1089				else if (*t == '{' || *t == '[') {
1090					*next_key = true;
1091					break;
1092				}
1093				t ++;
1094			}
1095		}
1096	}
1097
1098	/* Create a new object */
1099	nobj = ucl_object_new ();
1100	keylen = ucl_copy_or_store_ptr (parser, c, &nobj->trash_stack[UCL_TRASH_KEY],
1101			&key, end - c, need_unescape, parser->flags & UCL_PARSER_KEY_LOWERCASE, false);
1102	if (keylen == -1) {
1103		ucl_object_unref (nobj);
1104		return false;
1105	}
1106	else if (keylen == 0) {
1107		ucl_set_err (chunk, UCL_ESYNTAX, "empty keys are not allowed", &parser->err);
1108		ucl_object_unref (nobj);
1109		return false;
1110	}
1111
1112	container = parser->stack->obj->value.ov;
1113	nobj->key = key;
1114	nobj->keylen = keylen;
1115	tobj = __DECONST (ucl_object_t *, ucl_hash_search_obj (container, nobj));
1116	if (tobj == NULL) {
1117		container = ucl_hash_insert_object (container, nobj);
1118		nobj->prev = nobj;
1119		nobj->next = NULL;
1120		parser->stack->obj->len ++;
1121	}
1122	else {
1123		DL_APPEND (tobj, nobj);
1124	}
1125
1126	if (ucl_escape) {
1127		nobj->flags |= UCL_OBJECT_NEED_KEY_ESCAPE;
1128	}
1129	parser->stack->obj->value.ov = container;
1130
1131	parser->cur_obj = nobj;
1132
1133	return true;
1134}
1135
1136/**
1137 * Parse a cl string
1138 * @param parser
1139 * @param chunk
1140 * @return true if a key has been parsed
1141 */
1142static bool
1143ucl_parse_string_value (struct ucl_parser *parser,
1144		struct ucl_chunk *chunk, bool *var_expand, bool *need_unescape)
1145{
1146	const unsigned char *p;
1147	enum {
1148		UCL_BRACE_ROUND = 0,
1149		UCL_BRACE_SQUARE,
1150		UCL_BRACE_FIGURE
1151	};
1152	int braces[3][2] = {{0, 0}, {0, 0}, {0, 0}};
1153
1154	p = chunk->pos;
1155
1156	while (p < chunk->end) {
1157
1158		/* Skip pairs of figure braces */
1159		if (*p == '{') {
1160			braces[UCL_BRACE_FIGURE][0] ++;
1161		}
1162		else if (*p == '}') {
1163			braces[UCL_BRACE_FIGURE][1] ++;
1164			if (braces[UCL_BRACE_FIGURE][1] <= braces[UCL_BRACE_FIGURE][0]) {
1165				/* This is not a termination symbol, continue */
1166				ucl_chunk_skipc (chunk, p);
1167				continue;
1168			}
1169		}
1170		/* Skip pairs of square braces */
1171		else if (*p == '[') {
1172			braces[UCL_BRACE_SQUARE][0] ++;
1173		}
1174		else if (*p == ']') {
1175			braces[UCL_BRACE_SQUARE][1] ++;
1176			if (braces[UCL_BRACE_SQUARE][1] <= braces[UCL_BRACE_SQUARE][0]) {
1177				/* This is not a termination symbol, continue */
1178				ucl_chunk_skipc (chunk, p);
1179				continue;
1180			}
1181		}
1182		else if (*p == '$') {
1183			*var_expand = true;
1184		}
1185		else if (*p == '\\') {
1186			*need_unescape = true;
1187			ucl_chunk_skipc (chunk, p);
1188			if (p < chunk->end) {
1189				ucl_chunk_skipc (chunk, p);
1190			}
1191			continue;
1192		}
1193
1194		if (ucl_lex_is_atom_end (*p) || (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1195			break;
1196		}
1197		ucl_chunk_skipc (chunk, p);
1198	}
1199
1200	if (p >= chunk->end) {
1201		ucl_set_err (chunk, UCL_ESYNTAX, "unfinished value", &parser->err);
1202		return false;
1203	}
1204
1205	return true;
1206}
1207
1208/**
1209 * Parse multiline string ending with \n{term}\n
1210 * @param parser
1211 * @param chunk
1212 * @param term
1213 * @param term_len
1214 * @return size of multiline string or 0 in case of error
1215 */
1216static int
1217ucl_parse_multiline_string (struct ucl_parser *parser,
1218		struct ucl_chunk *chunk, const unsigned char *term,
1219		int term_len, unsigned char const **beg,
1220		bool *var_expand)
1221{
1222	const unsigned char *p, *c;
1223	bool newline = false;
1224	int len = 0;
1225
1226	p = chunk->pos;
1227
1228	c = p;
1229
1230	while (p < chunk->end) {
1231		if (newline) {
1232			if (chunk->end - p < term_len) {
1233				return 0;
1234			}
1235			else if (memcmp (p, term, term_len) == 0 && (p[term_len] == '\n' || p[term_len] == '\r')) {
1236				len = p - c;
1237				chunk->remain -= term_len;
1238				chunk->pos = p + term_len;
1239				chunk->column = term_len;
1240				*beg = c;
1241				break;
1242			}
1243		}
1244		if (*p == '\n') {
1245			newline = true;
1246		}
1247		else {
1248			if (*p == '$') {
1249				*var_expand = true;
1250			}
1251			newline = false;
1252		}
1253		ucl_chunk_skipc (chunk, p);
1254	}
1255
1256	return len;
1257}
1258
1259static ucl_object_t*
1260ucl_get_value_object (struct ucl_parser *parser)
1261{
1262	ucl_object_t *t, *obj = NULL;
1263
1264	if (parser->stack->obj->type == UCL_ARRAY) {
1265		/* Object must be allocated */
1266		obj = ucl_object_new ();
1267		t = parser->stack->obj->value.av;
1268		DL_APPEND (t, obj);
1269		parser->cur_obj = obj;
1270		parser->stack->obj->value.av = t;
1271		parser->stack->obj->len ++;
1272	}
1273	else {
1274		/* Object has been already allocated */
1275		obj = parser->cur_obj;
1276	}
1277
1278	return obj;
1279}
1280
1281/**
1282 * Handle value data
1283 * @param parser
1284 * @param chunk
1285 * @return
1286 */
1287static bool
1288ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1289{
1290	const unsigned char *p, *c;
1291	ucl_object_t *obj = NULL;
1292	unsigned int stripped_spaces;
1293	int str_len;
1294	bool need_unescape = false, ucl_escape = false, var_expand = false;
1295
1296	p = chunk->pos;
1297
1298	/* Skip any spaces and comments */
1299	if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) ||
1300			(chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1301		while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1302			ucl_chunk_skipc (chunk, p);
1303		}
1304		if (!ucl_skip_comments (parser)) {
1305			return false;
1306		}
1307		p = chunk->pos;
1308	}
1309
1310	while (p < chunk->end) {
1311		c = p;
1312		switch (*p) {
1313		case '"':
1314			obj = ucl_get_value_object (parser);
1315			ucl_chunk_skipc (chunk, p);
1316			if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1317				return false;
1318			}
1319			str_len = chunk->pos - c - 2;
1320			obj->type = UCL_STRING;
1321			if ((str_len = ucl_copy_or_store_ptr (parser, c + 1, &obj->trash_stack[UCL_TRASH_VALUE],
1322					&obj->value.sv, str_len, need_unescape, false, var_expand)) == -1) {
1323				return false;
1324			}
1325			obj->len = str_len;
1326			parser->state = UCL_STATE_AFTER_VALUE;
1327			p = chunk->pos;
1328			return true;
1329			break;
1330		case '{':
1331			obj = ucl_get_value_object (parser);
1332			/* We have a new object */
1333			obj = ucl_add_parser_stack (obj, parser, false, parser->stack->level);
1334			if (obj == NULL) {
1335				return false;
1336			}
1337
1338			ucl_chunk_skipc (chunk, p);
1339			return true;
1340			break;
1341		case '[':
1342			obj = ucl_get_value_object (parser);
1343			/* We have a new array */
1344			obj = ucl_add_parser_stack (obj, parser, true, parser->stack->level);
1345			if (obj == NULL) {
1346				return false;
1347			}
1348
1349			ucl_chunk_skipc (chunk, p);
1350			return true;
1351			break;
1352		case ']':
1353			/* We have the array ending */
1354			if (parser->stack && parser->stack->obj->type == UCL_ARRAY) {
1355				parser->state = UCL_STATE_AFTER_VALUE;
1356				return true;
1357			}
1358			else {
1359				goto parse_string;
1360			}
1361			break;
1362		case '<':
1363			obj = ucl_get_value_object (parser);
1364			/* We have something like multiline value, which must be <<[A-Z]+\n */
1365			if (chunk->end - p > 3) {
1366				if (memcmp (p, "<<", 2) == 0) {
1367					p += 2;
1368					/* We allow only uppercase characters in multiline definitions */
1369					while (p < chunk->end && *p >= 'A' && *p <= 'Z') {
1370						p ++;
1371					}
1372					if (*p =='\n') {
1373						/* Set chunk positions and start multiline parsing */
1374						c += 2;
1375						chunk->remain -= p - c;
1376						chunk->pos = p + 1;
1377						chunk->column = 0;
1378						chunk->line ++;
1379						if ((str_len = ucl_parse_multiline_string (parser, chunk, c,
1380								p - c, &c, &var_expand)) == 0) {
1381							ucl_set_err (chunk, UCL_ESYNTAX, "unterminated multiline value", &parser->err);
1382							return false;
1383						}
1384						obj->type = UCL_STRING;
1385						if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE],
1386							&obj->value.sv, str_len - 1, false, false, var_expand)) == -1) {
1387							return false;
1388						}
1389						obj->len = str_len;
1390						parser->state = UCL_STATE_AFTER_VALUE;
1391						return true;
1392					}
1393				}
1394			}
1395			/* Fallback to ordinary strings */
1396		default:
1397parse_string:
1398			if (obj == NULL) {
1399				obj = ucl_get_value_object (parser);
1400			}
1401			/* Parse atom */
1402			if (ucl_test_character (*p, UCL_CHARACTER_VALUE_DIGIT_START)) {
1403				if (!ucl_lex_number (parser, chunk, obj)) {
1404					if (parser->state == UCL_STATE_ERROR) {
1405						return false;
1406					}
1407				}
1408				else {
1409					parser->state = UCL_STATE_AFTER_VALUE;
1410					return true;
1411				}
1412				/* Fallback to normal string */
1413			}
1414
1415			if (!ucl_parse_string_value (parser, chunk, &var_expand, &need_unescape)) {
1416				return false;
1417			}
1418			/* Cut trailing spaces */
1419			stripped_spaces = 0;
1420			while (ucl_test_character (*(chunk->pos - 1 - stripped_spaces),
1421					UCL_CHARACTER_WHITESPACE)) {
1422				stripped_spaces ++;
1423			}
1424			str_len = chunk->pos - c - stripped_spaces;
1425			if (str_len <= 0) {
1426				ucl_set_err (chunk, 0, "string value must not be empty", &parser->err);
1427				return false;
1428			}
1429			else if (str_len == 4 && memcmp (c, "null", 4) == 0) {
1430				obj->len = 0;
1431				obj->type = UCL_NULL;
1432			}
1433			else if (!ucl_maybe_parse_boolean (obj, c, str_len)) {
1434				obj->type = UCL_STRING;
1435				if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE],
1436						&obj->value.sv, str_len, need_unescape,
1437						false, var_expand)) == -1) {
1438					return false;
1439				}
1440				obj->len = str_len;
1441			}
1442			parser->state = UCL_STATE_AFTER_VALUE;
1443			p = chunk->pos;
1444
1445			return true;
1446			break;
1447		}
1448	}
1449
1450	return true;
1451}
1452
1453/**
1454 * Handle after value data
1455 * @param parser
1456 * @param chunk
1457 * @return
1458 */
1459static bool
1460ucl_parse_after_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1461{
1462	const unsigned char *p;
1463	bool got_sep = false;
1464	struct ucl_stack *st;
1465
1466	p = chunk->pos;
1467
1468	while (p < chunk->end) {
1469		if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
1470			/* Skip whitespaces */
1471			ucl_chunk_skipc (chunk, p);
1472		}
1473		else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1474			/* Skip comment */
1475			if (!ucl_skip_comments (parser)) {
1476				return false;
1477			}
1478			/* Treat comment as a separator */
1479			got_sep = true;
1480			p = chunk->pos;
1481		}
1482		else if (ucl_test_character (*p, UCL_CHARACTER_VALUE_END)) {
1483			if (*p == '}' || *p == ']') {
1484				if (parser->stack == NULL) {
1485					ucl_set_err (chunk, UCL_ESYNTAX, "end of array or object detected without corresponding start", &parser->err);
1486					return false;
1487				}
1488				if ((*p == '}' && parser->stack->obj->type == UCL_OBJECT) ||
1489						(*p == ']' && parser->stack->obj->type == UCL_ARRAY)) {
1490
1491					/* Pop all nested objects from a stack */
1492					st = parser->stack;
1493					parser->stack = st->next;
1494					UCL_FREE (sizeof (struct ucl_stack), st);
1495
1496					while (parser->stack != NULL) {
1497						st = parser->stack;
1498						if (st->next == NULL || st->next->level == st->level) {
1499							break;
1500						}
1501						parser->stack = st->next;
1502						UCL_FREE (sizeof (struct ucl_stack), st);
1503					}
1504				}
1505				else {
1506					ucl_set_err (chunk, UCL_ESYNTAX, "unexpected terminating symbol detected", &parser->err);
1507					return false;
1508				}
1509
1510				if (parser->stack == NULL) {
1511					/* Ignore everything after a top object */
1512					return true;
1513				}
1514				else {
1515					ucl_chunk_skipc (chunk, p);
1516				}
1517				got_sep = true;
1518			}
1519			else {
1520				/* Got a separator */
1521				got_sep = true;
1522				ucl_chunk_skipc (chunk, p);
1523			}
1524		}
1525		else {
1526			/* Anything else */
1527			if (!got_sep) {
1528				ucl_set_err (chunk, UCL_ESYNTAX, "delimiter is missing", &parser->err);
1529				return false;
1530			}
1531			return true;
1532		}
1533	}
1534
1535	return true;
1536}
1537
1538/**
1539 * Handle macro data
1540 * @param parser
1541 * @param chunk
1542 * @return
1543 */
1544static bool
1545ucl_parse_macro_value (struct ucl_parser *parser,
1546		struct ucl_chunk *chunk, struct ucl_macro *macro,
1547		unsigned char const **macro_start, size_t *macro_len)
1548{
1549	const unsigned char *p, *c;
1550	bool need_unescape = false, ucl_escape = false, var_expand = false;
1551
1552	p = chunk->pos;
1553
1554	switch (*p) {
1555	case '"':
1556		/* We have macro value encoded in quotes */
1557		c = p;
1558		ucl_chunk_skipc (chunk, p);
1559		if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1560			return false;
1561		}
1562
1563		*macro_start = c + 1;
1564		*macro_len = chunk->pos - c - 2;
1565		p = chunk->pos;
1566		break;
1567	case '{':
1568		/* We got a multiline macro body */
1569		ucl_chunk_skipc (chunk, p);
1570		/* Skip spaces at the beginning */
1571		while (p < chunk->end) {
1572			if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1573				ucl_chunk_skipc (chunk, p);
1574			}
1575			else {
1576				break;
1577			}
1578		}
1579		c = p;
1580		while (p < chunk->end) {
1581			if (*p == '}') {
1582				break;
1583			}
1584			ucl_chunk_skipc (chunk, p);
1585		}
1586		*macro_start = c;
1587		*macro_len = p - c;
1588		ucl_chunk_skipc (chunk, p);
1589		break;
1590	default:
1591		/* Macro is not enclosed in quotes or braces */
1592		c = p;
1593		while (p < chunk->end) {
1594			if (ucl_lex_is_atom_end (*p)) {
1595				break;
1596			}
1597			ucl_chunk_skipc (chunk, p);
1598		}
1599		*macro_start = c;
1600		*macro_len = p - c;
1601		break;
1602	}
1603
1604	/* We are at the end of a macro */
1605	/* Skip ';' and space characters and return to previous state */
1606	while (p < chunk->end) {
1607		if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) && *p != ';') {
1608			break;
1609		}
1610		ucl_chunk_skipc (chunk, p);
1611	}
1612	return true;
1613}
1614
1615/**
1616 * Handle the main states of rcl parser
1617 * @param parser parser structure
1618 * @param data the pointer to the beginning of a chunk
1619 * @param len the length of a chunk
1620 * @return true if chunk has been parsed and false in case of error
1621 */
1622static bool
1623ucl_state_machine (struct ucl_parser *parser)
1624{
1625	ucl_object_t *obj;
1626	struct ucl_chunk *chunk = parser->chunks;
1627	const unsigned char *p, *c = NULL, *macro_start = NULL;
1628	unsigned char *macro_escaped;
1629	size_t macro_len = 0;
1630	struct ucl_macro *macro = NULL;
1631	bool next_key = false, end_of_object = false;
1632
1633	if (parser->top_obj == NULL) {
1634		if (*chunk->pos == '[') {
1635			obj = ucl_add_parser_stack (NULL, parser, true, 0);
1636		}
1637		else {
1638			obj = ucl_add_parser_stack (NULL, parser, false, 0);
1639		}
1640		if (obj == NULL) {
1641			return false;
1642		}
1643		parser->top_obj = obj;
1644		parser->cur_obj = obj;
1645		parser->state = UCL_STATE_INIT;
1646	}
1647
1648	p = chunk->pos;
1649	while (chunk->pos < chunk->end) {
1650		switch (parser->state) {
1651		case UCL_STATE_INIT:
1652			/*
1653			 * At the init state we can either go to the parse array or object
1654			 * if we got [ or { correspondingly or can just treat new data as
1655			 * a key of newly created object
1656			 */
1657			obj = parser->cur_obj;
1658			if (!ucl_skip_comments (parser)) {
1659				parser->prev_state = parser->state;
1660				parser->state = UCL_STATE_ERROR;
1661				return false;
1662			}
1663			else {
1664				p = chunk->pos;
1665				if (*p == '[') {
1666					parser->state = UCL_STATE_VALUE;
1667					ucl_chunk_skipc (chunk, p);
1668				}
1669				else {
1670					parser->state = UCL_STATE_KEY;
1671					if (*p == '{') {
1672						ucl_chunk_skipc (chunk, p);
1673					}
1674				}
1675			}
1676			break;
1677		case UCL_STATE_KEY:
1678			/* Skip any spaces */
1679			while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1680				ucl_chunk_skipc (chunk, p);
1681			}
1682			if (*p == '}') {
1683				/* We have the end of an object */
1684				parser->state = UCL_STATE_AFTER_VALUE;
1685				continue;
1686			}
1687			if (parser->stack == NULL) {
1688				/* No objects are on stack, but we want to parse a key */
1689				ucl_set_err (chunk, UCL_ESYNTAX, "top object is finished but the parser "
1690						"expects a key", &parser->err);
1691				parser->prev_state = parser->state;
1692				parser->state = UCL_STATE_ERROR;
1693				return false;
1694			}
1695			if (!ucl_parse_key (parser, chunk, &next_key, &end_of_object)) {
1696				parser->prev_state = parser->state;
1697				parser->state = UCL_STATE_ERROR;
1698				return false;
1699			}
1700			if (end_of_object) {
1701				p = chunk->pos;
1702				parser->state = UCL_STATE_AFTER_VALUE;
1703				continue;
1704			}
1705			else if (parser->state != UCL_STATE_MACRO_NAME) {
1706				if (next_key && parser->stack->obj->type == UCL_OBJECT) {
1707					/* Parse more keys and nest objects accordingly */
1708					obj = ucl_add_parser_stack (parser->cur_obj, parser, false,
1709							parser->stack->level + 1);
1710					if (obj == NULL) {
1711						return false;
1712					}
1713				}
1714				else {
1715					parser->state = UCL_STATE_VALUE;
1716				}
1717			}
1718			else {
1719				c = chunk->pos;
1720			}
1721			p = chunk->pos;
1722			break;
1723		case UCL_STATE_VALUE:
1724			/* We need to check what we do have */
1725			if (!ucl_parse_value (parser, chunk)) {
1726				parser->prev_state = parser->state;
1727				parser->state = UCL_STATE_ERROR;
1728				return false;
1729			}
1730			/* State is set in ucl_parse_value call */
1731			p = chunk->pos;
1732			break;
1733		case UCL_STATE_AFTER_VALUE:
1734			if (!ucl_parse_after_value (parser, chunk)) {
1735				parser->prev_state = parser->state;
1736				parser->state = UCL_STATE_ERROR;
1737				return false;
1738			}
1739			if (parser->stack != NULL) {
1740				if (parser->stack->obj->type == UCL_OBJECT) {
1741					parser->state = UCL_STATE_KEY;
1742				}
1743				else {
1744					/* Array */
1745					parser->state = UCL_STATE_VALUE;
1746				}
1747			}
1748			else {
1749				/* Skip everything at the end */
1750				return true;
1751			}
1752			p = chunk->pos;
1753			break;
1754		case UCL_STATE_MACRO_NAME:
1755			if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1756				ucl_chunk_skipc (chunk, p);
1757			}
1758			else if (p - c > 0) {
1759				/* We got macro name */
1760				macro_len = (size_t)(p - c);
1761				HASH_FIND (hh, parser->macroes, c, macro_len, macro);
1762				if (macro == NULL) {
1763					ucl_create_err (&parser->err, "error on line %d at column %d: "
1764							"unknown macro: '%.*s', character: '%c'",
1765								chunk->line, chunk->column, (int)(p - c), c, *chunk->pos);
1766					parser->state = UCL_STATE_ERROR;
1767					return false;
1768				}
1769				/* Now we need to skip all spaces */
1770				while (p < chunk->end) {
1771					if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1772						if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1773							/* Skip comment */
1774							if (!ucl_skip_comments (parser)) {
1775								return false;
1776							}
1777							p = chunk->pos;
1778						}
1779						break;
1780					}
1781					ucl_chunk_skipc (chunk, p);
1782				}
1783				parser->state = UCL_STATE_MACRO;
1784			}
1785			break;
1786		case UCL_STATE_MACRO:
1787			if (!ucl_parse_macro_value (parser, chunk, macro,
1788					&macro_start, &macro_len)) {
1789				parser->prev_state = parser->state;
1790				parser->state = UCL_STATE_ERROR;
1791				return false;
1792			}
1793			macro_len = ucl_expand_variable (parser, &macro_escaped, macro_start, macro_len);
1794			parser->state = parser->prev_state;
1795			if (macro_escaped == NULL) {
1796				if (!macro->handler (macro_start, macro_len, macro->ud)) {
1797					return false;
1798				}
1799			}
1800			else {
1801				if (!macro->handler (macro_escaped, macro_len, macro->ud)) {
1802					UCL_FREE (macro_len + 1, macro_escaped);
1803					return false;
1804				}
1805				UCL_FREE (macro_len + 1, macro_escaped);
1806			}
1807			p = chunk->pos;
1808			break;
1809		default:
1810			/* TODO: add all states */
1811			ucl_set_err (chunk, UCL_EINTERNAL, "internal error: parser is in an unknown state", &parser->err);
1812			parser->state = UCL_STATE_ERROR;
1813			return false;
1814		}
1815	}
1816
1817	return true;
1818}
1819
1820struct ucl_parser*
1821ucl_parser_new (int flags)
1822{
1823	struct ucl_parser *new;
1824
1825	new = UCL_ALLOC (sizeof (struct ucl_parser));
1826	if (new == NULL) {
1827		return NULL;
1828	}
1829	memset (new, 0, sizeof (struct ucl_parser));
1830
1831	ucl_parser_register_macro (new, "include", ucl_include_handler, new);
1832	ucl_parser_register_macro (new, "try_include", ucl_try_include_handler, new);
1833	ucl_parser_register_macro (new, "includes", ucl_includes_handler, new);
1834
1835	new->flags = flags;
1836
1837	/* Initial assumption about filevars */
1838	ucl_parser_set_filevars (new, NULL, false);
1839
1840	return new;
1841}
1842
1843
1844void
1845ucl_parser_register_macro (struct ucl_parser *parser, const char *macro,
1846		ucl_macro_handler handler, void* ud)
1847{
1848	struct ucl_macro *new;
1849
1850	if (macro == NULL || handler == NULL) {
1851		return;
1852	}
1853	new = UCL_ALLOC (sizeof (struct ucl_macro));
1854	if (new == NULL) {
1855		return;
1856	}
1857	memset (new, 0, sizeof (struct ucl_macro));
1858	new->handler = handler;
1859	new->name = strdup (macro);
1860	new->ud = ud;
1861	HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new);
1862}
1863
1864void
1865ucl_parser_register_variable (struct ucl_parser *parser, const char *var,
1866		const char *value)
1867{
1868	struct ucl_variable *new = NULL, *cur;
1869
1870	if (var == NULL) {
1871		return;
1872	}
1873
1874	/* Find whether a variable already exists */
1875	LL_FOREACH (parser->variables, cur) {
1876		if (strcmp (cur->var, var) == 0) {
1877			new = cur;
1878			break;
1879		}
1880	}
1881
1882	if (value == NULL) {
1883
1884		if (new != NULL) {
1885			/* Remove variable */
1886			LL_DELETE (parser->variables, new);
1887			free (new->var);
1888			free (new->value);
1889			UCL_FREE (sizeof (struct ucl_variable), new);
1890		}
1891		else {
1892			/* Do nothing */
1893			return;
1894		}
1895	}
1896	else {
1897		if (new == NULL) {
1898			new = UCL_ALLOC (sizeof (struct ucl_variable));
1899			if (new == NULL) {
1900				return;
1901			}
1902			memset (new, 0, sizeof (struct ucl_variable));
1903			new->var = strdup (var);
1904			new->var_len = strlen (var);
1905			new->value = strdup (value);
1906			new->value_len = strlen (value);
1907
1908			LL_PREPEND (parser->variables, new);
1909		}
1910		else {
1911			free (new->value);
1912			new->value = strdup (value);
1913			new->value_len = strlen (value);
1914		}
1915	}
1916}
1917
1918void
1919ucl_parser_set_variables_handler (struct ucl_parser *parser,
1920		ucl_variable_handler handler, void *ud)
1921{
1922	parser->var_handler = handler;
1923	parser->var_data = ud;
1924}
1925
1926bool
1927ucl_parser_add_chunk (struct ucl_parser *parser, const unsigned char *data,
1928		size_t len)
1929{
1930	struct ucl_chunk *chunk;
1931
1932	if (data == NULL || len == 0) {
1933		ucl_create_err (&parser->err, "invalid chunk added");
1934		return false;
1935	}
1936	if (parser->state != UCL_STATE_ERROR) {
1937		chunk = UCL_ALLOC (sizeof (struct ucl_chunk));
1938		if (chunk == NULL) {
1939			ucl_create_err (&parser->err, "cannot allocate chunk structure");
1940			return false;
1941		}
1942		chunk->begin = data;
1943		chunk->remain = len;
1944		chunk->pos = chunk->begin;
1945		chunk->end = chunk->begin + len;
1946		chunk->line = 1;
1947		chunk->column = 0;
1948		LL_PREPEND (parser->chunks, chunk);
1949		parser->recursion ++;
1950		if (parser->recursion > UCL_MAX_RECURSION) {
1951			ucl_create_err (&parser->err, "maximum include nesting limit is reached: %d",
1952					parser->recursion);
1953			return false;
1954		}
1955		return ucl_state_machine (parser);
1956	}
1957
1958	ucl_create_err (&parser->err, "a parser is in an invalid state");
1959
1960	return false;
1961}
1962
1963bool
1964ucl_parser_add_string (struct ucl_parser *parser, const char *data,
1965		size_t len)
1966{
1967	if (data == NULL) {
1968		ucl_create_err (&parser->err, "invalid string added");
1969		return false;
1970	}
1971	if (len == 0) {
1972		len = strlen (data);
1973	}
1974
1975	return ucl_parser_add_chunk (parser, (const unsigned char *)data, len);
1976}
1977