ucl_parser.c revision 275223
1/* Copyright (c) 2013, Vsevolod Stakhov
2 * All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *       * Redistributions of source code must retain the above copyright
7 *         notice, this list of conditions and the following disclaimer.
8 *       * Redistributions in binary form must reproduce the above copyright
9 *         notice, this list of conditions and the following disclaimer in the
10 *         documentation and/or other materials provided with the distribution.
11 *
12 * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
13 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
14 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
15 * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
16 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
18 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
19 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
20 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
21 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
22 */
23
24#include "ucl.h"
25#include "ucl_internal.h"
26#include "ucl_chartable.h"
27
28/**
29 * @file ucl_parser.c
30 * The implementation of ucl parser
31 */
32
/**
 * Snapshot of a parsing cursor.
 * NOTE(review): no user of this struct is visible in this part of the file;
 * the field names match the chunk fields updated by ucl_chunk_skipc() -
 * confirm against the code that saves/restores the state.
 */
struct ucl_parser_saved_state {
	unsigned line;			/* line counter */
	unsigned column;		/* column counter */
	size_t remain;			/* number of bytes left */
	const unsigned char *pos;	/* read position */
};
39
/**
 * Advance the parsing cursor by exactly one character.
 *
 * Increments both `p` and `(chunk)->pos`, decrements `(chunk)->remain` and
 * keeps the position counters in step: a '\n' bumps the line counter and
 * resets the column to zero, any other character bumps the column.
 * No bounds checking is done here; the caller must make sure that `p`
 * points to a readable character before invoking the macro.
 * @param chunk pointer to the chunk being consumed
 * @param p modifiable pointer (lvalue) to the current character
 */
#define ucl_chunk_skipc(chunk, p)    do{					\
    if (*(p) == '\n') {										\
        (chunk)->line ++;									\
        (chunk)->column = 0;								\
    }														\
    else (chunk)->column ++;								\
    (p)++;													\
    (chunk)->pos ++;										\
    (chunk)->remain --;										\
    } while (0)
57
58static inline void
59ucl_set_err (struct ucl_parser *parser, int code, const char *str, UT_string **err)
60{
61	const char *fmt_string, *filename;
62	struct ucl_chunk *chunk = parser->chunks;
63
64	if (parser->cur_file) {
65		filename = parser->cur_file;
66	}
67	else {
68		filename = "<unknown>";
69	}
70	if (chunk->pos < chunk->end) {
71		if (isgraph (*chunk->pos)) {
72			fmt_string = "error while parsing %s: "
73					"line: %d, column: %d - '%s', character: '%c'";
74		}
75		else {
76			fmt_string = "error while parsing %s: "
77					"line: %d, column: %d - '%s', character: '0x%02x'";
78		}
79		ucl_create_err (err, fmt_string,
80			filename, chunk->line, chunk->column,
81			str, *chunk->pos);
82	}
83	else {
84		ucl_create_err (err, "error while parsing %s: at the end of chunk: %s",
85			filename, str);
86	}
87}
88
89/**
90 * Skip all comments from the current pos resolving nested and multiline comments
91 * @param parser
92 * @return
93 */
94static bool
95ucl_skip_comments (struct ucl_parser *parser)
96{
97	struct ucl_chunk *chunk = parser->chunks;
98	const unsigned char *p;
99	int comments_nested = 0;
100	bool quoted = false;
101
102	p = chunk->pos;
103
104start:
105	if (chunk->remain > 0 && *p == '#') {
106		if (parser->state != UCL_STATE_SCOMMENT &&
107				parser->state != UCL_STATE_MCOMMENT) {
108			while (p < chunk->end) {
109				if (*p == '\n') {
110					ucl_chunk_skipc (chunk, p);
111					goto start;
112				}
113				ucl_chunk_skipc (chunk, p);
114			}
115		}
116	}
117	else if (chunk->remain >= 2 && *p == '/') {
118		if (p[1] == '*') {
119			ucl_chunk_skipc (chunk, p);
120			comments_nested ++;
121			ucl_chunk_skipc (chunk, p);
122
123			while (p < chunk->end) {
124				if (*p == '"' && *(p - 1) != '\\') {
125					quoted = !quoted;
126				}
127
128				if (!quoted) {
129					if (*p == '*') {
130						ucl_chunk_skipc (chunk, p);
131						if (*p == '/') {
132							comments_nested --;
133							if (comments_nested == 0) {
134								ucl_chunk_skipc (chunk, p);
135								goto start;
136							}
137						}
138						ucl_chunk_skipc (chunk, p);
139					}
140					else if (p[0] == '/' && chunk->remain >= 2 && p[1] == '*') {
141						comments_nested ++;
142						ucl_chunk_skipc (chunk, p);
143						ucl_chunk_skipc (chunk, p);
144						continue;
145					}
146				}
147				ucl_chunk_skipc (chunk, p);
148			}
149			if (comments_nested != 0) {
150				ucl_set_err (parser, UCL_ENESTED,
151						"unfinished multiline comment", &parser->err);
152				return false;
153			}
154		}
155	}
156
157	return true;
158}
159
/**
 * Map a size suffix character to its numeric multiplier.
 * The character is compared case-insensitively: 'k', 'm' and 'g' are known.
 * @param c suffix character
 * @param is_bytes when true return the power-of-1024 value, otherwise the
 * power-of-1000 one
 * @return the multiplier, or 1 when the character is not a known suffix
 */
static inline unsigned long
ucl_lex_num_multiplier (const unsigned char c, bool is_bytes) {
	long decimal, binary;

	switch (tolower (c)) {
	case 'm':
		decimal = 1000 * 1000;
		binary = 1024 * 1024;
		break;
	case 'k':
		decimal = 1000;
		binary = 1024;
		break;
	case 'g':
		decimal = 1000 * 1000 * 1000;
		binary = 1024 * 1024 * 1024;
		break;
	default:
		/* Not a multiplier suffix */
		return 1;
	}

	return is_bytes ? binary : decimal;
}
190
191
/**
 * Map a time suffix character to the number of seconds it stands for.
 * The character is compared case-insensitively:
 * 'm' - minute, 'h' - hour, 'd' - day, 'w' - week, 'y' - year (365 days).
 * @param c suffix character
 * @return number of seconds in the unit, or 1 for an unknown character
 */
static inline double
ucl_lex_time_multiplier (const unsigned char c) {
	static const struct {
		char c;
		double mult;
	} multipliers[] = {
			{'m', 60},
			{'h', 60 * 60},
			{'d', 60 * 60 * 24},
			{'w', 60 * 60 * 24 * 7},
			/* A year is 365 days, not 365 weeks */
			{'y', 60 * 60 * 24 * 365}
	};
	const int lc = tolower (c);
	unsigned int i;

	for (i = 0; i < sizeof (multipliers) / sizeof (multipliers[0]); i ++) {
		if (lc == multipliers[i].c) {
			return multipliers[i].mult;
		}
	}

	return 1;
}
219
220/**
221 * Return true if a character is a end of an atom
222 * @param c
223 * @return
224 */
225static inline bool
226ucl_lex_is_atom_end (const unsigned char c)
227{
228	return ucl_test_character (c, UCL_CHARACTER_VALUE_END);
229}
230
/**
 * Tell whether two consecutive characters open a comment.
 * @param c1 first character
 * @param c2 character that follows c1
 * @return true for '#' (whatever follows) and for '/' followed by '*'
 */
static inline bool
ucl_lex_is_comment (const unsigned char c1, const unsigned char c2)
{
	switch (c1) {
	case '#':
		return true;
	case '/':
		return c2 == '*';
	default:
		return false;
	}
}
244
245/**
246 * Check variable found
247 * @param parser
248 * @param ptr
249 * @param remain
250 * @param out_len
251 * @param strict
252 * @param found
253 * @return
254 */
255static inline const char *
256ucl_check_variable_safe (struct ucl_parser *parser, const char *ptr, size_t remain,
257		size_t *out_len, bool strict, bool *found)
258{
259	struct ucl_variable *var;
260	unsigned char *dst;
261	size_t dstlen;
262	bool need_free = false;
263
264	LL_FOREACH (parser->variables, var) {
265		if (strict) {
266			if (remain == var->var_len) {
267				if (memcmp (ptr, var->var, var->var_len) == 0) {
268					*out_len += var->value_len;
269					*found = true;
270					return (ptr + var->var_len);
271				}
272			}
273		}
274		else {
275			if (remain >= var->var_len) {
276				if (memcmp (ptr, var->var, var->var_len) == 0) {
277					*out_len += var->value_len;
278					*found = true;
279					return (ptr + var->var_len);
280				}
281			}
282		}
283	}
284
285	/* XXX: can only handle ${VAR} */
286	if (!(*found) && parser->var_handler != NULL && strict) {
287		/* Call generic handler */
288		if (parser->var_handler (ptr, remain, &dst, &dstlen, &need_free,
289				parser->var_data)) {
290			*found = true;
291			if (need_free) {
292				free (dst);
293			}
294			return (ptr + remain);
295		}
296	}
297
298	return ptr;
299}
300
/**
 * Examine the text that follows a '$' sign and account for its output size.
 *
 * Handled forms (ptr points just after the '$'):
 *  - "{NAME}": strict lookup of NAME; if unknown, "${" is counted as
 *    literal text and scanning resumes after the '{';
 *  - "{" without a closing '}': the '$' is counted as literal text and
 *    scanning resumes at the '{';
 *  - "$": an escaped dollar sign, one character of output;
 *  - anything else: prefix lookup of a variable name; if unknown, the '$'
 *    is counted as literal text.
 * @param parser parser object
 * @param ptr first character after the '$'
 * @param remain number of bytes available at ptr
 * @param out_len accumulator for the length of the expanded string
 * @param vars_found set to true when at least one variable is recognised
 * @return position from which the caller must continue scanning
 */
static const char *
ucl_check_variable (struct ucl_parser *parser, const char *ptr,
		size_t remain, size_t *out_len, bool *vars_found)
{
	const char *p, *end, *ret = ptr;
	bool found = false;
	bool closed = false;

	if (remain == 0) {
		/* '$' is the last character of the input: keep it literally */
		(*out_len) ++;
		return ret;
	}

	if (*ptr == '{') {
		/* We need to match the variable enclosed in braces */
		p = ptr + 1;
		end = ptr + remain;
		while (p < end) {
			if (*p == '}') {
				closed = true;
				ret = ucl_check_variable_safe (parser, ptr + 1, p - ptr - 1,
						out_len, true, &found);
				if (found) {
					/* {} must be excluded actually */
					ret ++;
					*vars_found = true;
				}
				else {
					/* Literal "${" */
					*out_len += 2;
				}
				break;
			}
			p ++;
		}
		if (!closed) {
			/*
			 * No closing brace: the '$' stays in the output; the '{' is
			 * counted by the caller as ret still points to it
			 */
			(*out_len) ++;
		}
	}
	else if (*ptr != '$') {
		/* Not count escaped dollar sign */
		ret = ucl_check_variable_safe (parser, ptr, remain, out_len, false, &found);
		if (found) {
			*vars_found = true;
		}
		else {
			/* Literal '$' */
			(*out_len) ++;
		}
	}
	else {
		/* "$$" produces a single '$' */
		ret ++;
		(*out_len) ++;
	}

	return ret;
}
357
358/**
359 * Expand a single variable
360 * @param parser
361 * @param ptr
362 * @param remain
363 * @param dest
364 * @return
365 */
366static const char *
367ucl_expand_single_variable (struct ucl_parser *parser, const char *ptr,
368		size_t remain, unsigned char **dest)
369{
370	unsigned char *d = *dest, *dst;
371	const char *p = ptr + 1, *ret;
372	struct ucl_variable *var;
373	size_t dstlen;
374	bool need_free = false;
375	bool found = false;
376	bool strict = false;
377
378	ret = ptr + 1;
379	remain --;
380
381	if (*p == '$') {
382		*d++ = *p++;
383		*dest = d;
384		return p;
385	}
386	else if (*p == '{') {
387		p ++;
388		strict = true;
389		ret += 2;
390		remain -= 2;
391	}
392
393	LL_FOREACH (parser->variables, var) {
394		if (remain >= var->var_len) {
395			if (memcmp (p, var->var, var->var_len) == 0) {
396				memcpy (d, var->value, var->value_len);
397				ret += var->var_len;
398				d += var->value_len;
399				found = true;
400				break;
401			}
402		}
403	}
404	if (!found) {
405		if (strict && parser->var_handler != NULL) {
406			if (parser->var_handler (ptr, remain, &dst, &dstlen, &need_free,
407							parser->var_data)) {
408				memcpy (d, dst, dstlen);
409				ret += dstlen;
410				d += remain;
411				found = true;
412			}
413		}
414
415		/* Leave variable as is */
416		if (!found) {
417			if (strict) {
418				/* Copy '${' */
419				memcpy (d, ptr, 2);
420				d += 2;
421				ret --;
422			}
423			else {
424				memcpy (d, ptr, 1);
425				d ++;
426			}
427		}
428	}
429
430	*dest = d;
431	return ret;
432}
433
/**
 * Expand all variable references in a string.
 *
 * Works in two passes: the first one computes the length of the result and
 * detects whether anything has to be expanded at all, the second one writes
 * the expanded text to a freshly allocated, NUL-terminated buffer.
 * @param parser parser object
 * @param dst receives the new buffer, or NULL when the source contains no
 * known variables (or the buffer cannot be allocated)
 * @param src source string
 * @param in_len length of the source string
 * @return length of the expanded string, or in_len when *dst is NULL
 */
static ssize_t
ucl_expand_variable (struct ucl_parser *parser, unsigned char **dst,
		const char *src, size_t in_len)
{
	const char *const stop = src + in_len;
	const char *cur;
	unsigned char *out;
	size_t expanded_len = 0;
	bool any_var = false;

	/* Pass 1: measure */
	for (cur = src; cur != stop; ) {
		if (*cur != '$') {
			cur ++;
			expanded_len ++;
			continue;
		}
		cur = ucl_check_variable (parser, cur + 1, stop - cur - 1,
				&expanded_len, &any_var);
	}

	if (!any_var) {
		/* Trivial case */
		*dst = NULL;
		return in_len;
	}

	*dst = UCL_ALLOC (expanded_len + 1);
	if (*dst == NULL) {
		return in_len;
	}

	/* Pass 2: copy and substitute */
	out = *dst;
	for (cur = src; cur != stop; ) {
		if (*cur == '$') {
			cur = ucl_expand_single_variable (parser, cur, stop - cur, &out);
		}
		else {
			*out++ = *cur++;
		}
	}

	*out = '\0';

	return expanded_len;
}
488
489/**
490 * Store or copy pointer to the trash stack
491 * @param parser parser object
492 * @param src src string
493 * @param dst destination buffer (trash stack pointer)
494 * @param dst_const const destination pointer (e.g. value of object)
495 * @param in_len input length
496 * @param need_unescape need to unescape source (and copy it)
497 * @param need_lowercase need to lowercase value (and copy)
498 * @param need_expand need to expand variables (and copy as well)
499 * @return output length (excluding \0 symbol)
500 */
501static inline ssize_t
502ucl_copy_or_store_ptr (struct ucl_parser *parser,
503		const unsigned char *src, unsigned char **dst,
504		const char **dst_const, size_t in_len,
505		bool need_unescape, bool need_lowercase, bool need_expand)
506{
507	ssize_t ret = -1, tret;
508	unsigned char *tmp;
509
510	if (need_unescape || need_lowercase ||
511			(need_expand && parser->variables != NULL) ||
512			!(parser->flags & UCL_PARSER_ZEROCOPY)) {
513		/* Copy string */
514		*dst = UCL_ALLOC (in_len + 1);
515		if (*dst == NULL) {
516			ucl_set_err (parser, 0, "cannot allocate memory for a string",
517					&parser->err);
518			return false;
519		}
520		if (need_lowercase) {
521			ret = ucl_strlcpy_tolower (*dst, src, in_len + 1);
522		}
523		else {
524			ret = ucl_strlcpy_unsafe (*dst, src, in_len + 1);
525		}
526
527		if (need_unescape) {
528			ret = ucl_unescape_json_string (*dst, ret);
529		}
530		if (need_expand) {
531			tmp = *dst;
532			tret = ret;
533			ret = ucl_expand_variable (parser, dst, tmp, ret);
534			if (*dst == NULL) {
535				/* Nothing to expand */
536				*dst = tmp;
537				ret = tret;
538			}
539			else {
540				/* Free unexpanded value */
541				UCL_FREE (in_len + 1, tmp);
542			}
543		}
544		*dst_const = *dst;
545	}
546	else {
547		*dst_const = src;
548		ret = in_len;
549	}
550
551	return ret;
552}
553
554/**
555 * Create and append an object at the specified level
556 * @param parser
557 * @param is_array
558 * @param level
559 * @return
560 */
561static inline ucl_object_t *
562ucl_add_parser_stack (ucl_object_t *obj, struct ucl_parser *parser, bool is_array, int level)
563{
564	struct ucl_stack *st;
565
566	if (!is_array) {
567		if (obj == NULL) {
568			obj = ucl_object_new_full (UCL_OBJECT, parser->chunks->priority);
569		}
570		else {
571			obj->type = UCL_OBJECT;
572		}
573		obj->value.ov = ucl_hash_create ();
574		parser->state = UCL_STATE_KEY;
575	}
576	else {
577		if (obj == NULL) {
578			obj = ucl_object_new_full (UCL_ARRAY, parser->chunks->priority);
579		}
580		else {
581			obj->type = UCL_ARRAY;
582		}
583		parser->state = UCL_STATE_VALUE;
584	}
585
586	st = UCL_ALLOC (sizeof (struct ucl_stack));
587	if (st == NULL) {
588		ucl_set_err (parser, 0, "cannot allocate memory for an object",
589				&parser->err);
590		ucl_object_unref (obj);
591		return NULL;
592	}
593	st->obj = obj;
594	st->level = level;
595	LL_PREPEND (parser->stack, st);
596	parser->cur_obj = obj;
597
598	return obj;
599}
600
601int
602ucl_maybe_parse_number (ucl_object_t *obj,
603		const char *start, const char *end, const char **pos,
604		bool allow_double, bool number_bytes, bool allow_time)
605{
606	const char *p = start, *c = start;
607	char *endptr;
608	bool got_dot = false, got_exp = false, need_double = false,
609			is_time = false, valid_start = false, is_hex = false,
610			is_neg = false;
611	double dv = 0;
612	int64_t lv = 0;
613
614	if (*p == '-') {
615		is_neg = true;
616		c ++;
617		p ++;
618	}
619	while (p < end) {
620		if (is_hex && isxdigit (*p)) {
621			p ++;
622		}
623		else if (isdigit (*p)) {
624			valid_start = true;
625			p ++;
626		}
627		else if (!is_hex && (*p == 'x' || *p == 'X')) {
628			is_hex = true;
629			allow_double = false;
630			c = p + 1;
631		}
632		else if (allow_double) {
633			if (p == c) {
634				/* Empty digits sequence, not a number */
635				*pos = start;
636				return EINVAL;
637			}
638			else if (*p == '.') {
639				if (got_dot) {
640					/* Double dots, not a number */
641					*pos = start;
642					return EINVAL;
643				}
644				else {
645					got_dot = true;
646					need_double = true;
647					p ++;
648				}
649			}
650			else if (*p == 'e' || *p == 'E') {
651				if (got_exp) {
652					/* Double exp, not a number */
653					*pos = start;
654					return EINVAL;
655				}
656				else {
657					got_exp = true;
658					need_double = true;
659					p ++;
660					if (p >= end) {
661						*pos = start;
662						return EINVAL;
663					}
664					if (!isdigit (*p) && *p != '+' && *p != '-') {
665						/* Wrong exponent sign */
666						*pos = start;
667						return EINVAL;
668					}
669					else {
670						p ++;
671					}
672				}
673			}
674			else {
675				/* Got the end of the number, need to check */
676				break;
677			}
678		}
679		else {
680			break;
681		}
682	}
683
684	if (!valid_start) {
685		*pos = start;
686		return EINVAL;
687	}
688
689	errno = 0;
690	if (need_double) {
691		dv = strtod (c, &endptr);
692	}
693	else {
694		if (is_hex) {
695			lv = strtoimax (c, &endptr, 16);
696		}
697		else {
698			lv = strtoimax (c, &endptr, 10);
699		}
700	}
701	if (errno == ERANGE) {
702		*pos = start;
703		return ERANGE;
704	}
705
706	/* Now check endptr */
707	if (endptr == NULL || ucl_lex_is_atom_end (*endptr) || *endptr == '\0') {
708		p = endptr;
709		goto set_obj;
710	}
711
712	if (endptr < end && endptr != start) {
713		p = endptr;
714		switch (*p) {
715		case 'm':
716		case 'M':
717		case 'g':
718		case 'G':
719		case 'k':
720		case 'K':
721			if (end - p >= 2) {
722				if (p[1] == 's' || p[1] == 'S') {
723					/* Milliseconds */
724					if (!need_double) {
725						need_double = true;
726						dv = lv;
727					}
728					is_time = true;
729					if (p[0] == 'm' || p[0] == 'M') {
730						dv /= 1000.;
731					}
732					else {
733						dv *= ucl_lex_num_multiplier (*p, false);
734					}
735					p += 2;
736					goto set_obj;
737				}
738				else if (number_bytes || (p[1] == 'b' || p[1] == 'B')) {
739					/* Bytes */
740					if (need_double) {
741						need_double = false;
742						lv = dv;
743					}
744					lv *= ucl_lex_num_multiplier (*p, true);
745					p += 2;
746					goto set_obj;
747				}
748				else if (ucl_lex_is_atom_end (p[1])) {
749					if (need_double) {
750						dv *= ucl_lex_num_multiplier (*p, false);
751					}
752					else {
753						lv *= ucl_lex_num_multiplier (*p, number_bytes);
754					}
755					p ++;
756					goto set_obj;
757				}
758				else if (allow_time && end - p >= 3) {
759					if (tolower (p[0]) == 'm' &&
760							tolower (p[1]) == 'i' &&
761							tolower (p[2]) == 'n') {
762						/* Minutes */
763						if (!need_double) {
764							need_double = true;
765							dv = lv;
766						}
767						is_time = true;
768						dv *= 60.;
769						p += 3;
770						goto set_obj;
771					}
772				}
773			}
774			else {
775				if (need_double) {
776					dv *= ucl_lex_num_multiplier (*p, false);
777				}
778				else {
779					lv *= ucl_lex_num_multiplier (*p, number_bytes);
780				}
781				p ++;
782				goto set_obj;
783			}
784			break;
785		case 'S':
786		case 's':
787			if (allow_time &&
788					(p == end - 1 || ucl_lex_is_atom_end (p[1]))) {
789				if (!need_double) {
790					need_double = true;
791					dv = lv;
792				}
793				p ++;
794				is_time = true;
795				goto set_obj;
796			}
797			break;
798		case 'h':
799		case 'H':
800		case 'd':
801		case 'D':
802		case 'w':
803		case 'W':
804		case 'Y':
805		case 'y':
806			if (allow_time &&
807					(p == end - 1 || ucl_lex_is_atom_end (p[1]))) {
808				if (!need_double) {
809					need_double = true;
810					dv = lv;
811				}
812				is_time = true;
813				dv *= ucl_lex_time_multiplier (*p);
814				p ++;
815				goto set_obj;
816			}
817			break;
818		case '\t':
819		case ' ':
820			while (p < end && ucl_test_character(*p, UCL_CHARACTER_WHITESPACE)) {
821				p++;
822			}
823			if (ucl_lex_is_atom_end(*p))
824				goto set_obj;
825			break;
826		}
827	}
828	else if (endptr == end) {
829		/* Just a number at the end of chunk */
830		p = endptr;
831		goto set_obj;
832	}
833
834	*pos = c;
835	return EINVAL;
836
837	set_obj:
838	if (allow_double && (need_double || is_time)) {
839		if (!is_time) {
840			obj->type = UCL_FLOAT;
841		}
842		else {
843			obj->type = UCL_TIME;
844		}
845		obj->value.dv = is_neg ? (-dv) : dv;
846	}
847	else {
848		obj->type = UCL_INT;
849		obj->value.iv = is_neg ? (-lv) : lv;
850	}
851	*pos = p;
852	return 0;
853}
854
855/**
856 * Parse possible number
857 * @param parser
858 * @param chunk
859 * @return true if a number has been parsed
860 */
861static bool
862ucl_lex_number (struct ucl_parser *parser,
863		struct ucl_chunk *chunk, ucl_object_t *obj)
864{
865	const unsigned char *pos;
866	int ret;
867
868	ret = ucl_maybe_parse_number (obj, chunk->pos, chunk->end, (const char **)&pos,
869			true, false, ((parser->flags & UCL_PARSER_NO_TIME) == 0));
870
871	if (ret == 0) {
872		chunk->remain -= pos - chunk->pos;
873		chunk->column += pos - chunk->pos;
874		chunk->pos = pos;
875		return true;
876	}
877	else if (ret == ERANGE) {
878		ucl_set_err (parser, ERANGE, "numeric value out of range", &parser->err);
879	}
880
881	return false;
882}
883
884/**
885 * Parse quoted string with possible escapes
886 * @param parser
887 * @param chunk
888 * @return true if a string has been parsed
889 */
890static bool
891ucl_lex_json_string (struct ucl_parser *parser,
892		struct ucl_chunk *chunk, bool *need_unescape, bool *ucl_escape, bool *var_expand)
893{
894	const unsigned char *p = chunk->pos;
895	unsigned char c;
896	int i;
897
898	while (p < chunk->end) {
899		c = *p;
900		if (c < 0x1F) {
901			/* Unmasked control character */
902			if (c == '\n') {
903				ucl_set_err (parser, UCL_ESYNTAX, "unexpected newline",
904						&parser->err);
905			}
906			else {
907				ucl_set_err (parser, UCL_ESYNTAX, "unexpected control character",
908						&parser->err);
909			}
910			return false;
911		}
912		else if (c == '\\') {
913			ucl_chunk_skipc (chunk, p);
914			c = *p;
915			if (p >= chunk->end) {
916				ucl_set_err (parser, UCL_ESYNTAX, "unfinished escape character",
917						&parser->err);
918				return false;
919			}
920			else if (ucl_test_character (c, UCL_CHARACTER_ESCAPE)) {
921				if (c == 'u') {
922					ucl_chunk_skipc (chunk, p);
923					for (i = 0; i < 4 && p < chunk->end; i ++) {
924						if (!isxdigit (*p)) {
925							ucl_set_err (parser, UCL_ESYNTAX, "invalid utf escape",
926									&parser->err);
927							return false;
928						}
929						ucl_chunk_skipc (chunk, p);
930					}
931					if (p >= chunk->end) {
932						ucl_set_err (parser, UCL_ESYNTAX, "unfinished escape character",
933								&parser->err);
934						return false;
935					}
936				}
937				else {
938					ucl_chunk_skipc (chunk, p);
939				}
940			}
941			*need_unescape = true;
942			*ucl_escape = true;
943			continue;
944		}
945		else if (c == '"') {
946			ucl_chunk_skipc (chunk, p);
947			return true;
948		}
949		else if (ucl_test_character (c, UCL_CHARACTER_UCL_UNSAFE)) {
950			*ucl_escape = true;
951		}
952		else if (c == '$') {
953			*var_expand = true;
954		}
955		ucl_chunk_skipc (chunk, p);
956	}
957
958	ucl_set_err (parser, UCL_ESYNTAX, "no quote at the end of json string",
959			&parser->err);
960	return false;
961}
962
963static void
964ucl_parser_append_elt (struct ucl_parser *parser, ucl_hash_t *cont,
965		ucl_object_t *top,
966		ucl_object_t *elt)
967{
968	ucl_object_t *nobj;
969
970	if ((parser->flags & UCL_PARSER_NO_IMPLICIT_ARRAYS) == 0) {
971		/* Implicit array */
972		top->flags |= UCL_OBJECT_MULTIVALUE;
973		DL_APPEND (top, elt);
974	}
975	else {
976		if ((top->flags & UCL_OBJECT_MULTIVALUE) != 0) {
977			/* Just add to the explicit array */
978			DL_APPEND (top->value.av, elt);
979		}
980		else {
981			/* Convert to an array */
982			ucl_hash_delete (cont, top);
983			nobj = ucl_object_typed_new (UCL_ARRAY);
984			nobj->key = top->key;
985			nobj->keylen = top->keylen;
986			nobj->flags |= UCL_OBJECT_MULTIVALUE;
987			DL_APPEND (nobj->value.av, top);
988			DL_APPEND (nobj->value.av, elt);
989			ucl_hash_insert (cont, nobj, nobj->key, nobj->keylen);
990		}
991	}
992}
993
994/**
995 * Parse a key in an object
996 * @param parser
997 * @param chunk
998 * @return true if a key has been parsed
999 */
1000static bool
1001ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk, bool *next_key, bool *end_of_object)
1002{
1003	const unsigned char *p, *c = NULL, *end, *t;
1004	const char *key = NULL;
1005	bool got_quote = false, got_eq = false, got_semicolon = false,
1006			need_unescape = false, ucl_escape = false, var_expand = false,
1007			got_content = false, got_sep = false;
1008	ucl_object_t *nobj, *tobj;
1009	ucl_hash_t *container;
1010	ssize_t keylen;
1011
1012	p = chunk->pos;
1013
1014	if (*p == '.') {
1015		/* It is macro actually */
1016		ucl_chunk_skipc (chunk, p);
1017		parser->prev_state = parser->state;
1018		parser->state = UCL_STATE_MACRO_NAME;
1019		return true;
1020	}
1021	while (p < chunk->end) {
1022		/*
1023		 * A key must start with alpha, number, '/' or '_' and end with space character
1024		 */
1025		if (c == NULL) {
1026			if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1027				if (!ucl_skip_comments (parser)) {
1028					return false;
1029				}
1030				p = chunk->pos;
1031			}
1032			else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1033				ucl_chunk_skipc (chunk, p);
1034			}
1035			else if (ucl_test_character (*p, UCL_CHARACTER_KEY_START)) {
1036				/* The first symbol */
1037				c = p;
1038				ucl_chunk_skipc (chunk, p);
1039				got_content = true;
1040			}
1041			else if (*p == '"') {
1042				/* JSON style key */
1043				c = p + 1;
1044				got_quote = true;
1045				got_content = true;
1046				ucl_chunk_skipc (chunk, p);
1047			}
1048			else if (*p == '}') {
1049				/* We have actually end of an object */
1050				*end_of_object = true;
1051				return true;
1052			}
1053			else if (*p == '.') {
1054				ucl_chunk_skipc (chunk, p);
1055				parser->prev_state = parser->state;
1056				parser->state = UCL_STATE_MACRO_NAME;
1057				return true;
1058			}
1059			else {
1060				/* Invalid identifier */
1061				ucl_set_err (parser, UCL_ESYNTAX, "key must begin with a letter",
1062						&parser->err);
1063				return false;
1064			}
1065		}
1066		else {
1067			/* Parse the body of a key */
1068			if (!got_quote) {
1069				if (ucl_test_character (*p, UCL_CHARACTER_KEY)) {
1070					got_content = true;
1071					ucl_chunk_skipc (chunk, p);
1072				}
1073				else if (ucl_test_character (*p, UCL_CHARACTER_KEY_SEP)) {
1074					end = p;
1075					break;
1076				}
1077				else {
1078					ucl_set_err (parser, UCL_ESYNTAX, "invalid character in a key",
1079							&parser->err);
1080					return false;
1081				}
1082			}
1083			else {
1084				/* We need to parse json like quoted string */
1085				if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1086					return false;
1087				}
1088				/* Always escape keys obtained via json */
1089				end = chunk->pos - 1;
1090				p = chunk->pos;
1091				break;
1092			}
1093		}
1094	}
1095
1096	if (p >= chunk->end && got_content) {
1097		ucl_set_err (parser, UCL_ESYNTAX, "unfinished key", &parser->err);
1098		return false;
1099	}
1100	else if (!got_content) {
1101		return true;
1102	}
1103	*end_of_object = false;
1104	/* We are now at the end of the key, need to parse the rest */
1105	while (p < chunk->end) {
1106		if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
1107			ucl_chunk_skipc (chunk, p);
1108		}
1109		else if (*p == '=') {
1110			if (!got_eq && !got_semicolon) {
1111				ucl_chunk_skipc (chunk, p);
1112				got_eq = true;
1113			}
1114			else {
1115				ucl_set_err (parser, UCL_ESYNTAX, "unexpected '=' character",
1116						&parser->err);
1117				return false;
1118			}
1119		}
1120		else if (*p == ':') {
1121			if (!got_eq && !got_semicolon) {
1122				ucl_chunk_skipc (chunk, p);
1123				got_semicolon = true;
1124			}
1125			else {
1126				ucl_set_err (parser, UCL_ESYNTAX, "unexpected ':' character",
1127						&parser->err);
1128				return false;
1129			}
1130		}
1131		else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1132			/* Check for comment */
1133			if (!ucl_skip_comments (parser)) {
1134				return false;
1135			}
1136			p = chunk->pos;
1137		}
1138		else {
1139			/* Start value */
1140			break;
1141		}
1142	}
1143
1144	if (p >= chunk->end && got_content) {
1145		ucl_set_err (parser, UCL_ESYNTAX, "unfinished key", &parser->err);
1146		return false;
1147	}
1148
1149	got_sep = got_semicolon || got_eq;
1150
1151	if (!got_sep) {
1152		/*
1153		 * Maybe we have more keys nested, so search for termination character.
1154		 * Possible choices:
1155		 * 1) key1 key2 ... keyN [:=] value <- we treat that as error
1156		 * 2) key1 ... keyN {} or [] <- we treat that as nested objects
1157		 * 3) key1 value[;,\n] <- we treat that as linear object
1158		 */
1159		t = p;
1160		*next_key = false;
1161		while (ucl_test_character (*t, UCL_CHARACTER_WHITESPACE)) {
1162			t ++;
1163		}
1164		/* Check first non-space character after a key */
1165		if (*t != '{' && *t != '[') {
1166			while (t < chunk->end) {
1167				if (*t == ',' || *t == ';' || *t == '\n' || *t == '\r') {
1168					break;
1169				}
1170				else if (*t == '{' || *t == '[') {
1171					*next_key = true;
1172					break;
1173				}
1174				t ++;
1175			}
1176		}
1177	}
1178
1179	/* Create a new object */
1180	nobj = ucl_object_new_full (UCL_NULL, parser->chunks->priority);
1181	keylen = ucl_copy_or_store_ptr (parser, c, &nobj->trash_stack[UCL_TRASH_KEY],
1182			&key, end - c, need_unescape, parser->flags & UCL_PARSER_KEY_LOWERCASE, false);
1183	if (keylen == -1) {
1184		ucl_object_unref (nobj);
1185		return false;
1186	}
1187	else if (keylen == 0) {
1188		ucl_set_err (parser, UCL_ESYNTAX, "empty keys are not allowed", &parser->err);
1189		ucl_object_unref (nobj);
1190		return false;
1191	}
1192
1193	container = parser->stack->obj->value.ov;
1194	nobj->key = key;
1195	nobj->keylen = keylen;
1196	tobj = __DECONST (ucl_object_t *, ucl_hash_search_obj (container, nobj));
1197	if (tobj == NULL) {
1198		container = ucl_hash_insert_object (container, nobj);
1199		nobj->prev = nobj;
1200		nobj->next = NULL;
1201		parser->stack->obj->len ++;
1202	}
1203	else {
1204		/*
1205		 * The logic here is the following:
1206		 *
1207		 * - if we have two objects with the same priority, then we form an
1208		 * implicit or explicit array
1209		 * - if a new object has bigger priority, then we overwrite an old one
1210		 * - if a new object has lower priority, then we ignore it
1211		 */
1212		unsigned priold = ucl_object_get_priority (tobj),
1213				prinew = ucl_object_get_priority (nobj);
1214		if (priold == prinew) {
1215			ucl_parser_append_elt (parser, container, tobj, nobj);
1216		}
1217		else if (priold > prinew) {
1218			ucl_object_unref (nobj);
1219			return true;
1220		}
1221		else {
1222			ucl_hash_replace (container, tobj, nobj);
1223			ucl_object_unref (tobj);
1224		}
1225	}
1226
1227	if (ucl_escape) {
1228		nobj->flags |= UCL_OBJECT_NEED_KEY_ESCAPE;
1229	}
1230	parser->stack->obj->value.ov = container;
1231
1232	parser->cur_obj = nobj;
1233
1234	return true;
1235}
1236
1237/**
1238 * Parse a cl string
1239 * @param parser
1240 * @param chunk
1241 * @return true if a key has been parsed
1242 */
1243static bool
1244ucl_parse_string_value (struct ucl_parser *parser,
1245		struct ucl_chunk *chunk, bool *var_expand, bool *need_unescape)
1246{
1247	const unsigned char *p;
1248	enum {
1249		UCL_BRACE_ROUND = 0,
1250		UCL_BRACE_SQUARE,
1251		UCL_BRACE_FIGURE
1252	};
1253	int braces[3][2] = {{0, 0}, {0, 0}, {0, 0}};
1254
1255	p = chunk->pos;
1256
1257	while (p < chunk->end) {
1258
1259		/* Skip pairs of figure braces */
1260		if (*p == '{') {
1261			braces[UCL_BRACE_FIGURE][0] ++;
1262		}
1263		else if (*p == '}') {
1264			braces[UCL_BRACE_FIGURE][1] ++;
1265			if (braces[UCL_BRACE_FIGURE][1] <= braces[UCL_BRACE_FIGURE][0]) {
1266				/* This is not a termination symbol, continue */
1267				ucl_chunk_skipc (chunk, p);
1268				continue;
1269			}
1270		}
1271		/* Skip pairs of square braces */
1272		else if (*p == '[') {
1273			braces[UCL_BRACE_SQUARE][0] ++;
1274		}
1275		else if (*p == ']') {
1276			braces[UCL_BRACE_SQUARE][1] ++;
1277			if (braces[UCL_BRACE_SQUARE][1] <= braces[UCL_BRACE_SQUARE][0]) {
1278				/* This is not a termination symbol, continue */
1279				ucl_chunk_skipc (chunk, p);
1280				continue;
1281			}
1282		}
1283		else if (*p == '$') {
1284			*var_expand = true;
1285		}
1286		else if (*p == '\\') {
1287			*need_unescape = true;
1288			ucl_chunk_skipc (chunk, p);
1289			if (p < chunk->end) {
1290				ucl_chunk_skipc (chunk, p);
1291			}
1292			continue;
1293		}
1294
1295		if (ucl_lex_is_atom_end (*p) || (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1296			break;
1297		}
1298		ucl_chunk_skipc (chunk, p);
1299	}
1300
1301	return true;
1302}
1303
1304/**
1305 * Parse multiline string ending with \n{term}\n
1306 * @param parser
1307 * @param chunk
1308 * @param term
1309 * @param term_len
1310 * @return size of multiline string or 0 in case of error
1311 */
1312static int
1313ucl_parse_multiline_string (struct ucl_parser *parser,
1314		struct ucl_chunk *chunk, const unsigned char *term,
1315		int term_len, unsigned char const **beg,
1316		bool *var_expand)
1317{
1318	const unsigned char *p, *c, *tend;
1319	bool newline = false;
1320	int len = 0;
1321
1322	p = chunk->pos;
1323
1324	c = p;
1325
1326	while (p < chunk->end) {
1327		if (newline) {
1328			if (chunk->end - p < term_len) {
1329				return 0;
1330			}
1331			else if (memcmp (p, term, term_len) == 0) {
1332				tend = p + term_len;
1333				if (*tend != '\n' && *tend != ';' && *tend != ',') {
1334					/* Incomplete terminator */
1335					ucl_chunk_skipc (chunk, p);
1336					continue;
1337				}
1338				len = p - c;
1339				chunk->remain -= term_len;
1340				chunk->pos = p + term_len;
1341				chunk->column = term_len;
1342				*beg = c;
1343				break;
1344			}
1345		}
1346		if (*p == '\n') {
1347			newline = true;
1348		}
1349		else {
1350			if (*p == '$') {
1351				*var_expand = true;
1352			}
1353			newline = false;
1354		}
1355		ucl_chunk_skipc (chunk, p);
1356	}
1357
1358	return len;
1359}
1360
1361static ucl_object_t*
1362ucl_get_value_object (struct ucl_parser *parser)
1363{
1364	ucl_object_t *t, *obj = NULL;
1365
1366	if (parser->stack->obj->type == UCL_ARRAY) {
1367		/* Object must be allocated */
1368		obj = ucl_object_new_full (UCL_NULL, parser->chunks->priority);
1369		t = parser->stack->obj->value.av;
1370		DL_APPEND (t, obj);
1371		parser->cur_obj = obj;
1372		parser->stack->obj->value.av = t;
1373		parser->stack->obj->len ++;
1374	}
1375	else {
1376		/* Object has been already allocated */
1377		obj = parser->cur_obj;
1378	}
1379
1380	return obj;
1381}
1382
1383/**
1384 * Handle value data
1385 * @param parser
1386 * @param chunk
1387 * @return
1388 */
1389static bool
1390ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1391{
1392	const unsigned char *p, *c;
1393	ucl_object_t *obj = NULL;
1394	unsigned int stripped_spaces;
1395	int str_len;
1396	bool need_unescape = false, ucl_escape = false, var_expand = false;
1397
1398	p = chunk->pos;
1399
1400	/* Skip any spaces and comments */
1401	if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) ||
1402			(chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1403		while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1404			ucl_chunk_skipc (chunk, p);
1405		}
1406		if (!ucl_skip_comments (parser)) {
1407			return false;
1408		}
1409		p = chunk->pos;
1410	}
1411
1412	while (p < chunk->end) {
1413		c = p;
1414		switch (*p) {
1415		case '"':
1416			obj = ucl_get_value_object (parser);
1417			ucl_chunk_skipc (chunk, p);
1418			if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1419				return false;
1420			}
1421			str_len = chunk->pos - c - 2;
1422			obj->type = UCL_STRING;
1423			if ((str_len = ucl_copy_or_store_ptr (parser, c + 1, &obj->trash_stack[UCL_TRASH_VALUE],
1424					&obj->value.sv, str_len, need_unescape, false, var_expand)) == -1) {
1425				return false;
1426			}
1427			obj->len = str_len;
1428			parser->state = UCL_STATE_AFTER_VALUE;
1429			p = chunk->pos;
1430			return true;
1431			break;
1432		case '{':
1433			obj = ucl_get_value_object (parser);
1434			/* We have a new object */
1435			obj = ucl_add_parser_stack (obj, parser, false, parser->stack->level);
1436			if (obj == NULL) {
1437				return false;
1438			}
1439
1440			ucl_chunk_skipc (chunk, p);
1441			return true;
1442			break;
1443		case '[':
1444			obj = ucl_get_value_object (parser);
1445			/* We have a new array */
1446			obj = ucl_add_parser_stack (obj, parser, true, parser->stack->level);
1447			if (obj == NULL) {
1448				return false;
1449			}
1450
1451			ucl_chunk_skipc (chunk, p);
1452			return true;
1453			break;
1454		case ']':
1455			/* We have the array ending */
1456			if (parser->stack && parser->stack->obj->type == UCL_ARRAY) {
1457				parser->state = UCL_STATE_AFTER_VALUE;
1458				return true;
1459			}
1460			else {
1461				goto parse_string;
1462			}
1463			break;
1464		case '<':
1465			obj = ucl_get_value_object (parser);
1466			/* We have something like multiline value, which must be <<[A-Z]+\n */
1467			if (chunk->end - p > 3) {
1468				if (memcmp (p, "<<", 2) == 0) {
1469					p += 2;
1470					/* We allow only uppercase characters in multiline definitions */
1471					while (p < chunk->end && *p >= 'A' && *p <= 'Z') {
1472						p ++;
1473					}
1474					if (*p =='\n') {
1475						/* Set chunk positions and start multiline parsing */
1476						c += 2;
1477						chunk->remain -= p - c;
1478						chunk->pos = p + 1;
1479						chunk->column = 0;
1480						chunk->line ++;
1481						if ((str_len = ucl_parse_multiline_string (parser, chunk, c,
1482								p - c, &c, &var_expand)) == 0) {
1483							ucl_set_err (parser, UCL_ESYNTAX,
1484									"unterminated multiline value", &parser->err);
1485							return false;
1486						}
1487						obj->type = UCL_STRING;
1488						if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE],
1489							&obj->value.sv, str_len - 1, false, false, var_expand)) == -1) {
1490							return false;
1491						}
1492						obj->len = str_len;
1493						parser->state = UCL_STATE_AFTER_VALUE;
1494						return true;
1495					}
1496				}
1497			}
1498			/* Fallback to ordinary strings */
1499		default:
1500parse_string:
1501			if (obj == NULL) {
1502				obj = ucl_get_value_object (parser);
1503			}
1504			/* Parse atom */
1505			if (ucl_test_character (*p, UCL_CHARACTER_VALUE_DIGIT_START)) {
1506				if (!ucl_lex_number (parser, chunk, obj)) {
1507					if (parser->state == UCL_STATE_ERROR) {
1508						return false;
1509					}
1510				}
1511				else {
1512					parser->state = UCL_STATE_AFTER_VALUE;
1513					return true;
1514				}
1515				/* Fallback to normal string */
1516			}
1517
1518			if (!ucl_parse_string_value (parser, chunk, &var_expand, &need_unescape)) {
1519				return false;
1520			}
1521			/* Cut trailing spaces */
1522			stripped_spaces = 0;
1523			while (ucl_test_character (*(chunk->pos - 1 - stripped_spaces),
1524					UCL_CHARACTER_WHITESPACE)) {
1525				stripped_spaces ++;
1526			}
1527			str_len = chunk->pos - c - stripped_spaces;
1528			if (str_len <= 0) {
1529				ucl_set_err (parser, 0, "string value must not be empty",
1530						&parser->err);
1531				return false;
1532			}
1533			else if (str_len == 4 && memcmp (c, "null", 4) == 0) {
1534				obj->len = 0;
1535				obj->type = UCL_NULL;
1536			}
1537			else if (!ucl_maybe_parse_boolean (obj, c, str_len)) {
1538				obj->type = UCL_STRING;
1539				if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE],
1540						&obj->value.sv, str_len, need_unescape,
1541						false, var_expand)) == -1) {
1542					return false;
1543				}
1544				obj->len = str_len;
1545			}
1546			parser->state = UCL_STATE_AFTER_VALUE;
1547			p = chunk->pos;
1548
1549			return true;
1550			break;
1551		}
1552	}
1553
1554	return true;
1555}
1556
1557/**
1558 * Handle after value data
1559 * @param parser
1560 * @param chunk
1561 * @return
1562 */
1563static bool
1564ucl_parse_after_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1565{
1566	const unsigned char *p;
1567	bool got_sep = false;
1568	struct ucl_stack *st;
1569
1570	p = chunk->pos;
1571
1572	while (p < chunk->end) {
1573		if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
1574			/* Skip whitespaces */
1575			ucl_chunk_skipc (chunk, p);
1576		}
1577		else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1578			/* Skip comment */
1579			if (!ucl_skip_comments (parser)) {
1580				return false;
1581			}
1582			/* Treat comment as a separator */
1583			got_sep = true;
1584			p = chunk->pos;
1585		}
1586		else if (ucl_test_character (*p, UCL_CHARACTER_VALUE_END)) {
1587			if (*p == '}' || *p == ']') {
1588				if (parser->stack == NULL) {
1589					ucl_set_err (parser, UCL_ESYNTAX,
1590							"end of array or object detected without corresponding start",
1591							&parser->err);
1592					return false;
1593				}
1594				if ((*p == '}' && parser->stack->obj->type == UCL_OBJECT) ||
1595						(*p == ']' && parser->stack->obj->type == UCL_ARRAY)) {
1596
1597					/* Pop all nested objects from a stack */
1598					st = parser->stack;
1599					parser->stack = st->next;
1600					UCL_FREE (sizeof (struct ucl_stack), st);
1601
1602					while (parser->stack != NULL) {
1603						st = parser->stack;
1604						if (st->next == NULL || st->next->level == st->level) {
1605							break;
1606						}
1607						parser->stack = st->next;
1608						UCL_FREE (sizeof (struct ucl_stack), st);
1609					}
1610				}
1611				else {
1612					ucl_set_err (parser, UCL_ESYNTAX,
1613							"unexpected terminating symbol detected",
1614							&parser->err);
1615					return false;
1616				}
1617
1618				if (parser->stack == NULL) {
1619					/* Ignore everything after a top object */
1620					return true;
1621				}
1622				else {
1623					ucl_chunk_skipc (chunk, p);
1624				}
1625				got_sep = true;
1626			}
1627			else {
1628				/* Got a separator */
1629				got_sep = true;
1630				ucl_chunk_skipc (chunk, p);
1631			}
1632		}
1633		else {
1634			/* Anything else */
1635			if (!got_sep) {
1636				ucl_set_err (parser, UCL_ESYNTAX, "delimiter is missing",
1637						&parser->err);
1638				return false;
1639			}
1640			return true;
1641		}
1642	}
1643
1644	return true;
1645}
1646
1647/**
1648 * Handle macro data
1649 * @param parser
1650 * @param chunk
1651 * @return
1652 */
1653static bool
1654ucl_parse_macro_value (struct ucl_parser *parser,
1655		struct ucl_chunk *chunk, struct ucl_macro *macro,
1656		unsigned char const **macro_start, size_t *macro_len)
1657{
1658	const unsigned char *p, *c;
1659	bool need_unescape = false, ucl_escape = false, var_expand = false;
1660
1661	p = chunk->pos;
1662
1663	switch (*p) {
1664	case '"':
1665		/* We have macro value encoded in quotes */
1666		c = p;
1667		ucl_chunk_skipc (chunk, p);
1668		if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1669			return false;
1670		}
1671
1672		*macro_start = c + 1;
1673		*macro_len = chunk->pos - c - 2;
1674		p = chunk->pos;
1675		break;
1676	case '{':
1677		/* We got a multiline macro body */
1678		ucl_chunk_skipc (chunk, p);
1679		/* Skip spaces at the beginning */
1680		while (p < chunk->end) {
1681			if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1682				ucl_chunk_skipc (chunk, p);
1683			}
1684			else {
1685				break;
1686			}
1687		}
1688		c = p;
1689		while (p < chunk->end) {
1690			if (*p == '}') {
1691				break;
1692			}
1693			ucl_chunk_skipc (chunk, p);
1694		}
1695		*macro_start = c;
1696		*macro_len = p - c;
1697		ucl_chunk_skipc (chunk, p);
1698		break;
1699	default:
1700		/* Macro is not enclosed in quotes or braces */
1701		c = p;
1702		while (p < chunk->end) {
1703			if (ucl_lex_is_atom_end (*p)) {
1704				break;
1705			}
1706			ucl_chunk_skipc (chunk, p);
1707		}
1708		*macro_start = c;
1709		*macro_len = p - c;
1710		break;
1711	}
1712
1713	/* We are at the end of a macro */
1714	/* Skip ';' and space characters and return to previous state */
1715	while (p < chunk->end) {
1716		if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) && *p != ';') {
1717			break;
1718		}
1719		ucl_chunk_skipc (chunk, p);
1720	}
1721	return true;
1722}
1723
1724/**
1725 * Parse macro arguments as UCL object
1726 * @param parser parser structure
1727 * @param chunk the current data chunk
1728 * @return
1729 */
1730static ucl_object_t *
1731ucl_parse_macro_arguments (struct ucl_parser *parser,
1732		struct ucl_chunk *chunk)
1733{
1734	ucl_object_t *res = NULL;
1735	struct ucl_parser *params_parser;
1736	int obraces = 1, ebraces = 0, state = 0;
1737	const unsigned char *p, *c;
1738	size_t args_len = 0;
1739	struct ucl_parser_saved_state saved;
1740
1741	saved.column = chunk->column;
1742	saved.line = chunk->line;
1743	saved.pos = chunk->pos;
1744	saved.remain = chunk->remain;
1745	p = chunk->pos;
1746
1747	if (*p != '(' || chunk->remain < 2) {
1748		return NULL;
1749	}
1750
1751	/* Set begin and start */
1752	ucl_chunk_skipc (chunk, p);
1753	c = p;
1754
1755	while ((p) < (chunk)->end) {
1756		switch (state) {
1757		case 0:
1758			/* Parse symbols and check for '(', ')' and '"' */
1759			if (*p == '(') {
1760				obraces ++;
1761			}
1762			else if (*p == ')') {
1763				ebraces ++;
1764			}
1765			else if (*p == '"') {
1766				state = 1;
1767			}
1768			/* Check pairing */
1769			if (obraces == ebraces) {
1770				state = 99;
1771			}
1772			else {
1773				args_len ++;
1774			}
1775			/* Check overflow */
1776			if (chunk->remain == 0) {
1777				goto restore_chunk;
1778			}
1779			ucl_chunk_skipc (chunk, p);
1780			break;
1781		case 1:
1782			/* We have quote character, so skip all but quotes */
1783			if (*p == '"' && *(p - 1) != '\\') {
1784				state = 0;
1785			}
1786			if (chunk->remain == 0) {
1787				goto restore_chunk;
1788			}
1789			ucl_chunk_skipc (chunk, p);
1790			break;
1791		case 99:
1792			/*
1793			 * We have read the full body of arguments, so we need to parse and set
1794			 * object from that
1795			 */
1796			params_parser = ucl_parser_new (parser->flags);
1797			if (!ucl_parser_add_chunk (params_parser, c, args_len)) {
1798				ucl_set_err (parser, UCL_ESYNTAX, "macro arguments parsing error",
1799						&parser->err);
1800			}
1801			else {
1802				res = ucl_parser_get_object (params_parser);
1803			}
1804			ucl_parser_free (params_parser);
1805
1806			return res;
1807
1808			break;
1809		}
1810	}
1811
1812	return res;
1813
1814restore_chunk:
1815	chunk->column = saved.column;
1816	chunk->line = saved.line;
1817	chunk->pos = saved.pos;
1818	chunk->remain = saved.remain;
1819
1820	return NULL;
1821}
1822
/*
 * Advance p (and the chunk) over whitespace, then over a comment if one
 * starts at the first non-whitespace character. Makes the enclosing function
 * return false when the comment cannot be skipped.
 */
#define SKIP_SPACES_COMMENTS(parser, chunk, p) do {								\
	while ((p) < (chunk)->end) {												\
		if (ucl_test_character (*(p), UCL_CHARACTER_WHITESPACE_UNSAFE)) {		\
			ucl_chunk_skipc (chunk, p);											\
			continue;															\
		}																		\
		if ((chunk)->remain >= 2 && ucl_lex_is_comment ((p)[0], (p)[1])) {		\
			if (!ucl_skip_comments (parser)) {									\
				return false;													\
			}																	\
			p = (chunk)->pos;													\
		}																		\
		break;																	\
	}																			\
} while(0)
1837
1838/**
1839 * Handle the main states of rcl parser
1840 * @param parser parser structure
1841 * @param data the pointer to the beginning of a chunk
1842 * @param len the length of a chunk
1843 * @return true if chunk has been parsed and false in case of error
1844 */
1845static bool
1846ucl_state_machine (struct ucl_parser *parser)
1847{
1848	ucl_object_t *obj, *macro_args;
1849	struct ucl_chunk *chunk = parser->chunks;
1850	const unsigned char *p, *c = NULL, *macro_start = NULL;
1851	unsigned char *macro_escaped;
1852	size_t macro_len = 0;
1853	struct ucl_macro *macro = NULL;
1854	bool next_key = false, end_of_object = false, ret;
1855
1856	if (parser->top_obj == NULL) {
1857		if (*chunk->pos == '[') {
1858			obj = ucl_add_parser_stack (NULL, parser, true, 0);
1859		}
1860		else {
1861			obj = ucl_add_parser_stack (NULL, parser, false, 0);
1862		}
1863		if (obj == NULL) {
1864			return false;
1865		}
1866		parser->top_obj = obj;
1867		parser->cur_obj = obj;
1868		parser->state = UCL_STATE_INIT;
1869	}
1870
1871	p = chunk->pos;
1872	while (chunk->pos < chunk->end) {
1873		switch (parser->state) {
1874		case UCL_STATE_INIT:
1875			/*
1876			 * At the init state we can either go to the parse array or object
1877			 * if we got [ or { correspondingly or can just treat new data as
1878			 * a key of newly created object
1879			 */
1880			if (!ucl_skip_comments (parser)) {
1881				parser->prev_state = parser->state;
1882				parser->state = UCL_STATE_ERROR;
1883				return false;
1884			}
1885			else {
1886				/* Skip any spaces */
1887				while (p < chunk->end && ucl_test_character (*p,
1888						UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1889					ucl_chunk_skipc (chunk, p);
1890				}
1891				p = chunk->pos;
1892				if (*p == '[') {
1893					parser->state = UCL_STATE_VALUE;
1894					ucl_chunk_skipc (chunk, p);
1895				}
1896				else {
1897					parser->state = UCL_STATE_KEY;
1898					if (*p == '{') {
1899						ucl_chunk_skipc (chunk, p);
1900					}
1901				}
1902			}
1903			break;
1904		case UCL_STATE_KEY:
1905			/* Skip any spaces */
1906			while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1907				ucl_chunk_skipc (chunk, p);
1908			}
1909			if (*p == '}') {
1910				/* We have the end of an object */
1911				parser->state = UCL_STATE_AFTER_VALUE;
1912				continue;
1913			}
1914			if (parser->stack == NULL) {
1915				/* No objects are on stack, but we want to parse a key */
1916				ucl_set_err (parser, UCL_ESYNTAX, "top object is finished but the parser "
1917						"expects a key", &parser->err);
1918				parser->prev_state = parser->state;
1919				parser->state = UCL_STATE_ERROR;
1920				return false;
1921			}
1922			if (!ucl_parse_key (parser, chunk, &next_key, &end_of_object)) {
1923				parser->prev_state = parser->state;
1924				parser->state = UCL_STATE_ERROR;
1925				return false;
1926			}
1927			if (end_of_object) {
1928				p = chunk->pos;
1929				parser->state = UCL_STATE_AFTER_VALUE;
1930				continue;
1931			}
1932			else if (parser->state != UCL_STATE_MACRO_NAME) {
1933				if (next_key && parser->stack->obj->type == UCL_OBJECT) {
1934					/* Parse more keys and nest objects accordingly */
1935					obj = ucl_add_parser_stack (parser->cur_obj, parser, false,
1936							parser->stack->level + 1);
1937					if (obj == NULL) {
1938						return false;
1939					}
1940				}
1941				else {
1942					parser->state = UCL_STATE_VALUE;
1943				}
1944			}
1945			else {
1946				c = chunk->pos;
1947			}
1948			p = chunk->pos;
1949			break;
1950		case UCL_STATE_VALUE:
1951			/* We need to check what we do have */
1952			if (!ucl_parse_value (parser, chunk)) {
1953				parser->prev_state = parser->state;
1954				parser->state = UCL_STATE_ERROR;
1955				return false;
1956			}
1957			/* State is set in ucl_parse_value call */
1958			p = chunk->pos;
1959			break;
1960		case UCL_STATE_AFTER_VALUE:
1961			if (!ucl_parse_after_value (parser, chunk)) {
1962				parser->prev_state = parser->state;
1963				parser->state = UCL_STATE_ERROR;
1964				return false;
1965			}
1966			if (parser->stack != NULL) {
1967				if (parser->stack->obj->type == UCL_OBJECT) {
1968					parser->state = UCL_STATE_KEY;
1969				}
1970				else {
1971					/* Array */
1972					parser->state = UCL_STATE_VALUE;
1973				}
1974			}
1975			else {
1976				/* Skip everything at the end */
1977				return true;
1978			}
1979			p = chunk->pos;
1980			break;
1981		case UCL_STATE_MACRO_NAME:
1982			if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) &&
1983					*p != '(') {
1984				ucl_chunk_skipc (chunk, p);
1985			}
1986			else if (p - c > 0) {
1987				/* We got macro name */
1988				macro_len = (size_t)(p - c);
1989				HASH_FIND (hh, parser->macroes, c, macro_len, macro);
1990				if (macro == NULL) {
1991					ucl_create_err (&parser->err, "error on line %d at column %d: "
1992							"unknown macro: '%.*s', character: '%c'",
1993								chunk->line, chunk->column, (int)(p - c), c, *chunk->pos);
1994					parser->state = UCL_STATE_ERROR;
1995					return false;
1996				}
1997				/* Now we need to skip all spaces */
1998				SKIP_SPACES_COMMENTS(parser, chunk, p);
1999				parser->state = UCL_STATE_MACRO;
2000			}
2001			break;
2002		case UCL_STATE_MACRO:
2003			if (*chunk->pos == '(') {
2004				macro_args = ucl_parse_macro_arguments (parser, chunk);
2005				p = chunk->pos;
2006				if (macro_args) {
2007					SKIP_SPACES_COMMENTS(parser, chunk, p);
2008				}
2009			}
2010			else {
2011				macro_args = NULL;
2012			}
2013			if (!ucl_parse_macro_value (parser, chunk, macro,
2014					&macro_start, &macro_len)) {
2015				parser->prev_state = parser->state;
2016				parser->state = UCL_STATE_ERROR;
2017				return false;
2018			}
2019			macro_len = ucl_expand_variable (parser, &macro_escaped,
2020					macro_start, macro_len);
2021			parser->state = parser->prev_state;
2022			if (macro_escaped == NULL) {
2023				ret = macro->handler (macro_start, macro_len, macro_args,
2024						macro->ud);
2025			}
2026			else {
2027				ret = macro->handler (macro_escaped, macro_len, macro_args,
2028						macro->ud);
2029				UCL_FREE (macro_len + 1, macro_escaped);
2030			}
2031			p = chunk->pos;
2032			if (macro_args) {
2033				ucl_object_unref (macro_args);
2034			}
2035			if (!ret) {
2036				return false;
2037			}
2038			break;
2039		default:
2040			/* TODO: add all states */
2041			ucl_set_err (parser, UCL_EINTERNAL,
2042					"internal error: parser is in an unknown state", &parser->err);
2043			parser->state = UCL_STATE_ERROR;
2044			return false;
2045		}
2046	}
2047
2048	return true;
2049}
2050
2051struct ucl_parser*
2052ucl_parser_new (int flags)
2053{
2054	struct ucl_parser *new;
2055
2056	new = UCL_ALLOC (sizeof (struct ucl_parser));
2057	if (new == NULL) {
2058		return NULL;
2059	}
2060	memset (new, 0, sizeof (struct ucl_parser));
2061
2062	ucl_parser_register_macro (new, "include", ucl_include_handler, new);
2063	ucl_parser_register_macro (new, "try_include", ucl_try_include_handler, new);
2064	ucl_parser_register_macro (new, "includes", ucl_includes_handler, new);
2065
2066	new->flags = flags;
2067
2068	/* Initial assumption about filevars */
2069	ucl_parser_set_filevars (new, NULL, false);
2070
2071	return new;
2072}
2073
2074
2075void
2076ucl_parser_register_macro (struct ucl_parser *parser, const char *macro,
2077		ucl_macro_handler handler, void* ud)
2078{
2079	struct ucl_macro *new;
2080
2081	if (macro == NULL || handler == NULL) {
2082		return;
2083	}
2084	new = UCL_ALLOC (sizeof (struct ucl_macro));
2085	if (new == NULL) {
2086		return;
2087	}
2088	memset (new, 0, sizeof (struct ucl_macro));
2089	new->handler = handler;
2090	new->name = strdup (macro);
2091	new->ud = ud;
2092	HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new);
2093}
2094
2095void
2096ucl_parser_register_variable (struct ucl_parser *parser, const char *var,
2097		const char *value)
2098{
2099	struct ucl_variable *new = NULL, *cur;
2100
2101	if (var == NULL) {
2102		return;
2103	}
2104
2105	/* Find whether a variable already exists */
2106	LL_FOREACH (parser->variables, cur) {
2107		if (strcmp (cur->var, var) == 0) {
2108			new = cur;
2109			break;
2110		}
2111	}
2112
2113	if (value == NULL) {
2114
2115		if (new != NULL) {
2116			/* Remove variable */
2117			DL_DELETE (parser->variables, new);
2118			free (new->var);
2119			free (new->value);
2120			UCL_FREE (sizeof (struct ucl_variable), new);
2121		}
2122		else {
2123			/* Do nothing */
2124			return;
2125		}
2126	}
2127	else {
2128		if (new == NULL) {
2129			new = UCL_ALLOC (sizeof (struct ucl_variable));
2130			if (new == NULL) {
2131				return;
2132			}
2133			memset (new, 0, sizeof (struct ucl_variable));
2134			new->var = strdup (var);
2135			new->var_len = strlen (var);
2136			new->value = strdup (value);
2137			new->value_len = strlen (value);
2138
2139			DL_APPEND (parser->variables, new);
2140		}
2141		else {
2142			free (new->value);
2143			new->value = strdup (value);
2144			new->value_len = strlen (value);
2145		}
2146	}
2147}
2148
2149void
2150ucl_parser_set_variables_handler (struct ucl_parser *parser,
2151		ucl_variable_handler handler, void *ud)
2152{
2153	parser->var_handler = handler;
2154	parser->var_data = ud;
2155}
2156
2157bool
2158ucl_parser_add_chunk_priority (struct ucl_parser *parser, const unsigned char *data,
2159		size_t len, unsigned priority)
2160{
2161	struct ucl_chunk *chunk;
2162
2163	if (data == NULL) {
2164		ucl_create_err (&parser->err, "invalid chunk added");
2165		return false;
2166	}
2167	if (len == 0) {
2168		parser->top_obj = ucl_object_new_full (UCL_OBJECT, priority);
2169		return true;
2170	}
2171	if (parser->state != UCL_STATE_ERROR) {
2172		chunk = UCL_ALLOC (sizeof (struct ucl_chunk));
2173		if (chunk == NULL) {
2174			ucl_create_err (&parser->err, "cannot allocate chunk structure");
2175			return false;
2176		}
2177		chunk->begin = data;
2178		chunk->remain = len;
2179		chunk->pos = chunk->begin;
2180		chunk->end = chunk->begin + len;
2181		chunk->line = 1;
2182		chunk->column = 0;
2183		chunk->priority = priority;
2184		LL_PREPEND (parser->chunks, chunk);
2185		parser->recursion ++;
2186		if (parser->recursion > UCL_MAX_RECURSION) {
2187			ucl_create_err (&parser->err, "maximum include nesting limit is reached: %d",
2188					parser->recursion);
2189			return false;
2190		}
2191		return ucl_state_machine (parser);
2192	}
2193
2194	ucl_create_err (&parser->err, "a parser is in an invalid state");
2195
2196	return false;
2197}
2198
/**
 * Add a chunk of data with priority 0 and parse it.
 * @param parser parser structure
 * @param data start of the chunk
 * @param len length of the chunk in bytes
 * @return result of ucl_parser_add_chunk_priority
 */
bool
ucl_parser_add_chunk (struct ucl_parser *parser, const unsigned char *data,
		size_t len)
{
	bool parsed;

	parsed = ucl_parser_add_chunk_priority (parser, data, len, 0);

	return parsed;
}
2205
2206bool
2207ucl_parser_add_string (struct ucl_parser *parser, const char *data,
2208		size_t len)
2209{
2210	if (data == NULL) {
2211		ucl_create_err (&parser->err, "invalid string added");
2212		return false;
2213	}
2214	if (len == 0) {
2215		len = strlen (data);
2216	}
2217
2218	return ucl_parser_add_chunk (parser, (const unsigned char *)data, len);
2219}
2220