ucl_parser.c revision 264789
1/* Copyright (c) 2013, Vsevolod Stakhov
2 * All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *       * Redistributions of source code must retain the above copyright
7 *         notice, this list of conditions and the following disclaimer.
8 *       * Redistributions in binary form must reproduce the above copyright
9 *         notice, this list of conditions and the following disclaimer in the
10 *         documentation and/or other materials provided with the distribution.
11 *
12 * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
13 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
14 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
15 * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
16 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
18 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
19 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
20 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
21 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
22 */
23
24#include "ucl.h"
25#include "ucl_internal.h"
26#include "ucl_chartable.h"
27
/**
 * @file ucl_parser.c
 * The implementation of the UCL parser
 */
32
/**
 * Snapshot of a parsing position.
 *
 * The field names mirror the line/column/remain/pos fields that
 * ucl_chunk_skipc() maintains on a chunk; presumably the structure is used
 * to save and restore a chunk cursor (no user is visible in this part of the
 * file - TODO confirm).
 */
struct ucl_parser_saved_state {
	unsigned line;				/* line counter */
	unsigned column;			/* column counter */
	size_t remain;				/* number of bytes left */
	const unsigned char *pos;	/* read position */
};
39
/**
 * Advance a chunk by exactly one character and keep its bookkeeping in sync.
 *
 * If the character under the cursor is '\n' the chunk's line counter is
 * incremented and its column is reset to 0; otherwise the column is
 * incremented. Afterwards the caller's cursor, chunk->pos and chunk->remain
 * are all moved by one. No bounds check is performed: the caller must make
 * sure that the cursor points at a readable character.
 *
 * @param chunk pointer to the chunk whose line, column, pos and remain
 *              fields are updated
 * @param p modifiable pointer lvalue addressing the current character; it is
 *          evaluated more than once, so it must be free of side effects
 */
#define ucl_chunk_skipc(chunk, p)    do{					\
    if (*(p) == '\n') {										\
        (chunk)->line ++;									\
        (chunk)->column = 0;								\
    }														\
    else (chunk)->column ++;								\
    (p) ++;													\
    (chunk)->pos ++;										\
    (chunk)->remain --;										\
    } while (0)
57
58static inline void
59ucl_set_err (struct ucl_chunk *chunk, int code, const char *str, UT_string **err)
60{
61	if (chunk->pos < chunk->end) {
62		if (isgraph (*chunk->pos)) {
63			ucl_create_err (err, "error on line %d at column %d: '%s', character: '%c'",
64					chunk->line, chunk->column, str, *chunk->pos);
65		}
66		else {
67			ucl_create_err (err, "error on line %d at column %d: '%s', character: '0x%02x'",
68					chunk->line, chunk->column, str, (int)*chunk->pos);
69		}
70	}
71	else {
72		ucl_create_err (err, "error at the end of chunk: %s", str);
73	}
74}
75
76/**
77 * Skip all comments from the current pos resolving nested and multiline comments
78 * @param parser
79 * @return
80 */
81static bool
82ucl_skip_comments (struct ucl_parser *parser)
83{
84	struct ucl_chunk *chunk = parser->chunks;
85	const unsigned char *p;
86	int comments_nested = 0;
87
88	p = chunk->pos;
89
90start:
91	if (*p == '#') {
92		if (parser->state != UCL_STATE_SCOMMENT &&
93				parser->state != UCL_STATE_MCOMMENT) {
94			while (p < chunk->end) {
95				if (*p == '\n') {
96					ucl_chunk_skipc (chunk, p);
97					goto start;
98				}
99				ucl_chunk_skipc (chunk, p);
100			}
101		}
102	}
103	else if (*p == '/' && chunk->remain >= 2) {
104		if (p[1] == '*') {
105			ucl_chunk_skipc (chunk, p);
106			comments_nested ++;
107			ucl_chunk_skipc (chunk, p);
108
109			while (p < chunk->end) {
110				if (*p == '*') {
111					ucl_chunk_skipc (chunk, p);
112					if (*p == '/') {
113						comments_nested --;
114						if (comments_nested == 0) {
115							ucl_chunk_skipc (chunk, p);
116							goto start;
117						}
118					}
119					ucl_chunk_skipc (chunk, p);
120				}
121				else if (p[0] == '/' && chunk->remain >= 2 && p[1] == '*') {
122					comments_nested ++;
123					ucl_chunk_skipc (chunk, p);
124					ucl_chunk_skipc (chunk, p);
125					continue;
126				}
127				ucl_chunk_skipc (chunk, p);
128			}
129			if (comments_nested != 0) {
130				ucl_set_err (chunk, UCL_ENESTED, "unfinished multiline comment", &parser->err);
131				return false;
132			}
133		}
134	}
135
136	return true;
137}
138
/**
 * Map a size suffix character to its numeric multiplier.
 *
 * Recognised suffixes (case-insensitive): 'k', 'm' and 'g'.
 *
 * @param c suffix character
 * @param is_bytes when true use powers of 1024, otherwise powers of 1000
 * @return the multiplier, or 1 when the character is not a known suffix
 */
static inline unsigned long
ucl_lex_num_multiplier (const unsigned char c, bool is_bytes) {
	long factor = 1;

	switch (tolower (c)) {
	case 'k':
		factor = is_bytes ? 1024 : 1000;
		break;
	case 'm':
		factor = is_bytes ? 1024 * 1024 : 1000 * 1000;
		break;
	case 'g':
		factor = is_bytes ? 1024 * 1024 * 1024 : 1000 * 1000 * 1000;
		break;
	default:
		/* Unknown suffix: neutral multiplier */
		break;
	}

	return factor;
}
169
170
/**
 * Map a time suffix character to the number of seconds it stands for.
 *
 * Recognised suffixes (case-insensitive): 'm' (minute), 'h' (hour),
 * 'd' (day), 'w' (week) and 'y' (year, counted as 365 days).
 *
 * @param c suffix character
 * @return number of seconds in the unit, or 1 for an unknown character
 */
static inline double
ucl_lex_time_multiplier (const unsigned char c) {
	static const struct {
		char c;
		double mult;
	} multipliers[] = {
			{'m', 60},
			{'h', 60 * 60},
			{'d', 60 * 60 * 24},
			{'w', 60 * 60 * 24 * 7},
			/* A year is 365 days, not 365 weeks */
			{'y', 60 * 60 * 24 * 365}
	};
	size_t i;

	for (i = 0; i < sizeof (multipliers) / sizeof (multipliers[0]); i ++) {
		if (tolower (c) == multipliers[i].c) {
			return multipliers[i].mult;
		}
	}

	return 1;
}
198
199/**
200 * Return true if a character is a end of an atom
201 * @param c
202 * @return
203 */
204static inline bool
205ucl_lex_is_atom_end (const unsigned char c)
206{
207	return ucl_test_character (c, UCL_CHARACTER_VALUE_END);
208}
209
/**
 * Check whether two consecutive characters open a comment.
 *
 * A comment starts either with '#' (the second character is irrelevant) or
 * with the pair '/' '*'.
 *
 * @param c1 first character
 * @param c2 character following c1
 * @return true if the characters start a comment
 */
static inline bool
ucl_lex_is_comment (const unsigned char c1, const unsigned char c2)
{
	return (c1 == '#') || (c1 == '/' && c2 == '*');
}
223
224/**
225 * Check variable found
226 * @param parser
227 * @param ptr
228 * @param remain
229 * @param out_len
230 * @param strict
231 * @param found
232 * @return
233 */
234static inline const char *
235ucl_check_variable_safe (struct ucl_parser *parser, const char *ptr, size_t remain,
236		size_t *out_len, bool strict, bool *found)
237{
238	struct ucl_variable *var;
239
240	LL_FOREACH (parser->variables, var) {
241		if (strict) {
242			if (remain == var->var_len) {
243				if (memcmp (ptr, var->var, var->var_len) == 0) {
244					*out_len += var->value_len;
245					*found = true;
246					return (ptr + var->var_len);
247				}
248			}
249		}
250		else {
251			if (remain >= var->var_len) {
252				if (memcmp (ptr, var->var, var->var_len) == 0) {
253					*out_len += var->value_len;
254					*found = true;
255					return (ptr + var->var_len);
256				}
257			}
258		}
259	}
260
261	return ptr;
262}
263
/**
 * Account for one '$' construct while estimating the expanded string length.
 *
 * The function is called with ptr pointing just after a '$' sign and handles
 * three forms:
 *  - "${name}": strict lookup of the text between the braces; a known
 *    variable adds its value length, an unknown one adds 2 for the literal
 *    "${" and scanning resumes after the '{';
 *  - "$$": an escaped dollar sign, counted as one character;
 *  - "$name": prefix lookup; an unknown name adds 1 for the literal '$'.
 * If a '{' is never closed, the '$' is counted as a literal character so
 * that the estimate matches what the expansion pass writes for "${".
 *
 * @param parser parser object holding the variables list
 * @param ptr position right after the '$' sign
 * @param remain number of characters available at ptr
 * @param out_len accumulator for the expanded length
 * @param vars_found set to true when at least one variable was recognised
 * @return position from which the caller should continue scanning
 */
static const char *
ucl_check_variable (struct ucl_parser *parser, const char *ptr, size_t remain, size_t *out_len, bool *vars_found)
{
	const char *p, *end, *ret = ptr;
	bool found = false, closed = false;

	if (*ptr == '{') {
		/* We need to match the variable enclosed in braces */
		p = ptr + 1;
		end = ptr + remain;
		while (p < end) {
			if (*p == '}') {
				closed = true;
				ret = ucl_check_variable_safe (parser, ptr + 1, p - ptr - 1, out_len, true, &found);
				if (found) {
					/* {} must be excluded actually */
					ret ++;
					if (!*vars_found) {
						*vars_found = true;
					}
				}
				else {
					*out_len += 2;
				}
				break;
			}
			p ++;
		}
		if (!closed) {
			/*
			 * Unterminated brace: the caller has already consumed the '$',
			 * so count it here; the '{' and the rest are counted by the
			 * caller as ordinary characters.
			 */
			(*out_len) ++;
		}
	}
	else if (*ptr != '$') {
		/* Not count escaped dollar sign */
		ret = ucl_check_variable_safe (parser, ptr, remain, out_len, false, &found);
		if (found && !*vars_found) {
			*vars_found = true;
		}
		if (!found) {
			(*out_len) ++;
		}
	}
	else {
		ret ++;
		(*out_len) ++;
	}

	return ret;
}
318
319/**
320 * Expand a single variable
321 * @param parser
322 * @param ptr
323 * @param remain
324 * @param dest
325 * @return
326 */
327static const char *
328ucl_expand_single_variable (struct ucl_parser *parser, const char *ptr,
329		size_t remain, unsigned char **dest)
330{
331	unsigned char *d = *dest;
332	const char *p = ptr + 1, *ret;
333	struct ucl_variable *var;
334	bool found = false;
335
336	ret = ptr + 1;
337	remain --;
338
339	if (*p == '$') {
340		*d++ = *p++;
341		*dest = d;
342		return p;
343	}
344	else if (*p == '{') {
345		p ++;
346		ret += 2;
347		remain -= 2;
348	}
349
350	LL_FOREACH (parser->variables, var) {
351		if (remain >= var->var_len) {
352			if (memcmp (p, var->var, var->var_len) == 0) {
353				memcpy (d, var->value, var->value_len);
354				ret += var->var_len;
355				d += var->value_len;
356				found = true;
357				break;
358			}
359		}
360	}
361	if (!found) {
362		memcpy (d, ptr, 2);
363		d += 2;
364		ret --;
365	}
366
367	*dest = d;
368	return ret;
369}
370
/**
 * Expand all variables found in a string.
 *
 * The work is done in two passes: the first one computes the length of the
 * result and detects whether any variable is present, the second one writes
 * the expanded text into a newly allocated, NUL-terminated buffer.
 *
 * @param parser parser object holding the variables list
 * @param dst receives the allocated buffer, or NULL when no variable was
 *            found or the allocation failed
 * @param src source string
 * @param in_len length of the source string
 * @return the computed output length when a buffer was produced, in_len
 *         otherwise
 */
static ssize_t
ucl_expand_variable (struct ucl_parser *parser, unsigned char **dst,
		const char *src, size_t in_len)
{
	const char *const end = src + in_len;
	const char *cur;
	unsigned char *out;
	size_t out_len = 0;
	bool vars_found = false;

	/* Pass 1: measure */
	for (cur = src; cur != end; ) {
		if (*cur != '$') {
			cur ++;
			out_len ++;
			continue;
		}
		cur = ucl_check_variable (parser, cur + 1, end - cur - 1, &out_len, &vars_found);
	}

	if (!vars_found) {
		/* Trivial case */
		*dst = NULL;
		return in_len;
	}

	*dst = UCL_ALLOC (out_len + 1);
	if (*dst == NULL) {
		return in_len;
	}

	/* Pass 2: write */
	out = *dst;
	for (cur = src; cur != end; ) {
		if (*cur == '$') {
			cur = ucl_expand_single_variable (parser, cur, end - cur, &out);
		}
		else {
			*out++ = *cur++;
		}
	}

	*out = '\0';

	return out_len;
}
425
426/**
427 * Store or copy pointer to the trash stack
428 * @param parser parser object
429 * @param src src string
430 * @param dst destination buffer (trash stack pointer)
431 * @param dst_const const destination pointer (e.g. value of object)
432 * @param in_len input length
433 * @param need_unescape need to unescape source (and copy it)
434 * @param need_lowercase need to lowercase value (and copy)
435 * @param need_expand need to expand variables (and copy as well)
436 * @return output length (excluding \0 symbol)
437 */
438static inline ssize_t
439ucl_copy_or_store_ptr (struct ucl_parser *parser,
440		const unsigned char *src, unsigned char **dst,
441		const char **dst_const, size_t in_len,
442		bool need_unescape, bool need_lowercase, bool need_expand)
443{
444	ssize_t ret = -1, tret;
445	unsigned char *tmp;
446
447	if (need_unescape || need_lowercase ||
448			(need_expand && parser->variables != NULL) ||
449			!(parser->flags & UCL_PARSER_ZEROCOPY)) {
450		/* Copy string */
451		*dst = UCL_ALLOC (in_len + 1);
452		if (*dst == NULL) {
453			ucl_set_err (parser->chunks, 0, "cannot allocate memory for a string", &parser->err);
454			return false;
455		}
456		if (need_lowercase) {
457			ret = ucl_strlcpy_tolower (*dst, src, in_len + 1);
458		}
459		else {
460			ret = ucl_strlcpy_unsafe (*dst, src, in_len + 1);
461		}
462
463		if (need_unescape) {
464			ret = ucl_unescape_json_string (*dst, ret);
465		}
466		if (need_expand) {
467			tmp = *dst;
468			tret = ret;
469			ret = ucl_expand_variable (parser, dst, tmp, ret);
470			if (*dst == NULL) {
471				/* Nothing to expand */
472				*dst = tmp;
473				ret = tret;
474			}
475		}
476		*dst_const = *dst;
477	}
478	else {
479		*dst_const = src;
480		ret = in_len;
481	}
482
483	return ret;
484}
485
486/**
487 * Create and append an object at the specified level
488 * @param parser
489 * @param is_array
490 * @param level
491 * @return
492 */
493static inline ucl_object_t *
494ucl_add_parser_stack (ucl_object_t *obj, struct ucl_parser *parser, bool is_array, int level)
495{
496	struct ucl_stack *st;
497
498	if (!is_array) {
499		if (obj == NULL) {
500			obj = ucl_object_typed_new (UCL_OBJECT);
501		}
502		else {
503			obj->type = UCL_OBJECT;
504		}
505		obj->value.ov = ucl_hash_create ();
506		parser->state = UCL_STATE_KEY;
507	}
508	else {
509		if (obj == NULL) {
510			obj = ucl_object_typed_new (UCL_ARRAY);
511		}
512		else {
513			obj->type = UCL_ARRAY;
514		}
515		parser->state = UCL_STATE_VALUE;
516	}
517
518	st = UCL_ALLOC (sizeof (struct ucl_stack));
519	if (st == NULL) {
520		ucl_set_err (parser->chunks, 0, "cannot allocate memory for an object", &parser->err);
521		return NULL;
522	}
523	st->obj = obj;
524	st->level = level;
525	LL_PREPEND (parser->stack, st);
526	parser->cur_obj = obj;
527
528	return obj;
529}
530
531int
532ucl_maybe_parse_number (ucl_object_t *obj,
533		const char *start, const char *end, const char **pos,
534		bool allow_double, bool number_bytes, bool allow_time)
535{
536	const char *p = start, *c = start;
537	char *endptr;
538	bool got_dot = false, got_exp = false, need_double = false,
539			is_time = false, valid_start = false, is_hex = false,
540			is_neg = false;
541	double dv = 0;
542	int64_t lv = 0;
543
544	if (*p == '-') {
545		is_neg = true;
546		c ++;
547		p ++;
548	}
549	while (p < end) {
550		if (is_hex && isxdigit (*p)) {
551			p ++;
552		}
553		else if (isdigit (*p)) {
554			valid_start = true;
555			p ++;
556		}
557		else if (!is_hex && (*p == 'x' || *p == 'X')) {
558			is_hex = true;
559			allow_double = false;
560			c = p + 1;
561		}
562		else if (allow_double) {
563			if (p == c) {
564				/* Empty digits sequence, not a number */
565				*pos = start;
566				return EINVAL;
567			}
568			else if (*p == '.') {
569				if (got_dot) {
570					/* Double dots, not a number */
571					*pos = start;
572					return EINVAL;
573				}
574				else {
575					got_dot = true;
576					need_double = true;
577					p ++;
578				}
579			}
580			else if (*p == 'e' || *p == 'E') {
581				if (got_exp) {
582					/* Double exp, not a number */
583					*pos = start;
584					return EINVAL;
585				}
586				else {
587					got_exp = true;
588					need_double = true;
589					p ++;
590					if (p >= end) {
591						*pos = start;
592						return EINVAL;
593					}
594					if (!isdigit (*p) && *p != '+' && *p != '-') {
595						/* Wrong exponent sign */
596						*pos = start;
597						return EINVAL;
598					}
599					else {
600						p ++;
601					}
602				}
603			}
604			else {
605				/* Got the end of the number, need to check */
606				break;
607			}
608		}
609		else {
610			break;
611		}
612	}
613
614	if (!valid_start) {
615		*pos = start;
616		return EINVAL;
617	}
618
619	errno = 0;
620	if (need_double) {
621		dv = strtod (c, &endptr);
622	}
623	else {
624		if (is_hex) {
625			lv = strtoimax (c, &endptr, 16);
626		}
627		else {
628			lv = strtoimax (c, &endptr, 10);
629		}
630	}
631	if (errno == ERANGE) {
632		*pos = start;
633		return ERANGE;
634	}
635
636	/* Now check endptr */
637	if (endptr == NULL || ucl_lex_is_atom_end (*endptr) || *endptr == '\0' ||
638			ucl_test_character (*endptr, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
639		p = endptr;
640		goto set_obj;
641	}
642
643	if (endptr < end && endptr != start) {
644		p = endptr;
645		switch (*p) {
646		case 'm':
647		case 'M':
648		case 'g':
649		case 'G':
650		case 'k':
651		case 'K':
652			if (end - p >= 2) {
653				if (p[1] == 's' || p[1] == 'S') {
654					/* Milliseconds */
655					if (!need_double) {
656						need_double = true;
657						dv = lv;
658					}
659					is_time = true;
660					if (p[0] == 'm' || p[0] == 'M') {
661						dv /= 1000.;
662					}
663					else {
664						dv *= ucl_lex_num_multiplier (*p, false);
665					}
666					p += 2;
667					goto set_obj;
668				}
669				else if (number_bytes || (p[1] == 'b' || p[1] == 'B')) {
670					/* Bytes */
671					if (need_double) {
672						need_double = false;
673						lv = dv;
674					}
675					lv *= ucl_lex_num_multiplier (*p, true);
676					p += 2;
677					goto set_obj;
678				}
679				else if (ucl_lex_is_atom_end (p[1])) {
680					if (need_double) {
681						dv *= ucl_lex_num_multiplier (*p, false);
682					}
683					else {
684						lv *= ucl_lex_num_multiplier (*p, number_bytes);
685					}
686					p ++;
687					goto set_obj;
688				}
689				else if (allow_time && end - p >= 3) {
690					if (tolower (p[0]) == 'm' &&
691							tolower (p[1]) == 'i' &&
692							tolower (p[2]) == 'n') {
693						/* Minutes */
694						if (!need_double) {
695							need_double = true;
696							dv = lv;
697						}
698						is_time = true;
699						dv *= 60.;
700						p += 3;
701						goto set_obj;
702					}
703				}
704			}
705			else {
706				if (need_double) {
707					dv *= ucl_lex_num_multiplier (*p, false);
708				}
709				else {
710					lv *= ucl_lex_num_multiplier (*p, number_bytes);
711				}
712				p ++;
713				goto set_obj;
714			}
715			break;
716		case 'S':
717		case 's':
718			if (allow_time &&
719					(p == end - 1 || ucl_lex_is_atom_end (p[1]))) {
720				if (!need_double) {
721					need_double = true;
722					dv = lv;
723				}
724				p ++;
725				is_time = true;
726				goto set_obj;
727			}
728			break;
729		case 'h':
730		case 'H':
731		case 'd':
732		case 'D':
733		case 'w':
734		case 'W':
735		case 'Y':
736		case 'y':
737			if (allow_time &&
738					(p == end - 1 || ucl_lex_is_atom_end (p[1]))) {
739				if (!need_double) {
740					need_double = true;
741					dv = lv;
742				}
743				is_time = true;
744				dv *= ucl_lex_time_multiplier (*p);
745				p ++;
746				goto set_obj;
747			}
748			break;
749		}
750	}
751
752	*pos = c;
753	return EINVAL;
754
755	set_obj:
756	if (allow_double && (need_double || is_time)) {
757		if (!is_time) {
758			obj->type = UCL_FLOAT;
759		}
760		else {
761			obj->type = UCL_TIME;
762		}
763		obj->value.dv = is_neg ? (-dv) : dv;
764	}
765	else {
766		obj->type = UCL_INT;
767		obj->value.iv = is_neg ? (-lv) : lv;
768	}
769	*pos = p;
770	return 0;
771}
772
773/**
774 * Parse possible number
775 * @param parser
776 * @param chunk
777 * @return true if a number has been parsed
778 */
779static bool
780ucl_lex_number (struct ucl_parser *parser,
781		struct ucl_chunk *chunk, ucl_object_t *obj)
782{
783	const unsigned char *pos;
784	int ret;
785
786	ret = ucl_maybe_parse_number (obj, chunk->pos, chunk->end, (const char **)&pos,
787			true, false, ((parser->flags & UCL_PARSER_NO_TIME) == 0));
788
789	if (ret == 0) {
790		chunk->remain -= pos - chunk->pos;
791		chunk->column += pos - chunk->pos;
792		chunk->pos = pos;
793		return true;
794	}
795	else if (ret == ERANGE) {
796		ucl_set_err (chunk, ERANGE, "numeric value out of range", &parser->err);
797	}
798
799	return false;
800}
801
802/**
803 * Parse quoted string with possible escapes
804 * @param parser
805 * @param chunk
806 * @return true if a string has been parsed
807 */
808static bool
809ucl_lex_json_string (struct ucl_parser *parser,
810		struct ucl_chunk *chunk, bool *need_unescape, bool *ucl_escape, bool *var_expand)
811{
812	const unsigned char *p = chunk->pos;
813	unsigned char c;
814	int i;
815
816	while (p < chunk->end) {
817		c = *p;
818		if (c < 0x1F) {
819			/* Unmasked control character */
820			if (c == '\n') {
821				ucl_set_err (chunk, UCL_ESYNTAX, "unexpected newline", &parser->err);
822			}
823			else {
824				ucl_set_err (chunk, UCL_ESYNTAX, "unexpected control character", &parser->err);
825			}
826			return false;
827		}
828		else if (c == '\\') {
829			ucl_chunk_skipc (chunk, p);
830			c = *p;
831			if (p >= chunk->end) {
832				ucl_set_err (chunk, UCL_ESYNTAX, "unfinished escape character", &parser->err);
833				return false;
834			}
835			else if (ucl_test_character (c, UCL_CHARACTER_ESCAPE)) {
836				if (c == 'u') {
837					ucl_chunk_skipc (chunk, p);
838					for (i = 0; i < 4 && p < chunk->end; i ++) {
839						if (!isxdigit (*p)) {
840							ucl_set_err (chunk, UCL_ESYNTAX, "invalid utf escape", &parser->err);
841							return false;
842						}
843						ucl_chunk_skipc (chunk, p);
844					}
845					if (p >= chunk->end) {
846						ucl_set_err (chunk, UCL_ESYNTAX, "unfinished escape character", &parser->err);
847						return false;
848					}
849				}
850				else {
851					ucl_chunk_skipc (chunk, p);
852				}
853			}
854			*need_unescape = true;
855			*ucl_escape = true;
856			continue;
857		}
858		else if (c == '"') {
859			ucl_chunk_skipc (chunk, p);
860			return true;
861		}
862		else if (ucl_test_character (c, UCL_CHARACTER_UCL_UNSAFE)) {
863			*ucl_escape = true;
864		}
865		else if (c == '$') {
866			*var_expand = true;
867		}
868		ucl_chunk_skipc (chunk, p);
869	}
870
871	ucl_set_err (chunk, UCL_ESYNTAX, "no quote at the end of json string", &parser->err);
872	return false;
873}
874
875/**
876 * Parse a key in an object
877 * @param parser
878 * @param chunk
879 * @return true if a key has been parsed
880 */
881static bool
882ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk, bool *next_key, bool *end_of_object)
883{
884	const unsigned char *p, *c = NULL, *end, *t;
885	const char *key = NULL;
886	bool got_quote = false, got_eq = false, got_semicolon = false,
887			need_unescape = false, ucl_escape = false, var_expand = false,
888			got_content = false, got_sep = false;
889	ucl_object_t *nobj, *tobj;
890	ucl_hash_t *container;
891	ssize_t keylen;
892
893	p = chunk->pos;
894
895	if (*p == '.') {
896		/* It is macro actually */
897		ucl_chunk_skipc (chunk, p);
898		parser->prev_state = parser->state;
899		parser->state = UCL_STATE_MACRO_NAME;
900		return true;
901	}
902	while (p < chunk->end) {
903		/*
904		 * A key must start with alpha, number, '/' or '_' and end with space character
905		 */
906		if (c == NULL) {
907			if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
908				if (!ucl_skip_comments (parser)) {
909					return false;
910				}
911				p = chunk->pos;
912			}
913			else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
914				ucl_chunk_skipc (chunk, p);
915			}
916			else if (ucl_test_character (*p, UCL_CHARACTER_KEY_START)) {
917				/* The first symbol */
918				c = p;
919				ucl_chunk_skipc (chunk, p);
920				got_content = true;
921			}
922			else if (*p == '"') {
923				/* JSON style key */
924				c = p + 1;
925				got_quote = true;
926				got_content = true;
927				ucl_chunk_skipc (chunk, p);
928			}
929			else if (*p == '}') {
930				/* We have actually end of an object */
931				*end_of_object = true;
932				return true;
933			}
934			else if (*p == '.') {
935				ucl_chunk_skipc (chunk, p);
936				parser->prev_state = parser->state;
937				parser->state = UCL_STATE_MACRO_NAME;
938				return true;
939			}
940			else {
941				/* Invalid identifier */
942				ucl_set_err (chunk, UCL_ESYNTAX, "key must begin with a letter", &parser->err);
943				return false;
944			}
945		}
946		else {
947			/* Parse the body of a key */
948			if (!got_quote) {
949				if (ucl_test_character (*p, UCL_CHARACTER_KEY)) {
950					got_content = true;
951					ucl_chunk_skipc (chunk, p);
952				}
953				else if (ucl_test_character (*p, UCL_CHARACTER_KEY_SEP)) {
954					end = p;
955					break;
956				}
957				else {
958					ucl_set_err (chunk, UCL_ESYNTAX, "invalid character in a key", &parser->err);
959					return false;
960				}
961			}
962			else {
963				/* We need to parse json like quoted string */
964				if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
965					return false;
966				}
967				/* Always escape keys obtained via json */
968				end = chunk->pos - 1;
969				p = chunk->pos;
970				break;
971			}
972		}
973	}
974
975	if (p >= chunk->end && got_content) {
976		ucl_set_err (chunk, UCL_ESYNTAX, "unfinished key", &parser->err);
977		return false;
978	}
979	else if (!got_content) {
980		return true;
981	}
982	*end_of_object = false;
983	/* We are now at the end of the key, need to parse the rest */
984	while (p < chunk->end) {
985		if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
986			ucl_chunk_skipc (chunk, p);
987		}
988		else if (*p == '=') {
989			if (!got_eq && !got_semicolon) {
990				ucl_chunk_skipc (chunk, p);
991				got_eq = true;
992			}
993			else {
994				ucl_set_err (chunk, UCL_ESYNTAX, "unexpected '=' character", &parser->err);
995				return false;
996			}
997		}
998		else if (*p == ':') {
999			if (!got_eq && !got_semicolon) {
1000				ucl_chunk_skipc (chunk, p);
1001				got_semicolon = true;
1002			}
1003			else {
1004				ucl_set_err (chunk, UCL_ESYNTAX, "unexpected ':' character", &parser->err);
1005				return false;
1006			}
1007		}
1008		else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1009			/* Check for comment */
1010			if (!ucl_skip_comments (parser)) {
1011				return false;
1012			}
1013			p = chunk->pos;
1014		}
1015		else {
1016			/* Start value */
1017			break;
1018		}
1019	}
1020
1021	if (p >= chunk->end && got_content) {
1022		ucl_set_err (chunk, UCL_ESYNTAX, "unfinished key", &parser->err);
1023		return false;
1024	}
1025
1026	got_sep = got_semicolon || got_eq;
1027
1028	if (!got_sep) {
1029		/*
1030		 * Maybe we have more keys nested, so search for termination character.
1031		 * Possible choices:
1032		 * 1) key1 key2 ... keyN [:=] value <- we treat that as error
1033		 * 2) key1 ... keyN {} or [] <- we treat that as nested objects
1034		 * 3) key1 value[;,\n] <- we treat that as linear object
1035		 */
1036		t = p;
1037		*next_key = false;
1038		while (ucl_test_character (*t, UCL_CHARACTER_WHITESPACE)) {
1039			t ++;
1040		}
1041		/* Check first non-space character after a key */
1042		if (*t != '{' && *t != '[') {
1043			while (t < chunk->end) {
1044				if (*t == ',' || *t == ';' || *t == '\n' || *t == '\r') {
1045					break;
1046				}
1047				else if (*t == '{' || *t == '[') {
1048					*next_key = true;
1049					break;
1050				}
1051				t ++;
1052			}
1053		}
1054	}
1055
1056	/* Create a new object */
1057	nobj = ucl_object_new ();
1058	keylen = ucl_copy_or_store_ptr (parser, c, &nobj->trash_stack[UCL_TRASH_KEY],
1059			&key, end - c, need_unescape, parser->flags & UCL_PARSER_KEY_LOWERCASE, false);
1060	if (keylen == -1) {
1061		ucl_object_unref (nobj);
1062		return false;
1063	}
1064	else if (keylen == 0) {
1065		ucl_set_err (chunk, UCL_ESYNTAX, "empty keys are not allowed", &parser->err);
1066		ucl_object_unref (nobj);
1067		return false;
1068	}
1069
1070	container = parser->stack->obj->value.ov;
1071	nobj->key = key;
1072	nobj->keylen = keylen;
1073	tobj = __DECONST (ucl_object_t *, ucl_hash_search_obj (container, nobj));
1074	if (tobj == NULL) {
1075		container = ucl_hash_insert_object (container, nobj);
1076		nobj->prev = nobj;
1077		nobj->next = NULL;
1078		parser->stack->obj->len ++;
1079	}
1080	else {
1081		DL_APPEND (tobj, nobj);
1082	}
1083
1084	if (ucl_escape) {
1085		nobj->flags |= UCL_OBJECT_NEED_KEY_ESCAPE;
1086	}
1087	parser->stack->obj->value.ov = container;
1088
1089	parser->cur_obj = nobj;
1090
1091	return true;
1092}
1093
1094/**
1095 * Parse a cl string
1096 * @param parser
1097 * @param chunk
1098 * @return true if a key has been parsed
1099 */
1100static bool
1101ucl_parse_string_value (struct ucl_parser *parser,
1102		struct ucl_chunk *chunk, bool *var_expand, bool *need_unescape)
1103{
1104	const unsigned char *p;
1105	enum {
1106		UCL_BRACE_ROUND = 0,
1107		UCL_BRACE_SQUARE,
1108		UCL_BRACE_FIGURE
1109	};
1110	int braces[3][2] = {{0, 0}, {0, 0}, {0, 0}};
1111
1112	p = chunk->pos;
1113
1114	while (p < chunk->end) {
1115
1116		/* Skip pairs of figure braces */
1117		if (*p == '{') {
1118			braces[UCL_BRACE_FIGURE][0] ++;
1119		}
1120		else if (*p == '}') {
1121			braces[UCL_BRACE_FIGURE][1] ++;
1122			if (braces[UCL_BRACE_FIGURE][1] <= braces[UCL_BRACE_FIGURE][0]) {
1123				/* This is not a termination symbol, continue */
1124				ucl_chunk_skipc (chunk, p);
1125				continue;
1126			}
1127		}
1128		/* Skip pairs of square braces */
1129		else if (*p == '[') {
1130			braces[UCL_BRACE_SQUARE][0] ++;
1131		}
1132		else if (*p == ']') {
1133			braces[UCL_BRACE_SQUARE][1] ++;
1134			if (braces[UCL_BRACE_SQUARE][1] <= braces[UCL_BRACE_SQUARE][0]) {
1135				/* This is not a termination symbol, continue */
1136				ucl_chunk_skipc (chunk, p);
1137				continue;
1138			}
1139		}
1140		else if (*p == '$') {
1141			*var_expand = true;
1142		}
1143		else if (*p == '\\') {
1144			*need_unescape = true;
1145			ucl_chunk_skipc (chunk, p);
1146			if (p < chunk->end) {
1147				ucl_chunk_skipc (chunk, p);
1148			}
1149			continue;
1150		}
1151
1152		if (ucl_lex_is_atom_end (*p) || (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1153			break;
1154		}
1155		ucl_chunk_skipc (chunk, p);
1156	}
1157
1158	if (p >= chunk->end) {
1159		ucl_set_err (chunk, UCL_ESYNTAX, "unfinished value", &parser->err);
1160		return false;
1161	}
1162
1163	return true;
1164}
1165
1166/**
1167 * Parse multiline string ending with \n{term}\n
1168 * @param parser
1169 * @param chunk
1170 * @param term
1171 * @param term_len
1172 * @return size of multiline string or 0 in case of error
1173 */
1174static int
1175ucl_parse_multiline_string (struct ucl_parser *parser,
1176		struct ucl_chunk *chunk, const unsigned char *term,
1177		int term_len, unsigned char const **beg,
1178		bool *var_expand)
1179{
1180	const unsigned char *p, *c;
1181	bool newline = false;
1182	int len = 0;
1183
1184	p = chunk->pos;
1185
1186	c = p;
1187
1188	while (p < chunk->end) {
1189		if (newline) {
1190			if (chunk->end - p < term_len) {
1191				return 0;
1192			}
1193			else if (memcmp (p, term, term_len) == 0 && (p[term_len] == '\n' || p[term_len] == '\r')) {
1194				len = p - c;
1195				chunk->remain -= term_len;
1196				chunk->pos = p + term_len;
1197				chunk->column = term_len;
1198				*beg = c;
1199				break;
1200			}
1201		}
1202		if (*p == '\n') {
1203			newline = true;
1204		}
1205		else {
1206			if (*p == '$') {
1207				*var_expand = true;
1208			}
1209			newline = false;
1210		}
1211		ucl_chunk_skipc (chunk, p);
1212	}
1213
1214	return len;
1215}
1216
1217static ucl_object_t*
1218ucl_get_value_object (struct ucl_parser *parser)
1219{
1220	ucl_object_t *t, *obj = NULL;
1221
1222	if (parser->stack->obj->type == UCL_ARRAY) {
1223		/* Object must be allocated */
1224		obj = ucl_object_new ();
1225		t = parser->stack->obj->value.av;
1226		DL_APPEND (t, obj);
1227		parser->cur_obj = obj;
1228		parser->stack->obj->value.av = t;
1229		parser->stack->obj->len ++;
1230	}
1231	else {
1232		/* Object has been already allocated */
1233		obj = parser->cur_obj;
1234	}
1235
1236	return obj;
1237}
1238
1239/**
1240 * Handle value data
1241 * @param parser
1242 * @param chunk
1243 * @return
1244 */
1245static bool
1246ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1247{
1248	const unsigned char *p, *c;
1249	ucl_object_t *obj = NULL;
1250	unsigned int stripped_spaces;
1251	int str_len;
1252	bool need_unescape = false, ucl_escape = false, var_expand = false;
1253
1254	p = chunk->pos;
1255
1256	/* Skip any spaces and comments */
1257	if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) ||
1258			(chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1259		while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1260			ucl_chunk_skipc (chunk, p);
1261		}
1262		if (!ucl_skip_comments (parser)) {
1263			return false;
1264		}
1265		p = chunk->pos;
1266	}
1267
1268	while (p < chunk->end) {
1269		c = p;
1270		switch (*p) {
1271		case '"':
1272			obj = ucl_get_value_object (parser);
1273			ucl_chunk_skipc (chunk, p);
1274			if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1275				return false;
1276			}
1277			str_len = chunk->pos - c - 2;
1278			obj->type = UCL_STRING;
1279			if ((str_len = ucl_copy_or_store_ptr (parser, c + 1, &obj->trash_stack[UCL_TRASH_VALUE],
1280					&obj->value.sv, str_len, need_unescape, false, var_expand)) == -1) {
1281				return false;
1282			}
1283			obj->len = str_len;
1284			parser->state = UCL_STATE_AFTER_VALUE;
1285			p = chunk->pos;
1286			return true;
1287			break;
1288		case '{':
1289			obj = ucl_get_value_object (parser);
1290			/* We have a new object */
1291			obj = ucl_add_parser_stack (obj, parser, false, parser->stack->level);
1292			if (obj == NULL) {
1293				return false;
1294			}
1295
1296			ucl_chunk_skipc (chunk, p);
1297			return true;
1298			break;
1299		case '[':
1300			obj = ucl_get_value_object (parser);
1301			/* We have a new array */
1302			obj = ucl_add_parser_stack (obj, parser, true, parser->stack->level);
1303			if (obj == NULL) {
1304				return false;
1305			}
1306
1307			ucl_chunk_skipc (chunk, p);
1308			return true;
1309			break;
1310		case ']':
1311			/* We have the array ending */
1312			if (parser->stack && parser->stack->obj->type == UCL_ARRAY) {
1313				parser->state = UCL_STATE_AFTER_VALUE;
1314				return true;
1315			}
1316			else {
1317				goto parse_string;
1318			}
1319			break;
1320		case '<':
1321			obj = ucl_get_value_object (parser);
1322			/* We have something like multiline value, which must be <<[A-Z]+\n */
1323			if (chunk->end - p > 3) {
1324				if (memcmp (p, "<<", 2) == 0) {
1325					p += 2;
1326					/* We allow only uppercase characters in multiline definitions */
1327					while (p < chunk->end && *p >= 'A' && *p <= 'Z') {
1328						p ++;
1329					}
1330					if (*p =='\n') {
1331						/* Set chunk positions and start multiline parsing */
1332						c += 2;
1333						chunk->remain -= p - c;
1334						chunk->pos = p + 1;
1335						chunk->column = 0;
1336						chunk->line ++;
1337						if ((str_len = ucl_parse_multiline_string (parser, chunk, c,
1338								p - c, &c, &var_expand)) == 0) {
1339							ucl_set_err (chunk, UCL_ESYNTAX, "unterminated multiline value", &parser->err);
1340							return false;
1341						}
1342						obj->type = UCL_STRING;
1343						if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE],
1344							&obj->value.sv, str_len - 1, false, false, var_expand)) == -1) {
1345							return false;
1346						}
1347						obj->len = str_len;
1348						parser->state = UCL_STATE_AFTER_VALUE;
1349						return true;
1350					}
1351				}
1352			}
1353			/* Fallback to ordinary strings */
1354		default:
1355parse_string:
1356			if (obj == NULL) {
1357				obj = ucl_get_value_object (parser);
1358			}
1359			/* Parse atom */
1360			if (ucl_test_character (*p, UCL_CHARACTER_VALUE_DIGIT_START)) {
1361				if (!ucl_lex_number (parser, chunk, obj)) {
1362					if (parser->state == UCL_STATE_ERROR) {
1363						return false;
1364					}
1365				}
1366				else {
1367					parser->state = UCL_STATE_AFTER_VALUE;
1368					return true;
1369				}
1370				/* Fallback to normal string */
1371			}
1372
1373			if (!ucl_parse_string_value (parser, chunk, &var_expand, &need_unescape)) {
1374				return false;
1375			}
1376			/* Cut trailing spaces */
1377			stripped_spaces = 0;
1378			while (ucl_test_character (*(chunk->pos - 1 - stripped_spaces),
1379					UCL_CHARACTER_WHITESPACE)) {
1380				stripped_spaces ++;
1381			}
1382			str_len = chunk->pos - c - stripped_spaces;
1383			if (str_len <= 0) {
1384				ucl_set_err (chunk, 0, "string value must not be empty", &parser->err);
1385				return false;
1386			}
1387			else if (str_len == 4 && memcmp (c, "null", 4) == 0) {
1388				obj->len = 0;
1389				obj->type = UCL_NULL;
1390			}
1391			else if (!ucl_maybe_parse_boolean (obj, c, str_len)) {
1392				obj->type = UCL_STRING;
1393				if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE],
1394						&obj->value.sv, str_len, need_unescape,
1395						false, var_expand)) == -1) {
1396					return false;
1397				}
1398				obj->len = str_len;
1399			}
1400			parser->state = UCL_STATE_AFTER_VALUE;
1401			p = chunk->pos;
1402
1403			return true;
1404			break;
1405		}
1406	}
1407
1408	return true;
1409}
1410
1411/**
1412 * Handle after value data
1413 * @param parser
1414 * @param chunk
1415 * @return
1416 */
1417static bool
1418ucl_parse_after_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1419{
1420	const unsigned char *p;
1421	bool got_sep = false;
1422	struct ucl_stack *st;
1423
1424	p = chunk->pos;
1425
1426	while (p < chunk->end) {
1427		if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
1428			/* Skip whitespaces */
1429			ucl_chunk_skipc (chunk, p);
1430		}
1431		else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1432			/* Skip comment */
1433			if (!ucl_skip_comments (parser)) {
1434				return false;
1435			}
1436			/* Treat comment as a separator */
1437			got_sep = true;
1438			p = chunk->pos;
1439		}
1440		else if (ucl_test_character (*p, UCL_CHARACTER_VALUE_END)) {
1441			if (*p == '}' || *p == ']') {
1442				if (parser->stack == NULL) {
1443					ucl_set_err (chunk, UCL_ESYNTAX, "end of array or object detected without corresponding start", &parser->err);
1444					return false;
1445				}
1446				if ((*p == '}' && parser->stack->obj->type == UCL_OBJECT) ||
1447						(*p == ']' && parser->stack->obj->type == UCL_ARRAY)) {
1448
1449					/* Pop all nested objects from a stack */
1450					st = parser->stack;
1451					parser->stack = st->next;
1452					UCL_FREE (sizeof (struct ucl_stack), st);
1453
1454					while (parser->stack != NULL) {
1455						st = parser->stack;
1456						if (st->next == NULL || st->next->level == st->level) {
1457							break;
1458						}
1459						parser->stack = st->next;
1460						UCL_FREE (sizeof (struct ucl_stack), st);
1461					}
1462				}
1463				else {
1464					ucl_set_err (chunk, UCL_ESYNTAX, "unexpected terminating symbol detected", &parser->err);
1465					return false;
1466				}
1467
1468				if (parser->stack == NULL) {
1469					/* Ignore everything after a top object */
1470					return true;
1471				}
1472				else {
1473					ucl_chunk_skipc (chunk, p);
1474				}
1475				got_sep = true;
1476			}
1477			else {
1478				/* Got a separator */
1479				got_sep = true;
1480				ucl_chunk_skipc (chunk, p);
1481			}
1482		}
1483		else {
1484			/* Anything else */
1485			if (!got_sep) {
1486				ucl_set_err (chunk, UCL_ESYNTAX, "delimiter is missing", &parser->err);
1487				return false;
1488			}
1489			return true;
1490		}
1491	}
1492
1493	return true;
1494}
1495
1496/**
1497 * Handle macro data
1498 * @param parser
1499 * @param chunk
1500 * @return
1501 */
1502static bool
1503ucl_parse_macro_value (struct ucl_parser *parser,
1504		struct ucl_chunk *chunk, struct ucl_macro *macro,
1505		unsigned char const **macro_start, size_t *macro_len)
1506{
1507	const unsigned char *p, *c;
1508	bool need_unescape = false, ucl_escape = false, var_expand = false;
1509
1510	p = chunk->pos;
1511
1512	switch (*p) {
1513	case '"':
1514		/* We have macro value encoded in quotes */
1515		c = p;
1516		ucl_chunk_skipc (chunk, p);
1517		if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1518			return false;
1519		}
1520
1521		*macro_start = c + 1;
1522		*macro_len = chunk->pos - c - 2;
1523		p = chunk->pos;
1524		break;
1525	case '{':
1526		/* We got a multiline macro body */
1527		ucl_chunk_skipc (chunk, p);
1528		/* Skip spaces at the beginning */
1529		while (p < chunk->end) {
1530			if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1531				ucl_chunk_skipc (chunk, p);
1532			}
1533			else {
1534				break;
1535			}
1536		}
1537		c = p;
1538		while (p < chunk->end) {
1539			if (*p == '}') {
1540				break;
1541			}
1542			ucl_chunk_skipc (chunk, p);
1543		}
1544		*macro_start = c;
1545		*macro_len = p - c;
1546		ucl_chunk_skipc (chunk, p);
1547		break;
1548	default:
1549		/* Macro is not enclosed in quotes or braces */
1550		c = p;
1551		while (p < chunk->end) {
1552			if (ucl_lex_is_atom_end (*p)) {
1553				break;
1554			}
1555			ucl_chunk_skipc (chunk, p);
1556		}
1557		*macro_start = c;
1558		*macro_len = p - c;
1559		break;
1560	}
1561
1562	/* We are at the end of a macro */
1563	/* Skip ';' and space characters and return to previous state */
1564	while (p < chunk->end) {
1565		if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) && *p != ';') {
1566			break;
1567		}
1568		ucl_chunk_skipc (chunk, p);
1569	}
1570	return true;
1571}
1572
1573/**
1574 * Handle the main states of rcl parser
1575 * @param parser parser structure
1576 * @param data the pointer to the beginning of a chunk
1577 * @param len the length of a chunk
1578 * @return true if chunk has been parsed and false in case of error
1579 */
1580static bool
1581ucl_state_machine (struct ucl_parser *parser)
1582{
1583	ucl_object_t *obj;
1584	struct ucl_chunk *chunk = parser->chunks;
1585	const unsigned char *p, *c = NULL, *macro_start = NULL;
1586	unsigned char *macro_escaped;
1587	size_t macro_len = 0;
1588	struct ucl_macro *macro = NULL;
1589	bool next_key = false, end_of_object = false;
1590
1591	if (parser->top_obj == NULL) {
1592		if (*chunk->pos == '[') {
1593			obj = ucl_add_parser_stack (NULL, parser, true, 0);
1594		}
1595		else {
1596			obj = ucl_add_parser_stack (NULL, parser, false, 0);
1597		}
1598		if (obj == NULL) {
1599			return false;
1600		}
1601		parser->top_obj = obj;
1602		parser->cur_obj = obj;
1603		parser->state = UCL_STATE_INIT;
1604	}
1605
1606	p = chunk->pos;
1607	while (chunk->pos < chunk->end) {
1608		switch (parser->state) {
1609		case UCL_STATE_INIT:
1610			/*
1611			 * At the init state we can either go to the parse array or object
1612			 * if we got [ or { correspondingly or can just treat new data as
1613			 * a key of newly created object
1614			 */
1615			obj = parser->cur_obj;
1616			if (!ucl_skip_comments (parser)) {
1617				parser->prev_state = parser->state;
1618				parser->state = UCL_STATE_ERROR;
1619				return false;
1620			}
1621			else {
1622				p = chunk->pos;
1623				if (*p == '[') {
1624					parser->state = UCL_STATE_VALUE;
1625					ucl_chunk_skipc (chunk, p);
1626				}
1627				else {
1628					parser->state = UCL_STATE_KEY;
1629					if (*p == '{') {
1630						ucl_chunk_skipc (chunk, p);
1631					}
1632				}
1633			}
1634			break;
1635		case UCL_STATE_KEY:
1636			/* Skip any spaces */
1637			while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1638				ucl_chunk_skipc (chunk, p);
1639			}
1640			if (*p == '}') {
1641				/* We have the end of an object */
1642				parser->state = UCL_STATE_AFTER_VALUE;
1643				continue;
1644			}
1645			if (parser->stack == NULL) {
1646				/* No objects are on stack, but we want to parse a key */
1647				ucl_set_err (chunk, UCL_ESYNTAX, "top object is finished but the parser "
1648						"expects a key", &parser->err);
1649				parser->prev_state = parser->state;
1650				parser->state = UCL_STATE_ERROR;
1651				return false;
1652			}
1653			if (!ucl_parse_key (parser, chunk, &next_key, &end_of_object)) {
1654				parser->prev_state = parser->state;
1655				parser->state = UCL_STATE_ERROR;
1656				return false;
1657			}
1658			if (end_of_object) {
1659				p = chunk->pos;
1660				parser->state = UCL_STATE_AFTER_VALUE;
1661				continue;
1662			}
1663			else if (parser->state != UCL_STATE_MACRO_NAME) {
1664				if (next_key && parser->stack->obj->type == UCL_OBJECT) {
1665					/* Parse more keys and nest objects accordingly */
1666					obj = ucl_add_parser_stack (parser->cur_obj, parser, false,
1667							parser->stack->level + 1);
1668					if (obj == NULL) {
1669						return false;
1670					}
1671				}
1672				else {
1673					parser->state = UCL_STATE_VALUE;
1674				}
1675			}
1676			else {
1677				c = chunk->pos;
1678			}
1679			p = chunk->pos;
1680			break;
1681		case UCL_STATE_VALUE:
1682			/* We need to check what we do have */
1683			if (!ucl_parse_value (parser, chunk)) {
1684				parser->prev_state = parser->state;
1685				parser->state = UCL_STATE_ERROR;
1686				return false;
1687			}
1688			/* State is set in ucl_parse_value call */
1689			p = chunk->pos;
1690			break;
1691		case UCL_STATE_AFTER_VALUE:
1692			if (!ucl_parse_after_value (parser, chunk)) {
1693				parser->prev_state = parser->state;
1694				parser->state = UCL_STATE_ERROR;
1695				return false;
1696			}
1697			if (parser->stack != NULL) {
1698				if (parser->stack->obj->type == UCL_OBJECT) {
1699					parser->state = UCL_STATE_KEY;
1700				}
1701				else {
1702					/* Array */
1703					parser->state = UCL_STATE_VALUE;
1704				}
1705			}
1706			else {
1707				/* Skip everything at the end */
1708				return true;
1709			}
1710			p = chunk->pos;
1711			break;
1712		case UCL_STATE_MACRO_NAME:
1713			if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1714				ucl_chunk_skipc (chunk, p);
1715			}
1716			else if (p - c > 0) {
1717				/* We got macro name */
1718				macro_len = (size_t)(p - c);
1719				HASH_FIND (hh, parser->macroes, c, macro_len, macro);
1720				if (macro == NULL) {
1721					ucl_create_err (&parser->err, "error on line %d at column %d: "
1722							"unknown macro: '%.*s', character: '%c'",
1723								chunk->line, chunk->column, (int)(p - c), c, *chunk->pos);
1724					parser->state = UCL_STATE_ERROR;
1725					return false;
1726				}
1727				/* Now we need to skip all spaces */
1728				while (p < chunk->end) {
1729					if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1730						if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1731							/* Skip comment */
1732							if (!ucl_skip_comments (parser)) {
1733								return false;
1734							}
1735							p = chunk->pos;
1736						}
1737						break;
1738					}
1739					ucl_chunk_skipc (chunk, p);
1740				}
1741				parser->state = UCL_STATE_MACRO;
1742			}
1743			break;
1744		case UCL_STATE_MACRO:
1745			if (!ucl_parse_macro_value (parser, chunk, macro,
1746					&macro_start, &macro_len)) {
1747				parser->prev_state = parser->state;
1748				parser->state = UCL_STATE_ERROR;
1749				return false;
1750			}
1751			macro_len = ucl_expand_variable (parser, &macro_escaped, macro_start, macro_len);
1752			parser->state = parser->prev_state;
1753			if (macro_escaped == NULL) {
1754				if (!macro->handler (macro_start, macro_len, macro->ud)) {
1755					return false;
1756				}
1757			}
1758			else {
1759				if (!macro->handler (macro_escaped, macro_len, macro->ud)) {
1760					UCL_FREE (macro_len + 1, macro_escaped);
1761					return false;
1762				}
1763				UCL_FREE (macro_len + 1, macro_escaped);
1764			}
1765			p = chunk->pos;
1766			break;
1767		default:
1768			/* TODO: add all states */
1769			ucl_set_err (chunk, UCL_EINTERNAL, "internal error: parser is in an unknown state", &parser->err);
1770			parser->state = UCL_STATE_ERROR;
1771			return false;
1772		}
1773	}
1774
1775	return true;
1776}
1777
1778struct ucl_parser*
1779ucl_parser_new (int flags)
1780{
1781	struct ucl_parser *new;
1782
1783	new = UCL_ALLOC (sizeof (struct ucl_parser));
1784	if (new == NULL) {
1785		return NULL;
1786	}
1787	memset (new, 0, sizeof (struct ucl_parser));
1788
1789	ucl_parser_register_macro (new, "include", ucl_include_handler, new);
1790	ucl_parser_register_macro (new, "try_include", ucl_try_include_handler, new);
1791	ucl_parser_register_macro (new, "includes", ucl_includes_handler, new);
1792
1793	new->flags = flags;
1794
1795	/* Initial assumption about filevars */
1796	ucl_parser_set_filevars (new, NULL, false);
1797
1798	return new;
1799}
1800
1801
1802void
1803ucl_parser_register_macro (struct ucl_parser *parser, const char *macro,
1804		ucl_macro_handler handler, void* ud)
1805{
1806	struct ucl_macro *new;
1807
1808	if (macro == NULL || handler == NULL) {
1809		return;
1810	}
1811	new = UCL_ALLOC (sizeof (struct ucl_macro));
1812	if (new == NULL) {
1813		return;
1814	}
1815	memset (new, 0, sizeof (struct ucl_macro));
1816	new->handler = handler;
1817	new->name = strdup (macro);
1818	new->ud = ud;
1819	HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new);
1820}
1821
1822void
1823ucl_parser_register_variable (struct ucl_parser *parser, const char *var,
1824		const char *value)
1825{
1826	struct ucl_variable *new = NULL, *cur;
1827
1828	if (var == NULL) {
1829		return;
1830	}
1831
1832	/* Find whether a variable already exists */
1833	LL_FOREACH (parser->variables, cur) {
1834		if (strcmp (cur->var, var) == 0) {
1835			new = cur;
1836			break;
1837		}
1838	}
1839
1840	if (value == NULL) {
1841
1842		if (new != NULL) {
1843			/* Remove variable */
1844			LL_DELETE (parser->variables, new);
1845			free (new->var);
1846			free (new->value);
1847			UCL_FREE (sizeof (struct ucl_variable), new);
1848		}
1849		else {
1850			/* Do nothing */
1851			return;
1852		}
1853	}
1854	else {
1855		if (new == NULL) {
1856			new = UCL_ALLOC (sizeof (struct ucl_variable));
1857			if (new == NULL) {
1858				return;
1859			}
1860			memset (new, 0, sizeof (struct ucl_variable));
1861			new->var = strdup (var);
1862			new->var_len = strlen (var);
1863			new->value = strdup (value);
1864			new->value_len = strlen (value);
1865
1866			LL_PREPEND (parser->variables, new);
1867		}
1868		else {
1869			free (new->value);
1870			new->value = strdup (value);
1871			new->value_len = strlen (value);
1872		}
1873	}
1874}
1875
1876bool
1877ucl_parser_add_chunk (struct ucl_parser *parser, const unsigned char *data,
1878		size_t len)
1879{
1880	struct ucl_chunk *chunk;
1881
1882	if (data == NULL || len == 0) {
1883		ucl_create_err (&parser->err, "invalid chunk added");
1884		return false;
1885	}
1886	if (parser->state != UCL_STATE_ERROR) {
1887		chunk = UCL_ALLOC (sizeof (struct ucl_chunk));
1888		if (chunk == NULL) {
1889			ucl_create_err (&parser->err, "cannot allocate chunk structure");
1890			return false;
1891		}
1892		chunk->begin = data;
1893		chunk->remain = len;
1894		chunk->pos = chunk->begin;
1895		chunk->end = chunk->begin + len;
1896		chunk->line = 1;
1897		chunk->column = 0;
1898		LL_PREPEND (parser->chunks, chunk);
1899		parser->recursion ++;
1900		if (parser->recursion > UCL_MAX_RECURSION) {
1901			ucl_create_err (&parser->err, "maximum include nesting limit is reached: %d",
1902					parser->recursion);
1903			return false;
1904		}
1905		return ucl_state_machine (parser);
1906	}
1907
1908	ucl_create_err (&parser->err, "a parser is in an invalid state");
1909
1910	return false;
1911}
1912
1913bool
1914ucl_parser_add_string (struct ucl_parser *parser, const char *data,
1915		size_t len)
1916{
1917	if (data == NULL) {
1918		ucl_create_err (&parser->err, "invalid string added");
1919		return false;
1920	}
1921	if (len == 0) {
1922		len = strlen (data);
1923	}
1924
1925	return ucl_parser_add_chunk (parser, (const unsigned char *)data, len);
1926}
1927