ucl_parser.c revision 314278
1/* Copyright (c) 2013, Vsevolod Stakhov
2 * All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *       * Redistributions of source code must retain the above copyright
7 *         notice, this list of conditions and the following disclaimer.
8 *       * Redistributions in binary form must reproduce the above copyright
9 *         notice, this list of conditions and the following disclaimer in the
10 *         documentation and/or other materials provided with the distribution.
11 *
12 * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
13 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
14 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
15 * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
16 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
18 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
19 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
20 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
21 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
22 */
23
24#include "ucl.h"
25#include "ucl_internal.h"
26#include "ucl_chartable.h"
27
28/**
29 * @file ucl_parser.c
30 * The implementation of ucl parser
31 */
32
/**
 * Snapshot of a read position: line, column, remaining byte count and
 * the read pointer itself.
 * NOTE(review): presumably mirrors the cursor fields of a chunk so that a
 * position can be restored later - no user is visible in this part of the
 * file, confirm against the callers.
 */
struct ucl_parser_saved_state {
	unsigned line;				/* line counter */
	unsigned column;			/* column counter */
	size_t remain;				/* number of bytes left to read */
	const unsigned char *pos;	/* read pointer */
};
39
/**
 * Advance the chunk cursor by exactly one character and keep the
 * line/column bookkeeping in sync.
 *
 * If the current character is a newline, the line counter is incremented and
 * the column is reset to 0; otherwise the column is incremented. After that
 * both `p` and `chunk->pos` are advanced and `chunk->remain` is decremented.
 * No bounds checking is performed here: the caller must make sure that there
 * is at least one character left.
 *
 * @param chunk pointer to the chunk being read (evaluated several times)
 * @param p modifiable pointer lvalue addressing the current character
 *          (evaluated twice, so it must not have side effects)
 */
#define ucl_chunk_skipc(chunk, p)    do{					\
    if (*(p) == '\n') {										\
        (chunk)->line ++;									\
        (chunk)->column = 0;								\
    }														\
    else (chunk)->column ++;								\
    (p)++;													\
    (chunk)->pos ++;										\
    (chunk)->remain --;										\
    } while (0)
57
58static inline void
59ucl_set_err (struct ucl_parser *parser, int code, const char *str, UT_string **err)
60{
61	const char *fmt_string, *filename;
62	struct ucl_chunk *chunk = parser->chunks;
63
64	if (parser->cur_file) {
65		filename = parser->cur_file;
66	}
67	else {
68		filename = "<unknown>";
69	}
70
71	if (chunk->pos < chunk->end) {
72		if (isgraph (*chunk->pos)) {
73			fmt_string = "error while parsing %s: "
74					"line: %d, column: %d - '%s', character: '%c'";
75		}
76		else {
77			fmt_string = "error while parsing %s: "
78					"line: %d, column: %d - '%s', character: '0x%02x'";
79		}
80		ucl_create_err (err, fmt_string,
81			filename, chunk->line, chunk->column,
82			str, *chunk->pos);
83	}
84	else {
85		ucl_create_err (err, "error while parsing %s: at the end of chunk: %s",
86			filename, str);
87	}
88
89	parser->err_code = code;
90}
91
92static void
93ucl_save_comment (struct ucl_parser *parser, const char *begin, size_t len)
94{
95	ucl_object_t *nobj;
96
97	if (len > 0 && begin != NULL) {
98		nobj = ucl_object_fromstring_common (begin, len, 0);
99
100		if (parser->last_comment) {
101			/* We need to append data to an existing object */
102			DL_APPEND (parser->last_comment, nobj);
103		}
104		else {
105			parser->last_comment = nobj;
106		}
107	}
108}
109
110static void
111ucl_attach_comment (struct ucl_parser *parser, ucl_object_t *obj, bool before)
112{
113	if (parser->last_comment) {
114		ucl_object_insert_key (parser->comments, parser->last_comment,
115				(const char *)&obj, sizeof (void *), true);
116
117		if (before) {
118			parser->last_comment->flags |= UCL_OBJECT_INHERITED;
119		}
120
121		parser->last_comment = NULL;
122	}
123}
124
125/**
126 * Skip all comments from the current pos resolving nested and multiline comments
127 * @param parser
128 * @return
129 */
130static bool
131ucl_skip_comments (struct ucl_parser *parser)
132{
133	struct ucl_chunk *chunk = parser->chunks;
134	const unsigned char *p, *beg = NULL;
135	int comments_nested = 0;
136	bool quoted = false;
137
138	p = chunk->pos;
139
140start:
141	if (chunk->remain > 0 && *p == '#') {
142		if (parser->state != UCL_STATE_SCOMMENT &&
143				parser->state != UCL_STATE_MCOMMENT) {
144			beg = p;
145
146			while (p < chunk->end) {
147				if (*p == '\n') {
148					if (parser->flags & UCL_PARSER_SAVE_COMMENTS) {
149						ucl_save_comment (parser, beg, p - beg);
150						beg = NULL;
151					}
152
153					ucl_chunk_skipc (chunk, p);
154
155					goto start;
156				}
157				ucl_chunk_skipc (chunk, p);
158			}
159		}
160	}
161	else if (chunk->remain >= 2 && *p == '/') {
162		if (p[1] == '*') {
163			beg = p;
164			ucl_chunk_skipc (chunk, p);
165			comments_nested ++;
166			ucl_chunk_skipc (chunk, p);
167
168			while (p < chunk->end) {
169				if (*p == '"' && *(p - 1) != '\\') {
170					quoted = !quoted;
171				}
172
173				if (!quoted) {
174					if (*p == '*') {
175						ucl_chunk_skipc (chunk, p);
176						if (*p == '/') {
177							comments_nested --;
178							if (comments_nested == 0) {
179								if (parser->flags & UCL_PARSER_SAVE_COMMENTS) {
180									ucl_save_comment (parser, beg, p - beg + 1);
181									beg = NULL;
182								}
183
184								ucl_chunk_skipc (chunk, p);
185								goto start;
186							}
187						}
188						ucl_chunk_skipc (chunk, p);
189					}
190					else if (p[0] == '/' && chunk->remain >= 2 && p[1] == '*') {
191						comments_nested ++;
192						ucl_chunk_skipc (chunk, p);
193						ucl_chunk_skipc (chunk, p);
194						continue;
195					}
196				}
197
198				ucl_chunk_skipc (chunk, p);
199			}
200			if (comments_nested != 0) {
201				ucl_set_err (parser, UCL_ENESTED,
202						"unfinished multiline comment", &parser->err);
203				return false;
204			}
205		}
206	}
207
208	if (beg && p > beg && (parser->flags & UCL_PARSER_SAVE_COMMENTS)) {
209		ucl_save_comment (parser, beg, p - beg);
210	}
211
212	return true;
213}
214
/**
 * Translate a size suffix character into its numeric multiplier.
 *
 * 'k', 'm' and 'g' are recognised case insensitively; any other character
 * yields 1.
 *
 * @param c suffix character
 * @param is_bytes if true powers of 1024 are used, otherwise powers of 1000
 * @return multiplier for the suffix
 */
static inline unsigned long
ucl_lex_num_multiplier (const unsigned char c, bool is_bytes) {
	long decimal, binary;

	switch (tolower (c)) {
	case 'k':
		decimal = 1000;
		binary = 1024;
		break;
	case 'm':
		decimal = 1000 * 1000;
		binary = 1024 * 1024;
		break;
	case 'g':
		decimal = 1000 * 1000 * 1000;
		binary = 1024 * 1024 * 1024;
		break;
	default:
		/* Not a known suffix */
		return 1;
	}

	return is_bytes ? binary : decimal;
}
245
246
/**
 * Translate a time suffix character into the number of seconds it stands for.
 *
 * Recognised (case insensitively): 'm' minute, 'h' hour, 'd' day, 'w' week,
 * 'y' year of 365 days. Any other character yields 1.
 *
 * @param c suffix character
 * @return multiplier for the suffix
 */
static inline double
ucl_lex_time_multiplier (const unsigned char c) {
	switch (tolower (c)) {
	case 'm':
		return 60;
	case 'h':
		return 60 * 60;
	case 'd':
		return 60 * 60 * 24;
	case 'w':
		return 60 * 60 * 24 * 7;
	case 'y':
		return 60 * 60 * 24 * 365;
	default:
		break;
	}

	return 1;
}
274
275/**
276 * Return true if a character is a end of an atom
277 * @param c
278 * @return
279 */
280static inline bool
281ucl_lex_is_atom_end (const unsigned char c)
282{
283	return ucl_test_character (c, UCL_CHARACTER_VALUE_END);
284}
285
/**
 * Tell whether two consecutive characters open a comment.
 * @param c1 the first character
 * @param c2 the character that follows it
 * @return true for '#' followed by anything and for '/' followed by '*'
 */
static inline bool
ucl_lex_is_comment (const unsigned char c1, const unsigned char c2)
{
	return c1 == '#' || (c1 == '/' && c2 == '*');
}
299
300/**
301 * Check variable found
302 * @param parser
303 * @param ptr
304 * @param remain
305 * @param out_len
306 * @param strict
307 * @param found
308 * @return
309 */
310static inline const char *
311ucl_check_variable_safe (struct ucl_parser *parser, const char *ptr, size_t remain,
312		size_t *out_len, bool strict, bool *found)
313{
314	struct ucl_variable *var;
315	unsigned char *dst;
316	size_t dstlen;
317	bool need_free = false;
318
319	LL_FOREACH (parser->variables, var) {
320		if (strict) {
321			if (remain == var->var_len) {
322				if (memcmp (ptr, var->var, var->var_len) == 0) {
323					*out_len += var->value_len;
324					*found = true;
325					return (ptr + var->var_len);
326				}
327			}
328		}
329		else {
330			if (remain >= var->var_len) {
331				if (memcmp (ptr, var->var, var->var_len) == 0) {
332					*out_len += var->value_len;
333					*found = true;
334					return (ptr + var->var_len);
335				}
336			}
337		}
338	}
339
340	/* XXX: can only handle ${VAR} */
341	if (!(*found) && parser->var_handler != NULL && strict) {
342		/* Call generic handler */
343		if (parser->var_handler (ptr, remain, &dst, &dstlen, &need_free,
344				parser->var_data)) {
345			*out_len += dstlen;
346			*found = true;
347			if (need_free) {
348				free (dst);
349			}
350			return (ptr + remain);
351		}
352	}
353
354	return ptr;
355}
356
/**
 * Check for a variable reference right after a '$' sign and account for the
 * number of bytes it contributes to the expanded string.
 *
 * Handled forms (ptr addresses the character after '$'):
 *  - "{NAME}": strict lookup of NAME; if unknown, 2 bytes are counted and
 *    scanning continues at NAME; if the closing brace is missing, the '$'
 *    alone is counted and scanning continues at the brace
 *  - "$": an escaped dollar sign, counted as a single byte
 *  - anything else: prefix lookup; if unknown, the '$' alone is counted
 *  - nothing left (remain == 0): the '$' alone is counted
 *
 * @param parser parser holding the variables
 * @param ptr character following the '$' sign
 * @param remain number of bytes available at ptr
 * @param out_len accumulated length of the expanded output
 * @param vars_found set to true once at least one variable is resolved
 * @return position from which the caller must continue scanning
 */
static const char *
ucl_check_variable (struct ucl_parser *parser, const char *ptr,
		size_t remain, size_t *out_len, bool *vars_found)
{
	const char *p, *end, *ret = ptr;
	bool found = false;

	if (remain == 0) {
		/* '$' is the last character, do not read past the input */
		(*out_len) ++;
		return ret;
	}

	if (*ptr == '{') {
		/* We need to match the variable enclosed in braces */
		p = ptr + 1;
		end = ptr + remain;
		while (p < end) {
			if (*p == '}') {
				ret = ucl_check_variable_safe (parser, ptr + 1, p - ptr - 1,
						out_len, true, &found);
				if (found) {
					/* {} must be excluded actually */
					ret ++;
					if (!*vars_found) {
						*vars_found = true;
					}
				}
				else {
					*out_len += 2;
				}
				break;
			}
			p ++;
		}
		if (p == end) {
			/*
			 * No closing brace: the caller resumes at '{' and counts the
			 * remaining characters itself, but the '$' that it has already
			 * passed must be counted here or the output is undersized
			 */
			(*out_len) ++;
		}
	}
	else if (*ptr != '$') {
		/* Not count escaped dollar sign */
		ret = ucl_check_variable_safe (parser, ptr, remain, out_len, false, &found);
		if (found && !*vars_found) {
			*vars_found = true;
		}
		if (!found) {
			(*out_len) ++;
		}
	}
	else {
		/* "$$" stands for a single '$' */
		ret ++;
		(*out_len) ++;
	}

	return ret;
}
413
414/**
415 * Expand a single variable
416 * @param parser
417 * @param ptr
418 * @param remain
419 * @param dest
420 * @return
421 */
422static const char *
423ucl_expand_single_variable (struct ucl_parser *parser, const char *ptr,
424		size_t remain, unsigned char **dest)
425{
426	unsigned char *d = *dest, *dst;
427	const char *p = ptr + 1, *ret;
428	struct ucl_variable *var;
429	size_t dstlen;
430	bool need_free = false;
431	bool found = false;
432	bool strict = false;
433
434	ret = ptr + 1;
435	remain --;
436
437	if (*p == '$') {
438		*d++ = *p++;
439		*dest = d;
440		return p;
441	}
442	else if (*p == '{') {
443		p ++;
444		strict = true;
445		ret += 2;
446		remain -= 2;
447	}
448
449	LL_FOREACH (parser->variables, var) {
450		if (remain >= var->var_len) {
451			if (memcmp (p, var->var, var->var_len) == 0) {
452				memcpy (d, var->value, var->value_len);
453				ret += var->var_len;
454				d += var->value_len;
455				found = true;
456				break;
457			}
458		}
459	}
460	if (!found) {
461		if (strict && parser->var_handler != NULL) {
462			size_t var_len = 0;
463			while (var_len < remain && p[var_len] != '}')
464				var_len ++;
465
466			if (parser->var_handler (p, var_len, &dst, &dstlen, &need_free,
467							parser->var_data)) {
468				memcpy (d, dst, dstlen);
469				ret += var_len;
470				d += dstlen;
471				if (need_free) {
472					free (dst);
473				}
474				found = true;
475			}
476		}
477
478		/* Leave variable as is */
479		if (!found) {
480			if (strict) {
481				/* Copy '${' */
482				memcpy (d, ptr, 2);
483				d += 2;
484				ret --;
485			}
486			else {
487				memcpy (d, ptr, 1);
488				d ++;
489			}
490		}
491	}
492
493	*dest = d;
494	return ret;
495}
496
497/**
498 * Expand variables in string
499 * @param parser
500 * @param dst
501 * @param src
502 * @param in_len
503 * @return
504 */
505static ssize_t
506ucl_expand_variable (struct ucl_parser *parser, unsigned char **dst,
507		const char *src, size_t in_len)
508{
509	const char *p, *end = src + in_len;
510	unsigned char *d;
511	size_t out_len = 0;
512	bool vars_found = false;
513
514	if (parser->flags & UCL_PARSER_DISABLE_MACRO) {
515		*dst = NULL;
516		return in_len;
517	}
518
519	p = src;
520	while (p != end) {
521		if (*p == '$') {
522			p = ucl_check_variable (parser, p + 1, end - p - 1, &out_len, &vars_found);
523		}
524		else {
525			p ++;
526			out_len ++;
527		}
528	}
529
530	if (!vars_found) {
531		/* Trivial case */
532		*dst = NULL;
533		return in_len;
534	}
535
536	*dst = UCL_ALLOC (out_len + 1);
537	if (*dst == NULL) {
538		return in_len;
539	}
540
541	d = *dst;
542	p = src;
543	while (p != end) {
544		if (*p == '$') {
545			p = ucl_expand_single_variable (parser, p, end - p, &d);
546		}
547		else {
548			*d++ = *p++;
549		}
550	}
551
552	*d = '\0';
553
554	return out_len;
555}
556
557/**
558 * Store or copy pointer to the trash stack
559 * @param parser parser object
560 * @param src src string
561 * @param dst destination buffer (trash stack pointer)
562 * @param dst_const const destination pointer (e.g. value of object)
563 * @param in_len input length
564 * @param need_unescape need to unescape source (and copy it)
565 * @param need_lowercase need to lowercase value (and copy)
566 * @param need_expand need to expand variables (and copy as well)
567 * @return output length (excluding \0 symbol)
568 */
569static inline ssize_t
570ucl_copy_or_store_ptr (struct ucl_parser *parser,
571		const unsigned char *src, unsigned char **dst,
572		const char **dst_const, size_t in_len,
573		bool need_unescape, bool need_lowercase, bool need_expand)
574{
575	ssize_t ret = -1, tret;
576	unsigned char *tmp;
577
578	if (need_unescape || need_lowercase ||
579			(need_expand && parser->variables != NULL) ||
580			!(parser->flags & UCL_PARSER_ZEROCOPY)) {
581		/* Copy string */
582		*dst = UCL_ALLOC (in_len + 1);
583		if (*dst == NULL) {
584			ucl_set_err (parser, UCL_EINTERNAL, "cannot allocate memory for a string",
585					&parser->err);
586			return false;
587		}
588		if (need_lowercase) {
589			ret = ucl_strlcpy_tolower (*dst, src, in_len + 1);
590		}
591		else {
592			ret = ucl_strlcpy_unsafe (*dst, src, in_len + 1);
593		}
594
595		if (need_unescape) {
596			ret = ucl_unescape_json_string (*dst, ret);
597		}
598		if (need_expand) {
599			tmp = *dst;
600			tret = ret;
601			ret = ucl_expand_variable (parser, dst, tmp, ret);
602			if (*dst == NULL) {
603				/* Nothing to expand */
604				*dst = tmp;
605				ret = tret;
606			}
607			else {
608				/* Free unexpanded value */
609				UCL_FREE (in_len + 1, tmp);
610			}
611		}
612		*dst_const = *dst;
613	}
614	else {
615		*dst_const = src;
616		ret = in_len;
617	}
618
619	return ret;
620}
621
622/**
623 * Create and append an object at the specified level
624 * @param parser
625 * @param is_array
626 * @param level
627 * @return
628 */
629static inline ucl_object_t *
630ucl_parser_add_container (ucl_object_t *obj, struct ucl_parser *parser,
631		bool is_array, int level)
632{
633	struct ucl_stack *st;
634
635	if (!is_array) {
636		if (obj == NULL) {
637			obj = ucl_object_new_full (UCL_OBJECT, parser->chunks->priority);
638		}
639		else {
640			obj->type = UCL_OBJECT;
641		}
642		if (obj->value.ov == NULL) {
643			obj->value.ov = ucl_hash_create (parser->flags & UCL_PARSER_KEY_LOWERCASE);
644		}
645		parser->state = UCL_STATE_KEY;
646	}
647	else {
648		if (obj == NULL) {
649			obj = ucl_object_new_full (UCL_ARRAY, parser->chunks->priority);
650		}
651		else {
652			obj->type = UCL_ARRAY;
653		}
654		parser->state = UCL_STATE_VALUE;
655	}
656
657	st = UCL_ALLOC (sizeof (struct ucl_stack));
658
659	if (st == NULL) {
660		ucl_set_err (parser, UCL_EINTERNAL, "cannot allocate memory for an object",
661				&parser->err);
662		ucl_object_unref (obj);
663		return NULL;
664	}
665
666	st->obj = obj;
667	st->level = level;
668	LL_PREPEND (parser->stack, st);
669	parser->cur_obj = obj;
670
671	return obj;
672}
673
674int
675ucl_maybe_parse_number (ucl_object_t *obj,
676		const char *start, const char *end, const char **pos,
677		bool allow_double, bool number_bytes, bool allow_time)
678{
679	const char *p = start, *c = start;
680	char *endptr;
681	bool got_dot = false, got_exp = false, need_double = false,
682			is_time = false, valid_start = false, is_hex = false,
683			is_neg = false;
684	double dv = 0;
685	int64_t lv = 0;
686
687	if (*p == '-') {
688		is_neg = true;
689		c ++;
690		p ++;
691	}
692	while (p < end) {
693		if (is_hex && isxdigit (*p)) {
694			p ++;
695		}
696		else if (isdigit (*p)) {
697			valid_start = true;
698			p ++;
699		}
700		else if (!is_hex && (*p == 'x' || *p == 'X')) {
701			is_hex = true;
702			allow_double = false;
703			c = p + 1;
704		}
705		else if (allow_double) {
706			if (p == c) {
707				/* Empty digits sequence, not a number */
708				*pos = start;
709				return EINVAL;
710			}
711			else if (*p == '.') {
712				if (got_dot) {
713					/* Double dots, not a number */
714					*pos = start;
715					return EINVAL;
716				}
717				else {
718					got_dot = true;
719					need_double = true;
720					p ++;
721				}
722			}
723			else if (*p == 'e' || *p == 'E') {
724				if (got_exp) {
725					/* Double exp, not a number */
726					*pos = start;
727					return EINVAL;
728				}
729				else {
730					got_exp = true;
731					need_double = true;
732					p ++;
733					if (p >= end) {
734						*pos = start;
735						return EINVAL;
736					}
737					if (!isdigit (*p) && *p != '+' && *p != '-') {
738						/* Wrong exponent sign */
739						*pos = start;
740						return EINVAL;
741					}
742					else {
743						p ++;
744					}
745				}
746			}
747			else {
748				/* Got the end of the number, need to check */
749				break;
750			}
751		}
752		else {
753			break;
754		}
755	}
756
757	if (!valid_start) {
758		*pos = start;
759		return EINVAL;
760	}
761
762	errno = 0;
763	if (need_double) {
764		dv = strtod (c, &endptr);
765	}
766	else {
767		if (is_hex) {
768			lv = strtoimax (c, &endptr, 16);
769		}
770		else {
771			lv = strtoimax (c, &endptr, 10);
772		}
773	}
774	if (errno == ERANGE) {
775		*pos = start;
776		return ERANGE;
777	}
778
779	/* Now check endptr */
780	if (endptr == NULL || ucl_lex_is_atom_end (*endptr) || *endptr == '\0') {
781		p = endptr;
782		goto set_obj;
783	}
784
785	if (endptr < end && endptr != start) {
786		p = endptr;
787		switch (*p) {
788		case 'm':
789		case 'M':
790		case 'g':
791		case 'G':
792		case 'k':
793		case 'K':
794			if (end - p >= 2) {
795				if (p[1] == 's' || p[1] == 'S') {
796					/* Milliseconds */
797					if (!need_double) {
798						need_double = true;
799						dv = lv;
800					}
801					is_time = true;
802					if (p[0] == 'm' || p[0] == 'M') {
803						dv /= 1000.;
804					}
805					else {
806						dv *= ucl_lex_num_multiplier (*p, false);
807					}
808					p += 2;
809					goto set_obj;
810				}
811				else if (number_bytes || (p[1] == 'b' || p[1] == 'B')) {
812					/* Bytes */
813					if (need_double) {
814						need_double = false;
815						lv = dv;
816					}
817					lv *= ucl_lex_num_multiplier (*p, true);
818					p += 2;
819					goto set_obj;
820				}
821				else if (ucl_lex_is_atom_end (p[1])) {
822					if (need_double) {
823						dv *= ucl_lex_num_multiplier (*p, false);
824					}
825					else {
826						lv *= ucl_lex_num_multiplier (*p, number_bytes);
827					}
828					p ++;
829					goto set_obj;
830				}
831				else if (allow_time && end - p >= 3) {
832					if (tolower (p[0]) == 'm' &&
833							tolower (p[1]) == 'i' &&
834							tolower (p[2]) == 'n') {
835						/* Minutes */
836						if (!need_double) {
837							need_double = true;
838							dv = lv;
839						}
840						is_time = true;
841						dv *= 60.;
842						p += 3;
843						goto set_obj;
844					}
845				}
846			}
847			else {
848				if (need_double) {
849					dv *= ucl_lex_num_multiplier (*p, false);
850				}
851				else {
852					lv *= ucl_lex_num_multiplier (*p, number_bytes);
853				}
854				p ++;
855				goto set_obj;
856			}
857			break;
858		case 'S':
859		case 's':
860			if (allow_time &&
861					(p == end - 1 || ucl_lex_is_atom_end (p[1]))) {
862				if (!need_double) {
863					need_double = true;
864					dv = lv;
865				}
866				p ++;
867				is_time = true;
868				goto set_obj;
869			}
870			break;
871		case 'h':
872		case 'H':
873		case 'd':
874		case 'D':
875		case 'w':
876		case 'W':
877		case 'Y':
878		case 'y':
879			if (allow_time &&
880					(p == end - 1 || ucl_lex_is_atom_end (p[1]))) {
881				if (!need_double) {
882					need_double = true;
883					dv = lv;
884				}
885				is_time = true;
886				dv *= ucl_lex_time_multiplier (*p);
887				p ++;
888				goto set_obj;
889			}
890			break;
891		case '\t':
892		case ' ':
893			while (p < end && ucl_test_character(*p, UCL_CHARACTER_WHITESPACE)) {
894				p++;
895			}
896			if (ucl_lex_is_atom_end(*p))
897				goto set_obj;
898			break;
899		}
900	}
901	else if (endptr == end) {
902		/* Just a number at the end of chunk */
903		p = endptr;
904		goto set_obj;
905	}
906
907	*pos = c;
908	return EINVAL;
909
910set_obj:
911	if (obj != NULL) {
912		if (allow_double && (need_double || is_time)) {
913			if (!is_time) {
914				obj->type = UCL_FLOAT;
915			}
916			else {
917				obj->type = UCL_TIME;
918			}
919			obj->value.dv = is_neg ? (-dv) : dv;
920		}
921		else {
922			obj->type = UCL_INT;
923			obj->value.iv = is_neg ? (-lv) : lv;
924		}
925	}
926	*pos = p;
927	return 0;
928}
929
930/**
931 * Parse possible number
932 * @param parser
933 * @param chunk
934 * @param obj
935 * @return true if a number has been parsed
936 */
937static bool
938ucl_lex_number (struct ucl_parser *parser,
939		struct ucl_chunk *chunk, ucl_object_t *obj)
940{
941	const unsigned char *pos;
942	int ret;
943
944	ret = ucl_maybe_parse_number (obj, chunk->pos, chunk->end, (const char **)&pos,
945			true, false, ((parser->flags & UCL_PARSER_NO_TIME) == 0));
946
947	if (ret == 0) {
948		chunk->remain -= pos - chunk->pos;
949		chunk->column += pos - chunk->pos;
950		chunk->pos = pos;
951		return true;
952	}
953	else if (ret == ERANGE) {
954		ucl_set_err (parser, UCL_ESYNTAX, "numeric value out of range",
955				&parser->err);
956	}
957
958	return false;
959}
960
961/**
962 * Parse quoted string with possible escapes
963 * @param parser
964 * @param chunk
965 * @param need_unescape
966 * @param ucl_escape
967 * @param var_expand
968 * @return true if a string has been parsed
969 */
970static bool
971ucl_lex_json_string (struct ucl_parser *parser,
972		struct ucl_chunk *chunk, bool *need_unescape, bool *ucl_escape, bool *var_expand)
973{
974	const unsigned char *p = chunk->pos;
975	unsigned char c;
976	int i;
977
978	while (p < chunk->end) {
979		c = *p;
980		if (c < 0x1F) {
981			/* Unmasked control character */
982			if (c == '\n') {
983				ucl_set_err (parser, UCL_ESYNTAX, "unexpected newline",
984						&parser->err);
985			}
986			else {
987				ucl_set_err (parser, UCL_ESYNTAX, "unexpected control character",
988						&parser->err);
989			}
990			return false;
991		}
992		else if (c == '\\') {
993			ucl_chunk_skipc (chunk, p);
994			c = *p;
995			if (p >= chunk->end) {
996				ucl_set_err (parser, UCL_ESYNTAX, "unfinished escape character",
997						&parser->err);
998				return false;
999			}
1000			else if (ucl_test_character (c, UCL_CHARACTER_ESCAPE)) {
1001				if (c == 'u') {
1002					ucl_chunk_skipc (chunk, p);
1003					for (i = 0; i < 4 && p < chunk->end; i ++) {
1004						if (!isxdigit (*p)) {
1005							ucl_set_err (parser, UCL_ESYNTAX, "invalid utf escape",
1006									&parser->err);
1007							return false;
1008						}
1009						ucl_chunk_skipc (chunk, p);
1010					}
1011					if (p >= chunk->end) {
1012						ucl_set_err (parser, UCL_ESYNTAX, "unfinished escape character",
1013								&parser->err);
1014						return false;
1015					}
1016				}
1017				else {
1018					ucl_chunk_skipc (chunk, p);
1019				}
1020			}
1021			*need_unescape = true;
1022			*ucl_escape = true;
1023			continue;
1024		}
1025		else if (c == '"') {
1026			ucl_chunk_skipc (chunk, p);
1027			return true;
1028		}
1029		else if (ucl_test_character (c, UCL_CHARACTER_UCL_UNSAFE)) {
1030			*ucl_escape = true;
1031		}
1032		else if (c == '$') {
1033			*var_expand = true;
1034		}
1035		ucl_chunk_skipc (chunk, p);
1036	}
1037
1038	ucl_set_err (parser, UCL_ESYNTAX, "no quote at the end of json string",
1039			&parser->err);
1040	return false;
1041}
1042
1043static void
1044ucl_parser_append_elt (struct ucl_parser *parser, ucl_hash_t *cont,
1045		ucl_object_t *top,
1046		ucl_object_t *elt)
1047{
1048	ucl_object_t *nobj;
1049
1050	if ((parser->flags & UCL_PARSER_NO_IMPLICIT_ARRAYS) == 0) {
1051		/* Implicit array */
1052		top->flags |= UCL_OBJECT_MULTIVALUE;
1053		DL_APPEND (top, elt);
1054		parser->stack->obj->len ++;
1055	}
1056	else {
1057		if ((top->flags & UCL_OBJECT_MULTIVALUE) != 0) {
1058			/* Just add to the explicit array */
1059			ucl_array_append (top, elt);
1060		}
1061		else {
1062			/* Convert to an array */
1063			nobj = ucl_object_typed_new (UCL_ARRAY);
1064			nobj->key = top->key;
1065			nobj->keylen = top->keylen;
1066			nobj->flags |= UCL_OBJECT_MULTIVALUE;
1067			ucl_array_append (nobj, top);
1068			ucl_array_append (nobj, elt);
1069			ucl_hash_replace (cont, top, nobj);
1070		}
1071	}
1072}
1073
1074bool
1075ucl_parser_process_object_element (struct ucl_parser *parser, ucl_object_t *nobj)
1076{
1077	ucl_hash_t *container;
1078	ucl_object_t *tobj;
1079	char errmsg[256];
1080
1081	container = parser->stack->obj->value.ov;
1082
1083	tobj = __DECONST (ucl_object_t *, ucl_hash_search_obj (container, nobj));
1084	if (tobj == NULL) {
1085		container = ucl_hash_insert_object (container, nobj,
1086				parser->flags & UCL_PARSER_KEY_LOWERCASE);
1087		nobj->prev = nobj;
1088		nobj->next = NULL;
1089		parser->stack->obj->len ++;
1090	}
1091	else {
1092		unsigned priold = ucl_object_get_priority (tobj),
1093				prinew = ucl_object_get_priority (nobj);
1094		switch (parser->chunks->strategy) {
1095
1096		case UCL_DUPLICATE_APPEND:
1097			/*
1098			 * The logic here is the following:
1099			 *
1100			 * - if we have two objects with the same priority, then we form an
1101			 * implicit or explicit array
1102			 * - if a new object has bigger priority, then we overwrite an old one
1103			 * - if a new object has lower priority, then we ignore it
1104			 */
1105
1106
1107			/* Special case for inherited objects */
1108			if (tobj->flags & UCL_OBJECT_INHERITED) {
1109				prinew = priold + 1;
1110			}
1111
1112			if (priold == prinew) {
1113				ucl_parser_append_elt (parser, container, tobj, nobj);
1114			}
1115			else if (priold > prinew) {
1116				/*
1117				 * We add this new object to a list of trash objects just to ensure
1118				 * that it won't come to any real object
1119				 * XXX: rather inefficient approach
1120				 */
1121				DL_APPEND (parser->trash_objs, nobj);
1122			}
1123			else {
1124				ucl_hash_replace (container, tobj, nobj);
1125				ucl_object_unref (tobj);
1126			}
1127
1128			break;
1129
1130		case UCL_DUPLICATE_REWRITE:
1131			/* We just rewrite old values regardless of priority */
1132			ucl_hash_replace (container, tobj, nobj);
1133			ucl_object_unref (tobj);
1134
1135			break;
1136
1137		case UCL_DUPLICATE_ERROR:
1138			snprintf(errmsg, sizeof(errmsg),
1139					"duplicate element for key '%s' found",
1140					nobj->key);
1141			ucl_set_err (parser, UCL_EMERGE, errmsg, &parser->err);
1142			return false;
1143
1144		case UCL_DUPLICATE_MERGE:
1145			/*
1146			 * Here we do have some old object so we just push it on top of objects stack
1147			 * Check priority and then perform the merge on the remaining objects
1148			 */
1149			if (tobj->type == UCL_OBJECT || tobj->type == UCL_ARRAY) {
1150				ucl_object_unref (nobj);
1151				nobj = tobj;
1152			}
1153			else if (priold == prinew) {
1154				ucl_parser_append_elt (parser, container, tobj, nobj);
1155			}
1156			else if (priold > prinew) {
1157				/*
1158				 * We add this new object to a list of trash objects just to ensure
1159				 * that it won't come to any real object
1160				 * XXX: rather inefficient approach
1161				 */
1162				DL_APPEND (parser->trash_objs, nobj);
1163			}
1164			else {
1165				ucl_hash_replace (container, tobj, nobj);
1166				ucl_object_unref (tobj);
1167			}
1168			break;
1169		}
1170	}
1171
1172	parser->stack->obj->value.ov = container;
1173	parser->cur_obj = nobj;
1174	ucl_attach_comment (parser, nobj, false);
1175
1176	return true;
1177}
1178
1179/**
1180 * Parse a key in an object
1181 * @param parser
1182 * @param chunk
1183 * @param next_key
1184 * @param end_of_object
1185 * @return true if a key has been parsed
1186 */
1187static bool
1188ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk,
1189		bool *next_key, bool *end_of_object)
1190{
1191	const unsigned char *p, *c = NULL, *end, *t;
1192	const char *key = NULL;
1193	bool got_quote = false, got_eq = false, got_semicolon = false,
1194			need_unescape = false, ucl_escape = false, var_expand = false,
1195			got_content = false, got_sep = false;
1196	ucl_object_t *nobj;
1197	ssize_t keylen;
1198
1199	p = chunk->pos;
1200
1201	if (*p == '.') {
1202		/* It is macro actually */
1203		if (!(parser->flags & UCL_PARSER_DISABLE_MACRO)) {
1204			ucl_chunk_skipc (chunk, p);
1205		}
1206
1207		parser->prev_state = parser->state;
1208		parser->state = UCL_STATE_MACRO_NAME;
1209		*end_of_object = false;
1210		return true;
1211	}
1212	while (p < chunk->end) {
1213		/*
1214		 * A key must start with alpha, number, '/' or '_' and end with space character
1215		 */
1216		if (c == NULL) {
1217			if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1218				if (!ucl_skip_comments (parser)) {
1219					return false;
1220				}
1221				p = chunk->pos;
1222			}
1223			else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1224				ucl_chunk_skipc (chunk, p);
1225			}
1226			else if (ucl_test_character (*p, UCL_CHARACTER_KEY_START)) {
1227				/* The first symbol */
1228				c = p;
1229				ucl_chunk_skipc (chunk, p);
1230				got_content = true;
1231			}
1232			else if (*p == '"') {
1233				/* JSON style key */
1234				c = p + 1;
1235				got_quote = true;
1236				got_content = true;
1237				ucl_chunk_skipc (chunk, p);
1238			}
1239			else if (*p == '}') {
1240				/* We have actually end of an object */
1241				*end_of_object = true;
1242				return true;
1243			}
1244			else if (*p == '.') {
1245				ucl_chunk_skipc (chunk, p);
1246				parser->prev_state = parser->state;
1247				parser->state = UCL_STATE_MACRO_NAME;
1248				return true;
1249			}
1250			else {
1251				/* Invalid identifier */
1252				ucl_set_err (parser, UCL_ESYNTAX, "key must begin with a letter",
1253						&parser->err);
1254				return false;
1255			}
1256		}
1257		else {
1258			/* Parse the body of a key */
1259			if (!got_quote) {
1260				if (ucl_test_character (*p, UCL_CHARACTER_KEY)) {
1261					got_content = true;
1262					ucl_chunk_skipc (chunk, p);
1263				}
1264				else if (ucl_test_character (*p, UCL_CHARACTER_KEY_SEP)) {
1265					end = p;
1266					break;
1267				}
1268				else {
1269					ucl_set_err (parser, UCL_ESYNTAX, "invalid character in a key",
1270							&parser->err);
1271					return false;
1272				}
1273			}
1274			else {
1275				/* We need to parse json like quoted string */
1276				if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1277					return false;
1278				}
1279				/* Always escape keys obtained via json */
1280				end = chunk->pos - 1;
1281				p = chunk->pos;
1282				break;
1283			}
1284		}
1285	}
1286
1287	if (p >= chunk->end && got_content) {
1288		ucl_set_err (parser, UCL_ESYNTAX, "unfinished key", &parser->err);
1289		return false;
1290	}
1291	else if (!got_content) {
1292		return true;
1293	}
1294	*end_of_object = false;
1295	/* We are now at the end of the key, need to parse the rest */
1296	while (p < chunk->end) {
1297		if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
1298			ucl_chunk_skipc (chunk, p);
1299		}
1300		else if (*p == '=') {
1301			if (!got_eq && !got_semicolon) {
1302				ucl_chunk_skipc (chunk, p);
1303				got_eq = true;
1304			}
1305			else {
1306				ucl_set_err (parser, UCL_ESYNTAX, "unexpected '=' character",
1307						&parser->err);
1308				return false;
1309			}
1310		}
1311		else if (*p == ':') {
1312			if (!got_eq && !got_semicolon) {
1313				ucl_chunk_skipc (chunk, p);
1314				got_semicolon = true;
1315			}
1316			else {
1317				ucl_set_err (parser, UCL_ESYNTAX, "unexpected ':' character",
1318						&parser->err);
1319				return false;
1320			}
1321		}
1322		else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1323			/* Check for comment */
1324			if (!ucl_skip_comments (parser)) {
1325				return false;
1326			}
1327			p = chunk->pos;
1328		}
1329		else {
1330			/* Start value */
1331			break;
1332		}
1333	}
1334
1335	if (p >= chunk->end && got_content) {
1336		ucl_set_err (parser, UCL_ESYNTAX, "unfinished key", &parser->err);
1337		return false;
1338	}
1339
1340	got_sep = got_semicolon || got_eq;
1341
1342	if (!got_sep) {
1343		/*
1344		 * Maybe we have more keys nested, so search for termination character.
1345		 * Possible choices:
1346		 * 1) key1 key2 ... keyN [:=] value <- we treat that as error
1347		 * 2) key1 ... keyN {} or [] <- we treat that as nested objects
1348		 * 3) key1 value[;,\n] <- we treat that as linear object
1349		 */
1350		t = p;
1351		*next_key = false;
1352		while (ucl_test_character (*t, UCL_CHARACTER_WHITESPACE)) {
1353			t ++;
1354		}
1355		/* Check first non-space character after a key */
1356		if (*t != '{' && *t != '[') {
1357			while (t < chunk->end) {
1358				if (*t == ',' || *t == ';' || *t == '\n' || *t == '\r') {
1359					break;
1360				}
1361				else if (*t == '{' || *t == '[') {
1362					*next_key = true;
1363					break;
1364				}
1365				t ++;
1366			}
1367		}
1368	}
1369
1370	/* Create a new object */
1371	nobj = ucl_object_new_full (UCL_NULL, parser->chunks->priority);
1372	keylen = ucl_copy_or_store_ptr (parser, c, &nobj->trash_stack[UCL_TRASH_KEY],
1373			&key, end - c, need_unescape, parser->flags & UCL_PARSER_KEY_LOWERCASE, false);
1374	if (keylen == -1) {
1375		ucl_object_unref (nobj);
1376		return false;
1377	}
1378	else if (keylen == 0) {
1379		ucl_set_err (parser, UCL_ESYNTAX, "empty keys are not allowed", &parser->err);
1380		ucl_object_unref (nobj);
1381		return false;
1382	}
1383
1384	nobj->key = key;
1385	nobj->keylen = keylen;
1386
1387	if (!ucl_parser_process_object_element (parser, nobj)) {
1388		return false;
1389	}
1390
1391	if (ucl_escape) {
1392		nobj->flags |= UCL_OBJECT_NEED_KEY_ESCAPE;
1393	}
1394
1395
1396	return true;
1397}
1398
1399/**
1400 * Parse a cl string
1401 * @param parser
1402 * @param chunk
1403 * @param var_expand
1404 * @param need_unescape
1405 * @return true if a key has been parsed
1406 */
1407static bool
1408ucl_parse_string_value (struct ucl_parser *parser,
1409		struct ucl_chunk *chunk, bool *var_expand, bool *need_unescape)
1410{
1411	const unsigned char *p;
1412	enum {
1413		UCL_BRACE_ROUND = 0,
1414		UCL_BRACE_SQUARE,
1415		UCL_BRACE_FIGURE
1416	};
1417	int braces[3][2] = {{0, 0}, {0, 0}, {0, 0}};
1418
1419	p = chunk->pos;
1420
1421	while (p < chunk->end) {
1422
1423		/* Skip pairs of figure braces */
1424		if (*p == '{') {
1425			braces[UCL_BRACE_FIGURE][0] ++;
1426		}
1427		else if (*p == '}') {
1428			braces[UCL_BRACE_FIGURE][1] ++;
1429			if (braces[UCL_BRACE_FIGURE][1] <= braces[UCL_BRACE_FIGURE][0]) {
1430				/* This is not a termination symbol, continue */
1431				ucl_chunk_skipc (chunk, p);
1432				continue;
1433			}
1434		}
1435		/* Skip pairs of square braces */
1436		else if (*p == '[') {
1437			braces[UCL_BRACE_SQUARE][0] ++;
1438		}
1439		else if (*p == ']') {
1440			braces[UCL_BRACE_SQUARE][1] ++;
1441			if (braces[UCL_BRACE_SQUARE][1] <= braces[UCL_BRACE_SQUARE][0]) {
1442				/* This is not a termination symbol, continue */
1443				ucl_chunk_skipc (chunk, p);
1444				continue;
1445			}
1446		}
1447		else if (*p == '$') {
1448			*var_expand = true;
1449		}
1450		else if (*p == '\\') {
1451			*need_unescape = true;
1452			ucl_chunk_skipc (chunk, p);
1453			if (p < chunk->end) {
1454				ucl_chunk_skipc (chunk, p);
1455			}
1456			continue;
1457		}
1458
1459		if (ucl_lex_is_atom_end (*p) || (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1460			break;
1461		}
1462		ucl_chunk_skipc (chunk, p);
1463	}
1464
1465	return true;
1466}
1467
1468/**
1469 * Parse multiline string ending with \n{term}\n
1470 * @param parser
1471 * @param chunk
1472 * @param term
1473 * @param term_len
1474 * @param beg
1475 * @param var_expand
1476 * @return size of multiline string or 0 in case of error
1477 */
1478static int
1479ucl_parse_multiline_string (struct ucl_parser *parser,
1480		struct ucl_chunk *chunk, const unsigned char *term,
1481		int term_len, unsigned char const **beg,
1482		bool *var_expand)
1483{
1484	const unsigned char *p, *c, *tend;
1485	bool newline = false;
1486	int len = 0;
1487
1488	p = chunk->pos;
1489
1490	c = p;
1491
1492	while (p < chunk->end) {
1493		if (newline) {
1494			if (chunk->end - p < term_len) {
1495				return 0;
1496			}
1497			else if (memcmp (p, term, term_len) == 0) {
1498				tend = p + term_len;
1499				if (*tend != '\n' && *tend != ';' && *tend != ',') {
1500					/* Incomplete terminator */
1501					ucl_chunk_skipc (chunk, p);
1502					continue;
1503				}
1504				len = p - c;
1505				chunk->remain -= term_len;
1506				chunk->pos = p + term_len;
1507				chunk->column = term_len;
1508				*beg = c;
1509				break;
1510			}
1511		}
1512		if (*p == '\n') {
1513			newline = true;
1514		}
1515		else {
1516			if (*p == '$') {
1517				*var_expand = true;
1518			}
1519			newline = false;
1520		}
1521		ucl_chunk_skipc (chunk, p);
1522	}
1523
1524	return len;
1525}
1526
1527static inline ucl_object_t*
1528ucl_parser_get_container (struct ucl_parser *parser)
1529{
1530	ucl_object_t *t, *obj = NULL;
1531
1532	if (parser == NULL || parser->stack == NULL || parser->stack->obj == NULL) {
1533		return NULL;
1534	}
1535
1536	if (parser->stack->obj->type == UCL_ARRAY) {
1537		/* Object must be allocated */
1538		obj = ucl_object_new_full (UCL_NULL, parser->chunks->priority);
1539		t = parser->stack->obj;
1540
1541		if (!ucl_array_append (t, obj)) {
1542			ucl_object_unref (obj);
1543			return NULL;
1544		}
1545
1546		parser->cur_obj = obj;
1547		ucl_attach_comment (parser, obj, false);
1548	}
1549	else {
1550		/* Object has been already allocated */
1551		obj = parser->cur_obj;
1552	}
1553
1554	return obj;
1555}
1556
1557/**
1558 * Handle value data
1559 * @param parser
1560 * @param chunk
1561 * @return
1562 */
1563static bool
1564ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1565{
1566	const unsigned char *p, *c;
1567	ucl_object_t *obj = NULL;
1568	unsigned int stripped_spaces;
1569	int str_len;
1570	bool need_unescape = false, ucl_escape = false, var_expand = false;
1571
1572	p = chunk->pos;
1573
1574	/* Skip any spaces and comments */
1575	if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) ||
1576			(chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1577		while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1578			ucl_chunk_skipc (chunk, p);
1579		}
1580		if (!ucl_skip_comments (parser)) {
1581			return false;
1582		}
1583		p = chunk->pos;
1584	}
1585
1586	while (p < chunk->end) {
1587		c = p;
1588		switch (*p) {
1589		case '"':
1590			ucl_chunk_skipc (chunk, p);
1591
1592			if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape,
1593					&var_expand)) {
1594				return false;
1595			}
1596
1597			obj = ucl_parser_get_container (parser);
1598			if (!obj) {
1599				return false;
1600			}
1601
1602			str_len = chunk->pos - c - 2;
1603			obj->type = UCL_STRING;
1604			if ((str_len = ucl_copy_or_store_ptr (parser, c + 1,
1605					&obj->trash_stack[UCL_TRASH_VALUE],
1606					&obj->value.sv, str_len, need_unescape, false,
1607					var_expand)) == -1) {
1608				return false;
1609			}
1610			obj->len = str_len;
1611
1612			parser->state = UCL_STATE_AFTER_VALUE;
1613			p = chunk->pos;
1614
1615			return true;
1616			break;
1617		case '{':
1618			obj = ucl_parser_get_container (parser);
1619			/* We have a new object */
1620			obj = ucl_parser_add_container (obj, parser, false, parser->stack->level);
1621			if (obj == NULL) {
1622				return false;
1623			}
1624
1625			ucl_chunk_skipc (chunk, p);
1626
1627			return true;
1628			break;
1629		case '[':
1630			obj = ucl_parser_get_container (parser);
1631			/* We have a new array */
1632			obj = ucl_parser_add_container (obj, parser, true, parser->stack->level);
1633			if (obj == NULL) {
1634				return false;
1635			}
1636
1637			ucl_chunk_skipc (chunk, p);
1638
1639			return true;
1640			break;
1641		case ']':
1642			/* We have the array ending */
1643			if (parser->stack && parser->stack->obj->type == UCL_ARRAY) {
1644				parser->state = UCL_STATE_AFTER_VALUE;
1645				return true;
1646			}
1647			else {
1648				goto parse_string;
1649			}
1650			break;
1651		case '<':
1652			obj = ucl_parser_get_container (parser);
1653			/* We have something like multiline value, which must be <<[A-Z]+\n */
1654			if (chunk->end - p > 3) {
1655				if (memcmp (p, "<<", 2) == 0) {
1656					p += 2;
1657					/* We allow only uppercase characters in multiline definitions */
1658					while (p < chunk->end && *p >= 'A' && *p <= 'Z') {
1659						p ++;
1660					}
1661					if (*p =='\n') {
1662						/* Set chunk positions and start multiline parsing */
1663						c += 2;
1664						chunk->remain -= p - c;
1665						chunk->pos = p + 1;
1666						chunk->column = 0;
1667						chunk->line ++;
1668						if ((str_len = ucl_parse_multiline_string (parser, chunk, c,
1669								p - c, &c, &var_expand)) == 0) {
1670							ucl_set_err (parser, UCL_ESYNTAX,
1671									"unterminated multiline value", &parser->err);
1672							return false;
1673						}
1674
1675						obj->type = UCL_STRING;
1676						obj->flags |= UCL_OBJECT_MULTILINE;
1677						if ((str_len = ucl_copy_or_store_ptr (parser, c,
1678								&obj->trash_stack[UCL_TRASH_VALUE],
1679								&obj->value.sv, str_len - 1, false,
1680								false, var_expand)) == -1) {
1681							return false;
1682						}
1683						obj->len = str_len;
1684
1685						parser->state = UCL_STATE_AFTER_VALUE;
1686
1687						return true;
1688					}
1689				}
1690			}
1691			/* Fallback to ordinary strings */
1692		default:
1693parse_string:
1694			if (obj == NULL) {
1695				obj = ucl_parser_get_container (parser);
1696			}
1697
1698			/* Parse atom */
1699			if (ucl_test_character (*p, UCL_CHARACTER_VALUE_DIGIT_START)) {
1700				if (!ucl_lex_number (parser, chunk, obj)) {
1701					if (parser->state == UCL_STATE_ERROR) {
1702						return false;
1703					}
1704				}
1705				else {
1706					parser->state = UCL_STATE_AFTER_VALUE;
1707					return true;
1708				}
1709				/* Fallback to normal string */
1710			}
1711
1712			if (!ucl_parse_string_value (parser, chunk, &var_expand,
1713					&need_unescape)) {
1714				return false;
1715			}
1716			/* Cut trailing spaces */
1717			stripped_spaces = 0;
1718			while (ucl_test_character (*(chunk->pos - 1 - stripped_spaces),
1719					UCL_CHARACTER_WHITESPACE)) {
1720				stripped_spaces ++;
1721			}
1722			str_len = chunk->pos - c - stripped_spaces;
1723			if (str_len <= 0) {
1724				ucl_set_err (parser, UCL_ESYNTAX, "string value must not be empty",
1725						&parser->err);
1726				return false;
1727			}
1728			else if (str_len == 4 && memcmp (c, "null", 4) == 0) {
1729				obj->len = 0;
1730				obj->type = UCL_NULL;
1731			}
1732			else if (!ucl_maybe_parse_boolean (obj, c, str_len)) {
1733				obj->type = UCL_STRING;
1734				if ((str_len = ucl_copy_or_store_ptr (parser, c,
1735						&obj->trash_stack[UCL_TRASH_VALUE],
1736						&obj->value.sv, str_len, need_unescape,
1737						false, var_expand)) == -1) {
1738					return false;
1739				}
1740				obj->len = str_len;
1741			}
1742			parser->state = UCL_STATE_AFTER_VALUE;
1743			p = chunk->pos;
1744
1745			return true;
1746			break;
1747		}
1748	}
1749
1750	return true;
1751}
1752
1753/**
1754 * Handle after value data
1755 * @param parser
1756 * @param chunk
1757 * @return
1758 */
1759static bool
1760ucl_parse_after_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1761{
1762	const unsigned char *p;
1763	bool got_sep = false;
1764	struct ucl_stack *st;
1765
1766	p = chunk->pos;
1767
1768	while (p < chunk->end) {
1769		if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
1770			/* Skip whitespaces */
1771			ucl_chunk_skipc (chunk, p);
1772		}
1773		else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1774			/* Skip comment */
1775			if (!ucl_skip_comments (parser)) {
1776				return false;
1777			}
1778			/* Treat comment as a separator */
1779			got_sep = true;
1780			p = chunk->pos;
1781		}
1782		else if (ucl_test_character (*p, UCL_CHARACTER_VALUE_END)) {
1783			if (*p == '}' || *p == ']') {
1784				if (parser->stack == NULL) {
1785					ucl_set_err (parser, UCL_ESYNTAX,
1786							"end of array or object detected without corresponding start",
1787							&parser->err);
1788					return false;
1789				}
1790				if ((*p == '}' && parser->stack->obj->type == UCL_OBJECT) ||
1791						(*p == ']' && parser->stack->obj->type == UCL_ARRAY)) {
1792
1793					/* Pop all nested objects from a stack */
1794					st = parser->stack;
1795					parser->stack = st->next;
1796					UCL_FREE (sizeof (struct ucl_stack), st);
1797
1798					if (parser->cur_obj) {
1799						ucl_attach_comment (parser, parser->cur_obj, true);
1800					}
1801
1802					while (parser->stack != NULL) {
1803						st = parser->stack;
1804
1805						if (st->next == NULL || st->next->level == st->level) {
1806							break;
1807						}
1808
1809						parser->stack = st->next;
1810						parser->cur_obj = st->obj;
1811						UCL_FREE (sizeof (struct ucl_stack), st);
1812					}
1813				}
1814				else {
1815					ucl_set_err (parser, UCL_ESYNTAX,
1816							"unexpected terminating symbol detected",
1817							&parser->err);
1818					return false;
1819				}
1820
1821				if (parser->stack == NULL) {
1822					/* Ignore everything after a top object */
1823					return true;
1824				}
1825				else {
1826					ucl_chunk_skipc (chunk, p);
1827				}
1828				got_sep = true;
1829			}
1830			else {
1831				/* Got a separator */
1832				got_sep = true;
1833				ucl_chunk_skipc (chunk, p);
1834			}
1835		}
1836		else {
1837			/* Anything else */
1838			if (!got_sep) {
1839				ucl_set_err (parser, UCL_ESYNTAX, "delimiter is missing",
1840						&parser->err);
1841				return false;
1842			}
1843			return true;
1844		}
1845	}
1846
1847	return true;
1848}
1849
1850static bool
1851ucl_skip_macro_as_comment (struct ucl_parser *parser,
1852		struct ucl_chunk *chunk)
1853{
1854	const unsigned char *p, *c;
1855	enum {
1856		macro_skip_start = 0,
1857		macro_has_symbols,
1858		macro_has_obrace,
1859		macro_has_quote,
1860		macro_has_backslash,
1861		macro_has_sqbrace,
1862		macro_save
1863	} state = macro_skip_start, prev_state = macro_skip_start;
1864
1865	p = chunk->pos;
1866	c = chunk->pos;
1867
1868	while (p < chunk->end) {
1869		switch (state) {
1870		case macro_skip_start:
1871			if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
1872				state = macro_has_symbols;
1873			}
1874			else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1875				state = macro_save;
1876				continue;
1877			}
1878
1879			ucl_chunk_skipc (chunk, p);
1880			break;
1881
1882		case macro_has_symbols:
1883			if (*p == '{') {
1884				state = macro_has_sqbrace;
1885			}
1886			else if (*p == '(') {
1887				state = macro_has_obrace;
1888			}
1889			else if (*p == '"') {
1890				state = macro_has_quote;
1891			}
1892			else if (*p == '\n') {
1893				state = macro_save;
1894				continue;
1895			}
1896
1897			ucl_chunk_skipc (chunk, p);
1898			break;
1899
1900		case macro_has_obrace:
1901			if (*p == '\\') {
1902				prev_state = state;
1903				state = macro_has_backslash;
1904			}
1905			else if (*p == ')') {
1906				state = macro_has_symbols;
1907			}
1908
1909			ucl_chunk_skipc (chunk, p);
1910			break;
1911
1912		case macro_has_sqbrace:
1913			if (*p == '\\') {
1914				prev_state = state;
1915				state = macro_has_backslash;
1916			}
1917			else if (*p == '}') {
1918				state = macro_save;
1919			}
1920
1921			ucl_chunk_skipc (chunk, p);
1922			break;
1923
1924		case macro_has_quote:
1925			if (*p == '\\') {
1926				prev_state = state;
1927				state = macro_has_backslash;
1928			}
1929			else if (*p == '"') {
1930				state = macro_save;
1931			}
1932
1933			ucl_chunk_skipc (chunk, p);
1934			break;
1935
1936		case macro_has_backslash:
1937			state = prev_state;
1938			ucl_chunk_skipc (chunk, p);
1939			break;
1940
1941		case macro_save:
1942			if (parser->flags & UCL_PARSER_SAVE_COMMENTS) {
1943				ucl_save_comment (parser, c, p - c);
1944			}
1945
1946			return true;
1947		}
1948	}
1949
1950	return false;
1951}
1952
1953/**
1954 * Handle macro data
1955 * @param parser
1956 * @param chunk
1957 * @param marco
1958 * @param macro_start
1959 * @param macro_len
1960 * @return
1961 */
1962static bool
1963ucl_parse_macro_value (struct ucl_parser *parser,
1964		struct ucl_chunk *chunk, struct ucl_macro *macro,
1965		unsigned char const **macro_start, size_t *macro_len)
1966{
1967	const unsigned char *p, *c;
1968	bool need_unescape = false, ucl_escape = false, var_expand = false;
1969
1970	p = chunk->pos;
1971
1972	switch (*p) {
1973	case '"':
1974		/* We have macro value encoded in quotes */
1975		c = p;
1976		ucl_chunk_skipc (chunk, p);
1977		if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1978			return false;
1979		}
1980
1981		*macro_start = c + 1;
1982		*macro_len = chunk->pos - c - 2;
1983		p = chunk->pos;
1984		break;
1985	case '{':
1986		/* We got a multiline macro body */
1987		ucl_chunk_skipc (chunk, p);
1988		/* Skip spaces at the beginning */
1989		while (p < chunk->end) {
1990			if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1991				ucl_chunk_skipc (chunk, p);
1992			}
1993			else {
1994				break;
1995			}
1996		}
1997		c = p;
1998		while (p < chunk->end) {
1999			if (*p == '}') {
2000				break;
2001			}
2002			ucl_chunk_skipc (chunk, p);
2003		}
2004		*macro_start = c;
2005		*macro_len = p - c;
2006		ucl_chunk_skipc (chunk, p);
2007		break;
2008	default:
2009		/* Macro is not enclosed in quotes or braces */
2010		c = p;
2011		while (p < chunk->end) {
2012			if (ucl_lex_is_atom_end (*p)) {
2013				break;
2014			}
2015			ucl_chunk_skipc (chunk, p);
2016		}
2017		*macro_start = c;
2018		*macro_len = p - c;
2019		break;
2020	}
2021
2022	/* We are at the end of a macro */
2023	/* Skip ';' and space characters and return to previous state */
2024	while (p < chunk->end) {
2025		if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) && *p != ';') {
2026			break;
2027		}
2028		ucl_chunk_skipc (chunk, p);
2029	}
2030	return true;
2031}
2032
2033/**
2034 * Parse macro arguments as UCL object
2035 * @param parser parser structure
2036 * @param chunk the current data chunk
2037 * @return
2038 */
2039static ucl_object_t *
2040ucl_parse_macro_arguments (struct ucl_parser *parser,
2041		struct ucl_chunk *chunk)
2042{
2043	ucl_object_t *res = NULL;
2044	struct ucl_parser *params_parser;
2045	int obraces = 1, ebraces = 0, state = 0;
2046	const unsigned char *p, *c;
2047	size_t args_len = 0;
2048	struct ucl_parser_saved_state saved;
2049
2050	saved.column = chunk->column;
2051	saved.line = chunk->line;
2052	saved.pos = chunk->pos;
2053	saved.remain = chunk->remain;
2054	p = chunk->pos;
2055
2056	if (*p != '(' || chunk->remain < 2) {
2057		return NULL;
2058	}
2059
2060	/* Set begin and start */
2061	ucl_chunk_skipc (chunk, p);
2062	c = p;
2063
2064	while ((p) < (chunk)->end) {
2065		switch (state) {
2066		case 0:
2067			/* Parse symbols and check for '(', ')' and '"' */
2068			if (*p == '(') {
2069				obraces ++;
2070			}
2071			else if (*p == ')') {
2072				ebraces ++;
2073			}
2074			else if (*p == '"') {
2075				state = 1;
2076			}
2077			/* Check pairing */
2078			if (obraces == ebraces) {
2079				state = 99;
2080			}
2081			else {
2082				args_len ++;
2083			}
2084			/* Check overflow */
2085			if (chunk->remain == 0) {
2086				goto restore_chunk;
2087			}
2088			ucl_chunk_skipc (chunk, p);
2089			break;
2090		case 1:
2091			/* We have quote character, so skip all but quotes */
2092			if (*p == '"' && *(p - 1) != '\\') {
2093				state = 0;
2094			}
2095			if (chunk->remain == 0) {
2096				goto restore_chunk;
2097			}
2098			args_len ++;
2099			ucl_chunk_skipc (chunk, p);
2100			break;
2101		case 99:
2102			/*
2103			 * We have read the full body of arguments, so we need to parse and set
2104			 * object from that
2105			 */
2106			params_parser = ucl_parser_new (parser->flags);
2107			if (!ucl_parser_add_chunk (params_parser, c, args_len)) {
2108				ucl_set_err (parser, UCL_ESYNTAX, "macro arguments parsing error",
2109						&parser->err);
2110			}
2111			else {
2112				res = ucl_parser_get_object (params_parser);
2113			}
2114			ucl_parser_free (params_parser);
2115
2116			return res;
2117
2118			break;
2119		}
2120	}
2121
2122	return res;
2123
2124restore_chunk:
2125	chunk->column = saved.column;
2126	chunk->line = saved.line;
2127	chunk->pos = saved.pos;
2128	chunk->remain = saved.remain;
2129
2130	return NULL;
2131}
2132
/*
 * Skip unsafe whitespace characters at p and then, if the first other
 * character starts a comment, skip the comments as well. Whitespace that
 * follows the comments is not skipped. The macro returns false from the
 * enclosing function if ucl_skip_comments() fails.
 */
#define SKIP_SPACES_COMMENTS(parser, chunk, p) do {								\
	while ((p) < (chunk)->end &&												\
			ucl_test_character (*(p), UCL_CHARACTER_WHITESPACE_UNSAFE)) {		\
		ucl_chunk_skipc (chunk, p);												\
	}																			\
	if ((p) < (chunk)->end && (chunk)->remain >= 2 &&							\
			ucl_lex_is_comment ((p)[0], (p)[1])) {								\
		if (!ucl_skip_comments (parser)) {										\
			return false;														\
		}																		\
		p = (chunk)->pos;														\
	}																			\
} while(0)
2147
2148/**
2149 * Handle the main states of rcl parser
2150 * @param parser parser structure
2151 * @return true if chunk has been parsed and false in case of error
2152 */
2153static bool
2154ucl_state_machine (struct ucl_parser *parser)
2155{
2156	ucl_object_t *obj, *macro_args;
2157	struct ucl_chunk *chunk = parser->chunks;
2158	const unsigned char *p, *c = NULL, *macro_start = NULL;
2159	unsigned char *macro_escaped;
2160	size_t macro_len = 0;
2161	struct ucl_macro *macro = NULL;
2162	bool next_key = false, end_of_object = false, ret;
2163
2164	if (parser->top_obj == NULL) {
2165		parser->state = UCL_STATE_INIT;
2166	}
2167
2168	p = chunk->pos;
2169	while (chunk->pos < chunk->end) {
2170		switch (parser->state) {
2171		case UCL_STATE_INIT:
2172			/*
2173			 * At the init state we can either go to the parse array or object
2174			 * if we got [ or { correspondingly or can just treat new data as
2175			 * a key of newly created object
2176			 */
2177			if (!ucl_skip_comments (parser)) {
2178				parser->prev_state = parser->state;
2179				parser->state = UCL_STATE_ERROR;
2180				return false;
2181			}
2182			else {
2183				/* Skip any spaces */
2184				while (p < chunk->end && ucl_test_character (*p,
2185						UCL_CHARACTER_WHITESPACE_UNSAFE)) {
2186					ucl_chunk_skipc (chunk, p);
2187				}
2188
2189				p = chunk->pos;
2190
2191				if (*p == '[') {
2192					parser->state = UCL_STATE_VALUE;
2193					ucl_chunk_skipc (chunk, p);
2194				}
2195				else {
2196					parser->state = UCL_STATE_KEY;
2197					if (*p == '{') {
2198						ucl_chunk_skipc (chunk, p);
2199					}
2200				}
2201
2202				if (parser->top_obj == NULL) {
2203					if (parser->state == UCL_STATE_VALUE) {
2204						obj = ucl_parser_add_container (NULL, parser, true, 0);
2205					}
2206					else {
2207						obj = ucl_parser_add_container (NULL, parser, false, 0);
2208					}
2209
2210					if (obj == NULL) {
2211						return false;
2212					}
2213
2214					parser->top_obj = obj;
2215					parser->cur_obj = obj;
2216				}
2217
2218			}
2219			break;
2220		case UCL_STATE_KEY:
2221			/* Skip any spaces */
2222			while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
2223				ucl_chunk_skipc (chunk, p);
2224			}
2225			if (p == chunk->end || *p == '}') {
2226				/* We have the end of an object */
2227				parser->state = UCL_STATE_AFTER_VALUE;
2228				continue;
2229			}
2230			if (parser->stack == NULL) {
2231				/* No objects are on stack, but we want to parse a key */
2232				ucl_set_err (parser, UCL_ESYNTAX, "top object is finished but the parser "
2233						"expects a key", &parser->err);
2234				parser->prev_state = parser->state;
2235				parser->state = UCL_STATE_ERROR;
2236				return false;
2237			}
2238			if (!ucl_parse_key (parser, chunk, &next_key, &end_of_object)) {
2239				parser->prev_state = parser->state;
2240				parser->state = UCL_STATE_ERROR;
2241				return false;
2242			}
2243			if (end_of_object) {
2244				p = chunk->pos;
2245				parser->state = UCL_STATE_AFTER_VALUE;
2246				continue;
2247			}
2248			else if (parser->state != UCL_STATE_MACRO_NAME) {
2249				if (next_key && parser->stack->obj->type == UCL_OBJECT) {
2250					/* Parse more keys and nest objects accordingly */
2251					obj = ucl_parser_add_container (parser->cur_obj, parser, false,
2252							parser->stack->level + 1);
2253					if (obj == NULL) {
2254						return false;
2255					}
2256				}
2257				else {
2258					parser->state = UCL_STATE_VALUE;
2259				}
2260			}
2261			else {
2262				c = chunk->pos;
2263			}
2264			p = chunk->pos;
2265			break;
2266		case UCL_STATE_VALUE:
2267			/* We need to check what we do have */
2268			if (!parser->cur_obj || !ucl_parse_value (parser, chunk)) {
2269				parser->prev_state = parser->state;
2270				parser->state = UCL_STATE_ERROR;
2271				return false;
2272			}
2273			/* State is set in ucl_parse_value call */
2274			p = chunk->pos;
2275			break;
2276		case UCL_STATE_AFTER_VALUE:
2277			if (!ucl_parse_after_value (parser, chunk)) {
2278				parser->prev_state = parser->state;
2279				parser->state = UCL_STATE_ERROR;
2280				return false;
2281			}
2282
2283			if (parser->stack != NULL) {
2284				if (parser->stack->obj->type == UCL_OBJECT) {
2285					parser->state = UCL_STATE_KEY;
2286				}
2287				else {
2288					/* Array */
2289					parser->state = UCL_STATE_VALUE;
2290				}
2291			}
2292			else {
2293				/* Skip everything at the end */
2294				return true;
2295			}
2296
2297			p = chunk->pos;
2298			break;
2299		case UCL_STATE_MACRO_NAME:
2300			if (parser->flags & UCL_PARSER_DISABLE_MACRO) {
2301				if (!ucl_skip_macro_as_comment (parser, chunk)) {
2302					/* We have invalid macro */
2303					ucl_create_err (&parser->err,
2304							"error on line %d at column %d: invalid macro",
2305							chunk->line,
2306							chunk->column);
2307					parser->state = UCL_STATE_ERROR;
2308					return false;
2309				}
2310				else {
2311					p = chunk->pos;
2312					parser->state = parser->prev_state;
2313				}
2314			}
2315			else {
2316				if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) &&
2317						*p != '(') {
2318					ucl_chunk_skipc (chunk, p);
2319				}
2320				else {
2321					if (c != NULL && p - c > 0) {
2322						/* We got macro name */
2323						macro_len = (size_t) (p - c);
2324						HASH_FIND (hh, parser->macroes, c, macro_len, macro);
2325						if (macro == NULL) {
2326							ucl_create_err (&parser->err,
2327									"error on line %d at column %d: "
2328									"unknown macro: '%.*s', character: '%c'",
2329									chunk->line,
2330									chunk->column,
2331									(int) (p - c),
2332									c,
2333									*chunk->pos);
2334							parser->state = UCL_STATE_ERROR;
2335							return false;
2336						}
2337						/* Now we need to skip all spaces */
2338						SKIP_SPACES_COMMENTS(parser, chunk, p);
2339						parser->state = UCL_STATE_MACRO;
2340					}
2341					else {
2342						/* We have invalid macro name */
2343						ucl_create_err (&parser->err,
2344								"error on line %d at column %d: invalid macro name",
2345								chunk->line,
2346								chunk->column);
2347						parser->state = UCL_STATE_ERROR;
2348						return false;
2349					}
2350				}
2351			}
2352			break;
2353		case UCL_STATE_MACRO:
2354			if (*chunk->pos == '(') {
2355				macro_args = ucl_parse_macro_arguments (parser, chunk);
2356				p = chunk->pos;
2357				if (macro_args) {
2358					SKIP_SPACES_COMMENTS(parser, chunk, p);
2359				}
2360			}
2361			else {
2362				macro_args = NULL;
2363			}
2364			if (!ucl_parse_macro_value (parser, chunk, macro,
2365					&macro_start, &macro_len)) {
2366				parser->prev_state = parser->state;
2367				parser->state = UCL_STATE_ERROR;
2368				return false;
2369			}
2370			macro_len = ucl_expand_variable (parser, &macro_escaped,
2371					macro_start, macro_len);
2372			parser->state = parser->prev_state;
2373
2374			if (macro_escaped == NULL && macro != NULL) {
2375				if (macro->is_context) {
2376					ret = macro->h.context_handler (macro_start, macro_len,
2377							macro_args,
2378							parser->top_obj,
2379							macro->ud);
2380				}
2381				else {
2382					ret = macro->h.handler (macro_start, macro_len, macro_args,
2383							macro->ud);
2384				}
2385			}
2386			else if (macro != NULL) {
2387				if (macro->is_context) {
2388					ret = macro->h.context_handler (macro_escaped, macro_len,
2389							macro_args,
2390							parser->top_obj,
2391							macro->ud);
2392				}
2393				else {
2394					ret = macro->h.handler (macro_escaped, macro_len, macro_args,
2395						macro->ud);
2396				}
2397
2398				UCL_FREE (macro_len + 1, macro_escaped);
2399			}
2400			else {
2401				ret = false;
2402				ucl_set_err (parser, UCL_EINTERNAL,
2403						"internal error: parser has macro undefined", &parser->err);
2404			}
2405
2406			/*
2407			 * Chunk can be modified within macro handler
2408			 */
2409			chunk = parser->chunks;
2410			p = chunk->pos;
2411
2412			if (macro_args) {
2413				ucl_object_unref (macro_args);
2414			}
2415
2416			if (!ret) {
2417				return false;
2418			}
2419			break;
2420		default:
2421			ucl_set_err (parser, UCL_EINTERNAL,
2422					"internal error: parser is in an unknown state", &parser->err);
2423			parser->state = UCL_STATE_ERROR;
2424			return false;
2425		}
2426	}
2427
2428	if (parser->last_comment) {
2429		if (parser->cur_obj) {
2430			ucl_attach_comment (parser, parser->cur_obj, true);
2431		}
2432		else if (parser->stack && parser->stack->obj) {
2433			ucl_attach_comment (parser, parser->stack->obj, true);
2434		}
2435		else if (parser->top_obj) {
2436			ucl_attach_comment (parser, parser->top_obj, true);
2437		}
2438		else {
2439			ucl_object_unref (parser->last_comment);
2440		}
2441	}
2442
2443	return true;
2444}
2445
2446struct ucl_parser*
2447ucl_parser_new (int flags)
2448{
2449	struct ucl_parser *parser;
2450
2451	parser = UCL_ALLOC (sizeof (struct ucl_parser));
2452	if (parser == NULL) {
2453		return NULL;
2454	}
2455
2456	memset (parser, 0, sizeof (struct ucl_parser));
2457
2458	ucl_parser_register_macro (parser, "include", ucl_include_handler, parser);
2459	ucl_parser_register_macro (parser, "try_include", ucl_try_include_handler, parser);
2460	ucl_parser_register_macro (parser, "includes", ucl_includes_handler, parser);
2461	ucl_parser_register_macro (parser, "priority", ucl_priority_handler, parser);
2462	ucl_parser_register_macro (parser, "load", ucl_load_handler, parser);
2463	ucl_parser_register_context_macro (parser, "inherit", ucl_inherit_handler, parser);
2464
2465	parser->flags = flags;
2466	parser->includepaths = NULL;
2467
2468	if (flags & UCL_PARSER_SAVE_COMMENTS) {
2469		parser->comments = ucl_object_typed_new (UCL_OBJECT);
2470	}
2471
2472	if (!(flags & UCL_PARSER_NO_FILEVARS)) {
2473		/* Initial assumption about filevars */
2474		ucl_parser_set_filevars (parser, NULL, false);
2475	}
2476
2477	return parser;
2478}
2479
2480bool
2481ucl_parser_set_default_priority (struct ucl_parser *parser, unsigned prio)
2482{
2483	if (parser == NULL) {
2484		return false;
2485	}
2486
2487	parser->default_priority = prio;
2488
2489	return true;
2490}
2491
2492void
2493ucl_parser_register_macro (struct ucl_parser *parser, const char *macro,
2494		ucl_macro_handler handler, void* ud)
2495{
2496	struct ucl_macro *new;
2497
2498	if (macro == NULL || handler == NULL) {
2499		return;
2500	}
2501
2502	new = UCL_ALLOC (sizeof (struct ucl_macro));
2503	if (new == NULL) {
2504		return;
2505	}
2506
2507	memset (new, 0, sizeof (struct ucl_macro));
2508	new->h.handler = handler;
2509	new->name = strdup (macro);
2510	new->ud = ud;
2511	HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new);
2512}
2513
2514void
2515ucl_parser_register_context_macro (struct ucl_parser *parser, const char *macro,
2516		ucl_context_macro_handler handler, void* ud)
2517{
2518	struct ucl_macro *new;
2519
2520	if (macro == NULL || handler == NULL) {
2521		return;
2522	}
2523
2524	new = UCL_ALLOC (sizeof (struct ucl_macro));
2525	if (new == NULL) {
2526		return;
2527	}
2528
2529	memset (new, 0, sizeof (struct ucl_macro));
2530	new->h.context_handler = handler;
2531	new->name = strdup (macro);
2532	new->ud = ud;
2533	new->is_context = true;
2534	HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new);
2535}
2536
2537void
2538ucl_parser_register_variable (struct ucl_parser *parser, const char *var,
2539		const char *value)
2540{
2541	struct ucl_variable *new = NULL, *cur;
2542
2543	if (var == NULL) {
2544		return;
2545	}
2546
2547	/* Find whether a variable already exists */
2548	LL_FOREACH (parser->variables, cur) {
2549		if (strcmp (cur->var, var) == 0) {
2550			new = cur;
2551			break;
2552		}
2553	}
2554
2555	if (value == NULL) {
2556
2557		if (new != NULL) {
2558			/* Remove variable */
2559			DL_DELETE (parser->variables, new);
2560			free (new->var);
2561			free (new->value);
2562			UCL_FREE (sizeof (struct ucl_variable), new);
2563		}
2564		else {
2565			/* Do nothing */
2566			return;
2567		}
2568	}
2569	else {
2570		if (new == NULL) {
2571			new = UCL_ALLOC (sizeof (struct ucl_variable));
2572			if (new == NULL) {
2573				return;
2574			}
2575			memset (new, 0, sizeof (struct ucl_variable));
2576			new->var = strdup (var);
2577			new->var_len = strlen (var);
2578			new->value = strdup (value);
2579			new->value_len = strlen (value);
2580
2581			DL_APPEND (parser->variables, new);
2582		}
2583		else {
2584			free (new->value);
2585			new->value = strdup (value);
2586			new->value_len = strlen (value);
2587		}
2588	}
2589}
2590
2591void
2592ucl_parser_set_variables_handler (struct ucl_parser *parser,
2593		ucl_variable_handler handler, void *ud)
2594{
2595	parser->var_handler = handler;
2596	parser->var_data = ud;
2597}
2598
2599bool
2600ucl_parser_add_chunk_full (struct ucl_parser *parser, const unsigned char *data,
2601		size_t len, unsigned priority, enum ucl_duplicate_strategy strat,
2602		enum ucl_parse_type parse_type)
2603{
2604	struct ucl_chunk *chunk;
2605
2606	if (parser == NULL) {
2607		return false;
2608	}
2609
2610	if (data == NULL && len != 0) {
2611		ucl_create_err (&parser->err, "invalid chunk added");
2612		return false;
2613	}
2614
2615	if (parser->state != UCL_STATE_ERROR) {
2616		chunk = UCL_ALLOC (sizeof (struct ucl_chunk));
2617		if (chunk == NULL) {
2618			ucl_create_err (&parser->err, "cannot allocate chunk structure");
2619			return false;
2620		}
2621
2622		if (parse_type == UCL_PARSE_AUTO && len > 0) {
2623			/* We need to detect parse type by the first symbol */
2624			if ((*data & 0x80) == 0x80 && (*data >= 0xdc && *data <= 0xdf)) {
2625				parse_type = UCL_PARSE_MSGPACK;
2626			}
2627			else if (*data == '(') {
2628				parse_type = UCL_PARSE_CSEXP;
2629			}
2630			else {
2631				parse_type = UCL_PARSE_UCL;
2632			}
2633		}
2634
2635		chunk->begin = data;
2636		chunk->remain = len;
2637		chunk->pos = chunk->begin;
2638		chunk->end = chunk->begin + len;
2639		chunk->line = 1;
2640		chunk->column = 0;
2641		chunk->priority = priority;
2642		chunk->strategy = strat;
2643		chunk->parse_type = parse_type;
2644		LL_PREPEND (parser->chunks, chunk);
2645		parser->recursion ++;
2646
2647		if (parser->recursion > UCL_MAX_RECURSION) {
2648			ucl_create_err (&parser->err, "maximum include nesting limit is reached: %d",
2649					parser->recursion);
2650			return false;
2651		}
2652
2653		if (len > 0) {
2654			/* Need to parse something */
2655			switch (parse_type) {
2656			default:
2657			case UCL_PARSE_UCL:
2658				return ucl_state_machine (parser);
2659			case UCL_PARSE_MSGPACK:
2660				return ucl_parse_msgpack (parser);
2661			case UCL_PARSE_CSEXP:
2662				return ucl_parse_csexp (parser);
2663			}
2664		}
2665		else {
2666			/* Just add empty chunk and go forward */
2667			if (parser->top_obj == NULL) {
2668				/*
2669				 * In case of empty object, create one to indicate that we've
2670				 * read something
2671				 */
2672				parser->top_obj = ucl_object_new_full (UCL_OBJECT, priority);
2673			}
2674
2675			return true;
2676		}
2677	}
2678
2679	ucl_create_err (&parser->err, "a parser is in an invalid state");
2680
2681	return false;
2682}
2683
2684bool
2685ucl_parser_add_chunk_priority (struct ucl_parser *parser,
2686		const unsigned char *data, size_t len, unsigned priority)
2687{
2688	/* We dereference parser, so this check is essential */
2689	if (parser == NULL) {
2690		return false;
2691	}
2692
2693	return ucl_parser_add_chunk_full (parser, data, len,
2694				priority, UCL_DUPLICATE_APPEND, UCL_PARSE_UCL);
2695}
2696
2697bool
2698ucl_parser_add_chunk (struct ucl_parser *parser, const unsigned char *data,
2699		size_t len)
2700{
2701	if (parser == NULL) {
2702		return false;
2703	}
2704
2705	return ucl_parser_add_chunk_full (parser, data, len,
2706			parser->default_priority, UCL_DUPLICATE_APPEND, UCL_PARSE_UCL);
2707}
2708
2709bool
2710ucl_parser_add_string_priority (struct ucl_parser *parser, const char *data,
2711		size_t len, unsigned priority)
2712{
2713	if (data == NULL) {
2714		ucl_create_err (&parser->err, "invalid string added");
2715		return false;
2716	}
2717	if (len == 0) {
2718		len = strlen (data);
2719	}
2720
2721	return ucl_parser_add_chunk_priority (parser,
2722			(const unsigned char *)data, len, priority);
2723}
2724
2725bool
2726ucl_parser_add_string (struct ucl_parser *parser, const char *data,
2727		size_t len)
2728{
2729	if (parser == NULL) {
2730		return false;
2731	}
2732
2733	return ucl_parser_add_string_priority (parser,
2734			(const unsigned char *)data, len, parser->default_priority);
2735}
2736
2737bool
2738ucl_set_include_path (struct ucl_parser *parser, ucl_object_t *paths)
2739{
2740	if (parser == NULL || paths == NULL) {
2741		return false;
2742	}
2743
2744	if (parser->includepaths == NULL) {
2745		parser->includepaths = ucl_object_copy (paths);
2746	}
2747	else {
2748		ucl_object_unref (parser->includepaths);
2749		parser->includepaths = ucl_object_copy (paths);
2750	}
2751
2752	if (parser->includepaths == NULL) {
2753		return false;
2754	}
2755
2756	return true;
2757}
2758