ucl_internal.h revision 298166
1/* Copyright (c) 2013, Vsevolod Stakhov
2 * All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *       * Redistributions of source code must retain the above copyright
7 *         notice, this list of conditions and the following disclaimer.
8 *       * Redistributions in binary form must reproduce the above copyright
9 *         notice, this list of conditions and the following disclaimer in the
10 *         documentation and/or other materials provided with the distribution.
11 *
12 * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
13 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
14 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
15 * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
16 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
18 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
19 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
20 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
21 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
22 */
23
24#ifndef UCL_INTERNAL_H_
25#define UCL_INTERNAL_H_
26
27#ifdef HAVE_CONFIG_H
28#include "config.h"
29#else
30/* Help embedded builds */
31#define HAVE_SYS_TYPES_H
32#define HAVE_SYS_MMAN_H
33#define HAVE_SYS_STAT_H
34#define HAVE_SYS_PARAM_H
35#define HAVE_LIMITS_H
36#define HAVE_FCNTL_H
37#define HAVE_ERRNO_H
38#define HAVE_UNISTD_H
39#define HAVE_CTYPE_H
40#define HAVE_STDIO_H
41#define HAVE_STRING_H
42#define HAVE_FLOAT_H
43#define HAVE_LIBGEN_H
44#define HAVE_MATH_H
45#define HAVE_STDBOOL_H
46#define HAVE_STDINT_H
47#define HAVE_STDARG_H
48#ifndef _WIN32
49# define HAVE_REGEX_H
50#endif
51#endif
52
53#ifdef HAVE_SYS_TYPES_H
54#include <sys/types.h>
55#endif
56
57#ifdef HAVE_SYS_MMAN_H
58# ifndef _WIN32
59#  include <sys/mman.h>
60# endif
61#endif
62#ifdef HAVE_SYS_STAT_H
63#include <sys/stat.h>
64#endif
65#ifdef HAVE_SYS_PARAM_H
66#include <sys/param.h>
67#endif
68
69#ifdef HAVE_LIMITS_H
70#include <limits.h>
71#endif
72#ifdef HAVE_FCNTL_H
73#include <fcntl.h>
74#endif
75#ifdef HAVE_ERRNO_H
76#include <errno.h>
77#endif
78#ifdef HAVE_UNISTD_H
79#include <unistd.h>
80#endif
81#ifdef HAVE_CTYPE_H
82#include <ctype.h>
83#endif
84#ifdef HAVE_STDIO_H
85#include <stdio.h>
86#endif
87#ifdef HAVE_STRING_H
88#include <string.h>
89#endif
90
91#include "utlist.h"
92#include "utstring.h"
93#include "uthash.h"
94#include "ucl.h"
95#include "ucl_hash.h"
96#include "xxhash.h"
97
98#ifdef HAVE_OPENSSL
99#include <openssl/evp.h>
100#endif
101
/*
 * Cast away a const qualifier by routing the pointer through uintptr_t.
 * Defined here only when no earlier header has already supplied __DECONST.
 */
#if !defined(__DECONST)
# define __DECONST(type, var) \
	((type)(uintptr_t)(const void *)(var))
#endif
105
/**
 * @file ucl_internal.h
 * Internal structures and functions of the UCL library
 */
110
/*
 * Internal limits and indices.
 * NOTE(review): UCL_MAX_RECURSION is presumably the bound checked against
 * ucl_parser.recursion, and the UCL_TRASH_* values look like slot indices for
 * key/value storage - confirm against the users of these macros.
 */
#define UCL_MAX_RECURSION	16
#define UCL_TRASH_KEY		0
#define UCL_TRASH_VALUE		1
114
/**
 * States of the parser; struct ucl_parser keeps the current and the previous
 * one in its state and prev_state members.
 */
enum ucl_parser_state {
	UCL_STATE_INIT        = 0,
	UCL_STATE_OBJECT      = 1,
	UCL_STATE_ARRAY       = 2,
	UCL_STATE_KEY         = 3,
	UCL_STATE_VALUE       = 4,
	UCL_STATE_AFTER_VALUE = 5,
	UCL_STATE_ARRAY_VALUE = 6,
	UCL_STATE_SCOMMENT    = 7,
	UCL_STATE_MCOMMENT    = 8,
	UCL_STATE_MACRO_NAME  = 9,
	UCL_STATE_MACRO       = 10,
	UCL_STATE_ERROR       = 11
};
129
/**
 * Character class flags. Apart from UCL_CHARACTER_DENIED (zero) every value
 * is a distinct single bit, so several classes can be OR-ed together.
 */
enum ucl_character_type {
	UCL_CHARACTER_DENIED            = 0x000,
	UCL_CHARACTER_KEY               = 0x001,
	UCL_CHARACTER_KEY_START         = 0x002,
	UCL_CHARACTER_WHITESPACE        = 0x004,
	UCL_CHARACTER_WHITESPACE_UNSAFE = 0x008,
	UCL_CHARACTER_VALUE_END         = 0x010,
	UCL_CHARACTER_VALUE_STR         = 0x020,
	UCL_CHARACTER_VALUE_DIGIT       = 0x040,
	UCL_CHARACTER_VALUE_DIGIT_START = 0x080,
	UCL_CHARACTER_ESCAPE            = 0x100,
	UCL_CHARACTER_KEY_SEP           = 0x200,
	UCL_CHARACTER_JSON_UNSAFE       = 0x400,
	UCL_CHARACTER_UCL_UNSAFE        = 0x800
};
145
146struct ucl_macro {
147	char *name;
148	union {
149		ucl_macro_handler handler;
150		ucl_context_macro_handler context_handler;
151	} h;
152	void* ud;
153	bool is_context;
154	UT_hash_handle hh;
155};
156
157struct ucl_stack {
158	ucl_object_t *obj;
159	struct ucl_stack *next;
160	uint64_t level;
161};
162
163struct ucl_chunk {
164	const unsigned char *begin;
165	const unsigned char *end;
166	const unsigned char *pos;
167	size_t remain;
168	unsigned int line;
169	unsigned int column;
170	unsigned priority;
171	enum ucl_duplicate_strategy strategy;
172	enum ucl_parse_type parse_type;
173	struct ucl_chunk *next;
174};
175
/**
 * Entry of a singly linked list of public keys. The EVP_PKEY pointer is
 * present only in builds with HAVE_OPENSSL; otherwise the entry carries
 * nothing but the list link.
 */
struct ucl_pubkey {
#ifdef HAVE_OPENSSL
	EVP_PKEY *key;
#endif
	struct ucl_pubkey *next;
};
186
/**
 * A variable: name and value strings with their lengths stored alongside,
 * linked to its neighbours through prev and next.
 */
struct ucl_variable {
	char *var;
	char *value;
	size_t var_len;
	size_t value_len;
	struct ucl_variable *prev;
	struct ucl_variable *next;
};
194
195struct ucl_parser {
196	enum ucl_parser_state state;
197	enum ucl_parser_state prev_state;
198	unsigned int recursion;
199	int flags;
200	unsigned default_priority;
201	int err_code;
202	ucl_object_t *top_obj;
203	ucl_object_t *cur_obj;
204	ucl_object_t *trash_objs;
205	ucl_object_t *includepaths;
206	char *cur_file;
207	struct ucl_macro *macroes;
208	struct ucl_stack *stack;
209	struct ucl_chunk *chunks;
210	struct ucl_pubkey *keys;
211	struct ucl_variable *variables;
212	ucl_variable_handler var_handler;
213	void *var_data;
214	ucl_object_t *comments;
215	ucl_object_t *last_comment;
216	UT_string *err;
217};
218
219struct ucl_object_userdata {
220	ucl_object_t obj;
221	ucl_userdata_dtor dtor;
222	ucl_userdata_emitter emitter;
223};
224
/**
 * Unescape a JSON string in place
 * @param str buffer with the escaped string; modified in place
 * @param len length value for str (presumably the number of bytes to
 * process - confirm against the implementation)
 * @return a size; presumably the length of the unescaped result (confirm)
 */
size_t
ucl_unescape_json_string (char *str, size_t len);
230
231/**
232 * Handle include macro
233 * @param data include data
234 * @param len length of data
235 * @param args UCL object representing arguments to the macro
236 * @param ud user data
237 * @return
238 */
239bool ucl_include_handler (const unsigned char *data, size_t len,
240		const ucl_object_t *args, void* ud);
241
242/**
243 * Handle tryinclude macro
244 * @param data include data
245 * @param len length of data
246 * @param args UCL object representing arguments to the macro
247 * @param ud user data
248 * @return
249 */
250bool ucl_try_include_handler (const unsigned char *data, size_t len,
251		const ucl_object_t *args, void* ud);
252
253/**
254 * Handle includes macro
255 * @param data include data
256 * @param len length of data
257 * @param args UCL object representing arguments to the macro
258 * @param ud user data
259 * @return
260 */
261bool ucl_includes_handler (const unsigned char *data, size_t len,
262		const ucl_object_t *args, void* ud);
263
264/**
265 * Handle priority macro
266 * @param data include data
267 * @param len length of data
268 * @param args UCL object representing arguments to the macro
269 * @param ud user data
270 * @return
271 */
272bool ucl_priority_handler (const unsigned char *data, size_t len,
273		const ucl_object_t *args, void* ud);
274
275/**
276 * Handle load macro
277 * @param data include data
278 * @param len length of data
279 * @param args UCL object representing arguments to the macro
280 * @param ud user data
281 * @return
282 */
283bool ucl_load_handler (const unsigned char *data, size_t len,
284		const ucl_object_t *args, void* ud);
285/**
286 * Handle inherit macro
287 * @param data include data
288 * @param len length of data
289 * @param args UCL object representing arguments to the macro
290 * @param ctx the current context object
291 * @param ud user data
292 * @return
293 */
294bool ucl_inherit_handler (const unsigned char *data, size_t len,
295		const ucl_object_t *args, const ucl_object_t *ctx, void* ud);
296
/*
 * Size-bounded string copy helpers (dst, src, size of dst).
 * NOTE(review): the signatures mirror strlcpy(3); what exactly the _unsafe
 * and _tolower variants do differently, and what the return value counts,
 * is defined by the implementation - confirm there.
 */
size_t
ucl_strlcpy (char *dst, const char *src, size_t siz);

size_t
ucl_strlcpy_unsafe (char *dst, const char *src, size_t siz);

size_t
ucl_strlcpy_tolower (char *dst, const char *src, size_t siz);
300
/*
 * Substring search helpers taking a haystack s, a needle find and a length.
 * NOTE(review): by their names these are the length-bounded and the
 * length-bounded case-insensitive search; confirm what len bounds and what
 * is returned on no match against the implementation.
 */
char *
ucl_strnstr (const char *s, const char *find, int len);

char *
ucl_strncasestr (const char *s, const char *find, int len);
303
304#ifdef __GNUC__
305static inline void
306ucl_create_err (UT_string **err, const char *fmt, ...)
307__attribute__ (( format( printf, 2, 3) ));
308#endif
309
310#undef UCL_FATAL_ERRORS
311
312static inline void
313ucl_create_err (UT_string **err, const char *fmt, ...)
314{
315	if (*err == NULL) {
316		utstring_new (*err);
317		va_list ap;
318		va_start (ap, fmt);
319		utstring_printf_va (*err, fmt, ap);
320		va_end (ap);
321	}
322
323#ifdef UCL_FATAL_ERRORS
324	assert (0);
325#endif
326}
327
328/**
329 * Check whether a given string contains a boolean value
330 * @param obj object to set
331 * @param start start of a string
332 * @param len length of a string
333 * @return true if a string is a boolean value
334 */
335static inline bool
336ucl_maybe_parse_boolean (ucl_object_t *obj, const unsigned char *start, size_t len)
337{
338	const char *p = (const char *)start;
339	bool ret = false, val = false;
340
341	if (len == 5) {
342		if ((p[0] == 'f' || p[0] == 'F') && strncasecmp (p, "false", 5) == 0) {
343			ret = true;
344			val = false;
345		}
346	}
347	else if (len == 4) {
348		if ((p[0] == 't' || p[0] == 'T') && strncasecmp (p, "true", 4) == 0) {
349			ret = true;
350			val = true;
351		}
352	}
353	else if (len == 3) {
354		if ((p[0] == 'y' || p[0] == 'Y') && strncasecmp (p, "yes", 3) == 0) {
355			ret = true;
356			val = true;
357		}
358		else if ((p[0] == 'o' || p[0] == 'O') && strncasecmp (p, "off", 3) == 0) {
359			ret = true;
360			val = false;
361		}
362	}
363	else if (len == 2) {
364		if ((p[0] == 'n' || p[0] == 'N') && strncasecmp (p, "no", 2) == 0) {
365			ret = true;
366			val = false;
367		}
368		else if ((p[0] == 'o' || p[0] == 'O') && strncasecmp (p, "on", 2) == 0) {
369			ret = true;
370			val = true;
371		}
372	}
373
374	if (ret && obj != NULL) {
375		obj->type = UCL_BOOLEAN;
376		obj->value.iv = val;
377	}
378
379	return ret;
380}
381
382/**
383 * Check numeric string
384 * @param obj object to set if a string is numeric
385 * @param start start of string
386 * @param end end of string
387 * @param pos position where parsing has stopped
388 * @param allow_double allow parsing of floating point values
389 * @return 0 if string is numeric and error code (EINVAL or ERANGE) in case of conversion error
390 */
391int ucl_maybe_parse_number (ucl_object_t *obj,
392		const char *start, const char *end, const char **pos,
393		bool allow_double, bool number_bytes, bool allow_time);
394
395
396static inline const ucl_object_t *
397ucl_hash_search_obj (ucl_hash_t* hashlin, ucl_object_t *obj)
398{
399	return (const ucl_object_t *)ucl_hash_search (hashlin, obj->key, obj->keylen);
400}
401
402static inline ucl_hash_t * ucl_hash_insert_object (ucl_hash_t *hashlin,
403		const ucl_object_t *obj,
404		bool ignore_case) UCL_WARN_UNUSED_RESULT;
405
406static inline ucl_hash_t *
407ucl_hash_insert_object (ucl_hash_t *hashlin,
408		const ucl_object_t *obj,
409		bool ignore_case)
410{
411	if (hashlin == NULL) {
412		hashlin = ucl_hash_create (ignore_case);
413	}
414	ucl_hash_insert (hashlin, obj, obj->key, obj->keylen);
415
416	return hashlin;
417}
418
/**
 * Obtain the standard emitter context for the given emit type
 * @param emit_type type of emitter
 * @return context or NULL if input is invalid
 */
const struct ucl_emitter_context *ucl_emit_get_standard_context (
		enum ucl_emitter emit_type);
426
/**
 * Serialize a string as a JSON string
 * @param str string to emit
 * @param size size of str
 * @param ctx emitter context that receives the output
 */
void
ucl_elt_string_write_json (const char *str,
		size_t size,
		struct ucl_emitter_context *ctx);
434
/**
 * Write a multiline string using `EOD` as the string terminator
 * @param str string to emit
 * @param size size of str
 * @param ctx emitter context that receives the output
 */
void
ucl_elt_string_write_multiline (const char *str,
		size_t size,
		struct ucl_emitter_context *ctx);
443
444/**
445 * Emit a single object to string
446 * @param obj
447 * @return
448 */
449unsigned char * ucl_object_emit_single_json (const ucl_object_t *obj);
450
451/**
452 * Check whether a specified string is long and should be likely printed in
453 * multiline mode
454 * @param obj
455 * @return
456 */
457bool ucl_maybe_long_string (const ucl_object_t *obj);
458
/**
 * Print an integer to the msgpack output
 * @param ctx emitter context
 * @param val integer value to print
 */
void
ucl_emitter_print_int_msgpack (struct ucl_emitter_context *ctx, int64_t val);
/**
 * Print a double to the msgpack output
 * @param ctx emitter context
 * @param val floating point value to print
 */
void
ucl_emitter_print_double_msgpack (struct ucl_emitter_context *ctx, double val);
/**
 * Print a boolean to the msgpack output
 * @param ctx emitter context
 * @param val boolean value to print
 */
void
ucl_emitter_print_bool_msgpack (struct ucl_emitter_context *ctx, bool val);
/**
 * Print a string to the msgpack output
 * @param ctx emitter context
 * @param s string data
 * @param len length of s
 */
void
ucl_emitter_print_string_msgpack (struct ucl_emitter_context *ctx,
		const char *s,
		size_t len);
488
/**
 * Print a binary string to the msgpack output
 * @param ctx emitter context
 * @param s binary data
 * @param len length of s
 */
void
ucl_emitter_print_binary_string_msgpack (struct ucl_emitter_context *ctx,
		const char *s,
		size_t len);
497
/**
 * Print the array preamble for msgpack
 * @param ctx emitter context
 * @param len length written into the preamble (presumably the number of
 * array elements - confirm against the implementation)
 */
void
ucl_emitter_print_array_msgpack (struct ucl_emitter_context *ctx, size_t len);
505
/**
 * Print the object preamble for msgpack
 * @param ctx emitter context
 * @param len length written into the preamble (presumably the number of
 * key/value pairs - confirm against the implementation)
 */
void
ucl_emitter_print_object_msgpack (struct ucl_emitter_context *ctx, size_t len);
/**
 * Print NULL to the msgpack output
 * @param ctx emitter context
 */
void
ucl_emitter_print_null_msgpack (struct ucl_emitter_context *ctx);
518/**
519 * Print object's key if needed to the msgpack output
520 * @param print_key
521 * @param ctx
522 * @param obj
523 */
524void ucl_emitter_print_key_msgpack (bool print_key,
525		struct ucl_emitter_context *ctx,
526		const ucl_object_t *obj);
527
528/**
529 * Fetch URL into a buffer
530 * @param url url to fetch
531 * @param buf pointer to buffer (must be freed by callee)
532 * @param buflen pointer to buffer length
533 * @param err pointer to error argument
534 * @param must_exist fail if cannot find a url
535 */
536bool ucl_fetch_url (const unsigned char *url,
537		unsigned char **buf,
538		size_t *buflen,
539		UT_string **err,
540		bool must_exist);
541
542/**
543 * Fetch a file and save results to the memory buffer
544 * @param filename filename to fetch
545 * @param len length of filename
546 * @param buf target buffer
547 * @param buflen target length
548 * @return
549 */
550bool ucl_fetch_file (const unsigned char *filename,
551		unsigned char **buf,
552		size_t *buflen,
553		UT_string **err,
554		bool must_exist);
555
556/**
557 * Add new element to an object using the current merge strategy and priority
558 * @param parser
559 * @param nobj
560 * @return
561 */
562bool ucl_parser_process_object_element (struct ucl_parser *parser,
563		ucl_object_t *nobj);
564
/**
 * Parse a msgpack chunk
 * @param parser parser holding the chunk to parse
 * @return parse status; presumably true on success (confirm against the
 * implementation)
 */
bool
ucl_parse_msgpack (struct ucl_parser *parser);
571
572#endif /* UCL_INTERNAL_H_ */
573