1262395Sbapt/* Copyright (c) 2013, Vsevolod Stakhov
2262395Sbapt * All rights reserved.
3262395Sbapt *
4262395Sbapt * Redistribution and use in source and binary forms, with or without
5262395Sbapt * modification, are permitted provided that the following conditions are met:
6262395Sbapt *       * Redistributions of source code must retain the above copyright
7262395Sbapt *         notice, this list of conditions and the following disclaimer.
8262395Sbapt *       * Redistributions in binary form must reproduce the above copyright
9262395Sbapt *         notice, this list of conditions and the following disclaimer in the
10262395Sbapt *         documentation and/or other materials provided with the distribution.
11262395Sbapt *
12262395Sbapt * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
13262395Sbapt * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
14262395Sbapt * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
15262395Sbapt * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
16262395Sbapt * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17262395Sbapt * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
18262395Sbapt * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
19262395Sbapt * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
20262395Sbapt * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
21262395Sbapt * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
22262395Sbapt */
23262395Sbapt
/**
 * @file This utility generates the character table for ucl.
 */
27262395Sbapt
28262395Sbapt#include <stdio.h>
29262395Sbapt#include <ctype.h>
30262395Sbapt#include <stdbool.h>
31262395Sbapt
/**
 * Write one flag name into a buffer, preceded by "|" when an earlier flag
 * has already been written for the same table entry.
 *
 * @param flag    NUL-terminated flag name to write
 * @param need_or in: whether a "|" separator must precede the flag;
 *                out: always set to true
 * @param val     destination buffer; the caller must provide enough room,
 *                the write is not bounded
 * @return the value returned by sprintf (number of characters written,
 *         not counting the terminating NUL)
 */
static inline int
print_flag (const char *flag, bool *need_or, char *val)
{
	const char *separator = "";
	int written;

	if (*need_or) {
		separator = "|";
	}

	written = sprintf (val, "%s%s", separator, flag);

	/* Every flag after the first one in an entry needs a separator. */
	*need_or = true;

	return written;
}
42262395Sbapt
/**
 * Print the C definition of a 256-entry character class table to stdout.
 *
 * Each entry is an OR-expression of UCL_CHARACTER_* flag names describing
 * the corresponding byte value; a byte that matches no class is emitted as
 * UCL_CHARACTER_DENIED. Entries for printable bytes are followed by a
 * comment that shows the character itself.
 *
 * @param argc number of command line arguments
 * @param argv when argv[1] is present it replaces the default table name
 *             "ucl_chartable"
 * @return always 0
 */
int
main (int argc, char **argv)
{
	int i, col, r;
	const char *name = "ucl_chartable";
	bool need_or;
	/*
	 * Holds the text of a single table entry. Even with every flag set the
	 * entry is a few hundred characters, so the unbounded sprintf calls
	 * below stay well inside the buffer.
	 */
	char valbuf[2048];

	col = 0;

	if (argc > 1) {
		name = argv[1];
	}

	printf ("static const unsigned int %s[256] = {\n", name);

	for (i = 0; i < 256; i ++) {
		/* Start a fresh entry: no flag written yet, buffer offset 0 */
		need_or = false;
		r = 0;

		if (i == ' ' || i == '\t') {
			r += print_flag ("UCL_CHARACTER_WHITESPACE", &need_or, valbuf + r);
		}
		if (isspace (i)) {
			r += print_flag ("UCL_CHARACTER_WHITESPACE_UNSAFE", &need_or, valbuf + r);
		}
		if (isalnum (i) || i >= 0x80 || i == '/' || i == '_') {
			r += print_flag ("UCL_CHARACTER_KEY_START", &need_or, valbuf + r);
		}
		if (isalnum (i) || i == '-' || i == '_' || i == '/' || i == '.' || i >= 0x80) {
			r += print_flag ("UCL_CHARACTER_KEY", &need_or, valbuf + r);
		}
		/* A value terminator never gets any of the VALUE_* content flags */
		if (i == 0 || i == '\r' || i == '\n' || i == ']' || i == '}' || i == ';' || i == ',' || i == '#') {
			r += print_flag ("UCL_CHARACTER_VALUE_END", &need_or, valbuf + r);
		}
		else {
			if (isprint (i) || i >= 0x80) {
				r += print_flag ("UCL_CHARACTER_VALUE_STR", &need_or, valbuf + r);
			}
			if (isdigit (i) || i == '-') {
				r += print_flag ("UCL_CHARACTER_VALUE_DIGIT_START", &need_or, valbuf + r);
			}
			if (isalnum (i) || i == '.' || i == '-' || i == '+') {
				r += print_flag ("UCL_CHARACTER_VALUE_DIGIT", &need_or, valbuf + r);
			}
		}
		if (i == '"' || i == '\\' || i == '/' || i == 'b' ||
			i == 'f' || i == 'n' || i == 'r' || i == 't' || i == 'u') {
			r += print_flag ("UCL_CHARACTER_ESCAPE", &need_or, valbuf + r);
		}
		if (i == ' ' || i == '\t' || i == ':' || i == '=') {
			r += print_flag ("UCL_CHARACTER_KEY_SEP", &need_or, valbuf + r);
		}
		if (i == '\n' || i == '\r' || i == '\\' || i == '\b' || i == '\t' ||
				i == '"' || i == '\f') {
			r += print_flag ("UCL_CHARACTER_JSON_UNSAFE", &need_or, valbuf + r);
		}
		if (i == '\n' || i == '\r' || i == '\\' || i == '\b' || i == '\t' ||
				i == '"' || i == '\f' || i == '=' || i == ':' || i == '{' || i == '[' || i == ' ') {
			r += print_flag ("UCL_CHARACTER_UCL_UNSAFE", &need_or, valbuf + r);
		}

		/* No class matched at all: mark the byte as denied */
		if (!need_or) {
			r += print_flag ("UCL_CHARACTER_DENIED", &need_or, valbuf + r);
		}

		/* Annotate printable bytes with the character they stand for */
		if (isprint (i)) {
			r += sprintf (valbuf + r, " /* %c */", i);
		}
		/* Every entry except the last one is followed by a separator */
		if (i != 255) {
			r += sprintf (valbuf + r, ", ");
		}

		/* Break the output line once the running width exceeds 80 columns */
		col += r;
		if (col > 80) {
			printf ("\n%s", valbuf);
			col = r;
		}
		else {
			printf ("%s", valbuf);
		}
	}

	/* Terminate the definition with ';' so the output is a valid C declaration */
	printf ("\n};\n");

	return 0;
}
129