1/****************************************************************************
2 * Copyright 2018-2020,2024 Thomas E. Dickey                                *
3 * Copyright 2009-2013,2017 Free Software Foundation, Inc.                  *
4 *                                                                          *
5 * Permission is hereby granted, free of charge, to any person obtaining a  *
6 * copy of this software and associated documentation files (the            *
7 * "Software"), to deal in the Software without restriction, including      *
8 * without limitation the rights to use, copy, modify, merge, publish,      *
9 * distribute, distribute with modifications, sublicense, and/or sell       *
10 * copies of the Software, and to permit persons to whom the Software is    *
11 * furnished to do so, subject to the following conditions:                 *
12 *                                                                          *
13 * The above copyright notice and this permission notice shall be included  *
14 * in all copies or substantial portions of the Software.                   *
15 *                                                                          *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS  *
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF               *
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.   *
19 * IN NO EVENT SHALL THE ABOVE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,   *
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR    *
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR    *
22 * THE USE OR OTHER DEALINGS IN THE SOFTWARE.                               *
23 *                                                                          *
24 * Except as contained in this notice, the name(s) of the above copyright   *
25 * holders shall not be used in advertising or otherwise to promote the     *
26 * sale, use or other dealings in this Software without prior written       *
27 * authorization.                                                           *
28 ****************************************************************************/
29
30/****************************************************************************
31 *  Author: Zeyd M. Ben-Halim <zmbenhal@netcom.com> 1992,1995               *
32 *     and: Eric S. Raymond <esr@snark.thyrsus.com>                         *
33 *     and: Thomas E. Dickey                        1996-on                 *
34 ****************************************************************************/
35
36/*
37 *	make_hash.c --- build-time program for constructing comp_captab.c
38 */
39
40#include <build.priv.h>
41
42#include <tic.h>
43#include <hashsize.h>
44
45#include <ctype.h>
46
47MODULE_ID("$Id: make_hash.c,v 1.34 2024/03/02 19:35:40 tom Exp $")
48
49/*
50 *	_nc_make_hash_table()
51 *
52 *	Takes the entries in table[] and hashes them into hash_table[]
53 *	by name.  There are CAPTABSIZE entries in the predefined table[]
54 *	and HASHTABSIZE slots in hash_table[].
55 *
56 */
57
58#undef MODULE_ID
59#define MODULE_ID(id)		/*nothing */
60#include <tinfo/doalloc.c>
61
62#define L_PAREN "("
63#define R_PAREN ")"
64#define L_BRACE "{"
65#define R_BRACE "}"
66
67static const char *typenames[] =
68{"BOOLEAN", "NUMBER", "STRING"};
69
70static void
71failed(const char *s)
72{
73    perror(s);
74    exit(EXIT_FAILURE);
75}
76
77static char *
78strmalloc(char *s)
79{
80    size_t need = strlen(s) + 1;
81    char *result = malloc(need);
82    if (result == 0)
83	failed("strmalloc");
84    _nc_STRCPY(result, s, need);
85    return result;
86}
87
88/*
89 *	int hash_function(string)
90 *
91 *	Computes the hashing function on the given string.
92 *
93 *	The current hash function is the sum of each consecutive pair
94 *	of characters, taken as two-byte integers, mod HASHTABSIZE.
95 *
96 */
97
98static int
99hash_function(const char *string)
100{
101    long sum = 0;
102
103    while (*string) {
104	sum += (long) (UChar(*string) + (UChar(*(string + 1)) << 8));
105	string++;
106    }
107
108    return (int) (sum % HASHTABSIZE);
109}
110
111#define UNUSED -1
112
113static void
114_nc_make_hash_table(struct user_table_entry *table,
115		    HashValue * hash_table,
116		    unsigned tablesize)
117{
118    unsigned i;
119    int hashvalue;
120    int collisions = 0;
121
122    for (i = 0; i < HASHTABSIZE; i++) {
123	hash_table[i] = UNUSED;
124    }
125    for (i = 0; i < tablesize; i++) {
126	hashvalue = hash_function(table[i].ute_name);
127
128	if (hash_table[hashvalue] >= 0)
129	    collisions++;
130
131	if (hash_table[hashvalue] != UNUSED) {
132	    table[i].ute_link = hash_table[hashvalue];
133	}
134	hash_table[hashvalue] = (HashValue) i;
135    }
136
137    printf("/* %d collisions out of %d entries */\n", collisions, tablesize);
138}
139
140/*
141 * This filter reads from standard input a list of tab-delimited columns,
142 * (e.g., from Caps.filtered) computes the hash-value of a specified column and
143 * writes the hashed tables to standard output.
144 *
145 * By compiling the hash table at build time, we're able to make the entire
146 * set of terminfo and termcap tables readonly (and also provide some runtime
147 * performance enhancement).
148 */
149
150#define MAX_COLUMNS BUFSIZ	/* this _has_ to be worst-case */
151
152static int
153count_columns(char **list)
154{
155    int result = 0;
156    if (list != 0) {
157	while (*list++) {
158	    ++result;
159	}
160    }
161    return result;
162}
163
164static char **
165parse_columns(char *buffer)
166{
167    static char **list;
168
169    int col = 0;
170
171    if (buffer == 0) {
172	free(list);
173	list = 0;
174	return 0;
175    }
176
177    if (*buffer != '#') {
178	if (list == 0) {
179	    list = typeCalloc(char *, (MAX_COLUMNS + 1));
180	    if (list == 0)
181		return (0);
182	}
183	while (*buffer != '\0') {
184	    char *s;
185	    for (s = buffer; (*s != '\0') && !isspace(UChar(*s)); s++)
186		/*EMPTY */ ;
187	    if (s != buffer) {
188		char mark = *s;
189		*s = '\0';
190		if ((s - buffer) > 1
191		    && (*buffer == '"')
192		    && (s[-1] == '"')) {	/* strip the quotes */
193		    assert(s > buffer + 1);
194		    s[-1] = '\0';
195		    buffer++;
196		}
197		list[col] = buffer;
198		col++;
199		if (mark == '\0')
200		    break;
201		while (*++s && isspace(UChar(*s)))
202		    /*EMPTY */ ;
203		buffer = s;
204	    } else
205		break;
206	}
207    }
208    return col ? list : 0;
209}
210
211#define SetType(n,t) \
212	if (is_user) \
213	    name_table[n].ute_type |= (int)(1 << (t)); \
214	else \
215	    name_table[n].ute_type = (t)
216
217#define GetType(n) \
218	(is_user \
219	 ? get_type(name_table[n].ute_type) \
220	 : typenames[name_table[n].ute_type])
221
222static char *
223get_type(int type_mask)
224{
225    static char result[80];
226    unsigned n;
227    _nc_STRCPY(result, L_PAREN, sizeof(result));
228    for (n = 0; n < 3; ++n) {
229	if ((1 << n) & type_mask) {
230	    size_t want = 5 + strlen(typenames[n]);
231	    if (want > sizeof(result)) {
232		fprintf(stderr, "Buffer is not large enough for %s + %s\n",
233			result, typenames[n]);
234		exit(EXIT_FAILURE);
235	    }
236	    if (result[1])
237		_nc_STRCAT(result, "|", sizeof(result));
238	    _nc_STRCAT(result, "1<<", sizeof(result));
239	    _nc_STRCAT(result, typenames[n], sizeof(result));
240	}
241    }
242    _nc_STRCAT(result, R_PAREN, sizeof(result));
243    return result;
244}
245
246int
247main(int argc, char **argv)
248{
249    unsigned tablesize = CAPTABSIZE;
250    struct user_table_entry *name_table = typeCalloc(struct
251						     user_table_entry, tablesize);
252    HashValue *hash_table = typeCalloc(HashValue, HASHTABSIZE);
253    const char *root_name;
254    int column = 0;
255    int bigstring = 0;
256    unsigned n;
257    unsigned nn;
258    unsigned tableused = 0;
259    bool is_user;
260    const char *table_name;
261    char buffer[BUFSIZ];
262
263    short BoolCount = 0;
264    short NumCount = 0;
265    short StrCount = 0;
266
267    /* The first argument is the column-number (starting with 0).
268     * The second is the root name of the tables to generate.
269     */
270    if (argc <= 3
271	|| (column = atoi(argv[1])) <= 0
272	|| (column >= MAX_COLUMNS)
273	|| *(root_name = argv[2]) == 0
274	|| (bigstring = atoi(argv[3])) < 0
275	|| name_table == 0
276	|| hash_table == 0) {
277	fprintf(stderr, "usage: make_hash column root_name bigstring\n");
278	exit(EXIT_FAILURE);
279    }
280    is_user = (*root_name == 'u');
281    table_name = (is_user ? "user" : "name");
282
283    /*
284     * Read the table into our arrays.
285     */
286    for (n = 0; (n < tablesize) && fgets(buffer, BUFSIZ, stdin);) {
287	char **list;
288	char *nlp = strchr(buffer, '\n');
289	if (nlp)
290	    *nlp = '\0';
291	else
292	    buffer[sizeof(buffer) - 2] = '\0';
293	list = parse_columns(buffer);
294	if (list == 0)		/* blank or comment */
295	    continue;
296	if (is_user) {
297	    if (strcmp(list[0], "userdef"))
298		continue;
299	} else if (!strcmp(list[0], "userdef")) {
300	    continue;
301	}
302	if (column < 0 || column > count_columns(list)) {
303	    fprintf(stderr, "expected %d columns, have %d:\n%s\n",
304		    column,
305		    count_columns(list),
306		    buffer);
307	    exit(EXIT_FAILURE);
308	}
309	nn = tableused;
310	if (is_user) {
311	    unsigned j;
312	    for (j = 0; j < tableused; ++j) {
313		if (!strcmp(list[column], name_table[j].ute_name)) {
314		    nn = j;
315		    break;
316		}
317	    }
318	}
319	if (nn == tableused) {
320	    name_table[nn].ute_link = -1;	/* end-of-hash */
321	    name_table[nn].ute_name = strmalloc(list[column]);
322	    ++tableused;
323	}
324
325	if (!strcmp(list[2], "bool")) {
326	    SetType(nn, BOOLEAN);
327	    name_table[nn].ute_index = BoolCount++;
328	} else if (!strcmp(list[2], "num")) {
329	    SetType(nn, NUMBER);
330	    name_table[nn].ute_index = NumCount++;
331	} else if (!strcmp(list[2], "str")) {
332	    SetType(nn, STRING);
333	    name_table[nn].ute_index = StrCount++;
334	    if (is_user) {
335		if (*list[3] != '-') {
336		    unsigned j;
337		    name_table[nn].ute_argc = (unsigned) strlen(list[3]);
338		    for (j = 0; j < name_table[nn].ute_argc; ++j) {
339			if (list[3][j] == 's') {
340			    name_table[nn].ute_args |= (1U << j);
341			}
342		    }
343		}
344	    }
345	} else {
346	    fprintf(stderr, "Unknown type: %s\n", list[2]);
347	    exit(EXIT_FAILURE);
348	}
349	n++;
350    }
351    if (tablesize > tableused)
352	tablesize = tableused;
353    _nc_make_hash_table(name_table, hash_table, tablesize);
354
355    /*
356     * Write the compiled tables to standard output
357     */
358    if (bigstring) {
359	int len = 0;
360	int nxt;
361
362	printf("static const char %s_names_text[] = \\\n", root_name);
363	for (n = 0; n < tablesize; n++) {
364	    nxt = (int) strlen(name_table[n].ute_name) + 5;
365	    if (nxt + len > 72) {
366		printf("\\\n");
367		len = 0;
368	    }
369	    printf("\"%s\\0\" ", name_table[n].ute_name);
370	    len += nxt;
371	}
372	printf(";\n\n");
373
374	len = 0;
375	printf("static %s_table_data const %s_names_data[] =\n",
376	       table_name,
377	       root_name);
378	printf("%s\n", L_BRACE);
379	for (n = 0; n < tablesize; n++) {
380	    printf("\t%s %15d,\t%10s,", L_BRACE, len, GetType(n));
381	    if (is_user)
382		printf("\t%d,%d,",
383		       name_table[n].ute_argc,
384		       name_table[n].ute_args);
385	    printf("\t%3d, %3d %s%c\n",
386		   name_table[n].ute_index,
387		   name_table[n].ute_link,
388		   R_BRACE,
389		   n < tablesize - 1 ? ',' : ' ');
390	    len += (int) strlen(name_table[n].ute_name) + 1;
391	}
392	printf("%s;\n\n", R_BRACE);
393	printf("static struct %s_table_entry *_nc_%s_table = 0;\n\n",
394	       table_name,
395	       root_name);
396    } else {
397
398	printf("static struct %s_table_entry const _nc_%s_table[] =\n",
399	       table_name,
400	       root_name);
401	printf("%s\n", L_BRACE);
402	for (n = 0; n < tablesize; n++) {
403	    _nc_SPRINTF(buffer, _nc_SLIMIT(sizeof(buffer)) "\"%s\"",
404			name_table[n].ute_name);
405	    printf("\t%s %15s,\t%10s,", L_BRACE, buffer, GetType(n));
406	    if (is_user)
407		printf("\t%d,%d,",
408		       name_table[n].ute_argc,
409		       name_table[n].ute_args);
410	    printf("\t%3d, %3d %s%c\n",
411		   name_table[n].ute_index,
412		   name_table[n].ute_link,
413		   R_BRACE,
414		   n < tablesize - 1 ? ',' : ' ');
415	}
416	printf("%s;\n\n", R_BRACE);
417    }
418
419    printf("static const HashValue _nc_%s_hash_table[%d] =\n",
420	   root_name,
421	   HASHTABSIZE + 1);
422    printf("%s\n", L_BRACE);
423    for (n = 0; n < HASHTABSIZE; n++) {
424	printf("\t%3d,\n", hash_table[n]);
425    }
426    printf("\t0\t/* base-of-table */\n");
427    printf("%s;\n\n", R_BRACE);
428
429    if (!is_user) {
430	printf("#if (BOOLCOUNT!=%d)||(NUMCOUNT!=%d)||(STRCOUNT!=%d)\n",
431	       BoolCount, NumCount, StrCount);
432	printf("#error\t--> term.h and comp_captab.c disagree about the <--\n");
433	printf("#error\t--> numbers of booleans, numbers and/or strings <--\n");
434	printf("#endif\n\n");
435    }
436
437    free(hash_table);
438    for (n = 0; (n < tablesize); ++n) {
439	free((void *) name_table[n].ute_name);
440    }
441    free(name_table);
442    parse_columns(0);
443
444    return EXIT_SUCCESS;
445}
446