compile.c revision 1.20
1/* $NetBSD: compile.c,v 1.20 2020/03/28 15:45:56 christos Exp $ */
2
3/*
4 * Copyright (c) 2009, 2010, 2011, 2020 The NetBSD Foundation, Inc.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Roy Marples.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30#if HAVE_NBTOOL_CONFIG_H
31#include "nbtool_config.h"
32#endif
33
34#include <sys/cdefs.h>
35__RCSID("$NetBSD: compile.c,v 1.20 2020/03/28 15:45:56 christos Exp $");
36
37#if !HAVE_NBTOOL_CONFIG_H || HAVE_SYS_ENDIAN_H
38#include <sys/endian.h>
39#endif
40
41#include <assert.h>
42#include <ctype.h>
43#include <err.h>
44#include <errno.h>
45#include <limits.h>
46#include <stdarg.h>
47#include <stdlib.h>
48#include <stdint.h>
49#include <stdio.h>
50#include <string.h>
51#include <term_private.h>
52#include <term.h>
53
54static void __printflike(2, 3)
55dowarn(int flags, const char *fmt, ...)
56{
57	va_list va;
58
59	errno = EINVAL;
60	if (flags & TIC_WARNING) {
61		va_start(va, fmt);
62		vwarnx(fmt, va);
63		va_end(va);
64	}
65}
66
67char *
68_ti_grow_tbuf(TBUF *tbuf, size_t len)
69{
70	char *buf;
71	size_t l;
72
73	_DIAGASSERT(tbuf != NULL);
74
75	l = tbuf->bufpos + len;
76	if (l > tbuf->buflen) {
77		if (tbuf->buflen == 0)
78			buf = malloc(l);
79		else
80			buf = realloc(tbuf->buf, l);
81		if (buf == NULL)
82			return NULL;
83		tbuf->buf = buf;
84		tbuf->buflen = l;
85	}
86	return tbuf->buf;
87}
88
89const char *
90_ti_find_cap(TIC *tic, TBUF *tbuf, char type, short ind)
91{
92	size_t n;
93	uint16_t num;
94	const char *cap;
95
96	_DIAGASSERT(tbuf != NULL);
97
98	cap = tbuf->buf;
99	for (n = tbuf->entries; n > 0; n--) {
100		num = _ti_decode_16(&cap);
101		if ((short)num == ind)
102			return cap;
103		switch (type) {
104		case 'f':
105			cap++;
106			break;
107		case 'n':
108			cap += _ti_numsize(tic);
109			break;
110		case 's':
111			num = _ti_decode_16(&cap);
112			cap += num;
113			break;
114		}
115	}
116
117	errno = ESRCH;
118	return NULL;
119}
120
121const char *
122_ti_find_extra(TIC *tic, TBUF *tbuf, const char *code)
123{
124	size_t n;
125	uint16_t num;
126	const char *cap;
127
128	_DIAGASSERT(tbuf != NULL);
129	_DIAGASSERT(code != NULL);
130
131	cap = tbuf->buf;
132	for (n = tbuf->entries; n > 0; n--) {
133		num = _ti_decode_16(&cap);
134		if (strcmp(cap, code) == 0)
135			return cap + num;
136		cap += num;
137		switch (*cap++) {
138		case 'f':
139			cap++;
140			break;
141		case 'n':
142			cap += _ti_numsize(tic);
143			break;
144		case 's':
145			num = _ti_decode_16(&cap);
146			cap += num;
147			break;
148		}
149	}
150
151	errno = ESRCH;
152	return NULL;
153}
154
155char *
156_ti_getname(int rtype, const char *orig)
157{
158	char *name;
159
160	if (rtype == TERMINFO_RTYPE) {
161		/* , and | are the two print characters now allowed
162		 * in terminfo aliases or long descriptions.
163		 * As | is generally used to delimit aliases inside the
164		 * description, we use a comma. */
165		if (asprintf(&name, "%s,v3", orig) < 0)
166			name = NULL;
167	} else {
168		name = strdup(orig);
169	}
170	return name;
171}
172
173size_t
174_ti_store_extra(TIC *tic, int wrn, const char *id, char type, char flag,
175    int num, const char *str, size_t strl, int flags)
176{
177	size_t l;
178
179	_DIAGASSERT(tic != NULL);
180
181	if (strcmp(id, "use") != 0) {
182		if (_ti_find_extra(tic, &tic->extras, id) != NULL)
183			return 0;
184		if (!(flags & TIC_EXTRA)) {
185			if (wrn != 0)
186				dowarn(flags, "%s: %s: unknown capability",
187				    tic->name, id);
188			return 0;
189		}
190	}
191
192	l = strlen(id) + 1;
193	if (l > UINT16_T_MAX) {
194		dowarn(flags, "%s: %s: cap name is too long", tic->name, id);
195		return 0;
196	}
197
198	if (!_ti_grow_tbuf(&tic->extras,
199		l + strl + sizeof(uint16_t) + _ti_numsize(tic) + 1))
200		return 0;
201	_ti_encode_buf_count_str(&tic->extras, id, l);
202	tic->extras.buf[tic->extras.bufpos++] = type;
203	switch (type) {
204	case 'f':
205		tic->extras.buf[tic->extras.bufpos++] = flag;
206		break;
207	case 'n':
208		_ti_encode_buf_num(&tic->extras, num, tic->rtype);
209		break;
210	case 's':
211		_ti_encode_buf_count_str(&tic->extras, str, strl);
212		break;
213	}
214	tic->extras.entries++;
215	return 1;
216}
217
218static void
219_ti_encode_buf(char **cap, const TBUF *buf)
220{
221	if (buf->entries == 0) {
222		_ti_encode_16(cap, 0);
223	} else {
224		_ti_encode_16(cap, buf->bufpos + sizeof(uint16_t));
225		_ti_encode_16(cap, buf->entries);
226		_ti_encode_str(cap, buf->buf, buf->bufpos);
227	}
228}
229
230ssize_t
231_ti_flatten(uint8_t **buf, const TIC *tic)
232{
233	size_t buflen, len, alen, dlen;
234	char *cap;
235
236	_DIAGASSERT(buf != NULL);
237	_DIAGASSERT(tic != NULL);
238
239	len = strlen(tic->name) + 1;
240	if (tic->alias == NULL)
241		alen = 0;
242	else
243		alen = strlen(tic->alias) + 1;
244	if (tic->desc == NULL)
245		dlen = 0;
246	else
247		dlen = strlen(tic->desc) + 1;
248
249	buflen = sizeof(char) +
250	    sizeof(uint16_t) + len +
251	    sizeof(uint16_t) + alen +
252	    sizeof(uint16_t) + dlen +
253	    (sizeof(uint16_t) * 2) + tic->flags.bufpos +
254	    (sizeof(uint16_t) * 2) + tic->nums.bufpos +
255	    (sizeof(uint16_t) * 2) + tic->strs.bufpos +
256	    (sizeof(uint16_t) * 2) + tic->extras.bufpos;
257
258	*buf = malloc(buflen);
259	if (*buf == NULL)
260		return -1;
261
262	cap = (char *)*buf;
263	*cap++ = tic->rtype;
264
265	_ti_encode_count_str(&cap, tic->name, len);
266	_ti_encode_count_str(&cap, tic->alias, alen);
267	_ti_encode_count_str(&cap, tic->desc, dlen);
268
269	_ti_encode_buf(&cap, &tic->flags);
270
271	_ti_encode_buf(&cap, &tic->nums);
272	_ti_encode_buf(&cap, &tic->strs);
273	_ti_encode_buf(&cap, &tic->extras);
274
275	return (uint8_t *)cap - *buf;
276}
277
278static int
279encode_string(const char *term, const char *cap, TBUF *tbuf, const char *str,
280    int flags)
281{
282	int slash, i, num;
283	char ch, *p, *s, last;
284
285	if (_ti_grow_tbuf(tbuf, strlen(str) + 1) == NULL)
286		return -1;
287	p = s = tbuf->buf + tbuf->bufpos;
288	slash = 0;
289	last = '\0';
290	/* Convert escape codes */
291	while ((ch = *str++) != '\0') {
292		if (ch == '\n') {
293			/* Following a newline, strip leading whitespace from
294			 * capability strings. */
295			while (isspace((unsigned char)*str))
296				str++;
297			continue;
298		}
299		if (slash == 0 && ch == '\\') {
300			slash = 1;
301			continue;
302		}
303		if (slash == 0) {
304			if (last != '%' && ch == '^') {
305				ch = *str++;
306				if (((unsigned char)ch) >= 128)
307					dowarn(flags,
308					    "%s: %s: illegal ^ character",
309					    term, cap);
310				if (ch == '\0')
311					break;
312				if (ch == '?')
313					ch = '\177';
314				else if ((ch &= 037) == 0)
315					ch = (char)128;
316			} else if (!isprint((unsigned char)ch))
317				dowarn(flags,
318				    "%s: %s: unprintable character",
319				    term, cap);
320			*p++ = ch;
321			last = ch;
322			continue;
323		}
324		slash = 0;
325		if (ch >= '0' && ch <= '7') {
326			num = ch - '0';
327			for (i = 0; i < 2; i++) {
328				if (*str < '0' || *str > '7') {
329					if (isdigit((unsigned char)*str))
330						dowarn(flags,
331						    "%s: %s: non octal"
332						    " digit", term, cap);
333					else
334						break;
335				}
336				num = num * 8 + *str++ - '0';
337			}
338			if (num == 0)
339				num = 0200;
340			*p++ = (char)num;
341			continue;
342		}
343		switch (ch) {
344		case 'a':
345			*p++ = '\a';
346			break;
347		case 'b':
348			*p++ = '\b';
349			break;
350		case 'e': /* FALLTHROUGH */
351		case 'E':
352			*p++ = '\033';
353			break;
354		case 'f':
355			*p++ = '\014';
356			break;
357		case 'l': /* FALLTHROUGH */
358		case 'n':
359			*p++ = '\n';
360			break;
361		case 'r':
362			*p++ = '\r';
363			break;
364		case 's':
365			*p++ = ' ';
366			break;
367		case 't':
368			*p++ = '\t';
369			break;
370		default:
371			/* We should warn here */
372		case '^':
373		case ',':
374		case ':':
375		case '|':
376			*p++ = ch;
377			break;
378		}
379		last = ch;
380	}
381	*p++ = '\0';
382	tbuf->bufpos += (size_t)(p - s);
383	return 0;
384}
385
/*
 * Split the next sep delimited token off the string at *cap.
 *
 * Leading whitespace is skipped.  A separator that directly follows '\'
 * or '^' does not end the token.  When sep is ':' (termcap), "\E" keeps
 * the escape active for one more character.  stresep(3) cannot be used
 * because two escape characters are needed.
 *
 * The separator that ends the token is overwritten with NUL and *cap is
 * left pointing at the character after it, or at the terminating NUL when
 * the input is used up.
 *
 * Returns the start of the token, or NULL if only whitespace remained.
 */
char *
_ti_get_token(char **cap, char sep)
{
	char *cur, *start;
	char pending;

	cur = *cap;
	while (isspace((unsigned char)*cur))
		cur++;
	if (*cur == '\0') {
		*cap = cur;
		return NULL;
	}

	start = cur;
	pending = '\0';
	while (*cur != '\0') {
		if (pending == '\0') {
			if (*cur == sep)
				break;
			if (*cur == '\\' || *cur == '^')
				pending = *cur;
		} else if (sep == ':' && pending == '\\' && *cur == 'E') {
			/* termcap /E/ is valid */
			pending = 'x';
		} else {
			pending = '\0';
		}
		cur++;
	}

	if (*cur != '\0')
		*cur++ = '\0';

	*cap = cur;
	return start;
}
419
420static int
421_ti_find_rtype(const char *cap)
422{
423	const char *ptr;
424
425	for (ptr = cap; (ptr = strchr(ptr, '#')) != NULL;) {
426		if (strtol(++ptr, NULL, 0) > SHRT_MAX) {
427			return TERMINFO_RTYPE;
428		}
429	}
430	return TERMINFO_RTYPE_O1;
431}
432
433int
434_ti_encode_buf_id_num(TBUF *tbuf, int ind, int num, size_t len)
435{
436	if (!_ti_grow_tbuf(tbuf, sizeof(uint16_t) + len))
437		return 0;
438	_ti_encode_buf_16(tbuf, ind);
439	if (len == sizeof(uint32_t))
440		_ti_encode_buf_32(tbuf, num);
441	else
442		_ti_encode_buf_16(tbuf, num);
443	tbuf->entries++;
444	return 1;
445}
446
447int
448_ti_encode_buf_id_count_str(TBUF *tbuf, int ind, const void *buf, size_t len)
449{
450	if (!_ti_grow_tbuf(tbuf, 2 * sizeof(uint16_t) + len))
451		return 0;
452	_ti_encode_buf_16(tbuf, ind);
453	_ti_encode_buf_count_str(tbuf, buf, len);
454	tbuf->entries++;
455	return 1;
456}
457
458int
459_ti_encode_buf_id_flags(TBUF *tbuf, int ind, int flag)
460{
461	if (!_ti_grow_tbuf(tbuf, sizeof(uint16_t) + 1))
462		return 0;
463	_ti_encode_buf_16(tbuf, ind);
464	tbuf->buf[tbuf->bufpos++] = flag;
465	tbuf->entries++;
466	return 1;
467}
468
469TIC *
470_ti_compile(char *cap, int flags)
471{
472	char *token, *p, *e, *name, *desc, *alias;
473	signed char flag;
474	long cnum;
475	short ind;
476	int num;
477	size_t len;
478	TBUF buf;
479	TIC *tic;
480
481	_DIAGASSERT(cap != NULL);
482
483	name = _ti_get_token(&cap, ',');
484	if (name == NULL) {
485		dowarn(flags, "no separator found: %s", cap);
486		return NULL;
487	}
488	desc = strrchr(name, '|');
489	if (desc != NULL)
490		*desc++ = '\0';
491	alias = strchr(name, '|');
492	if (alias != NULL)
493		*alias++ = '\0';
494
495	if (strlen(name) > UINT16_MAX - 1) {
496		dowarn(flags, "%s: name too long", name);
497		return NULL;
498	}
499	if (desc != NULL && strlen(desc) > UINT16_MAX - 1) {
500		dowarn(flags, "%s: description too long: %s", name, desc);
501		return NULL;
502	}
503	if (alias != NULL && strlen(alias) > UINT16_MAX - 1) {
504		dowarn(flags, "%s: alias too long: %s", name, alias);
505		return NULL;
506	}
507
508	tic = calloc(sizeof(*tic), 1);
509	if (tic == NULL)
510		return NULL;
511
512	tic->rtype = (flags & TIC_COMPAT_V1) ? TERMINFO_RTYPE_O1 :
513	    _ti_find_rtype(cap);
514	buf.buf = NULL;
515	buf.buflen = 0;
516
517	tic->name = _ti_getname(tic->rtype, name);
518	if (tic->name == NULL)
519		goto error;
520	if (alias != NULL && flags & TIC_ALIAS) {
521		tic->alias = _ti_getname(tic->rtype, alias);
522		if (tic->alias == NULL)
523			goto error;
524	}
525	if (desc != NULL && flags & TIC_DESCRIPTION) {
526		tic->desc = strdup(desc);
527		if (tic->desc == NULL)
528			goto error;
529	}
530
531	for (token = _ti_get_token(&cap, ',');
532	     token != NULL && *token != '\0';
533	     token = _ti_get_token(&cap, ','))
534	{
535		/* Skip commented caps */
536		if (!(flags & TIC_COMMENT) && token[0] == '.')
537			continue;
538
539		/* Obsolete entries */
540		if (token[0] == 'O' && token[1] == 'T') {
541			if (!(flags & TIC_EXTRA))
542				continue;
543			token += 2;
544		}
545
546		/* str cap */
547		p = strchr(token, '=');
548		if (p != NULL) {
549			*p++ = '\0';
550			/* Don't use the string if we already have it */
551			ind = (short)_ti_strindex(token);
552			if (ind != -1 &&
553			    _ti_find_cap(tic, &tic->strs, 's', ind) != NULL)
554				continue;
555
556			/* Encode the string to our scratch buffer */
557			buf.bufpos = 0;
558			if (encode_string(tic->name, token,
559				&buf, p, flags) == -1)
560				goto error;
561			if (buf.bufpos > UINT16_MAX - 1) {
562				dowarn(flags, "%s: %s: string is too long",
563				    tic->name, token);
564				continue;
565			}
566			if (!VALID_STRING(buf.buf)) {
567				dowarn(flags, "%s: %s: invalid string",
568				    tic->name, token);
569				continue;
570			}
571
572			if (ind == -1) {
573				if (!_ti_store_extra(tic, 1, token, 's', -1, -2,
574				    buf.buf, buf.bufpos, flags))
575					goto error;
576			} else {
577				if (!_ti_encode_buf_id_count_str(&tic->strs,
578				    ind, buf.buf, buf.bufpos))
579					goto error;
580			}
581			continue;
582		}
583
584		/* num cap */
585		p = strchr(token, '#');
586		if (p != NULL) {
587			*p++ = '\0';
588			/* Don't use the number if we already have it */
589			ind = (short)_ti_numindex(token);
590			if (ind != -1 &&
591			    _ti_find_cap(tic, &tic->nums, 'n', ind) != NULL)
592				continue;
593
594			cnum = strtol(p, &e, 0);
595			if (*e != '\0') {
596				dowarn(flags, "%s: %s: not a number",
597				    tic->name, token);
598				continue;
599			}
600			if (!VALID_NUMERIC(cnum) || cnum > INT32_MAX) {
601				dowarn(flags, "%s: %s: number %ld out of range",
602				    tic->name, token, cnum);
603				continue;
604			}
605
606			num = (int)cnum;
607			if (ind == -1) {
608				if (!_ti_store_extra(tic, 1, token, 'n', -1,
609				    num, NULL, 0, flags))
610					goto error;
611			} else {
612				if (!_ti_encode_buf_id_num(&tic->nums,
613				    ind, num, _ti_numsize(tic)))
614					    goto error;
615			}
616			continue;
617		}
618
619		flag = 1;
620		len = strlen(token) - 1;
621		if (token[len] == '@') {
622			flag = CANCELLED_BOOLEAN;
623			token[len] = '\0';
624		}
625		ind = (short)_ti_flagindex(token);
626		if (ind == -1 && flag == CANCELLED_BOOLEAN) {
627			if ((ind = (short)_ti_numindex(token)) != -1) {
628				if (_ti_find_cap(tic, &tic->nums, 'n', ind)
629				    != NULL)
630					continue;
631				if (!_ti_encode_buf_id_num(&tic->nums, ind,
632				    CANCELLED_NUMERIC, _ti_numsize(tic)))
633					goto error;
634				continue;
635			} else if ((ind = (short)_ti_strindex(token)) != -1) {
636				if (_ti_find_cap(tic, &tic->strs, 's', ind)
637				    != NULL)
638					continue;
639				if (!_ti_encode_buf_id_num(
640				    &tic->strs, ind, 0, sizeof(uint16_t)))
641					goto error;
642				continue;
643			}
644		}
645		if (ind == -1) {
646			if (!_ti_store_extra(tic, 1, token, 'f', flag, 0, NULL,
647			    0, flags))
648				goto error;
649		} else if (_ti_find_cap(tic, &tic->flags, 'f', ind) == NULL) {
650			if (!_ti_encode_buf_id_flags(&tic->flags, ind, flag))
651				goto error;
652		}
653	}
654
655	free(buf.buf);
656	return tic;
657
658error:
659	free(buf.buf);
660	_ti_freetic(tic);
661	return NULL;
662}
663
664void
665_ti_freetic(TIC *tic)
666{
667
668	if (tic != NULL) {
669		free(tic->name);
670		free(tic->alias);
671		free(tic->desc);
672		free(tic->extras.buf);
673		free(tic->flags.buf);
674		free(tic->nums.buf);
675		free(tic->strs.buf);
676		free(tic);
677	}
678}
679