compile.c revision 1.15
/* $NetBSD: compile.c,v 1.15 2020/03/27 15:11:57 christos Exp $ */

/*
 * Copyright (c) 2009, 2010, 2011, 2020 The NetBSD Foundation, Inc.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Roy Marples.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
29
30#if HAVE_NBTOOL_CONFIG_H
31#include "nbtool_config.h"
32#endif
33
34#include <sys/cdefs.h>
35__RCSID("$NetBSD: compile.c,v 1.15 2020/03/27 15:11:57 christos Exp $");
36
37#if !HAVE_NBTOOL_CONFIG_H || HAVE_SYS_ENDIAN_H
38#include <sys/endian.h>
39#endif
40
41#include <assert.h>
42#include <ctype.h>
43#include <err.h>
44#include <errno.h>
45#include <limits.h>
46#include <stdarg.h>
47#include <stdlib.h>
48#include <stdint.h>
49#include <stdio.h>
50#include <string.h>
51#include <term_private.h>
52#include <term.h>
53
54static void __printflike(2, 3)
55dowarn(int flags, const char *fmt, ...)
56{
57	va_list va;
58
59	errno = EINVAL;
60	if (flags & TIC_WARNING) {
61		va_start(va, fmt);
62		vwarnx(fmt, va);
63		va_end(va);
64	}
65}
66
67char *
68_ti_grow_tbuf(TBUF *tbuf, size_t len)
69{
70	char *buf;
71	size_t l;
72
73	_DIAGASSERT(tbuf != NULL);
74
75	l = tbuf->bufpos + len;
76	if (l > tbuf->buflen) {
77		if (tbuf->buflen == 0)
78			buf = malloc(l);
79		else
80			buf = realloc(tbuf->buf, l);
81		if (buf == NULL)
82			return NULL;
83		tbuf->buf = buf;
84		tbuf->buflen = l;
85	}
86	return tbuf->buf;
87}
88
89char *
90_ti_find_cap(TIC *tic, TBUF *tbuf, char type, short ind)
91{
92	size_t n;
93	uint16_t num;
94	char *cap;
95
96	_DIAGASSERT(tbuf != NULL);
97
98	cap = tbuf->buf;
99	for (n = tbuf->entries; n > 0; n--) {
100		num = le16dec(cap);
101		cap += sizeof(uint16_t);
102		if ((short)num == ind)
103			return cap;
104		switch (type) {
105		case 'f':
106			cap++;
107			break;
108		case 'n':
109			cap += _ti_numsize(tic);
110			break;
111		case 's':
112			num = le16dec(cap);
113			cap += sizeof(uint16_t);
114			cap += num;
115			break;
116		}
117	}
118
119	errno = ESRCH;
120	return NULL;
121}
122
123char *
124_ti_find_extra(TIC *tic, TBUF *tbuf, const char *code)
125{
126	size_t n;
127	uint16_t num;
128	char *cap;
129
130	_DIAGASSERT(tbuf != NULL);
131	_DIAGASSERT(code != NULL);
132
133	cap = tbuf->buf;
134	for (n = tbuf->entries; n > 0; n--) {
135		num = le16dec(cap);
136		cap += sizeof(uint16_t);
137		if (strcmp(cap, code) == 0)
138			return cap + num;
139		cap += num;
140		switch (*cap++) {
141		case 'f':
142			cap++;
143			break;
144		case 'n':
145			cap += _ti_numsize(tic);
146			break;
147		case 's':
148			num = le16dec(cap);
149			cap += sizeof(uint16_t);
150			cap += num;
151			break;
152		}
153	}
154
155	errno = ESRCH;
156	return NULL;
157}
158
159void
160_ti_encode_num(TIC *tic, TBUF *rbuf, int num)
161{
162	if (_ti_numsize(tic) == sizeof(uint16_t)) {
163		if (num > SHRT_MAX)
164			num = SHRT_MAX;
165		le16enc(rbuf->buf + rbuf->bufpos, (uint16_t)num);
166	} else {
167		le32enc(rbuf->buf + rbuf->bufpos, (uint32_t)num);
168	}
169	rbuf->bufpos += _ti_numsize(tic);
170}
171
172int
173_ti_decode_num(int rtype, const char **cap)
174{
175	int rv;
176
177	if (rtype == TERMINFO_RTYPE_O1) {
178		rv = (int)le16dec(*cap);
179		*cap += sizeof(uint16_t);
180	} else {
181		rv = (int)le32dec(*cap);
182		*cap += sizeof(uint32_t);
183	}
184	return rv;
185}
186
187char *
188_ti_getname(int rtype, const char *orig)
189{
190	char *name;
191
192	if (rtype == TERMINFO_RTYPE) {
193		if (asprintf(&name, "%s@v3", orig) < 0)
194			name = NULL;
195	} else {
196		name = strdup(orig);
197	}
198	return name;
199}
200
201size_t
202_ti_store_extra(TIC *tic, int wrn, const char *id, char type, char flag,
203    int num, const char *str, size_t strl, int flags)
204{
205	size_t l;
206
207	_DIAGASSERT(tic != NULL);
208
209	if (strcmp(id, "use") != 0) {
210		if (_ti_find_extra(tic, &tic->extras, id) != NULL)
211			return 0;
212		if (!(flags & TIC_EXTRA)) {
213			if (wrn != 0)
214				dowarn(flags, "%s: %s: unknown capability",
215				    tic->name, id);
216			return 0;
217		}
218	}
219
220	l = strlen(id) + 1;
221	if (l > UINT16_T_MAX) {
222		dowarn(flags, "%s: %s: cap name is too long", tic->name, id);
223		return 0;
224	}
225
226	if (!_ti_grow_tbuf(&tic->extras,
227		l + strl + sizeof(uint16_t) + _ti_numsize(tic) + 1))
228		return 0;
229	le16enc(tic->extras.buf + tic->extras.bufpos, (uint16_t)l);
230	tic->extras.bufpos += sizeof(uint16_t);
231	memcpy(tic->extras.buf + tic->extras.bufpos, id, l);
232	tic->extras.bufpos += l;
233	tic->extras.buf[tic->extras.bufpos++] = type;
234	switch (type) {
235	case 'f':
236		tic->extras.buf[tic->extras.bufpos++] = flag;
237		break;
238	case 'n':
239		_ti_encode_num(tic, &tic->extras, num);
240		break;
241	case 's':
242		le16enc(tic->extras.buf + tic->extras.bufpos, (uint16_t)strl);
243		tic->extras.bufpos += sizeof(uint16_t);
244		memcpy(tic->extras.buf + tic->extras.bufpos, str, strl);
245		tic->extras.bufpos += strl;
246		break;
247	}
248	tic->extras.entries++;
249	return 1;
250}
251
252ssize_t
253_ti_flatten(uint8_t **buf, const TIC *tic)
254{
255	size_t buflen, len, alen, dlen;
256	uint8_t *cap;
257
258	_DIAGASSERT(buf != NULL);
259	_DIAGASSERT(tic != NULL);
260
261	len = strlen(tic->name) + 1;
262	if (tic->alias == NULL)
263		alen = 0;
264	else
265		alen = strlen(tic->alias) + 1;
266	if (tic->desc == NULL)
267		dlen = 0;
268	else
269		dlen = strlen(tic->desc) + 1;
270	buflen = sizeof(char) +
271	    sizeof(uint16_t) + len +
272	    sizeof(uint16_t) + alen +
273	    sizeof(uint16_t) + dlen +
274	    (sizeof(uint16_t) * 2) + tic->flags.bufpos +
275	    (sizeof(uint16_t) * 2) + tic->nums.bufpos +
276	    (sizeof(uint16_t) * 2) + tic->strs.bufpos +
277	    (sizeof(uint16_t) * 2) + tic->extras.bufpos;
278	*buf = malloc(buflen);
279	if (*buf == NULL)
280		return -1;
281
282	cap = *buf;
283	*cap++ = tic->rtype;
284	le16enc(cap, (uint16_t)len);
285	cap += sizeof(uint16_t);
286	memcpy(cap, tic->name, len);
287	cap += len;
288
289	le16enc(cap, (uint16_t)alen);
290	cap += sizeof(uint16_t);
291	if (tic->alias != NULL) {
292		memcpy(cap, tic->alias, alen);
293		cap += alen;
294	}
295	le16enc(cap, (uint16_t)dlen);
296	cap += sizeof(uint16_t);
297	if (tic->desc != NULL) {
298		memcpy(cap, tic->desc, dlen);
299		cap += dlen;
300	}
301
302	if (tic->flags.entries == 0) {
303		le16enc(cap, 0);
304		cap += sizeof(uint16_t);
305	} else {
306		le16enc(cap, (uint16_t)(tic->flags.bufpos + sizeof(uint16_t)));
307		cap += sizeof(uint16_t);
308		le16enc(cap, (uint16_t)tic->flags.entries);
309		cap += sizeof(uint16_t);
310		memcpy(cap, tic->flags.buf, tic->flags.bufpos);
311		cap += tic->flags.bufpos;
312	}
313
314	if (tic->nums.entries == 0) {
315		le16enc(cap, 0);
316		cap += sizeof(uint16_t);
317	} else {
318		le16enc(cap, (uint16_t)(tic->nums.bufpos + sizeof(uint16_t)));
319		cap += sizeof(uint16_t);
320		le16enc(cap, (uint16_t)tic->nums.entries);
321		cap += sizeof(uint16_t);
322		memcpy(cap, tic->nums.buf, tic->nums.bufpos);
323		cap += tic->nums.bufpos;
324	}
325
326	if (tic->strs.entries == 0) {
327		le16enc(cap, 0);
328		cap += sizeof(uint16_t);
329	} else {
330		le16enc(cap, (uint16_t)(tic->strs.bufpos + sizeof(uint16_t)));
331		cap += sizeof(uint16_t);
332		le16enc(cap, (uint16_t)tic->strs.entries);
333		cap += sizeof(uint16_t);
334		memcpy(cap, tic->strs.buf, tic->strs.bufpos);
335		cap += tic->strs.bufpos;
336	}
337
338	if (tic->extras.entries == 0) {
339		le16enc(cap, 0);
340		cap += sizeof(uint16_t);
341	} else {
342		le16enc(cap, (uint16_t)(tic->extras.bufpos + sizeof(uint16_t)));
343		cap += sizeof(uint16_t);
344		le16enc(cap, (uint16_t)tic->extras.entries);
345		cap += sizeof(uint16_t);
346		memcpy(cap, tic->extras.buf, tic->extras.bufpos);
347		cap += tic->extras.bufpos;
348	}
349
350	return cap - *buf;
351}
352
353static int
354encode_string(const char *term, const char *cap, TBUF *tbuf, const char *str,
355    int flags)
356{
357	int slash, i, num;
358	char ch, *p, *s, last;
359
360	if (_ti_grow_tbuf(tbuf, strlen(str) + 1) == NULL)
361		return -1;
362	p = s = tbuf->buf + tbuf->bufpos;
363	slash = 0;
364	last = '\0';
365	/* Convert escape codes */
366	while ((ch = *str++) != '\0') {
367		if (ch == '\n') {
368			/* Following a newline, strip leading whitespace from
369			 * capability strings. */
370			while (isspace((unsigned char)*str))
371				str++;
372			continue;
373		}
374		if (slash == 0 && ch == '\\') {
375			slash = 1;
376			continue;
377		}
378		if (slash == 0) {
379			if (last != '%' && ch == '^') {
380				ch = *str++;
381				if (((unsigned char)ch) >= 128)
382					dowarn(flags,
383					    "%s: %s: illegal ^ character",
384					    term, cap);
385				if (ch == '\0')
386					break;
387				if (ch == '?')
388					ch = '\177';
389				else if ((ch &= 037) == 0)
390					ch = (char)128;
391			} else if (!isprint((unsigned char)ch))
392				dowarn(flags,
393				    "%s: %s: unprintable character",
394				    term, cap);
395			*p++ = ch;
396			last = ch;
397			continue;
398		}
399		slash = 0;
400		if (ch >= '0' && ch <= '7') {
401			num = ch - '0';
402			for (i = 0; i < 2; i++) {
403				if (*str < '0' || *str > '7') {
404					if (isdigit((unsigned char)*str))
405						dowarn(flags,
406						    "%s: %s: non octal"
407						    " digit", term, cap);
408					else
409						break;
410				}
411				num = num * 8 + *str++ - '0';
412			}
413			if (num == 0)
414				num = 0200;
415			*p++ = (char)num;
416			continue;
417		}
418		switch (ch) {
419		case 'a':
420			*p++ = '\a';
421			break;
422		case 'b':
423			*p++ = '\b';
424			break;
425		case 'e': /* FALLTHROUGH */
426		case 'E':
427			*p++ = '\033';
428			break;
429		case 'f':
430			*p++ = '\014';
431			break;
432		case 'l': /* FALLTHROUGH */
433		case 'n':
434			*p++ = '\n';
435			break;
436		case 'r':
437			*p++ = '\r';
438			break;
439		case 's':
440			*p++ = ' ';
441			break;
442		case 't':
443			*p++ = '\t';
444			break;
445		default:
446			/* We should warn here */
447		case '^':
448		case ',':
449		case ':':
450		case '|':
451			*p++ = ch;
452			break;
453		}
454		last = ch;
455	}
456	*p++ = '\0';
457	tbuf->bufpos += (size_t)(p - s);
458	return 0;
459}
460
/*
 * Split the next sep-delimited token off the string at *cap.
 *
 * Leading whitespace is skipped.  A character following '\' or '^' never
 * ends the token, and when sep is ':' the character following "\E" is
 * protected as well.  The separator that ends the token is overwritten
 * with NUL and *cap is left pointing just past it (or at the final NUL).
 *
 * Returns the start of the token, or NULL when only whitespace remains.
 * stresep(3) is not usable here because two escape characters are needed.
 */
char *
_ti_get_token(char **cap, char sep)
{
	char *start, *cur;
	char pending;

	cur = *cap;
	while (isspace((unsigned char)*cur))
		cur++;
	if (*cur == '\0') {
		*cap = cur;
		return NULL;
	}

	start = cur;
	pending = '\0';
	while (*cur != '\0') {
		if (pending == '\0') {
			if (*cur == sep)
				break;
			if (*cur == '\\' || *cur == '^')
				pending = *cur;
		} else if (sep == ':' && pending == '\\' && *cur == 'E') {
			/* termcap /E/ is valid */
			pending = 'x';
		} else {
			pending = '\0';
		}
		cur++;
	}

	if (*cur != '\0')
		*cur++ = '\0';
	*cap = cur;

	return start;
}
494
495static int
496_ti_find_rtype(const char *cap)
497{
498	for (const char *ptr = cap; (ptr = strchr(ptr, '#')) != NULL;) {
499		if (strtol(++ptr, NULL, 0) > SHRT_MAX) {
500			return TERMINFO_RTYPE;
501		}
502	}
503	return TERMINFO_RTYPE_O1;
504}
505
506TIC *
507_ti_compile(char *cap, int flags)
508{
509	char *token, *p, *e, *name, *desc, *alias;
510	signed char flag;
511	long cnum;
512	short ind;
513	int num;
514	size_t len;
515	TBUF buf;
516	TIC *tic;
517
518	_DIAGASSERT(cap != NULL);
519
520	name = _ti_get_token(&cap, ',');
521	if (name == NULL) {
522		dowarn(flags, "no separator found: %s", cap);
523		return NULL;
524	}
525	desc = strrchr(name, '|');
526	if (desc != NULL)
527		*desc++ = '\0';
528	alias = strchr(name, '|');
529	if (alias != NULL)
530		*alias++ = '\0';
531
532	if (strlen(name) > UINT16_MAX - 1) {
533		dowarn(flags, "%s: name too long", name);
534		return NULL;
535	}
536	if (desc != NULL && strlen(desc) > UINT16_MAX - 1) {
537		dowarn(flags, "%s: description too long: %s", name, desc);
538		return NULL;
539	}
540	if (alias != NULL && strlen(alias) > UINT16_MAX - 1) {
541		dowarn(flags, "%s: alias too long: %s", name, alias);
542		return NULL;
543	}
544
545	tic = calloc(sizeof(*tic), 1);
546	if (tic == NULL)
547		return NULL;
548
549	tic->rtype = (flags & TIC_COMPAT_V1) ? TERMINFO_RTYPE_O1 :
550	    _ti_find_rtype(cap);
551	buf.buf = NULL;
552	buf.buflen = 0;
553
554	tic->name = _ti_getname(tic->rtype, name);
555	if (tic->name == NULL)
556		goto error;
557	if (alias != NULL && flags & TIC_ALIAS) {
558		tic->alias = _ti_getname(tic->rtype, alias);
559		if (tic->alias == NULL)
560			goto error;
561	}
562	if (desc != NULL && flags & TIC_DESCRIPTION) {
563		tic->desc = strdup(desc);
564		if (tic->desc == NULL)
565			goto error;
566	}
567
568	for (token = _ti_get_token(&cap, ',');
569	     token != NULL && *token != '\0';
570	     token = _ti_get_token(&cap, ','))
571	{
572		/* Skip commented caps */
573		if (!(flags & TIC_COMMENT) && token[0] == '.')
574			continue;
575
576		/* Obsolete entries */
577		if (token[0] == 'O' && token[1] == 'T') {
578			if (!(flags & TIC_EXTRA))
579				continue;
580			token += 2;
581		}
582
583		/* str cap */
584		p = strchr(token, '=');
585		if (p != NULL) {
586			*p++ = '\0';
587			/* Don't use the string if we already have it */
588			ind = (short)_ti_strindex(token);
589			if (ind != -1 &&
590			    _ti_find_cap(tic, &tic->strs, 's', ind) != NULL)
591				continue;
592
593			/* Encode the string to our scratch buffer */
594			buf.bufpos = 0;
595			if (encode_string(tic->name, token,
596				&buf, p, flags) == -1)
597				goto error;
598			if (buf.bufpos > UINT16_MAX - 1) {
599				dowarn(flags, "%s: %s: string is too long",
600				    tic->name, token);
601				continue;
602			}
603			if (!VALID_STRING(buf.buf)) {
604				dowarn(flags, "%s: %s: invalid string",
605				    tic->name, token);
606				continue;
607			}
608
609			if (ind == -1)
610				_ti_store_extra(tic, 1, token, 's', -1, -2,
611				    buf.buf, buf.bufpos, flags);
612			else {
613				if (!_ti_grow_tbuf(&tic->strs,
614					(sizeof(uint16_t) * 2) + buf.bufpos))
615					goto error;
616				le16enc(tic->strs.buf + tic->strs.bufpos, (uint16_t)ind);
617				tic->strs.bufpos += sizeof(uint16_t);
618				le16enc(tic->strs.buf + tic->strs.bufpos,
619				    (uint16_t)buf.bufpos);
620				tic->strs.bufpos += sizeof(uint16_t);
621				memcpy(tic->strs.buf + tic->strs.bufpos,
622				    buf.buf, buf.bufpos);
623				tic->strs.bufpos += buf.bufpos;
624				tic->strs.entries++;
625			}
626			continue;
627		}
628
629		/* num cap */
630		p = strchr(token, '#');
631		if (p != NULL) {
632			*p++ = '\0';
633			/* Don't use the number if we already have it */
634			ind = (short)_ti_numindex(token);
635			if (ind != -1 &&
636			    _ti_find_cap(tic, &tic->nums, 'n', ind) != NULL)
637				continue;
638
639			cnum = strtol(p, &e, 0);
640			if (*e != '\0') {
641				dowarn(flags, "%s: %s: not a number",
642				    tic->name, token);
643				continue;
644			}
645			if (!VALID_NUMERIC(cnum) || cnum > INT32_MAX) {
646				dowarn(flags, "%s: %s: number %ld out of range",
647				    tic->name, token, cnum);
648				continue;
649			}
650
651			num = (int)cnum;
652			if (ind == -1)
653				_ti_store_extra(tic, 1, token, 'n', -1,
654				    num, NULL, 0, flags);
655			else {
656				if (_ti_grow_tbuf(&tic->nums,
657				    sizeof(uint16_t) + _ti_numsize(tic))==NULL)
658					goto error;
659				le16enc(tic->nums.buf + tic->nums.bufpos,
660				    (uint16_t)ind);
661				tic->nums.bufpos += sizeof(uint16_t);
662				_ti_encode_num(tic, &tic->nums, num);
663				tic->nums.entries++;
664			}
665			continue;
666		}
667
668		flag = 1;
669		len = strlen(token) - 1;
670		if (token[len] == '@') {
671			flag = CANCELLED_BOOLEAN;
672			token[len] = '\0';
673		}
674		ind = (short)_ti_flagindex(token);
675		if (ind == -1 && flag == CANCELLED_BOOLEAN) {
676			if ((ind = (short)_ti_numindex(token)) != -1) {
677				if (_ti_find_cap(tic, &tic->nums, 'n', ind)
678				    != NULL)
679					continue;
680				if (_ti_grow_tbuf(&tic->nums, sizeof(uint16_t)
681				    + _ti_numsize(tic)) == NULL)
682					goto error;
683				le16enc(tic->nums.buf + tic->nums.bufpos,
684				    (uint16_t)ind);
685				tic->nums.bufpos += sizeof(uint16_t);
686				_ti_encode_num(tic, &tic->nums,
687				    CANCELLED_NUMERIC);
688				tic->nums.entries++;
689				continue;
690			} else if ((ind = (short)_ti_strindex(token)) != -1) {
691				if (_ti_find_cap(tic, &tic->strs, 's', ind)
692				    != NULL)
693					continue;
694				if (_ti_grow_tbuf(&tic->strs,
695				    (sizeof(uint16_t) * 2) + 1) == NULL)
696					goto error;
697				le16enc(tic->strs.buf + tic->strs.bufpos, (uint16_t)ind);
698				tic->strs.bufpos += sizeof(uint16_t);
699				le16enc(tic->strs.buf + tic->strs.bufpos, 0);
700				tic->strs.bufpos += sizeof(uint16_t);
701				tic->strs.entries++;
702				continue;
703			}
704		}
705		if (ind == -1)
706			_ti_store_extra(tic, 1, token, 'f', flag, 0, NULL, 0,
707			    flags);
708		else if (_ti_find_cap(tic, &tic->flags, 'f', ind) == NULL) {
709			if (_ti_grow_tbuf(&tic->flags, sizeof(uint16_t) + 1)
710			    == NULL)
711				goto error;
712			le16enc(tic->flags.buf + tic->flags.bufpos,
713			    (uint16_t)ind);
714			tic->flags.bufpos += sizeof(uint16_t);
715			tic->flags.buf[tic->flags.bufpos++] = flag;
716			tic->flags.entries++;
717		}
718	}
719
720	free(buf.buf);
721	return tic;
722
723error:
724	free(buf.buf);
725	_ti_freetic(tic);
726	return NULL;
727}
728
729void
730_ti_freetic(TIC *tic)
731{
732
733	if (tic != NULL) {
734		free(tic->name);
735		free(tic->alias);
736		free(tic->desc);
737		free(tic->extras.buf);
738		free(tic->flags.buf);
739		free(tic->nums.buf);
740		free(tic->strs.buf);
741		free(tic);
742	}
743}
744