compile.c revision 1.20
1/* $NetBSD: compile.c,v 1.20 2020/03/28 15:45:56 christos Exp $ */ 2 3/* 4 * Copyright (c) 2009, 2010, 2011, 2020 The NetBSD Foundation, Inc. 5 * 6 * This code is derived from software contributed to The NetBSD Foundation 7 * by Roy Marples. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30#if HAVE_NBTOOL_CONFIG_H 31#include "nbtool_config.h" 32#endif 33 34#include <sys/cdefs.h> 35__RCSID("$NetBSD: compile.c,v 1.20 2020/03/28 15:45:56 christos Exp $"); 36 37#if !HAVE_NBTOOL_CONFIG_H || HAVE_SYS_ENDIAN_H 38#include <sys/endian.h> 39#endif 40 41#include <assert.h> 42#include <ctype.h> 43#include <err.h> 44#include <errno.h> 45#include <limits.h> 46#include <stdarg.h> 47#include <stdlib.h> 48#include <stdint.h> 49#include <stdio.h> 50#include <string.h> 51#include <term_private.h> 52#include <term.h> 53 54static void __printflike(2, 3) 55dowarn(int flags, const char *fmt, ...) 56{ 57 va_list va; 58 59 errno = EINVAL; 60 if (flags & TIC_WARNING) { 61 va_start(va, fmt); 62 vwarnx(fmt, va); 63 va_end(va); 64 } 65} 66 67char * 68_ti_grow_tbuf(TBUF *tbuf, size_t len) 69{ 70 char *buf; 71 size_t l; 72 73 _DIAGASSERT(tbuf != NULL); 74 75 l = tbuf->bufpos + len; 76 if (l > tbuf->buflen) { 77 if (tbuf->buflen == 0) 78 buf = malloc(l); 79 else 80 buf = realloc(tbuf->buf, l); 81 if (buf == NULL) 82 return NULL; 83 tbuf->buf = buf; 84 tbuf->buflen = l; 85 } 86 return tbuf->buf; 87} 88 89const char * 90_ti_find_cap(TIC *tic, TBUF *tbuf, char type, short ind) 91{ 92 size_t n; 93 uint16_t num; 94 const char *cap; 95 96 _DIAGASSERT(tbuf != NULL); 97 98 cap = tbuf->buf; 99 for (n = tbuf->entries; n > 0; n--) { 100 num = _ti_decode_16(&cap); 101 if ((short)num == ind) 102 return cap; 103 switch (type) { 104 case 'f': 105 cap++; 106 break; 107 case 'n': 108 cap += _ti_numsize(tic); 109 break; 110 case 's': 111 num = _ti_decode_16(&cap); 112 cap += num; 113 break; 114 } 115 } 116 117 errno = ESRCH; 118 return NULL; 119} 120 121const char * 122_ti_find_extra(TIC *tic, TBUF *tbuf, const char *code) 123{ 124 size_t n; 125 uint16_t num; 126 const char *cap; 127 128 _DIAGASSERT(tbuf != NULL); 129 _DIAGASSERT(code != NULL); 130 131 cap = tbuf->buf; 132 for (n = tbuf->entries; n > 0; n--) { 133 num = _ti_decode_16(&cap); 134 if (strcmp(cap, code) == 0) 135 return cap + num; 136 cap += num; 137 switch (*cap++) { 138 case 'f': 139 cap++; 140 break; 141 case 'n': 142 cap += _ti_numsize(tic); 143 break; 144 case 's': 145 num = _ti_decode_16(&cap); 146 cap += num; 147 break; 148 } 149 } 150 151 errno = ESRCH; 152 return NULL; 153} 154 155char * 156_ti_getname(int rtype, const char *orig) 157{ 158 char *name; 159 160 if (rtype == TERMINFO_RTYPE) { 161 /* , and | are the two print characters now allowed 162 * in terminfo aliases or long descriptions. 163 * As | is generally used to delimit aliases inside the 164 * description, we use a comma. */ 165 if (asprintf(&name, "%s,v3", orig) < 0) 166 name = NULL; 167 } else { 168 name = strdup(orig); 169 } 170 return name; 171} 172 173size_t 174_ti_store_extra(TIC *tic, int wrn, const char *id, char type, char flag, 175 int num, const char *str, size_t strl, int flags) 176{ 177 size_t l; 178 179 _DIAGASSERT(tic != NULL); 180 181 if (strcmp(id, "use") != 0) { 182 if (_ti_find_extra(tic, &tic->extras, id) != NULL) 183 return 0; 184 if (!(flags & TIC_EXTRA)) { 185 if (wrn != 0) 186 dowarn(flags, "%s: %s: unknown capability", 187 tic->name, id); 188 return 0; 189 } 190 } 191 192 l = strlen(id) + 1; 193 if (l > UINT16_T_MAX) { 194 dowarn(flags, "%s: %s: cap name is too long", tic->name, id); 195 return 0; 196 } 197 198 if (!_ti_grow_tbuf(&tic->extras, 199 l + strl + sizeof(uint16_t) + _ti_numsize(tic) + 1)) 200 return 0; 201 _ti_encode_buf_count_str(&tic->extras, id, l); 202 tic->extras.buf[tic->extras.bufpos++] = type; 203 switch (type) { 204 case 'f': 205 tic->extras.buf[tic->extras.bufpos++] = flag; 206 break; 207 case 'n': 208 _ti_encode_buf_num(&tic->extras, num, tic->rtype); 209 break; 210 case 's': 211 _ti_encode_buf_count_str(&tic->extras, str, strl); 212 break; 213 } 214 tic->extras.entries++; 215 return 1; 216} 217 218static void 219_ti_encode_buf(char **cap, const TBUF *buf) 220{ 221 if (buf->entries == 0) { 222 _ti_encode_16(cap, 0); 223 } else { 224 _ti_encode_16(cap, buf->bufpos + sizeof(uint16_t)); 225 _ti_encode_16(cap, buf->entries); 226 _ti_encode_str(cap, buf->buf, buf->bufpos); 227 } 228} 229 230ssize_t 231_ti_flatten(uint8_t **buf, const TIC *tic) 232{ 233 size_t buflen, len, alen, dlen; 234 char *cap; 235 236 _DIAGASSERT(buf != NULL); 237 _DIAGASSERT(tic != NULL); 238 239 len = strlen(tic->name) + 1; 240 if (tic->alias == NULL) 241 alen = 0; 242 else 243 alen = strlen(tic->alias) + 1; 244 if (tic->desc == NULL) 245 dlen = 0; 246 else 247 dlen = strlen(tic->desc) + 1; 248 249 buflen = sizeof(char) + 250 sizeof(uint16_t) + len + 251 sizeof(uint16_t) + alen + 252 sizeof(uint16_t) + dlen + 253 (sizeof(uint16_t) * 2) + tic->flags.bufpos + 254 (sizeof(uint16_t) * 2) + tic->nums.bufpos + 255 (sizeof(uint16_t) * 2) + tic->strs.bufpos + 256 (sizeof(uint16_t) * 2) + tic->extras.bufpos; 257 258 *buf = malloc(buflen); 259 if (*buf == NULL) 260 return -1; 261 262 cap = (char *)*buf; 263 *cap++ = tic->rtype; 264 265 _ti_encode_count_str(&cap, tic->name, len); 266 _ti_encode_count_str(&cap, tic->alias, alen); 267 _ti_encode_count_str(&cap, tic->desc, dlen); 268 269 _ti_encode_buf(&cap, &tic->flags); 270 271 _ti_encode_buf(&cap, &tic->nums); 272 _ti_encode_buf(&cap, &tic->strs); 273 _ti_encode_buf(&cap, &tic->extras); 274 275 return (uint8_t *)cap - *buf; 276} 277 278static int 279encode_string(const char *term, const char *cap, TBUF *tbuf, const char *str, 280 int flags) 281{ 282 int slash, i, num; 283 char ch, *p, *s, last; 284 285 if (_ti_grow_tbuf(tbuf, strlen(str) + 1) == NULL) 286 return -1; 287 p = s = tbuf->buf + tbuf->bufpos; 288 slash = 0; 289 last = '\0'; 290 /* Convert escape codes */ 291 while ((ch = *str++) != '\0') { 292 if (ch == '\n') { 293 /* Following a newline, strip leading whitespace from 294 * capability strings. */ 295 while (isspace((unsigned char)*str)) 296 str++; 297 continue; 298 } 299 if (slash == 0 && ch == '\\') { 300 slash = 1; 301 continue; 302 } 303 if (slash == 0) { 304 if (last != '%' && ch == '^') { 305 ch = *str++; 306 if (((unsigned char)ch) >= 128) 307 dowarn(flags, 308 "%s: %s: illegal ^ character", 309 term, cap); 310 if (ch == '\0') 311 break; 312 if (ch == '?') 313 ch = '\177'; 314 else if ((ch &= 037) == 0) 315 ch = (char)128; 316 } else if (!isprint((unsigned char)ch)) 317 dowarn(flags, 318 "%s: %s: unprintable character", 319 term, cap); 320 *p++ = ch; 321 last = ch; 322 continue; 323 } 324 slash = 0; 325 if (ch >= '0' && ch <= '7') { 326 num = ch - '0'; 327 for (i = 0; i < 2; i++) { 328 if (*str < '0' || *str > '7') { 329 if (isdigit((unsigned char)*str)) 330 dowarn(flags, 331 "%s: %s: non octal" 332 " digit", term, cap); 333 else 334 break; 335 } 336 num = num * 8 + *str++ - '0'; 337 } 338 if (num == 0) 339 num = 0200; 340 *p++ = (char)num; 341 continue; 342 } 343 switch (ch) { 344 case 'a': 345 *p++ = '\a'; 346 break; 347 case 'b': 348 *p++ = '\b'; 349 break; 350 case 'e': /* FALLTHROUGH */ 351 case 'E': 352 *p++ = '\033'; 353 break; 354 case 'f': 355 *p++ = '\014'; 356 break; 357 case 'l': /* FALLTHROUGH */ 358 case 'n': 359 *p++ = '\n'; 360 break; 361 case 'r': 362 *p++ = '\r'; 363 break; 364 case 's': 365 *p++ = ' '; 366 break; 367 case 't': 368 *p++ = '\t'; 369 break; 370 default: 371 /* We should warn here */ 372 case '^': 373 case ',': 374 case ':': 375 case '|': 376 *p++ = ch; 377 break; 378 } 379 last = ch; 380 } 381 *p++ = '\0'; 382 tbuf->bufpos += (size_t)(p - s); 383 return 0; 384} 385 386char * 387_ti_get_token(char **cap, char sep) 388{ 389 char esc, *token; 390 391 while (isspace((unsigned char)**cap)) 392 (*cap)++; 393 if (**cap == '\0') 394 return NULL; 395 396 /* We can't use stresep(3) as ^ we need two escape chars */ 397 esc = '\0'; 398 for (token = *cap; 399 **cap != '\0' && (esc != '\0' || **cap != sep); 400 (*cap)++) 401 { 402 if (esc == '\0') { 403 if (**cap == '\\' || **cap == '^') 404 esc = **cap; 405 } else { 406 /* termcap /E/ is valid */ 407 if (sep == ':' && esc == '\\' && **cap == 'E') 408 esc = 'x'; 409 else 410 esc = '\0'; 411 } 412 } 413 414 if (**cap != '\0') 415 *(*cap)++ = '\0'; 416 417 return token; 418} 419 420static int 421_ti_find_rtype(const char *cap) 422{ 423 const char *ptr; 424 425 for (ptr = cap; (ptr = strchr(ptr, '#')) != NULL;) { 426 if (strtol(++ptr, NULL, 0) > SHRT_MAX) { 427 return TERMINFO_RTYPE; 428 } 429 } 430 return TERMINFO_RTYPE_O1; 431} 432 433int 434_ti_encode_buf_id_num(TBUF *tbuf, int ind, int num, size_t len) 435{ 436 if (!_ti_grow_tbuf(tbuf, sizeof(uint16_t) + len)) 437 return 0; 438 _ti_encode_buf_16(tbuf, ind); 439 if (len == sizeof(uint32_t)) 440 _ti_encode_buf_32(tbuf, num); 441 else 442 _ti_encode_buf_16(tbuf, num); 443 tbuf->entries++; 444 return 1; 445} 446 447int 448_ti_encode_buf_id_count_str(TBUF *tbuf, int ind, const void *buf, size_t len) 449{ 450 if (!_ti_grow_tbuf(tbuf, 2 * sizeof(uint16_t) + len)) 451 return 0; 452 _ti_encode_buf_16(tbuf, ind); 453 _ti_encode_buf_count_str(tbuf, buf, len); 454 tbuf->entries++; 455 return 1; 456} 457 458int 459_ti_encode_buf_id_flags(TBUF *tbuf, int ind, int flag) 460{ 461 if (!_ti_grow_tbuf(tbuf, sizeof(uint16_t) + 1)) 462 return 0; 463 _ti_encode_buf_16(tbuf, ind); 464 tbuf->buf[tbuf->bufpos++] = flag; 465 tbuf->entries++; 466 return 1; 467} 468 469TIC * 470_ti_compile(char *cap, int flags) 471{ 472 char *token, *p, *e, *name, *desc, *alias; 473 signed char flag; 474 long cnum; 475 short ind; 476 int num; 477 size_t len; 478 TBUF buf; 479 TIC *tic; 480 481 _DIAGASSERT(cap != NULL); 482 483 name = _ti_get_token(&cap, ','); 484 if (name == NULL) { 485 dowarn(flags, "no separator found: %s", cap); 486 return NULL; 487 } 488 desc = strrchr(name, '|'); 489 if (desc != NULL) 490 *desc++ = '\0'; 491 alias = strchr(name, '|'); 492 if (alias != NULL) 493 *alias++ = '\0'; 494 495 if (strlen(name) > UINT16_MAX - 1) { 496 dowarn(flags, "%s: name too long", name); 497 return NULL; 498 } 499 if (desc != NULL && strlen(desc) > UINT16_MAX - 1) { 500 dowarn(flags, "%s: description too long: %s", name, desc); 501 return NULL; 502 } 503 if (alias != NULL && strlen(alias) > UINT16_MAX - 1) { 504 dowarn(flags, "%s: alias too long: %s", name, alias); 505 return NULL; 506 } 507 508 tic = calloc(sizeof(*tic), 1); 509 if (tic == NULL) 510 return NULL; 511 512 tic->rtype = (flags & TIC_COMPAT_V1) ? TERMINFO_RTYPE_O1 : 513 _ti_find_rtype(cap); 514 buf.buf = NULL; 515 buf.buflen = 0; 516 517 tic->name = _ti_getname(tic->rtype, name); 518 if (tic->name == NULL) 519 goto error; 520 if (alias != NULL && flags & TIC_ALIAS) { 521 tic->alias = _ti_getname(tic->rtype, alias); 522 if (tic->alias == NULL) 523 goto error; 524 } 525 if (desc != NULL && flags & TIC_DESCRIPTION) { 526 tic->desc = strdup(desc); 527 if (tic->desc == NULL) 528 goto error; 529 } 530 531 for (token = _ti_get_token(&cap, ','); 532 token != NULL && *token != '\0'; 533 token = _ti_get_token(&cap, ',')) 534 { 535 /* Skip commented caps */ 536 if (!(flags & TIC_COMMENT) && token[0] == '.') 537 continue; 538 539 /* Obsolete entries */ 540 if (token[0] == 'O' && token[1] == 'T') { 541 if (!(flags & TIC_EXTRA)) 542 continue; 543 token += 2; 544 } 545 546 /* str cap */ 547 p = strchr(token, '='); 548 if (p != NULL) { 549 *p++ = '\0'; 550 /* Don't use the string if we already have it */ 551 ind = (short)_ti_strindex(token); 552 if (ind != -1 && 553 _ti_find_cap(tic, &tic->strs, 's', ind) != NULL) 554 continue; 555 556 /* Encode the string to our scratch buffer */ 557 buf.bufpos = 0; 558 if (encode_string(tic->name, token, 559 &buf, p, flags) == -1) 560 goto error; 561 if (buf.bufpos > UINT16_MAX - 1) { 562 dowarn(flags, "%s: %s: string is too long", 563 tic->name, token); 564 continue; 565 } 566 if (!VALID_STRING(buf.buf)) { 567 dowarn(flags, "%s: %s: invalid string", 568 tic->name, token); 569 continue; 570 } 571 572 if (ind == -1) { 573 if (!_ti_store_extra(tic, 1, token, 's', -1, -2, 574 buf.buf, buf.bufpos, flags)) 575 goto error; 576 } else { 577 if (!_ti_encode_buf_id_count_str(&tic->strs, 578 ind, buf.buf, buf.bufpos)) 579 goto error; 580 } 581 continue; 582 } 583 584 /* num cap */ 585 p = strchr(token, '#'); 586 if (p != NULL) { 587 *p++ = '\0'; 588 /* Don't use the number if we already have it */ 589 ind = (short)_ti_numindex(token); 590 if (ind != -1 && 591 _ti_find_cap(tic, &tic->nums, 'n', ind) != NULL) 592 continue; 593 594 cnum = strtol(p, &e, 0); 595 if (*e != '\0') { 596 dowarn(flags, "%s: %s: not a number", 597 tic->name, token); 598 continue; 599 } 600 if (!VALID_NUMERIC(cnum) || cnum > INT32_MAX) { 601 dowarn(flags, "%s: %s: number %ld out of range", 602 tic->name, token, cnum); 603 continue; 604 } 605 606 num = (int)cnum; 607 if (ind == -1) { 608 if (!_ti_store_extra(tic, 1, token, 'n', -1, 609 num, NULL, 0, flags)) 610 goto error; 611 } else { 612 if (!_ti_encode_buf_id_num(&tic->nums, 613 ind, num, _ti_numsize(tic))) 614 goto error; 615 } 616 continue; 617 } 618 619 flag = 1; 620 len = strlen(token) - 1; 621 if (token[len] == '@') { 622 flag = CANCELLED_BOOLEAN; 623 token[len] = '\0'; 624 } 625 ind = (short)_ti_flagindex(token); 626 if (ind == -1 && flag == CANCELLED_BOOLEAN) { 627 if ((ind = (short)_ti_numindex(token)) != -1) { 628 if (_ti_find_cap(tic, &tic->nums, 'n', ind) 629 != NULL) 630 continue; 631 if (!_ti_encode_buf_id_num(&tic->nums, ind, 632 CANCELLED_NUMERIC, _ti_numsize(tic))) 633 goto error; 634 continue; 635 } else if ((ind = (short)_ti_strindex(token)) != -1) { 636 if (_ti_find_cap(tic, &tic->strs, 's', ind) 637 != NULL) 638 continue; 639 if (!_ti_encode_buf_id_num( 640 &tic->strs, ind, 0, sizeof(uint16_t))) 641 goto error; 642 continue; 643 } 644 } 645 if (ind == -1) { 646 if (!_ti_store_extra(tic, 1, token, 'f', flag, 0, NULL, 647 0, flags)) 648 goto error; 649 } else if (_ti_find_cap(tic, &tic->flags, 'f', ind) == NULL) { 650 if (!_ti_encode_buf_id_flags(&tic->flags, ind, flag)) 651 goto error; 652 } 653 } 654 655 free(buf.buf); 656 return tic; 657 658error: 659 free(buf.buf); 660 _ti_freetic(tic); 661 return NULL; 662} 663 664void 665_ti_freetic(TIC *tic) 666{ 667 668 if (tic != NULL) { 669 free(tic->name); 670 free(tic->alias); 671 free(tic->desc); 672 free(tic->extras.buf); 673 free(tic->flags.buf); 674 free(tic->nums.buf); 675 free(tic->strs.buf); 676 free(tic); 677 } 678} 679