1/* $NetBSD: gnum4.c,v 1.13 2023/05/24 22:14:31 christos Exp $ */ 2/* $OpenBSD: gnum4.c,v 1.39 2008/08/21 21:01:04 espie Exp $ */ 3 4/* 5 * Copyright (c) 1999 Marc Espie 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29/* 30 * functions needed to support gnu-m4 extensions, including a fake freezing 31 */ 32#if HAVE_NBTOOL_CONFIG_H 33#include "nbtool_config.h" 34#endif 35#include <sys/cdefs.h> 36__RCSID("$NetBSD: gnum4.c,v 1.13 2023/05/24 22:14:31 christos Exp $"); 37 38#include <sys/param.h> 39#include <sys/types.h> 40#include <sys/wait.h> 41#include <ctype.h> 42#include <err.h> 43#include <paths.h> 44#include <regex.h> 45#include <stddef.h> 46#include <stdlib.h> 47#include <stdio.h> 48#include <string.h> 49#include <errno.h> 50#include <unistd.h> 51#include "mdef.h" 52#include "stdd.h" 53#include "extern.h" 54 55 56int mimic_gnu = 0; 57#ifndef SIZE_T_MAX 58#define SIZE_T_MAX (size_t)~0ull 59#endif 60 61/* 62 * Support for include path search 63 * First search in the current directory. 64 * If not found, and the path is not absolute, include path kicks in. 65 * First, -I options, in the order found on the command line. 66 * Then M4PATH env variable 67 */ 68 69struct path_entry { 70 char *name; 71 struct path_entry *next; 72} *first, *last; 73 74static struct path_entry *new_path_entry(const char *); 75static void ensure_m4path(void); 76static struct input_file *dopath(struct input_file *, const char *); 77 78static struct path_entry * 79new_path_entry(const char *dirname) 80{ 81 struct path_entry *n; 82 83 n = malloc(sizeof(struct path_entry)); 84 if (!n) 85 errx(1, "out of memory"); 86 n->name = strdup(dirname); 87 if (!n->name) 88 errx(1, "out of memory"); 89 n->next = 0; 90 return n; 91} 92 93void 94addtoincludepath(const char *dirname) 95{ 96 struct path_entry *n; 97 98 n = new_path_entry(dirname); 99 100 if (last) { 101 last->next = n; 102 last = n; 103 } 104 else 105 last = first = n; 106} 107 108static void 109ensure_m4path(void) 110{ 111 static int envpathdone = 0; 112 char *envpath; 113 char *sweep; 114 char *path; 115 116 if (envpathdone) 117 return; 118 envpathdone = TRUE; 119 envpath = getenv("M4PATH"); 120 if (!envpath) 121 return; 122 /* for portability: getenv result is read-only */ 123 envpath = strdup(envpath); 124 if (!envpath) 125 errx(1, "out of memory"); 126 for (sweep = envpath; 127 (path = strsep(&sweep, ":")) != NULL;) 128 addtoincludepath(path); 129 free(envpath); 130} 131 132static 133struct input_file * 134dopath(struct input_file *i, const char *filename) 135{ 136 char path[MAXPATHLEN]; 137 struct path_entry *pe; 138 FILE *f; 139 140 for (pe = first; pe; pe = pe->next) { 141 snprintf(path, sizeof(path), "%s/%s", pe->name, filename); 142 if ((f = fopen(path, "r")) != 0) { 143 set_input(i, f, path); 144 return i; 145 } 146 } 147 return NULL; 148} 149 150struct input_file * 151fopen_trypath(struct input_file *i, const char *filename) 152{ 153 FILE *f; 154 155 f = fopen(filename, "r"); 156 if (f != NULL) { 157 set_input(i, f, filename); 158 return i; 159 } 160 if (filename[0] == '/') 161 return NULL; 162 163 ensure_m4path(); 164 165 return dopath(i, filename); 166} 167 168void 169doindir(const char *argv[], int argc) 170{ 171 ndptr n; 172 struct macro_definition *p; 173 174 n = lookup(argv[2]); 175 if (n == NULL || (p = macro_getdef(n)) == NULL) 176 m4errx(1, "indir: undefined macro %s.", argv[2]); 177 argv[1] = p->defn; 178 179 eval(argv+1, argc-1, p->type, is_traced(n)); 180} 181 182void 183dobuiltin(const char *argv[], int argc) 184{ 185 ndptr p; 186 187 argv[1] = NULL; 188 p = macro_getbuiltin(argv[2]); 189 if (p != NULL) 190 eval(argv+1, argc-1, macro_builtin_type(p), is_traced(p)); 191 else 192 m4errx(1, "unknown builtin %s.", argv[2]); 193} 194 195 196/* We need some temporary buffer space, as pb pushes BACK and substitution 197 * proceeds forward... */ 198static char *buffer; 199static size_t bufsize = 0; 200static size_t current = 0; 201 202static void addchars(const char *, size_t); 203static void addchar(int); 204static char *twiddle(const char *); 205static char *getstring(void); 206static void exit_regerror(int, const char *, regex_t *) __dead; 207static void do_subst(const char *, const char *, regex_t *, const char *, 208 regmatch_t *); 209static void do_regexpindex(const char *, const char *, regex_t *, regmatch_t *); 210static void do_regexp(const char *, const char *, regex_t *, const char *, regmatch_t *); 211static void add_sub(size_t, const char *, regex_t *, regmatch_t *); 212static void add_replace(const char *, regex_t *, const char *, regmatch_t *); 213#define addconstantstring(s) addchars((s), sizeof(s)-1) 214 215static void 216addchars(const char *c, size_t n) 217{ 218 if (n == 0) 219 return; 220 while (current + n > bufsize) { 221 if (bufsize == 0) 222 bufsize = 1024; 223 else 224 bufsize *= 2; 225 buffer = xrealloc(buffer, bufsize, NULL); 226 } 227 memcpy(buffer+current, c, n); 228 current += n; 229} 230 231static void 232addchar(int c) 233{ 234 if (current +1 > bufsize) { 235 if (bufsize == 0) 236 bufsize = 1024; 237 else 238 bufsize *= 2; 239 buffer = xrealloc(buffer, bufsize, NULL); 240 } 241 buffer[current++] = c; 242} 243 244static char * 245getstring(void) 246{ 247 addchar('\0'); 248 current = 0; 249 return buffer; 250} 251 252 253static void 254exit_regerror(int er, const char *pat, regex_t *re) 255{ 256 size_t errlen; 257 char *errbuf; 258 259 errlen = regerror(er, re, NULL, 0); 260 errbuf = xalloc(errlen, 261 "malloc in regerror: %lu", (unsigned long)errlen); 262 regerror(er, re, errbuf, errlen); 263 m4errx(1, "regular expression error: %s for: `%s'", errbuf, pat); 264} 265 266static void 267add_sub(size_t n, const char *string, regex_t *re, regmatch_t *pm) 268{ 269 if (n > re->re_nsub) { 270 if (!quiet) 271 warnx("No subexpression %zu", n); 272 if (fatal_warnings) 273 exit(EXIT_FAILURE); 274 } 275 /* Subexpressions that did not match are 276 * not an error. */ 277 else if (pm[n].rm_so != -1 && 278 pm[n].rm_eo != -1) { 279 addchars(string + pm[n].rm_so, 280 pm[n].rm_eo - pm[n].rm_so); 281 } 282} 283 284/* Add replacement string to the output buffer, recognizing special 285 * constructs and replacing them with substrings of the original string. 286 */ 287static void 288add_replace(const char *string, regex_t *re, const char *replace, regmatch_t *pm) 289{ 290 const char *p; 291 292 for (p = replace; *p != '\0'; p++) { 293 if (*p == '&' && !mimic_gnu) { 294 add_sub(0, string, re, pm); 295 continue; 296 } 297 if (*p == '\\') { 298 if (p[1] == '\\') { 299 addchar(p[1]); 300 p++; 301 continue; 302 } 303 if (p[1] == '&') { 304 if (mimic_gnu) 305 add_sub(0, string, re, pm); 306 else 307 addchar(p[1]); 308 p++; 309 continue; 310 } 311 if (isdigit((unsigned char)p[1])) { 312 add_sub(*(++p) - '0', string, re, pm); 313 continue; 314 } 315 } 316 addchar(*p); 317 } 318} 319 320static void 321do_subst(const char *pat, const char *string, regex_t *re, const char *replace, 322 regmatch_t *pm) 323{ 324 int error; 325 int flags = 0; 326 const char *last_match = NULL; 327 328 while ((error = regexec(re, string, re->re_nsub+1, pm, flags)) == 0) { 329 if (pm[0].rm_eo != 0) { 330 if (string[pm[0].rm_eo-1] == '\n') 331 flags = 0; 332 else 333 flags = REG_NOTBOL; 334 } 335 336 /* NULL length matches are special... We use the `vi-mode' 337 * rule: don't allow a NULL-match at the last match 338 * position. 339 */ 340 if (pm[0].rm_so == pm[0].rm_eo && 341 string + pm[0].rm_so == last_match) { 342 if (*string == '\0') 343 return; 344 addchar(*string); 345 if (*string++ == '\n') 346 flags = 0; 347 else 348 flags = REG_NOTBOL; 349 continue; 350 } 351 last_match = string + pm[0].rm_so; 352 addchars(string, pm[0].rm_so); 353 add_replace(string, re, replace, pm); 354 string += pm[0].rm_eo; 355 buffer[current] = '\0'; 356 } 357 while (*string) 358 addchar(*string++); 359 if (error != REG_NOMATCH) 360 exit_regerror(error, pat, re); 361 pbstr(string); 362} 363 364static void 365do_regexp(const char *pat, const char *string, regex_t *re, const char *replace, 366 regmatch_t *pm) 367{ 368 int error; 369 370 switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) { 371 case 0: 372 add_replace(string, re, replace, pm); 373 pbstr(getstring()); 374 break; 375 case REG_NOMATCH: 376 break; 377 default: 378 exit_regerror(error, pat, re); 379 } 380} 381 382static void 383do_regexpindex(const char *pat, const char *string, regex_t *re, regmatch_t *pm) 384{ 385 int error; 386 387 switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) { 388 case 0: 389 pbunsigned(pm[0].rm_so); 390 break; 391 case REG_NOMATCH: 392 pbnum(-1); 393 break; 394 default: 395 exit_regerror(error, pat, re); 396 } 397} 398 399/* In Gnu m4 mode, parentheses for backmatch don't work like POSIX 1003.2 400 * says. So we twiddle with the regexp before passing it to regcomp. 401 */ 402static char * 403twiddle(const char *p) 404{ 405 /* + at start of regexp is a normal character for Gnu m4 */ 406 if (*p == '^') { 407 addchar(*p); 408 p++; 409 } 410 if (*p == '+') { 411 addchar('\\'); 412 } 413 /* This could use strcspn for speed... */ 414 while (*p != '\0') { 415 if (*p == '\\') { 416 switch(p[1]) { 417 case '(': 418 case ')': 419 case '|': 420 addchar(p[1]); 421 break; 422 case 'w': 423 addconstantstring("[_a-zA-Z0-9]"); 424 break; 425 case 'W': 426 addconstantstring("[^_a-zA-Z0-9]"); 427 break; 428 case '<': 429 addconstantstring("[[:<:]]"); 430 break; 431 case '>': 432 addconstantstring("[[:>:]]"); 433 break; 434 default: 435 addchars(p, 2); 436 break; 437 } 438 p+=2; 439 continue; 440 } 441 if (strchr("()|{}", *p) != NULL) 442 addchar('\\'); 443 444 addchar(*p); 445 p++; 446 } 447 return getstring(); 448} 449 450static int 451checkempty(const char *argv[], int argc) 452{ 453 const char *s; 454 size_t len; 455 456 if (argc != 3 && argv[3][0] != '\0') 457 return 0; 458 459 if (argc == 3) { 460 if (!quiet) 461 warnx("Too few arguments to patsubst"); 462 if (fatal_warnings) 463 exit(EXIT_FAILURE); 464 } 465 466 if (argv[4] && argc > 4) 467 len = strlen(argv[4]); 468 else 469 len = 0; 470 for (s = argv[2]; *s != '\0'; s++) { 471 addchars(argv[4], len); 472 addchar(*s); 473 } 474 return 1; 475} 476 477/* patsubst(string, regexp, opt replacement) */ 478/* argv[2]: string 479 * argv[3]: regexp 480 * argv[4]: opt rep 481 */ 482void 483dopatsubst(const char *argv[], int argc) 484{ 485 if (argc < 3) { 486 if (!quiet) 487 warnx("Too few arguments to patsubst"); 488 if (fatal_warnings) 489 exit(EXIT_FAILURE); 490 return; 491 } 492 /* special case: empty regexp */ 493 if (!checkempty(argv, argc)) { 494 495 const char *pat; 496 int error; 497 regex_t re; 498 regmatch_t *pmatch; 499 int mode = REG_EXTENDED; 500 size_t l = strlen(argv[3]); 501 502 if (!mimic_gnu || 503 (argv[3][0] == '^') || 504 (l > 0 && argv[3][l-1] == '$')) 505 mode |= REG_NEWLINE; 506 507 pat = mimic_gnu ? twiddle(argv[3]) : argv[3]; 508 error = regcomp(&re, pat, mode); 509 if (error != 0) 510 exit_regerror(error, pat, &re); 511 512 pmatch = xalloc(sizeof(regmatch_t) * (re.re_nsub+1), NULL); 513 do_subst(pat, argv[2], &re, 514 argc > 4 && argv[4] != NULL ? argv[4] : "", pmatch); 515 free(pmatch); 516 regfree(&re); 517 } 518 pbstr(getstring()); 519} 520 521void 522doregexp(const char *argv[], int argc) 523{ 524 int error; 525 regex_t re; 526 regmatch_t *pmatch; 527 const char *pat; 528 529 if (argc < 3) { 530 if (!quiet) 531 warnx("Too few arguments to regexp"); 532 if (fatal_warnings) 533 exit(EXIT_FAILURE); 534 return; 535 } 536 if (checkempty(argv, argc)) { 537 return; 538 } 539 540 pat = mimic_gnu ? twiddle(argv[3]) : argv[3]; 541 error = regcomp(&re, pat, REG_EXTENDED); 542 if (error != 0) 543 exit_regerror(error, pat, &re); 544 545 pmatch = xalloc(sizeof(regmatch_t) * (re.re_nsub+1), NULL); 546 if (argv[4] == NULL || argc == 4) 547 do_regexpindex(pat, argv[2], &re, pmatch); 548 else 549 do_regexp(pat, argv[2], &re, argv[4], pmatch); 550 free(pmatch); 551 regfree(&re); 552} 553 554void 555doformat(const char *argv[], int argc) 556{ 557 const char *format = argv[2]; 558 int pos = 3; 559 int left_padded; 560 long width; 561 size_t l; 562 const char *thisarg; 563 char temp[2]; 564 size_t extra; 565 566 while (*format != 0) { 567 if (*format != '%') { 568 addchar(*format++); 569 continue; 570 } 571 572 format++; 573 if (*format == '%') { 574 addchar(*format++); 575 continue; 576 } 577 if (*format == 0) { 578 addchar('%'); 579 break; 580 } 581 582 if (*format == '*') { 583 format++; 584 if (pos >= argc) 585 m4errx(1, 586 "Format with too many format specifiers."); 587 width = strtol(argv[pos++], NULL, 10); 588 } else { 589 char *eformat; 590 width = strtol(format, &eformat, 10); 591 format = eformat; 592 } 593 if (width < 0) { 594 left_padded = 1; 595 width = -width; 596 } else { 597 left_padded = 0; 598 } 599 if (*format == '.') { 600 format++; 601 if (*format == '*') { 602 format++; 603 if (pos >= argc) 604 m4errx(1, 605 "Format with too many format specifiers."); 606 extra = strtol(argv[pos++], NULL, 10); 607 } else { 608 char *eformat; 609 extra = strtol(format, &eformat, 10); 610 format = eformat; 611 } 612 } else { 613 extra = SIZE_T_MAX; 614 } 615 if (pos >= argc) 616 m4errx(1, "Format with too many format specifiers."); 617 switch(*format) { 618 case 's': 619 thisarg = argv[pos++]; 620 break; 621 case 'c': 622 temp[0] = strtoul(argv[pos++], NULL, 10); 623 temp[1] = 0; 624 thisarg = temp; 625 break; 626 default: 627 m4errx(1, "Unsupported format specification: %s.", 628 argv[2]); 629 } 630 format++; 631 l = strlen(thisarg); 632 if (l > extra) 633 l = extra; 634 if (!left_padded) { 635 while (l < (size_t)width--) 636 addchar(' '); 637 } 638 addchars(thisarg, l); 639 if (left_padded) { 640 while (l < (size_t)width--) 641 addchar(' '); 642 } 643 } 644 pbstr(getstring()); 645} 646 647void 648doesyscmd(const char *cmd) 649{ 650 int p[2]; 651 pid_t pid, cpid; 652 const char *argv[4]; 653 int cc; 654 int status; 655 656 /* Follow gnu m4 documentation: first flush buffers. */ 657 fflush(NULL); 658 659 argv[0] = "sh"; 660 argv[1] = "-c"; 661 argv[2] = cmd; 662 argv[3] = NULL; 663 664 /* Just set up standard output, share stderr and stdin with m4 */ 665 if (pipe(p) == -1) 666 err(1, "bad pipe"); 667 switch(cpid = fork()) { 668 case -1: 669 err(1, "bad fork"); 670 /* NOTREACHED */ 671 case 0: 672 (void) close(p[0]); 673 (void) dup2(p[1], 1); 674 (void) close(p[1]); 675 execv(_PATH_BSHELL, __UNCONST(argv)); 676 exit(1); 677 default: 678 /* Read result in two stages, since m4's buffer is 679 * pushback-only. */ 680 (void) close(p[1]); 681 do { 682 char result[BUFSIZE]; 683 cc = read(p[0], result, sizeof result); 684 if (cc > 0) 685 addchars(result, cc); 686 } while (cc > 0 || (cc == -1 && errno == EINTR)); 687 688 (void) close(p[0]); 689 while ((pid = wait(&status)) != cpid && pid >= 0) 690 continue; 691 pbstr(getstring()); 692 } 693} 694 695void 696getdivfile(const char *name) 697{ 698 FILE *f; 699 int c; 700 701 f = fopen(name, "r"); 702 if (!f) 703 return; 704 705 while ((c = getc(f))!= EOF) 706 putc(c, active); 707 (void) fclose(f); 708} 709 710#ifdef REAL_FREEZE 711void 712freeze_state(const char *fname) 713{ 714 FILE *f; 715 716 if ((f = fopen(fname, "wb")) == NULL) 717 m4errx(EXIT_FAILURE, "Can't open output freeze file `%s' (%s)", 718 fname, strerror(errno)); 719 fprintf(f, "# This is a frozen state file generated by %s\nV1\n", 720 getprogname()); 721 fprintf(f, "Q%zu,%zu\n%s%s\n", strlen(lquote), strlen(rquote), 722 lquote, rquote); 723 fprintf(f, "C%zu,%zu\n%s%s\n", strlen(scommt), strlen(ecommt), 724 scommt, ecommt); 725 dump_state(f); 726 /* XXX: diversions? */ 727 fprintf(f, "D-1,0\n"); 728 fprintf(f, "# End of frozen state file\n"); 729 fclose(f); 730} 731 732void 733thaw_state(const char *fname) 734{ 735 char *name = NULL; 736 size_t nl, namelen = 0; 737 char *defn = NULL; 738 size_t dl, defnlen = 0; 739 size_t lineno = 0; 740 char line[1024], *ptr, type; 741 FILE *f; 742 743 if ((f = fopen(fname, "rb")) == NULL) 744 m4errx(EXIT_FAILURE, "Can't open frozen file `%s' (%s)", 745 fname, strerror(errno)); 746 747#define GET() if (fgets(line, (int)sizeof(line), f) == NULL) goto out 748#define GETSTR(s, l) if (fread(s, 1, l, f) != l) goto out; else s[l] = '\0' 749 750 GET(); /* comment */ 751 GET(); /* version */ 752 if ((ptr = strrchr(line, '\n')) != NULL) 753 *ptr = '\0'; 754 if (strcmp(line, "V1") != 0) 755 m4errx(EXIT_FAILURE, "Bad frozen version `%s'", line); 756 757 for (;;) { 758 GET(); 759 lineno++; 760 switch (*line) { 761 case '\n': 762 continue; 763 case '#': 764 free(name); 765 free(defn); 766 fclose(f); 767 return; 768 default: 769 if (sscanf(line, "%c%zu,%zu\n", &type, &nl, &dl) != 3) 770 m4errx(EXIT_FAILURE, "%s, %zu: Bad line `%s'", 771 fname, lineno, line); 772 break; 773 } 774 775 switch (type) { 776 case 'Q': 777 if (nl >= sizeof(lquote) || dl >= sizeof(rquote)) 778 m4errx(EXIT_FAILURE, "%s, %zu: Quote too long", 779 fname, lineno); 780 GETSTR(lquote, nl); 781 GETSTR(rquote, dl); 782 break; 783 784 case 'C': 785 if (nl >= sizeof(scommt) || dl >= sizeof(ecommt)) 786 m4errx(EXIT_FAILURE, "%s, %zu: Comment too long", 787 fname, lineno); 788 GETSTR(scommt, nl); 789 GETSTR(ecommt, dl); 790 break; 791 792 case 'T': 793 case 'F': 794 if (nl >= namelen) 795 name = xrealloc(name, namelen = nl + 1, 796 "name grow"); 797 if (dl >= defnlen) 798 defn = xrealloc(defn, defnlen = dl + 1, 799 "defn grow"); 800 GETSTR(name, nl); 801 GETSTR(defn, dl); 802 macro_pushdef(name, defn); 803 break; 804 805 case 'D': 806 /* XXX: Not implemented */ 807 break; 808 809 default: 810 m4errx(EXIT_FAILURE, "%s, %zu: Unknown type %c", 811 fname, lineno,type); 812 } 813 } 814out: 815 m4errx(EXIT_FAILURE, "Unexpected end of file in `%s'", fname); 816} 817#endif 818