/* gnum4.c revision 241777 */
/* $OpenBSD: gnum4.c,v 1.42 2011/11/06 12:25:43 espie Exp $ */

/*
 * Copyright (c) 1999 Marc Espie
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
26201360Srdivacky */ 27201360Srdivacky#include <sys/cdefs.h> 28201360Srdivacky__FBSDID("$FreeBSD: head/usr.bin/m4/gnum4.c 241777 2012-10-20 10:33:15Z ed $"); 29201360Srdivacky 30201360Srdivacky/* 31201360Srdivacky * functions needed to support gnu-m4 extensions, including a fake freezing 32201360Srdivacky */ 33201360Srdivacky 34201360Srdivacky#include <sys/param.h> 35201360Srdivacky#include <sys/types.h> 36201360Srdivacky#include <sys/wait.h> 37201360Srdivacky#include <ctype.h> 38201360Srdivacky#include <err.h> 39201360Srdivacky#include <paths.h> 40201360Srdivacky#include <regex.h> 41201360Srdivacky#include <stddef.h> 42201360Srdivacky#include <stdlib.h> 43201360Srdivacky#include <stdio.h> 44201360Srdivacky#include <string.h> 45201360Srdivacky#include <errno.h> 46201360Srdivacky#include <unistd.h> 47201360Srdivacky#include "mdef.h" 48201360Srdivacky#include "stdd.h" 49201360Srdivacky#include "extern.h" 50201360Srdivacky 51235633Sdim 52235633Sdimint mimic_gnu = 0; 53201360Srdivacky 54201360Srdivacky/* 55221345Sdim * Support for include path search 56221345Sdim * First search in the current directory. 57210299Sed * If not found, and the path is not absolute, include path kicks in. 58210299Sed * First, -I options, in the order found on the command line. 
59235633Sdim * Then M4PATH env variable 60235633Sdim */ 61221345Sdim 62221345Sdimstatic struct path_entry { 63221345Sdim char *name; 64221345Sdim struct path_entry *next; 65235633Sdim} *first, *last; 66235633Sdim 67235633Sdimstatic struct path_entry *new_path_entry(const char *); 68226890Sdimstatic void ensure_m4path(void); 69263509Sdimstatic struct input_file *dopath(struct input_file *, const char *); 70263509Sdim 71263509Sdimstatic struct path_entry * 72263509Sdimnew_path_entry(const char *dirname) 73263509Sdim{ 74263509Sdim struct path_entry *n; 75263509Sdim 76263509Sdim n = malloc(sizeof(struct path_entry)); 77263509Sdim if (!n) 78263509Sdim errx(1, "out of memory"); 79201360Srdivacky n->name = strdup(dirname); 80201360Srdivacky if (!n->name) 81201360Srdivacky errx(1, "out of memory"); 82201360Srdivacky n->next = 0; 83226890Sdim return n; 84224145Sdim} 85226890Sdim 86226890Sdimvoid 87235633Sdimaddtoincludepath(const char *dirname) 88201360Srdivacky{ 89201360Srdivacky struct path_entry *n; 90201360Srdivacky 91201360Srdivacky n = new_path_entry(dirname); 92201360Srdivacky 93201360Srdivacky if (last) { 94201360Srdivacky last->next = n; 95201360Srdivacky last = n; 96201360Srdivacky } 97201360Srdivacky else 98201360Srdivacky last = first = n; 99201360Srdivacky} 100201360Srdivacky 101201360Srdivackystatic void 102201360Srdivackyensure_m4path(void) 103201360Srdivacky{ 104201360Srdivacky static int envpathdone = 0; 105218893Sdim char *envpath; 106218893Sdim char *sweep; 107201360Srdivacky char *path; 108201360Srdivacky 109201360Srdivacky if (envpathdone) 110201360Srdivacky return; 111201360Srdivacky envpathdone = TRUE; 112201360Srdivacky envpath = getenv("M4PATH"); 113201360Srdivacky if (!envpath) 114201360Srdivacky return; 115201360Srdivacky /* for portability: getenv result is read-only */ 116201360Srdivacky envpath = strdup(envpath); 117201360Srdivacky if (!envpath) 118201360Srdivacky errx(1, "out of memory"); 119201360Srdivacky for (sweep = envpath; 
120201360Srdivacky (path = strsep(&sweep, ":")) != NULL;) 121201360Srdivacky addtoincludepath(path); 122201360Srdivacky free(envpath); 123201360Srdivacky} 124201360Srdivacky 125221345Sdimstatic 126201360Srdivackystruct input_file * 127201360Srdivackydopath(struct input_file *i, const char *filename) 128201360Srdivacky{ 129201360Srdivacky char path[MAXPATHLEN]; 130201360Srdivacky struct path_entry *pe; 131201360Srdivacky FILE *f; 132201360Srdivacky 133201360Srdivacky for (pe = first; pe; pe = pe->next) { 134201360Srdivacky snprintf(path, sizeof(path), "%s/%s", pe->name, filename); 135221345Sdim if ((f = fopen(path, "r")) != 0) { 136221345Sdim set_input(i, f, path); 137221345Sdim return i; 138245431Sdim } 139201360Srdivacky } 140201360Srdivacky return NULL; 141201360Srdivacky} 142201360Srdivacky 143201360Srdivackystruct input_file * 144201360Srdivackyfopen_trypath(struct input_file *i, const char *filename) 145201360Srdivacky{ 146201360Srdivacky FILE *f; 147201360Srdivacky 148201360Srdivacky f = fopen(filename, "r"); 149201360Srdivacky if (f != NULL) { 150201360Srdivacky set_input(i, f, filename); 151245431Sdim return i; 152201360Srdivacky } 153201360Srdivacky if (filename[0] == '/') 154201360Srdivacky return NULL; 155201360Srdivacky 156201360Srdivacky ensure_m4path(); 157201360Srdivacky 158201360Srdivacky return dopath(i, filename); 159201360Srdivacky} 160201360Srdivacky 161201360Srdivackyvoid 162201360Srdivackydoindir(const char *argv[], int argc) 163201360Srdivacky{ 164201360Srdivacky ndptr n; 165201360Srdivacky struct macro_definition *p = NULL; 166201360Srdivacky 167201360Srdivacky n = lookup(argv[2]); 168201360Srdivacky if (n == NULL || (p = macro_getdef(n)) == NULL) 169201360Srdivacky m4errx(1, "indir: undefined macro %s.", argv[2]); 170201360Srdivacky argv[1] = p->defn; 171201360Srdivacky 172201360Srdivacky eval(argv+1, argc-1, p->type, is_traced(n)); 173201360Srdivacky} 174201360Srdivacky 175201360Srdivackyvoid 176201360Srdivackydobuiltin(const char *argv[], 
int argc) 177201360Srdivacky{ 178201360Srdivacky ndptr p; 179201360Srdivacky 180201360Srdivacky argv[1] = NULL; 181201360Srdivacky p = macro_getbuiltin(argv[2]); 182201360Srdivacky if (p != NULL) 183201360Srdivacky eval(argv+1, argc-1, macro_builtin_type(p), is_traced(p)); 184201360Srdivacky else 185201360Srdivacky m4errx(1, "unknown builtin %s.", argv[2]); 186201360Srdivacky} 187221345Sdim 188221345Sdim 189263509Sdim/* We need some temporary buffer space, as pb pushes BACK and substitution 190263509Sdim * proceeds forward... */ 191201360Srdivackystatic char *buffer; 192201360Srdivackystatic size_t bufsize = 0; 193201360Srdivackystatic size_t current = 0; 194201360Srdivacky 195201360Srdivackystatic void addchars(const char *, size_t); 196201360Srdivackystatic void addchar(int); 197201360Srdivackystatic char *twiddle(const char *); 198201360Srdivackystatic char *getstring(void); 199201360Srdivackystatic void exit_regerror(int, regex_t *); 200201360Srdivackystatic void do_subst(const char *, regex_t *, const char *, regmatch_t *); 201201360Srdivackystatic void do_regexpindex(const char *, regex_t *, regmatch_t *); 202201360Srdivackystatic void do_regexp(const char *, regex_t *, const char *, regmatch_t *); 203201360Srdivackystatic void add_sub(int, const char *, regex_t *, regmatch_t *); 204201360Srdivackystatic void add_replace(const char *, regex_t *, const char *, regmatch_t *); 205201360Srdivacky#define addconstantstring(s) addchars((s), sizeof(s)-1) 206201360Srdivacky 207201360Srdivackystatic void 208201360Srdivackyaddchars(const char *c, size_t n) 209201360Srdivacky{ 210201360Srdivacky if (n == 0) 211201360Srdivacky return; 212201360Srdivacky while (current + n > bufsize) { 213201360Srdivacky if (bufsize == 0) 214245431Sdim bufsize = 1024; 215201360Srdivacky else 216201360Srdivacky bufsize *= 2; 217201360Srdivacky buffer = xrealloc(buffer, bufsize, NULL); 218201360Srdivacky } 219201360Srdivacky memcpy(buffer+current, c, n); 220201360Srdivacky current += n; 
221201360Srdivacky} 222201360Srdivacky 223201360Srdivackystatic void 224201360Srdivackyaddchar(int c) 225201360Srdivacky{ 226201360Srdivacky if (current +1 > bufsize) { 227201360Srdivacky if (bufsize == 0) 228201360Srdivacky bufsize = 1024; 229201360Srdivacky else 230201360Srdivacky bufsize *= 2; 231201360Srdivacky buffer = xrealloc(buffer, bufsize, NULL); 232201360Srdivacky } 233201360Srdivacky buffer[current++] = c; 234201360Srdivacky} 235245431Sdim 236201360Srdivackystatic char * 237201360Srdivackygetstring(void) 238201360Srdivacky{ 239201360Srdivacky addchar('\0'); 240201360Srdivacky current = 0; 241201360Srdivacky return buffer; 242245431Sdim} 243201360Srdivacky 244201360Srdivacky 245201360Srdivackystatic void 246201360Srdivackyexit_regerror(int er, regex_t *re) 247201360Srdivacky{ 248201360Srdivacky size_t errlen; 249245431Sdim char *errbuf; 250245431Sdim 251245431Sdim errlen = regerror(er, re, NULL, 0); 252201360Srdivacky errbuf = xalloc(errlen, 253201360Srdivacky "malloc in regerror: %lu", (unsigned long)errlen); 254201360Srdivacky regerror(er, re, errbuf, errlen); 255201360Srdivacky m4errx(1, "regular expression error: %s.", errbuf); 256201360Srdivacky} 257201360Srdivacky 258201360Srdivackystatic void 259245431Sdimadd_sub(int n, const char *string, regex_t *re, regmatch_t *pm) 260201360Srdivacky{ 261245431Sdim if (n > (int)re->re_nsub) 262201360Srdivacky warnx("No subexpression %d", n); 263245431Sdim /* Subexpressions that did not match are 264201360Srdivacky * not an error. */ 265201360Srdivacky else if (pm[n].rm_so != -1 && 266201360Srdivacky pm[n].rm_eo != -1) { 267201360Srdivacky addchars(string + pm[n].rm_so, 268201360Srdivacky pm[n].rm_eo - pm[n].rm_so); 269201360Srdivacky } 270201360Srdivacky} 271201360Srdivacky 272201360Srdivacky/* Add replacement string to the output buffer, recognizing special 273201360Srdivacky * constructs and replacing them with substrings of the original string. 
274 */ 275static void 276add_replace(const char *string, regex_t *re, const char *replace, regmatch_t *pm) 277{ 278 const char *p; 279 280 for (p = replace; *p != '\0'; p++) { 281 if (*p == '&' && !mimic_gnu) { 282 add_sub(0, string, re, pm); 283 continue; 284 } 285 if (*p == '\\') { 286 if (p[1] == '\\') { 287 addchar(p[1]); 288 p++; 289 continue; 290 } 291 if (p[1] == '&') { 292 if (mimic_gnu) 293 add_sub(0, string, re, pm); 294 else 295 addchar(p[1]); 296 p++; 297 continue; 298 } 299 if (isdigit(p[1])) { 300 add_sub(*(++p) - '0', string, re, pm); 301 continue; 302 } 303 } 304 addchar(*p); 305 } 306} 307 308static void 309do_subst(const char *string, regex_t *re, const char *replace, regmatch_t *pm) 310{ 311 int error; 312 int flags = 0; 313 const char *last_match = NULL; 314 315 while ((error = regexec(re, string, re->re_nsub+1, pm, flags)) == 0) { 316 if (pm[0].rm_eo != 0) { 317 if (string[pm[0].rm_eo-1] == '\n') 318 flags = 0; 319 else 320 flags = REG_NOTBOL; 321 } 322 323 /* NULL length matches are special... We use the `vi-mode' 324 * rule: don't allow a NULL-match at the last match 325 * position. 
326 */ 327 if (pm[0].rm_so == pm[0].rm_eo && 328 string + pm[0].rm_so == last_match) { 329 if (*string == '\0') 330 return; 331 addchar(*string); 332 if (*string++ == '\n') 333 flags = 0; 334 else 335 flags = REG_NOTBOL; 336 continue; 337 } 338 last_match = string + pm[0].rm_so; 339 addchars(string, pm[0].rm_so); 340 add_replace(string, re, replace, pm); 341 string += pm[0].rm_eo; 342 } 343 if (error != REG_NOMATCH) 344 exit_regerror(error, re); 345 pbstr(string); 346} 347 348static void 349do_regexp(const char *string, regex_t *re, const char *replace, regmatch_t *pm) 350{ 351 int error; 352 353 switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) { 354 case 0: 355 add_replace(string, re, replace, pm); 356 pbstr(getstring()); 357 break; 358 case REG_NOMATCH: 359 break; 360 default: 361 exit_regerror(error, re); 362 } 363} 364 365static void 366do_regexpindex(const char *string, regex_t *re, regmatch_t *pm) 367{ 368 int error; 369 370 switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) { 371 case 0: 372 pbunsigned(pm[0].rm_so); 373 break; 374 case REG_NOMATCH: 375 pbnum(-1); 376 break; 377 default: 378 exit_regerror(error, re); 379 } 380} 381 382/* In Gnu m4 mode, parentheses for backmatch don't work like POSIX 1003.2 383 * says. So we twiddle with the regexp before passing it to regcomp. 384 */ 385static char * 386twiddle(const char *p) 387{ 388 /* + at start of regexp is a normal character for Gnu m4 */ 389 if (*p == '^') { 390 addchar(*p); 391 p++; 392 } 393 if (*p == '+') { 394 addchar('\\'); 395 } 396 /* This could use strcspn for speed... 
*/ 397 while (*p != '\0') { 398 if (*p == '\\') { 399 switch(p[1]) { 400 case '(': 401 case ')': 402 case '|': 403 addchar(p[1]); 404 break; 405 case 'w': 406 addconstantstring("[_a-zA-Z0-9]"); 407 break; 408 case 'W': 409 addconstantstring("[^_a-zA-Z0-9]"); 410 break; 411 case '<': 412 addconstantstring("[[:<:]]"); 413 break; 414 case '>': 415 addconstantstring("[[:>:]]"); 416 break; 417 default: 418 addchars(p, 2); 419 break; 420 } 421 p+=2; 422 continue; 423 } 424 if (*p == '(' || *p == ')' || *p == '|') 425 addchar('\\'); 426 427 addchar(*p); 428 p++; 429 } 430 return getstring(); 431} 432 433/* patsubst(string, regexp, opt replacement) */ 434/* argv[2]: string 435 * argv[3]: regexp 436 * argv[4]: opt rep 437 */ 438void 439dopatsubst(const char *argv[], int argc) 440{ 441 if (argc <= 3) { 442 warnx("Too few arguments to patsubst"); 443 return; 444 } 445 /* special case: empty regexp */ 446 if (argv[3][0] == '\0') { 447 const char *s; 448 size_t len; 449 if (argc > 4 && argv[4]) 450 len = strlen(argv[4]); 451 else 452 len = 0; 453 for (s = argv[2]; *s != '\0'; s++) { 454 addchars(argv[4], len); 455 addchar(*s); 456 } 457 } else { 458 int error; 459 regex_t re; 460 regmatch_t *pmatch; 461 int mode = REG_EXTENDED; 462 size_t l = strlen(argv[3]); 463 464 if (!mimic_gnu || 465 (argv[3][0] == '^') || 466 (l > 0 && argv[3][l-1] == '$')) 467 mode |= REG_NEWLINE; 468 469 error = regcomp(&re, mimic_gnu ? twiddle(argv[3]) : argv[3], 470 mode); 471 if (error != 0) 472 exit_regerror(error, &re); 473 474 pmatch = xalloc(sizeof(regmatch_t) * (re.re_nsub+1), NULL); 475 do_subst(argv[2], &re, 476 argc > 4 && argv[4] != NULL ? 
argv[4] : "", pmatch); 477 free(pmatch); 478 regfree(&re); 479 } 480 pbstr(getstring()); 481} 482 483void 484doregexp(const char *argv[], int argc) 485{ 486 int error; 487 regex_t re; 488 regmatch_t *pmatch; 489 490 if (argc <= 3) { 491 warnx("Too few arguments to regexp"); 492 return; 493 } 494 /* special gnu case */ 495 if (argv[3][0] == '\0' && mimic_gnu) { 496 if (argc == 4 || argv[4] == NULL) 497 return; 498 else 499 pbstr(argv[4]); 500 } 501 error = regcomp(&re, mimic_gnu ? twiddle(argv[3]) : argv[3], 502 REG_EXTENDED|REG_NEWLINE); 503 if (error != 0) 504 exit_regerror(error, &re); 505 506 pmatch = xalloc(sizeof(regmatch_t) * (re.re_nsub+1), NULL); 507 if (argc == 4 || argv[4] == NULL) 508 do_regexpindex(argv[2], &re, pmatch); 509 else 510 do_regexp(argv[2], &re, argv[4], pmatch); 511 free(pmatch); 512 regfree(&re); 513} 514 515void 516doformat(const char *argv[], int argc) 517{ 518 const char *format = argv[2]; 519 int pos = 3; 520 int left_padded; 521 long width; 522 size_t l; 523 const char *thisarg = NULL; 524 char temp[2]; 525 long extra; 526 527 while (*format != 0) { 528 if (*format != '%') { 529 addchar(*format++); 530 continue; 531 } 532 533 format++; 534 if (*format == '%') { 535 addchar(*format++); 536 continue; 537 } 538 if (*format == 0) { 539 addchar('%'); 540 break; 541 } 542 543 if (*format == '*') { 544 format++; 545 if (pos >= argc) 546 m4errx(1, 547 "Format with too many format specifiers."); 548 width = strtol(argv[pos++], NULL, 10); 549 } else { 550 width = strtol(format, __DECONST(char **,&format), 10); 551 } 552 if (width < 0) { 553 left_padded = 1; 554 width = -width; 555 } else { 556 left_padded = 0; 557 } 558 if (*format == '.') { 559 format++; 560 if (*format == '*') { 561 format++; 562 if (pos >= argc) 563 m4errx(1, 564 "Format with too many format specifiers."); 565 extra = strtol(argv[pos++], NULL, 10); 566 } else { 567 extra = strtol(format, __DECONST(char **, &format), 10); 568 } 569 } else { 570 extra = LONG_MAX; 571 } 572 if 
(pos >= argc) 573 m4errx(1, "Format with too many format specifiers."); 574 switch(*format) { 575 case 's': 576 thisarg = argv[pos++]; 577 break; 578 case 'c': 579 temp[0] = strtoul(argv[pos++], NULL, 10); 580 temp[1] = 0; 581 thisarg = temp; 582 break; 583 default: 584 m4errx(1, "Unsupported format specification: %s.", 585 argv[2]); 586 } 587 format++; 588 l = strlen(thisarg); 589 if ((long)l > extra) 590 l = extra; 591 if (!left_padded) { 592 while ((long)l < width--) 593 addchar(' '); 594 } 595 addchars(thisarg, l); 596 if (left_padded) { 597 while ((long)l < width--) 598 addchar(' '); 599 } 600 } 601 pbstr(getstring()); 602} 603 604void 605doesyscmd(const char *cmd) 606{ 607 int p[2]; 608 pid_t pid, cpid; 609 char *argv[4]; 610 int cc; 611 int status; 612 613 /* Follow gnu m4 documentation: first flush buffers. */ 614 fflush(NULL); 615 616 argv[0] = __DECONST(char *, "sh"); 617 argv[1] = __DECONST(char *, "-c"); 618 argv[2] = __DECONST(char *, cmd); 619 argv[3] = NULL; 620 621 /* Just set up standard output, share stderr and stdin with m4 */ 622 if (pipe(p) == -1) 623 err(1, "bad pipe"); 624 switch(cpid = fork()) { 625 case -1: 626 err(1, "bad fork"); 627 /* NOTREACHED */ 628 case 0: 629 (void) close(p[0]); 630 (void) dup2(p[1], 1); 631 (void) close(p[1]); 632 execv(_PATH_BSHELL, argv); 633 exit(1); 634 default: 635 /* Read result in two stages, since m4's buffer is 636 * pushback-only. */ 637 (void) close(p[1]); 638 do { 639 char result[BUFSIZE]; 640 cc = read(p[0], result, sizeof result); 641 if (cc > 0) 642 addchars(result, cc); 643 } while (cc > 0 || (cc == -1 && errno == EINTR)); 644 645 (void) close(p[0]); 646 while ((pid = wait(&status)) != cpid && pid >= 0) 647 continue; 648 pbstr(getstring()); 649 } 650} 651 652void 653getdivfile(const char *name) 654{ 655 FILE *f; 656 int c; 657 658 f = fopen(name, "r"); 659 if (!f) 660 return; 661 662 while ((c = getc(f))!= EOF) 663 putc(c, active); 664 (void) fclose(f); 665} 666