/* gnum4.c revision 218909 */
195887Sjmallett/* $OpenBSD: gnum4.c,v 1.18 2002/04/26 16:15:16 espie Exp $ */ 290744Sjmallett 390744Sjmallett/* 490744Sjmallett * Copyright (c) 1999 Marc Espie 590744Sjmallett * 690744Sjmallett * Redistribution and use in source and binary forms, with or without 790744Sjmallett * modification, are permitted provided that the following conditions 890744Sjmallett * are met: 990744Sjmallett * 1. Redistributions of source code must retain the above copyright 1090744Sjmallett * notice, this list of conditions and the following disclaimer. 1190744Sjmallett * 2. Redistributions in binary form must reproduce the above copyright 1290744Sjmallett * notice, this list of conditions and the following disclaimer in the 1390744Sjmallett * documentation and/or other materials provided with the distribution. 1490744Sjmallett * 1590744Sjmallett * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 1690744Sjmallett * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1790744Sjmallett * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 1890744Sjmallett * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 1990744Sjmallett * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2090744Sjmallett * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2190744Sjmallett * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2290744Sjmallett * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2390744Sjmallett * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2490744Sjmallett * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2590744Sjmallett * SUCH DAMAGE. 
2690744Sjmallett */ 2790744Sjmallett 2895061Sjmallett#include <sys/cdefs.h> 2995061Sjmallett__FBSDID("$FreeBSD: head/usr.bin/m4/gnum4.c 218909 2011-02-21 09:01:34Z brucec $"); 3095061Sjmallett 31100014Sjmallett/* 3290744Sjmallett * functions needed to support gnu-m4 extensions, including a fake freezing 3390744Sjmallett */ 3490744Sjmallett 3590744Sjmallett#include <sys/param.h> 3690744Sjmallett#include <sys/types.h> 3790744Sjmallett#include <sys/wait.h> 3890744Sjmallett#include <ctype.h> 3990744Sjmallett#include <paths.h> 4090744Sjmallett#include <regex.h> 4190744Sjmallett#include <stddef.h> 4290744Sjmallett#include <stdlib.h> 4390744Sjmallett#include <stdio.h> 4490744Sjmallett#include <string.h> 4590744Sjmallett#include <err.h> 4690744Sjmallett#include <errno.h> 4790744Sjmallett#include <unistd.h> 4890744Sjmallett#include "mdef.h" 4990744Sjmallett#include "stdd.h" 5090744Sjmallett#include "extern.h" 5190744Sjmallett 5290744Sjmallett 5390744Sjmallettint mimic_gnu = 0; 5490744Sjmallett 5590744Sjmallett/* 5690744Sjmallett * Support for include path search 57218909Sbrucec * First search in the current directory. 5890744Sjmallett * If not found, and the path is not absolute, include path kicks in. 5990744Sjmallett * First, -I options, in the order found on the command line. 
6090744Sjmallett * Then M4PATH env variable 6190744Sjmallett */ 6290744Sjmallett 6390744Sjmallettstruct path_entry { 6490744Sjmallett char *name; 6590744Sjmallett struct path_entry *next; 6690744Sjmallett} *first, *last; 6790744Sjmallett 6890744Sjmallettstatic struct path_entry *new_path_entry(const char *); 6990744Sjmallettstatic void ensure_m4path(void); 7090744Sjmallettstatic struct input_file *dopath(struct input_file *, const char *); 7190744Sjmallett 7290744Sjmallettstatic struct path_entry * 7395887Sjmallettnew_path_entry(const char *dirname) 7490744Sjmallett{ 7590744Sjmallett struct path_entry *n; 7690744Sjmallett 7790744Sjmallett n = malloc(sizeof(struct path_entry)); 7890744Sjmallett if (!n) 7990744Sjmallett errx(1, "out of memory"); 8090744Sjmallett n->name = strdup(dirname); 8190744Sjmallett if (!n->name) 8290744Sjmallett errx(1, "out of memory"); 8390744Sjmallett n->next = 0; 8490744Sjmallett return n; 8590744Sjmallett} 86100014Sjmallett 87100014Sjmallettvoid 8895887Sjmallettaddtoincludepath(const char *dirname) 8990744Sjmallett{ 9090744Sjmallett struct path_entry *n; 9190744Sjmallett 9290744Sjmallett n = new_path_entry(dirname); 9390744Sjmallett 9490744Sjmallett if (last) { 9590744Sjmallett last->next = n; 9690744Sjmallett last = n; 9790744Sjmallett } 9890744Sjmallett else 9990744Sjmallett last = first = n; 10090744Sjmallett} 10190744Sjmallett 10290744Sjmallettstatic void 10399939Sjmallettensure_m4path(void) 10490744Sjmallett{ 10590744Sjmallett static int envpathdone = 0; 10690744Sjmallett char *envpath; 10790744Sjmallett char *sweep; 10890744Sjmallett char *path; 10990744Sjmallett 11090744Sjmallett if (envpathdone) 11190744Sjmallett return; 11290744Sjmallett envpathdone = TRUE; 11390744Sjmallett envpath = getenv("M4PATH"); 114100014Sjmallett if (!envpath) 11590744Sjmallett return; 11690744Sjmallett /* for portability: getenv result is read-only */ 11790744Sjmallett envpath = strdup(envpath); 11890744Sjmallett if (!envpath) 11990744Sjmallett errx(1, 
"out of memory"); 120100014Sjmallett for (sweep = envpath; 12190744Sjmallett (path = strsep(&sweep, ":")) != NULL;) 12290744Sjmallett addtoincludepath(path); 12390744Sjmallett free(envpath); 12490744Sjmallett} 12590744Sjmallett 12690744Sjmallettstatic 12790744Sjmallettstruct input_file * 12895887Sjmallettdopath(struct input_file *i, const char *filename) 12990744Sjmallett{ 13090744Sjmallett char path[MAXPATHLEN]; 13190744Sjmallett struct path_entry *pe; 13290744Sjmallett FILE *f; 13390744Sjmallett 13490744Sjmallett for (pe = first; pe; pe = pe->next) { 13590744Sjmallett snprintf(path, sizeof(path), "%s/%s", pe->name, filename); 136172261Skevlo if ((f = fopen(path, "r")) != NULL) { 13790744Sjmallett set_input(i, f, path); 13890744Sjmallett return i; 13990744Sjmallett } 14090744Sjmallett } 14190744Sjmallett return NULL; 14290744Sjmallett} 14390744Sjmallett 14490744Sjmallettstruct input_file * 14595887Sjmallettfopen_trypath(struct input_file *i, const char *filename) 14690744Sjmallett{ 14790744Sjmallett FILE *f; 14890744Sjmallett 14990744Sjmallett f = fopen(filename, "r"); 15090744Sjmallett if (f != NULL) { 15190744Sjmallett set_input(i, f, filename); 15290744Sjmallett return i; 15390744Sjmallett } 15490744Sjmallett if (filename[0] == '/') 15590744Sjmallett return NULL; 15690744Sjmallett 15790744Sjmallett ensure_m4path(); 15890744Sjmallett 15990744Sjmallett return dopath(i, filename); 16090744Sjmallett} 16190744Sjmallett 162100014Sjmallettvoid 16395887Sjmallettdoindir(const char *argv[], int argc) 16490744Sjmallett{ 16590744Sjmallett ndptr p; 16690744Sjmallett 16790744Sjmallett p = lookup(argv[2]); 16890744Sjmallett if (p == NULL) 16990744Sjmallett errx(1, "undefined macro %s", argv[2]); 17090744Sjmallett argv[1] = p->defn; 17190744Sjmallett eval(argv+1, argc-1, p->type); 17290744Sjmallett} 17390744Sjmallett 174100014Sjmallettvoid 17595887Sjmallettdobuiltin(const char *argv[], int argc) 17690744Sjmallett{ 17790744Sjmallett int n; 17890744Sjmallett argv[1] = NULL; 
17990744Sjmallett n = builtin_type(argv[2]); 18090744Sjmallett if (n != -1) 18190744Sjmallett eval(argv+1, argc-1, n); 18290744Sjmallett else 18390744Sjmallett errx(1, "unknown builtin %s", argv[2]); 184100014Sjmallett} 18590744Sjmallett 18690744Sjmallett 18790744Sjmallett/* We need some temporary buffer space, as pb pushes BACK and substitution 18890744Sjmallett * proceeds forward... */ 18990744Sjmallettstatic char *buffer; 19090744Sjmallettstatic size_t bufsize = 0; 19190744Sjmallettstatic size_t current = 0; 19290744Sjmallett 19390744Sjmallettstatic void addchars(const char *, size_t); 19495887Sjmallettstatic void addchar(int); 19590744Sjmallettstatic char *twiddle(const char *); 19690744Sjmallettstatic char *getstring(void); 19790744Sjmallettstatic void exit_regerror(int, regex_t *); 19890744Sjmallettstatic void do_subst(const char *, regex_t *, const char *, regmatch_t *); 19990744Sjmallettstatic void do_regexpindex(const char *, regex_t *, regmatch_t *); 20090744Sjmallettstatic void do_regexp(const char *, regex_t *, const char *, regmatch_t *); 20195095Sjmallettstatic void add_sub(size_t, const char *, regex_t *, regmatch_t *); 20290744Sjmallettstatic void add_replace(const char *, regex_t *, const char *, regmatch_t *); 20390744Sjmallett#define addconstantstring(s) addchars((s), sizeof(s)-1) 20490744Sjmallett 205100014Sjmallettstatic void 20695887Sjmallettaddchars(const char *c, size_t n) 20790744Sjmallett{ 20890744Sjmallett if (n == 0) 20990744Sjmallett return; 21090744Sjmallett while (current + n > bufsize) { 21190744Sjmallett if (bufsize == 0) 21290744Sjmallett bufsize = 1024; 21390744Sjmallett else 21490744Sjmallett bufsize *= 2; 21590744Sjmallett buffer = realloc(buffer, bufsize); 21690744Sjmallett if (buffer == NULL) 21790744Sjmallett errx(1, "out of memory"); 21890744Sjmallett } 21990744Sjmallett memcpy(buffer+current, c, n); 22090744Sjmallett current += n; 22190744Sjmallett} 22290744Sjmallett 223100014Sjmallettstatic void 
22495887Sjmallettaddchar(int c) 22590744Sjmallett{ 22690744Sjmallett if (current +1 > bufsize) { 22790744Sjmallett if (bufsize == 0) 22890744Sjmallett bufsize = 1024; 22990744Sjmallett else 23090744Sjmallett bufsize *= 2; 23190744Sjmallett buffer = realloc(buffer, bufsize); 23290744Sjmallett if (buffer == NULL) 23390744Sjmallett errx(1, "out of memory"); 23490744Sjmallett } 23590744Sjmallett buffer[current++] = c; 23690744Sjmallett} 23790744Sjmallett 23890744Sjmallettstatic char * 23999939Sjmallettgetstring(void) 24090744Sjmallett{ 24190744Sjmallett addchar('\0'); 24290744Sjmallett current = 0; 24390744Sjmallett return buffer; 24490744Sjmallett} 24590744Sjmallett 24690744Sjmallett 247100014Sjmallettstatic void 24895887Sjmallettexit_regerror(int er, regex_t *re) 24990744Sjmallett{ 25090744Sjmallett size_t errlen; 25190744Sjmallett char *errbuf; 25290744Sjmallett 25390744Sjmallett errlen = regerror(er, re, NULL, 0); 25490744Sjmallett errbuf = xalloc(errlen); 25590744Sjmallett regerror(er, re, errbuf, errlen); 25690744Sjmallett errx(1, "regular expression error: %s", errbuf); 25790744Sjmallett} 25890744Sjmallett 25990744Sjmallettstatic void 26095887Sjmallettadd_sub(size_t n, const char *string, regex_t *re, regmatch_t *pm) 26190744Sjmallett{ 26290744Sjmallett if (n > re->re_nsub) 26395164Sjmallett warnx("No subexpression %zu", n); 26490744Sjmallett /* Subexpressions that did not match are 26590744Sjmallett * not an error. */ 26690744Sjmallett else if (pm[n].rm_so != -1 && 26790744Sjmallett pm[n].rm_eo != -1) { 26890744Sjmallett addchars(string + pm[n].rm_so, 26990744Sjmallett pm[n].rm_eo - pm[n].rm_so); 27090744Sjmallett } 27190744Sjmallett} 27290744Sjmallett 27390744Sjmallett/* Add replacement string to the output buffer, recognizing special 27490744Sjmallett * constructs and replacing them with substrings of the original string. 
27590744Sjmallett */ 276100014Sjmallettstatic void 27795887Sjmallettadd_replace(const char *string, regex_t *re, const char *replace, regmatch_t *pm) 27890744Sjmallett{ 27990744Sjmallett const char *p; 28090744Sjmallett 28190744Sjmallett for (p = replace; *p != '\0'; p++) { 28290744Sjmallett if (*p == '&' && !mimic_gnu) { 28390744Sjmallett add_sub(0, string, re, pm); 28490744Sjmallett continue; 28590744Sjmallett } 28690744Sjmallett if (*p == '\\') { 28790744Sjmallett if (p[1] == '\\') { 28890744Sjmallett addchar(p[1]); 28990744Sjmallett p++; 29090744Sjmallett continue; 29190744Sjmallett } 29290744Sjmallett if (p[1] == '&') { 29390744Sjmallett if (mimic_gnu) 29490744Sjmallett add_sub(0, string, re, pm); 29590744Sjmallett else 29690744Sjmallett addchar(p[1]); 29790744Sjmallett p++; 29890744Sjmallett continue; 29990744Sjmallett } 30090744Sjmallett if (isdigit(p[1])) { 30190744Sjmallett add_sub(*(++p) - '0', string, re, pm); 30290744Sjmallett continue; 30390744Sjmallett } 30490744Sjmallett } 30590744Sjmallett addchar(*p); 30690744Sjmallett } 30790744Sjmallett} 30890744Sjmallett 309100014Sjmallettstatic void 31095887Sjmallettdo_subst(const char *string, regex_t *re, const char *replace, regmatch_t *pm) 31190744Sjmallett{ 31290744Sjmallett int error; 31390744Sjmallett int flags = 0; 31490744Sjmallett const char *last_match = NULL; 31590744Sjmallett 31690744Sjmallett while ((error = regexec(re, string, re->re_nsub+1, pm, flags)) == 0) { 31790744Sjmallett if (pm[0].rm_eo != 0) { 31890744Sjmallett if (string[pm[0].rm_eo-1] == '\n') 31990744Sjmallett flags = 0; 32090744Sjmallett else 32190744Sjmallett flags = REG_NOTBOL; 32290744Sjmallett } 32390744Sjmallett 324100014Sjmallett /* NULL length matches are special... We use the `vi-mode' 32590744Sjmallett * rule: don't allow a NULL-match at the last match 326100014Sjmallett * position. 
32790744Sjmallett */ 328100014Sjmallett if (pm[0].rm_so == pm[0].rm_eo && 32990744Sjmallett string + pm[0].rm_so == last_match) { 33090744Sjmallett if (*string == '\0') 33190744Sjmallett return; 33290744Sjmallett addchar(*string); 33390744Sjmallett if (*string++ == '\n') 33490744Sjmallett flags = 0; 33590744Sjmallett else 33690744Sjmallett flags = REG_NOTBOL; 33790744Sjmallett continue; 33890744Sjmallett } 33990744Sjmallett last_match = string + pm[0].rm_so; 34090744Sjmallett addchars(string, pm[0].rm_so); 34190744Sjmallett add_replace(string, re, replace, pm); 34290744Sjmallett string += pm[0].rm_eo; 34390744Sjmallett } 34490744Sjmallett if (error != REG_NOMATCH) 34590744Sjmallett exit_regerror(error, re); 34690744Sjmallett pbstr(string); 34790744Sjmallett} 34890744Sjmallett 349100014Sjmallettstatic void 35095887Sjmallettdo_regexp(const char *string, regex_t *re, const char *replace, regmatch_t *pm) 35190744Sjmallett{ 35290744Sjmallett int error; 35390744Sjmallett 35490744Sjmallett switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) { 355100014Sjmallett case 0: 35690744Sjmallett add_replace(string, re, replace, pm); 35790744Sjmallett pbstr(getstring()); 35890744Sjmallett break; 35990744Sjmallett case REG_NOMATCH: 36090744Sjmallett break; 36190744Sjmallett default: 36290744Sjmallett exit_regerror(error, re); 36390744Sjmallett } 36490744Sjmallett} 36590744Sjmallett 366100014Sjmallettstatic void 36795887Sjmallettdo_regexpindex(const char *string, regex_t *re, regmatch_t *pm) 36890744Sjmallett{ 36990744Sjmallett int error; 37090744Sjmallett 37190744Sjmallett switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) { 37290744Sjmallett case 0: 37390744Sjmallett pbunsigned(pm[0].rm_so); 37490744Sjmallett break; 37590744Sjmallett case REG_NOMATCH: 37690744Sjmallett pbnum(-1); 37790744Sjmallett break; 37890744Sjmallett default: 37990744Sjmallett exit_regerror(error, re); 38090744Sjmallett } 38190744Sjmallett} 38290744Sjmallett 38390744Sjmallett/* In Gnu m4 mode, 
parentheses for backmatch don't work like POSIX 1003.2 38490744Sjmallett * says. So we twiddle with the regexp before passing it to regcomp. 38590744Sjmallett */ 38690744Sjmallettstatic char * 38795887Sjmalletttwiddle(const char *p) 38890744Sjmallett{ 38990744Sjmallett /* This could use strcspn for speed... */ 39090744Sjmallett while (*p != '\0') { 39190744Sjmallett if (*p == '\\') { 39290744Sjmallett switch(p[1]) { 39390744Sjmallett case '(': 39490744Sjmallett case ')': 39590744Sjmallett case '|': 39690744Sjmallett addchar(p[1]); 39790744Sjmallett break; 39890744Sjmallett case 'w': 39990744Sjmallett addconstantstring("[_a-zA-Z0-9]"); 40090744Sjmallett break; 40190744Sjmallett case 'W': 40290744Sjmallett addconstantstring("[^_a-zA-Z0-9]"); 40390744Sjmallett break; 40490744Sjmallett case '<': 40590744Sjmallett addconstantstring("[[:<:]]"); 40690744Sjmallett break; 40790744Sjmallett case '>': 40890744Sjmallett addconstantstring("[[:>:]]"); 40990744Sjmallett break; 41090744Sjmallett default: 41190744Sjmallett addchars(p, 2); 41290744Sjmallett break; 41390744Sjmallett } 41490744Sjmallett p+=2; 41590744Sjmallett continue; 41690744Sjmallett } 41790744Sjmallett if (*p == '(' || *p == ')' || *p == '|') 41890744Sjmallett addchar('\\'); 41990744Sjmallett 42090744Sjmallett addchar(*p); 42190744Sjmallett p++; 42290744Sjmallett } 42390744Sjmallett return getstring(); 42490744Sjmallett} 42590744Sjmallett 42690744Sjmallett/* patsubst(string, regexp, opt replacement) */ 42790744Sjmallett/* argv[2]: string 42890744Sjmallett * argv[3]: regexp 42990744Sjmallett * argv[4]: opt rep 43090744Sjmallett */ 43190744Sjmallettvoid 43295887Sjmallettdopatsubst(const char *argv[], int argc) 43390744Sjmallett{ 43490744Sjmallett int error; 43590744Sjmallett regex_t re; 43690744Sjmallett regmatch_t *pmatch; 43790744Sjmallett 43890744Sjmallett if (argc <= 3) { 43990744Sjmallett warnx("Too few arguments to patsubst"); 44090744Sjmallett return; 44190744Sjmallett } 442100014Sjmallett error = 
regcomp(&re, mimic_gnu ? twiddle(argv[3]) : argv[3], 44390744Sjmallett REG_NEWLINE | REG_EXTENDED); 44490744Sjmallett if (error != 0) 44590744Sjmallett exit_regerror(error, &re); 446100014Sjmallett 44790744Sjmallett pmatch = xalloc(sizeof(regmatch_t) * (re.re_nsub+1)); 448100014Sjmallett do_subst(argv[2], &re, 44990744Sjmallett argc != 4 && argv[4] != NULL ? argv[4] : "", pmatch); 45090744Sjmallett pbstr(getstring()); 45190744Sjmallett free(pmatch); 45290744Sjmallett regfree(&re); 45390744Sjmallett} 45490744Sjmallett 45590744Sjmallettvoid 45695887Sjmallettdoregexp(const char *argv[], int argc) 45790744Sjmallett{ 45890744Sjmallett int error; 45990744Sjmallett regex_t re; 46090744Sjmallett regmatch_t *pmatch; 46190744Sjmallett 46290744Sjmallett if (argc <= 3) { 46390744Sjmallett warnx("Too few arguments to regexp"); 46490744Sjmallett return; 46590744Sjmallett } 466100014Sjmallett error = regcomp(&re, mimic_gnu ? twiddle(argv[3]) : argv[3], 46790744Sjmallett REG_EXTENDED); 46890744Sjmallett if (error != 0) 46990744Sjmallett exit_regerror(error, &re); 470100014Sjmallett 47190744Sjmallett pmatch = xalloc(sizeof(regmatch_t) * (re.re_nsub+1)); 47290744Sjmallett if (argv[4] == NULL || argc == 4) 47390744Sjmallett do_regexpindex(argv[2], &re, pmatch); 47490744Sjmallett else 47590744Sjmallett do_regexp(argv[2], &re, argv[4], pmatch); 47690744Sjmallett free(pmatch); 47790744Sjmallett regfree(&re); 47890744Sjmallett} 47990744Sjmallett 48090744Sjmallettvoid 48195887Sjmallettdoesyscmd(const char *cmd) 48290744Sjmallett{ 48390744Sjmallett int p[2]; 48490744Sjmallett pid_t pid, cpid; 48590744Sjmallett int cc; 48690744Sjmallett int status; 48790744Sjmallett 48890744Sjmallett /* Follow gnu m4 documentation: first flush buffers. 
*/ 48990744Sjmallett fflush(NULL); 49090744Sjmallett 49190744Sjmallett /* Just set up standard output, share stderr and stdin with m4 */ 49290744Sjmallett if (pipe(p) == -1) 49390744Sjmallett err(1, "bad pipe"); 49490744Sjmallett switch(cpid = fork()) { 49590744Sjmallett case -1: 49690744Sjmallett err(1, "bad fork"); 49790744Sjmallett /* NOTREACHED */ 49890744Sjmallett case 0: 49990744Sjmallett (void) close(p[0]); 50090744Sjmallett (void) dup2(p[1], 1); 50190744Sjmallett (void) close(p[1]); 502129392Sstefanf execl(_PATH_BSHELL, "sh", "-c", cmd, (char *)NULL); 50390744Sjmallett exit(1); 50490744Sjmallett default: 50590744Sjmallett /* Read result in two stages, since m4's buffer is 50690744Sjmallett * pushback-only. */ 50790744Sjmallett (void) close(p[1]); 50890744Sjmallett do { 50990744Sjmallett char result[BUFSIZE]; 51090744Sjmallett cc = read(p[0], result, sizeof result); 51190744Sjmallett if (cc > 0) 51290744Sjmallett addchars(result, cc); 51390744Sjmallett } while (cc > 0 || (cc == -1 && errno == EINTR)); 51490744Sjmallett 51590744Sjmallett (void) close(p[0]); 51690744Sjmallett while ((pid = wait(&status)) != cpid && pid >= 0) 51790744Sjmallett continue; 51890744Sjmallett pbstr(getstring()); 51990744Sjmallett } 52090744Sjmallett} 521