1/* $NetBSD: gnum4.c,v 1.13 2023/05/24 22:14:31 christos Exp $ */
2/* $OpenBSD: gnum4.c,v 1.39 2008/08/21 21:01:04 espie Exp $ */
3
4/*
5 * Copyright (c) 1999 Marc Espie
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29/*
30 * functions needed to support gnu-m4 extensions, including a fake freezing
31 */
32#if HAVE_NBTOOL_CONFIG_H
33#include "nbtool_config.h"
34#endif
35#include <sys/cdefs.h>
36__RCSID("$NetBSD: gnum4.c,v 1.13 2023/05/24 22:14:31 christos Exp $");
37
38#include <sys/param.h>
39#include <sys/types.h>
40#include <sys/wait.h>
41#include <ctype.h>
42#include <err.h>
43#include <paths.h>
44#include <regex.h>
45#include <stddef.h>
46#include <stdlib.h>
47#include <stdio.h>
48#include <string.h>
49#include <errno.h>
50#include <unistd.h>
51#include "mdef.h"
52#include "stdd.h"
53#include "extern.h"
54
55
/* Flag consulted by the regexp/patsubst helpers in this file to select
 * GNU m4 compatible behaviour; zero by default. */
int mimic_gnu = 0;
/* Fallback definition for systems whose headers lack SIZE_T_MAX. */
#ifndef SIZE_T_MAX
#define SIZE_T_MAX (size_t)~0ull
#endif
60
61/*
62 * Support for include path search
63 * First search in the current directory.
64 * If not found, and the path is not absolute, include path kicks in.
65 * First, -I options, in the order found on the command line.
66 * Then M4PATH env variable
67 */
68
/*
 * One directory of the include search path.  Entries form a singly linked
 * list; `first' is the head that dopath() walks and `last' is the tail
 * that addtoincludepath() appends to.
 */
struct path_entry {
	char *name;			/* directory name (strdup'ed copy) */
	struct path_entry *next;	/* following entry, 0 at the tail */
} *first, *last;

/* Helpers private to the include path search. */
static struct path_entry *new_path_entry(const char *);
static void ensure_m4path(void);
static struct input_file *dopath(struct input_file *, const char *);
77
78static struct path_entry *
79new_path_entry(const char *dirname)
80{
81	struct path_entry *n;
82
83	n = malloc(sizeof(struct path_entry));
84	if (!n)
85		errx(1, "out of memory");
86	n->name = strdup(dirname);
87	if (!n->name)
88		errx(1, "out of memory");
89	n->next = 0;
90	return n;
91}
92
93void
94addtoincludepath(const char *dirname)
95{
96	struct path_entry *n;
97
98	n = new_path_entry(dirname);
99
100	if (last) {
101		last->next = n;
102		last = n;
103	}
104	else
105		last = first = n;
106}
107
108static void
109ensure_m4path(void)
110{
111	static int envpathdone = 0;
112	char *envpath;
113	char *sweep;
114	char *path;
115
116	if (envpathdone)
117		return;
118	envpathdone = TRUE;
119	envpath = getenv("M4PATH");
120	if (!envpath)
121		return;
122	/* for portability: getenv result is read-only */
123	envpath = strdup(envpath);
124	if (!envpath)
125		errx(1, "out of memory");
126	for (sweep = envpath;
127	    (path = strsep(&sweep, ":")) != NULL;)
128	    addtoincludepath(path);
129	free(envpath);
130}
131
132static
133struct input_file *
134dopath(struct input_file *i, const char *filename)
135{
136	char path[MAXPATHLEN];
137	struct path_entry *pe;
138	FILE *f;
139
140	for (pe = first; pe; pe = pe->next) {
141		snprintf(path, sizeof(path), "%s/%s", pe->name, filename);
142		if ((f = fopen(path, "r")) != 0) {
143			set_input(i, f, path);
144			return i;
145		}
146	}
147	return NULL;
148}
149
/*
 * Open filename for reading and install it into i.
 *
 * The name is first tried as given.  If that fails and the name is not
 * absolute, the include search list (extended with M4PATH on first use)
 * is consulted.  Returns i on success, NULL when the file cannot be
 * opened anywhere.
 */
struct input_file *
fopen_trypath(struct input_file *i, const char *filename)
{
	FILE *fp = fopen(filename, "r");

	if (fp == NULL) {
		/* Absolute names are never searched for. */
		if (filename[0] == '/')
			return NULL;
		ensure_m4path();
		return dopath(i, filename);
	}

	set_input(i, fp, filename);
	return i;
}
167
168void
169doindir(const char *argv[], int argc)
170{
171	ndptr n;
172	struct macro_definition *p;
173
174	n = lookup(argv[2]);
175	if (n == NULL || (p = macro_getdef(n)) == NULL)
176		m4errx(1, "indir: undefined macro %s.", argv[2]);
177	argv[1] = p->defn;
178
179	eval(argv+1, argc-1, p->type, is_traced(n));
180}
181
182void
183dobuiltin(const char *argv[], int argc)
184{
185	ndptr p;
186
187	argv[1] = NULL;
188	p = macro_getbuiltin(argv[2]);
189	if (p != NULL)
190		eval(argv+1, argc-1, macro_builtin_type(p), is_traced(p));
191	else
192		m4errx(1, "unknown builtin %s.", argv[2]);
193}
194
195
/* We need some temporary buffer space, as pb pushes BACK and substitution
 * proceeds forward... */
/*
 * Shared, growable scratch buffer: `buffer' is the storage, `bufsize' its
 * allocated size and `current' the number of bytes appended so far.
 * addchar()/addchars() append to it; getstring() terminates it and resets
 * `current' to 0.
 */
static char *buffer;
static size_t bufsize = 0;
static size_t current = 0;

static void addchars(const char *, size_t);
static void addchar(int);
static char *twiddle(const char *);
static char *getstring(void);
static void exit_regerror(int, const char *, regex_t *) __dead;
static void do_subst(const char *, const char *, regex_t *, const char *,
    regmatch_t *);
static void do_regexpindex(const char *, const char *, regex_t *, regmatch_t *);
static void do_regexp(const char *, const char *, regex_t *, const char *, regmatch_t *);
static void add_sub(size_t, const char *, regex_t *, regmatch_t *);
static void add_replace(const char *, regex_t *, const char *, regmatch_t *);
/* Append a string literal (its terminating NUL excluded) to the buffer. */
#define addconstantstring(s) addchars((s), sizeof(s)-1)
214
215static void
216addchars(const char *c, size_t n)
217{
218	if (n == 0)
219		return;
220	while (current + n > bufsize) {
221		if (bufsize == 0)
222			bufsize = 1024;
223		else
224			bufsize *= 2;
225		buffer = xrealloc(buffer, bufsize, NULL);
226	}
227	memcpy(buffer+current, c, n);
228	current += n;
229}
230
231static void
232addchar(int c)
233{
234	if (current +1 > bufsize) {
235		if (bufsize == 0)
236			bufsize = 1024;
237		else
238			bufsize *= 2;
239		buffer = xrealloc(buffer, bufsize, NULL);
240	}
241	buffer[current++] = c;
242}
243
244static char *
245getstring(void)
246{
247	addchar('\0');
248	current = 0;
249	return buffer;
250}
251
252
/*
 * Report regex error code er (from compiling or executing re, built from
 * pattern pat) through m4errx().  The message text is obtained from
 * regerror(), first queried for the required buffer size.
 */
static void
exit_regerror(int er, const char *pat, regex_t *re)
{
	size_t need = regerror(er, re, NULL, 0);
	char *msg = xalloc(need, "malloc in regerror: %lu",
	    (unsigned long)need);

	regerror(er, re, msg, need);
	m4errx(1, "regular expression error: %s for: `%s'", msg, pat);
}
265
266static void
267add_sub(size_t n, const char *string, regex_t *re, regmatch_t *pm)
268{
269	if (n > re->re_nsub) {
270		if (!quiet)
271			warnx("No subexpression %zu", n);
272		if (fatal_warnings)
273			exit(EXIT_FAILURE);
274	}
275	/* Subexpressions that did not match are
276	 * not an error.  */
277	else if (pm[n].rm_so != -1 &&
278	    pm[n].rm_eo != -1) {
279		addchars(string + pm[n].rm_so,
280			pm[n].rm_eo - pm[n].rm_so);
281	}
282}
283
284/* Add replacement string to the output buffer, recognizing special
285 * constructs and replacing them with substrings of the original string.
286 */
287static void
288add_replace(const char *string, regex_t *re, const char *replace, regmatch_t *pm)
289{
290	const char *p;
291
292	for (p = replace; *p != '\0'; p++) {
293		if (*p == '&' && !mimic_gnu) {
294			add_sub(0, string, re, pm);
295			continue;
296		}
297		if (*p == '\\') {
298			if (p[1] == '\\') {
299				addchar(p[1]);
300				p++;
301				continue;
302			}
303			if (p[1] == '&') {
304				if (mimic_gnu)
305					add_sub(0, string, re, pm);
306				else
307					addchar(p[1]);
308				p++;
309				continue;
310			}
311			if (isdigit((unsigned char)p[1])) {
312				add_sub(*(++p) - '0', string, re, pm);
313				continue;
314			}
315		}
316	    	addchar(*p);
317	}
318}
319
320static void
321do_subst(const char *pat, const char *string, regex_t *re, const char *replace,
322    regmatch_t *pm)
323{
324	int error;
325	int flags = 0;
326	const char *last_match = NULL;
327
328	while ((error = regexec(re, string, re->re_nsub+1, pm, flags)) == 0) {
329		if (pm[0].rm_eo != 0) {
330			if (string[pm[0].rm_eo-1] == '\n')
331				flags = 0;
332			else
333				flags = REG_NOTBOL;
334		}
335
336		/* NULL length matches are special... We use the `vi-mode'
337		 * rule: don't allow a NULL-match at the last match
338		 * position.
339		 */
340		if (pm[0].rm_so == pm[0].rm_eo &&
341		    string + pm[0].rm_so == last_match) {
342			if (*string == '\0')
343				return;
344			addchar(*string);
345			if (*string++ == '\n')
346				flags = 0;
347			else
348				flags = REG_NOTBOL;
349			continue;
350		}
351		last_match = string + pm[0].rm_so;
352		addchars(string, pm[0].rm_so);
353		add_replace(string, re, replace, pm);
354		string += pm[0].rm_eo;
355		buffer[current] = '\0';
356	}
357	while (*string)
358		addchar(*string++);
359	if (error != REG_NOMATCH)
360		exit_regerror(error, pat, re);
361	pbstr(string);
362}
363
/*
 * Match re against string once.  On a match, push back the expansion of
 * replace; on no match, push back nothing.  Any other regexec() result
 * is reported through exit_regerror() using pat.
 */
static void
do_regexp(const char *pat, const char *string, regex_t *re, const char *replace,
    regmatch_t *pm)
{
	int error = regexec(re, string, re->re_nsub + 1, pm, 0);

	if (error == 0) {
		add_replace(string, re, replace, pm);
		pbstr(getstring());
	} else if (error != REG_NOMATCH) {
		exit_regerror(error, pat, re);
	}
}
381
/*
 * Match re against string once and push back the offset at which the
 * match starts, or -1 when there is no match.  Any other regexec()
 * result is reported through exit_regerror() using pat.
 */
static void
do_regexpindex(const char *pat, const char *string, regex_t *re, regmatch_t *pm)
{
	int error = regexec(re, string, re->re_nsub + 1, pm, 0);

	if (error == 0)
		pbunsigned(pm[0].rm_so);
	else if (error == REG_NOMATCH)
		pbnum(-1);
	else
		exit_regerror(error, pat, re);
}
398
/* In GNU m4 mode, parentheses for backmatch don't work like POSIX 1003.2
 * says. So we twiddle with the regexp before passing it to regcomp.
 *
 * The rewritten pattern is built in the scratch buffer and returned via
 * getstring(), so it is only valid until the buffer is appended to again.
 * Rewrites performed:
 *   \( \) \|            ->  ( ) |      (operators)
 *   ( ) | { }           ->  \( \) \| \{ \}   (literals)
 *   \w \W               ->  [_a-zA-Z0-9] / [^_a-zA-Z0-9]
 *   \< \>               ->  [[:<:]] / [[:>:]]
 *   leading + (optionally after ^)  ->  \+
 * Other backslash sequences are copied unchanged.
 */
static char *
twiddle(const char *p)
{
	/* + at start of regexp is a normal character for GNU m4 */
	if (*p == '^') {
		addchar(*p);
		p++;
	}
	if (*p == '+') {
		addchar('\\');
	}
	/* This could use strcspn for speed... */
	while (*p != '\0') {
		if (*p == '\\') {
			if (p[1] == '\0') {
				/*
				 * Lone trailing backslash: copy it and stop,
				 * so that we never step past the terminator.
				 * regcomp() gets to diagnose the pattern.
				 */
				addchar(*p);
				break;
			}
			switch(p[1]) {
			case '(':
			case ')':
			case '|':
				addchar(p[1]);
				break;
			case 'w':
				addconstantstring("[_a-zA-Z0-9]");
				break;
			case 'W':
				addconstantstring("[^_a-zA-Z0-9]");
				break;
			case '<':
				addconstantstring("[[:<:]]");
				break;
			case '>':
				addconstantstring("[[:>:]]");
				break;
			default:
				addchars(p, 2);
				break;
			}
			p+=2;
			continue;
		}
		if (strchr("()|{}", *p) != NULL)
			addchar('\\');

		addchar(*p);
		p++;
	}
	return getstring();
}
449
450static int
451checkempty(const char *argv[], int argc)
452{
453	const char *s;
454	size_t len;
455
456	if (argc != 3 && argv[3][0] != '\0')
457		return 0;
458
459	if (argc == 3) {
460		if (!quiet)
461			warnx("Too few arguments to patsubst");
462		if (fatal_warnings)
463			exit(EXIT_FAILURE);
464	}
465
466	if (argv[4] && argc > 4)
467		len = strlen(argv[4]);
468	else
469		len = 0;
470	for (s = argv[2]; *s != '\0'; s++) {
471		addchars(argv[4], len);
472		addchar(*s);
473	}
474	return 1;
475}
476
477/* patsubst(string, regexp, opt replacement) */
478/* argv[2]: string
479 * argv[3]: regexp
480 * argv[4]: opt rep
481 */
482void
483dopatsubst(const char *argv[], int argc)
484{
485	if (argc < 3) {
486		if (!quiet)
487			warnx("Too few arguments to patsubst");
488		if (fatal_warnings)
489			exit(EXIT_FAILURE);
490		return;
491	}
492	/* special case: empty regexp */
493	if (!checkempty(argv, argc)) {
494
495		const char *pat;
496		int error;
497		regex_t re;
498		regmatch_t *pmatch;
499		int mode = REG_EXTENDED;
500		size_t l = strlen(argv[3]);
501
502		if (!mimic_gnu ||
503		    (argv[3][0] == '^') ||
504		    (l > 0 && argv[3][l-1] == '$'))
505			mode |= REG_NEWLINE;
506
507		pat = mimic_gnu ? twiddle(argv[3]) : argv[3];
508		error = regcomp(&re, pat, mode);
509		if (error != 0)
510			exit_regerror(error, pat, &re);
511
512		pmatch = xalloc(sizeof(regmatch_t) * (re.re_nsub+1), NULL);
513		do_subst(pat, argv[2], &re,
514		    argc > 4 && argv[4] != NULL ? argv[4] : "", pmatch);
515		free(pmatch);
516		regfree(&re);
517	}
518	pbstr(getstring());
519}
520
521void
522doregexp(const char *argv[], int argc)
523{
524	int error;
525	regex_t re;
526	regmatch_t *pmatch;
527	const char *pat;
528
529	if (argc < 3) {
530		if (!quiet)
531			warnx("Too few arguments to regexp");
532		if (fatal_warnings)
533			exit(EXIT_FAILURE);
534		return;
535	}
536	if (checkempty(argv, argc)) {
537		return;
538	}
539
540	pat = mimic_gnu ? twiddle(argv[3]) : argv[3];
541	error = regcomp(&re, pat, REG_EXTENDED);
542	if (error != 0)
543		exit_regerror(error, pat, &re);
544
545	pmatch = xalloc(sizeof(regmatch_t) * (re.re_nsub+1), NULL);
546	if (argv[4] == NULL || argc == 4)
547		do_regexpindex(pat, argv[2], &re, pmatch);
548	else
549		do_regexp(pat, argv[2], &re, argv[4], pmatch);
550	free(pmatch);
551	regfree(&re);
552}
553
/*
 * format(fmt, args...)
 *
 * argv[2] is the format string; arguments are consumed from argv[3] on.
 * Only a small printf-like subset is handled: "%%", "%s" and "%c", each
 * with an optional width and an optional ".precision", either of which
 * may be given as digits or as '*' (taken from the next argument).
 * The formatted text is collected in the scratch buffer and pushed back.
 * Running out of arguments or meeting any other conversion is reported
 * through m4errx().
 */
void
doformat(const char *argv[], int argc)
{
	const char *format = argv[2];
	int pos = 3;		/* index of the next argument to consume */
	int left_padded;	/* set for a negative width: pad after the text */
	long width;
	size_t l;
	const char *thisarg;
	char temp[2];		/* holds the one-character string for %c */
	size_t extra;		/* precision: maximum number of bytes copied */

	while (*format != 0) {
		/* Ordinary characters are copied through. */
		if (*format != '%') {
			addchar(*format++);
			continue;
		}

		format++;
		/* "%%" yields a single '%'. */
		if (*format == '%') {
			addchar(*format++);
			continue;
		}
		/* A '%' ending the format string is emitted literally. */
		if (*format == 0) {
			addchar('%');
			break;
		}

		/* Width: '*' takes it from the arguments, else parse digits. */
		if (*format == '*') {
			format++;
			if (pos >= argc)
				m4errx(1,
				    "Format with too many format specifiers.");
			width = strtol(argv[pos++], NULL, 10);
		} else {
			char *eformat;
			width = strtol(format, &eformat, 10);
			format = eformat;
		}
		if (width < 0) {
			left_padded = 1;
			width = -width;
		} else {
			left_padded = 0;
		}
		/* Optional precision, same two forms as the width. */
		if (*format == '.') {
			format++;
			if (*format == '*') {
				format++;
				if (pos >= argc)
					m4errx(1,
					    "Format with too many format specifiers.");
				extra = strtol(argv[pos++], NULL, 10);
			} else {
				char *eformat;
				extra = strtol(format, &eformat, 10);
				format = eformat;
			}
		} else {
			/* No precision given: never truncate. */
			extra = SIZE_T_MAX;
		}
		if (pos >= argc)
			m4errx(1, "Format with too many format specifiers.");
		switch(*format) {
		case 's':
			thisarg = argv[pos++];
			break;
		case 'c':
			/* The argument is a decimal character code. */
			temp[0] = strtoul(argv[pos++], NULL, 10);
			temp[1] = 0;
			thisarg = temp;
			break;
		default:
			m4errx(1, "Unsupported format specification: %s.",
			    argv[2]);
		}
		format++;
		/* Truncate to the precision, then pad with blanks to the width. */
		l = strlen(thisarg);
		if (l > extra)
			l = extra;
		if (!left_padded) {
			while (l < (size_t)width--)
				addchar(' ');
		}
		addchars(thisarg, l);
		if (left_padded) {
			while (l < (size_t)width--)
				addchar(' ');
		}
	}
	pbstr(getstring());
}
646
647void
648doesyscmd(const char *cmd)
649{
650	int p[2];
651	pid_t pid, cpid;
652	const char *argv[4];
653	int cc;
654	int status;
655
656	/* Follow gnu m4 documentation: first flush buffers. */
657	fflush(NULL);
658
659	argv[0] = "sh";
660	argv[1] = "-c";
661	argv[2] = cmd;
662	argv[3] = NULL;
663
664	/* Just set up standard output, share stderr and stdin with m4 */
665	if (pipe(p) == -1)
666		err(1, "bad pipe");
667	switch(cpid = fork()) {
668	case -1:
669		err(1, "bad fork");
670		/* NOTREACHED */
671	case 0:
672		(void) close(p[0]);
673		(void) dup2(p[1], 1);
674		(void) close(p[1]);
675		execv(_PATH_BSHELL, __UNCONST(argv));
676		exit(1);
677	default:
678		/* Read result in two stages, since m4's buffer is
679		 * pushback-only. */
680		(void) close(p[1]);
681		do {
682			char result[BUFSIZE];
683			cc = read(p[0], result, sizeof result);
684			if (cc > 0)
685				addchars(result, cc);
686		} while (cc > 0 || (cc == -1 && errno == EINTR));
687
688		(void) close(p[0]);
689		while ((pid = wait(&status)) != cpid && pid >= 0)
690			continue;
691		pbstr(getstring());
692	}
693}
694
695void
696getdivfile(const char *name)
697{
698	FILE *f;
699	int c;
700
701	f = fopen(name, "r");
702	if (!f)
703		return;
704
705	while ((c = getc(f))!= EOF)
706		putc(c, active);
707	(void) fclose(f);
708}
709
710#ifdef REAL_FREEZE
711void
712freeze_state(const char *fname)
713{
714	FILE *f;
715
716	if ((f = fopen(fname, "wb")) == NULL)
717		m4errx(EXIT_FAILURE, "Can't open output freeze file `%s' (%s)",
718		    fname, strerror(errno));
719	fprintf(f, "# This is a frozen state file generated by %s\nV1\n",
720	    getprogname());
721	fprintf(f, "Q%zu,%zu\n%s%s\n", strlen(lquote), strlen(rquote),
722	    lquote, rquote);
723	fprintf(f, "C%zu,%zu\n%s%s\n", strlen(scommt), strlen(ecommt),
724	    scommt, ecommt);
725	dump_state(f);
726	/* XXX: diversions? */
727	fprintf(f, "D-1,0\n");
728	fprintf(f, "# End of frozen state file\n");
729	fclose(f);
730}
731
/*
 * Read back a version 1 frozen state file from fname.
 *
 * After the leading comment line and the "V1" version line, the file is
 * a sequence of records.  Each record starts with a header line made of
 * a type character and two decimal lengths ("<type><nl>,<dl>"), followed
 * by nl + dl bytes of payload for the types that carry one:
 *   Q  left and right quote strings
 *   C  start and end comment strings
 *   T, F  macro name and definition, installed with macro_pushdef()
 *   D  accepted but ignored
 * Empty lines are skipped and a line starting with '#' ends the file.
 * Malformed input or a premature end of file is reported through
 * m4errx().
 */
void
thaw_state(const char *fname)
{
	char *name = NULL;		/* reusable buffer for macro names */
	size_t nl, namelen = 0;		/* name length read / buffer size */
	char *defn = NULL;		/* reusable buffer for definitions */
	size_t dl, defnlen = 0;		/* defn length read / buffer size */
	size_t lineno = 0;
	char line[1024], *ptr, type;
	FILE *f;

	if ((f = fopen(fname, "rb")) == NULL)
		m4errx(EXIT_FAILURE, "Can't open frozen file `%s' (%s)",
		    fname, strerror(errno));

/* GET: read the next line into `line'; jump to `out' when none is left. */
#define GET() if (fgets(line, (int)sizeof(line), f) == NULL) goto out
/* GETSTR: read exactly l bytes into s and NUL-terminate it; jump to `out'
 * on a short read.  s must have room for l + 1 bytes. */
#define GETSTR(s, l) if (fread(s, 1, l, f) != l) goto out; else s[l] = '\0'

	GET();	/* comment */
	GET();	/* version */
	if ((ptr = strrchr(line, '\n')) != NULL)
		*ptr = '\0';
	if (strcmp(line, "V1") != 0)
		m4errx(EXIT_FAILURE, "Bad frozen version `%s'", line);

	for (;;) {
		GET();
		lineno++;
		switch (*line) {
		case '\n':
			/* e.g. the newline left over after a record's payload */
			continue;
		case '#':
			/* trailing comment: normal end of the frozen file */
			free(name);
			free(defn);
			fclose(f);
			return;
		default:
			if (sscanf(line, "%c%zu,%zu\n", &type, &nl, &dl) != 3)
				m4errx(EXIT_FAILURE, "%s, %zu: Bad line `%s'",
				    fname, lineno, line);
			break;
		}

		switch (type) {
		case 'Q':
			/* lengths must leave room for the terminating NUL */
			if (nl >= sizeof(lquote) || dl >= sizeof(rquote))
				m4errx(EXIT_FAILURE, "%s, %zu: Quote too long",
				    fname, lineno);
			GETSTR(lquote, nl);
			GETSTR(rquote, dl);
			break;

		case 'C':
			if (nl >= sizeof(scommt) || dl >= sizeof(ecommt))
				m4errx(EXIT_FAILURE, "%s, %zu: Comment too long",
				    fname, lineno);
			GETSTR(scommt, nl);
			GETSTR(ecommt, dl);
			break;

		case 'T':
		case 'F':
			/* grow the reusable buffers to payload length + NUL */
			if (nl >= namelen)
				name = xrealloc(name, namelen = nl + 1,
					"name grow");
			if (dl >= defnlen)
				defn = xrealloc(defn, defnlen = dl + 1,
					"defn grow");
			GETSTR(name, nl);
			GETSTR(defn, dl);
			macro_pushdef(name, defn);
			break;

		case 'D':
			/* XXX: Not implemented */
			break;

		default:
			m4errx(EXIT_FAILURE, "%s, %zu: Unknown type %c",
			    fname, lineno,type);
		}
	}
out:
	/* reached from GET/GETSTR when the file ends prematurely */
	m4errx(EXIT_FAILURE, "Unexpected end of file in `%s'", fname);
}
817#endif
818