main.c revision 133858
1/*	$OpenBSD: main.c,v 1.53 2002/04/26 16:15:16 espie Exp $	*/
2/*	$NetBSD: main.c,v 1.12 1997/02/08 23:54:49 cgd Exp $	*/
3
4/*-
5 * Copyright (c) 1989, 1993
6 *	The Regents of the University of California.  All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Ozan Yigit at York University.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 *    must display the following acknowledgement:
21 *	This product includes software developed by the University of
22 *	California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 *    may be used to endorse or promote products derived from this software
25 *    without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 */
39
40#ifndef lint
41static char copyright[] =
42"@(#) Copyright (c) 1989, 1993\n\
43	The Regents of the University of California.  All rights reserved.\n";
44#endif /* not lint */
45
46#ifndef lint
47#if 0
48static char sccsid[] = "@(#)main.c	8.1 (Berkeley) 6/6/93";
49#else
50#if 0
51static char rcsid[] = "$OpenBSD: main.c,v 1.53 2002/04/26 16:15:16 espie Exp $";
52#endif
53#endif
54#endif /* not lint */
55
56#include <sys/cdefs.h>
57__FBSDID("$FreeBSD: head/usr.bin/m4/main.c 133858 2004-08-16 14:18:22Z tjr $");
58
59/*
60 * main.c
61 * Facility: m4 macro processor
62 * by: oz
63 */
64
65#include <sys/types.h>
66#include <assert.h>
67#include <signal.h>
68#include <errno.h>
69#include <unistd.h>
70#include <stdio.h>
71#include <ctype.h>
72#include <string.h>
73#include <stddef.h>
74#include <stdlib.h>
75#include <err.h>
76#include <locale.h>
77#include "mdef.h"
78#include "stdd.h"
79#include "extern.h"
80#include "pathnames.h"
81
/*
 * File-scope state of the m4 machine.  Everything here except STACKMAX
 * has external linkage.
 */
ndptr hashtab[HASHSIZE];	/* hash table for macros etc.  */
stae *mstack;		 	/* stack of m4 machine         */
char *sstack;		 	/* shadow stack, for string space extension */
static size_t STACKMAX;		/* current maximum size of stack */
int sp; 			/* current m4  stack pointer   */
int fp; 			/* m4 call frame pointer       */
struct input_file infile[MAXINP];/* input file stack (0=stdin)  */
char *inname[MAXINP];		/* names of these input files */
int inlineno[MAXINP];		/* current line number in each input file */
FILE **outfile;			/* diversion array(0=bitbucket)*/
int maxout;			/* upper bound used when scanning outfile[] */
FILE *active;			/* active output file pointer  */
int ilevel = 0; 		/* input file stack pointer    */
int oindex = 0; 		/* diversion index..	       */
char null[] = "";		/* empty string; builtins use it as defn */
const char *m4wraps = "";       /* m4wrap string default..     */
char lquote[MAXCCHARS+1] = {LQUOTE};	/* left quote string     (`)   */
char rquote[MAXCCHARS+1] = {RQUOTE};	/* right quote string    (')   */
char scommt[MAXCCHARS+1] = {SCOMMT};	/* string that starts a comment */
char ecommt[MAXCCHARS+1] = {ECOMMT};	/* string that ends a comment   */
int synccpp;			/* Line synchronisation for C preprocessor */
103
/*
 * Table of builtin macros, installed into hashtab by initkwds().
 *
 * Each entry pairs a name with a type code, optionally or'ed with NOARGS.
 * initkwds() gives every entry that lacks NOARGS the NEEDARGS flag, and
 * macro() outputs a NEEDARGS name as plain text unless it is followed by
 * a left parenthesis.
 *
 * The order of the entries is significant: builtin_realname() returns the
 * first entry whose type matches, and "expr" and "eval" share EXPRTYPE.
 */
struct keyblk keywrds[] = {	/* m4 keywords to be installed */
	{ "include",      INCLTYPE },
	{ "sinclude",     SINCTYPE },
	{ "define",       DEFITYPE },
	{ "defn",         DEFNTYPE },
	{ "divert",       DIVRTYPE | NOARGS },
	{ "expr",         EXPRTYPE },
	{ "eval",         EXPRTYPE },
	{ "substr",       SUBSTYPE },
	{ "ifelse",       IFELTYPE },
	{ "ifdef",        IFDFTYPE },
	{ "len",          LENGTYPE },
	{ "incr",         INCRTYPE },
	{ "decr",         DECRTYPE },
	{ "dnl",          DNLNTYPE | NOARGS },
	{ "changequote",  CHNQTYPE | NOARGS },
	{ "changecom",    CHNCTYPE | NOARGS },
	{ "index",        INDXTYPE },
#ifdef EXTENDED
	{ "paste",        PASTTYPE },
	{ "spaste",       SPASTYPE },
	/* Newer extensions, needed to handle gnu-m4 scripts */
	{ "indir",        INDIRTYPE},
	{ "builtin",      BUILTINTYPE},
	{ "patsubst",	  PATSTYPE},
	{ "regexp",	  REGEXPTYPE},
	{ "esyscmd",	  ESYSCMDTYPE},
	{ "__file__",	  FILENAMETYPE | NOARGS},
	{ "__line__",	  LINETYPE | NOARGS},
#endif
	{ "popdef",       POPDTYPE },
	{ "pushdef",      PUSDTYPE },
	{ "dumpdef",      DUMPTYPE | NOARGS },
	{ "shift",        SHIFTYPE | NOARGS },
	{ "translit",     TRNLTYPE },
	{ "undefine",     UNDFTYPE },
	{ "undivert",     UNDVTYPE | NOARGS },
	{ "divnum",       DIVNTYPE | NOARGS },
	{ "maketemp",     MKTMTYPE },
	{ "errprint",     ERRPTYPE | NOARGS },
	{ "m4wrap",       M4WRTYPE | NOARGS },
	{ "m4exit",       EXITTYPE | NOARGS },
	{ "syscmd",       SYSCTYPE },
	{ "sysval",       SYSVTYPE | NOARGS },
	{ "traceon",	  TRACEONTYPE | NOARGS },
	{ "traceoff",	  TRACEOFFTYPE | NOARGS },

	/* Self-defining platform name, chosen by the compiler's predefines. */
#if defined(unix) || defined(__unix__)
	{ "unix",         SELFTYPE | NOARGS },
#else
#ifdef vms
	{ "vms",          SELFTYPE | NOARGS },
#endif
#endif
};

/* Number of entries in keywrds[]. */
#define MAXKEYS	(sizeof(keywrds)/sizeof(struct keyblk))
161
/*
 * Bookkeeping for error reports: where each currently open quote and
 * each open parenthesis was seen.  record() fills a slot in, dump_stack()
 * prints them.  Only the first MAXRECORD nesting levels are remembered;
 * record() ignores deeper ones.
 */
#define MAXRECORD 50
static struct position {
	char *name;		/* set from CURRENT_NAME by record() */
	unsigned long line;	/* set from CURRENT_LINE by record() */
} quotes[MAXRECORD], paren[MAXRECORD];
167
/* Position bookkeeping used for "unclosed quote/parenthesis" reports. */
static void record(struct position *, int);
static void dump_stack(struct position *, int);

/* The scanner proper and its helpers. */
static void macro(void);
static void initkwds(void);
static ndptr inspect(int, char *);
static int do_look_ahead(int, const char *);

/* Doubles the capacity of mstack/sstack. */
static void enlarge_stack(void);
177
178int
179main(int argc, char *argv[])
180{
181	int c;
182	int n;
183	int rval;
184	char *p;
185
186	setlocale(LC_ALL, "");
187
188	traceout = stderr;
189
190	if (signal(SIGINT, SIG_IGN) != SIG_IGN)
191		signal(SIGINT, onintr);
192
193	initkwds();
194	initspaces();
195	STACKMAX = INITSTACKMAX;
196
197	mstack = (stae *)xalloc(sizeof(stae) * STACKMAX);
198	sstack = (char *)xalloc(STACKMAX);
199
200	maxout = 0;
201	outfile = NULL;
202	resizedivs(MAXOUT);
203
204	while ((c = getopt(argc, argv, "gst:d:D:U:o:I:")) != -1)
205		switch(c) {
206		case 'D':               /* define something..*/
207			for (p = optarg; *p; p++)
208				if (*p == '=')
209					break;
210			if (p == optarg)
211				errx(1, "null variable cannot be defined");
212			if (*p)
213				*p++ = EOS;
214			dodefine(optarg, p);
215			break;
216		case 'I':
217			addtoincludepath(optarg);
218			break;
219		case 'U':               /* undefine...       */
220			remhash(optarg, TOP);
221			break;
222		case 'g':
223			mimic_gnu = 1;
224			break;
225		case 'd':
226			set_trace_flags(optarg);
227			break;
228		case 's':
229			synccpp = 1;
230			break;
231		case 't':
232			mark_traced(optarg, 1);
233			break;
234		case 'o':
235			trace_file(optarg);
236                        break;
237		case '?':
238		default:
239			usage();
240		}
241
242        argc -= optind;
243        argv += optind;
244
245	rval = 0;
246	active = stdout;		/* default active output     */
247	bbase[0] = bufbase;
248        if (!argc) {
249 		sp = -1;		/* stack pointer initialized */
250		fp = 0; 		/* frame pointer initialized */
251		set_input(infile+0, stdin, "stdin");
252					/* default input (naturally) */
253		if ((inname[0] = strdup("-")) == NULL)
254			err(1, NULL);
255		inlineno[0] = 1;
256		emitline();
257		macro();
258	} else
259		for (; argc--; ++argv) {
260			p = *argv;
261			if (p[0] == '-' && p[1] == EOS)
262				set_input(infile, stdin, "stdin");
263			else if (fopen_trypath(infile, p) == NULL) {
264				warn("%s", p);
265				rval = 1;
266				continue;
267			}
268			sp = -1;
269			fp = 0;
270			if ((inname[0] = strdup(p)) == NULL)
271				err(1, NULL);
272			inlineno[0] = 1;
273			emitline();
274			macro();
275		    	release_input(infile);
276		}
277
278	if (*m4wraps) { 		/* anything for rundown ??   */
279		ilevel = 0;		/* in case m4wrap includes.. */
280		bufbase = bp = buf;	/* use the entire buffer   */
281		pbstr(m4wraps); 	/* user-defined wrapup act   */
282		macro();		/* last will and testament   */
283	}
284
285	if (active != stdout)
286		active = stdout;	/* reset output just in case */
287	for (n = 1; n < maxout; n++)	/* default wrap-up: undivert */
288		if (outfile[n] != NULL)
289			getdiv(n);
290					/* remove bitbucket if used  */
291	if (outfile[0] != NULL) {
292		(void) fclose(outfile[0]);
293	}
294
295	exit(rval);
296}
297
/*
 * Try to match the delimiter string `token' on the input.
 * The caller has already read `t', which must equal token[0]; the
 * remaining characters of the token are read here with gpbc().
 * Used for comment and quoting delimiters.
 * Returns 1 if the rest of `token' was read from the input;
 *         0 otherwise, after putting back the character that did not
 *           match and every matched character except token[0].
 */
static int
do_look_ahead(int t, const char *token)
{
	int pos;

	assert((unsigned char)t == (unsigned char)token[0]);

	pos = 1;
	while (token[pos] != '\0') {
		t = gpbc();
		if (t == EOF ||
		    (unsigned char)t != (unsigned char)token[pos]) {
			putback(t);
			/* Undo token[pos-1] .. token[1], last read first. */
			for (pos--; pos > 0; pos--)
				putback(token[pos]);
			return 0;
		}
		pos++;
	}
	return 1;
}
323
/*
 * LOOK_AHEAD(t, token) is nonzero when the character `t' is not EOF,
 * equals the first character of `token', and do_look_ahead() finds the
 * rest of `token' on the input.
 * Both arguments are expanded more than once, so they must not have
 * side effects.  Each use is parenthesized so that any expression can be
 * passed safely.
 */
#define LOOK_AHEAD(t, token) ((t) != EOF && 			\
    (unsigned char)(t) == (unsigned char)(token)[0] && 		\
    do_look_ahead((t), (token)))
327
/*
 * macro - the work horse..
 *
 * Reads characters with gpbc() and dispatches on them until EOF is
 * seen on the outermost input (ilevel <= 0):
 *  - a name (letter or '_' first) is looked up with inspect(); a known
 *    macro opens a call frame on the stack, anything else is output;
 *  - quoted text has one level of quotes stripped;
 *  - comments are copied through unchanged;
 *  - while no call is being collected (sp < 0) other characters go
 *    straight to `active';
 *  - inside a call, parentheses and commas delimit the arguments and
 *    everything else is saved as argument text.
 * An unclosed quote or parenthesis at EOF is reported and the program
 * exits with status 1.
 *
 * cycle, PARLEV, CALTYP, PREVEP, PREVSP and PREVFP are macros defined
 * outside this file; presumably cycle is an endless loop and the others
 * access fields of the current call frame - confirm against mdef.h.
 */
static void
macro(void)
{
	char token[MAXTOK+1];
	int t, l;
	ndptr p;
	int  nlpar;

	cycle {
		t = gpbc();
		if (t == '_' || isalpha(t)) {
			p = inspect(t, token);
			/*
			 * Peek at the character after the name.  l is only
			 * assigned for a known macro; the p == nil test below
			 * short-circuits before l is read otherwise.
			 */
			if (p != nil)
				putback(l = gpbc());
			/*
			 * Not a macro, or a macro that needs arguments but
			 * is not followed by '(': output the name as text.
			 */
			if (p == nil || (l != LPAREN &&
			    (p->type & NEEDARGS) != 0))
				outputstr(token);
			else {
		/*
		 * real thing.. First build a call frame:
		 */
				pushf(fp);	/* previous call frm */
				pushf(p->type); /* type of the call  */
				pushf(0);	/* parenthesis level */
				fp = sp;	/* new frame pointer */
		/*
		 * now push the string arguments:
		 */
				pushs1(p->defn);	/* defn string */
				pushs1(p->name);	/* macro name  */
				pushs(ep);	      	/* start next..*/

				/*
				 * Called without an argument list:
				 * evaluate at once with an argument
				 * count of 2, then pop the frame.
				 */
				if (l != LPAREN && PARLEV == 0)  {
				    /* no bracks  */
					chrsave(EOS);

					if ((uintptr_t)sp == STACKMAX)
						errx(1, "internal stack overflow");
					eval((const char **) mstack+fp+1, 2,
					    CALTYP);

					ep = PREVEP;	/* flush strspace */
					sp = PREVSP;	/* previous sp..  */
					fp = PREVFP;	/* rewind stack...*/
				}
			}
		} else if (t == EOF) {
			/* EOF while a call frame is still open. */
			if (sp > -1) {
				warnx( "unexpected end of input, unclosed parenthesis:");
				dump_stack(paren, PARLEV);
				exit(1);
			}
			if (ilevel <= 0)
				break;			/* all done thanks.. */
			/* Pop one level of the input stack and carry on. */
			release_input(infile+ilevel--);
			free(inname[ilevel+1]);
			bufbase = bbase[ilevel];
			emitline();
			continue;
		}
	/*
	 * non-alpha token possibly seen..
	 * [the order of else if .. stmts is important.]
	 */
		else if (LOOK_AHEAD(t,lquote)) {	/* strip quotes */
			/* nlpar counts the currently open quote levels. */
			nlpar = 0;
			record(quotes, nlpar++);
			/*
			 * Opening quote: scan forward until matching
			 * closing quote has been found.
			 */
			do {

				l = gpbc();
				if (LOOK_AHEAD(l,rquote)) {
					/* Only nested closing quotes are kept. */
					if (--nlpar > 0)
						outputstr(rquote);
				} else if (LOOK_AHEAD(l,lquote)) {
					/* Nested opening quotes are kept. */
					record(quotes, nlpar++);
					outputstr(lquote);
				} else if (l == EOF) {
					if (nlpar == 1)
						warnx("unclosed quote:");
					else
						warnx("%d unclosed quotes:", nlpar);
					dump_stack(quotes, nlpar);
					exit(1);
				} else {
					/*
					 * Quoted text: written out directly
					 * when no call is open, saved as
					 * argument text otherwise.
					 */
					if (nlpar > 0) {
						if (sp < 0)
							putc(l, active);
						else
							CHRSAVE(l);
					}
				}
			}
			while (nlpar != 0);
		}

		/* Comment outside any call: copy it to the output verbatim. */
		else if (sp < 0 && LOOK_AHEAD(t, scommt)) {
			fputs(scommt, active);

			for(;;) {
				t = gpbc();
				if (LOOK_AHEAD(t, ecommt)) {
					fputs(ecommt, active);
					break;
				}
				/* EOF ends the comment without a terminator. */
				if (t == EOF)
					break;
				putc(t, active);
			}
		}

		else if (sp < 0) {		/* not in a macro at all */
			putc(t, active);	/* output directly..	 */
		}

		/* From here on a call frame is open (sp >= 0). */
		else switch(t) {

		case LPAREN:
			/* A nested '(' is part of the argument text. */
			if (PARLEV > 0)
				chrsave(t);
			while (isspace(l = gpbc()))
				;		/* skip blank, tab, nl.. */
			putback(l);
			record(paren, PARLEV++);
			break;

		case RPAREN:
			/* A nested ')' is part of the argument text. */
			if (--PARLEV > 0)
				chrsave(t);
			else {			/* end of argument list */
				chrsave(EOS);

				if ((uintptr_t)sp == STACKMAX)
					errx(1, "internal stack overflow");

				eval((const char **) mstack+fp+1, sp-fp,
				    CALTYP);

				ep = PREVEP;	/* flush strspace */
				sp = PREVSP;	/* previous sp..  */
				fp = PREVFP;	/* rewind stack...*/
			}
			break;

		case COMMA:
			/* Only a top-level comma separates arguments. */
			if (PARLEV == 1) {
				chrsave(EOS);		/* new argument   */
				/* Skip white space before the next argument. */
				while (isspace(l = gpbc()))
					;
				putback(l);
				pushs(ep);
			} else
				chrsave(t);
			break;

		default:
			/* Comment inside a call: saved verbatim as argument text. */
			if (LOOK_AHEAD(t, scommt)) {
				char *pc;
				for (pc = scommt; *pc; pc++)
					chrsave(*pc);
				for(;;) {
					t = gpbc();
					if (LOOK_AHEAD(t, ecommt)) {
						for (pc = ecommt; *pc; pc++)
							chrsave(*pc);
						break;
					}
					if (t == EOF)
					    break;
					CHRSAVE(t);
				}
			} else
				CHRSAVE(t);		/* stack the char */
			break;
		}
	}
}
511
512/*
513 * output string directly, without pushing it for reparses.
514 */
515void
516outputstr(const char *s)
517{
518	if (sp < 0)
519		while (*s)
520			putc(*s++, active);
521	else
522		while (*s)
523			CHRSAVE(*s++);
524}
525
526/*
527 * build an input token..
528 * consider only those starting with _ or A-Za-z. This is a
529 * combo with lookup to speed things up.
530 */
531static ndptr
532inspect(int c, char *tp)
533{
534	char *name = tp;
535	char *etp = tp+MAXTOK;
536	ndptr p;
537	unsigned int h;
538
539	h = *tp++ = c;
540
541	while ((isalnum(c = gpbc()) || c == '_') && tp < etp)
542		h = (h << 5) + h + (*tp++ = c);
543	if (c != EOF)
544		PUTBACK(c);
545	*tp = EOS;
546	/* token is too long, it won't match anything, but it can still
547	 * be output. */
548	if (tp == ep) {
549		outputstr(name);
550		while (isalnum(c = gpbc()) || c == '_') {
551			if (sp < 0)
552				putc(c, active);
553			else
554				CHRSAVE(c);
555		}
556		*name = EOS;
557		return nil;
558	}
559
560	for (p = hashtab[h % HASHSIZE]; p != nil; p = p->nxtptr)
561		if (h == p->hv && STREQ(name, p->name))
562			break;
563	return p;
564}
565
566/*
567 * initkwds - initialise m4 keywords as fast as possible.
568 * This very similar to install, but without certain overheads,
569 * such as calling lookup. Malloc is not used for storing the
570 * keyword strings, since we simply use the static pointers
571 * within keywrds block.
572 */
573static void
574initkwds(void)
575{
576	size_t i;
577	unsigned int h;
578	ndptr p;
579
580	for (i = 0; i < MAXKEYS; i++) {
581		h = hash(keywrds[i].knam);
582		p = (ndptr) xalloc(sizeof(struct ndblock));
583		p->nxtptr = hashtab[h % HASHSIZE];
584		hashtab[h % HASHSIZE] = p;
585		p->name = xstrdup(keywrds[i].knam);
586		p->defn = null;
587		p->hv = h;
588		p->type = keywrds[i].ktyp & TYPEMASK;
589		if ((keywrds[i].ktyp & NOARGS) == 0)
590			p->type |= NEEDARGS;
591	}
592}
593
594/* Look up a builtin type, even if overridden by the user */
595int
596builtin_type(const char *key)
597{
598	int i;
599
600	for (i = 0; i != MAXKEYS; i++)
601		if (STREQ(keywrds[i].knam, key))
602			return keywrds[i].ktyp;
603	return -1;
604}
605
606const char *
607builtin_realname(int n)
608{
609	int i;
610
611	for (i = 0; i != MAXKEYS; i++)
612		if (((keywrds[i].ktyp ^ n) & TYPEMASK) == 0)
613			return keywrds[i].knam;
614	return NULL;
615}
616
617static void
618record(struct position *t, int lev)
619{
620	if (lev < MAXRECORD) {
621		t[lev].name = CURRENT_NAME;
622		t[lev].line = CURRENT_LINE;
623	}
624}
625
626static void
627dump_stack(struct position *t, int lev)
628{
629	int i;
630
631	for (i = 0; i < lev; i++) {
632		if (i == MAXRECORD) {
633			fprintf(stderr, "   ...\n");
634			break;
635		}
636		fprintf(stderr, "   %s at line %lu\n",
637			t[i].name, t[i].line);
638	}
639}
640
641
642static void
643enlarge_stack(void)
644{
645	STACKMAX *= 2;
646	mstack = realloc(mstack, sizeof(stae) * STACKMAX);
647	sstack = realloc(sstack, STACKMAX);
648	if (mstack == NULL || sstack == NULL)
649		errx(1, "Evaluation stack overflow (%lu)",
650		    (unsigned long)STACKMAX);
651}
652
653/* Emit preprocessor #line directive if -s option used. */
654void
655emitline(void)
656{
657
658	if (synccpp)
659		fprintf(active, "#line %d \"%s\"\n", inlineno[ilevel],
660			inname[ilevel]);
661}
662