main.c revision 99941
1/*	$OpenBSD: main.c,v 1.53 2002/04/26 16:15:16 espie Exp $	*/
2/*	$NetBSD: main.c,v 1.12 1997/02/08 23:54:49 cgd Exp $	*/
3
4/*-
5 * Copyright (c) 1989, 1993
6 *	The Regents of the University of California.  All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Ozan Yigit at York University.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 *    must display the following acknowledgement:
21 *	This product includes software developed by the University of
22 *	California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 *    may be used to endorse or promote products derived from this software
25 *    without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 */
39
#ifndef lint
/* Copyright notice compiled into the program unless `lint' is defined. */
static char copyright[] =
    "@(#) Copyright (c) 1989, 1993\n"
    "\tThe Regents of the University of California.  All rights reserved.\n";
#endif /* not lint */

/* Historical version strings; both alternatives are disabled with #if 0. */
#ifndef lint
#if 0
static char sccsid[] = "@(#)main.c	8.1 (Berkeley) 6/6/93";
#else
#if 0
static char rcsid[] = "$OpenBSD: main.c,v 1.53 2002/04/26 16:15:16 espie Exp $";
#endif
#endif
#endif /* not lint */
55
56#include <sys/cdefs.h>
57__FBSDID("$FreeBSD: head/usr.bin/m4/main.c 99941 2002-07-14 02:34:37Z jmallett $");
58
59/*
60 * main.c
61 * Facility: m4 macro processor
62 * by: oz
63 */
64
65#include <sys/types.h>
66#include <assert.h>
67#include <signal.h>
68#include <errno.h>
69#include <unistd.h>
70#include <stdio.h>
71#include <ctype.h>
72#include <string.h>
73#include <stddef.h>
74#include <stdlib.h>
75#include <err.h>
76#include "mdef.h"
77#include "stdd.h"
78#include "extern.h"
79#include "pathnames.h"
80
81ndptr hashtab[HASHSIZE];	/* hash table for macros etc.  */
82stae *mstack;		 	/* stack of m4 machine         */
83char *sstack;		 	/* shadow stack, for string space extension */
84static size_t STACKMAX;		/* current maximum size of stack */
85int sp; 			/* current m4  stack pointer   */
86int fp; 			/* m4 call frame pointer       */
87struct input_file infile[MAXINP];/* input file stack (0=stdin)  */
88char *inname[MAXINP];		/* names of these input files */
89int inlineno[MAXINP];		/* current number in each input file */
90FILE **outfile;			/* diversion array(0=bitbucket)*/
91int maxout;
92FILE *active;			/* active output file pointer  */
93int ilevel = 0; 		/* input file stack pointer    */
94int oindex = 0; 		/* diversion index..	       */
95const char *null = "";          /* as it says.. just a null..  */
96const char *m4wraps = "";       /* m4wrap string default..     */
97char lquote[MAXCCHARS+1] = {LQUOTE};	/* left quote character  (`)   */
98char rquote[MAXCCHARS+1] = {RQUOTE};	/* right quote character (')   */
99char scommt[MAXCCHARS+1] = {SCOMMT};	/* start character for comment */
100char ecommt[MAXCCHARS+1] = {ECOMMT};	/* end character for comment   */
101int synccpp;			/* Line synchronisation for C preprocessor */
102
103struct keyblk keywrds[] = {	/* m4 keywords to be installed */
104	{ "include",      INCLTYPE },
105	{ "sinclude",     SINCTYPE },
106	{ "define",       DEFITYPE },
107	{ "defn",         DEFNTYPE },
108	{ "divert",       DIVRTYPE | NOARGS },
109	{ "expr",         EXPRTYPE },
110	{ "eval",         EXPRTYPE },
111	{ "substr",       SUBSTYPE },
112	{ "ifelse",       IFELTYPE },
113	{ "ifdef",        IFDFTYPE },
114	{ "len",          LENGTYPE },
115	{ "incr",         INCRTYPE },
116	{ "decr",         DECRTYPE },
117	{ "dnl",          DNLNTYPE | NOARGS },
118	{ "changequote",  CHNQTYPE | NOARGS },
119	{ "changecom",    CHNCTYPE | NOARGS },
120	{ "index",        INDXTYPE },
121#ifdef EXTENDED
122	{ "paste",        PASTTYPE },
123	{ "spaste",       SPASTYPE },
124    	/* Newer extensions, needed to handle gnu-m4 scripts */
125	{ "indir",        INDIRTYPE},
126	{ "builtin",      BUILTINTYPE},
127	{ "patsubst",	  PATSTYPE},
128	{ "regexp",	  REGEXPTYPE},
129	{ "esyscmd",	  ESYSCMDTYPE},
130	{ "__file__",	  FILENAMETYPE | NOARGS},
131	{ "__line__",	  LINETYPE | NOARGS},
132#endif
133	{ "popdef",       POPDTYPE },
134	{ "pushdef",      PUSDTYPE },
135	{ "dumpdef",      DUMPTYPE | NOARGS },
136	{ "shift",        SHIFTYPE | NOARGS },
137	{ "translit",     TRNLTYPE },
138	{ "undefine",     UNDFTYPE },
139	{ "undivert",     UNDVTYPE | NOARGS },
140	{ "divnum",       DIVNTYPE | NOARGS },
141	{ "maketemp",     MKTMTYPE },
142	{ "errprint",     ERRPTYPE | NOARGS },
143	{ "m4wrap",       M4WRTYPE | NOARGS },
144	{ "m4exit",       EXITTYPE | NOARGS },
145	{ "syscmd",       SYSCTYPE },
146	{ "sysval",       SYSVTYPE | NOARGS },
147	{ "traceon",	  TRACEONTYPE | NOARGS },
148	{ "traceoff",	  TRACEOFFTYPE | NOARGS },
149
150#if defined(unix) || defined(__unix__)
151	{ "unix",         SELFTYPE | NOARGS },
152#else
153#ifdef vms
154	{ "vms",          SELFTYPE | NOARGS },
155#endif
156#endif
157};
158
159#define MAXKEYS	(sizeof(keywrds)/sizeof(struct keyblk))
160
/* Number of nesting levels whose opening position is remembered. */
#define MAXRECORD 50

/* Where a still-open quote or parenthesis was opened; see record(). */
struct position {
	char *name;		/* CURRENT_NAME at the time of recording */
	unsigned long line;	/* CURRENT_LINE at the time of recording */
};

static struct position quotes[MAXRECORD];	/* open quotes, by nesting level */
static struct position paren[MAXRECORD];	/* open parentheses, by PARLEV */
166
167static void record(struct position *, int);
168static void dump_stack(struct position *, int);
169
170static void macro(void);
171static void initkwds(void);
172static ndptr inspect(int, char *);
173static int do_look_ahead(int, const char *);
174
175static void enlarge_stack(void);
176
177int
178main(int argc, char *argv[])
179{
180	int c;
181	int n;
182	int rval;
183	char *p;
184
185	traceout = stderr;
186
187	if (signal(SIGINT, SIG_IGN) != SIG_IGN)
188		signal(SIGINT, onintr);
189
190	initkwds();
191	initspaces();
192	STACKMAX = INITSTACKMAX;
193
194	mstack = (stae *)xalloc(sizeof(stae) * STACKMAX);
195	sstack = (char *)xalloc(STACKMAX);
196
197	maxout = 0;
198	outfile = NULL;
199	resizedivs(MAXOUT);
200
201	while ((c = getopt(argc, argv, "gst:d:D:U:o:I:")) != -1)
202		switch(c) {
203		case 'D':               /* define something..*/
204			for (p = optarg; *p; p++)
205				if (*p == '=')
206					break;
207			if (*p)
208				*p++ = EOS;
209			dodefine(optarg, p);
210			break;
211		case 'I':
212			addtoincludepath(optarg);
213			break;
214		case 'U':               /* undefine...       */
215			remhash(optarg, TOP);
216			break;
217		case 'g':
218			mimic_gnu = 1;
219			break;
220		case 'd':
221			set_trace_flags(optarg);
222			break;
223		case 's':
224			synccpp = 1;
225			break;
226		case 't':
227			mark_traced(optarg, 1);
228			break;
229		case 'o':
230			trace_file(optarg);
231                        break;
232		case '?':
233		default:
234			usage();
235		}
236
237        argc -= optind;
238        argv += optind;
239
240	rval = 0;
241	active = stdout;		/* default active output     */
242	bbase[0] = bufbase;
243        if (!argc) {
244 		sp = -1;		/* stack pointer initialized */
245		fp = 0; 		/* frame pointer initialized */
246		set_input(infile+0, stdin, "stdin");
247					/* default input (naturally) */
248		if ((inname[0] = strdup("-")) == NULL)
249			err(1, NULL);
250		inlineno[0] = 1;
251		emitline();
252		macro();
253	} else
254		for (; argc--; ++argv) {
255			p = *argv;
256			if (p[0] == '-' && p[1] == EOS)
257				set_input(infile, stdin, "stdin");
258			else if (fopen_trypath(infile, p) == NULL) {
259				warn("%s", p);
260				rval = 1;
261				continue;
262			}
263			sp = -1;
264			fp = 0;
265			if ((inname[0] = strdup(p)) == NULL)
266				err(1, NULL);
267			inlineno[0] = 1;
268			emitline();
269			macro();
270		    	release_input(infile);
271		}
272
273	if (*m4wraps) { 		/* anything for rundown ??   */
274		ilevel = 0;		/* in case m4wrap includes.. */
275		bufbase = bp = buf;	/* use the entire buffer   */
276		pbstr(m4wraps); 	/* user-defined wrapup act   */
277		macro();		/* last will and testament   */
278	}
279
280	if (active != stdout)
281		active = stdout;	/* reset output just in case */
282	for (n = 1; n < maxout; n++)	/* default wrap-up: undivert */
283		if (outfile[n] != NULL)
284			getdiv(n);
285					/* remove bitbucket if used  */
286	if (outfile[0] != NULL) {
287		(void) fclose(outfile[0]);
288	}
289
290	exit(rval);
291}
292
/*
 * do_look_ahead - check whether the input continues with `token'.
 *
 * The caller has already read `t', which must equal token[0].  The
 * remaining characters of `token' are read with gpbc() and compared one
 * by one.
 *
 * Returns 1 when the whole token matched; its characters stay consumed.
 * Returns 0 on the first mismatch or EOF; the offending character and
 * every token character read here are pushed back, so only `t' itself
 * remains consumed.
 *
 * Used for the quote and comment delimiters, through LOOK_AHEAD().
 */
static int
do_look_ahead(int t, const char *token)
{
	int matched;	/* number of token characters consumed so far */
	int c;

	assert((unsigned char)t == (unsigned char)token[0]);

	for (matched = 1; token[matched] != '\0'; matched++) {
		c = gpbc();
		if (c != EOF &&
		    (unsigned char)c == (unsigned char)token[matched])
			continue;

		/* Undo: the bad character first, then token[matched-1..1]. */
		putback(c);
		while (matched > 1) {
			matched--;
			putback(token[matched]);
		}
		return 0;
	}
	return 1;
}
318
/*
 * LOOK_AHEAD(t, token) - nonzero when the already-read character `t'
 * starts `token' and do_look_ahead() matches the rest of the token from
 * the input.  EOF never matches.
 *
 * Both parameters are fully parenthesised so that any expression can be
 * passed, but they are evaluated more than once: arguments must be free
 * of side effects.
 */
#define LOOK_AHEAD(t, token) ((t) != EOF &&			\
    (unsigned char)(t) == (unsigned char)(token)[0] &&		\
    do_look_ahead((t), (token)))
322
323/*
324 * macro - the work horse..
325 */
326static void
327macro(void)
328{
329	char token[MAXTOK+1];
330	int t, l;
331	ndptr p;
332	int  nlpar;
333
334	cycle {
335		t = gpbc();
336		if (t == '_' || isalpha(t)) {
337			p = inspect(t, token);
338			if (p != nil)
339				putback(l = gpbc());
340			if (p == nil || (l != LPAREN &&
341			    (p->type & NEEDARGS) != 0))
342				outputstr(token);
343			else {
344		/*
345		 * real thing.. First build a call frame:
346		 */
347				pushf(fp);	/* previous call frm */
348				pushf(p->type); /* type of the call  */
349				pushf(0);	/* parenthesis level */
350				fp = sp;	/* new frame pointer */
351		/*
352		 * now push the string arguments:
353		 */
354				pushs1(p->defn);	/* defn string */
355				pushs1(p->name);	/* macro name  */
356				pushs(ep);	      	/* start next..*/
357
358				if (l != LPAREN && PARLEV == 0)  {
359				    /* no bracks  */
360					chrsave(EOS);
361
362					if ((uintptr_t)sp == STACKMAX)
363						errx(1, "internal stack overflow");
364					eval((const char **) mstack+fp+1, 2,
365					    CALTYP);
366
367					ep = PREVEP;	/* flush strspace */
368					sp = PREVSP;	/* previous sp..  */
369					fp = PREVFP;	/* rewind stack...*/
370				}
371			}
372		} else if (t == EOF) {
373			if (sp > -1) {
374				warnx( "unexpected end of input, unclosed parenthesis:");
375				dump_stack(paren, PARLEV);
376				exit(1);
377			}
378			if (ilevel <= 0)
379				break;			/* all done thanks.. */
380			release_input(infile+ilevel--);
381			free(inname[ilevel+1]);
382			bufbase = bbase[ilevel];
383			emitline();
384			continue;
385		}
386	/*
387	 * non-alpha token possibly seen..
388	 * [the order of else if .. stmts is important.]
389	 */
390		else if (LOOK_AHEAD(t,lquote)) {	/* strip quotes */
391			nlpar = 0;
392			record(quotes, nlpar++);
393			/*
394			 * Opening quote: scan forward until matching
395			 * closing quote has been found.
396			 */
397			do {
398
399				l = gpbc();
400				if (LOOK_AHEAD(l,rquote)) {
401					if (--nlpar > 0)
402						outputstr(rquote);
403				} else if (LOOK_AHEAD(l,lquote)) {
404					record(quotes, nlpar++);
405					outputstr(lquote);
406				} else if (l == EOF) {
407					if (nlpar == 1)
408						warnx("unclosed quote:");
409					else
410						warnx("%d unclosed quotes:", nlpar);
411					dump_stack(quotes, nlpar);
412					exit(1);
413				} else {
414					if (nlpar > 0) {
415						if (sp < 0)
416							putc(l, active);
417						else
418							CHRSAVE(l);
419					}
420				}
421			}
422			while (nlpar != 0);
423		}
424
425		else if (sp < 0 && LOOK_AHEAD(t, scommt)) {
426			fputs(scommt, active);
427
428			for(;;) {
429				t = gpbc();
430				if (LOOK_AHEAD(t, ecommt)) {
431					fputs(ecommt, active);
432					break;
433				}
434				if (t == EOF)
435					break;
436				putc(t, active);
437			}
438		}
439
440		else if (sp < 0) {		/* not in a macro at all */
441			putc(t, active);	/* output directly..	 */
442		}
443
444		else switch(t) {
445
446		case LPAREN:
447			if (PARLEV > 0)
448				chrsave(t);
449			while (isspace(l = gpbc()))
450				;		/* skip blank, tab, nl.. */
451			putback(l);
452			record(paren, PARLEV++);
453			break;
454
455		case RPAREN:
456			if (--PARLEV > 0)
457				chrsave(t);
458			else {			/* end of argument list */
459				chrsave(EOS);
460
461				if ((uintptr_t)sp == STACKMAX)
462					errx(1, "internal stack overflow");
463
464				eval((const char **) mstack+fp+1, sp-fp,
465				    CALTYP);
466
467				ep = PREVEP;	/* flush strspace */
468				sp = PREVSP;	/* previous sp..  */
469				fp = PREVFP;	/* rewind stack...*/
470			}
471			break;
472
473		case COMMA:
474			if (PARLEV == 1) {
475				chrsave(EOS);		/* new argument   */
476				while (isspace(l = gpbc()))
477					;
478				putback(l);
479				pushs(ep);
480			} else
481				chrsave(t);
482			break;
483
484		default:
485			if (LOOK_AHEAD(t, scommt)) {
486				char *pc;
487				for (pc = scommt; *pc; pc++)
488					chrsave(*pc);
489				for(;;) {
490					t = gpbc();
491					if (LOOK_AHEAD(t, ecommt)) {
492						for (pc = ecommt; *pc; pc++)
493							chrsave(*pc);
494						break;
495					}
496					if (t == EOF)
497					    break;
498					CHRSAVE(t);
499				}
500			} else
501				CHRSAVE(t);		/* stack the char */
502			break;
503		}
504	}
505}
506
507/*
508 * output string directly, without pushing it for reparses.
509 */
510void
511outputstr(const char *s)
512{
513	if (sp < 0)
514		while (*s)
515			putc(*s++, active);
516	else
517		while (*s)
518			CHRSAVE(*s++);
519}
520
521/*
522 * build an input token..
523 * consider only those starting with _ or A-Za-z. This is a
524 * combo with lookup to speed things up.
525 */
526static ndptr
527inspect(int c, char *tp)
528{
529	char *name = tp;
530	char *etp = tp+MAXTOK;
531	ndptr p;
532	unsigned int h;
533
534	h = *tp++ = c;
535
536	while ((isalnum(c = gpbc()) || c == '_') && tp < etp)
537		h = (h << 5) + h + (*tp++ = c);
538	if (c != EOF)
539		PUTBACK(c);
540	*tp = EOS;
541	/* token is too long, it won't match anything, but it can still
542	 * be output. */
543	if (tp == ep) {
544		outputstr(name);
545		while (isalnum(c = gpbc()) || c == '_') {
546			if (sp < 0)
547				putc(c, active);
548			else
549				CHRSAVE(c);
550		}
551		*name = EOS;
552		return nil;
553	}
554
555	for (p = hashtab[h % HASHSIZE]; p != nil; p = p->nxtptr)
556		if (h == p->hv && STREQ(name, p->name))
557			break;
558	return p;
559}
560
561/*
562 * initkwds - initialise m4 keywords as fast as possible.
563 * This very similar to install, but without certain overheads,
564 * such as calling lookup. Malloc is not used for storing the
565 * keyword strings, since we simply use the static pointers
566 * within keywrds block.
567 */
568static void
569initkwds(void)
570{
571	size_t i;
572	unsigned int h;
573	ndptr p;
574
575	for (i = 0; i < MAXKEYS; i++) {
576		h = hash(keywrds[i].knam);
577		p = (ndptr) xalloc(sizeof(struct ndblock));
578		p->nxtptr = hashtab[h % HASHSIZE];
579		hashtab[h % HASHSIZE] = p;
580		p->name = xstrdup(keywrds[i].knam);
581		p->defn = xstrdup(null);
582		p->hv = h;
583		p->type = keywrds[i].ktyp & TYPEMASK;
584		if ((keywrds[i].ktyp & NOARGS) == 0)
585			p->type |= NEEDARGS;
586	}
587}
588
589/* Look up a builtin type, even if overridden by the user */
590int
591builtin_type(const char *key)
592{
593	int i;
594
595	for (i = 0; i != MAXKEYS; i++)
596		if (STREQ(keywrds[i].knam, key))
597			return keywrds[i].ktyp;
598	return -1;
599}
600
601const char *
602builtin_realname(int n)
603{
604	int i;
605
606	for (i = 0; i != MAXKEYS; i++)
607		if (((keywrds[i].ktyp ^ n) & TYPEMASK) == 0)
608			return keywrds[i].knam;
609	return NULL;
610}
611
612static void
613record(struct position *t, int lev)
614{
615	if (lev < MAXRECORD) {
616		t[lev].name = CURRENT_NAME;
617		t[lev].line = CURRENT_LINE;
618	}
619}
620
621static void
622dump_stack(struct position *t, int lev)
623{
624	int i;
625
626	for (i = 0; i < lev; i++) {
627		if (i == MAXRECORD) {
628			fprintf(stderr, "   ...\n");
629			break;
630		}
631		fprintf(stderr, "   %s at line %lu\n",
632			t[i].name, t[i].line);
633	}
634}
635
636
637static void
638enlarge_stack(void)
639{
640	STACKMAX *= 2;
641	mstack = realloc(mstack, sizeof(stae) * STACKMAX);
642	sstack = realloc(sstack, STACKMAX);
643	if (mstack == NULL || sstack == NULL)
644		errx(1, "Evaluation stack overflow (%lu)",
645		    (unsigned long)STACKMAX);
646}
647
648/* Emit preprocessor #line directive if -s option used. */
649void
650emitline(void)
651{
652
653	if (synccpp)
654		fprintf(active, "#line %d \"%s\"\n", inlineno[ilevel],
655			inname[ilevel]);
656}
657