main.c revision 225736
1/*	$OpenBSD: main.c,v 1.53 2002/04/26 16:15:16 espie Exp $	*/
2/*	$NetBSD: main.c,v 1.12 1997/02/08 23:54:49 cgd Exp $	*/
3
4/*-
5 * Copyright (c) 1989, 1993
6 *	The Regents of the University of California.  All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Ozan Yigit at York University.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 4. Neither the name of the University nor the names of its contributors
20 *    may be used to endorse or promote products derived from this software
21 *    without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
#ifndef lint
/* Copyright notice kept as a string in the object file (omitted for lint). */
static char copyright[] =
    "@(#) Copyright (c) 1989, 1993\n"
    "\tThe Regents of the University of California.  All rights reserved.\n";
#endif /* not lint */
41
42#ifndef lint
43#if 0
44static char sccsid[] = "@(#)main.c	8.1 (Berkeley) 6/6/93";
45#else
46#if 0
47static char rcsid[] = "$OpenBSD: main.c,v 1.53 2002/04/26 16:15:16 espie Exp $";
48#endif
49#endif
50#endif /* not lint */
51
52#include <sys/cdefs.h>
53__FBSDID("$FreeBSD: stable/9/usr.bin/m4/main.c 216370 2010-12-11 08:32:16Z joel $");
54
55/*
56 * main.c
57 * Facility: m4 macro processor
58 * by: oz
59 */
60
61#include <sys/types.h>
62#include <assert.h>
63#include <signal.h>
64#include <errno.h>
65#include <unistd.h>
66#include <stdio.h>
67#include <ctype.h>
68#include <string.h>
69#include <stddef.h>
70#include <stdlib.h>
71#include <err.h>
72#include <locale.h>
73#include "mdef.h"
74#include "stdd.h"
75#include "extern.h"
76#include "pathnames.h"
77
78ndptr hashtab[HASHSIZE];	/* hash table for macros etc.  */
79stae *mstack;		 	/* stack of m4 machine         */
80char *sstack;		 	/* shadow stack, for string space extension */
81static size_t STACKMAX;		/* current maximum size of stack */
82int sp; 			/* current m4  stack pointer   */
83int fp; 			/* m4 call frame pointer       */
84struct input_file infile[MAXINP];/* input file stack (0=stdin)  */
85char *inname[MAXINP];		/* names of these input files */
86int inlineno[MAXINP];		/* current number in each input file */
87FILE **outfile;			/* diversion array(0=bitbucket)*/
88int maxout;
89FILE *active;			/* active output file pointer  */
90int ilevel = 0; 		/* input file stack pointer    */
91int oindex = 0; 		/* diversion index..	       */
92char null[] = "";		/* as it says.. just a null..  */
93const char *m4wraps = "";       /* m4wrap string default..     */
94char lquote[MAXCCHARS+1] = {LQUOTE};	/* left quote character  (`)   */
95char rquote[MAXCCHARS+1] = {RQUOTE};	/* right quote character (')   */
96char scommt[MAXCCHARS+1] = {SCOMMT};	/* start character for comment */
97char ecommt[MAXCCHARS+1] = {ECOMMT};	/* end character for comment   */
98int synccpp;			/* Line synchronisation for C preprocessor */
99
100struct keyblk keywrds[] = {	/* m4 keywords to be installed */
101	{ "include",      INCLTYPE },
102	{ "sinclude",     SINCTYPE },
103	{ "define",       DEFITYPE },
104	{ "defn",         DEFNTYPE },
105	{ "divert",       DIVRTYPE | NOARGS },
106	{ "expr",         EXPRTYPE },
107	{ "eval",         EXPRTYPE },
108	{ "substr",       SUBSTYPE },
109	{ "ifelse",       IFELTYPE },
110	{ "ifdef",        IFDFTYPE },
111	{ "len",          LENGTYPE },
112	{ "incr",         INCRTYPE },
113	{ "decr",         DECRTYPE },
114	{ "dnl",          DNLNTYPE | NOARGS },
115	{ "changequote",  CHNQTYPE | NOARGS },
116	{ "changecom",    CHNCTYPE | NOARGS },
117	{ "index",        INDXTYPE },
118#ifdef EXTENDED
119	{ "paste",        PASTTYPE },
120	{ "spaste",       SPASTYPE },
121    	/* Newer extensions, needed to handle gnu-m4 scripts */
122	{ "indir",        INDIRTYPE},
123	{ "builtin",      BUILTINTYPE},
124	{ "patsubst",	  PATSTYPE},
125	{ "regexp",	  REGEXPTYPE},
126	{ "esyscmd",	  ESYSCMDTYPE},
127	{ "__file__",	  FILENAMETYPE | NOARGS},
128	{ "__line__",	  LINETYPE | NOARGS},
129#endif
130	{ "popdef",       POPDTYPE },
131	{ "pushdef",      PUSDTYPE },
132	{ "dumpdef",      DUMPTYPE | NOARGS },
133	{ "shift",        SHIFTYPE | NOARGS },
134	{ "translit",     TRNLTYPE },
135	{ "undefine",     UNDFTYPE },
136	{ "undivert",     UNDVTYPE | NOARGS },
137	{ "divnum",       DIVNTYPE | NOARGS },
138	{ "maketemp",     MKTMTYPE },
139	{ "errprint",     ERRPTYPE | NOARGS },
140	{ "m4wrap",       M4WRTYPE | NOARGS },
141	{ "m4exit",       EXITTYPE | NOARGS },
142	{ "syscmd",       SYSCTYPE },
143	{ "sysval",       SYSVTYPE | NOARGS },
144	{ "traceon",	  TRACEONTYPE | NOARGS },
145	{ "traceoff",	  TRACEOFFTYPE | NOARGS },
146
147#if defined(unix) || defined(__unix__)
148	{ "unix",         SELFTYPE | NOARGS },
149#else
150#ifdef vms
151	{ "vms",          SELFTYPE | NOARGS },
152#endif
153#endif
154};
155
/* Number of entries in the keywrds[] table. */
#define MAXKEYS	(sizeof(keywrds) / sizeof(keywrds[0]))
157
/* Maximum nesting depth for which an opening position is remembered. */
#define MAXRECORD 50

/* Where an opening quote or parenthesis was seen: input name and line. */
struct position {
	char *name;
	unsigned long line;
};

/* Open-delimiter positions, indexed by nesting level (see record()). */
static struct position quotes[MAXRECORD];
static struct position paren[MAXRECORD];
163
164static void record(struct position *, int);
165static void dump_stack(struct position *, int);
166
167static void macro(void);
168static void initkwds(void);
169static ndptr inspect(int, char *);
170static int do_look_ahead(int, const char *);
171
172static void enlarge_stack(void);
173
174int
175main(int argc, char *argv[])
176{
177	int c;
178	int n;
179	int rval;
180	char *p;
181
182	setlocale(LC_ALL, "");
183
184	traceout = stderr;
185
186	if (signal(SIGINT, SIG_IGN) != SIG_IGN)
187		signal(SIGINT, onintr);
188
189	initkwds();
190	initspaces();
191	STACKMAX = INITSTACKMAX;
192
193	mstack = (stae *)xalloc(sizeof(stae) * STACKMAX);
194	sstack = (char *)xalloc(STACKMAX);
195
196	maxout = 0;
197	outfile = NULL;
198	resizedivs(MAXOUT);
199
200	while ((c = getopt(argc, argv, "gst:d:D:U:o:I:")) != -1)
201		switch(c) {
202		case 'D':               /* define something..*/
203			for (p = optarg; *p; p++)
204				if (*p == '=')
205					break;
206			if (p == optarg)
207				errx(1, "null variable cannot be defined");
208			if (*p)
209				*p++ = EOS;
210			dodefine(optarg, p);
211			break;
212		case 'I':
213			addtoincludepath(optarg);
214			break;
215		case 'U':               /* undefine...       */
216			remhash(optarg, TOP);
217			break;
218		case 'g':
219			mimic_gnu = 1;
220			break;
221		case 'd':
222			set_trace_flags(optarg);
223			break;
224		case 's':
225			synccpp = 1;
226			break;
227		case 't':
228			mark_traced(optarg, 1);
229			break;
230		case 'o':
231			trace_file(optarg);
232                        break;
233		case '?':
234		default:
235			usage();
236		}
237
238        argc -= optind;
239        argv += optind;
240
241	rval = 0;
242	active = stdout;		/* default active output     */
243	bbase[0] = bufbase;
244        if (!argc) {
245 		sp = -1;		/* stack pointer initialized */
246		fp = 0; 		/* frame pointer initialized */
247		set_input(infile+0, stdin, "stdin");
248					/* default input (naturally) */
249		if ((inname[0] = strdup("-")) == NULL)
250			err(1, NULL);
251		inlineno[0] = 1;
252		emitline();
253		macro();
254	} else
255		for (; argc--; ++argv) {
256			p = *argv;
257			if (p[0] == '-' && p[1] == EOS)
258				set_input(infile, stdin, "stdin");
259			else if (fopen_trypath(infile, p) == NULL) {
260				warn("%s", p);
261				rval = 1;
262				continue;
263			}
264			sp = -1;
265			fp = 0;
266			if ((inname[0] = strdup(p)) == NULL)
267				err(1, NULL);
268			inlineno[0] = 1;
269			emitline();
270			macro();
271		    	release_input(infile);
272		}
273
274	if (*m4wraps) { 		/* anything for rundown ??   */
275		ilevel = 0;		/* in case m4wrap includes.. */
276		bufbase = bp = buf;	/* use the entire buffer   */
277		pbstr(m4wraps); 	/* user-defined wrapup act   */
278		macro();		/* last will and testament   */
279	}
280
281	if (active != stdout)
282		active = stdout;	/* reset output just in case */
283	for (n = 1; n < maxout; n++)	/* default wrap-up: undivert */
284		if (outfile[n] != NULL)
285			getdiv(n);
286					/* remove bitbucket if used  */
287	if (outfile[0] != NULL) {
288		(void) fclose(outfile[0]);
289	}
290
291	exit(rval);
292}
293
/*
 * Look ahead for `token' in the input.
 * On entry `t' is the character already read and equals token[0]
 * (asserted).  Used for comment and quoting delimiters.
 *
 * Returns 1 if the rest of `token' followed in the input; those
 *           characters have then been consumed.
 *         0 otherwise; the offending character and every character
 *           matched after token[0] are pushed back, so only `t' itself
 *           remains consumed.
 */
static int
do_look_ahead(int t, const char *token)
{
	int pos;	/* index in token of the character being compared */

	assert((unsigned char)t == (unsigned char)token[0]);

	for (pos = 1; token[pos] != '\0'; pos++) {
		t = gpbc();
		if (t != EOF && (unsigned char)t == (unsigned char)token[pos])
			continue;

		/* Mismatch: undo, most recently read character first. */
		putback(t);
		while (--pos > 0)
			putback(token[pos]);
		return 0;
	}
	return 1;
}
319
/*
 * LOOK_AHEAD(t, token) - true when the delimiter `token' starts at `t'.
 * `t' is the character just read.  The cheap tests (not EOF, first
 * byte equal) run before do_look_ahead() is called to check the rest.
 * Both arguments are fully parenthesized so that expression arguments
 * parse as intended.  `t' may be evaluated up to three times and
 * `token' twice, so neither should have side effects.
 */
#define LOOK_AHEAD(t, token) ((t) != EOF &&			\
    (unsigned char)(t) == (unsigned char)(token)[0] &&		\
    do_look_ahead((t), (token)))
323
324/*
325 * macro - the work horse..
326 */
327static void
328macro(void)
329{
330	char token[MAXTOK+1];
331	int t, l;
332	ndptr p;
333	int  nlpar;
334
335	cycle {
336		t = gpbc();
337		if (t == '_' || isalpha(t)) {
338			p = inspect(t, token);
339			if (p != nil)
340				putback(l = gpbc());
341			if (p == nil || (l != LPAREN &&
342			    (p->type & NEEDARGS) != 0))
343				outputstr(token);
344			else {
345		/*
346		 * real thing.. First build a call frame:
347		 */
348				pushf(fp);	/* previous call frm */
349				pushf(p->type); /* type of the call  */
350				pushf(0);	/* parenthesis level */
351				fp = sp;	/* new frame pointer */
352		/*
353		 * now push the string arguments:
354		 */
355				pushs1(p->defn);	/* defn string */
356				pushs1(p->name);	/* macro name  */
357				pushs(ep);	      	/* start next..*/
358
359				if (l != LPAREN && PARLEV == 0)  {
360				    /* no bracks  */
361					chrsave(EOS);
362
363					if ((uintptr_t)sp == STACKMAX)
364						errx(1, "internal stack overflow");
365					eval((const char **) mstack+fp+1, 2,
366					    CALTYP);
367
368					ep = PREVEP;	/* flush strspace */
369					sp = PREVSP;	/* previous sp..  */
370					fp = PREVFP;	/* rewind stack...*/
371				}
372			}
373		} else if (t == EOF) {
374			if (sp > -1) {
375				warnx( "unexpected end of input, unclosed parenthesis:");
376				dump_stack(paren, PARLEV);
377				exit(1);
378			}
379			if (ilevel <= 0)
380				break;			/* all done thanks.. */
381			release_input(infile+ilevel--);
382			free(inname[ilevel+1]);
383			bufbase = bbase[ilevel];
384			emitline();
385			continue;
386		}
387	/*
388	 * non-alpha token possibly seen..
389	 * [the order of else if .. stmts is important.]
390	 */
391		else if (LOOK_AHEAD(t,lquote)) {	/* strip quotes */
392			nlpar = 0;
393			record(quotes, nlpar++);
394			/*
395			 * Opening quote: scan forward until matching
396			 * closing quote has been found.
397			 */
398			do {
399
400				l = gpbc();
401				if (LOOK_AHEAD(l,rquote)) {
402					if (--nlpar > 0)
403						outputstr(rquote);
404				} else if (LOOK_AHEAD(l,lquote)) {
405					record(quotes, nlpar++);
406					outputstr(lquote);
407				} else if (l == EOF) {
408					if (nlpar == 1)
409						warnx("unclosed quote:");
410					else
411						warnx("%d unclosed quotes:", nlpar);
412					dump_stack(quotes, nlpar);
413					exit(1);
414				} else {
415					if (nlpar > 0) {
416						if (sp < 0)
417							putc(l, active);
418						else
419							CHRSAVE(l);
420					}
421				}
422			}
423			while (nlpar != 0);
424		}
425
426		else if (sp < 0 && LOOK_AHEAD(t, scommt)) {
427			fputs(scommt, active);
428
429			for(;;) {
430				t = gpbc();
431				if (LOOK_AHEAD(t, ecommt)) {
432					fputs(ecommt, active);
433					break;
434				}
435				if (t == EOF)
436					break;
437				putc(t, active);
438			}
439		}
440
441		else if (sp < 0) {		/* not in a macro at all */
442			putc(t, active);	/* output directly..	 */
443		}
444
445		else switch(t) {
446
447		case LPAREN:
448			if (PARLEV > 0)
449				chrsave(t);
450			while (isspace(l = gpbc()))
451				;		/* skip blank, tab, nl.. */
452			putback(l);
453			record(paren, PARLEV++);
454			break;
455
456		case RPAREN:
457			if (--PARLEV > 0)
458				chrsave(t);
459			else {			/* end of argument list */
460				chrsave(EOS);
461
462				if ((uintptr_t)sp == STACKMAX)
463					errx(1, "internal stack overflow");
464
465				eval((const char **) mstack+fp+1, sp-fp,
466				    CALTYP);
467
468				ep = PREVEP;	/* flush strspace */
469				sp = PREVSP;	/* previous sp..  */
470				fp = PREVFP;	/* rewind stack...*/
471			}
472			break;
473
474		case COMMA:
475			if (PARLEV == 1) {
476				chrsave(EOS);		/* new argument   */
477				while (isspace(l = gpbc()))
478					;
479				putback(l);
480				pushs(ep);
481			} else
482				chrsave(t);
483			break;
484
485		default:
486			if (LOOK_AHEAD(t, scommt)) {
487				char *pc;
488				for (pc = scommt; *pc; pc++)
489					chrsave(*pc);
490				for(;;) {
491					t = gpbc();
492					if (LOOK_AHEAD(t, ecommt)) {
493						for (pc = ecommt; *pc; pc++)
494							chrsave(*pc);
495						break;
496					}
497					if (t == EOF)
498					    break;
499					CHRSAVE(t);
500				}
501			} else
502				CHRSAVE(t);		/* stack the char */
503			break;
504		}
505	}
506}
507
508/*
509 * output string directly, without pushing it for reparses.
510 */
511void
512outputstr(const char *s)
513{
514	if (sp < 0)
515		while (*s)
516			putc(*s++, active);
517	else
518		while (*s)
519			CHRSAVE(*s++);
520}
521
522/*
523 * build an input token..
524 * consider only those starting with _ or A-Za-z. This is a
525 * combo with lookup to speed things up.
526 */
527static ndptr
528inspect(int c, char *tp)
529{
530	char *name = tp;
531	char *etp = tp+MAXTOK;
532	ndptr p;
533	unsigned int h;
534
535	h = *tp++ = c;
536
537	while ((isalnum(c = gpbc()) || c == '_') && tp < etp)
538		h = (h << 5) + h + (*tp++ = c);
539	if (c != EOF)
540		PUTBACK(c);
541	*tp = EOS;
542	/* token is too long, it won't match anything, but it can still
543	 * be output. */
544	if (tp == ep) {
545		outputstr(name);
546		while (isalnum(c = gpbc()) || c == '_') {
547			if (sp < 0)
548				putc(c, active);
549			else
550				CHRSAVE(c);
551		}
552		*name = EOS;
553		return nil;
554	}
555
556	for (p = hashtab[h % HASHSIZE]; p != nil; p = p->nxtptr)
557		if (h == p->hv && STREQ(name, p->name))
558			break;
559	return p;
560}
561
562/*
563 * initkwds - initialise m4 keywords as fast as possible.
564 * This very similar to install, but without certain overheads,
565 * such as calling lookup. Malloc is not used for storing the
566 * keyword strings, since we simply use the static pointers
567 * within keywrds block.
568 */
569static void
570initkwds(void)
571{
572	size_t i;
573	unsigned int h;
574	ndptr p;
575
576	for (i = 0; i < MAXKEYS; i++) {
577		h = hash(keywrds[i].knam);
578		p = (ndptr) xalloc(sizeof(struct ndblock));
579		p->nxtptr = hashtab[h % HASHSIZE];
580		hashtab[h % HASHSIZE] = p;
581		p->name = xstrdup(keywrds[i].knam);
582		p->defn = null;
583		p->hv = h;
584		p->type = keywrds[i].ktyp & TYPEMASK;
585		if ((keywrds[i].ktyp & NOARGS) == 0)
586			p->type |= NEEDARGS;
587	}
588}
589
590/* Look up a builtin type, even if overridden by the user */
591int
592builtin_type(const char *key)
593{
594	int i;
595
596	for (i = 0; i != MAXKEYS; i++)
597		if (STREQ(keywrds[i].knam, key))
598			return keywrds[i].ktyp;
599	return -1;
600}
601
602const char *
603builtin_realname(int n)
604{
605	int i;
606
607	for (i = 0; i != MAXKEYS; i++)
608		if (((keywrds[i].ktyp ^ n) & TYPEMASK) == 0)
609			return keywrds[i].knam;
610	return NULL;
611}
612
613static void
614record(struct position *t, int lev)
615{
616	if (lev < MAXRECORD) {
617		t[lev].name = CURRENT_NAME;
618		t[lev].line = CURRENT_LINE;
619	}
620}
621
622static void
623dump_stack(struct position *t, int lev)
624{
625	int i;
626
627	for (i = 0; i < lev; i++) {
628		if (i == MAXRECORD) {
629			fprintf(stderr, "   ...\n");
630			break;
631		}
632		fprintf(stderr, "   %s at line %lu\n",
633			t[i].name, t[i].line);
634	}
635}
636
637
638static void
639enlarge_stack(void)
640{
641	STACKMAX *= 2;
642	mstack = realloc(mstack, sizeof(stae) * STACKMAX);
643	sstack = realloc(sstack, STACKMAX);
644	if (mstack == NULL || sstack == NULL)
645		errx(1, "Evaluation stack overflow (%lu)",
646		    (unsigned long)STACKMAX);
647}
648
649/* Emit preprocessor #line directive if -s option used. */
650void
651emitline(void)
652{
653
654	if (synccpp)
655		fprintf(active, "#line %d \"%s\"\n", inlineno[ilevel],
656			inname[ilevel]);
657}
658