main.c revision 227241
1/*	$OpenBSD: main.c,v 1.53 2002/04/26 16:15:16 espie Exp $	*/
2/*	$NetBSD: main.c,v 1.12 1997/02/08 23:54:49 cgd Exp $	*/
3
4/*-
5 * Copyright (c) 1989, 1993
6 *	The Regents of the University of California.  All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Ozan Yigit at York University.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 4. Neither the name of the University nor the names of its contributors
20 *    may be used to endorse or promote products derived from this software
21 *    without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36#ifndef lint
37#if 0
38static char copyright[] =
39"@(#) Copyright (c) 1989, 1993\n\
40	The Regents of the University of California.  All rights reserved.\n";
41#endif
42#endif /* not lint */
43
44#ifndef lint
45#if 0
46static char sccsid[] = "@(#)main.c	8.1 (Berkeley) 6/6/93";
47#else
48#if 0
49static char rcsid[] = "$OpenBSD: main.c,v 1.53 2002/04/26 16:15:16 espie Exp $";
50#endif
51#endif
52#endif /* not lint */
53
54#include <sys/cdefs.h>
55__FBSDID("$FreeBSD: head/usr.bin/m4/main.c 227241 2011-11-06 18:49:48Z ed $");
56
57/*
58 * main.c
59 * Facility: m4 macro processor
60 * by: oz
61 */
62
63#include <sys/types.h>
64#include <assert.h>
65#include <signal.h>
66#include <errno.h>
67#include <unistd.h>
68#include <stdio.h>
69#include <ctype.h>
70#include <string.h>
71#include <stddef.h>
72#include <stdlib.h>
73#include <err.h>
74#include <locale.h>
75#include "mdef.h"
76#include "stdd.h"
77#include "extern.h"
78#include "pathnames.h"
79
/*
 * Global interpreter state shared with the rest of m4.
 * mstack/sstack are allocated in main() with STACKMAX entries each and
 * doubled by enlarge_stack(); sp and fp index into mstack.
 */
80ndptr hashtab[HASHSIZE];	/* hash table of macro definitions, filled by initkwds() */
81stae *mstack;		 	/* evaluation stack of the m4 machine */
82char *sstack;		 	/* shadow stack (one byte per mstack entry), for string space extension */
83static size_t STACKMAX;		/* current capacity of mstack/sstack, in entries */
84int sp; 			/* current m4 stack pointer; -1 when no macro call is being collected */
85int fp; 			/* m4 call frame pointer       */
86struct input_file infile[MAXINP];/* input file stack (0=stdin)  */
87char *inname[MAXINP];		/* names of these input files */
88int inlineno[MAXINP];		/* current line number in each input file */
89FILE **outfile;			/* diversion array (0=bitbucket) */
90int maxout;			/* bound used by main() when scanning outfile[] */
91FILE *active;			/* active output file pointer  */
92int ilevel = 0; 		/* input file stack pointer    */
93int oindex = 0; 		/* diversion index             */
94char null[] = "";		/* shared empty string (used as the definition of builtins) */
95const char *m4wraps = "";       /* m4wrap text, processed by main() after all input */
96char lquote[MAXCCHARS+1] = {LQUOTE};	/* left quote string  (default `)  */
97char rquote[MAXCCHARS+1] = {RQUOTE};	/* right quote string (default ')  */
98char scommt[MAXCCHARS+1] = {SCOMMT};	/* comment start string */
99char ecommt[MAXCCHARS+1] = {ECOMMT};	/* comment end string   */
100int synccpp;			/* nonzero (-s): emitline() writes #line directives */
101
/*
 * Table of builtin macro names and their type codes.
 *
 * initkwds() installs every entry into hashtab.  An entry whose type is
 * NOT or'ed with NOARGS gets the NEEDARGS flag there, and macro() then
 * copies such a name to the output unchanged unless it is immediately
 * followed by LPAREN.  builtin_type() and builtin_realname() search this
 * table directly, so they still work when the user has redefined or
 * removed a builtin.  Note that "expr" and "eval" share EXPRTYPE.
 */
102static const struct keyblk keywrds[] = { /* m4 keywords to be installed */
103	{ "include",      INCLTYPE },
104	{ "sinclude",     SINCTYPE },
105	{ "define",       DEFITYPE },
106	{ "defn",         DEFNTYPE },
107	{ "divert",       DIVRTYPE | NOARGS },
108	{ "expr",         EXPRTYPE },
109	{ "eval",         EXPRTYPE },
110	{ "substr",       SUBSTYPE },
111	{ "ifelse",       IFELTYPE },
112	{ "ifdef",        IFDFTYPE },
113	{ "len",          LENGTYPE },
114	{ "incr",         INCRTYPE },
115	{ "decr",         DECRTYPE },
116	{ "dnl",          DNLNTYPE | NOARGS },
117	{ "changequote",  CHNQTYPE | NOARGS },
118	{ "changecom",    CHNCTYPE | NOARGS },
119	{ "index",        INDXTYPE },
120#ifdef EXTENDED
121	{ "paste",        PASTTYPE },
122	{ "spaste",       SPASTYPE },
123    	/* Newer extensions, needed to handle gnu-m4 scripts */
124	{ "indir",        INDIRTYPE},
125	{ "builtin",      BUILTINTYPE},
126	{ "patsubst",	  PATSTYPE},
127	{ "regexp",	  REGEXPTYPE},
128	{ "esyscmd",	  ESYSCMDTYPE},
129	{ "__file__",	  FILENAMETYPE | NOARGS},
130	{ "__line__",	  LINETYPE | NOARGS},
131#endif
132	{ "popdef",       POPDTYPE },
133	{ "pushdef",      PUSDTYPE },
134	{ "dumpdef",      DUMPTYPE | NOARGS },
135	{ "shift",        SHIFTYPE | NOARGS },
136	{ "translit",     TRNLTYPE },
137	{ "undefine",     UNDFTYPE },
138	{ "undivert",     UNDVTYPE | NOARGS },
139	{ "divnum",       DIVNTYPE | NOARGS },
140	{ "maketemp",     MKTMTYPE },
141	{ "errprint",     ERRPTYPE | NOARGS },
142	{ "m4wrap",       M4WRTYPE | NOARGS },
143	{ "m4exit",       EXITTYPE | NOARGS },
144	{ "syscmd",       SYSCTYPE },
145	{ "sysval",       SYSVTYPE | NOARGS },
146	{ "traceon",	  TRACEONTYPE | NOARGS },
147	{ "traceoff",	  TRACEOFFTYPE | NOARGS },
148
	/* Self-naming platform macro (SELFTYPE), present only on that platform. */
149#if defined(unix) || defined(__unix__)
150	{ "unix",         SELFTYPE | NOARGS },
151#else
152#ifdef vms
153	{ "vms",          SELFTYPE | NOARGS },
154#endif
155#endif
156};
157
158#define MAXKEYS	(sizeof(keywrds)/sizeof(struct keyblk))
159
/*
 * Bookkeeping for diagnostics about unterminated constructs.
 * record() stores the current input name and line for a nesting level
 * (levels >= MAXRECORD are silently not stored) and dump_stack() prints
 * at most MAXRECORD of them.  macro() uses `quotes' for open quotes and
 * `paren' for open parentheses.
 */
160#define MAXRECORD 50
161static struct position {
162	char *name;		/* input name, taken from CURRENT_NAME */
163	unsigned long line;	/* input line, taken from CURRENT_LINE */
164} quotes[MAXRECORD], paren[MAXRECORD];
165
166static void record(struct position *, int);
167static void dump_stack(struct position *, int);
168
169static void macro(void);
170static void initkwds(void);
171static ndptr inspect(int, char *);
172static int do_look_ahead(int, const char *);
173
174static void enlarge_stack(void);
175
176int
177main(int argc, char *argv[])
178{
179	int c;
180	int n;
181	int rval;
182	char *p;
183
184	setlocale(LC_ALL, "");
185
186	traceout = stderr;
187
188	if (signal(SIGINT, SIG_IGN) != SIG_IGN)
189		signal(SIGINT, onintr);
190
191	initkwds();
192	initspaces();
193	STACKMAX = INITSTACKMAX;
194
195	mstack = (stae *)xalloc(sizeof(stae) * STACKMAX);
196	sstack = (char *)xalloc(STACKMAX);
197
198	maxout = 0;
199	outfile = NULL;
200	resizedivs(MAXOUT);
201
202	while ((c = getopt(argc, argv, "gst:d:D:U:o:I:")) != -1)
203		switch(c) {
204		case 'D':               /* define something..*/
205			for (p = optarg; *p; p++)
206				if (*p == '=')
207					break;
208			if (p == optarg)
209				errx(1, "null variable cannot be defined");
210			if (*p)
211				*p++ = EOS;
212			dodefine(optarg, p);
213			break;
214		case 'I':
215			addtoincludepath(optarg);
216			break;
217		case 'U':               /* undefine...       */
218			remhash(optarg, TOP);
219			break;
220		case 'g':
221			mimic_gnu = 1;
222			break;
223		case 'd':
224			set_trace_flags(optarg);
225			break;
226		case 's':
227			synccpp = 1;
228			break;
229		case 't':
230			mark_traced(optarg, 1);
231			break;
232		case 'o':
233			trace_file(optarg);
234                        break;
235		case '?':
236		default:
237			usage();
238		}
239
240        argc -= optind;
241        argv += optind;
242
243	rval = 0;
244	active = stdout;		/* default active output     */
245	bbase[0] = bufbase;
246        if (!argc) {
247 		sp = -1;		/* stack pointer initialized */
248		fp = 0; 		/* frame pointer initialized */
249		set_input(infile+0, stdin, "stdin");
250					/* default input (naturally) */
251		if ((inname[0] = strdup("-")) == NULL)
252			err(1, NULL);
253		inlineno[0] = 1;
254		emitline();
255		macro();
256	} else
257		for (; argc--; ++argv) {
258			p = *argv;
259			if (p[0] == '-' && p[1] == EOS)
260				set_input(infile, stdin, "stdin");
261			else if (fopen_trypath(infile, p) == NULL) {
262				warn("%s", p);
263				rval = 1;
264				continue;
265			}
266			sp = -1;
267			fp = 0;
268			if ((inname[0] = strdup(p)) == NULL)
269				err(1, NULL);
270			inlineno[0] = 1;
271			emitline();
272			macro();
273		    	release_input(infile);
274		}
275
276	if (*m4wraps) { 		/* anything for rundown ??   */
277		ilevel = 0;		/* in case m4wrap includes.. */
278		bufbase = bp = buf;	/* use the entire buffer   */
279		pbstr(m4wraps); 	/* user-defined wrapup act   */
280		macro();		/* last will and testament   */
281	}
282
283	if (active != stdout)
284		active = stdout;	/* reset output just in case */
285	for (n = 1; n < maxout; n++)	/* default wrap-up: undivert */
286		if (outfile[n] != NULL)
287			getdiv(n);
288					/* remove bitbucket if used  */
289	if (outfile[0] != NULL) {
290		(void) fclose(outfile[0]);
291	}
292
293	exit(rval);
294}
295
/*
 * Try to match the multi-character delimiter `token' against the input.
 * The caller has already read `t', which must equal token[0]; the
 * remaining characters are read with gpbc().
 * Used for comment and quoting delimiters.
 * Returns 1 if the whole of `token' was present (and is now consumed),
 *         0 otherwise, in which case the offending character and the
 *         characters matched after token[0] are pushed back.
 */
static int
do_look_ahead(int t, const char *token)
{
	int pos;

	assert((unsigned char)t == (unsigned char)token[0]);

	for (pos = 1; token[pos] != '\0'; pos++) {
		t = gpbc();
		if (t != EOF &&
		    (unsigned char)t == (unsigned char)token[pos])
			continue;
		/* Mismatch: undo everything read after token[0]. */
		putback(t);
		while (--pos > 0)
			putback(token[pos]);
		return 0;
	}
	return 1;
}

/*
 * Cheap front end for do_look_ahead(): only call it when `t' is a real
 * character equal to the first character of `token'.
 */
#define LOOK_AHEAD(t, token)					\
    ((t) != EOF &&						\
    (unsigned char)(t) == (unsigned char)(token)[0] &&		\
    do_look_ahead((t), (token)))
325
326/*
327 * macro - the work horse..
328 */
329static void
330macro(void)
331{
332	char token[MAXTOK+1];
333	int t, l;
334	ndptr p;
335	int  nlpar;
336
337	cycle {
338		t = gpbc();
339		if (t == '_' || isalpha(t)) {
340			p = inspect(t, token);
341			if (p != nil)
342				putback(l = gpbc());
343			if (p == nil || (l != LPAREN &&
344			    (p->type & NEEDARGS) != 0))
345				outputstr(token);
346			else {
347		/*
348		 * real thing.. First build a call frame:
349		 */
350				pushf(fp);	/* previous call frm */
351				pushf(p->type); /* type of the call  */
352				pushf(0);	/* parenthesis level */
353				fp = sp;	/* new frame pointer */
354		/*
355		 * now push the string arguments:
356		 */
357				pushs1(p->defn);	/* defn string */
358				pushs1(p->name);	/* macro name  */
359				pushs(ep);	      	/* start next..*/
360
361				if (l != LPAREN && PARLEV == 0)  {
362				    /* no bracks  */
363					chrsave(EOS);
364
365					if ((uintptr_t)sp == STACKMAX)
366						errx(1, "internal stack overflow");
367					eval((const char **) mstack+fp+1, 2,
368					    CALTYP);
369
370					ep = PREVEP;	/* flush strspace */
371					sp = PREVSP;	/* previous sp..  */
372					fp = PREVFP;	/* rewind stack...*/
373				}
374			}
375		} else if (t == EOF) {
376			if (sp > -1) {
377				warnx( "unexpected end of input, unclosed parenthesis:");
378				dump_stack(paren, PARLEV);
379				exit(1);
380			}
381			if (ilevel <= 0)
382				break;			/* all done thanks.. */
383			release_input(infile+ilevel--);
384			free(inname[ilevel+1]);
385			bufbase = bbase[ilevel];
386			emitline();
387			continue;
388		}
389	/*
390	 * non-alpha token possibly seen..
391	 * [the order of else if .. stmts is important.]
392	 */
393		else if (LOOK_AHEAD(t,lquote)) {	/* strip quotes */
394			nlpar = 0;
395			record(quotes, nlpar++);
396			/*
397			 * Opening quote: scan forward until matching
398			 * closing quote has been found.
399			 */
400			do {
401
402				l = gpbc();
403				if (LOOK_AHEAD(l,rquote)) {
404					if (--nlpar > 0)
405						outputstr(rquote);
406				} else if (LOOK_AHEAD(l,lquote)) {
407					record(quotes, nlpar++);
408					outputstr(lquote);
409				} else if (l == EOF) {
410					if (nlpar == 1)
411						warnx("unclosed quote:");
412					else
413						warnx("%d unclosed quotes:", nlpar);
414					dump_stack(quotes, nlpar);
415					exit(1);
416				} else {
417					if (nlpar > 0) {
418						if (sp < 0)
419							putc(l, active);
420						else
421							CHRSAVE(l);
422					}
423				}
424			}
425			while (nlpar != 0);
426		}
427
428		else if (sp < 0 && LOOK_AHEAD(t, scommt)) {
429			fputs(scommt, active);
430
431			for(;;) {
432				t = gpbc();
433				if (LOOK_AHEAD(t, ecommt)) {
434					fputs(ecommt, active);
435					break;
436				}
437				if (t == EOF)
438					break;
439				putc(t, active);
440			}
441		}
442
443		else if (sp < 0) {		/* not in a macro at all */
444			putc(t, active);	/* output directly..	 */
445		}
446
447		else switch(t) {
448
449		case LPAREN:
450			if (PARLEV > 0)
451				chrsave(t);
452			while (isspace(l = gpbc()))
453				;		/* skip blank, tab, nl.. */
454			putback(l);
455			record(paren, PARLEV++);
456			break;
457
458		case RPAREN:
459			if (--PARLEV > 0)
460				chrsave(t);
461			else {			/* end of argument list */
462				chrsave(EOS);
463
464				if ((uintptr_t)sp == STACKMAX)
465					errx(1, "internal stack overflow");
466
467				eval((const char **) mstack+fp+1, sp-fp,
468				    CALTYP);
469
470				ep = PREVEP;	/* flush strspace */
471				sp = PREVSP;	/* previous sp..  */
472				fp = PREVFP;	/* rewind stack...*/
473			}
474			break;
475
476		case COMMA:
477			if (PARLEV == 1) {
478				chrsave(EOS);		/* new argument   */
479				while (isspace(l = gpbc()))
480					;
481				putback(l);
482				pushs(ep);
483			} else
484				chrsave(t);
485			break;
486
487		default:
488			if (LOOK_AHEAD(t, scommt)) {
489				char *pc;
490				for (pc = scommt; *pc; pc++)
491					chrsave(*pc);
492				for(;;) {
493					t = gpbc();
494					if (LOOK_AHEAD(t, ecommt)) {
495						for (pc = ecommt; *pc; pc++)
496							chrsave(*pc);
497						break;
498					}
499					if (t == EOF)
500					    break;
501					CHRSAVE(t);
502				}
503			} else
504				CHRSAVE(t);		/* stack the char */
505			break;
506		}
507	}
508}
509
510/*
511 * output string directly, without pushing it for reparses.
512 */
513void
514outputstr(const char *s)
515{
516	if (sp < 0)
517		while (*s)
518			putc(*s++, active);
519	else
520		while (*s)
521			CHRSAVE(*s++);
522}
523
524/*
525 * build an input token..
526 * consider only those starting with _ or A-Za-z. This is a
527 * combo with lookup to speed things up.
528 */
529static ndptr
530inspect(int c, char *tp)
531{
532	char *name = tp;
533	char *etp = tp+MAXTOK;
534	ndptr p;
535	unsigned int h;
536
537	h = *tp++ = c;
538
539	while ((isalnum(c = gpbc()) || c == '_') && tp < etp)
540		h = (h << 5) + h + (*tp++ = c);
541	if (c != EOF)
542		PUTBACK(c);
543	*tp = EOS;
544	/* token is too long, it won't match anything, but it can still
545	 * be output. */
546	if (tp == ep) {
547		outputstr(name);
548		while (isalnum(c = gpbc()) || c == '_') {
549			if (sp < 0)
550				putc(c, active);
551			else
552				CHRSAVE(c);
553		}
554		*name = EOS;
555		return nil;
556	}
557
558	for (p = hashtab[h % HASHSIZE]; p != nil; p = p->nxtptr)
559		if (h == p->hv && STREQ(name, p->name))
560			break;
561	return p;
562}
563
564/*
565 * initkwds - initialise m4 keywords as fast as possible.
566 * This very similar to install, but without certain overheads,
567 * such as calling lookup. Malloc is not used for storing the
568 * keyword strings, since we simply use the static pointers
569 * within keywrds block.
570 */
571static void
572initkwds(void)
573{
574	size_t i;
575	unsigned int h;
576	ndptr p;
577
578	for (i = 0; i < MAXKEYS; i++) {
579		h = hash(keywrds[i].knam);
580		p = (ndptr) xalloc(sizeof(struct ndblock));
581		p->nxtptr = hashtab[h % HASHSIZE];
582		hashtab[h % HASHSIZE] = p;
583		p->name = xstrdup(keywrds[i].knam);
584		p->defn = null;
585		p->hv = h;
586		p->type = keywrds[i].ktyp & TYPEMASK;
587		if ((keywrds[i].ktyp & NOARGS) == 0)
588			p->type |= NEEDARGS;
589	}
590}
591
592/* Look up a builtin type, even if overridden by the user */
593int
594builtin_type(const char *key)
595{
596	int i;
597
598	for (i = 0; i != MAXKEYS; i++)
599		if (STREQ(keywrds[i].knam, key))
600			return keywrds[i].ktyp;
601	return -1;
602}
603
604const char *
605builtin_realname(int n)
606{
607	int i;
608
609	for (i = 0; i != MAXKEYS; i++)
610		if (((keywrds[i].ktyp ^ n) & TYPEMASK) == 0)
611			return keywrds[i].knam;
612	return NULL;
613}
614
615static void
616record(struct position *t, int lev)
617{
618	if (lev < MAXRECORD) {
619		t[lev].name = CURRENT_NAME;
620		t[lev].line = CURRENT_LINE;
621	}
622}
623
624static void
625dump_stack(struct position *t, int lev)
626{
627	int i;
628
629	for (i = 0; i < lev; i++) {
630		if (i == MAXRECORD) {
631			fprintf(stderr, "   ...\n");
632			break;
633		}
634		fprintf(stderr, "   %s at line %lu\n",
635			t[i].name, t[i].line);
636	}
637}
638
639
640static void
641enlarge_stack(void)
642{
643	STACKMAX *= 2;
644	mstack = realloc(mstack, sizeof(stae) * STACKMAX);
645	sstack = realloc(sstack, STACKMAX);
646	if (mstack == NULL || sstack == NULL)
647		errx(1, "Evaluation stack overflow (%lu)",
648		    (unsigned long)STACKMAX);
649}
650
651/* Emit preprocessor #line directive if -s option used. */
652void
653emitline(void)
654{
655
656	if (synccpp)
657		fprintf(active, "#line %d \"%s\"\n", inlineno[ilevel],
658			inname[ilevel]);
659}
660