main.c revision 27625
/*-
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Ozan Yigit at York University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifndef lint
/*
 * Identification strings kept in the object file.  They are compiled
 * out when the source is run through lint.
 */
static const char copyright[] =
	"@(#) Copyright (c) 1989, 1993\n"
	"\tThe Regents of the University of California.  All rights reserved.\n";

#if 0
static char sccsid[] = "@(#)main.c	8.1 (Berkeley) 6/6/93";
#endif
static const char rcsid[] = "$Id$";
#endif /* not lint */

/*
 * main.c
 * Facility: m4 macro processor
 * by: oz
 */

571590Srgrimes#include <sys/types.h>
5827625Scharnier#include <ctype.h>
5927625Scharnier#include <err.h>
601590Srgrimes#include <signal.h>
611590Srgrimes#include <stdio.h>
621590Srgrimes#include <string.h>
6327625Scharnier#include <unistd.h>
641590Srgrimes#include "mdef.h"
651590Srgrimes#include "stdd.h"
661590Srgrimes#include "extern.h"
671590Srgrimes#include "pathnames.h"
681590Srgrimes
691590Srgrimesndptr hashtab[HASHSIZE];	/* hash table for macros etc.  */
705165Sacheunsigned char buf[BUFSIZE];              /* push-back buffer            */
715165Sacheunsigned char *bufbase = buf;            /* the base for current ilevel */
725165Sacheunsigned char *bbase[MAXINP];            /* the base for each ilevel    */
735165Sacheunsigned char *bp = buf;                 /* first available character   */
745165Sacheunsigned char *endpbb = buf+BUFSIZE;     /* end of push-back buffer     */
751590Srgrimesstae mstack[STACKMAX+1]; 	/* stack of m4 machine         */
761590Srgrimeschar strspace[STRSPMAX+1];	/* string space for evaluation */
771590Srgrimeschar *ep = strspace;		/* first free char in strspace */
781590Srgrimeschar *endest= strspace+STRSPMAX;/* end of string space	       */
791590Srgrimesint sp; 			/* current m4  stack pointer   */
801590Srgrimesint fp; 			/* m4 call frame pointer       */
811590SrgrimesFILE *infile[MAXINP];		/* input file stack (0=stdin)  */
821590SrgrimesFILE *outfile[MAXOUT];		/* diversion array(0=bitbucket)*/
831590SrgrimesFILE *active;			/* active output file pointer  */
841590Srgrimeschar *m4temp;			/* filename for diversions     */
851590Srgrimesint ilevel = 0; 		/* input file stack pointer    */
861590Srgrimesint oindex = 0; 		/* diversion index..	       */
871590Srgrimeschar *null = "";                /* as it says.. just a null..  */
881590Srgrimeschar *m4wraps = "";             /* m4wrap string default..     */
891590Srgrimeschar lquote = LQUOTE;		/* left quote character  (`)   */
901590Srgrimeschar rquote = RQUOTE;		/* right quote character (')   */
911590Srgrimeschar scommt = SCOMMT;		/* start character for comment */
921590Srgrimeschar ecommt = ECOMMT;		/* end character for comment   */
931590Srgrimes
941590Srgrimesstruct keyblk keywrds[] = {	/* m4 keywords to be installed */
951590Srgrimes	"include",      INCLTYPE,
961590Srgrimes	"sinclude",     SINCTYPE,
971590Srgrimes	"define",       DEFITYPE,
981590Srgrimes	"defn",         DEFNTYPE,
991590Srgrimes	"divert",       DIVRTYPE,
1001590Srgrimes	"expr",         EXPRTYPE,
1011590Srgrimes	"eval",         EXPRTYPE,
1021590Srgrimes	"substr",       SUBSTYPE,
1031590Srgrimes	"ifelse",       IFELTYPE,
1041590Srgrimes	"ifdef",        IFDFTYPE,
1051590Srgrimes	"len",          LENGTYPE,
1061590Srgrimes	"incr",         INCRTYPE,
1071590Srgrimes	"decr",         DECRTYPE,
1081590Srgrimes	"dnl",          DNLNTYPE,
1091590Srgrimes	"changequote",  CHNQTYPE,
1101590Srgrimes	"changecom",    CHNCTYPE,
1111590Srgrimes	"index",        INDXTYPE,
1121590Srgrimes#ifdef EXTENDED
1131590Srgrimes	"paste",        PASTTYPE,
1141590Srgrimes	"spaste",       SPASTYPE,
1151590Srgrimes#endif
1161590Srgrimes	"popdef",       POPDTYPE,
1171590Srgrimes	"pushdef",      PUSDTYPE,
1181590Srgrimes	"dumpdef",      DUMPTYPE,
1191590Srgrimes	"shift",        SHIFTYPE,
1201590Srgrimes	"translit",     TRNLTYPE,
1211590Srgrimes	"undefine",     UNDFTYPE,
1221590Srgrimes	"undivert",     UNDVTYPE,
1231590Srgrimes	"divnum",       DIVNTYPE,
1241590Srgrimes	"maketemp",     MKTMTYPE,
1251590Srgrimes	"errprint",     ERRPTYPE,
1261590Srgrimes	"m4wrap",       M4WRTYPE,
1271590Srgrimes	"m4exit",       EXITTYPE,
1281590Srgrimes	"syscmd",       SYSCTYPE,
1291590Srgrimes	"sysval",       SYSVTYPE,
1301590Srgrimes
1311590Srgrimes#ifdef unix
1321590Srgrimes	"unix",         MACRTYPE,
1331590Srgrimes#else
1341590Srgrimes#ifdef vms
1351590Srgrimes	"vms",          MACRTYPE,
1361590Srgrimes#endif
1371590Srgrimes#endif
1381590Srgrimes};
1391590Srgrimes
/* number of rows in the keywrds table */
#define MAXKEYS	(sizeof(keywrds) / sizeof(keywrds[0]))

/* getopt(3) interface */
extern char *optarg;
extern int optind;
extern int getopt();

/* routines defined later in this file */
void initkwds();
void macro();

1491590Srgrimesint
1501590Srgrimesmain(argc,argv)
1511590Srgrimes	int argc;
1521590Srgrimes	char *argv[];
1531590Srgrimes{
1541590Srgrimes	register int c;
1551590Srgrimes	register int n;
1561590Srgrimes	char *p;
1571590Srgrimes	register FILE *ifp;
1581590Srgrimes
1591590Srgrimes	if (signal(SIGINT, SIG_IGN) != SIG_IGN)
1601590Srgrimes		signal(SIGINT, onintr);
1611590Srgrimes
1621590Srgrimes	initkwds();
1631590Srgrimes
16424360Simp	while ((c = getopt(argc, argv, "tD:U:o:")) != -1)
1651590Srgrimes		switch(c) {
1661590Srgrimes
1671590Srgrimes		case 'D':               /* define something..*/
1681590Srgrimes			for (p = optarg; *p; p++)
1691590Srgrimes				if (*p == '=')
1701590Srgrimes					break;
1711590Srgrimes			if (*p)
1721590Srgrimes				*p++ = EOS;
1731590Srgrimes			dodefine(optarg, p);
1741590Srgrimes			break;
1751590Srgrimes		case 'U':               /* undefine...       */
1761590Srgrimes			remhash(optarg, TOP);
1771590Srgrimes			break;
1781590Srgrimes		case 'o':		/* specific output   */
1791590Srgrimes		case '?':
1801590Srgrimes			usage();
1811590Srgrimes		}
1821590Srgrimes
1831590Srgrimes        argc -= optind;
1841590Srgrimes        argv += optind;
1851590Srgrimes
1861590Srgrimes	active = stdout;		/* default active output     */
1871590Srgrimes					/* filename for diversions   */
1881590Srgrimes	m4temp = mktemp(xstrdup(_PATH_DIVNAME));
1891590Srgrimes
1901590Srgrimes	bbase[0] = bufbase;
1911590Srgrimes        if (!argc) {
1921590Srgrimes 		sp = -1;		/* stack pointer initialized */
1931590Srgrimes		fp = 0; 		/* frame pointer initialized */
1941590Srgrimes		infile[0] = stdin;	/* default input (naturally) */
1951590Srgrimes		macro();
1961590Srgrimes	} else
1971590Srgrimes		for (; argc--; ++argv) {
1981590Srgrimes			p = *argv;
1991590Srgrimes			if (p[0] == '-' && p[1] == '\0')
2001590Srgrimes				ifp = stdin;
2011590Srgrimes			else if ((ifp = fopen(p, "r")) == NULL)
20227625Scharnier				err(1, "%s", p);
2031590Srgrimes			sp = -1;
2048874Srgrimes			fp = 0;
2051590Srgrimes			infile[0] = ifp;
2061590Srgrimes			macro();
2071590Srgrimes			if (ifp != stdin)
2081590Srgrimes				(void)fclose(ifp);
2091590Srgrimes		}
2101590Srgrimes
2111590Srgrimes	if (*m4wraps) { 		/* anything for rundown ??   */
2121590Srgrimes		ilevel = 0;		/* in case m4wrap includes.. */
2131590Srgrimes		bufbase = bp = buf;	/* use the entire buffer   */
2141590Srgrimes		putback(EOF);		/* eof is a must !!	     */
2151590Srgrimes		pbstr(m4wraps); 	/* user-defined wrapup act   */
2161590Srgrimes		macro();		/* last will and testament   */
2171590Srgrimes	}
2181590Srgrimes
2191590Srgrimes	if (active != stdout)
2201590Srgrimes		active = stdout;	/* reset output just in case */
2211590Srgrimes	for (n = 1; n < MAXOUT; n++)	/* default wrap-up: undivert */
2221590Srgrimes		if (outfile[n] != NULL)
2231590Srgrimes			getdiv(n);
2241590Srgrimes					/* remove bitbucket if used  */
2251590Srgrimes	if (outfile[0] != NULL) {
2261590Srgrimes		(void) fclose(outfile[0]);
2271590Srgrimes		m4temp[UNIQUE] = '0';
2281590Srgrimes#ifdef vms
2291590Srgrimes		(void) remove(m4temp);
2301590Srgrimes#else
2311590Srgrimes		(void) unlink(m4temp);
2321590Srgrimes#endif
2331590Srgrimes	}
2341590Srgrimes
2351590Srgrimes	return 0;
2361590Srgrimes}
2371590Srgrimes
2381590Srgrimesndptr inspect();
2391590Srgrimes
2401590Srgrimes/*
2411590Srgrimes * macro - the work horse..
2421590Srgrimes */
2431590Srgrimesvoid
2441590Srgrimesmacro() {
2451590Srgrimes	char token[MAXTOK];
2461590Srgrimes	register char *s;
2471590Srgrimes	register int t, l;
2481590Srgrimes	register ndptr p;
2491590Srgrimes	register int  nlpar;
2501590Srgrimes
2511590Srgrimes	cycle {
2525165Sache		if ((t = gpbc()) == '_' || (t != EOF && isalpha(t))) {
2531590Srgrimes			putback(t);
2541590Srgrimes			if ((p = inspect(s = token)) == nil) {
2551590Srgrimes				if (sp < 0)
2561590Srgrimes					while (*s)
2571590Srgrimes						putc(*s++, active);
2581590Srgrimes				else
2591590Srgrimes					while (*s)
2601590Srgrimes						chrsave(*s++);
2611590Srgrimes			}
2621590Srgrimes			else {
2631590Srgrimes		/*
2641590Srgrimes		 * real thing.. First build a call frame:
2651590Srgrimes		 */
2661590Srgrimes				pushf(fp);	/* previous call frm */
2671590Srgrimes				pushf(p->type); /* type of the call  */
2681590Srgrimes				pushf(0);	/* parenthesis level */
2691590Srgrimes				fp = sp;	/* new frame pointer */
2701590Srgrimes		/*
2711590Srgrimes		 * now push the string arguments:
2721590Srgrimes		 */
2731590Srgrimes				pushs(p->defn);	      /* defn string */
2741590Srgrimes				pushs(p->name);	      /* macro name  */
2751590Srgrimes				pushs(ep);	      /* start next..*/
2761590Srgrimes
2771590Srgrimes				putback(l = gpbc());
2781590Srgrimes				if (l != LPAREN)  {   /* add bracks  */
2791590Srgrimes					putback(RPAREN);
2801590Srgrimes					putback(LPAREN);
2811590Srgrimes				}
2821590Srgrimes			}
2831590Srgrimes		}
2841590Srgrimes		else if (t == EOF) {
2851590Srgrimes			if (sp > -1)
28627625Scharnier				errx(1, "unexpected end of input");
2871590Srgrimes			if (ilevel <= 0)
2881590Srgrimes				break;			/* all done thanks.. */
2891590Srgrimes			--ilevel;
2901590Srgrimes			(void) fclose(infile[ilevel+1]);
2911590Srgrimes			bufbase = bbase[ilevel];
2921590Srgrimes			continue;
2931590Srgrimes		}
2941590Srgrimes	/*
2951590Srgrimes	 * non-alpha single-char token seen..
2961590Srgrimes	 * [the order of else if .. stmts is important.]
2971590Srgrimes	 */
2981590Srgrimes		else if (t == lquote) { 		/* strip quotes */
2991590Srgrimes			nlpar = 1;
3001590Srgrimes			do {
3011590Srgrimes				if ((l = gpbc()) == rquote)
3021590Srgrimes					nlpar--;
3031590Srgrimes				else if (l == lquote)
3041590Srgrimes					nlpar++;
3051590Srgrimes				else if (l == EOF)
30627625Scharnier					errx(1, "missing right quote");
3071590Srgrimes				if (nlpar > 0) {
3081590Srgrimes					if (sp < 0)
3091590Srgrimes						putc(l, active);
3101590Srgrimes					else
3111590Srgrimes						chrsave(l);
3121590Srgrimes				}
3131590Srgrimes			}
3141590Srgrimes			while (nlpar != 0);
3151590Srgrimes		}
3161590Srgrimes
3171590Srgrimes		else if (sp < 0) {		/* not in a macro at all */
3181590Srgrimes			if (t == scommt) {	/* comment handling here */
3191590Srgrimes				putc(t, active);
3201590Srgrimes				while ((t = gpbc()) != ecommt)
3211590Srgrimes					putc(t, active);
3221590Srgrimes			}
3231590Srgrimes			putc(t, active);	/* output directly..	 */
3241590Srgrimes		}
3251590Srgrimes
3261590Srgrimes		else switch(t) {
3271590Srgrimes
3281590Srgrimes		case LPAREN:
3291590Srgrimes			if (PARLEV > 0)
3301590Srgrimes				chrsave(t);
3315165Sache			while ((l = gpbc()) != EOF && isspace(l))
3321590Srgrimes				;		/* skip blank, tab, nl.. */
3331590Srgrimes			putback(l);
3341590Srgrimes			PARLEV++;
3351590Srgrimes			break;
3361590Srgrimes
3371590Srgrimes		case RPAREN:
3381590Srgrimes			if (--PARLEV > 0)
3391590Srgrimes				chrsave(t);
3401590Srgrimes			else {			/* end of argument list */
3411590Srgrimes				chrsave(EOS);
3421590Srgrimes
3431590Srgrimes				if (sp == STACKMAX)
34427625Scharnier					errx(1, "internal stack overflow");
3451590Srgrimes
3461590Srgrimes				if (CALTYP == MACRTYPE)
3471590Srgrimes					expand((char **) mstack+fp+1, sp-fp);
3481590Srgrimes				else
3491590Srgrimes					eval((char **) mstack+fp+1, sp-fp, CALTYP);
3501590Srgrimes
3511590Srgrimes				ep = PREVEP;	/* flush strspace */
3521590Srgrimes				sp = PREVSP;	/* previous sp..  */
3531590Srgrimes				fp = PREVFP;	/* rewind stack...*/
3541590Srgrimes			}
3551590Srgrimes			break;
3561590Srgrimes
3571590Srgrimes		case COMMA:
3581590Srgrimes			if (PARLEV == 1) {
3591590Srgrimes				chrsave(EOS);		/* new argument   */
3605165Sache				while ((l = gpbc()) != EOF && isspace(l))
3611590Srgrimes					;
3621590Srgrimes				putback(l);
3631590Srgrimes				pushs(ep);
3641590Srgrimes			} else
3651590Srgrimes				chrsave(t);
3661590Srgrimes			break;
3671590Srgrimes
3681590Srgrimes		default:
3691590Srgrimes			chrsave(t);			/* stack the char */
3701590Srgrimes			break;
3711590Srgrimes		}
3721590Srgrimes	}
3731590Srgrimes}
3741590Srgrimes
3751590Srgrimes/*
3761590Srgrimes * build an input token..
3771590Srgrimes * consider only those starting with _ or A-Za-z. This is a
3781590Srgrimes * combo with lookup to speed things up.
3791590Srgrimes */
3801590Srgrimesndptr
3818874Srgrimesinspect(tp)
3821590Srgrimesregister char *tp;
3831590Srgrimes{
3845165Sache	register int c;
3851590Srgrimes	register char *name = tp;
3861590Srgrimes	register char *etp = tp+MAXTOK;
3871590Srgrimes	register ndptr p;
3881590Srgrimes	register unsigned long h = 0;
3891590Srgrimes
3905165Sache	while ((c = gpbc()) != EOF && (isalnum(c) || c == '_') && tp < etp)
3911590Srgrimes		h = (h << 5) + h + (*tp++ = c);
3921590Srgrimes	putback(c);
3931590Srgrimes	if (tp == etp)
39427625Scharnier		errx(1, "token too long");
3951590Srgrimes
3961590Srgrimes	*tp = EOS;
3971590Srgrimes
3981590Srgrimes	for (p = hashtab[h%HASHSIZE]; p != nil; p = p->nxtptr)
3991590Srgrimes		if (STREQ(name, p->name))
4001590Srgrimes			break;
4011590Srgrimes	return p;
4021590Srgrimes}
4031590Srgrimes
4041590Srgrimes/*
4058874Srgrimes * initkwds - initialise m4 keywords as fast as possible.
4061590Srgrimes * This very similar to install, but without certain overheads,
4078874Srgrimes * such as calling lookup. Malloc is not used for storing the
4081590Srgrimes * keyword strings, since we simply use the static  pointers
4091590Srgrimes * within keywrds block.
4101590Srgrimes */
4111590Srgrimesvoid
4121590Srgrimesinitkwds() {
4131590Srgrimes	register int i;
4141590Srgrimes	register int h;
4151590Srgrimes	register ndptr p;
4161590Srgrimes
4171590Srgrimes	for (i = 0; i < MAXKEYS; i++) {
4181590Srgrimes		h = hash(keywrds[i].knam);
4191590Srgrimes		p = (ndptr) xalloc(sizeof(struct ndblock));
4201590Srgrimes		p->nxtptr = hashtab[h];
4211590Srgrimes		hashtab[h] = p;
4221590Srgrimes		p->name = keywrds[i].knam;
4231590Srgrimes		p->defn = null;
4241590Srgrimes		p->type = keywrds[i].ktyp | STATIC;
4251590Srgrimes	}
4261590Srgrimes}
427