/* tran.c revision 90902 */
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved

Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appear in all
copies and that both that the copyright notice and this
permission notice and warranty disclaimer appear in supporting
documentation, and that the name Lucent Technologies or any of
its entities not be used in advertising or publicity pertaining
to distribution of the software without specific, written prior
permission.

LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/

#define	DEBUG
#include <stdio.h>
#include <math.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
#include "awk.h"
#include "ytab.h"

3485587Sobrien#define	FULLTAB	2	/* rehash when table gets this x full */
3585587Sobrien#define	GROWTAB 4	/* grow table by this factor */
3685587Sobrien
3785587SobrienArray	*symtab;	/* main symbol table */
3885587Sobrien
3985587Sobrienchar	**FS;		/* initial field sep */
4085587Sobrienchar	**RS;		/* initial record sep */
4185587Sobrienchar	**OFS;		/* output field sep */
4285587Sobrienchar	**ORS;		/* output record sep */
4385587Sobrienchar	**OFMT;		/* output format for numbers */
4485587Sobrienchar	**CONVFMT;	/* format for conversions in getsval */
4585587SobrienAwkfloat *NF;		/* number of fields in current record */
4685587SobrienAwkfloat *NR;		/* number of current record */
4785587SobrienAwkfloat *FNR;		/* number of current record in current file */
4885587Sobrienchar	**FILENAME;	/* current filename argument */
4985587SobrienAwkfloat *ARGC;		/* number of arguments from command line */
5085587Sobrienchar	**SUBSEP;	/* subscript separator for a[i,j,k]; default \034 */
5185587SobrienAwkfloat *RSTART;	/* start of re matched with ~; origin 1 (!) */
5285587SobrienAwkfloat *RLENGTH;	/* length of same */
5385587Sobrien
5485587SobrienCell	*nrloc;		/* NR */
5585587SobrienCell	*nfloc;		/* NF */
5685587SobrienCell	*fnrloc;	/* FNR */
5785587SobrienArray	*ARGVtab;	/* symbol table containing ARGV[...] */
5885587SobrienArray	*ENVtab;	/* symbol table containing ENVIRON[...] */
5985587SobrienCell	*rstartloc;	/* RSTART */
6085587SobrienCell	*rlengthloc;	/* RLENGTH */
6185587SobrienCell	*symtabloc;	/* SYMTAB */
6285587Sobrien
6385587SobrienCell	*nullloc;	/* a guaranteed empty cell */
6485587SobrienNode	*nullnode;	/* zero&null, converted into a node for comparisons */
6585587SobrienCell	*literal0;
6685587Sobrien
6785587Sobrienextern Cell **fldtab;
6885587Sobrien
6985587Sobrienvoid syminit(void)	/* initialize symbol table with builtin vars */
7085587Sobrien{
7185587Sobrien	literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab);
7285587Sobrien	/* this is used for if(x)... tests: */
7385587Sobrien	nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab);
7485587Sobrien	nullnode = celltonode(nullloc, CCON);
7585587Sobrien
7685587Sobrien	FS = &setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab)->sval;
7785587Sobrien	RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
7885587Sobrien	OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval;
7985587Sobrien	ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
8085587Sobrien	OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
8185587Sobrien	CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
8285587Sobrien	FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;
8385587Sobrien	nfloc = setsymtab("NF", "", 0.0, NUM, symtab);
8485587Sobrien	NF = &nfloc->fval;
8585587Sobrien	nrloc = setsymtab("NR", "", 0.0, NUM, symtab);
8685587Sobrien	NR = &nrloc->fval;
8785587Sobrien	fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
8885587Sobrien	FNR = &fnrloc->fval;
8985587Sobrien	SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval;
9085587Sobrien	rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
9185587Sobrien	RSTART = &rstartloc->fval;
9285587Sobrien	rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
9385587Sobrien	RLENGTH = &rlengthloc->fval;
9485587Sobrien	symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab);
9585587Sobrien	symtabloc->sval = (char *) symtab;
9685587Sobrien}
9785587Sobrien
9885587Sobrienvoid arginit(int ac, char **av)	/* set up ARGV and ARGC */
9985587Sobrien{
10085587Sobrien	Cell *cp;
10185587Sobrien	int i;
10285587Sobrien	char temp[50];
10385587Sobrien
10485587Sobrien	ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
10585587Sobrien	cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
10685587Sobrien	ARGVtab = makesymtab(NSYMTAB);	/* could be (int) ARGC as well */
10785587Sobrien	cp->sval = (char *) ARGVtab;
10885587Sobrien	for (i = 0; i < ac; i++) {
10985587Sobrien		sprintf(temp, "%d", i);
11085587Sobrien		if (is_number(*av))
11185587Sobrien			setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);
11285587Sobrien		else
11385587Sobrien			setsymtab(temp, *av, 0.0, STR, ARGVtab);
11485587Sobrien		av++;
11585587Sobrien	}
11685587Sobrien}
11785587Sobrien
11885587Sobrienvoid envinit(char **envp)	/* set up ENVIRON variable */
11985587Sobrien{
12085587Sobrien	Cell *cp;
12185587Sobrien	char *p;
12285587Sobrien
12385587Sobrien	cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab);
12485587Sobrien	ENVtab = makesymtab(NSYMTAB);
12585587Sobrien	cp->sval = (char *) ENVtab;
12685587Sobrien	for ( ; *envp; envp++) {
12785587Sobrien		if ((p = strchr(*envp, '=')) == NULL)
12885587Sobrien			continue;
12985587Sobrien		if( p == *envp ) /* no left hand side name in env string */
13085587Sobrien			continue;
13185587Sobrien		*p++ = 0;	/* split into two strings at = */
13285587Sobrien		if (is_number(p))
13385587Sobrien			setsymtab(*envp, p, atof(p), STR|NUM, ENVtab);
13485587Sobrien		else
13585587Sobrien			setsymtab(*envp, p, 0.0, STR, ENVtab);
13685587Sobrien		p[-1] = '=';	/* restore in case env is passed down to a shell */
13785587Sobrien	}
13885587Sobrien}
13985587Sobrien
14085587SobrienArray *makesymtab(int n)	/* make a new symbol table */
14185587Sobrien{
14285587Sobrien	Array *ap;
14385587Sobrien	Cell **tp;
14485587Sobrien
14585587Sobrien	ap = (Array *) malloc(sizeof(Array));
14685587Sobrien	tp = (Cell **) calloc(n, sizeof(Cell *));
14785587Sobrien	if (ap == NULL || tp == NULL)
14885587Sobrien		FATAL("out of space in makesymtab");
14985587Sobrien	ap->nelem = 0;
15085587Sobrien	ap->size = n;
15185587Sobrien	ap->tab = tp;
15285587Sobrien	return(ap);
15385587Sobrien}
15485587Sobrien
15585587Sobrienvoid freesymtab(Cell *ap)	/* free a symbol table */
15685587Sobrien{
15785587Sobrien	Cell *cp, *temp;
15885587Sobrien	Array *tp;
15985587Sobrien	int i;
16085587Sobrien
16185587Sobrien	if (!isarr(ap))
16285587Sobrien		return;
16385587Sobrien	tp = (Array *) ap->sval;
16485587Sobrien	if (tp == NULL)
16585587Sobrien		return;
16685587Sobrien	for (i = 0; i < tp->size; i++) {
16785587Sobrien		for (cp = tp->tab[i]; cp != NULL; cp = temp) {
16885587Sobrien			xfree(cp->nval);
16985587Sobrien			if (freeable(cp))
17085587Sobrien				xfree(cp->sval);
17185587Sobrien			temp = cp->cnext;	/* avoids freeing then using */
17285587Sobrien			free(cp);
17390902Sdes			tp->nelem--;
17485587Sobrien		}
17585587Sobrien		tp->tab[i] = 0;
17685587Sobrien	}
17790902Sdes	if (tp->nelem != 0)
17890902Sdes		WARNING("can't happen: inconsistent element count freeing %s", ap->nval);
17985587Sobrien	free(tp->tab);
18085587Sobrien	free(tp);
18185587Sobrien}
18285587Sobrien
18385587Sobrienvoid freeelem(Cell *ap, char *s)	/* free elem s from ap (i.e., ap["s"] */
18485587Sobrien{
18585587Sobrien	Array *tp;
18685587Sobrien	Cell *p, *prev = NULL;
18785587Sobrien	int h;
18885587Sobrien
18985587Sobrien	tp = (Array *) ap->sval;
19085587Sobrien	h = hash(s, tp->size);
19185587Sobrien	for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
19285587Sobrien		if (strcmp(s, p->nval) == 0) {
19385587Sobrien			if (prev == NULL)	/* 1st one */
19485587Sobrien				tp->tab[h] = p->cnext;
19585587Sobrien			else			/* middle somewhere */
19685587Sobrien				prev->cnext = p->cnext;
19785587Sobrien			if (freeable(p))
19885587Sobrien				xfree(p->sval);
19985587Sobrien			free(p->nval);
20085587Sobrien			free(p);
20185587Sobrien			tp->nelem--;
20285587Sobrien			return;
20385587Sobrien		}
20485587Sobrien}
20585587Sobrien
20685587SobrienCell *setsymtab(char *n, char *s, Awkfloat f, unsigned t, Array *tp)
20785587Sobrien{
20885587Sobrien	int h;
20985587Sobrien	Cell *p;
21085587Sobrien
21185587Sobrien	if (n != NULL && (p = lookup(n, tp)) != NULL) {
21285587Sobrien		   dprintf( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
21385587Sobrien			p, p->nval, p->sval, p->fval, p->tval) );
21485587Sobrien		return(p);
21585587Sobrien	}
21685587Sobrien	p = (Cell *) malloc(sizeof(Cell));
21785587Sobrien	if (p == NULL)
21885587Sobrien		FATAL("out of space for symbol table at %s", n);
21985587Sobrien	p->nval = tostring(n);
22085587Sobrien	p->sval = s ? tostring(s) : tostring("");
22185587Sobrien	p->fval = f;
22285587Sobrien	p->tval = t;
22385587Sobrien	p->csub = CUNK;
22485587Sobrien	p->ctype = OCELL;
22585587Sobrien	tp->nelem++;
22685587Sobrien	if (tp->nelem > FULLTAB * tp->size)
22785587Sobrien		rehash(tp);
22885587Sobrien	h = hash(n, tp->size);
22985587Sobrien	p->cnext = tp->tab[h];
23085587Sobrien	tp->tab[h] = p;
23185587Sobrien	   dprintf( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
23285587Sobrien		p, p->nval, p->sval, p->fval, p->tval) );
23385587Sobrien	return(p);
23485587Sobrien}
23585587Sobrien
/*
 * hash: form hash value for string s.
 *
 * s: NUL-terminated key.
 * n: number of buckets; must not be zero, since the result is taken modulo n.
 *
 * Folds the characters of s into an unsigned accumulator (multiplier 31,
 * wrapping on overflow) and returns the accumulator modulo n.
 */
int hash(char *s, int n)
{
	unsigned hashval;

	for (hashval = 0; *s != '\0'; s++)
		hashval = (*s + 31 * hashval);
	return hashval % n;
}

24585587Sobrienvoid rehash(Array *tp)	/* rehash items in small table into big one */
24685587Sobrien{
24785587Sobrien	int i, nh, nsz;
24885587Sobrien	Cell *cp, *op, **np;
24985587Sobrien
25085587Sobrien	nsz = GROWTAB * tp->size;
25185587Sobrien	np = (Cell **) calloc(nsz, sizeof(Cell *));
25285587Sobrien	if (np == NULL)		/* can't do it, but can keep running. */
25385587Sobrien		return;		/* someone else will run out later. */
25485587Sobrien	for (i = 0; i < tp->size; i++) {
25585587Sobrien		for (cp = tp->tab[i]; cp; cp = op) {
25685587Sobrien			op = cp->cnext;
25785587Sobrien			nh = hash(cp->nval, nsz);
25885587Sobrien			cp->cnext = np[nh];
25985587Sobrien			np[nh] = cp;
26085587Sobrien		}
26185587Sobrien	}
26285587Sobrien	free(tp->tab);
26385587Sobrien	tp->tab = np;
26485587Sobrien	tp->size = nsz;
26585587Sobrien}
26685587Sobrien
26785587SobrienCell *lookup(char *s, Array *tp)	/* look for s in tp */
26885587Sobrien{
26985587Sobrien	Cell *p;
27085587Sobrien	int h;
27185587Sobrien
27285587Sobrien	h = hash(s, tp->size);
27385587Sobrien	for (p = tp->tab[h]; p != NULL; p = p->cnext)
27485587Sobrien		if (strcmp(s, p->nval) == 0)
27585587Sobrien			return(p);	/* found it */
27685587Sobrien	return(NULL);			/* not found */
27785587Sobrien}
27885587Sobrien
27985587SobrienAwkfloat setfval(Cell *vp, Awkfloat f)	/* set float val of a Cell */
28085587Sobrien{
28185587Sobrien	int fldno;
28285587Sobrien
28385587Sobrien	if ((vp->tval & (NUM | STR)) == 0)
28485587Sobrien		funnyvar(vp, "assign to");
28585587Sobrien	if (isfld(vp)) {
28685587Sobrien		donerec = 0;	/* mark $0 invalid */
28785587Sobrien		fldno = atoi(vp->nval);
28885587Sobrien		if (fldno > *NF)
28985587Sobrien			newfld(fldno);
29085587Sobrien		   dprintf( ("setting field %d to %g\n", fldno, f) );
29185587Sobrien	} else if (isrec(vp)) {
29285587Sobrien		donefld = 0;	/* mark $1... invalid */
29385587Sobrien		donerec = 1;
29485587Sobrien	}
29585587Sobrien	if (freeable(vp))
29685587Sobrien		xfree(vp->sval); /* free any previous string */
29785587Sobrien	vp->tval &= ~STR;	/* mark string invalid */
29885587Sobrien	vp->tval |= NUM;	/* mark number ok */
29985587Sobrien	   dprintf( ("setfval %p: %s = %g, t=%o\n", vp, vp->nval, f, vp->tval) );
30085587Sobrien	return vp->fval = f;
30185587Sobrien}
30285587Sobrien
30385587Sobrienvoid funnyvar(Cell *vp, char *rw)
30485587Sobrien{
30585587Sobrien	if (isarr(vp))
30685587Sobrien		FATAL("can't %s %s; it's an array name.", rw, vp->nval);
30785587Sobrien	if (vp->tval & FCN)
30885587Sobrien		FATAL("can't %s %s; it's a function.", rw, vp->nval);
30985587Sobrien	WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o",
31085587Sobrien		vp, vp->nval, vp->sval, vp->fval, vp->tval);
31185587Sobrien}
31285587Sobrien
31385587Sobrienchar *setsval(Cell *vp, char *s)	/* set string val of a Cell */
31485587Sobrien{
31585587Sobrien	char *t;
31685587Sobrien	int fldno;
31785587Sobrien
31885587Sobrien	   dprintf( ("starting setsval %p: %s = \"%s\", t=%o\n", vp, vp->nval, s, vp->tval) );
31985587Sobrien	if ((vp->tval & (NUM | STR)) == 0)
32085587Sobrien		funnyvar(vp, "assign to");
32185587Sobrien	if (isfld(vp)) {
32285587Sobrien		donerec = 0;	/* mark $0 invalid */
32385587Sobrien		fldno = atoi(vp->nval);
32485587Sobrien		if (fldno > *NF)
32585587Sobrien			newfld(fldno);
32685587Sobrien		   dprintf( ("setting field %d to %s (%p)\n", fldno, s, s) );
32785587Sobrien	} else if (isrec(vp)) {
32885587Sobrien		donefld = 0;	/* mark $1... invalid */
32985587Sobrien		donerec = 1;
33085587Sobrien	}
33185587Sobrien	t = tostring(s);	/* in case it's self-assign */
33285587Sobrien	vp->tval &= ~NUM;
33385587Sobrien	vp->tval |= STR;
33485587Sobrien	if (freeable(vp))
33585587Sobrien		xfree(vp->sval);
33685587Sobrien	vp->tval &= ~DONTFREE;
33785587Sobrien	   dprintf( ("setsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, t,t, vp->tval) );
33885587Sobrien	return(vp->sval = t);
33985587Sobrien}
34085587Sobrien
34185587SobrienAwkfloat getfval(Cell *vp)	/* get float val of a Cell */
34285587Sobrien{
34385587Sobrien	if ((vp->tval & (NUM | STR)) == 0)
34485587Sobrien		funnyvar(vp, "read value of");
34585587Sobrien	if (isfld(vp) && donefld == 0)
34685587Sobrien		fldbld();
34785587Sobrien	else if (isrec(vp) && donerec == 0)
34885587Sobrien		recbld();
34985587Sobrien	if (!isnum(vp)) {	/* not a number */
35085587Sobrien		vp->fval = atof(vp->sval);	/* best guess */
35185587Sobrien		if (is_number(vp->sval) && !(vp->tval&CON))
35285587Sobrien			vp->tval |= NUM;	/* make NUM only sparingly */
35385587Sobrien	}
35485587Sobrien	   dprintf( ("getfval %p: %s = %g, t=%o\n", vp, vp->nval, vp->fval, vp->tval) );
35585587Sobrien	return(vp->fval);
35685587Sobrien}
35785587Sobrien
35885587Sobrienchar *getsval(Cell *vp)	/* get string val of a Cell */
35985587Sobrien{
36085587Sobrien	char s[100];	/* BUG: unchecked */
36185587Sobrien	double dtemp;
36285587Sobrien
36385587Sobrien	if ((vp->tval & (NUM | STR)) == 0)
36485587Sobrien		funnyvar(vp, "read value of");
36585587Sobrien	if (isfld(vp) && donefld == 0)
36685587Sobrien		fldbld();
36785587Sobrien	else if (isrec(vp) && donerec == 0)
36885587Sobrien		recbld();
36985587Sobrien	if (isstr(vp) == 0) {
37085587Sobrien		if (freeable(vp))
37185587Sobrien			xfree(vp->sval);
37285587Sobrien		if (modf(vp->fval, &dtemp) == 0)	/* it's integral */
37385587Sobrien			sprintf(s, "%.30g", vp->fval);
37485587Sobrien		else
37585587Sobrien			sprintf(s, *CONVFMT, vp->fval);
37685587Sobrien		vp->sval = tostring(s);
37785587Sobrien		vp->tval &= ~DONTFREE;
37885587Sobrien		vp->tval |= STR;
37985587Sobrien	}
38085587Sobrien	   dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, vp->sval, vp->sval, vp->tval) );
38185587Sobrien	return(vp->sval);
38285587Sobrien}
38385587Sobrien
/*
 * tostring: make a copy of string s.
 *
 * s: NUL-terminated string to duplicate; must not be NULL (it is handed
 *    straight to strlen()).
 *
 * Returns a freshly malloc'd copy.  Calls FATAL() when the allocation fails.
 */
char *tostring(char *s)
{
	char *p;

	p = (char *) malloc(strlen(s)+1);
	if (p == NULL)
		FATAL("out of space in tostring on %s", s);
	strcpy(p, s);
	return(p);
}

39585587Sobrienchar *qstring(char *is, int delim)	/* collect string up to next delim */
39685587Sobrien{
39785587Sobrien	char *os = is;
39885587Sobrien	int c, n;
39985587Sobrien	uschar *s = (uschar *) is;
40085587Sobrien	uschar *buf, *bp;
40185587Sobrien
40290902Sdes	if ((buf = (uschar *) malloc(strlen(is)+3)) == NULL)
40385587Sobrien		FATAL( "out of space in qstring(%s)", s);
40485587Sobrien	for (bp = buf; (c = *s) != delim; s++) {
40585587Sobrien		if (c == '\n')
40685587Sobrien			SYNTAX( "newline in string %.20s...", os );
40785587Sobrien		else if (c != '\\')
40885587Sobrien			*bp++ = c;
40985587Sobrien		else {	/* \something */
41085587Sobrien			c = *++s;
41185587Sobrien			if (c == 0) {	/* \ at end */
41285587Sobrien				*bp++ = '\\';
41385587Sobrien				break;	/* for loop */
41485587Sobrien			}
41585587Sobrien			switch (c) {
41685587Sobrien			case '\\':	*bp++ = '\\'; break;
41785587Sobrien			case 'n':	*bp++ = '\n'; break;
41885587Sobrien			case 't':	*bp++ = '\t'; break;
41985587Sobrien			case 'b':	*bp++ = '\b'; break;
42085587Sobrien			case 'f':	*bp++ = '\f'; break;
42185587Sobrien			case 'r':	*bp++ = '\r'; break;
42285587Sobrien			default:
42385587Sobrien				if (!isdigit(c)) {
42485587Sobrien					*bp++ = c;
42585587Sobrien					break;
42685587Sobrien				}
42785587Sobrien				n = c - '0';
42885587Sobrien				if (isdigit(s[1])) {
42985587Sobrien					n = 8 * n + *++s - '0';
43085587Sobrien					if (isdigit(s[1]))
43185587Sobrien						n = 8 * n + *++s - '0';
43285587Sobrien				}
43385587Sobrien				*bp++ = n;
43485587Sobrien				break;
43585587Sobrien			}
43685587Sobrien		}
43785587Sobrien	}
43885587Sobrien	*bp++ = 0;
43985587Sobrien	return (char *) buf;
44085587Sobrien}
441