tran.c revision 107806
197403Sobrien/****************************************************************
297403SobrienCopyright (C) Lucent Technologies 1997
397403SobrienAll Rights Reserved
497403Sobrien
597403SobrienPermission to use, copy, modify, and distribute this software and
697403Sobrienits documentation for any purpose and without fee is hereby
797403Sobriengranted, provided that the above copyright notice appear in all
897403Sobriencopies and that both that the copyright notice and this
997403Sobrienpermission notice and warranty disclaimer appear in supporting
1097403Sobriendocumentation, and that the name Lucent Technologies or any of
1197403Sobrienits entities not be used in advertising or publicity pertaining
1297403Sobriento distribution of the software without specific, written prior
1397403Sobrienpermission.
1497403Sobrien
1597403SobrienLUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
1697403SobrienINCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
1797403SobrienIN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18169691SkanSPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
1997403SobrienWHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
2097403SobrienIN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
2197403SobrienARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
2297403SobrienTHIS SOFTWARE.
2397403Sobrien****************************************************************/
2497403Sobrien
2597403Sobrien#define	DEBUG
2697403Sobrien#include <stdio.h>
2797403Sobrien#include <math.h>
2897403Sobrien#include <ctype.h>
2997403Sobrien#include <string.h>
3097403Sobrien#include <stdlib.h>
3197403Sobrien#include "awk.h"
3297403Sobrien#include "ytab.h"
3397403Sobrien
3497403Sobrien#define	FULLTAB	2	/* rehash when table gets this x full */
3597403Sobrien#define	GROWTAB 4	/* grow table by this factor */
3697403Sobrien
3797403SobrienArray	*symtab;	/* main symbol table */
3897403Sobrien
3997403Sobrienchar	**FS;		/* initial field sep */
4097403Sobrienchar	**RS;		/* initial record sep */
4197403Sobrienchar	**OFS;		/* output field sep */
4297403Sobrienchar	**ORS;		/* output record sep */
4397403Sobrienchar	**OFMT;		/* output format for numbers */
4497403Sobrienchar	**CONVFMT;	/* format for conversions in getsval */
4597403SobrienAwkfloat *NF;		/* number of fields in current record */
4697403SobrienAwkfloat *NR;		/* number of current record */
4797403SobrienAwkfloat *FNR;		/* number of current record in current file */
4897403Sobrienchar	**FILENAME;	/* current filename argument */
4997403SobrienAwkfloat *ARGC;		/* number of arguments from command line */
5097403Sobrienchar	**SUBSEP;	/* subscript separator for a[i,j,k]; default \034 */
5197403SobrienAwkfloat *RSTART;	/* start of re matched with ~; origin 1 (!) */
5297403SobrienAwkfloat *RLENGTH;	/* length of same */
5397403Sobrien
5497403SobrienCell	*nrloc;		/* NR */
5597403SobrienCell	*nfloc;		/* NF */
56132720SkanCell	*fnrloc;	/* FNR */
57132720SkanArray	*ARGVtab;	/* symbol table containing ARGV[...] */
5897403SobrienArray	*ENVtab;	/* symbol table containing ENVIRON[...] */
5997403SobrienCell	*rstartloc;	/* RSTART */
6097403SobrienCell	*rlengthloc;	/* RLENGTH */
6197403SobrienCell	*symtabloc;	/* SYMTAB */
6297403Sobrien
6397403SobrienCell	*nullloc;	/* a guaranteed empty cell */
6497403SobrienNode	*nullnode;	/* zero&null, converted into a node for comparisons */
6597403SobrienCell	*literal0;
6697403Sobrien
6797403Sobrienextern Cell **fldtab;
68132720Skan
69void syminit(void)	/* initialize symbol table with builtin vars */
70{
71	literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab);
72	/* this is used for if(x)... tests: */
73	nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab);
74	nullnode = celltonode(nullloc, CCON);
75
76	FS = &setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab)->sval;
77	RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
78	OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval;
79	ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
80	OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
81	CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
82	FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;
83	nfloc = setsymtab("NF", "", 0.0, NUM, symtab);
84	NF = &nfloc->fval;
85	nrloc = setsymtab("NR", "", 0.0, NUM, symtab);
86	NR = &nrloc->fval;
87	fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
88	FNR = &fnrloc->fval;
89	SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval;
90	rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
91	RSTART = &rstartloc->fval;
92	rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
93	RLENGTH = &rlengthloc->fval;
94	symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab);
95	symtabloc->sval = (char *) symtab;
96}
97
98void arginit(int ac, char **av)	/* set up ARGV and ARGC */
99{
100	Cell *cp;
101	int i;
102	char temp[50];
103
104	ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
105	cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
106	ARGVtab = makesymtab(NSYMTAB);	/* could be (int) ARGC as well */
107	cp->sval = (char *) ARGVtab;
108	for (i = 0; i < ac; i++) {
109		sprintf(temp, "%d", i);
110		if (is_number(*av))
111			setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);
112		else
113			setsymtab(temp, *av, 0.0, STR, ARGVtab);
114		av++;
115	}
116}
117
118void envinit(char **envp)	/* set up ENVIRON variable */
119{
120	Cell *cp;
121	char *p;
122
123	cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab);
124	ENVtab = makesymtab(NSYMTAB);
125	cp->sval = (char *) ENVtab;
126	for ( ; *envp; envp++) {
127		if ((p = strchr(*envp, '=')) == NULL)
128			continue;
129		if( p == *envp ) /* no left hand side name in env string */
130			continue;
131		*p++ = 0;	/* split into two strings at = */
132		if (is_number(p))
133			setsymtab(*envp, p, atof(p), STR|NUM, ENVtab);
134		else
135			setsymtab(*envp, p, 0.0, STR, ENVtab);
136		p[-1] = '=';	/* restore in case env is passed down to a shell */
137	}
138}
139
140Array *makesymtab(int n)	/* make a new symbol table */
141{
142	Array *ap;
143	Cell **tp;
144
145	ap = (Array *) malloc(sizeof(Array));
146	tp = (Cell **) calloc(n, sizeof(Cell *));
147	if (ap == NULL || tp == NULL)
148		FATAL("out of space in makesymtab");
149	ap->nelem = 0;
150	ap->size = n;
151	ap->tab = tp;
152	return(ap);
153}
154
155void freesymtab(Cell *ap)	/* free a symbol table */
156{
157	Cell *cp, *temp;
158	Array *tp;
159	int i;
160
161	if (!isarr(ap))
162		return;
163	tp = (Array *) ap->sval;
164	if (tp == NULL)
165		return;
166	for (i = 0; i < tp->size; i++) {
167		for (cp = tp->tab[i]; cp != NULL; cp = temp) {
168			xfree(cp->nval);
169			if (freeable(cp))
170				xfree(cp->sval);
171			temp = cp->cnext;	/* avoids freeing then using */
172			free(cp);
173			tp->nelem--;
174		}
175		tp->tab[i] = 0;
176	}
177	if (tp->nelem != 0)
178		WARNING("can't happen: inconsistent element count freeing %s", ap->nval);
179	free(tp->tab);
180	free(tp);
181}
182
183void freeelem(Cell *ap, const char *s)	/* free elem s from ap (i.e., ap["s"] */
184{
185	Array *tp;
186	Cell *p, *prev = NULL;
187	int h;
188
189	tp = (Array *) ap->sval;
190	h = hash(s, tp->size);
191	for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
192		if (strcmp(s, p->nval) == 0) {
193			if (prev == NULL)	/* 1st one */
194				tp->tab[h] = p->cnext;
195			else			/* middle somewhere */
196				prev->cnext = p->cnext;
197			if (freeable(p))
198				xfree(p->sval);
199			free(p->nval);
200			free(p);
201			tp->nelem--;
202			return;
203		}
204}
205
206Cell *setsymtab(const char *n, const char *s, Awkfloat f, unsigned t, Array *tp)
207{
208	int h;
209	Cell *p;
210
211	if (n != NULL && (p = lookup(n, tp)) != NULL) {
212		   dprintf( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
213			p, NN(p->nval), NN(p->sval), p->fval, p->tval) );
214		return(p);
215	}
216	p = (Cell *) malloc(sizeof(Cell));
217	if (p == NULL)
218		FATAL("out of space for symbol table at %s", n);
219	p->nval = tostring(n);
220	p->sval = s ? tostring(s) : tostring("");
221	p->fval = f;
222	p->tval = t;
223	p->csub = CUNK;
224	p->ctype = OCELL;
225	tp->nelem++;
226	if (tp->nelem > FULLTAB * tp->size)
227		rehash(tp);
228	h = hash(n, tp->size);
229	p->cnext = tp->tab[h];
230	tp->tab[h] = p;
231	   dprintf( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
232		p, p->nval, p->sval, p->fval, p->tval) );
233	return(p);
234}
235
/*
 * hash: form hash value for string s, reduced modulo n.
 *
 * The accumulator is unsigned, so the multiply-by-31 recurrence wraps
 * instead of overflowing.  Each byte is added as a plain char, so bytes
 * above 0x7f contribute according to the platform's char signedness.
 * n must be non-zero.
 */
int hash(const char *s, int n)
{
	unsigned acc = 0;

	while (*s != '\0') {
		acc = 31 * acc + *s;
		s++;
	}
	return acc % n;
}
244
245void rehash(Array *tp)	/* rehash items in small table into big one */
246{
247	int i, nh, nsz;
248	Cell *cp, *op, **np;
249
250	nsz = GROWTAB * tp->size;
251	np = (Cell **) calloc(nsz, sizeof(Cell *));
252	if (np == NULL)		/* can't do it, but can keep running. */
253		return;		/* someone else will run out later. */
254	for (i = 0; i < tp->size; i++) {
255		for (cp = tp->tab[i]; cp; cp = op) {
256			op = cp->cnext;
257			nh = hash(cp->nval, nsz);
258			cp->cnext = np[nh];
259			np[nh] = cp;
260		}
261	}
262	free(tp->tab);
263	tp->tab = np;
264	tp->size = nsz;
265}
266
267Cell *lookup(const char *s, Array *tp)	/* look for s in tp */
268{
269	Cell *p;
270	int h;
271
272	h = hash(s, tp->size);
273	for (p = tp->tab[h]; p != NULL; p = p->cnext)
274		if (strcmp(s, p->nval) == 0)
275			return(p);	/* found it */
276	return(NULL);			/* not found */
277}
278
279Awkfloat setfval(Cell *vp, Awkfloat f)	/* set float val of a Cell */
280{
281	int fldno;
282
283	if ((vp->tval & (NUM | STR)) == 0)
284		funnyvar(vp, "assign to");
285	if (isfld(vp)) {
286		donerec = 0;	/* mark $0 invalid */
287		fldno = atoi(vp->nval);
288		if (fldno > *NF)
289			newfld(fldno);
290		   dprintf( ("setting field %d to %g\n", fldno, f) );
291	} else if (isrec(vp)) {
292		donefld = 0;	/* mark $1... invalid */
293		donerec = 1;
294	}
295	if (freeable(vp))
296		xfree(vp->sval); /* free any previous string */
297	vp->tval &= ~STR;	/* mark string invalid */
298	vp->tval |= NUM;	/* mark number ok */
299	   dprintf( ("setfval %p: %s = %g, t=%o\n", vp, NN(vp->nval), f, vp->tval) );
300	return vp->fval = f;
301}
302
303void funnyvar(Cell *vp, const char *rw)
304{
305	if (isarr(vp))
306		FATAL("can't %s %s; it's an array name.", rw, vp->nval);
307	if (vp->tval & FCN)
308		FATAL("can't %s %s; it's a function.", rw, vp->nval);
309	WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o",
310		vp, vp->nval, vp->sval, vp->fval, vp->tval);
311}
312
313char *setsval(Cell *vp, const char *s)	/* set string val of a Cell */
314{
315	char *t;
316	int fldno;
317
318	   dprintf( ("starting setsval %p: %s = \"%s\", t=%o\n", vp, NN(vp->nval), s, vp->tval) );
319	if ((vp->tval & (NUM | STR)) == 0)
320		funnyvar(vp, "assign to");
321	if (isfld(vp)) {
322		donerec = 0;	/* mark $0 invalid */
323		fldno = atoi(vp->nval);
324		if (fldno > *NF)
325			newfld(fldno);
326		   dprintf( ("setting field %d to %s (%p)\n", fldno, s, s) );
327	} else if (isrec(vp)) {
328		donefld = 0;	/* mark $1... invalid */
329		donerec = 1;
330	}
331	t = tostring(s);	/* in case it's self-assign */
332	vp->tval &= ~NUM;
333	vp->tval |= STR;
334	if (freeable(vp))
335		xfree(vp->sval);
336	vp->tval &= ~DONTFREE;
337	   dprintf( ("setsval %p: %s = \"%s (%p)\", t=%o\n", vp, NN(vp->nval), t,t, vp->tval) );
338	return(vp->sval = t);
339}
340
341Awkfloat getfval(Cell *vp)	/* get float val of a Cell */
342{
343	if ((vp->tval & (NUM | STR)) == 0)
344		funnyvar(vp, "read value of");
345	if (isfld(vp) && donefld == 0)
346		fldbld();
347	else if (isrec(vp) && donerec == 0)
348		recbld();
349	if (!isnum(vp)) {	/* not a number */
350		vp->fval = atof(vp->sval);	/* best guess */
351		if (is_number(vp->sval) && !(vp->tval&CON))
352			vp->tval |= NUM;	/* make NUM only sparingly */
353	}
354	   dprintf( ("getfval %p: %s = %g, t=%o\n", vp, NN(vp->nval), vp->fval, vp->tval) );
355	return(vp->fval);
356}
357
358 static char *get_str_val(Cell *vp, char **fmt)        /* get string val of a Cell */
359
360{
361	char s[100];	/* BUG: unchecked */
362	double dtemp;
363
364	if ((vp->tval & (NUM | STR)) == 0)
365		funnyvar(vp, "read value of");
366	if (isfld(vp) && donefld == 0)
367		fldbld();
368	else if (isrec(vp) && donerec == 0)
369		recbld();
370	if (isstr(vp) == 0) {
371		if (freeable(vp))
372			xfree(vp->sval);
373		if (modf(vp->fval, &dtemp) == 0)	/* it's integral */
374			sprintf(s, "%.30g", vp->fval);
375		else
376			sprintf(s, *fmt, vp->fval);
377		vp->sval = tostring(s);
378		vp->tval &= ~DONTFREE;
379		vp->tval |= STR;
380	}
381	   dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n", vp, NN(vp->nval), vp->sval, vp->sval, vp->tval) );
382	return(vp->sval);
383}
384
385char *getsval(Cell *vp)       /* get string val of a Cell */
386{
387      return get_str_val(vp, CONVFMT);
388}
389
390char *getpssval(Cell *vp)     /* get string val of a Cell for print */
391{
392      return get_str_val(vp, OFMT);
393}
394
395
/*
 * tostring: make a copy of string s.
 *
 * Returns a malloc'ed duplicate of s, terminator included.  FATAL() is
 * called when the allocation fails.  s must not be NULL.
 */
char *tostring(const char *s)
{
	size_t nbytes = strlen(s) + 1;	/* the text plus its terminating NUL */
	char *copy = (char *) malloc(nbytes);

	if (copy == NULL)
		FATAL("out of space in tostring on %s", s);
	memcpy(copy, s, nbytes);
	return copy;
}
406
407char *qstring(const char *is, int delim)	/* collect string up to next delim */
408{
409	const char *os = is;
410	int c, n;
411	uschar *s = (uschar *) is;
412	uschar *buf, *bp;
413
414	if ((buf = (uschar *) malloc(strlen(is)+3)) == NULL)
415		FATAL( "out of space in qstring(%s)", s);
416	for (bp = buf; (c = *s) != delim; s++) {
417		if (c == '\n')
418			SYNTAX( "newline in string %.20s...", os );
419		else if (c != '\\')
420			*bp++ = c;
421		else {	/* \something */
422			c = *++s;
423			if (c == 0) {	/* \ at end */
424				*bp++ = '\\';
425				break;	/* for loop */
426			}
427			switch (c) {
428			case '\\':	*bp++ = '\\'; break;
429			case 'n':	*bp++ = '\n'; break;
430			case 't':	*bp++ = '\t'; break;
431			case 'b':	*bp++ = '\b'; break;
432			case 'f':	*bp++ = '\f'; break;
433			case 'r':	*bp++ = '\r'; break;
434			default:
435				if (!isdigit(c)) {
436					*bp++ = c;
437					break;
438				}
439				n = c - '0';
440				if (isdigit(s[1])) {
441					n = 8 * n + *++s - '0';
442					if (isdigit(s[1]))
443						n = 8 * n + *++s - '0';
444				}
445				*bp++ = n;
446				break;
447			}
448		}
449	}
450	*bp++ = 0;
451	return (char *) buf;
452}
453