tran.c revision 224731
1/****************************************************************
2Copyright (C) Lucent Technologies 1997
3All Rights Reserved
4
5Permission to use, copy, modify, and distribute this software and
6its documentation for any purpose and without fee is hereby
7granted, provided that the above copyright notice appear in all
8copies and that both that the copyright notice and this
9permission notice and warranty disclaimer appear in supporting
10documentation, and that the name Lucent Technologies or any of
11its entities not be used in advertising or publicity pertaining
12to distribution of the software without specific, written prior
13permission.
14
15LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22THIS SOFTWARE.
23****************************************************************/
24
25#define	DEBUG
26#include <stdio.h>
27#include <math.h>
28#include <ctype.h>
29#include <string.h>
30#include <stdlib.h>
31#include "awk.h"
32#include "ytab.h"
33
34#define	FULLTAB	2	/* rehash when table gets this x full */
35#define	GROWTAB 4	/* grow table by this factor */
36
/*
 * File-level state for the symbol table and the built-in variables.
 *
 * The char ** and Awkfloat * pointers below do not own storage: syminit()
 * and arginit() aim each one at the sval or fval member of the Cell that
 * setsymtab() created for the built-in of the same name, so *NF, *FS, etc.
 * read that Cell's value slot directly.
 */
37Array	*symtab;	/* main symbol table */
38
39char	**FS;		/* initial field sep */
40char	**RS;		/* initial record sep */
41char	**OFS;		/* output field sep */
42char	**ORS;		/* output record sep */
43char	**OFMT;		/* output format for numbers */
44char	**CONVFMT;	/* format for conversions in getsval */
45Awkfloat *NF;		/* number of fields in current record */
46Awkfloat *NR;		/* number of current record */
47Awkfloat *FNR;		/* number of current record in current file */
48char	**FILENAME;	/* current filename argument */
49Awkfloat *ARGC;		/* number of arguments from command line */
50char	**SUBSEP;	/* subscript separator for a[i,j,k]; default \034 */
51Awkfloat *RSTART;	/* start of re matched with ~; origin 1 (!) */
52Awkfloat *RLENGTH;	/* length of same */
53
/* Cells (and sub-tables) for built-ins that are needed as whole objects. */
54Cell	*fsloc;		/* FS */
55Cell	*nrloc;		/* NR */
56Cell	*nfloc;		/* NF */
57Cell	*fnrloc;	/* FNR */
58Array	*ARGVtab;	/* symbol table containing ARGV[...] */
59Array	*ENVtab;	/* symbol table containing ENVIRON[...] */
60Cell	*rstartloc;	/* RSTART */
61Cell	*rlengthloc;	/* RLENGTH */
62Cell	*symtabloc;	/* SYMTAB; its sval holds the symtab pointer itself */
63
64Cell	*nullloc;	/* a guaranteed empty cell */
65Node	*nullnode;	/* zero&null, converted into a node for comparisons */
66Cell	*literal0;	/* constant cell named "0" with value "0"/0.0, made in syminit */
67
68extern Cell **fldtab;	/* defined in another file; presumably the field cells -- confirm */
69
70void syminit(void)	/* initialize symbol table with builtin vars */
71{
72	literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab);
73	/* this is used for if(x)... tests: */
74	nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab);
75	nullnode = celltonode(nullloc, CCON);
76
77	fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab);
78	FS = &fsloc->sval;
79	RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
80	OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval;
81	ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
82	OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
83	CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
84	FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;
85	nfloc = setsymtab("NF", "", 0.0, NUM, symtab);
86	NF = &nfloc->fval;
87	nrloc = setsymtab("NR", "", 0.0, NUM, symtab);
88	NR = &nrloc->fval;
89	fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
90	FNR = &fnrloc->fval;
91	SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval;
92	rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
93	RSTART = &rstartloc->fval;
94	rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
95	RLENGTH = &rlengthloc->fval;
96	symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab);
97	symtabloc->sval = (char *) symtab;
98}
99
100void arginit(int ac, char **av)	/* set up ARGV and ARGC */
101{
102	Cell *cp;
103	int i;
104	char temp[50];
105
106	ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
107	cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
108	ARGVtab = makesymtab(NSYMTAB);	/* could be (int) ARGC as well */
109	cp->sval = (char *) ARGVtab;
110	for (i = 0; i < ac; i++) {
111		sprintf(temp, "%d", i);
112		if (is_number(*av))
113			setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);
114		else
115			setsymtab(temp, *av, 0.0, STR, ARGVtab);
116		av++;
117	}
118}
119
120void envinit(char **envp)	/* set up ENVIRON variable */
121{
122	Cell *cp;
123	char *p;
124
125	cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab);
126	ENVtab = makesymtab(NSYMTAB);
127	cp->sval = (char *) ENVtab;
128	for ( ; *envp; envp++) {
129		if ((p = strchr(*envp, '=')) == NULL)
130			continue;
131		if( p == *envp ) /* no left hand side name in env string */
132			continue;
133		*p++ = 0;	/* split into two strings at = */
134		if (is_number(p))
135			setsymtab(*envp, p, atof(p), STR|NUM, ENVtab);
136		else
137			setsymtab(*envp, p, 0.0, STR, ENVtab);
138		p[-1] = '=';	/* restore in case env is passed down to a shell */
139	}
140}
141
142Array *makesymtab(int n)	/* make a new symbol table */
143{
144	Array *ap;
145	Cell **tp;
146
147	ap = (Array *) malloc(sizeof(Array));
148	tp = (Cell **) calloc(n, sizeof(Cell *));
149	if (ap == NULL || tp == NULL)
150		FATAL("out of space in makesymtab");
151	ap->nelem = 0;
152	ap->size = n;
153	ap->tab = tp;
154	return(ap);
155}
156
157void freesymtab(Cell *ap)	/* free a symbol table */
158{
159	Cell *cp, *temp;
160	Array *tp;
161	int i;
162
163	if (!isarr(ap))
164		return;
165	tp = (Array *) ap->sval;
166	if (tp == NULL)
167		return;
168	for (i = 0; i < tp->size; i++) {
169		for (cp = tp->tab[i]; cp != NULL; cp = temp) {
170			xfree(cp->nval);
171			if (freeable(cp))
172				xfree(cp->sval);
173			temp = cp->cnext;	/* avoids freeing then using */
174			free(cp);
175			tp->nelem--;
176		}
177		tp->tab[i] = 0;
178	}
179	if (tp->nelem != 0)
180		WARNING("can't happen: inconsistent element count freeing %s", ap->nval);
181	free(tp->tab);
182	free(tp);
183}
184
185void freeelem(Cell *ap, const char *s)	/* free elem s from ap (i.e., ap["s"] */
186{
187	Array *tp;
188	Cell *p, *prev = NULL;
189	int h;
190
191	tp = (Array *) ap->sval;
192	h = hash(s, tp->size);
193	for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
194		if (strcmp(s, p->nval) == 0) {
195			if (prev == NULL)	/* 1st one */
196				tp->tab[h] = p->cnext;
197			else			/* middle somewhere */
198				prev->cnext = p->cnext;
199			if (freeable(p))
200				xfree(p->sval);
201			free(p->nval);
202			free(p);
203			tp->nelem--;
204			return;
205		}
206}
207
208Cell *setsymtab(const char *n, const char *s, Awkfloat f, unsigned t, Array *tp)
209{
210	int h;
211	Cell *p;
212
213	if (n != NULL && (p = lookup(n, tp)) != NULL) {
214		   dprintf( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
215			(void*)p, NN(p->nval), NN(p->sval), p->fval, p->tval) );
216		return(p);
217	}
218	p = (Cell *) malloc(sizeof(Cell));
219	if (p == NULL)
220		FATAL("out of space for symbol table at %s", n);
221	p->nval = tostring(n);
222	p->sval = s ? tostring(s) : tostring("");
223	p->fval = f;
224	p->tval = t;
225	p->csub = CUNK;
226	p->ctype = OCELL;
227	tp->nelem++;
228	if (tp->nelem > FULLTAB * tp->size)
229		rehash(tp);
230	h = hash(n, tp->size);
231	p->cnext = tp->tab[h];
232	tp->tab[h] = p;
233	   dprintf( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
234		(void*)p, p->nval, p->sval, p->fval, p->tval) );
235	return(p);
236}
237
/*
 * hash: map string s to a bucket index in the range 0 .. n-1.
 * Multiply-by-31 polynomial hash accumulated in unsigned arithmetic, so
 * overflow simply wraps.
 */
int hash(const char *s, int n)
{
	unsigned acc = 0;
	const char *p;

	for (p = s; *p != '\0'; p++)
		acc = acc * 31 + *p;
	return acc % n;
}
246
247void rehash(Array *tp)	/* rehash items in small table into big one */
248{
249	int i, nh, nsz;
250	Cell *cp, *op, **np;
251
252	nsz = GROWTAB * tp->size;
253	np = (Cell **) calloc(nsz, sizeof(Cell *));
254	if (np == NULL)		/* can't do it, but can keep running. */
255		return;		/* someone else will run out later. */
256	for (i = 0; i < tp->size; i++) {
257		for (cp = tp->tab[i]; cp; cp = op) {
258			op = cp->cnext;
259			nh = hash(cp->nval, nsz);
260			cp->cnext = np[nh];
261			np[nh] = cp;
262		}
263	}
264	free(tp->tab);
265	tp->tab = np;
266	tp->size = nsz;
267}
268
269Cell *lookup(const char *s, Array *tp)	/* look for s in tp */
270{
271	Cell *p;
272	int h;
273
274	h = hash(s, tp->size);
275	for (p = tp->tab[h]; p != NULL; p = p->cnext)
276		if (strcmp(s, p->nval) == 0)
277			return(p);	/* found it */
278	return(NULL);			/* not found */
279}
280
281Awkfloat setfval(Cell *vp, Awkfloat f)	/* set float val of a Cell */
282{
283	int fldno;
284
285	if ((vp->tval & (NUM | STR)) == 0)
286		funnyvar(vp, "assign to");
287	if (isfld(vp)) {
288		donerec = 0;	/* mark $0 invalid */
289		fldno = atoi(vp->nval);
290		if (fldno > *NF)
291			newfld(fldno);
292		   dprintf( ("setting field %d to %g\n", fldno, f) );
293	} else if (isrec(vp)) {
294		donefld = 0;	/* mark $1... invalid */
295		donerec = 1;
296	}
297	if (freeable(vp))
298		xfree(vp->sval); /* free any previous string */
299	vp->tval &= ~STR;	/* mark string invalid */
300	vp->tval |= NUM;	/* mark number ok */
301	   dprintf( ("setfval %p: %s = %g, t=%o\n", (void*)vp, NN(vp->nval), f, vp->tval) );
302	return vp->fval = f;
303}
304
305void funnyvar(Cell *vp, const char *rw)
306{
307	if (isarr(vp))
308		FATAL("can't %s %s; it's an array name.", rw, vp->nval);
309	if (vp->tval & FCN)
310		FATAL("can't %s %s; it's a function.", rw, vp->nval);
311	WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o",
312		vp, vp->nval, vp->sval, vp->fval, vp->tval);
313}
314
315char *setsval(Cell *vp, const char *s)	/* set string val of a Cell */
316{
317	char *t;
318	int fldno;
319
320	   dprintf( ("starting setsval %p: %s = \"%s\", t=%o, r,f=%d,%d\n",
321		(void*)vp, NN(vp->nval), s, vp->tval, donerec, donefld) );
322	if ((vp->tval & (NUM | STR)) == 0)
323		funnyvar(vp, "assign to");
324	if (isfld(vp)) {
325		donerec = 0;	/* mark $0 invalid */
326		fldno = atoi(vp->nval);
327		if (fldno > *NF)
328			newfld(fldno);
329		   dprintf( ("setting field %d to %s (%p)\n", fldno, s, s) );
330	} else if (isrec(vp)) {
331		donefld = 0;	/* mark $1... invalid */
332		donerec = 1;
333	}
334	t = tostring(s);	/* in case it's self-assign */
335	if (freeable(vp))
336		xfree(vp->sval);
337	vp->tval &= ~NUM;
338	vp->tval |= STR;
339	vp->tval &= ~DONTFREE;
340	   dprintf( ("setsval %p: %s = \"%s (%p) \", t=%o r,f=%d,%d\n",
341		(void*)vp, NN(vp->nval), t,t, vp->tval, donerec, donefld) );
342	return(vp->sval = t);
343}
344
345Awkfloat getfval(Cell *vp)	/* get float val of a Cell */
346{
347	if ((vp->tval & (NUM | STR)) == 0)
348		funnyvar(vp, "read value of");
349	if (isfld(vp) && donefld == 0)
350		fldbld();
351	else if (isrec(vp) && donerec == 0)
352		recbld();
353	if (!isnum(vp)) {	/* not a number */
354		vp->fval = atof(vp->sval);	/* best guess */
355		if (is_number(vp->sval) && !(vp->tval&CON))
356			vp->tval |= NUM;	/* make NUM only sparingly */
357	}
358	   dprintf( ("getfval %p: %s = %g, t=%o\n",
359		(void*)vp, NN(vp->nval), vp->fval, vp->tval) );
360	return(vp->fval);
361}
362
363static char *get_str_val(Cell *vp, char **fmt)        /* get string val of a Cell */
364{
365	char s[100];	/* BUG: unchecked */
366	double dtemp;
367
368	if ((vp->tval & (NUM | STR)) == 0)
369		funnyvar(vp, "read value of");
370	if (isfld(vp) && donefld == 0)
371		fldbld();
372	else if (isrec(vp) && donerec == 0)
373		recbld();
374	if (isstr(vp) == 0) {
375		if (freeable(vp))
376			xfree(vp->sval);
377		if (modf(vp->fval, &dtemp) == 0)	/* it's integral */
378			sprintf(s, "%.30g", vp->fval);
379		else
380			sprintf(s, *fmt, vp->fval);
381		vp->sval = tostring(s);
382		vp->tval &= ~DONTFREE;
383		vp->tval |= STR;
384	}
385	   dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n",
386		(void*)vp, NN(vp->nval), vp->sval, vp->sval, vp->tval) );
387	return(vp->sval);
388}
389
390char *getsval(Cell *vp)       /* get string val of a Cell */
391{
392      return get_str_val(vp, CONVFMT);
393}
394
395char *getpssval(Cell *vp)     /* get string val of a Cell for print */
396{
397      return get_str_val(vp, OFMT);
398}
399
400
/*
 * tostring: return a freshly malloc'ed copy of string s.
 * Calls FATAL if the copy cannot be allocated.
 */
char *tostring(const char *s)
{
	size_t nbytes = strlen(s) + 1;	/* text plus terminating NUL */
	char *copy = (char *) malloc(nbytes);

	if (copy == NULL)
		FATAL("out of space in tostring on %s", s);
	memcpy(copy, s, nbytes);
	return copy;
}
411
412char *qstring(const char *is, int delim)	/* collect string up to next delim */
413{
414	const char *os = is;
415	int c, n;
416	uschar *s = (uschar *) is;
417	uschar *buf, *bp;
418
419	if ((buf = (uschar *) malloc(strlen(is)+3)) == NULL)
420		FATAL( "out of space in qstring(%s)", s);
421	for (bp = buf; (c = *s) != delim; s++) {
422		if (c == '\n')
423			SYNTAX( "newline in string %.20s...", os );
424		else if (c != '\\')
425			*bp++ = c;
426		else {	/* \something */
427			c = *++s;
428			if (c == 0) {	/* \ at end */
429				*bp++ = '\\';
430				break;	/* for loop */
431			}
432			switch (c) {
433			case '\\':	*bp++ = '\\'; break;
434			case 'n':	*bp++ = '\n'; break;
435			case 't':	*bp++ = '\t'; break;
436			case 'b':	*bp++ = '\b'; break;
437			case 'f':	*bp++ = '\f'; break;
438			case 'r':	*bp++ = '\r'; break;
439			default:
440				if (!isdigit(c)) {
441					*bp++ = c;
442					break;
443				}
444				n = c - '0';
445				if (isdigit(s[1])) {
446					n = 8 * n + *++s - '0';
447					if (isdigit(s[1]))
448						n = 8 * n + *++s - '0';
449				}
450				*bp++ = n;
451				break;
452			}
453		}
454	}
455	*bp++ = 0;
456	return (char *) buf;
457}
458