1/****************************************************************
2Copyright (C) Lucent Technologies 1997
3All Rights Reserved
4
5Permission to use, copy, modify, and distribute this software and
6its documentation for any purpose and without fee is hereby
7granted, provided that the above copyright notice appear in all
8copies and that both that the copyright notice and this
9permission notice and warranty disclaimer appear in supporting
10documentation, and that the name Lucent Technologies or any of
11its entities not be used in advertising or publicity pertaining
12to distribution of the software without specific, written prior
13permission.
14
15LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22THIS SOFTWARE.
23****************************************************************/
24
25#define	DEBUG
26#include <stdio.h>
27#include <math.h>
28#include <ctype.h>
29#include <string.h>
30#include <stdlib.h>
31#include "awk.h"
32#include "ytab.h"
33
34#define	FULLTAB	2	/* rehash when table gets this x full */
35#define	GROWTAB 4	/* grow table by this factor */
36
/*
 * Global interpreter state for the symbol table and the builtin variables.
 * The char ** and Awkfloat * variables below point into the sval / fval
 * members of the corresponding builtin cells; they are filled in by
 * syminit() (and ARGC by arginit()), so they are not valid before then.
 */
37Array	*symtab;	/* main symbol table */
38
39char	**FS;		/* initial field sep */
40char	**RS;		/* initial record sep */
41char	**OFS;		/* output field sep */
42char	**ORS;		/* output record sep */
43char	**OFMT;		/* output format for numbers */
44char	**CONVFMT;	/* format for conversions in getsval */
45Awkfloat *NF;		/* number of fields in current record */
46Awkfloat *NR;		/* number of current record */
47Awkfloat *FNR;		/* number of current record in current file */
48char	**FILENAME;	/* current filename argument */
49Awkfloat *ARGC;		/* number of arguments from command line */
50char	**SUBSEP;	/* subscript separator for a[i,j,k]; default \034 */
51Awkfloat *RSTART;	/* start of re matched with ~; origin 1 (!) */
52Awkfloat *RLENGTH;	/* length of same */
53
54Cell	*fsloc;		/* FS */
55Cell	*nrloc;		/* NR */
56Cell	*nfloc;		/* NF */
57Cell	*fnrloc;	/* FNR */
58Array	*ARGVtab;	/* symbol table containing ARGV[...] */
59Array	*ENVtab;	/* symbol table containing ENVIRON[...] */
60Cell	*rstartloc;	/* RSTART */
61Cell	*rlengthloc;	/* RLENGTH */
62Cell	*symtabloc;	/* SYMTAB */
63
64Cell	*nullloc;	/* a guaranteed empty cell */
65Node	*nullnode;	/* zero&null, converted into a node for comparisons */
66Cell	*literal0;	/* constant cell named "0": string "0", number 0.0 (set in syminit) */
67
68extern Cell **fldtab;
69
70void syminit(void)	/* initialize symbol table with builtin vars */
71{
72	literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab);
73	/* this is used for if(x)... tests: */
74	nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab);
75	nullnode = celltonode(nullloc, CCON);
76
77	fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab);
78	FS = &fsloc->sval;
79	RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
80	OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval;
81	ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
82	OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
83	CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
84	FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;
85	nfloc = setsymtab("NF", "", 0.0, NUM, symtab);
86	NF = &nfloc->fval;
87	nrloc = setsymtab("NR", "", 0.0, NUM, symtab);
88	NR = &nrloc->fval;
89	fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
90	FNR = &fnrloc->fval;
91	SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval;
92	rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
93	RSTART = &rstartloc->fval;
94	rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
95	RLENGTH = &rlengthloc->fval;
96	symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab);
97	symtabloc->sval = (char *) symtab;
98}
99
100void arginit(int ac, char **av)	/* set up ARGV and ARGC */
101{
102	Cell *cp;
103	int i;
104	char temp[50];
105
106	ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
107	cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
108	ARGVtab = makesymtab(NSYMTAB);	/* could be (int) ARGC as well */
109	cp->sval = (char *) ARGVtab;
110	for (i = 0; i < ac; i++) {
111		sprintf(temp, "%d", i);
112		if (is_number(*av))
113			setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);
114		else
115			setsymtab(temp, *av, 0.0, STR, ARGVtab);
116		av++;
117	}
118}
119
120void envinit(char **envp)	/* set up ENVIRON variable */
121{
122	Cell *cp;
123	char *p;
124
125	cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab);
126	ENVtab = makesymtab(NSYMTAB);
127	cp->sval = (char *) ENVtab;
128	for ( ; *envp; envp++) {
129		if ((p = strchr(*envp, '=')) == NULL)
130			continue;
131		if( p == *envp ) /* no left hand side name in env string */
132			continue;
133		*p++ = 0;	/* split into two strings at = */
134		if (is_number(p))
135			setsymtab(*envp, p, atof(p), STR|NUM, ENVtab);
136		else
137			setsymtab(*envp, p, 0.0, STR, ENVtab);
138		p[-1] = '=';	/* restore in case env is passed down to a shell */
139	}
140}
141
142Array *makesymtab(int n)	/* make a new symbol table */
143{
144	Array *ap;
145	Cell **tp;
146
147	ap = (Array *) malloc(sizeof(Array));
148	tp = (Cell **) calloc(n, sizeof(Cell *));
149	if (ap == NULL || tp == NULL)
150		FATAL("out of space in makesymtab");
151	ap->nelem = 0;
152	ap->size = n;
153	ap->tab = tp;
154	return(ap);
155}
156
157void freesymtab(Cell *ap)	/* free a symbol table */
158{
159	Cell *cp, *temp;
160	Array *tp;
161	int i;
162
163	if (!isarr(ap))
164		return;
165	tp = (Array *) ap->sval;
166	if (tp == NULL)
167		return;
168	for (i = 0; i < tp->size; i++) {
169		for (cp = tp->tab[i]; cp != NULL; cp = temp) {
170			xfree(cp->nval);
171			if (freeable(cp))
172				xfree(cp->sval);
173			temp = cp->cnext;	/* avoids freeing then using */
174			free(cp);
175			tp->nelem--;
176		}
177		tp->tab[i] = 0;
178	}
179	if (tp->nelem != 0)
180		WARNING("can't happen: inconsistent element count freeing %s", ap->nval);
181	free(tp->tab);
182	free(tp);
183}
184
185void freeelem(Cell *ap, const char *s)	/* free elem s from ap (i.e., ap["s"] */
186{
187	Array *tp;
188	Cell *p, *prev = NULL;
189	int h;
190
191	tp = (Array *) ap->sval;
192	h = hash(s, tp->size);
193	for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
194		if (strcmp(s, p->nval) == 0) {
195			if (prev == NULL)	/* 1st one */
196				tp->tab[h] = p->cnext;
197			else			/* middle somewhere */
198				prev->cnext = p->cnext;
199			if (freeable(p))
200				xfree(p->sval);
201			free(p->nval);
202			free(p);
203			tp->nelem--;
204			return;
205		}
206}
207
208Cell *setsymtab(const char *n, const char *s, Awkfloat f, unsigned t, Array *tp)
209{
210	int h;
211	Cell *p;
212
213	if (n != NULL && (p = lookup(n, tp)) != NULL) {
214		   dprintf( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
215			p, NN(p->nval), NN(p->sval), p->fval, p->tval) );
216		return(p);
217	}
218	p = (Cell *) malloc(sizeof(Cell));
219	if (p == NULL)
220		FATAL("out of space for symbol table at %s", n);
221	p->nval = tostring(n);
222	p->sval = s ? tostring(s) : tostring("");
223	p->fval = f;
224	p->tval = t;
225	p->csub = CUNK;
226	p->ctype = OCELL;
227	tp->nelem++;
228	if (tp->nelem > FULLTAB * tp->size)
229		rehash(tp);
230	h = hash(n, tp->size);
231	p->cnext = tp->tab[h];
232	tp->tab[h] = p;
233	   dprintf( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
234		p, p->nval, p->sval, p->fval, p->tval) );
235	return(p);
236}
237
/*
 * hash - form the hash value for string s in a table of n buckets.
 *
 * Folds the characters with multiplier 31 in unsigned arithmetic and
 * reduces the result modulo n.  n must be non-zero.
 */
int hash(const char *s, int n)
{
	unsigned acc = 0;
	const char *cp;

	for (cp = s; *cp != '\0'; cp++)
		acc = acc * 31 + *cp;
	return acc % n;
}
246
247void rehash(Array *tp)	/* rehash items in small table into big one */
248{
249	int i, nh, nsz;
250	Cell *cp, *op, **np;
251
252	nsz = GROWTAB * tp->size;
253	np = (Cell **) calloc(nsz, sizeof(Cell *));
254	if (np == NULL)		/* can't do it, but can keep running. */
255		return;		/* someone else will run out later. */
256	for (i = 0; i < tp->size; i++) {
257		for (cp = tp->tab[i]; cp; cp = op) {
258			op = cp->cnext;
259			nh = hash(cp->nval, nsz);
260			cp->cnext = np[nh];
261			np[nh] = cp;
262		}
263	}
264	free(tp->tab);
265	tp->tab = np;
266	tp->size = nsz;
267}
268
269Cell *lookup(const char *s, Array *tp)	/* look for s in tp */
270{
271	Cell *p;
272	int h;
273
274	h = hash(s, tp->size);
275	for (p = tp->tab[h]; p != NULL; p = p->cnext)
276		if (strcmp(s, p->nval) == 0)
277			return(p);	/* found it */
278	return(NULL);			/* not found */
279}
280
281Awkfloat setfval(Cell *vp, Awkfloat f)	/* set float val of a Cell */
282{
283	int fldno;
284
285	if ((vp->tval & (NUM | STR)) == 0)
286		funnyvar(vp, "assign to");
287	if (isfld(vp)) {
288		donerec = 0;	/* mark $0 invalid */
289		fldno = atoi(vp->nval);
290		if (fldno > *NF)
291			newfld(fldno);
292		   dprintf( ("setting field %d to %g\n", fldno, f) );
293	} else if (isrec(vp)) {
294		donefld = 0;	/* mark $1... invalid */
295		donerec = 1;
296	}
297	if (freeable(vp))
298		xfree(vp->sval); /* free any previous string */
299	vp->tval &= ~STR;	/* mark string invalid */
300	vp->tval |= NUM;	/* mark number ok */
301	   dprintf( ("setfval %p: %s = %g, t=%o\n", vp, NN(vp->nval), f, vp->tval) );
302	return vp->fval = f;
303}
304
305void funnyvar(Cell *vp, const char *rw)
306{
307	if (isarr(vp))
308		FATAL("can't %s %s; it's an array name.", rw, vp->nval);
309	if (vp->tval & FCN)
310		FATAL("can't %s %s; it's a function.", rw, vp->nval);
311	WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o",
312		vp, vp->nval, vp->sval, vp->fval, vp->tval);
313}
314
315char *setsval(Cell *vp, const char *s)	/* set string val of a Cell */
316{
317	char *t;
318	int fldno;
319
320	   dprintf( ("starting setsval %p: %s = \"%s\", t=%o, r,f=%d,%d\n",
321		vp, NN(vp->nval), s, vp->tval, donerec, donefld) );
322	if ((vp->tval & (NUM | STR)) == 0)
323		funnyvar(vp, "assign to");
324	if (isfld(vp)) {
325		donerec = 0;	/* mark $0 invalid */
326		fldno = atoi(vp->nval);
327		if (fldno > *NF)
328			newfld(fldno);
329		   dprintf( ("setting field %d to %s (%p)\n", fldno, s, s) );
330	} else if (isrec(vp)) {
331		donefld = 0;	/* mark $1... invalid */
332		donerec = 1;
333	}
334	t = tostring(s);	/* in case it's self-assign */
335	if (freeable(vp))
336		xfree(vp->sval);
337	vp->tval &= ~NUM;
338	vp->tval |= STR;
339	vp->tval &= ~DONTFREE;
340	   dprintf( ("setsval %p: %s = \"%s (%p) \", t=%o r,f=%d,%d\n",
341		vp, NN(vp->nval), t,t, vp->tval, donerec, donefld) );
342	return(vp->sval = t);
343}
344
345Awkfloat getfval(Cell *vp)	/* get float val of a Cell */
346{
347	if ((vp->tval & (NUM | STR)) == 0)
348		funnyvar(vp, "read value of");
349	if (isfld(vp) && donefld == 0)
350		fldbld();
351	else if (isrec(vp) && donerec == 0)
352		recbld();
353	if (!isnum(vp)) {	/* not a number */
354		vp->fval = atof(vp->sval);	/* best guess */
355		if (is_number(vp->sval) && !(vp->tval&CON))
356			vp->tval |= NUM;	/* make NUM only sparingly */
357	}
358	   dprintf( ("getfval %p: %s = %g, t=%o\n", vp, NN(vp->nval), vp->fval, vp->tval) );
359	return(vp->fval);
360}
361
362static char *get_str_val(Cell *vp, char **fmt)        /* get string val of a Cell */
363{
364	char s[100];	/* BUG: unchecked */
365	double dtemp;
366
367	if ((vp->tval & (NUM | STR)) == 0)
368		funnyvar(vp, "read value of");
369	if (isfld(vp) && donefld == 0)
370		fldbld();
371	else if (isrec(vp) && donerec == 0)
372		recbld();
373	if (isstr(vp) == 0) {
374		if (freeable(vp))
375			xfree(vp->sval);
376		if (modf(vp->fval, &dtemp) == 0)	/* it's integral */
377			sprintf(s, "%.30g", vp->fval);
378		else
379			sprintf(s, *fmt, vp->fval);
380		vp->sval = tostring(s);
381#if 0
382		/*
383		  Cannot reuse the converted form unless you confirm
384		  that the current value of CONVFMT is the same as that
385		  which was used to convert the previous string. See
386		  conformance test awk.ex 233
387		*/
388		vp->tval &= ~DONTFREE;
389		vp->tval |= STR;
390#endif
391	}
392	   dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n", vp, NN(vp->nval), vp->sval, vp->sval, vp->tval) );
393	return(vp->sval);
394}
395
396char *getsval(Cell *vp)       /* get string val of a Cell */
397{
398      return get_str_val(vp, CONVFMT);
399}
400
401char *getpssval(Cell *vp)     /* get string val of a Cell for print */
402{
403      return get_str_val(vp, OFMT);
404}
405
406
/*
 * tostring - return a freshly malloc'ed copy of string s.
 *
 * Calls FATAL when memory cannot be obtained.  s must not be NULL.
 */
char *tostring(const char *s)
{
	size_t nbytes = strlen(s) + 1;	/* include the terminating NUL */
	char *copy = (char *) malloc(nbytes);

	if (copy == NULL)
		FATAL("out of space in tostring on %s", s);
	memcpy(copy, s, nbytes);
	return copy;
}
417
418char *qstring(const char *is, int delim)	/* collect string up to next delim */
419{
420	const char *os = is;
421	int c, n;
422	uschar *s = (uschar *) is;
423	uschar *buf, *bp;
424
425	if ((buf = (uschar *) malloc(strlen(is)+3)) == NULL)
426		FATAL( "out of space in qstring(%s)", s);
427	for (bp = buf; (c = *s) != delim; s++) {
428		if (c == '\n')
429			SYNTAX( "newline in string %.20s...", os );
430		else if (c != '\\')
431			*bp++ = c;
432		else {	/* \something */
433			c = *++s;
434			if (c == 0) {	/* \ at end */
435				*bp++ = '\\';
436				break;	/* for loop */
437			}
438			switch (c) {
439			case '\\':	*bp++ = '\\'; break;
440			case 'n':	*bp++ = '\n'; break;
441			case 't':	*bp++ = '\t'; break;
442			case 'b':	*bp++ = '\b'; break;
443			case 'f':	*bp++ = '\f'; break;
444			case 'r':	*bp++ = '\r'; break;
445			default:
446				if (!isdigit(c)) {
447					*bp++ = c;
448					break;
449				}
450				n = c - '0';
451				if (isdigit(s[1])) {
452					n = 8 * n + *++s - '0';
453					if (isdigit(s[1]))
454						n = 8 * n + *++s - '0';
455				}
456				*bp++ = n;
457				break;
458			}
459		}
460	}
461	*bp++ = 0;
462	return (char *) buf;
463}
464