1/****************************************************************
2Copyright (C) Lucent Technologies 1997
3All Rights Reserved
4
5Permission to use, copy, modify, and distribute this software and
6its documentation for any purpose and without fee is hereby
7granted, provided that the above copyright notice appear in all
8copies and that both that the copyright notice and this
9permission notice and warranty disclaimer appear in supporting
10documentation, and that the name Lucent Technologies or any of
11its entities not be used in advertising or publicity pertaining
12to distribution of the software without specific, written prior
13permission.
14
15LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22THIS SOFTWARE.
23****************************************************************/
24
25#if HAVE_NBTOOL_CONFIG_H
26#include "nbtool_config.h"
27#endif
28
29#include <stdio.h>
30#include <stdlib.h>
31#include <string.h>
32#include <ctype.h>
33#include "awk.h"
34#include "awkgram.h"
35
36extern YYSTYPE	yylval;
37extern int	infunc;
38
39int	lineno	= 1;
40int	bracecnt = 0;
41int	brackcnt  = 0;
42int	parencnt = 0;
43
/* One row of the reserved-word table searched by binsearch(). */
struct Keyword {
	const char *word;	/* spelling, compared with strcmp() */
	int sub;		/* value word() stores in yylval.i */
	int type;		/* token type word() returns to the parser */
};
typedef struct Keyword Keyword;
49
50int peek(void);
51int gettok(char **, int *);
52int binsearch(const char *, const Keyword *, int);
53
54const Keyword keywords[] ={	/* keep sorted: binary searched */
55	{ "BEGIN",	XBEGIN,		XBEGIN },
56	{ "END",	XEND,		XEND },
57	{ "NF",		VARNF,		VARNF },
58	{ "atan2",	FATAN,		BLTIN },
59	{ "break",	BREAK,		BREAK },
60	{ "close",	CLOSE,		CLOSE },
61	{ "continue",	CONTINUE,	CONTINUE },
62	{ "cos",	FCOS,		BLTIN },
63	{ "delete",	DELETE,		DELETE },
64	{ "do",		DO,		DO },
65	{ "else",	ELSE,		ELSE },
66	{ "exit",	EXIT,		EXIT },
67	{ "exp",	FEXP,		BLTIN },
68	{ "fflush",	FFLUSH,		BLTIN },
69	{ "for",	FOR,		FOR },
70	{ "func",	FUNC,		FUNC },
71	{ "function",	FUNC,		FUNC },
72	{ "gensub",	GENSUB,		GENSUB },
73	{ "getline",	GETLINE,	GETLINE },
74	{ "gsub",	GSUB,		GSUB },
75	{ "if",		IF,		IF },
76	{ "in",		IN,		IN },
77	{ "index",	INDEX,		INDEX },
78	{ "int",	FINT,		BLTIN },
79	{ "length",	FLENGTH,	BLTIN },
80	{ "log",	FLOG,		BLTIN },
81	{ "match",	MATCHFCN,	MATCHFCN },
82	{ "next",	NEXT,		NEXT },
83	{ "nextfile",	NEXTFILE,	NEXTFILE },
84	{ "print",	PRINT,		PRINT },
85	{ "printf",	PRINTF,		PRINTF },
86	{ "rand",	FRAND,		BLTIN },
87	{ "return",	RETURN,		RETURN },
88	{ "sin",	FSIN,		BLTIN },
89	{ "split",	SPLIT,		SPLIT },
90	{ "sprintf",	SPRINTF,	SPRINTF },
91	{ "sqrt",	FSQRT,		BLTIN },
92	{ "srand",	FSRAND,		BLTIN },
93	{ "strftime",	FSTRFTIME,	BLTIN },
94	{ "sub",	SUB,		SUB },
95	{ "substr",	SUBSTR,		SUBSTR },
96	{ "system",	FSYSTEM,	BLTIN },
97	{ "systime",	FSYSTIME,	BLTIN },
98	{ "tolower",	FTOLOWER,	BLTIN },
99	{ "toupper",	FTOUPPER,	BLTIN },
100	{ "while",	WHILE,		WHILE },
101};
102
/*
 * RET(x): return token x from the enclosing function, first printing its
 * name when dbg is set.  x is expanded twice, so it must be free of side
 * effects.  Wrapped in do { } while (0) so that "RET(x);" is exactly one
 * statement and stays valid as the body of an if that has an else.
 */
#define	RET(x)	do { if (dbg) printf("lex %s\n", tokname(x)); return (x); } while (0)
104
/* Return the next input character without consuming it. */
int peek(void)
{
	const int next = input();

	unput(next);	/* hand it straight back to the pushback buffer */
	return next;
}
111
112int gettok(char **pbuf, int *psz)	/* get next input token */
113{
114	int c, retc;
115	uschar *buf = (uschar *) *pbuf;
116	int sz = *psz;
117	uschar *bp = buf;
118
119	c = input();
120	if (c == 0)
121		return 0;
122	buf[0] = c;
123	buf[1] = 0;
124	if (!isalnum(c) && c != '.' && c != '_')
125		return c;
126
127	*bp++ = c;
128	if (isalpha(c) || c == '_') {	/* it's a varname */
129		for ( ; (c = input()) != 0; ) {
130			if (bp-buf >= sz)
131				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
132					FATAL( "out of space for name %.10s...", buf );
133			if (isalnum(c) || c == '_')
134				*bp++ = c;
135			else {
136				*bp = 0;
137				unput(c);
138				break;
139			}
140		}
141		*bp = 0;
142		retc = 'a';	/* alphanumeric */
143	} else {	/* maybe it's a number, but could be . */
144		char *rem;
145		/* read input until can't be a number */
146		for ( ; (c = input()) != 0; ) {
147			if (bp-buf >= sz)
148				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
149					FATAL( "out of space for number %.10s...", buf );
150			if (isdigit(c) || c == 'e' || c == 'E'
151			  || c == '.' || c == '+' || c == '-')
152				*bp++ = c;
153			else {
154				unput(c);
155				break;
156			}
157		}
158		*bp = 0;
159		strtod(buf, &rem);	/* parse the number */
160		if (rem == (char *)buf) {	/* it wasn't a valid number at all */
161			buf[1] = 0;	/* return one character as token */
162			retc = buf[0];	/* character is its own type */
163			unputstr(rem+1); /* put rest back for later */
164		} else {	/* some prefix was a number */
165			unputstr(rem);	/* put rest back for later */
166			rem[0] = 0;	/* truncate buf after number part */
167			retc = '0';	/* type is number */
168		}
169	}
170	*pbuf = buf;
171	*psz = sz;
172	return retc;
173}
174
/* Token-level helpers defined further down. */
int word(char *w);
int string(void);
int regexpr(void);

/* One-shot flags examined (and cleared) at the top of yylex(). */
int sc = 0;	/* nonzero: the very next token is '}' */
int reg = 0;	/* nonzero: the very next token is a REGEXPR; see startreg() */
180
181int yylex(void)
182{
183	int c;
184	static char *buf = 0;
185	static int bufsize = 5; /* BUG: setting this small causes core dump! */
186
187	if (buf == 0 && (buf = malloc(bufsize)) == NULL)
188		FATAL( "out of space in yylex" );
189	if (sc) {
190		sc = 0;
191		RET('}');
192	}
193	if (reg) {
194		reg = 0;
195		return regexpr();
196	}
197	for (;;) {
198		c = gettok(&buf, &bufsize);
199		if (c == 0)
200			return 0;
201		if (isalpha(c) || c == '_')
202			return word(buf);
203		if (isdigit(c)) {
204			yylval.cp = setsymtab(buf, tostring(buf), atof(buf), CON|NUM, symtab);
205			/* should this also have STR set? */
206			RET(NUMBER);
207		}
208
209		yylval.i = c;
210		switch (c) {
211		case '\n':	/* {EOL} */
212			RET(NL);
213		case '\r':	/* assume \n is coming */
214		case ' ':	/* {WS}+ */
215		case '\t':
216			break;
217		case '#':	/* #.* strip comments */
218			while ((c = input()) != '\n' && c != 0)
219				;
220			unput(c);
221			break;
222		case ';':
223			RET(';');
224		case '\\':
225			if (peek() == '\n') {
226				input();
227			} else if (peek() == '\r') {
228				input(); input();	/* \n */
229				lineno++;
230			} else {
231				RET(c);
232			}
233			break;
234		case '&':
235			if (peek() == '&') {
236				input(); RET(AND);
237			} else
238				RET('&');
239		case '|':
240			if (peek() == '|') {
241				input(); RET(BOR);
242			} else
243				RET('|');
244		case '!':
245			if (peek() == '=') {
246				input(); yylval.i = NE; RET(NE);
247			} else if (peek() == '~') {
248				input(); yylval.i = NOTMATCH; RET(MATCHOP);
249			} else
250				RET(NOT);
251		case '~':
252			yylval.i = MATCH;
253			RET(MATCHOP);
254		case '<':
255			if (peek() == '=') {
256				input(); yylval.i = LE; RET(LE);
257			} else {
258				yylval.i = LT; RET(LT);
259			}
260		case '=':
261			if (peek() == '=') {
262				input(); yylval.i = EQ; RET(EQ);
263			} else {
264				yylval.i = ASSIGN; RET(ASGNOP);
265			}
266		case '>':
267			if (peek() == '=') {
268				input(); yylval.i = GE; RET(GE);
269			} else if (peek() == '>') {
270				input(); yylval.i = APPEND; RET(APPEND);
271			} else {
272				yylval.i = GT; RET(GT);
273			}
274		case '+':
275			if (peek() == '+') {
276				input(); yylval.i = INCR; RET(INCR);
277			} else if (peek() == '=') {
278				input(); yylval.i = ADDEQ; RET(ASGNOP);
279			} else
280				RET('+');
281		case '-':
282			if (peek() == '-') {
283				input(); yylval.i = DECR; RET(DECR);
284			} else if (peek() == '=') {
285				input(); yylval.i = SUBEQ; RET(ASGNOP);
286			} else
287				RET('-');
288		case '*':
289			if (peek() == '=') {	/* *= */
290				input(); yylval.i = MULTEQ; RET(ASGNOP);
291			} else if (peek() == '*') {	/* ** or **= */
292				input();	/* eat 2nd * */
293				if (peek() == '=') {
294					input(); yylval.i = POWEQ; RET(ASGNOP);
295				} else {
296					RET(POWER);
297				}
298			} else
299				RET('*');
300		case '/':
301			RET('/');
302		case '%':
303			if (peek() == '=') {
304				input(); yylval.i = MODEQ; RET(ASGNOP);
305			} else
306				RET('%');
307		case '^':
308			if (peek() == '=') {
309				input(); yylval.i = POWEQ; RET(ASGNOP);
310			} else
311				RET(POWER);
312
313		case '$':
314			/* BUG: awkward, if not wrong */
315			c = gettok(&buf, &bufsize);
316			if (isalpha(c)) {
317				if (strcmp(buf, "NF") == 0) {	/* very special */
318					unputstr("(NF)");
319					RET(INDIRECT);
320				}
321				c = peek();
322				if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) {
323					unputstr(buf);
324					RET(INDIRECT);
325				}
326				yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab);
327				RET(IVAR);
328			} else if (c == 0) {	/*  */
329				SYNTAX( "unexpected end of input after $" );
330				RET(';');
331			} else {
332				unputstr(buf);
333				RET(INDIRECT);
334			}
335
336		case '}':
337			if (--bracecnt < 0)
338				SYNTAX( "extra }" );
339			sc = 1;
340			RET(';');
341		case ']':
342			if (--brackcnt < 0)
343				SYNTAX( "extra ]" );
344			RET(']');
345		case ')':
346			if (--parencnt < 0)
347				SYNTAX( "extra )" );
348			RET(')');
349		case '{':
350			bracecnt++;
351			RET('{');
352		case '[':
353			brackcnt++;
354			RET('[');
355		case '(':
356			parencnt++;
357			RET('(');
358
359		case '"':
360			return string();	/* BUG: should be like tran.c ? */
361
362		default:
363			RET(c);
364		}
365	}
366}
367
368int string(void)
369{
370	int c, n;
371	uschar *s, *bp;
372	static uschar *buf = 0;
373	static int bufsz = 500;
374
375	if (buf == 0 && (buf = malloc(bufsz)) == NULL)
376		FATAL("out of space for strings");
377	for (bp = buf; (c = input()) != '"'; ) {
378		if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string"))
379			FATAL("out of space for string %.10s...", buf);
380		switch (c) {
381		case '\n':
382		case '\r':
383		case 0:
384			SYNTAX( "non-terminated string %.10s...", buf );
385			lineno++;
386			if (c == 0)	/* hopeless */
387				FATAL( "giving up" );
388			break;
389		case '\\':
390			c = input();
391			switch (c) {
392			case '\n': break;
393			case '"': *bp++ = '"'; break;
394			case 'n': *bp++ = '\n'; break;
395			case 't': *bp++ = '\t'; break;
396			case 'f': *bp++ = '\f'; break;
397			case 'r': *bp++ = '\r'; break;
398			case 'b': *bp++ = '\b'; break;
399			case 'v': *bp++ = '\v'; break;
400			case 'a': *bp++ = '\007'; break;
401			case '\\': *bp++ = '\\'; break;
402
403			case '0': case '1': case '2': /* octal: \d \dd \ddd */
404			case '3': case '4': case '5': case '6': case '7':
405				n = c - '0';
406				if ((c = peek()) >= '0' && c < '8') {
407					n = 8 * n + input() - '0';
408					if ((c = peek()) >= '0' && c < '8')
409						n = 8 * n + input() - '0';
410				}
411				*bp++ = n;
412				break;
413
414			case 'x':	/* hex  \x0-9a-fA-F + */
415			    {	char xbuf[100], *px;
416				for (px = xbuf; (c = input()) != 0 && px-xbuf < 100-2; ) {
417					if (isdigit(c)
418					 || (c >= 'a' && c <= 'f')
419					 || (c >= 'A' && c <= 'F'))
420						*px++ = c;
421					else
422						break;
423				}
424				*px = 0;
425				unput(c);
426	  			sscanf(xbuf, "%x", &n);
427				*bp++ = n;
428				break;
429			    }
430
431			default:
432				WARNING("warning: escape sequence `\\%c' "
433				    "treated as plain `%c'", c, c);
434				*bp++ = c;
435				break;
436			}
437			break;
438		default:
439			*bp++ = c;
440			break;
441		}
442	}
443	*bp = 0;
444	s = tostring(buf);
445	*bp++ = ' '; *bp++ = 0;
446	yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
447	RET(STRING);
448}
449
450
451int binsearch(const char *w, const Keyword *kp, int n)
452{
453	int cond, low, mid, high;
454
455	low = 0;
456	high = n - 1;
457	while (low <= high) {
458		mid = (low + high) / 2;
459		if ((cond = strcmp(w, kp[mid].word)) < 0)
460			high = mid - 1;
461		else if (cond > 0)
462			low = mid + 1;
463		else
464			return mid;
465	}
466	return -1;
467}
468
469int word(char *w)
470{
471	const Keyword *kp;
472	int c, n;
473
474	n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
475/* BUG: this ought to be inside the if; in theory could fault (daniel barrett) */
476	kp = keywords + n;
477	if (n != -1) {	/* found in table */
478		yylval.i = kp->sub;
479		switch (kp->type) {	/* special handling */
480		case BLTIN:
481			if (kp->sub == FSYSTEM && safe)
482				SYNTAX( "system is unsafe" );
483			RET(kp->type);
484		case FUNC:
485			if (infunc)
486				SYNTAX( "illegal nested function" );
487			RET(kp->type);
488		case RETURN:
489			if (!infunc)
490				SYNTAX( "return not in function" );
491			RET(kp->type);
492		case VARNF:
493			yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab);
494			RET(VARNF);
495		default:
496			RET(kp->type);
497		}
498	}
499	c = peek();	/* look for '(' */
500	if (c != '(' && infunc && (n=isarg(w)) >= 0) {
501		yylval.i = n;
502		RET(ARG);
503	} else {
504		yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab);
505		if (c == '(') {
506			RET(CALL);
507		} else {
508			RET(VAR);
509		}
510	}
511}
512
513void startreg(void)	/* next call to yylex will return a regular expression */
514{
515	reg = 1;
516}
517
518int regexpr(void)
519{
520	int c;
521	static uschar *buf = 0;
522	static int bufsz = 500;
523	uschar *bp;
524
525	if (buf == 0 && (buf = malloc(bufsz)) == NULL)
526		FATAL("out of space for rex expr");
527	bp = buf;
528	for ( ; (c = input()) != '/' && c != 0; ) {
529		if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr"))
530			FATAL("out of space for reg expr %.10s...", buf);
531		if (c == '\n') {
532			SYNTAX( "newline in regular expression %.10s...", buf );
533			unput('\n');
534			break;
535		} else if (c == '\\') {
536			*bp++ = '\\';
537			*bp++ = input();
538		} else {
539			*bp++ = c;
540		}
541	}
542	*bp = 0;
543	if (c == 0)
544		SYNTAX("non-terminated regular expression %.10s...", buf);
545	yylval.s = tostring(buf);
546	unput('/');
547	RET(REGEXPR);
548}
549
550/* low-level lexical stuff, sort of inherited from lex */
551
/* Circular record of the characters most recently returned by input(). */
char ebuf[300];
char *ep = ebuf;		/* next slot to be written in ebuf */

/* Pushback stack: unput() pushes, input() pops before reading anything new. */
char yysbuf[100];
char *yysptr = yysbuf;		/* first free slot; equal to yysbuf when empty */

FILE *yyin = NULL;
557
558int input(void)	/* get next lexical input character */
559{
560	int c;
561	extern char *lexprog;
562
563	if (yysptr > yysbuf)
564		c = (uschar)*--yysptr;
565	else if (lexprog != NULL) {	/* awk '...' */
566		if ((c = (uschar)*lexprog) != 0)
567			lexprog++;
568	} else				/* awk -f ... */
569		c = pgetc();
570	if (c == '\n')
571		lineno++;
572	else if (c == EOF)
573		c = 0;
574	if (ep >= ebuf + sizeof ebuf)
575		ep = ebuf;
576	return *ep++ = c;
577}
578
579void unput(int c)	/* put lexical character back on input */
580{
581	if (c == '\n')
582		lineno--;
583	if (yysptr >= yysbuf + sizeof(yysbuf))
584		FATAL("pushed back too much: %.20s...", yysbuf);
585	*yysptr++ = c;
586	if (--ep < ebuf)
587		ep = ebuf + sizeof(ebuf) - 1;
588}
589
/*
 * unputstr - push the whole string s back onto the input.  Characters
 * are pushed last-to-first so that input() returns them in order.
 */
void unputstr(const char *s)
{
	int i = strlen(s);

	while (--i >= 0)
		unput(s[i]);
}
597