lex.c revision 85587
185587Sobrien/****************************************************************
285587SobrienCopyright (C) Lucent Technologies 1997
385587SobrienAll Rights Reserved
485587Sobrien
585587SobrienPermission to use, copy, modify, and distribute this software and
685587Sobrienits documentation for any purpose and without fee is hereby
785587Sobriengranted, provided that the above copyright notice appear in all
885587Sobriencopies and that both that the copyright notice and this
985587Sobrienpermission notice and warranty disclaimer appear in supporting
1085587Sobriendocumentation, and that the name Lucent Technologies or any of
1185587Sobrienits entities not be used in advertising or publicity pertaining
1285587Sobriento distribution of the software without specific, written prior
1385587Sobrienpermission.
1485587Sobrien
1585587SobrienLUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
1685587SobrienINCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
1785587SobrienIN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
1885587SobrienSPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
1985587SobrienWHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
2085587SobrienIN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
2185587SobrienARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
2285587SobrienTHIS SOFTWARE.
2385587Sobrien****************************************************************/
2485587Sobrien
2585587Sobrien#include <stdio.h>
2685587Sobrien#include <stdlib.h>
2785587Sobrien#include <string.h>
2885587Sobrien#include <ctype.h>
2985587Sobrien#include "awk.h"
3085587Sobrien#include "ytab.h"
3185587Sobrien
3285587Sobrienextern YYSTYPE	yylval;
3385587Sobrienextern int	infunc;
3485587Sobrien
/*
 * Lexer bookkeeping.  lineno is adjusted by input()/unput() as newlines
 * are read and pushed back; the three counters track how many of each
 * opening bracket are currently unmatched.
 */
int	lineno = 1;	/* current source line; the first line is 1 */
int	parencnt = 0;	/* '(' seen minus ')' seen */
int	brackcnt = 0;	/* '[' seen minus ']' seen */
int	bracecnt = 0;	/* '{' seen minus '}' seen */
3985587Sobrien
/*
 * One entry of the reserved-word table.
 * The spelling is only ever initialised from string literals and is never
 * written through, so it is declared const.
 */
typedef struct Keyword {
	const char	*word;	/* spelling of the keyword, used as the search key */
	int	sub;		/* value stored in yylval.i when the keyword is recognised */
	int	type;		/* token code handed back to the parser */
} Keyword;
4585587Sobrien
4685587SobrienKeyword keywords[] ={	/* keep sorted: binary searched */
4785587Sobrien	{ "BEGIN",	XBEGIN,		XBEGIN },
4885587Sobrien	{ "END",	XEND,		XEND },
4985587Sobrien	{ "NF",		VARNF,		VARNF },
5085587Sobrien	{ "atan2",	FATAN,		BLTIN },
5185587Sobrien	{ "break",	BREAK,		BREAK },
5285587Sobrien	{ "close",	CLOSE,		CLOSE },
5385587Sobrien	{ "continue",	CONTINUE,	CONTINUE },
5485587Sobrien	{ "cos",	FCOS,		BLTIN },
5585587Sobrien	{ "delete",	DELETE,		DELETE },
5685587Sobrien	{ "do",		DO,		DO },
5785587Sobrien	{ "else",	ELSE,		ELSE },
5885587Sobrien	{ "exit",	EXIT,		EXIT },
5985587Sobrien	{ "exp",	FEXP,		BLTIN },
6085587Sobrien	{ "fflush",	FFLUSH,		BLTIN },
6185587Sobrien	{ "for",	FOR,		FOR },
6285587Sobrien	{ "func",	FUNC,		FUNC },
6385587Sobrien	{ "function",	FUNC,		FUNC },
6485587Sobrien	{ "getline",	GETLINE,	GETLINE },
6585587Sobrien	{ "gsub",	GSUB,		GSUB },
6685587Sobrien	{ "if",		IF,		IF },
6785587Sobrien	{ "in",		IN,		IN },
6885587Sobrien	{ "index",	INDEX,		INDEX },
6985587Sobrien	{ "int",	FINT,		BLTIN },
7085587Sobrien	{ "length",	FLENGTH,	BLTIN },
7185587Sobrien	{ "log",	FLOG,		BLTIN },
7285587Sobrien	{ "match",	MATCHFCN,	MATCHFCN },
7385587Sobrien	{ "next",	NEXT,		NEXT },
7485587Sobrien	{ "nextfile",	NEXTFILE,	NEXTFILE },
7585587Sobrien	{ "print",	PRINT,		PRINT },
7685587Sobrien	{ "printf",	PRINTF,		PRINTF },
7785587Sobrien	{ "rand",	FRAND,		BLTIN },
7885587Sobrien	{ "return",	RETURN,		RETURN },
7985587Sobrien	{ "sin",	FSIN,		BLTIN },
8085587Sobrien	{ "split",	SPLIT,		SPLIT },
8185587Sobrien	{ "sprintf",	SPRINTF,	SPRINTF },
8285587Sobrien	{ "sqrt",	FSQRT,		BLTIN },
8385587Sobrien	{ "srand",	FSRAND,		BLTIN },
8485587Sobrien	{ "sub",	SUB,		SUB },
8585587Sobrien	{ "substr",	SUBSTR,		SUBSTR },
8685587Sobrien	{ "system",	FSYSTEM,	BLTIN },
8785587Sobrien	{ "tolower",	FTOLOWER,	BLTIN },
8885587Sobrien	{ "toupper",	FTOUPPER,	BLTIN },
8985587Sobrien	{ "while",	WHILE,		WHILE },
9085587Sobrien};
9185587Sobrien
#define DEBUG
/*
 * RET(x): return token x from the current function.
 * With DEBUG defined, the token's name is printed first whenever dbg is
 * nonzero.
 *
 * The debug form is wrapped in do { } while (0) so that "RET(x);" is a
 * single statement and stays correct in an unbraced if/else.  Note that
 * the debug form evaluates x twice, so x must be free of side effects.
 */
#ifdef	DEBUG
#define	RET(x)	do { if (dbg) printf("lex %s\n", tokname(x)); return (x); } while (0)
#else
#define	RET(x)	return (x)
#endif
9885587Sobrien
/*
 * peek: report the next input character without consuming it.
 * The character is read with input() and pushed straight back with
 * unput(), so the following input() call returns it again.
 */
int peek(void)
{
	int next;

	next = input();
	unput(next);
	return next;
}
10585587Sobrien
/*
 * gettok: read the next raw token into the buffer *pbuf (size *psz).
 *
 * Returns 0 at end of input.  For any character that cannot start a name
 * or a number, the buffer holds that single character and the character
 * is returned.  Otherwise the buffer receives the text of a name
 * ([A-Za-z_][A-Za-z0-9_]*) or of a number, and the first character of
 * that text is returned.
 *
 * The buffer is grown through adjbuf() when needed; *pbuf and *psz are
 * updated on return from the name/number paths.
 */
int gettok(char **pbuf, int *psz)	/* get next input token */
{
	int c;
	char *buf = *pbuf;
	int sz = *psz;
	char *bp = buf;

	c = input();
	if (c == 0)
		return 0;
	buf[0] = c;
	buf[1] = 0;
	if (!isalnum(c) && c != '.' && c != '_')
		return c;

	*bp++ = c;
	if (isalpha(c) || c == '_') {	/* it's a varname */
		for ( ; (c = input()) != 0; ) {
			/*
			 * Keep room for this character AND the terminating
			 * NUL, so the final "*bp = 0" below is in bounds even
			 * when the input ends right after a character that
			 * filled the buffer.  (Assumes adjbuf() leaves at
			 * least the requested size available.)
			 */
			if (bp-buf+2 > sz)
				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, 0))
					FATAL( "out of space for name %.10s...", buf );
			if (isalnum(c) || c == '_')
				*bp++ = c;
			else {
				unput(c);
				break;
			}
		}
		*bp = 0;
	} else {	/* it's a number */
		char *rem;
		/* read input until can't be a number */
		for ( ; (c = input()) != 0; ) {
			/* same room-for-NUL rule as in the name loop */
			if (bp-buf+2 > sz)
				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, 0))
					FATAL( "out of space for number %.10s...", buf );
			if (isdigit(c) || c == 'e' || c == 'E'
			  || c == '.' || c == '+' || c == '-')
				*bp++ = c;
			else {
				unput(c);
				break;
			}
		}
		*bp = 0;
		strtod(buf, &rem);	/* find how much of it is a number */
		if (rem == buf) {
			/*
			 * Nothing parsed: the text starts with a '.' that is
			 * not part of a number.  Return just that character
			 * and push the rest back.  Truncating at buf[0] here
			 * would leave an empty token and make the function
			 * return 0, which callers treat as end of input.
			 */
			unputstr(rem+1);
			buf[1] = 0;
		} else {
			unputstr(rem);		/* put rest back for later */
			rem[0] = 0;		/* keep only the numeric prefix */
		}
	}
	*pbuf = buf;
	*psz = sz;
	return buf[0];
}
16085587Sobrien
/* Helpers defined later in this file. */
int	regexpr(void);
int	string(void);
int	word(char *);

/* One-shot flags consulted at the top of yylex(); each is cleared when acted on. */
int	reg	= 0;	/* 1 => the next token is read as a regular expression */
int	sc	= 0;	/* 1 => the next token returned is '}' */
16685587Sobrien
16785587Sobrienint yylex(void)
16885587Sobrien{
16985587Sobrien	int c;
17085587Sobrien	static char *buf = 0;
17185587Sobrien	static int bufsize = 500;
17285587Sobrien
17385587Sobrien	if (buf == 0 && (buf = (char *) malloc(bufsize)) == NULL)
17485587Sobrien		FATAL( "out of space in yylex" );
17585587Sobrien	if (sc) {
17685587Sobrien		sc = 0;
17785587Sobrien		RET('}');
17885587Sobrien	}
17985587Sobrien	if (reg) {
18085587Sobrien		reg = 0;
18185587Sobrien		return regexpr();
18285587Sobrien	}
18385587Sobrien	for (;;) {
18485587Sobrien		c = gettok(&buf, &bufsize);
18585587Sobrien		if (c == 0)
18685587Sobrien			return 0;
18785587Sobrien		if (isalpha(c) || c == '_')
18885587Sobrien			return word(buf);
18985587Sobrien		if (isdigit(c) || c == '.') {
19085587Sobrien			yylval.cp = setsymtab(buf, tostring(buf), atof(buf), CON|NUM, symtab);
19185587Sobrien			/* should this also have STR set? */
19285587Sobrien			RET(NUMBER);
19385587Sobrien		}
19485587Sobrien
19585587Sobrien		yylval.i = c;
19685587Sobrien		switch (c) {
19785587Sobrien		case '\n':	/* {EOL} */
19885587Sobrien			RET(NL);
19985587Sobrien		case '\r':	/* assume \n is coming */
20085587Sobrien		case ' ':	/* {WS}+ */
20185587Sobrien		case '\t':
20285587Sobrien			break;
20385587Sobrien		case '#':	/* #.* strip comments */
20485587Sobrien			while ((c = input()) != '\n' && c != 0)
20585587Sobrien				;
20685587Sobrien			unput(c);
20785587Sobrien			break;
20885587Sobrien		case ';':
20985587Sobrien			RET(';');
21085587Sobrien		case '\\':
21185587Sobrien			if (peek() == '\n') {
21285587Sobrien				input();
21385587Sobrien			} else if (peek() == '\r') {
21485587Sobrien				input(); input();	/* \n */
21585587Sobrien				lineno++;
21685587Sobrien			} else {
21785587Sobrien				RET(c);
21885587Sobrien			}
21985587Sobrien			break;
22085587Sobrien		case '&':
22185587Sobrien			if (peek() == '&') {
22285587Sobrien				input(); RET(AND);
22385587Sobrien			} else
22485587Sobrien				RET('&');
22585587Sobrien		case '|':
22685587Sobrien			if (peek() == '|') {
22785587Sobrien				input(); RET(BOR);
22885587Sobrien			} else
22985587Sobrien				RET('|');
23085587Sobrien		case '!':
23185587Sobrien			if (peek() == '=') {
23285587Sobrien				input(); yylval.i = NE; RET(NE);
23385587Sobrien			} else if (peek() == '~') {
23485587Sobrien				input(); yylval.i = NOTMATCH; RET(MATCHOP);
23585587Sobrien			} else
23685587Sobrien				RET(NOT);
23785587Sobrien		case '~':
23885587Sobrien			yylval.i = MATCH;
23985587Sobrien			RET(MATCHOP);
24085587Sobrien		case '<':
24185587Sobrien			if (peek() == '=') {
24285587Sobrien				input(); yylval.i = LE; RET(LE);
24385587Sobrien			} else {
24485587Sobrien				yylval.i = LT; RET(LT);
24585587Sobrien			}
24685587Sobrien		case '=':
24785587Sobrien			if (peek() == '=') {
24885587Sobrien				input(); yylval.i = EQ; RET(EQ);
24985587Sobrien			} else {
25085587Sobrien				yylval.i = ASSIGN; RET(ASGNOP);
25185587Sobrien			}
25285587Sobrien		case '>':
25385587Sobrien			if (peek() == '=') {
25485587Sobrien				input(); yylval.i = GE; RET(GE);
25585587Sobrien			} else if (peek() == '>') {
25685587Sobrien				input(); yylval.i = APPEND; RET(APPEND);
25785587Sobrien			} else {
25885587Sobrien				yylval.i = GT; RET(GT);
25985587Sobrien			}
26085587Sobrien		case '+':
26185587Sobrien			if (peek() == '+') {
26285587Sobrien				input(); yylval.i = INCR; RET(INCR);
26385587Sobrien			} else if (peek() == '=') {
26485587Sobrien				input(); yylval.i = ADDEQ; RET(ASGNOP);
26585587Sobrien			} else
26685587Sobrien				RET('+');
26785587Sobrien		case '-':
26885587Sobrien			if (peek() == '-') {
26985587Sobrien				input(); yylval.i = DECR; RET(DECR);
27085587Sobrien			} else if (peek() == '=') {
27185587Sobrien				input(); yylval.i = SUBEQ; RET(ASGNOP);
27285587Sobrien			} else
27385587Sobrien				RET('-');
27485587Sobrien		case '*':
27585587Sobrien			if (peek() == '=') {	/* *= */
27685587Sobrien				input(); yylval.i = MULTEQ; RET(ASGNOP);
27785587Sobrien			} else if (peek() == '*') {	/* ** or **= */
27885587Sobrien				input();	/* eat 2nd * */
27985587Sobrien				if (peek() == '=') {
28085587Sobrien					input(); yylval.i = POWEQ; RET(ASGNOP);
28185587Sobrien				} else {
28285587Sobrien					RET(POWER);
28385587Sobrien				}
28485587Sobrien			} else
28585587Sobrien				RET('*');
28685587Sobrien		case '/':
28785587Sobrien			RET('/');
28885587Sobrien		case '%':
28985587Sobrien			if (peek() == '=') {
29085587Sobrien				input(); yylval.i = MODEQ; RET(ASGNOP);
29185587Sobrien			} else
29285587Sobrien				RET('%');
29385587Sobrien		case '^':
29485587Sobrien			if (peek() == '=') {
29585587Sobrien				input(); yylval.i = POWEQ; RET(ASGNOP);
29685587Sobrien			} else
29785587Sobrien				RET(POWER);
29885587Sobrien
29985587Sobrien		case '$':
30085587Sobrien			/* BUG: awkward, if not wrong */
30185587Sobrien			c = gettok(&buf, &bufsize);
30285587Sobrien			if (isalpha(c)) {
30385587Sobrien				if (strcmp(buf, "NF") == 0) {	/* very special */
30485587Sobrien					unputstr("(NF)");
30585587Sobrien					RET(INDIRECT);
30685587Sobrien				}
30785587Sobrien				c = peek();
30885587Sobrien				if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) {
30985587Sobrien					unputstr(buf);
31085587Sobrien					RET(INDIRECT);
31185587Sobrien				}
31285587Sobrien				yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab);
31385587Sobrien				RET(IVAR);
31485587Sobrien			} else {
31585587Sobrien				unputstr(buf);
31685587Sobrien				RET(INDIRECT);
31785587Sobrien			}
31885587Sobrien
31985587Sobrien		case '}':
32085587Sobrien			if (--bracecnt < 0)
32185587Sobrien				SYNTAX( "extra }" );
32285587Sobrien			sc = 1;
32385587Sobrien			RET(';');
32485587Sobrien		case ']':
32585587Sobrien			if (--brackcnt < 0)
32685587Sobrien				SYNTAX( "extra ]" );
32785587Sobrien			RET(']');
32885587Sobrien		case ')':
32985587Sobrien			if (--parencnt < 0)
33085587Sobrien				SYNTAX( "extra )" );
33185587Sobrien			RET(')');
33285587Sobrien		case '{':
33385587Sobrien			bracecnt++;
33485587Sobrien			RET('{');
33585587Sobrien		case '[':
33685587Sobrien			brackcnt++;
33785587Sobrien			RET('[');
33885587Sobrien		case '(':
33985587Sobrien			parencnt++;
34085587Sobrien			RET('(');
34185587Sobrien
34285587Sobrien		case '"':
34385587Sobrien			return string();	/* BUG: should be like tran.c ? */
34485587Sobrien
34585587Sobrien		default:
34685587Sobrien			RET(c);
34785587Sobrien		}
34885587Sobrien	}
34985587Sobrien}
35085587Sobrien
35185587Sobrienint string(void)
35285587Sobrien{
35385587Sobrien	int c, n;
35485587Sobrien	char *s, *bp;
35585587Sobrien	static char *buf = 0;
35685587Sobrien	static int bufsz = 500;
35785587Sobrien
35885587Sobrien	if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
35985587Sobrien		FATAL("out of space for strings");
36085587Sobrien	for (bp = buf; (c = input()) != '"'; ) {
36185587Sobrien		if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, 0))
36285587Sobrien			FATAL("out of space for string %.10s...", buf);
36385587Sobrien		switch (c) {
36485587Sobrien		case '\n':
36585587Sobrien		case '\r':
36685587Sobrien		case 0:
36785587Sobrien			SYNTAX( "non-terminated string %.10s...", buf );
36885587Sobrien			lineno++;
36985587Sobrien			break;
37085587Sobrien		case '\\':
37185587Sobrien			c = input();
37285587Sobrien			switch (c) {
37385587Sobrien			case '"': *bp++ = '"'; break;
37485587Sobrien			case 'n': *bp++ = '\n'; break;
37585587Sobrien			case 't': *bp++ = '\t'; break;
37685587Sobrien			case 'f': *bp++ = '\f'; break;
37785587Sobrien			case 'r': *bp++ = '\r'; break;
37885587Sobrien			case 'b': *bp++ = '\b'; break;
37985587Sobrien			case 'v': *bp++ = '\v'; break;
38085587Sobrien			case 'a': *bp++ = '\007'; break;
38185587Sobrien			case '\\': *bp++ = '\\'; break;
38285587Sobrien
38385587Sobrien			case '0': case '1': case '2': /* octal: \d \dd \ddd */
38485587Sobrien			case '3': case '4': case '5': case '6': case '7':
38585587Sobrien				n = c - '0';
38685587Sobrien				if ((c = peek()) >= '0' && c < '8') {
38785587Sobrien					n = 8 * n + input() - '0';
38885587Sobrien					if ((c = peek()) >= '0' && c < '8')
38985587Sobrien						n = 8 * n + input() - '0';
39085587Sobrien				}
39185587Sobrien				*bp++ = n;
39285587Sobrien				break;
39385587Sobrien
39485587Sobrien			case 'x':	/* hex  \x0-9a-fA-F + */
39585587Sobrien			    {	char xbuf[100], *px;
39685587Sobrien				for (px = xbuf; (c = input()) != 0 && px-xbuf < 100-2; ) {
39785587Sobrien					if (isdigit(c)
39885587Sobrien					 || (c >= 'a' && c <= 'f')
39985587Sobrien					 || (c >= 'A' && c <= 'F'))
40085587Sobrien						*px++ = c;
40185587Sobrien					else
40285587Sobrien						break;
40385587Sobrien				}
40485587Sobrien				*px = 0;
40585587Sobrien				unput(c);
40685587Sobrien	  			sscanf(xbuf, "%x", &n);
40785587Sobrien				*bp++ = n;
40885587Sobrien				break;
40985587Sobrien			    }
41085587Sobrien
41185587Sobrien			default:
41285587Sobrien				*bp++ = c;
41385587Sobrien				break;
41485587Sobrien			}
41585587Sobrien			break;
41685587Sobrien		default:
41785587Sobrien			*bp++ = c;
41885587Sobrien			break;
41985587Sobrien		}
42085587Sobrien	}
42185587Sobrien	*bp = 0;
42285587Sobrien	s = tostring(buf);
42385587Sobrien	*bp++ = ' '; *bp++ = 0;
42485587Sobrien	yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
42585587Sobrien	RET(STRING);
42685587Sobrien}
42785587Sobrien
42885587Sobrien
42985587Sobrienint binsearch(char *w, Keyword *kp, int n)
43085587Sobrien{
43185587Sobrien	int cond, low, mid, high;
43285587Sobrien
43385587Sobrien	low = 0;
43485587Sobrien	high = n - 1;
43585587Sobrien	while (low <= high) {
43685587Sobrien		mid = (low + high) / 2;
43785587Sobrien		if ((cond = strcmp(w, kp[mid].word)) < 0)
43885587Sobrien			high = mid - 1;
43985587Sobrien		else if (cond > 0)
44085587Sobrien			low = mid + 1;
44185587Sobrien		else
44285587Sobrien			return mid;
44385587Sobrien	}
44485587Sobrien	return -1;
44585587Sobrien}
44685587Sobrien
44785587Sobrienint word(char *w)
44885587Sobrien{
44985587Sobrien	Keyword *kp;
45085587Sobrien	int c, n;
45185587Sobrien
45285587Sobrien	n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
45385587Sobrien	kp = keywords + n;
45485587Sobrien	if (n != -1) {	/* found in table */
45585587Sobrien		yylval.i = kp->sub;
45685587Sobrien		switch (kp->type) {	/* special handling */
45785587Sobrien		case FSYSTEM:
45885587Sobrien			if (safe)
45985587Sobrien				SYNTAX( "system is unsafe" );
46085587Sobrien			RET(kp->type);
46185587Sobrien		case FUNC:
46285587Sobrien			if (infunc)
46385587Sobrien				SYNTAX( "illegal nested function" );
46485587Sobrien			RET(kp->type);
46585587Sobrien		case RETURN:
46685587Sobrien			if (!infunc)
46785587Sobrien				SYNTAX( "return not in function" );
46885587Sobrien			RET(kp->type);
46985587Sobrien		case VARNF:
47085587Sobrien			yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab);
47185587Sobrien			RET(VARNF);
47285587Sobrien		default:
47385587Sobrien			RET(kp->type);
47485587Sobrien		}
47585587Sobrien	}
47685587Sobrien	c = peek();	/* look for '(' */
47785587Sobrien	if (c != '(' && infunc && (n=isarg(w)) >= 0) {
47885587Sobrien		yylval.i = n;
47985587Sobrien		RET(ARG);
48085587Sobrien	} else {
48185587Sobrien		yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab);
48285587Sobrien		if (c == '(') {
48385587Sobrien			RET(CALL);
48485587Sobrien		} else {
48585587Sobrien			RET(VAR);
48685587Sobrien		}
48785587Sobrien	}
48885587Sobrien}
48985587Sobrien
49085587Sobrienvoid startreg(void)	/* next call to yyles will return a regular expression */
49185587Sobrien{
49285587Sobrien	reg = 1;
49385587Sobrien}
49485587Sobrien
49585587Sobrienint regexpr(void)
49685587Sobrien{
49785587Sobrien	int c;
49885587Sobrien	static char *buf = 0;
49985587Sobrien	static int bufsz = 500;
50085587Sobrien	char *bp;
50185587Sobrien
50285587Sobrien	if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
50385587Sobrien		FATAL("out of space for rex expr");
50485587Sobrien	bp = buf;
50585587Sobrien	for ( ; (c = input()) != '/' && c != 0; ) {
50685587Sobrien		if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, 0))
50785587Sobrien			FATAL("out of space for reg expr %.10s...", buf);
50885587Sobrien		if (c == '\n') {
50985587Sobrien			SYNTAX( "newline in regular expression %.10s...", buf );
51085587Sobrien			unput('\n');
51185587Sobrien			break;
51285587Sobrien		} else if (c == '\\') {
51385587Sobrien			*bp++ = '\\';
51485587Sobrien			*bp++ = input();
51585587Sobrien		} else {
51685587Sobrien			*bp++ = c;
51785587Sobrien		}
51885587Sobrien	}
51985587Sobrien	*bp = 0;
52085587Sobrien	yylval.s = tostring(buf);
52185587Sobrien	unput('/');
52285587Sobrien	RET(REGEXPR);
52385587Sobrien}
52485587Sobrien
52585587Sobrien/* low-level lexical stuff, sort of inherited from lex */
52685587Sobrien
/* Circular record of characters handed out by input(); ep is the next slot to write. */
char	ebuf[300];
char	*ep = ebuf;

/* Pushback stack filled by unput() and drained by input(); yysptr is one past the top. */
char	yysbuf[100];
char	*yysptr = yysbuf;

FILE	*yyin = 0;
53285587Sobrien
53385587Sobrienint input(void)	/* get next lexical input character */
53485587Sobrien{
53585587Sobrien	int c;
53685587Sobrien	extern char *lexprog;
53785587Sobrien
53885587Sobrien	if (yysptr > yysbuf)
53985587Sobrien		c = *--yysptr;
54085587Sobrien	else if (lexprog != NULL) {	/* awk '...' */
54185587Sobrien		if ((c = *lexprog) != 0)
54285587Sobrien			lexprog++;
54385587Sobrien	} else				/* awk -f ... */
54485587Sobrien		c = pgetc();
54585587Sobrien	if (c == '\n')
54685587Sobrien		lineno++;
54785587Sobrien	else if (c == EOF)
54885587Sobrien		c = 0;
54985587Sobrien	if (ep >= ebuf + sizeof ebuf)
55085587Sobrien		ep = ebuf;
55185587Sobrien	return *ep++ = c;
55285587Sobrien}
55385587Sobrien
55485587Sobrienvoid unput(int c)	/* put lexical character back on input */
55585587Sobrien{
55685587Sobrien	if (c == '\n')
55785587Sobrien		lineno--;
55885587Sobrien	if (yysptr >= yysbuf + sizeof(yysbuf))
55985587Sobrien		FATAL("pushed back too much: %.20s...", yysbuf);
56085587Sobrien	*yysptr++ = c;
56185587Sobrien	if (--ep < ebuf)
56285587Sobrien		ep = ebuf + sizeof(ebuf) - 1;
56385587Sobrien}
56485587Sobrien
/*
 * unputstr: push the whole string s back onto the input.
 * The characters are pushed last-to-first, so later input() calls
 * return them in their original order.
 */
void unputstr(char *s)	/* put a string back on input */
{
	char *p;

	p = s + strlen(s);
	while (p > s)
		unput(*--p);
}
572