db_lex.c revision 37504
18876Srgrimes/*
24Srgrimes * Mach Operating System
34Srgrimes * Copyright (c) 1991,1990 Carnegie Mellon University
44Srgrimes * All Rights Reserved.
58876Srgrimes *
64Srgrimes * Permission to use, copy, modify and distribute this software and its
74Srgrimes * documentation is hereby granted, provided that both the copyright
84Srgrimes * notice and this permission notice appear in all copies of the
94Srgrimes * software, derivative works or modified versions, and any portions
104Srgrimes * thereof, and that both notices appear in supporting documentation.
118876Srgrimes *
128876Srgrimes * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS
134Srgrimes * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
144Srgrimes * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
158876Srgrimes *
164Srgrimes * Carnegie Mellon requests users of this software to return to
178876Srgrimes *
184Srgrimes *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
194Srgrimes *  School of Computer Science
204Srgrimes *  Carnegie Mellon University
214Srgrimes *  Pittsburgh PA 15213-3890
228876Srgrimes *
234Srgrimes * any improvements or extensions that they make and grant Carnegie the
244Srgrimes * rights to redistribute these changes.
254Srgrimes *
2637504Sbde *	$Id: db_lex.c,v 1.16 1998/06/27 15:40:56 dfr Exp $
274Srgrimes */
28623Srgrimes
/*
 *	Author: David B. Golub, Carnegie Mellon University
 *	Date:	7/90
 */
/*
 * Lexical analyzer.
 */
362056Swollman#include <sys/param.h>
3712734Sbde
382056Swollman#include <ddb/ddb.h>
394Srgrimes#include <ddb/db_lex.h>
404Srgrimes
4112720Sphkstatic char	db_line[120];
4212720Sphkstatic char *	db_lp, *db_endlp;
434Srgrimes
4412473Sbdestatic int	db_lex __P((void));
4512515Sphkstatic void 	db_flush_line __P((void));
4612515Sphkstatic int 	db_read_char __P((void));
4712515Sphkstatic void 	db_unread_char __P((int));
482112Swollman
494Srgrimesint
504Srgrimesdb_read_line()
514Srgrimes{
524Srgrimes	int	i;
534Srgrimes
544Srgrimes	i = db_readline(db_line, sizeof(db_line));
554Srgrimes	if (i == 0)
564Srgrimes	    return (0);	/* EOI */
574Srgrimes	db_lp = db_line;
584Srgrimes	db_endlp = db_lp + i;
594Srgrimes	return (i);
604Srgrimes}
614Srgrimes
6212515Sphkstatic void
634Srgrimesdb_flush_line()
644Srgrimes{
654Srgrimes	db_lp = db_line;
664Srgrimes	db_endlp = db_line;
674Srgrimes}
684Srgrimes
6912515Sphkstatic int	db_look_char = 0;
704Srgrimes
7112515Sphkstatic int
724Srgrimesdb_read_char()
734Srgrimes{
744Srgrimes	int	c;
754Srgrimes
764Srgrimes	if (db_look_char != 0) {
774Srgrimes	    c = db_look_char;
784Srgrimes	    db_look_char = 0;
794Srgrimes	}
804Srgrimes	else if (db_lp >= db_endlp)
814Srgrimes	    c = -1;
828876Srgrimes	else
834Srgrimes	    c = *db_lp++;
844Srgrimes	return (c);
854Srgrimes}
864Srgrimes
8712515Sphkstatic void
884Srgrimesdb_unread_char(c)
89798Swollman	int c;
904Srgrimes{
914Srgrimes	db_look_char = c;
924Srgrimes}
934Srgrimes
/*
 * One-token pushback slot filled by db_unread_token(); 0 means empty.
 */
static int	db_look_token = 0;

/*
 * Push a token back so that the next db_read_token() returns it instead
 * of scanning new input.  Only one token can be held at a time.
 */
void
db_unread_token(tok)
	int	tok;
{
	db_look_token = tok;
}
1024Srgrimes
1034Srgrimesint
1044Srgrimesdb_read_token()
1054Srgrimes{
1064Srgrimes	int	t;
1074Srgrimes
1084Srgrimes	if (db_look_token) {
1094Srgrimes	    t = db_look_token;
1104Srgrimes	    db_look_token = 0;
1114Srgrimes	}
1124Srgrimes	else
1134Srgrimes	    t = db_lex();
1144Srgrimes	return (t);
1154Srgrimes}
1164Srgrimes
11737504Sbdedb_expr_t	db_tok_number;
1184Srgrimeschar	db_tok_string[TOK_STRING_SIZE];
1194Srgrimes
12037504Sbdedb_expr_t	db_radix = 16;
1214Srgrimes
1224Srgrimesvoid
1234Srgrimesdb_flush_lex()
1244Srgrimes{
1254Srgrimes	db_flush_line();
1264Srgrimes	db_look_char = 0;
1274Srgrimes	db_look_token = 0;
1284Srgrimes}
1294Srgrimes
1302112Swollmanstatic int
1314Srgrimesdb_lex()
1324Srgrimes{
1334Srgrimes	int	c;
1344Srgrimes
1354Srgrimes	c = db_read_char();
1364Srgrimes	while (c <= ' ' || c > '~') {
1374Srgrimes	    if (c == '\n' || c == -1)
1384Srgrimes		return (tEOL);
1394Srgrimes	    c = db_read_char();
1404Srgrimes	}
1414Srgrimes
1424Srgrimes	if (c >= '0' && c <= '9') {
1434Srgrimes	    /* number */
144798Swollman	    int	r, digit = 0;
1454Srgrimes
1464Srgrimes	    if (c > '0')
1474Srgrimes		r = db_radix;
1484Srgrimes	    else {
1494Srgrimes		c = db_read_char();
1504Srgrimes		if (c == 'O' || c == 'o')
1514Srgrimes		    r = 8;
1524Srgrimes		else if (c == 'T' || c == 't')
1534Srgrimes		    r = 10;
1544Srgrimes		else if (c == 'X' || c == 'x')
1554Srgrimes		    r = 16;
1564Srgrimes		else {
1574Srgrimes		    r = db_radix;
1584Srgrimes		    db_unread_char(c);
1594Srgrimes		}
1604Srgrimes		c = db_read_char();
1614Srgrimes	    }
1624Srgrimes	    db_tok_number = 0;
1634Srgrimes	    for (;;) {
1644Srgrimes		if (c >= '0' && c <= ((r == 8) ? '7' : '9'))
1654Srgrimes		    digit = c - '0';
1664Srgrimes		else if (r == 16 && ((c >= 'A' && c <= 'F') ||
1674Srgrimes				     (c >= 'a' && c <= 'f'))) {
1684Srgrimes		    if (c >= 'a')
1694Srgrimes			digit = c - 'a' + 10;
1704Srgrimes		    else if (c >= 'A')
1714Srgrimes			digit = c - 'A' + 10;
1724Srgrimes		}
1734Srgrimes		else
1744Srgrimes		    break;
1754Srgrimes		db_tok_number = db_tok_number * r + digit;
1764Srgrimes		c = db_read_char();
1774Srgrimes	    }
1784Srgrimes	    if ((c >= '0' && c <= '9') ||
1794Srgrimes		(c >= 'A' && c <= 'Z') ||
1804Srgrimes		(c >= 'a' && c <= 'z') ||
1814Srgrimes		(c == '_'))
1824Srgrimes	    {
1834Srgrimes		db_error("Bad character in number\n");
1844Srgrimes		db_flush_lex();
1854Srgrimes		return (tEOF);
1864Srgrimes	    }
1874Srgrimes	    db_unread_char(c);
1884Srgrimes	    return (tNUMBER);
1894Srgrimes	}
1904Srgrimes	if ((c >= 'A' && c <= 'Z') ||
1914Srgrimes	    (c >= 'a' && c <= 'z') ||
1924Srgrimes	    c == '_' || c == '\\')
1934Srgrimes	{
1944Srgrimes	    /* string */
1954Srgrimes	    char *cp;
1964Srgrimes
1974Srgrimes	    cp = db_tok_string;
1984Srgrimes	    if (c == '\\') {
1994Srgrimes		c = db_read_char();
2004Srgrimes		if (c == '\n' || c == -1)
2014Srgrimes		    db_error("Bad escape\n");
2024Srgrimes	    }
2034Srgrimes	    *cp++ = c;
2044Srgrimes	    while (1) {
2054Srgrimes		c = db_read_char();
2064Srgrimes		if ((c >= 'A' && c <= 'Z') ||
2074Srgrimes		    (c >= 'a' && c <= 'z') ||
2084Srgrimes		    (c >= '0' && c <= '9') ||
2094Srgrimes		    c == '_' || c == '\\' || c == ':')
2104Srgrimes		{
2114Srgrimes		    if (c == '\\') {
2124Srgrimes			c = db_read_char();
2134Srgrimes			if (c == '\n' || c == -1)
2144Srgrimes			    db_error("Bad escape\n");
2154Srgrimes		    }
2164Srgrimes		    *cp++ = c;
2174Srgrimes		    if (cp == db_tok_string+sizeof(db_tok_string)) {
2184Srgrimes			db_error("String too long\n");
2194Srgrimes			db_flush_lex();
2204Srgrimes			return (tEOF);
2214Srgrimes		    }
2224Srgrimes		    continue;
2234Srgrimes		}
2244Srgrimes		else {
2254Srgrimes		    *cp = '\0';
2264Srgrimes		    break;
2274Srgrimes		}
2284Srgrimes	    }
2294Srgrimes	    db_unread_char(c);
2304Srgrimes	    return (tIDENT);
2314Srgrimes	}
2324Srgrimes
2334Srgrimes	switch (c) {
2344Srgrimes	    case '+':
2354Srgrimes		return (tPLUS);
2364Srgrimes	    case '-':
2374Srgrimes		return (tMINUS);
2384Srgrimes	    case '.':
2394Srgrimes		c = db_read_char();
2404Srgrimes		if (c == '.')
2414Srgrimes		    return (tDOTDOT);
2424Srgrimes		db_unread_char(c);
2434Srgrimes		return (tDOT);
2444Srgrimes	    case '*':
2454Srgrimes		return (tSTAR);
2464Srgrimes	    case '/':
2474Srgrimes		return (tSLASH);
2484Srgrimes	    case '=':
2494Srgrimes		return (tEQ);
2504Srgrimes	    case '%':
2514Srgrimes		return (tPCT);
2524Srgrimes	    case '#':
2534Srgrimes		return (tHASH);
2544Srgrimes	    case '(':
2554Srgrimes		return (tLPAREN);
2564Srgrimes	    case ')':
2574Srgrimes		return (tRPAREN);
2584Srgrimes	    case ',':
2594Srgrimes		return (tCOMMA);
2604Srgrimes	    case '"':
2614Srgrimes		return (tDITTO);
2624Srgrimes	    case '$':
2634Srgrimes		return (tDOLLAR);
2644Srgrimes	    case '!':
2654Srgrimes		return (tEXCL);
2664Srgrimes	    case '<':
2674Srgrimes		c = db_read_char();
2684Srgrimes		if (c == '<')
2694Srgrimes		    return (tSHIFT_L);
2704Srgrimes		db_unread_char(c);
2714Srgrimes		break;
2724Srgrimes	    case '>':
2734Srgrimes		c = db_read_char();
2744Srgrimes		if (c == '>')
2754Srgrimes		    return (tSHIFT_R);
2764Srgrimes		db_unread_char(c);
2774Srgrimes		break;
2784Srgrimes	    case -1:
2794Srgrimes		return (tEOF);
2804Srgrimes	}
2814Srgrimes	db_printf("Bad character\n");
2824Srgrimes	db_flush_lex();
2834Srgrimes	return (tEOF);
2844Srgrimes}
285