db_lex.c revision 623
/*
 * Mach Operating System
 * Copyright (c) 1991,1990 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 *
 *	$Id$
 */

/*
 *	Author: David B. Golub, Carnegie Mellon University
 *	Date:	7/90
 */
/*
 * Lexical analyzer.
 */
364Srgrimes#include <ddb/db_lex.h>
374Srgrimes
/*
 * Input line buffer and the cursor pair used to walk it: db_lp is the
 * next character to hand out, db_endlp is one past the last character
 * of the current line.  The line is exhausted when db_lp >= db_endlp.
 */
char	db_line[120];
char	*db_lp, *db_endlp;
404Srgrimes
414Srgrimesint
424Srgrimesdb_read_line()
434Srgrimes{
444Srgrimes	int	i;
454Srgrimes
464Srgrimes	i = db_readline(db_line, sizeof(db_line));
474Srgrimes	if (i == 0)
484Srgrimes	    return (0);	/* EOI */
494Srgrimes	db_lp = db_line;
504Srgrimes	db_endlp = db_lp + i;
514Srgrimes	return (i);
524Srgrimes}
534Srgrimes
544Srgrimesvoid
554Srgrimesdb_flush_line()
564Srgrimes{
574Srgrimes	db_lp = db_line;
584Srgrimes	db_endlp = db_line;
594Srgrimes}
604Srgrimes
614Srgrimesint	db_look_char = 0;
624Srgrimes
634Srgrimesint
644Srgrimesdb_read_char()
654Srgrimes{
664Srgrimes	int	c;
674Srgrimes
684Srgrimes	if (db_look_char != 0) {
694Srgrimes	    c = db_look_char;
704Srgrimes	    db_look_char = 0;
714Srgrimes	}
724Srgrimes	else if (db_lp >= db_endlp)
734Srgrimes	    c = -1;
744Srgrimes	else
754Srgrimes	    c = *db_lp++;
764Srgrimes	return (c);
774Srgrimes}
784Srgrimes
794Srgrimesvoid
804Srgrimesdb_unread_char(c)
814Srgrimes{
824Srgrimes	db_look_char = c;
834Srgrimes}
844Srgrimes
/*
 * One-token pushback slot; 0 means nothing is pending.
 */
int	db_look_token = 0;

/*
 * Push token t back so that the next db_read_token() returns it
 * instead of scanning.  Only a single token is remembered.
 */
void
db_unread_token(t)
	int	t;
{
	db_look_token = t;
}
934Srgrimes
944Srgrimesint
954Srgrimesdb_read_token()
964Srgrimes{
974Srgrimes	int	t;
984Srgrimes
994Srgrimes	if (db_look_token) {
1004Srgrimes	    t = db_look_token;
1014Srgrimes	    db_look_token = 0;
1024Srgrimes	}
1034Srgrimes	else
1044Srgrimes	    t = db_lex();
1054Srgrimes	return (t);
1064Srgrimes}
1074Srgrimes
1084Srgrimesint	db_tok_number;
1094Srgrimeschar	db_tok_string[TOK_STRING_SIZE];
1104Srgrimes
1114Srgrimesint	db_radix = 16;
1124Srgrimes
1134Srgrimesvoid
1144Srgrimesdb_flush_lex()
1154Srgrimes{
1164Srgrimes	db_flush_line();
1174Srgrimes	db_look_char = 0;
1184Srgrimes	db_look_token = 0;
1194Srgrimes}
1204Srgrimes
1214Srgrimesint
1224Srgrimesdb_lex()
1234Srgrimes{
1244Srgrimes	int	c;
1254Srgrimes
1264Srgrimes	c = db_read_char();
1274Srgrimes	while (c <= ' ' || c > '~') {
1284Srgrimes	    if (c == '\n' || c == -1)
1294Srgrimes		return (tEOL);
1304Srgrimes	    c = db_read_char();
1314Srgrimes	}
1324Srgrimes
1334Srgrimes	if (c >= '0' && c <= '9') {
1344Srgrimes	    /* number */
1354Srgrimes	    int	r, digit;
1364Srgrimes
1374Srgrimes	    if (c > '0')
1384Srgrimes		r = db_radix;
1394Srgrimes	    else {
1404Srgrimes		c = db_read_char();
1414Srgrimes		if (c == 'O' || c == 'o')
1424Srgrimes		    r = 8;
1434Srgrimes		else if (c == 'T' || c == 't')
1444Srgrimes		    r = 10;
1454Srgrimes		else if (c == 'X' || c == 'x')
1464Srgrimes		    r = 16;
1474Srgrimes		else {
1484Srgrimes		    r = db_radix;
1494Srgrimes		    db_unread_char(c);
1504Srgrimes		}
1514Srgrimes		c = db_read_char();
1524Srgrimes	    }
1534Srgrimes	    db_tok_number = 0;
1544Srgrimes	    for (;;) {
1554Srgrimes		if (c >= '0' && c <= ((r == 8) ? '7' : '9'))
1564Srgrimes		    digit = c - '0';
1574Srgrimes		else if (r == 16 && ((c >= 'A' && c <= 'F') ||
1584Srgrimes				     (c >= 'a' && c <= 'f'))) {
1594Srgrimes		    if (c >= 'a')
1604Srgrimes			digit = c - 'a' + 10;
1614Srgrimes		    else if (c >= 'A')
1624Srgrimes			digit = c - 'A' + 10;
1634Srgrimes		}
1644Srgrimes		else
1654Srgrimes		    break;
1664Srgrimes		db_tok_number = db_tok_number * r + digit;
1674Srgrimes		c = db_read_char();
1684Srgrimes	    }
1694Srgrimes	    if ((c >= '0' && c <= '9') ||
1704Srgrimes		(c >= 'A' && c <= 'Z') ||
1714Srgrimes		(c >= 'a' && c <= 'z') ||
1724Srgrimes		(c == '_'))
1734Srgrimes	    {
1744Srgrimes		db_error("Bad character in number\n");
1754Srgrimes		db_flush_lex();
1764Srgrimes		return (tEOF);
1774Srgrimes	    }
1784Srgrimes	    db_unread_char(c);
1794Srgrimes	    return (tNUMBER);
1804Srgrimes	}
1814Srgrimes	if ((c >= 'A' && c <= 'Z') ||
1824Srgrimes	    (c >= 'a' && c <= 'z') ||
1834Srgrimes	    c == '_' || c == '\\')
1844Srgrimes	{
1854Srgrimes	    /* string */
1864Srgrimes	    char *cp;
1874Srgrimes
1884Srgrimes	    cp = db_tok_string;
1894Srgrimes	    if (c == '\\') {
1904Srgrimes		c = db_read_char();
1914Srgrimes		if (c == '\n' || c == -1)
1924Srgrimes		    db_error("Bad escape\n");
1934Srgrimes	    }
1944Srgrimes	    *cp++ = c;
1954Srgrimes	    while (1) {
1964Srgrimes		c = db_read_char();
1974Srgrimes		if ((c >= 'A' && c <= 'Z') ||
1984Srgrimes		    (c >= 'a' && c <= 'z') ||
1994Srgrimes		    (c >= '0' && c <= '9') ||
2004Srgrimes		    c == '_' || c == '\\' || c == ':')
2014Srgrimes		{
2024Srgrimes		    if (c == '\\') {
2034Srgrimes			c = db_read_char();
2044Srgrimes			if (c == '\n' || c == -1)
2054Srgrimes			    db_error("Bad escape\n");
2064Srgrimes		    }
2074Srgrimes		    *cp++ = c;
2084Srgrimes		    if (cp == db_tok_string+sizeof(db_tok_string)) {
2094Srgrimes			db_error("String too long\n");
2104Srgrimes			db_flush_lex();
2114Srgrimes			return (tEOF);
2124Srgrimes		    }
2134Srgrimes		    continue;
2144Srgrimes		}
2154Srgrimes		else {
2164Srgrimes		    *cp = '\0';
2174Srgrimes		    break;
2184Srgrimes		}
2194Srgrimes	    }
2204Srgrimes	    db_unread_char(c);
2214Srgrimes	    return (tIDENT);
2224Srgrimes	}
2234Srgrimes
2244Srgrimes	switch (c) {
2254Srgrimes	    case '+':
2264Srgrimes		return (tPLUS);
2274Srgrimes	    case '-':
2284Srgrimes		return (tMINUS);
2294Srgrimes	    case '.':
2304Srgrimes		c = db_read_char();
2314Srgrimes		if (c == '.')
2324Srgrimes		    return (tDOTDOT);
2334Srgrimes		db_unread_char(c);
2344Srgrimes		return (tDOT);
2354Srgrimes	    case '*':
2364Srgrimes		return (tSTAR);
2374Srgrimes	    case '/':
2384Srgrimes		return (tSLASH);
2394Srgrimes	    case '=':
2404Srgrimes		return (tEQ);
2414Srgrimes	    case '%':
2424Srgrimes		return (tPCT);
2434Srgrimes	    case '#':
2444Srgrimes		return (tHASH);
2454Srgrimes	    case '(':
2464Srgrimes		return (tLPAREN);
2474Srgrimes	    case ')':
2484Srgrimes		return (tRPAREN);
2494Srgrimes	    case ',':
2504Srgrimes		return (tCOMMA);
2514Srgrimes	    case '"':
2524Srgrimes		return (tDITTO);
2534Srgrimes	    case '$':
2544Srgrimes		return (tDOLLAR);
2554Srgrimes	    case '!':
2564Srgrimes		return (tEXCL);
2574Srgrimes	    case '<':
2584Srgrimes		c = db_read_char();
2594Srgrimes		if (c == '<')
2604Srgrimes		    return (tSHIFT_L);
2614Srgrimes		db_unread_char(c);
2624Srgrimes		break;
2634Srgrimes	    case '>':
2644Srgrimes		c = db_read_char();
2654Srgrimes		if (c == '>')
2664Srgrimes		    return (tSHIFT_R);
2674Srgrimes		db_unread_char(c);
2684Srgrimes		break;
2694Srgrimes	    case -1:
2704Srgrimes		return (tEOF);
2714Srgrimes	}
2724Srgrimes	db_printf("Bad character\n");
2734Srgrimes	db_flush_lex();
2744Srgrimes	return (tEOF);
2754Srgrimes}
276