db_lex.c revision 4
/*
 * Mach Operating System
 * Copyright (c) 1991,1990 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */
/*
 * HISTORY
 * $Log: db_lex.c,v $
 * Revision 1.1  1992/03/25  21:45:13  pace
 * Initial revision
 *
 * Revision 2.3  91/02/05  17:06:36  mrt
 * 	Changed to new Mach copyright
 * 	[91/01/31  16:18:20  mrt]
 *
 * Revision 2.2  90/08/27  21:51:10  dbg
 * 	Add 'dotdot' token.
 * 	[90/08/22            dbg]
 *
 * 	Allow backslash to quote any character into an identifier.
 * 	Allow colon in identifier for symbol table qualification.
 * 	[90/08/16            dbg]
 * 	Reduce lint.
 * 	[90/08/07            dbg]
 * 	Created.
 * 	[90/07/25            dbg]
 *
 */
/*
 *	Author: David B. Golub, Carnegie Mellon University
 *	Date:	7/90
 */
/*
 * Lexical analyzer.
 */
564Srgrimes#include <ddb/db_lex.h>
574Srgrimes
/* Buffer holding the input line currently being scanned. */
char	db_line[120];
/* Next character of db_line to be handed out by db_read_char(). */
char	*db_lp;
/* End of the valid data in db_line; reading stops when db_lp reaches it. */
char	*db_endlp;
604Srgrimes
614Srgrimesint
624Srgrimesdb_read_line()
634Srgrimes{
644Srgrimes	int	i;
654Srgrimes
664Srgrimes	i = db_readline(db_line, sizeof(db_line));
674Srgrimes	if (i == 0)
684Srgrimes	    return (0);	/* EOI */
694Srgrimes	db_lp = db_line;
704Srgrimes	db_endlp = db_lp + i;
714Srgrimes	return (i);
724Srgrimes}
734Srgrimes
744Srgrimesvoid
754Srgrimesdb_flush_line()
764Srgrimes{
774Srgrimes	db_lp = db_line;
784Srgrimes	db_endlp = db_line;
794Srgrimes}
804Srgrimes
814Srgrimesint	db_look_char = 0;
824Srgrimes
834Srgrimesint
844Srgrimesdb_read_char()
854Srgrimes{
864Srgrimes	int	c;
874Srgrimes
884Srgrimes	if (db_look_char != 0) {
894Srgrimes	    c = db_look_char;
904Srgrimes	    db_look_char = 0;
914Srgrimes	}
924Srgrimes	else if (db_lp >= db_endlp)
934Srgrimes	    c = -1;
944Srgrimes	else
954Srgrimes	    c = *db_lp++;
964Srgrimes	return (c);
974Srgrimes}
984Srgrimes
994Srgrimesvoid
1004Srgrimesdb_unread_char(c)
1014Srgrimes{
1024Srgrimes	db_look_char = c;
1034Srgrimes}
1044Srgrimes
/*
 * One-token pushback slot; 0 means no token is pending.
 */
int	db_look_token = 0;

/*
 * Push token t back so that the next db_read_token() returns it
 * instead of scanning new input.  Only a single token is remembered.
 */
void
db_unread_token(t)
	int	t;
{
	db_look_token = t;
}
1134Srgrimes
1144Srgrimesint
1154Srgrimesdb_read_token()
1164Srgrimes{
1174Srgrimes	int	t;
1184Srgrimes
1194Srgrimes	if (db_look_token) {
1204Srgrimes	    t = db_look_token;
1214Srgrimes	    db_look_token = 0;
1224Srgrimes	}
1234Srgrimes	else
1244Srgrimes	    t = db_lex();
1254Srgrimes	return (t);
1264Srgrimes}
1274Srgrimes
1284Srgrimesint	db_tok_number;
1294Srgrimeschar	db_tok_string[TOK_STRING_SIZE];
1304Srgrimes
1314Srgrimesint	db_radix = 16;
1324Srgrimes
1334Srgrimesvoid
1344Srgrimesdb_flush_lex()
1354Srgrimes{
1364Srgrimes	db_flush_line();
1374Srgrimes	db_look_char = 0;
1384Srgrimes	db_look_token = 0;
1394Srgrimes}
1404Srgrimes
1414Srgrimesint
1424Srgrimesdb_lex()
1434Srgrimes{
1444Srgrimes	int	c;
1454Srgrimes
1464Srgrimes	c = db_read_char();
1474Srgrimes	while (c <= ' ' || c > '~') {
1484Srgrimes	    if (c == '\n' || c == -1)
1494Srgrimes		return (tEOL);
1504Srgrimes	    c = db_read_char();
1514Srgrimes	}
1524Srgrimes
1534Srgrimes	if (c >= '0' && c <= '9') {
1544Srgrimes	    /* number */
1554Srgrimes	    int	r, digit;
1564Srgrimes
1574Srgrimes	    if (c > '0')
1584Srgrimes		r = db_radix;
1594Srgrimes	    else {
1604Srgrimes		c = db_read_char();
1614Srgrimes		if (c == 'O' || c == 'o')
1624Srgrimes		    r = 8;
1634Srgrimes		else if (c == 'T' || c == 't')
1644Srgrimes		    r = 10;
1654Srgrimes		else if (c == 'X' || c == 'x')
1664Srgrimes		    r = 16;
1674Srgrimes		else {
1684Srgrimes		    r = db_radix;
1694Srgrimes		    db_unread_char(c);
1704Srgrimes		}
1714Srgrimes		c = db_read_char();
1724Srgrimes	    }
1734Srgrimes	    db_tok_number = 0;
1744Srgrimes	    for (;;) {
1754Srgrimes		if (c >= '0' && c <= ((r == 8) ? '7' : '9'))
1764Srgrimes		    digit = c - '0';
1774Srgrimes		else if (r == 16 && ((c >= 'A' && c <= 'F') ||
1784Srgrimes				     (c >= 'a' && c <= 'f'))) {
1794Srgrimes		    if (c >= 'a')
1804Srgrimes			digit = c - 'a' + 10;
1814Srgrimes		    else if (c >= 'A')
1824Srgrimes			digit = c - 'A' + 10;
1834Srgrimes		}
1844Srgrimes		else
1854Srgrimes		    break;
1864Srgrimes		db_tok_number = db_tok_number * r + digit;
1874Srgrimes		c = db_read_char();
1884Srgrimes	    }
1894Srgrimes	    if ((c >= '0' && c <= '9') ||
1904Srgrimes		(c >= 'A' && c <= 'Z') ||
1914Srgrimes		(c >= 'a' && c <= 'z') ||
1924Srgrimes		(c == '_'))
1934Srgrimes	    {
1944Srgrimes		db_error("Bad character in number\n");
1954Srgrimes		db_flush_lex();
1964Srgrimes		return (tEOF);
1974Srgrimes	    }
1984Srgrimes	    db_unread_char(c);
1994Srgrimes	    return (tNUMBER);
2004Srgrimes	}
2014Srgrimes	if ((c >= 'A' && c <= 'Z') ||
2024Srgrimes	    (c >= 'a' && c <= 'z') ||
2034Srgrimes	    c == '_' || c == '\\')
2044Srgrimes	{
2054Srgrimes	    /* string */
2064Srgrimes	    char *cp;
2074Srgrimes
2084Srgrimes	    cp = db_tok_string;
2094Srgrimes	    if (c == '\\') {
2104Srgrimes		c = db_read_char();
2114Srgrimes		if (c == '\n' || c == -1)
2124Srgrimes		    db_error("Bad escape\n");
2134Srgrimes	    }
2144Srgrimes	    *cp++ = c;
2154Srgrimes	    while (1) {
2164Srgrimes		c = db_read_char();
2174Srgrimes		if ((c >= 'A' && c <= 'Z') ||
2184Srgrimes		    (c >= 'a' && c <= 'z') ||
2194Srgrimes		    (c >= '0' && c <= '9') ||
2204Srgrimes		    c == '_' || c == '\\' || c == ':')
2214Srgrimes		{
2224Srgrimes		    if (c == '\\') {
2234Srgrimes			c = db_read_char();
2244Srgrimes			if (c == '\n' || c == -1)
2254Srgrimes			    db_error("Bad escape\n");
2264Srgrimes		    }
2274Srgrimes		    *cp++ = c;
2284Srgrimes		    if (cp == db_tok_string+sizeof(db_tok_string)) {
2294Srgrimes			db_error("String too long\n");
2304Srgrimes			db_flush_lex();
2314Srgrimes			return (tEOF);
2324Srgrimes		    }
2334Srgrimes		    continue;
2344Srgrimes		}
2354Srgrimes		else {
2364Srgrimes		    *cp = '\0';
2374Srgrimes		    break;
2384Srgrimes		}
2394Srgrimes	    }
2404Srgrimes	    db_unread_char(c);
2414Srgrimes	    return (tIDENT);
2424Srgrimes	}
2434Srgrimes
2444Srgrimes	switch (c) {
2454Srgrimes	    case '+':
2464Srgrimes		return (tPLUS);
2474Srgrimes	    case '-':
2484Srgrimes		return (tMINUS);
2494Srgrimes	    case '.':
2504Srgrimes		c = db_read_char();
2514Srgrimes		if (c == '.')
2524Srgrimes		    return (tDOTDOT);
2534Srgrimes		db_unread_char(c);
2544Srgrimes		return (tDOT);
2554Srgrimes	    case '*':
2564Srgrimes		return (tSTAR);
2574Srgrimes	    case '/':
2584Srgrimes		return (tSLASH);
2594Srgrimes	    case '=':
2604Srgrimes		return (tEQ);
2614Srgrimes	    case '%':
2624Srgrimes		return (tPCT);
2634Srgrimes	    case '#':
2644Srgrimes		return (tHASH);
2654Srgrimes	    case '(':
2664Srgrimes		return (tLPAREN);
2674Srgrimes	    case ')':
2684Srgrimes		return (tRPAREN);
2694Srgrimes	    case ',':
2704Srgrimes		return (tCOMMA);
2714Srgrimes	    case '"':
2724Srgrimes		return (tDITTO);
2734Srgrimes	    case '$':
2744Srgrimes		return (tDOLLAR);
2754Srgrimes	    case '!':
2764Srgrimes		return (tEXCL);
2774Srgrimes	    case '<':
2784Srgrimes		c = db_read_char();
2794Srgrimes		if (c == '<')
2804Srgrimes		    return (tSHIFT_L);
2814Srgrimes		db_unread_char(c);
2824Srgrimes		break;
2834Srgrimes	    case '>':
2844Srgrimes		c = db_read_char();
2854Srgrimes		if (c == '>')
2864Srgrimes		    return (tSHIFT_R);
2874Srgrimes		db_unread_char(c);
2884Srgrimes		break;
2894Srgrimes	    case -1:
2904Srgrimes		return (tEOF);
2914Srgrimes	}
2924Srgrimes	db_printf("Bad character\n");
2934Srgrimes	db_flush_lex();
2944Srgrimes	return (tEOF);
2954Srgrimes}
296