db_lex.c revision 8876
18876Srgrimes/*
24Srgrimes * Mach Operating System
34Srgrimes * Copyright (c) 1991,1990 Carnegie Mellon University
44Srgrimes * All Rights Reserved.
58876Srgrimes *
64Srgrimes * Permission to use, copy, modify and distribute this software and its
74Srgrimes * documentation is hereby granted, provided that both the copyright
84Srgrimes * notice and this permission notice appear in all copies of the
94Srgrimes * software, derivative works or modified versions, and any portions
104Srgrimes * thereof, and that both notices appear in supporting documentation.
118876Srgrimes *
128876Srgrimes * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS
134Srgrimes * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
144Srgrimes * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
158876Srgrimes *
164Srgrimes * Carnegie Mellon requests users of this software to return to
178876Srgrimes *
184Srgrimes *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
194Srgrimes *  School of Computer Science
204Srgrimes *  Carnegie Mellon University
214Srgrimes *  Pittsburgh PA 15213-3890
228876Srgrimes *
234Srgrimes * any improvements or extensions that they make and grant Carnegie the
244Srgrimes * rights to redistribute these changes.
254Srgrimes *
268876Srgrimes *	$Id: db_lex.c,v 1.5 1994/08/18 22:34:23 wollman Exp $
274Srgrimes */
28623Srgrimes
294Srgrimes/*
304Srgrimes *	Author: David B. Golub, Carnegie Mellon University
314Srgrimes *	Date:	7/90
324Srgrimes */
/*
 * Lexical analyzer: splits the current debugger input line into tokens.
 */
362056Swollman#include <sys/param.h>
372056Swollman#include <sys/systm.h>
382056Swollman#include <ddb/ddb.h>
394Srgrimes#include <ddb/db_lex.h>
404Srgrimes
/*
 * Current input line and the scan window over it.
 *
 * db_line	buffer that db_read_line() fills.
 * db_lp	next character db_read_char() will hand out.
 * db_endlp	one past the last valid character; when db_lp reaches it
 *		the line is exhausted.
 */
char	db_line[120];
char	*db_lp;
char	*db_endlp;
434Srgrimes
442112Swollmanstatic int db_lex(void);
452112Swollman
464Srgrimesint
474Srgrimesdb_read_line()
484Srgrimes{
494Srgrimes	int	i;
504Srgrimes
514Srgrimes	i = db_readline(db_line, sizeof(db_line));
524Srgrimes	if (i == 0)
534Srgrimes	    return (0);	/* EOI */
544Srgrimes	db_lp = db_line;
554Srgrimes	db_endlp = db_lp + i;
564Srgrimes	return (i);
574Srgrimes}
584Srgrimes
594Srgrimesvoid
604Srgrimesdb_flush_line()
614Srgrimes{
624Srgrimes	db_lp = db_line;
634Srgrimes	db_endlp = db_line;
644Srgrimes}
654Srgrimes
664Srgrimesint	db_look_char = 0;
674Srgrimes
684Srgrimesint
694Srgrimesdb_read_char()
704Srgrimes{
714Srgrimes	int	c;
724Srgrimes
734Srgrimes	if (db_look_char != 0) {
744Srgrimes	    c = db_look_char;
754Srgrimes	    db_look_char = 0;
764Srgrimes	}
774Srgrimes	else if (db_lp >= db_endlp)
784Srgrimes	    c = -1;
798876Srgrimes	else
804Srgrimes	    c = *db_lp++;
814Srgrimes	return (c);
824Srgrimes}
834Srgrimes
844Srgrimesvoid
854Srgrimesdb_unread_char(c)
86798Swollman	int c;
874Srgrimes{
884Srgrimes	db_look_char = c;
894Srgrimes}
904Srgrimes
/*
 * One-token pushback slot used by db_read_token(); zero means "empty".
 */
int	db_look_token = 0;

/*
 * Push token t back so that the next db_read_token() returns it instead
 * of lexing new input.  Only one token is remembered; a later call
 * replaces an earlier one.
 */
void
db_unread_token(t)
	int	t;
{
	db_look_token = t;
}
994Srgrimes
1004Srgrimesint
1014Srgrimesdb_read_token()
1024Srgrimes{
1034Srgrimes	int	t;
1044Srgrimes
1054Srgrimes	if (db_look_token) {
1064Srgrimes	    t = db_look_token;
1074Srgrimes	    db_look_token = 0;
1084Srgrimes	}
1094Srgrimes	else
1104Srgrimes	    t = db_lex();
1114Srgrimes	return (t);
1124Srgrimes}
1134Srgrimes
1144Srgrimesint	db_tok_number;
1154Srgrimeschar	db_tok_string[TOK_STRING_SIZE];
1164Srgrimes
1174Srgrimesint	db_radix = 16;
1184Srgrimes
1194Srgrimesvoid
1204Srgrimesdb_flush_lex()
1214Srgrimes{
1224Srgrimes	db_flush_line();
1234Srgrimes	db_look_char = 0;
1244Srgrimes	db_look_token = 0;
1254Srgrimes}
1264Srgrimes
1272112Swollmanstatic int
1284Srgrimesdb_lex()
1294Srgrimes{
1304Srgrimes	int	c;
1314Srgrimes
1324Srgrimes	c = db_read_char();
1334Srgrimes	while (c <= ' ' || c > '~') {
1344Srgrimes	    if (c == '\n' || c == -1)
1354Srgrimes		return (tEOL);
1364Srgrimes	    c = db_read_char();
1374Srgrimes	}
1384Srgrimes
1394Srgrimes	if (c >= '0' && c <= '9') {
1404Srgrimes	    /* number */
141798Swollman	    int	r, digit = 0;
1424Srgrimes
1434Srgrimes	    if (c > '0')
1444Srgrimes		r = db_radix;
1454Srgrimes	    else {
1464Srgrimes		c = db_read_char();
1474Srgrimes		if (c == 'O' || c == 'o')
1484Srgrimes		    r = 8;
1494Srgrimes		else if (c == 'T' || c == 't')
1504Srgrimes		    r = 10;
1514Srgrimes		else if (c == 'X' || c == 'x')
1524Srgrimes		    r = 16;
1534Srgrimes		else {
1544Srgrimes		    r = db_radix;
1554Srgrimes		    db_unread_char(c);
1564Srgrimes		}
1574Srgrimes		c = db_read_char();
1584Srgrimes	    }
1594Srgrimes	    db_tok_number = 0;
1604Srgrimes	    for (;;) {
1614Srgrimes		if (c >= '0' && c <= ((r == 8) ? '7' : '9'))
1624Srgrimes		    digit = c - '0';
1634Srgrimes		else if (r == 16 && ((c >= 'A' && c <= 'F') ||
1644Srgrimes				     (c >= 'a' && c <= 'f'))) {
1654Srgrimes		    if (c >= 'a')
1664Srgrimes			digit = c - 'a' + 10;
1674Srgrimes		    else if (c >= 'A')
1684Srgrimes			digit = c - 'A' + 10;
1694Srgrimes		}
1704Srgrimes		else
1714Srgrimes		    break;
1724Srgrimes		db_tok_number = db_tok_number * r + digit;
1734Srgrimes		c = db_read_char();
1744Srgrimes	    }
1754Srgrimes	    if ((c >= '0' && c <= '9') ||
1764Srgrimes		(c >= 'A' && c <= 'Z') ||
1774Srgrimes		(c >= 'a' && c <= 'z') ||
1784Srgrimes		(c == '_'))
1794Srgrimes	    {
1804Srgrimes		db_error("Bad character in number\n");
1814Srgrimes		db_flush_lex();
1824Srgrimes		return (tEOF);
1834Srgrimes	    }
1844Srgrimes	    db_unread_char(c);
1854Srgrimes	    return (tNUMBER);
1864Srgrimes	}
1874Srgrimes	if ((c >= 'A' && c <= 'Z') ||
1884Srgrimes	    (c >= 'a' && c <= 'z') ||
1894Srgrimes	    c == '_' || c == '\\')
1904Srgrimes	{
1914Srgrimes	    /* string */
1924Srgrimes	    char *cp;
1934Srgrimes
1944Srgrimes	    cp = db_tok_string;
1954Srgrimes	    if (c == '\\') {
1964Srgrimes		c = db_read_char();
1974Srgrimes		if (c == '\n' || c == -1)
1984Srgrimes		    db_error("Bad escape\n");
1994Srgrimes	    }
2004Srgrimes	    *cp++ = c;
2014Srgrimes	    while (1) {
2024Srgrimes		c = db_read_char();
2034Srgrimes		if ((c >= 'A' && c <= 'Z') ||
2044Srgrimes		    (c >= 'a' && c <= 'z') ||
2054Srgrimes		    (c >= '0' && c <= '9') ||
2064Srgrimes		    c == '_' || c == '\\' || c == ':')
2074Srgrimes		{
2084Srgrimes		    if (c == '\\') {
2094Srgrimes			c = db_read_char();
2104Srgrimes			if (c == '\n' || c == -1)
2114Srgrimes			    db_error("Bad escape\n");
2124Srgrimes		    }
2134Srgrimes		    *cp++ = c;
2144Srgrimes		    if (cp == db_tok_string+sizeof(db_tok_string)) {
2154Srgrimes			db_error("String too long\n");
2164Srgrimes			db_flush_lex();
2174Srgrimes			return (tEOF);
2184Srgrimes		    }
2194Srgrimes		    continue;
2204Srgrimes		}
2214Srgrimes		else {
2224Srgrimes		    *cp = '\0';
2234Srgrimes		    break;
2244Srgrimes		}
2254Srgrimes	    }
2264Srgrimes	    db_unread_char(c);
2274Srgrimes	    return (tIDENT);
2284Srgrimes	}
2294Srgrimes
2304Srgrimes	switch (c) {
2314Srgrimes	    case '+':
2324Srgrimes		return (tPLUS);
2334Srgrimes	    case '-':
2344Srgrimes		return (tMINUS);
2354Srgrimes	    case '.':
2364Srgrimes		c = db_read_char();
2374Srgrimes		if (c == '.')
2384Srgrimes		    return (tDOTDOT);
2394Srgrimes		db_unread_char(c);
2404Srgrimes		return (tDOT);
2414Srgrimes	    case '*':
2424Srgrimes		return (tSTAR);
2434Srgrimes	    case '/':
2444Srgrimes		return (tSLASH);
2454Srgrimes	    case '=':
2464Srgrimes		return (tEQ);
2474Srgrimes	    case '%':
2484Srgrimes		return (tPCT);
2494Srgrimes	    case '#':
2504Srgrimes		return (tHASH);
2514Srgrimes	    case '(':
2524Srgrimes		return (tLPAREN);
2534Srgrimes	    case ')':
2544Srgrimes		return (tRPAREN);
2554Srgrimes	    case ',':
2564Srgrimes		return (tCOMMA);
2574Srgrimes	    case '"':
2584Srgrimes		return (tDITTO);
2594Srgrimes	    case '$':
2604Srgrimes		return (tDOLLAR);
2614Srgrimes	    case '!':
2624Srgrimes		return (tEXCL);
2634Srgrimes	    case '<':
2644Srgrimes		c = db_read_char();
2654Srgrimes		if (c == '<')
2664Srgrimes		    return (tSHIFT_L);
2674Srgrimes		db_unread_char(c);
2684Srgrimes		break;
2694Srgrimes	    case '>':
2704Srgrimes		c = db_read_char();
2714Srgrimes		if (c == '>')
2724Srgrimes		    return (tSHIFT_R);
2734Srgrimes		db_unread_char(c);
2744Srgrimes		break;
2754Srgrimes	    case -1:
2764Srgrimes		return (tEOF);
2774Srgrimes	}
2784Srgrimes	db_printf("Bad character\n");
2794Srgrimes	db_flush_lex();
2804Srgrimes	return (tEOF);
2814Srgrimes}
282