/* db_lex.c revision 8876 */
18876Srgrimes/* 24Srgrimes * Mach Operating System 34Srgrimes * Copyright (c) 1991,1990 Carnegie Mellon University 44Srgrimes * All Rights Reserved. 58876Srgrimes * 64Srgrimes * Permission to use, copy, modify and distribute this software and its 74Srgrimes * documentation is hereby granted, provided that both the copyright 84Srgrimes * notice and this permission notice appear in all copies of the 94Srgrimes * software, derivative works or modified versions, and any portions 104Srgrimes * thereof, and that both notices appear in supporting documentation. 118876Srgrimes * 128876Srgrimes * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS 134Srgrimes * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR 144Srgrimes * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 158876Srgrimes * 164Srgrimes * Carnegie Mellon requests users of this software to return to 178876Srgrimes * 184Srgrimes * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 194Srgrimes * School of Computer Science 204Srgrimes * Carnegie Mellon University 214Srgrimes * Pittsburgh PA 15213-3890 228876Srgrimes * 234Srgrimes * any improvements or extensions that they make and grant Carnegie the 244Srgrimes * rights to redistribute these changes. 254Srgrimes * 268876Srgrimes * $Id: db_lex.c,v 1.5 1994/08/18 22:34:23 wollman Exp $ 274Srgrimes */ 28623Srgrimes 294Srgrimes/* 304Srgrimes * Author: David B. Golub, Carnegie Mellon University 314Srgrimes * Date: 7/90 324Srgrimes */ 334Srgrimes/* 344Srgrimes * Lexical analyzer. 
354Srgrimes */ 362056Swollman#include <sys/param.h> 372056Swollman#include <sys/systm.h> 382056Swollman#include <ddb/ddb.h> 394Srgrimes#include <ddb/db_lex.h> 404Srgrimes 414Srgrimeschar db_line[120]; 424Srgrimeschar * db_lp, *db_endlp; 434Srgrimes 442112Swollmanstatic int db_lex(void); 452112Swollman 464Srgrimesint 474Srgrimesdb_read_line() 484Srgrimes{ 494Srgrimes int i; 504Srgrimes 514Srgrimes i = db_readline(db_line, sizeof(db_line)); 524Srgrimes if (i == 0) 534Srgrimes return (0); /* EOI */ 544Srgrimes db_lp = db_line; 554Srgrimes db_endlp = db_lp + i; 564Srgrimes return (i); 574Srgrimes} 584Srgrimes 594Srgrimesvoid 604Srgrimesdb_flush_line() 614Srgrimes{ 624Srgrimes db_lp = db_line; 634Srgrimes db_endlp = db_line; 644Srgrimes} 654Srgrimes 664Srgrimesint db_look_char = 0; 674Srgrimes 684Srgrimesint 694Srgrimesdb_read_char() 704Srgrimes{ 714Srgrimes int c; 724Srgrimes 734Srgrimes if (db_look_char != 0) { 744Srgrimes c = db_look_char; 754Srgrimes db_look_char = 0; 764Srgrimes } 774Srgrimes else if (db_lp >= db_endlp) 784Srgrimes c = -1; 798876Srgrimes else 804Srgrimes c = *db_lp++; 814Srgrimes return (c); 824Srgrimes} 834Srgrimes 844Srgrimesvoid 854Srgrimesdb_unread_char(c) 86798Swollman int c; 874Srgrimes{ 884Srgrimes db_look_char = c; 894Srgrimes} 904Srgrimes 914Srgrimesint db_look_token = 0; 924Srgrimes 934Srgrimesvoid 944Srgrimesdb_unread_token(t) 954Srgrimes int t; 964Srgrimes{ 974Srgrimes db_look_token = t; 984Srgrimes} 994Srgrimes 1004Srgrimesint 1014Srgrimesdb_read_token() 1024Srgrimes{ 1034Srgrimes int t; 1044Srgrimes 1054Srgrimes if (db_look_token) { 1064Srgrimes t = db_look_token; 1074Srgrimes db_look_token = 0; 1084Srgrimes } 1094Srgrimes else 1104Srgrimes t = db_lex(); 1114Srgrimes return (t); 1124Srgrimes} 1134Srgrimes 1144Srgrimesint db_tok_number; 1154Srgrimeschar db_tok_string[TOK_STRING_SIZE]; 1164Srgrimes 1174Srgrimesint db_radix = 16; 1184Srgrimes 1194Srgrimesvoid 1204Srgrimesdb_flush_lex() 1214Srgrimes{ 1224Srgrimes db_flush_line(); 
1234Srgrimes db_look_char = 0; 1244Srgrimes db_look_token = 0; 1254Srgrimes} 1264Srgrimes 1272112Swollmanstatic int 1284Srgrimesdb_lex() 1294Srgrimes{ 1304Srgrimes int c; 1314Srgrimes 1324Srgrimes c = db_read_char(); 1334Srgrimes while (c <= ' ' || c > '~') { 1344Srgrimes if (c == '\n' || c == -1) 1354Srgrimes return (tEOL); 1364Srgrimes c = db_read_char(); 1374Srgrimes } 1384Srgrimes 1394Srgrimes if (c >= '0' && c <= '9') { 1404Srgrimes /* number */ 141798Swollman int r, digit = 0; 1424Srgrimes 1434Srgrimes if (c > '0') 1444Srgrimes r = db_radix; 1454Srgrimes else { 1464Srgrimes c = db_read_char(); 1474Srgrimes if (c == 'O' || c == 'o') 1484Srgrimes r = 8; 1494Srgrimes else if (c == 'T' || c == 't') 1504Srgrimes r = 10; 1514Srgrimes else if (c == 'X' || c == 'x') 1524Srgrimes r = 16; 1534Srgrimes else { 1544Srgrimes r = db_radix; 1554Srgrimes db_unread_char(c); 1564Srgrimes } 1574Srgrimes c = db_read_char(); 1584Srgrimes } 1594Srgrimes db_tok_number = 0; 1604Srgrimes for (;;) { 1614Srgrimes if (c >= '0' && c <= ((r == 8) ? 
'7' : '9')) 1624Srgrimes digit = c - '0'; 1634Srgrimes else if (r == 16 && ((c >= 'A' && c <= 'F') || 1644Srgrimes (c >= 'a' && c <= 'f'))) { 1654Srgrimes if (c >= 'a') 1664Srgrimes digit = c - 'a' + 10; 1674Srgrimes else if (c >= 'A') 1684Srgrimes digit = c - 'A' + 10; 1694Srgrimes } 1704Srgrimes else 1714Srgrimes break; 1724Srgrimes db_tok_number = db_tok_number * r + digit; 1734Srgrimes c = db_read_char(); 1744Srgrimes } 1754Srgrimes if ((c >= '0' && c <= '9') || 1764Srgrimes (c >= 'A' && c <= 'Z') || 1774Srgrimes (c >= 'a' && c <= 'z') || 1784Srgrimes (c == '_')) 1794Srgrimes { 1804Srgrimes db_error("Bad character in number\n"); 1814Srgrimes db_flush_lex(); 1824Srgrimes return (tEOF); 1834Srgrimes } 1844Srgrimes db_unread_char(c); 1854Srgrimes return (tNUMBER); 1864Srgrimes } 1874Srgrimes if ((c >= 'A' && c <= 'Z') || 1884Srgrimes (c >= 'a' && c <= 'z') || 1894Srgrimes c == '_' || c == '\\') 1904Srgrimes { 1914Srgrimes /* string */ 1924Srgrimes char *cp; 1934Srgrimes 1944Srgrimes cp = db_tok_string; 1954Srgrimes if (c == '\\') { 1964Srgrimes c = db_read_char(); 1974Srgrimes if (c == '\n' || c == -1) 1984Srgrimes db_error("Bad escape\n"); 1994Srgrimes } 2004Srgrimes *cp++ = c; 2014Srgrimes while (1) { 2024Srgrimes c = db_read_char(); 2034Srgrimes if ((c >= 'A' && c <= 'Z') || 2044Srgrimes (c >= 'a' && c <= 'z') || 2054Srgrimes (c >= '0' && c <= '9') || 2064Srgrimes c == '_' || c == '\\' || c == ':') 2074Srgrimes { 2084Srgrimes if (c == '\\') { 2094Srgrimes c = db_read_char(); 2104Srgrimes if (c == '\n' || c == -1) 2114Srgrimes db_error("Bad escape\n"); 2124Srgrimes } 2134Srgrimes *cp++ = c; 2144Srgrimes if (cp == db_tok_string+sizeof(db_tok_string)) { 2154Srgrimes db_error("String too long\n"); 2164Srgrimes db_flush_lex(); 2174Srgrimes return (tEOF); 2184Srgrimes } 2194Srgrimes continue; 2204Srgrimes } 2214Srgrimes else { 2224Srgrimes *cp = '\0'; 2234Srgrimes break; 2244Srgrimes } 2254Srgrimes } 2264Srgrimes db_unread_char(c); 2274Srgrimes return (tIDENT); 
2284Srgrimes } 2294Srgrimes 2304Srgrimes switch (c) { 2314Srgrimes case '+': 2324Srgrimes return (tPLUS); 2334Srgrimes case '-': 2344Srgrimes return (tMINUS); 2354Srgrimes case '.': 2364Srgrimes c = db_read_char(); 2374Srgrimes if (c == '.') 2384Srgrimes return (tDOTDOT); 2394Srgrimes db_unread_char(c); 2404Srgrimes return (tDOT); 2414Srgrimes case '*': 2424Srgrimes return (tSTAR); 2434Srgrimes case '/': 2444Srgrimes return (tSLASH); 2454Srgrimes case '=': 2464Srgrimes return (tEQ); 2474Srgrimes case '%': 2484Srgrimes return (tPCT); 2494Srgrimes case '#': 2504Srgrimes return (tHASH); 2514Srgrimes case '(': 2524Srgrimes return (tLPAREN); 2534Srgrimes case ')': 2544Srgrimes return (tRPAREN); 2554Srgrimes case ',': 2564Srgrimes return (tCOMMA); 2574Srgrimes case '"': 2584Srgrimes return (tDITTO); 2594Srgrimes case '$': 2604Srgrimes return (tDOLLAR); 2614Srgrimes case '!': 2624Srgrimes return (tEXCL); 2634Srgrimes case '<': 2644Srgrimes c = db_read_char(); 2654Srgrimes if (c == '<') 2664Srgrimes return (tSHIFT_L); 2674Srgrimes db_unread_char(c); 2684Srgrimes break; 2694Srgrimes case '>': 2704Srgrimes c = db_read_char(); 2714Srgrimes if (c == '>') 2724Srgrimes return (tSHIFT_R); 2734Srgrimes db_unread_char(c); 2744Srgrimes break; 2754Srgrimes case -1: 2764Srgrimes return (tEOF); 2774Srgrimes } 2784Srgrimes db_printf("Bad character\n"); 2794Srgrimes db_flush_lex(); 2804Srgrimes return (tEOF); 2814Srgrimes} 282