db_lex.c revision 623
14Srgrimes/* 24Srgrimes * Mach Operating System 34Srgrimes * Copyright (c) 1991,1990 Carnegie Mellon University 44Srgrimes * All Rights Reserved. 54Srgrimes * 64Srgrimes * Permission to use, copy, modify and distribute this software and its 74Srgrimes * documentation is hereby granted, provided that both the copyright 84Srgrimes * notice and this permission notice appear in all copies of the 94Srgrimes * software, derivative works or modified versions, and any portions 104Srgrimes * thereof, and that both notices appear in supporting documentation. 114Srgrimes * 124Srgrimes * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS 134Srgrimes * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR 144Srgrimes * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 154Srgrimes * 164Srgrimes * Carnegie Mellon requests users of this software to return to 174Srgrimes * 184Srgrimes * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 194Srgrimes * School of Computer Science 204Srgrimes * Carnegie Mellon University 214Srgrimes * Pittsburgh PA 15213-3890 224Srgrimes * 234Srgrimes * any improvements or extensions that they make and grant Carnegie the 244Srgrimes * rights to redistribute these changes. 254Srgrimes * 26623Srgrimes * $Id$ 274Srgrimes */ 28623Srgrimes 294Srgrimes/* 304Srgrimes * Author: David B. Golub, Carnegie Mellon University 314Srgrimes * Date: 7/90 324Srgrimes */ 334Srgrimes/* 344Srgrimes * Lexical analyzer. 354Srgrimes */ 364Srgrimes#include <ddb/db_lex.h> 374Srgrimes 384Srgrimeschar db_line[120]; 394Srgrimeschar * db_lp, *db_endlp; 404Srgrimes 414Srgrimesint 424Srgrimesdb_read_line() 434Srgrimes{ 444Srgrimes int i; 454Srgrimes 464Srgrimes i = db_readline(db_line, sizeof(db_line)); 474Srgrimes if (i == 0) 484Srgrimes return (0); /* EOI */ 494Srgrimes db_lp = db_line; 504Srgrimes db_endlp = db_lp + i; 514Srgrimes return (i); 524Srgrimes} 534Srgrimes 544Srgrimesvoid 554Srgrimesdb_flush_line() 564Srgrimes{ 574Srgrimes db_lp = db_line; 584Srgrimes db_endlp = db_line; 594Srgrimes} 604Srgrimes 614Srgrimesint db_look_char = 0; 624Srgrimes 634Srgrimesint 644Srgrimesdb_read_char() 654Srgrimes{ 664Srgrimes int c; 674Srgrimes 684Srgrimes if (db_look_char != 0) { 694Srgrimes c = db_look_char; 704Srgrimes db_look_char = 0; 714Srgrimes } 724Srgrimes else if (db_lp >= db_endlp) 734Srgrimes c = -1; 744Srgrimes else 754Srgrimes c = *db_lp++; 764Srgrimes return (c); 774Srgrimes} 784Srgrimes 794Srgrimesvoid 804Srgrimesdb_unread_char(c) 814Srgrimes{ 824Srgrimes db_look_char = c; 834Srgrimes} 844Srgrimes 854Srgrimesint db_look_token = 0; 864Srgrimes 874Srgrimesvoid 884Srgrimesdb_unread_token(t) 894Srgrimes int t; 904Srgrimes{ 914Srgrimes db_look_token = t; 924Srgrimes} 934Srgrimes 944Srgrimesint 954Srgrimesdb_read_token() 964Srgrimes{ 974Srgrimes int t; 984Srgrimes 994Srgrimes if (db_look_token) { 1004Srgrimes t = db_look_token; 1014Srgrimes db_look_token = 0; 1024Srgrimes } 1034Srgrimes else 1044Srgrimes t = db_lex(); 1054Srgrimes return (t); 1064Srgrimes} 1074Srgrimes 1084Srgrimesint db_tok_number; 1094Srgrimeschar db_tok_string[TOK_STRING_SIZE]; 1104Srgrimes 1114Srgrimesint db_radix = 16; 1124Srgrimes 1134Srgrimesvoid 1144Srgrimesdb_flush_lex() 1154Srgrimes{ 1164Srgrimes db_flush_line(); 1174Srgrimes db_look_char = 0; 1184Srgrimes db_look_token = 0; 1194Srgrimes} 1204Srgrimes 1214Srgrimesint 1224Srgrimesdb_lex() 1234Srgrimes{ 1244Srgrimes int c; 1254Srgrimes 1264Srgrimes c = db_read_char(); 1274Srgrimes while (c <= ' ' || c > '~') { 1284Srgrimes if (c == '\n' || c == -1) 1294Srgrimes return (tEOL); 1304Srgrimes c = db_read_char(); 1314Srgrimes } 1324Srgrimes 1334Srgrimes if (c >= '0' && c <= '9') { 1344Srgrimes /* number */ 1354Srgrimes int r, digit; 1364Srgrimes 1374Srgrimes if (c > '0') 1384Srgrimes r = db_radix; 1394Srgrimes else { 1404Srgrimes c = db_read_char(); 1414Srgrimes if (c == 'O' || c == 'o') 1424Srgrimes r = 8; 1434Srgrimes else if (c == 'T' || c == 't') 1444Srgrimes r = 10; 1454Srgrimes else if (c == 'X' || c == 'x') 1464Srgrimes r = 16; 1474Srgrimes else { 1484Srgrimes r = db_radix; 1494Srgrimes db_unread_char(c); 1504Srgrimes } 1514Srgrimes c = db_read_char(); 1524Srgrimes } 1534Srgrimes db_tok_number = 0; 1544Srgrimes for (;;) { 1554Srgrimes if (c >= '0' && c <= ((r == 8) ? '7' : '9')) 1564Srgrimes digit = c - '0'; 1574Srgrimes else if (r == 16 && ((c >= 'A' && c <= 'F') || 1584Srgrimes (c >= 'a' && c <= 'f'))) { 1594Srgrimes if (c >= 'a') 1604Srgrimes digit = c - 'a' + 10; 1614Srgrimes else if (c >= 'A') 1624Srgrimes digit = c - 'A' + 10; 1634Srgrimes } 1644Srgrimes else 1654Srgrimes break; 1664Srgrimes db_tok_number = db_tok_number * r + digit; 1674Srgrimes c = db_read_char(); 1684Srgrimes } 1694Srgrimes if ((c >= '0' && c <= '9') || 1704Srgrimes (c >= 'A' && c <= 'Z') || 1714Srgrimes (c >= 'a' && c <= 'z') || 1724Srgrimes (c == '_')) 1734Srgrimes { 1744Srgrimes db_error("Bad character in number\n"); 1754Srgrimes db_flush_lex(); 1764Srgrimes return (tEOF); 1774Srgrimes } 1784Srgrimes db_unread_char(c); 1794Srgrimes return (tNUMBER); 1804Srgrimes } 1814Srgrimes if ((c >= 'A' && c <= 'Z') || 1824Srgrimes (c >= 'a' && c <= 'z') || 1834Srgrimes c == '_' || c == '\\') 1844Srgrimes { 1854Srgrimes /* string */ 1864Srgrimes char *cp; 1874Srgrimes 1884Srgrimes cp = db_tok_string; 1894Srgrimes if (c == '\\') { 1904Srgrimes c = db_read_char(); 1914Srgrimes if (c == '\n' || c == -1) 1924Srgrimes db_error("Bad escape\n"); 1934Srgrimes } 1944Srgrimes *cp++ = c; 1954Srgrimes while (1) { 1964Srgrimes c = db_read_char(); 1974Srgrimes if ((c >= 'A' && c <= 'Z') || 1984Srgrimes (c >= 'a' && c <= 'z') || 1994Srgrimes (c >= '0' && c <= '9') || 2004Srgrimes c == '_' || c == '\\' || c == ':') 2014Srgrimes { 2024Srgrimes if (c == '\\') { 2034Srgrimes c = db_read_char(); 2044Srgrimes if (c == '\n' || c == -1) 2054Srgrimes db_error("Bad escape\n"); 2064Srgrimes } 2074Srgrimes *cp++ = c; 2084Srgrimes if (cp == db_tok_string+sizeof(db_tok_string)) { 2094Srgrimes db_error("String too long\n"); 2104Srgrimes db_flush_lex(); 2114Srgrimes return (tEOF); 2124Srgrimes } 2134Srgrimes continue; 2144Srgrimes } 2154Srgrimes else { 2164Srgrimes *cp = '\0'; 2174Srgrimes break; 2184Srgrimes } 2194Srgrimes } 2204Srgrimes db_unread_char(c); 2214Srgrimes return (tIDENT); 2224Srgrimes } 2234Srgrimes 2244Srgrimes switch (c) { 2254Srgrimes case '+': 2264Srgrimes return (tPLUS); 2274Srgrimes case '-': 2284Srgrimes return (tMINUS); 2294Srgrimes case '.': 2304Srgrimes c = db_read_char(); 2314Srgrimes if (c == '.') 2324Srgrimes return (tDOTDOT); 2334Srgrimes db_unread_char(c); 2344Srgrimes return (tDOT); 2354Srgrimes case '*': 2364Srgrimes return (tSTAR); 2374Srgrimes case '/': 2384Srgrimes return (tSLASH); 2394Srgrimes case '=': 2404Srgrimes return (tEQ); 2414Srgrimes case '%': 2424Srgrimes return (tPCT); 2434Srgrimes case '#': 2444Srgrimes return (tHASH); 2454Srgrimes case '(': 2464Srgrimes return (tLPAREN); 2474Srgrimes case ')': 2484Srgrimes return (tRPAREN); 2494Srgrimes case ',': 2504Srgrimes return (tCOMMA); 2514Srgrimes case '"': 2524Srgrimes return (tDITTO); 2534Srgrimes case '$': 2544Srgrimes return (tDOLLAR); 2554Srgrimes case '!': 2564Srgrimes return (tEXCL); 2574Srgrimes case '<': 2584Srgrimes c = db_read_char(); 2594Srgrimes if (c == '<') 2604Srgrimes return (tSHIFT_L); 2614Srgrimes db_unread_char(c); 2624Srgrimes break; 2634Srgrimes case '>': 2644Srgrimes c = db_read_char(); 2654Srgrimes if (c == '>') 2664Srgrimes return (tSHIFT_R); 2674Srgrimes db_unread_char(c); 2684Srgrimes break; 2694Srgrimes case -1: 2704Srgrimes return (tEOF); 2714Srgrimes } 2724Srgrimes db_printf("Bad character\n"); 2734Srgrimes db_flush_lex(); 2744Srgrimes return (tEOF); 2754Srgrimes} 276