db_lex.c revision 4
14Srgrimes/* 24Srgrimes * Mach Operating System 34Srgrimes * Copyright (c) 1991,1990 Carnegie Mellon University 44Srgrimes * All Rights Reserved. 54Srgrimes * 64Srgrimes * Permission to use, copy, modify and distribute this software and its 74Srgrimes * documentation is hereby granted, provided that both the copyright 84Srgrimes * notice and this permission notice appear in all copies of the 94Srgrimes * software, derivative works or modified versions, and any portions 104Srgrimes * thereof, and that both notices appear in supporting documentation. 114Srgrimes * 124Srgrimes * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS 134Srgrimes * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR 144Srgrimes * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 154Srgrimes * 164Srgrimes * Carnegie Mellon requests users of this software to return to 174Srgrimes * 184Srgrimes * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 194Srgrimes * School of Computer Science 204Srgrimes * Carnegie Mellon University 214Srgrimes * Pittsburgh PA 15213-3890 224Srgrimes * 234Srgrimes * any improvements or extensions that they make and grant Carnegie the 244Srgrimes * rights to redistribute these changes. 254Srgrimes */ 264Srgrimes/* 274Srgrimes * HISTORY 284Srgrimes * $Log: db_lex.c,v $ 294Srgrimes * Revision 1.1 1992/03/25 21:45:13 pace 304Srgrimes * Initial revision 314Srgrimes * 324Srgrimes * Revision 2.3 91/02/05 17:06:36 mrt 334Srgrimes * Changed to new Mach copyright 344Srgrimes * [91/01/31 16:18:20 mrt] 354Srgrimes * 364Srgrimes * Revision 2.2 90/08/27 21:51:10 dbg 374Srgrimes * Add 'dotdot' token. 384Srgrimes * [90/08/22 dbg] 394Srgrimes * 404Srgrimes * Allow backslash to quote any character into an identifier. 414Srgrimes * Allow colon in identifier for symbol table qualification. 424Srgrimes * [90/08/16 dbg] 434Srgrimes * Reduce lint. 444Srgrimes * [90/08/07 dbg] 454Srgrimes * Created. 464Srgrimes * [90/07/25 dbg] 474Srgrimes * 484Srgrimes */ 494Srgrimes/* 504Srgrimes * Author: David B. Golub, Carnegie Mellon University 514Srgrimes * Date: 7/90 524Srgrimes */ 534Srgrimes/* 544Srgrimes * Lexical analyzer. 554Srgrimes */ 564Srgrimes#include <ddb/db_lex.h> 574Srgrimes 584Srgrimeschar db_line[120]; 594Srgrimeschar * db_lp, *db_endlp; 604Srgrimes 614Srgrimesint 624Srgrimesdb_read_line() 634Srgrimes{ 644Srgrimes int i; 654Srgrimes 664Srgrimes i = db_readline(db_line, sizeof(db_line)); 674Srgrimes if (i == 0) 684Srgrimes return (0); /* EOI */ 694Srgrimes db_lp = db_line; 704Srgrimes db_endlp = db_lp + i; 714Srgrimes return (i); 724Srgrimes} 734Srgrimes 744Srgrimesvoid 754Srgrimesdb_flush_line() 764Srgrimes{ 774Srgrimes db_lp = db_line; 784Srgrimes db_endlp = db_line; 794Srgrimes} 804Srgrimes 814Srgrimesint db_look_char = 0; 824Srgrimes 834Srgrimesint 844Srgrimesdb_read_char() 854Srgrimes{ 864Srgrimes int c; 874Srgrimes 884Srgrimes if (db_look_char != 0) { 894Srgrimes c = db_look_char; 904Srgrimes db_look_char = 0; 914Srgrimes } 924Srgrimes else if (db_lp >= db_endlp) 934Srgrimes c = -1; 944Srgrimes else 954Srgrimes c = *db_lp++; 964Srgrimes return (c); 974Srgrimes} 984Srgrimes 994Srgrimesvoid 1004Srgrimesdb_unread_char(c) 1014Srgrimes{ 1024Srgrimes db_look_char = c; 1034Srgrimes} 1044Srgrimes 1054Srgrimesint db_look_token = 0; 1064Srgrimes 1074Srgrimesvoid 1084Srgrimesdb_unread_token(t) 1094Srgrimes int t; 1104Srgrimes{ 1114Srgrimes db_look_token = t; 1124Srgrimes} 1134Srgrimes 1144Srgrimesint 1154Srgrimesdb_read_token() 1164Srgrimes{ 1174Srgrimes int t; 1184Srgrimes 1194Srgrimes if (db_look_token) { 1204Srgrimes t = db_look_token; 1214Srgrimes db_look_token = 0; 1224Srgrimes } 1234Srgrimes else 1244Srgrimes t = db_lex(); 1254Srgrimes return (t); 1264Srgrimes} 1274Srgrimes 1284Srgrimesint db_tok_number; 1294Srgrimeschar db_tok_string[TOK_STRING_SIZE]; 1304Srgrimes 1314Srgrimesint db_radix = 16; 1324Srgrimes 1334Srgrimesvoid 1344Srgrimesdb_flush_lex() 1354Srgrimes{ 1364Srgrimes db_flush_line(); 1374Srgrimes db_look_char = 0; 1384Srgrimes db_look_token = 0; 1394Srgrimes} 1404Srgrimes 1414Srgrimesint 1424Srgrimesdb_lex() 1434Srgrimes{ 1444Srgrimes int c; 1454Srgrimes 1464Srgrimes c = db_read_char(); 1474Srgrimes while (c <= ' ' || c > '~') { 1484Srgrimes if (c == '\n' || c == -1) 1494Srgrimes return (tEOL); 1504Srgrimes c = db_read_char(); 1514Srgrimes } 1524Srgrimes 1534Srgrimes if (c >= '0' && c <= '9') { 1544Srgrimes /* number */ 1554Srgrimes int r, digit; 1564Srgrimes 1574Srgrimes if (c > '0') 1584Srgrimes r = db_radix; 1594Srgrimes else { 1604Srgrimes c = db_read_char(); 1614Srgrimes if (c == 'O' || c == 'o') 1624Srgrimes r = 8; 1634Srgrimes else if (c == 'T' || c == 't') 1644Srgrimes r = 10; 1654Srgrimes else if (c == 'X' || c == 'x') 1664Srgrimes r = 16; 1674Srgrimes else { 1684Srgrimes r = db_radix; 1694Srgrimes db_unread_char(c); 1704Srgrimes } 1714Srgrimes c = db_read_char(); 1724Srgrimes } 1734Srgrimes db_tok_number = 0; 1744Srgrimes for (;;) { 1754Srgrimes if (c >= '0' && c <= ((r == 8) ? '7' : '9')) 1764Srgrimes digit = c - '0'; 1774Srgrimes else if (r == 16 && ((c >= 'A' && c <= 'F') || 1784Srgrimes (c >= 'a' && c <= 'f'))) { 1794Srgrimes if (c >= 'a') 1804Srgrimes digit = c - 'a' + 10; 1814Srgrimes else if (c >= 'A') 1824Srgrimes digit = c - 'A' + 10; 1834Srgrimes } 1844Srgrimes else 1854Srgrimes break; 1864Srgrimes db_tok_number = db_tok_number * r + digit; 1874Srgrimes c = db_read_char(); 1884Srgrimes } 1894Srgrimes if ((c >= '0' && c <= '9') || 1904Srgrimes (c >= 'A' && c <= 'Z') || 1914Srgrimes (c >= 'a' && c <= 'z') || 1924Srgrimes (c == '_')) 1934Srgrimes { 1944Srgrimes db_error("Bad character in number\n"); 1954Srgrimes db_flush_lex(); 1964Srgrimes return (tEOF); 1974Srgrimes } 1984Srgrimes db_unread_char(c); 1994Srgrimes return (tNUMBER); 2004Srgrimes } 2014Srgrimes if ((c >= 'A' && c <= 'Z') || 2024Srgrimes (c >= 'a' && c <= 'z') || 2034Srgrimes c == '_' || c == '\\') 2044Srgrimes { 2054Srgrimes /* string */ 2064Srgrimes char *cp; 2074Srgrimes 2084Srgrimes cp = db_tok_string; 2094Srgrimes if (c == '\\') { 2104Srgrimes c = db_read_char(); 2114Srgrimes if (c == '\n' || c == -1) 2124Srgrimes db_error("Bad escape\n"); 2134Srgrimes } 2144Srgrimes *cp++ = c; 2154Srgrimes while (1) { 2164Srgrimes c = db_read_char(); 2174Srgrimes if ((c >= 'A' && c <= 'Z') || 2184Srgrimes (c >= 'a' && c <= 'z') || 2194Srgrimes (c >= '0' && c <= '9') || 2204Srgrimes c == '_' || c == '\\' || c == ':') 2214Srgrimes { 2224Srgrimes if (c == '\\') { 2234Srgrimes c = db_read_char(); 2244Srgrimes if (c == '\n' || c == -1) 2254Srgrimes db_error("Bad escape\n"); 2264Srgrimes } 2274Srgrimes *cp++ = c; 2284Srgrimes if (cp == db_tok_string+sizeof(db_tok_string)) { 2294Srgrimes db_error("String too long\n"); 2304Srgrimes db_flush_lex(); 2314Srgrimes return (tEOF); 2324Srgrimes } 2334Srgrimes continue; 2344Srgrimes } 2354Srgrimes else { 2364Srgrimes *cp = '\0'; 2374Srgrimes break; 2384Srgrimes } 2394Srgrimes } 2404Srgrimes db_unread_char(c); 2414Srgrimes return (tIDENT); 2424Srgrimes } 2434Srgrimes 2444Srgrimes switch (c) { 2454Srgrimes case '+': 2464Srgrimes return (tPLUS); 2474Srgrimes case '-': 2484Srgrimes return (tMINUS); 2494Srgrimes case '.': 2504Srgrimes c = db_read_char(); 2514Srgrimes if (c == '.') 2524Srgrimes return (tDOTDOT); 2534Srgrimes db_unread_char(c); 2544Srgrimes return (tDOT); 2554Srgrimes case '*': 2564Srgrimes return (tSTAR); 2574Srgrimes case '/': 2584Srgrimes return (tSLASH); 2594Srgrimes case '=': 2604Srgrimes return (tEQ); 2614Srgrimes case '%': 2624Srgrimes return (tPCT); 2634Srgrimes case '#': 2644Srgrimes return (tHASH); 2654Srgrimes case '(': 2664Srgrimes return (tLPAREN); 2674Srgrimes case ')': 2684Srgrimes return (tRPAREN); 2694Srgrimes case ',': 2704Srgrimes return (tCOMMA); 2714Srgrimes case '"': 2724Srgrimes return (tDITTO); 2734Srgrimes case '$': 2744Srgrimes return (tDOLLAR); 2754Srgrimes case '!': 2764Srgrimes return (tEXCL); 2774Srgrimes case '<': 2784Srgrimes c = db_read_char(); 2794Srgrimes if (c == '<') 2804Srgrimes return (tSHIFT_L); 2814Srgrimes db_unread_char(c); 2824Srgrimes break; 2834Srgrimes case '>': 2844Srgrimes c = db_read_char(); 2854Srgrimes if (c == '>') 2864Srgrimes return (tSHIFT_R); 2874Srgrimes db_unread_char(c); 2884Srgrimes break; 2894Srgrimes case -1: 2904Srgrimes return (tEOF); 2914Srgrimes } 2924Srgrimes db_printf("Bad character\n"); 2934Srgrimes db_flush_lex(); 2944Srgrimes return (tEOF); 2954Srgrimes} 296