db_lex.c revision 5
1/* 2 * Mach Operating System 3 * Copyright (c) 1991,1990 Carnegie Mellon University 4 * All Rights Reserved. 5 * 6 * Permission to use, copy, modify and distribute this software and its 7 * documentation is hereby granted, provided that both the copyright 8 * notice and this permission notice appear in all copies of the 9 * software, derivative works or modified versions, and any portions 10 * thereof, and that both notices appear in supporting documentation. 11 * 12 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS 13 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR 14 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 15 * 16 * Carnegie Mellon requests users of this software to return to 17 * 18 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 19 * School of Computer Science 20 * Carnegie Mellon University 21 * Pittsburgh PA 15213-3890 22 * 23 * any improvements or extensions that they make and grant Carnegie the 24 * rights to redistribute these changes. 25 */ 26/* 27 * HISTORY 28 * $Log: db_lex.c,v $ 29 * Revision 1.1 1992/03/25 21:45:13 pace 30 * Initial revision 31 * 32 * Revision 2.3 91/02/05 17:06:36 mrt 33 * Changed to new Mach copyright 34 * [91/01/31 16:18:20 mrt] 35 * 36 * Revision 2.2 90/08/27 21:51:10 dbg 37 * Add 'dotdot' token. 38 * [90/08/22 dbg] 39 * 40 * Allow backslash to quote any character into an identifier. 41 * Allow colon in identifier for symbol table qualification. 42 * [90/08/16 dbg] 43 * Reduce lint. 44 * [90/08/07 dbg] 45 * Created. 46 * [90/07/25 dbg] 47 * 48 */ 49/* 50 * Author: David B. Golub, Carnegie Mellon University 51 * Date: 7/90 52 */ 53/* 54 * Lexical analyzer. 55 */ 56#include <ddb/db_lex.h> 57 58char db_line[120]; 59char * db_lp, *db_endlp; 60 61int 62db_read_line() 63{ 64 int i; 65 66 i = db_readline(db_line, sizeof(db_line)); 67 if (i == 0) 68 return (0); /* EOI */ 69 db_lp = db_line; 70 db_endlp = db_lp + i; 71 return (i); 72} 73 74void 75db_flush_line() 76{ 77 db_lp = db_line; 78 db_endlp = db_line; 79} 80 81int db_look_char = 0; 82 83int 84db_read_char() 85{ 86 int c; 87 88 if (db_look_char != 0) { 89 c = db_look_char; 90 db_look_char = 0; 91 } 92 else if (db_lp >= db_endlp) 93 c = -1; 94 else 95 c = *db_lp++; 96 return (c); 97} 98 99void 100db_unread_char(c) 101{ 102 db_look_char = c; 103} 104 105int db_look_token = 0; 106 107void 108db_unread_token(t) 109 int t; 110{ 111 db_look_token = t; 112} 113 114int 115db_read_token() 116{ 117 int t; 118 119 if (db_look_token) { 120 t = db_look_token; 121 db_look_token = 0; 122 } 123 else 124 t = db_lex(); 125 return (t); 126} 127 128int db_tok_number; 129char db_tok_string[TOK_STRING_SIZE]; 130 131int db_radix = 16; 132 133void 134db_flush_lex() 135{ 136 db_flush_line(); 137 db_look_char = 0; 138 db_look_token = 0; 139} 140 141int 142db_lex() 143{ 144 int c; 145 146 c = db_read_char(); 147 while (c <= ' ' || c > '~') { 148 if (c == '\n' || c == -1) 149 return (tEOL); 150 c = db_read_char(); 151 } 152 153 if (c >= '0' && c <= '9') { 154 /* number */ 155 int r, digit; 156 157 if (c > '0') 158 r = db_radix; 159 else { 160 c = db_read_char(); 161 if (c == 'O' || c == 'o') 162 r = 8; 163 else if (c == 'T' || c == 't') 164 r = 10; 165 else if (c == 'X' || c == 'x') 166 r = 16; 167 else { 168 r = db_radix; 169 db_unread_char(c); 170 } 171 c = db_read_char(); 172 } 173 db_tok_number = 0; 174 for (;;) { 175 if (c >= '0' && c <= ((r == 8) ? '7' : '9')) 176 digit = c - '0'; 177 else if (r == 16 && ((c >= 'A' && c <= 'F') || 178 (c >= 'a' && c <= 'f'))) { 179 if (c >= 'a') 180 digit = c - 'a' + 10; 181 else if (c >= 'A') 182 digit = c - 'A' + 10; 183 } 184 else 185 break; 186 db_tok_number = db_tok_number * r + digit; 187 c = db_read_char(); 188 } 189 if ((c >= '0' && c <= '9') || 190 (c >= 'A' && c <= 'Z') || 191 (c >= 'a' && c <= 'z') || 192 (c == '_')) 193 { 194 db_error("Bad character in number\n"); 195 db_flush_lex(); 196 return (tEOF); 197 } 198 db_unread_char(c); 199 return (tNUMBER); 200 } 201 if ((c >= 'A' && c <= 'Z') || 202 (c >= 'a' && c <= 'z') || 203 c == '_' || c == '\\') 204 { 205 /* string */ 206 char *cp; 207 208 cp = db_tok_string; 209 if (c == '\\') { 210 c = db_read_char(); 211 if (c == '\n' || c == -1) 212 db_error("Bad escape\n"); 213 } 214 *cp++ = c; 215 while (1) { 216 c = db_read_char(); 217 if ((c >= 'A' && c <= 'Z') || 218 (c >= 'a' && c <= 'z') || 219 (c >= '0' && c <= '9') || 220 c == '_' || c == '\\' || c == ':') 221 { 222 if (c == '\\') { 223 c = db_read_char(); 224 if (c == '\n' || c == -1) 225 db_error("Bad escape\n"); 226 } 227 *cp++ = c; 228 if (cp == db_tok_string+sizeof(db_tok_string)) { 229 db_error("String too long\n"); 230 db_flush_lex(); 231 return (tEOF); 232 } 233 continue; 234 } 235 else { 236 *cp = '\0'; 237 break; 238 } 239 } 240 db_unread_char(c); 241 return (tIDENT); 242 } 243 244 switch (c) { 245 case '+': 246 return (tPLUS); 247 case '-': 248 return (tMINUS); 249 case '.': 250 c = db_read_char(); 251 if (c == '.') 252 return (tDOTDOT); 253 db_unread_char(c); 254 return (tDOT); 255 case '*': 256 return (tSTAR); 257 case '/': 258 return (tSLASH); 259 case '=': 260 return (tEQ); 261 case '%': 262 return (tPCT); 263 case '#': 264 return (tHASH); 265 case '(': 266 return (tLPAREN); 267 case ')': 268 return (tRPAREN); 269 case ',': 270 return (tCOMMA); 271 case '"': 272 return (tDITTO); 273 case '$': 274 return (tDOLLAR); 275 case '!': 276 return (tEXCL); 277 case '<': 278 c = db_read_char(); 279 if (c == '<') 280 return (tSHIFT_L); 281 db_unread_char(c); 282 break; 283 case '>': 284 c = db_read_char(); 285 if (c == '>') 286 return (tSHIFT_R); 287 db_unread_char(c); 288 break; 289 case -1: 290 return (tEOF); 291 } 292 db_printf("Bad character\n"); 293 db_flush_lex(); 294 return (tEOF); 295} 296