db_lex.c revision 5
1/*
2 * Mach Operating System
3 * Copyright (c) 1991,1990 Carnegie Mellon University
4 * All Rights Reserved.
5 *
6 * Permission to use, copy, modify and distribute this software and its
7 * documentation is hereby granted, provided that both the copyright
8 * notice and this permission notice appear in all copies of the
9 * software, derivative works or modified versions, and any portions
10 * thereof, and that both notices appear in supporting documentation.
11 *
12 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS
13 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
14 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
15 *
16 * Carnegie Mellon requests users of this software to return to
17 *
18 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
19 *  School of Computer Science
20 *  Carnegie Mellon University
21 *  Pittsburgh PA 15213-3890
22 *
23 * any improvements or extensions that they make and grant Carnegie the
24 * rights to redistribute these changes.
25 */
26/*
27 * HISTORY
28 * $Log: db_lex.c,v $
29 * Revision 1.1  1992/03/25  21:45:13  pace
30 * Initial revision
31 *
32 * Revision 2.3  91/02/05  17:06:36  mrt
33 * 	Changed to new Mach copyright
34 * 	[91/01/31  16:18:20  mrt]
35 *
36 * Revision 2.2  90/08/27  21:51:10  dbg
37 * 	Add 'dotdot' token.
38 * 	[90/08/22            dbg]
39 *
40 * 	Allow backslash to quote any character into an identifier.
41 * 	Allow colon in identifier for symbol table qualification.
42 * 	[90/08/16            dbg]
43 * 	Reduce lint.
44 * 	[90/08/07            dbg]
45 * 	Created.
46 * 	[90/07/25            dbg]
47 *
48 */
49/*
50 *	Author: David B. Golub, Carnegie Mellon University
51 *	Date:	7/90
52 */
53/*
54 * Lexical analyzer.
55 */
56#include <ddb/db_lex.h>
57
/*
 * Current input line and the scan window into it.
 */
char	db_line[120];		/* storage filled by db_read_line() */
char	*db_lp;			/* next character db_read_char() will return */
char	*db_endlp;		/* one past the last character of the line */
60
61int
62db_read_line()
63{
64	int	i;
65
66	i = db_readline(db_line, sizeof(db_line));
67	if (i == 0)
68	    return (0);	/* EOI */
69	db_lp = db_line;
70	db_endlp = db_lp + i;
71	return (i);
72}
73
74void
75db_flush_line()
76{
77	db_lp = db_line;
78	db_endlp = db_line;
79}
80
81int	db_look_char = 0;
82
83int
84db_read_char()
85{
86	int	c;
87
88	if (db_look_char != 0) {
89	    c = db_look_char;
90	    db_look_char = 0;
91	}
92	else if (db_lp >= db_endlp)
93	    c = -1;
94	else
95	    c = *db_lp++;
96	return (c);
97}
98
99void
100db_unread_char(c)
101{
102	db_look_char = c;
103}
104
/*
 * One-token pushback slot; zero means no token is pending.
 */
int	db_look_token = 0;

/*
 * Hand token t back so that the next db_read_token() returns it
 * instead of scanning new input.  Only one token can be pending;
 * a later call replaces an earlier one.
 */
void
db_unread_token(t)
	int	t;
{
	db_look_token = t;
}
113
114int
115db_read_token()
116{
117	int	t;
118
119	if (db_look_token) {
120	    t = db_look_token;
121	    db_look_token = 0;
122	}
123	else
124	    t = db_lex();
125	return (t);
126}
127
128int	db_tok_number;
129char	db_tok_string[TOK_STRING_SIZE];
130
131int	db_radix = 16;
132
133void
134db_flush_lex()
135{
136	db_flush_line();
137	db_look_char = 0;
138	db_look_token = 0;
139}
140
141int
142db_lex()
143{
144	int	c;
145
146	c = db_read_char();
147	while (c <= ' ' || c > '~') {
148	    if (c == '\n' || c == -1)
149		return (tEOL);
150	    c = db_read_char();
151	}
152
153	if (c >= '0' && c <= '9') {
154	    /* number */
155	    int	r, digit;
156
157	    if (c > '0')
158		r = db_radix;
159	    else {
160		c = db_read_char();
161		if (c == 'O' || c == 'o')
162		    r = 8;
163		else if (c == 'T' || c == 't')
164		    r = 10;
165		else if (c == 'X' || c == 'x')
166		    r = 16;
167		else {
168		    r = db_radix;
169		    db_unread_char(c);
170		}
171		c = db_read_char();
172	    }
173	    db_tok_number = 0;
174	    for (;;) {
175		if (c >= '0' && c <= ((r == 8) ? '7' : '9'))
176		    digit = c - '0';
177		else if (r == 16 && ((c >= 'A' && c <= 'F') ||
178				     (c >= 'a' && c <= 'f'))) {
179		    if (c >= 'a')
180			digit = c - 'a' + 10;
181		    else if (c >= 'A')
182			digit = c - 'A' + 10;
183		}
184		else
185		    break;
186		db_tok_number = db_tok_number * r + digit;
187		c = db_read_char();
188	    }
189	    if ((c >= '0' && c <= '9') ||
190		(c >= 'A' && c <= 'Z') ||
191		(c >= 'a' && c <= 'z') ||
192		(c == '_'))
193	    {
194		db_error("Bad character in number\n");
195		db_flush_lex();
196		return (tEOF);
197	    }
198	    db_unread_char(c);
199	    return (tNUMBER);
200	}
201	if ((c >= 'A' && c <= 'Z') ||
202	    (c >= 'a' && c <= 'z') ||
203	    c == '_' || c == '\\')
204	{
205	    /* string */
206	    char *cp;
207
208	    cp = db_tok_string;
209	    if (c == '\\') {
210		c = db_read_char();
211		if (c == '\n' || c == -1)
212		    db_error("Bad escape\n");
213	    }
214	    *cp++ = c;
215	    while (1) {
216		c = db_read_char();
217		if ((c >= 'A' && c <= 'Z') ||
218		    (c >= 'a' && c <= 'z') ||
219		    (c >= '0' && c <= '9') ||
220		    c == '_' || c == '\\' || c == ':')
221		{
222		    if (c == '\\') {
223			c = db_read_char();
224			if (c == '\n' || c == -1)
225			    db_error("Bad escape\n");
226		    }
227		    *cp++ = c;
228		    if (cp == db_tok_string+sizeof(db_tok_string)) {
229			db_error("String too long\n");
230			db_flush_lex();
231			return (tEOF);
232		    }
233		    continue;
234		}
235		else {
236		    *cp = '\0';
237		    break;
238		}
239	    }
240	    db_unread_char(c);
241	    return (tIDENT);
242	}
243
244	switch (c) {
245	    case '+':
246		return (tPLUS);
247	    case '-':
248		return (tMINUS);
249	    case '.':
250		c = db_read_char();
251		if (c == '.')
252		    return (tDOTDOT);
253		db_unread_char(c);
254		return (tDOT);
255	    case '*':
256		return (tSTAR);
257	    case '/':
258		return (tSLASH);
259	    case '=':
260		return (tEQ);
261	    case '%':
262		return (tPCT);
263	    case '#':
264		return (tHASH);
265	    case '(':
266		return (tLPAREN);
267	    case ')':
268		return (tRPAREN);
269	    case ',':
270		return (tCOMMA);
271	    case '"':
272		return (tDITTO);
273	    case '$':
274		return (tDOLLAR);
275	    case '!':
276		return (tEXCL);
277	    case '<':
278		c = db_read_char();
279		if (c == '<')
280		    return (tSHIFT_L);
281		db_unread_char(c);
282		break;
283	    case '>':
284		c = db_read_char();
285		if (c == '>')
286		    return (tSHIFT_R);
287		db_unread_char(c);
288		break;
289	    case -1:
290		return (tEOF);
291	}
292	db_printf("Bad character\n");
293	db_flush_lex();
294	return (tEOF);
295}
296