db_lex.c revision 2056
1/*
2 * Mach Operating System
3 * Copyright (c) 1991,1990 Carnegie Mellon University
4 * All Rights Reserved.
5 *
6 * Permission to use, copy, modify and distribute this software and its
7 * documentation is hereby granted, provided that both the copyright
8 * notice and this permission notice appear in all copies of the
9 * software, derivative works or modified versions, and any portions
10 * thereof, and that both notices appear in supporting documentation.
11 *
12 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS
13 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
14 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
15 *
16 * Carnegie Mellon requests users of this software to return to
17 *
18 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
19 *  School of Computer Science
20 *  Carnegie Mellon University
21 *  Pittsburgh PA 15213-3890
22 *
23 * any improvements or extensions that they make and grant Carnegie the
24 * rights to redistribute these changes.
25 *
26 *	$Id: db_lex.c,v 1.3 1993/11/25 01:30:07 wollman Exp $
27 */
28
29/*
30 *	Author: David B. Golub, Carnegie Mellon University
31 *	Date:	7/90
32 */
33/*
34 * Lexical analyzer.
35 */
36#include <sys/param.h>
37#include <sys/systm.h>
38#include <ddb/ddb.h>
39#include <ddb/db_lex.h>
40
/* Buffer holding the input line currently being scanned. */
char	db_line[120];

/*
 * db_lp is the next character of db_line to hand out; db_endlp is one
 * past the last character that was read into the buffer.
 */
char	*db_lp;
char	*db_endlp;
43
44int
45db_read_line()
46{
47	int	i;
48
49	i = db_readline(db_line, sizeof(db_line));
50	if (i == 0)
51	    return (0);	/* EOI */
52	db_lp = db_line;
53	db_endlp = db_lp + i;
54	return (i);
55}
56
57void
58db_flush_line()
59{
60	db_lp = db_line;
61	db_endlp = db_line;
62}
63
/* One-character pushback slot for db_read_char(); 0 means it is empty. */
int	db_look_char = 0;
65
66int
67db_read_char()
68{
69	int	c;
70
71	if (db_look_char != 0) {
72	    c = db_look_char;
73	    db_look_char = 0;
74	}
75	else if (db_lp >= db_endlp)
76	    c = -1;
77	else
78	    c = *db_lp++;
79	return (c);
80}
81
82void
83db_unread_char(c)
84	int c;
85{
86	db_look_char = c;
87}
88
/* One-token pushback slot for db_read_token(); 0 means it is empty. */
int	db_look_token = 0;
90
91void
92db_unread_token(t)
93	int	t;
94{
95	db_look_token = t;
96}
97
98int
99db_read_token()
100{
101	int	t;
102
103	if (db_look_token) {
104	    t = db_look_token;
105	    db_look_token = 0;
106	}
107	else
108	    t = db_lex();
109	return (t);
110}
111
112int	db_tok_number;
113char	db_tok_string[TOK_STRING_SIZE];
114
115int	db_radix = 16;
116
117void
118db_flush_lex()
119{
120	db_flush_line();
121	db_look_char = 0;
122	db_look_token = 0;
123}
124
125int
126db_lex()
127{
128	int	c;
129
130	c = db_read_char();
131	while (c <= ' ' || c > '~') {
132	    if (c == '\n' || c == -1)
133		return (tEOL);
134	    c = db_read_char();
135	}
136
137	if (c >= '0' && c <= '9') {
138	    /* number */
139	    int	r, digit = 0;
140
141	    if (c > '0')
142		r = db_radix;
143	    else {
144		c = db_read_char();
145		if (c == 'O' || c == 'o')
146		    r = 8;
147		else if (c == 'T' || c == 't')
148		    r = 10;
149		else if (c == 'X' || c == 'x')
150		    r = 16;
151		else {
152		    r = db_radix;
153		    db_unread_char(c);
154		}
155		c = db_read_char();
156	    }
157	    db_tok_number = 0;
158	    for (;;) {
159		if (c >= '0' && c <= ((r == 8) ? '7' : '9'))
160		    digit = c - '0';
161		else if (r == 16 && ((c >= 'A' && c <= 'F') ||
162				     (c >= 'a' && c <= 'f'))) {
163		    if (c >= 'a')
164			digit = c - 'a' + 10;
165		    else if (c >= 'A')
166			digit = c - 'A' + 10;
167		}
168		else
169		    break;
170		db_tok_number = db_tok_number * r + digit;
171		c = db_read_char();
172	    }
173	    if ((c >= '0' && c <= '9') ||
174		(c >= 'A' && c <= 'Z') ||
175		(c >= 'a' && c <= 'z') ||
176		(c == '_'))
177	    {
178		db_error("Bad character in number\n");
179		db_flush_lex();
180		return (tEOF);
181	    }
182	    db_unread_char(c);
183	    return (tNUMBER);
184	}
185	if ((c >= 'A' && c <= 'Z') ||
186	    (c >= 'a' && c <= 'z') ||
187	    c == '_' || c == '\\')
188	{
189	    /* string */
190	    char *cp;
191
192	    cp = db_tok_string;
193	    if (c == '\\') {
194		c = db_read_char();
195		if (c == '\n' || c == -1)
196		    db_error("Bad escape\n");
197	    }
198	    *cp++ = c;
199	    while (1) {
200		c = db_read_char();
201		if ((c >= 'A' && c <= 'Z') ||
202		    (c >= 'a' && c <= 'z') ||
203		    (c >= '0' && c <= '9') ||
204		    c == '_' || c == '\\' || c == ':')
205		{
206		    if (c == '\\') {
207			c = db_read_char();
208			if (c == '\n' || c == -1)
209			    db_error("Bad escape\n");
210		    }
211		    *cp++ = c;
212		    if (cp == db_tok_string+sizeof(db_tok_string)) {
213			db_error("String too long\n");
214			db_flush_lex();
215			return (tEOF);
216		    }
217		    continue;
218		}
219		else {
220		    *cp = '\0';
221		    break;
222		}
223	    }
224	    db_unread_char(c);
225	    return (tIDENT);
226	}
227
228	switch (c) {
229	    case '+':
230		return (tPLUS);
231	    case '-':
232		return (tMINUS);
233	    case '.':
234		c = db_read_char();
235		if (c == '.')
236		    return (tDOTDOT);
237		db_unread_char(c);
238		return (tDOT);
239	    case '*':
240		return (tSTAR);
241	    case '/':
242		return (tSLASH);
243	    case '=':
244		return (tEQ);
245	    case '%':
246		return (tPCT);
247	    case '#':
248		return (tHASH);
249	    case '(':
250		return (tLPAREN);
251	    case ')':
252		return (tRPAREN);
253	    case ',':
254		return (tCOMMA);
255	    case '"':
256		return (tDITTO);
257	    case '$':
258		return (tDOLLAR);
259	    case '!':
260		return (tEXCL);
261	    case '<':
262		c = db_read_char();
263		if (c == '<')
264		    return (tSHIFT_L);
265		db_unread_char(c);
266		break;
267	    case '>':
268		c = db_read_char();
269		if (c == '>')
270		    return (tSHIFT_R);
271		db_unread_char(c);
272		break;
273	    case -1:
274		return (tEOF);
275	}
276	db_printf("Bad character\n");
277	db_flush_lex();
278	return (tEOF);
279}
280