db_lex.c revision 12720
1/*
2 * Mach Operating System
3 * Copyright (c) 1991,1990 Carnegie Mellon University
4 * All Rights Reserved.
5 *
6 * Permission to use, copy, modify and distribute this software and its
7 * documentation is hereby granted, provided that both the copyright
8 * notice and this permission notice appear in all copies of the
9 * software, derivative works or modified versions, and any portions
10 * thereof, and that both notices appear in supporting documentation.
11 *
12 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS
13 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
14 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
15 *
16 * Carnegie Mellon requests users of this software to return to
17 *
18 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
19 *  School of Computer Science
20 *  Carnegie Mellon University
21 *  Pittsburgh PA 15213-3890
22 *
23 * any improvements or extensions that they make and grant Carnegie the
24 * rights to redistribute these changes.
25 *
26 *	$Id: db_lex.c,v 1.9 1995/12/07 12:44:52 davidg Exp $
27 */
28
29/*
30 *	Author: David B. Golub, Carnegie Mellon University
31 *	Date:	7/90
32 */
33/*
34 * Lexical analyzer.
35 */
36#include <sys/param.h>
37#include <sys/systm.h>
38#include <vm/vm_param.h>
39#include <ddb/ddb.h>
40#include <ddb/db_lex.h>
41
42static char	db_line[120];
43static char *	db_lp, *db_endlp;
44
45static int	db_lex __P((void));
46static void 	db_flush_line __P((void));
47static int 	db_read_char __P((void));
48static void 	db_unread_char __P((int));
49
50int
51db_read_line()
52{
53	int	i;
54
55	i = db_readline(db_line, sizeof(db_line));
56	if (i == 0)
57	    return (0);	/* EOI */
58	db_lp = db_line;
59	db_endlp = db_lp + i;
60	return (i);
61}
62
63static void
64db_flush_line()
65{
66	db_lp = db_line;
67	db_endlp = db_line;
68}
69
70static int	db_look_char = 0;
71
72static int
73db_read_char()
74{
75	int	c;
76
77	if (db_look_char != 0) {
78	    c = db_look_char;
79	    db_look_char = 0;
80	}
81	else if (db_lp >= db_endlp)
82	    c = -1;
83	else
84	    c = *db_lp++;
85	return (c);
86}
87
88static void
89db_unread_char(c)
90	int c;
91{
92	db_look_char = c;
93}
94
/* One-token pushback slot; 0 means the slot is empty. */
static int	db_look_token = 0;

/*
 * Push token code `t' back so that the next db_read_token() returns it
 * instead of scanning new input.  Only one token can be held; a later
 * call replaces an earlier one.
 */
void
db_unread_token(t)
	int	t;
{
	db_look_token = t;
}
103
104int
105db_read_token()
106{
107	int	t;
108
109	if (db_look_token) {
110	    t = db_look_token;
111	    db_look_token = 0;
112	}
113	else
114	    t = db_lex();
115	return (t);
116}
117
118int	db_tok_number;
119char	db_tok_string[TOK_STRING_SIZE];
120
121int	db_radix = 16;
122
123void
124db_flush_lex()
125{
126	db_flush_line();
127	db_look_char = 0;
128	db_look_token = 0;
129}
130
131static int
132db_lex()
133{
134	int	c;
135
136	c = db_read_char();
137	while (c <= ' ' || c > '~') {
138	    if (c == '\n' || c == -1)
139		return (tEOL);
140	    c = db_read_char();
141	}
142
143	if (c >= '0' && c <= '9') {
144	    /* number */
145	    int	r, digit = 0;
146
147	    if (c > '0')
148		r = db_radix;
149	    else {
150		c = db_read_char();
151		if (c == 'O' || c == 'o')
152		    r = 8;
153		else if (c == 'T' || c == 't')
154		    r = 10;
155		else if (c == 'X' || c == 'x')
156		    r = 16;
157		else {
158		    r = db_radix;
159		    db_unread_char(c);
160		}
161		c = db_read_char();
162	    }
163	    db_tok_number = 0;
164	    for (;;) {
165		if (c >= '0' && c <= ((r == 8) ? '7' : '9'))
166		    digit = c - '0';
167		else if (r == 16 && ((c >= 'A' && c <= 'F') ||
168				     (c >= 'a' && c <= 'f'))) {
169		    if (c >= 'a')
170			digit = c - 'a' + 10;
171		    else if (c >= 'A')
172			digit = c - 'A' + 10;
173		}
174		else
175		    break;
176		db_tok_number = db_tok_number * r + digit;
177		c = db_read_char();
178	    }
179	    if ((c >= '0' && c <= '9') ||
180		(c >= 'A' && c <= 'Z') ||
181		(c >= 'a' && c <= 'z') ||
182		(c == '_'))
183	    {
184		db_error("Bad character in number\n");
185		db_flush_lex();
186		return (tEOF);
187	    }
188	    db_unread_char(c);
189	    return (tNUMBER);
190	}
191	if ((c >= 'A' && c <= 'Z') ||
192	    (c >= 'a' && c <= 'z') ||
193	    c == '_' || c == '\\')
194	{
195	    /* string */
196	    char *cp;
197
198	    cp = db_tok_string;
199	    if (c == '\\') {
200		c = db_read_char();
201		if (c == '\n' || c == -1)
202		    db_error("Bad escape\n");
203	    }
204	    *cp++ = c;
205	    while (1) {
206		c = db_read_char();
207		if ((c >= 'A' && c <= 'Z') ||
208		    (c >= 'a' && c <= 'z') ||
209		    (c >= '0' && c <= '9') ||
210		    c == '_' || c == '\\' || c == ':')
211		{
212		    if (c == '\\') {
213			c = db_read_char();
214			if (c == '\n' || c == -1)
215			    db_error("Bad escape\n");
216		    }
217		    *cp++ = c;
218		    if (cp == db_tok_string+sizeof(db_tok_string)) {
219			db_error("String too long\n");
220			db_flush_lex();
221			return (tEOF);
222		    }
223		    continue;
224		}
225		else {
226		    *cp = '\0';
227		    break;
228		}
229	    }
230	    db_unread_char(c);
231	    return (tIDENT);
232	}
233
234	switch (c) {
235	    case '+':
236		return (tPLUS);
237	    case '-':
238		return (tMINUS);
239	    case '.':
240		c = db_read_char();
241		if (c == '.')
242		    return (tDOTDOT);
243		db_unread_char(c);
244		return (tDOT);
245	    case '*':
246		return (tSTAR);
247	    case '/':
248		return (tSLASH);
249	    case '=':
250		return (tEQ);
251	    case '%':
252		return (tPCT);
253	    case '#':
254		return (tHASH);
255	    case '(':
256		return (tLPAREN);
257	    case ')':
258		return (tRPAREN);
259	    case ',':
260		return (tCOMMA);
261	    case '"':
262		return (tDITTO);
263	    case '$':
264		return (tDOLLAR);
265	    case '!':
266		return (tEXCL);
267	    case '<':
268		c = db_read_char();
269		if (c == '<')
270		    return (tSHIFT_L);
271		db_unread_char(c);
272		break;
273	    case '>':
274		c = db_read_char();
275		if (c == '>')
276		    return (tSHIFT_R);
277		db_unread_char(c);
278		break;
279	    case -1:
280		return (tEOF);
281	}
282	db_printf("Bad character\n");
283	db_flush_lex();
284	return (tEOF);
285}
286