db_lex.c revision 623
1/*
2 * Mach Operating System
3 * Copyright (c) 1991,1990 Carnegie Mellon University
4 * All Rights Reserved.
5 *
6 * Permission to use, copy, modify and distribute this software and its
7 * documentation is hereby granted, provided that both the copyright
8 * notice and this permission notice appear in all copies of the
9 * software, derivative works or modified versions, and any portions
10 * thereof, and that both notices appear in supporting documentation.
11 *
12 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS
13 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
14 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
15 *
16 * Carnegie Mellon requests users of this software to return to
17 *
18 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
19 *  School of Computer Science
20 *  Carnegie Mellon University
21 *  Pittsburgh PA 15213-3890
22 *
23 * any improvements or extensions that they make and grant Carnegie the
24 * rights to redistribute these changes.
25 *
26 *	$Id$
27 */
28
29/*
30 *	Author: David B. Golub, Carnegie Mellon University
31 *	Date:	7/90
32 */
33/*
34 * Lexical analyzer.
35 */
36#include <ddb/db_lex.h>
37
/*
 * Input line buffer and the scan window into it.  db_lp is the next
 * character to be handed out by db_read_char(); db_endlp is the exclusive
 * end of the window (reading stops once db_lp reaches it).
 */
char	db_line[120];
char	*db_lp;
char	*db_endlp;
40
41int
42db_read_line()
43{
44	int	i;
45
46	i = db_readline(db_line, sizeof(db_line));
47	if (i == 0)
48	    return (0);	/* EOI */
49	db_lp = db_line;
50	db_endlp = db_lp + i;
51	return (i);
52}
53
54void
55db_flush_line()
56{
57	db_lp = db_line;
58	db_endlp = db_line;
59}
60
61int	db_look_char = 0;
62
63int
64db_read_char()
65{
66	int	c;
67
68	if (db_look_char != 0) {
69	    c = db_look_char;
70	    db_look_char = 0;
71	}
72	else if (db_lp >= db_endlp)
73	    c = -1;
74	else
75	    c = *db_lp++;
76	return (c);
77}
78
79void
80db_unread_char(c)
81{
82	db_look_char = c;
83}
84
/* One-token pushback slot; 0 means nothing is pushed back. */
int	db_look_token = 0;

/*
 * Push token code t back so that the next db_read_token() returns it
 * instead of scanning.  Only one token is remembered; a later call
 * replaces an earlier one.
 */
void
db_unread_token(t)
	int	t;
{
	db_look_token = t;
}
93
94int
95db_read_token()
96{
97	int	t;
98
99	if (db_look_token) {
100	    t = db_look_token;
101	    db_look_token = 0;
102	}
103	else
104	    t = db_lex();
105	return (t);
106}
107
108int	db_tok_number;
109char	db_tok_string[TOK_STRING_SIZE];
110
111int	db_radix = 16;
112
113void
114db_flush_lex()
115{
116	db_flush_line();
117	db_look_char = 0;
118	db_look_token = 0;
119}
120
121int
122db_lex()
123{
124	int	c;
125
126	c = db_read_char();
127	while (c <= ' ' || c > '~') {
128	    if (c == '\n' || c == -1)
129		return (tEOL);
130	    c = db_read_char();
131	}
132
133	if (c >= '0' && c <= '9') {
134	    /* number */
135	    int	r, digit;
136
137	    if (c > '0')
138		r = db_radix;
139	    else {
140		c = db_read_char();
141		if (c == 'O' || c == 'o')
142		    r = 8;
143		else if (c == 'T' || c == 't')
144		    r = 10;
145		else if (c == 'X' || c == 'x')
146		    r = 16;
147		else {
148		    r = db_radix;
149		    db_unread_char(c);
150		}
151		c = db_read_char();
152	    }
153	    db_tok_number = 0;
154	    for (;;) {
155		if (c >= '0' && c <= ((r == 8) ? '7' : '9'))
156		    digit = c - '0';
157		else if (r == 16 && ((c >= 'A' && c <= 'F') ||
158				     (c >= 'a' && c <= 'f'))) {
159		    if (c >= 'a')
160			digit = c - 'a' + 10;
161		    else if (c >= 'A')
162			digit = c - 'A' + 10;
163		}
164		else
165		    break;
166		db_tok_number = db_tok_number * r + digit;
167		c = db_read_char();
168	    }
169	    if ((c >= '0' && c <= '9') ||
170		(c >= 'A' && c <= 'Z') ||
171		(c >= 'a' && c <= 'z') ||
172		(c == '_'))
173	    {
174		db_error("Bad character in number\n");
175		db_flush_lex();
176		return (tEOF);
177	    }
178	    db_unread_char(c);
179	    return (tNUMBER);
180	}
181	if ((c >= 'A' && c <= 'Z') ||
182	    (c >= 'a' && c <= 'z') ||
183	    c == '_' || c == '\\')
184	{
185	    /* string */
186	    char *cp;
187
188	    cp = db_tok_string;
189	    if (c == '\\') {
190		c = db_read_char();
191		if (c == '\n' || c == -1)
192		    db_error("Bad escape\n");
193	    }
194	    *cp++ = c;
195	    while (1) {
196		c = db_read_char();
197		if ((c >= 'A' && c <= 'Z') ||
198		    (c >= 'a' && c <= 'z') ||
199		    (c >= '0' && c <= '9') ||
200		    c == '_' || c == '\\' || c == ':')
201		{
202		    if (c == '\\') {
203			c = db_read_char();
204			if (c == '\n' || c == -1)
205			    db_error("Bad escape\n");
206		    }
207		    *cp++ = c;
208		    if (cp == db_tok_string+sizeof(db_tok_string)) {
209			db_error("String too long\n");
210			db_flush_lex();
211			return (tEOF);
212		    }
213		    continue;
214		}
215		else {
216		    *cp = '\0';
217		    break;
218		}
219	    }
220	    db_unread_char(c);
221	    return (tIDENT);
222	}
223
224	switch (c) {
225	    case '+':
226		return (tPLUS);
227	    case '-':
228		return (tMINUS);
229	    case '.':
230		c = db_read_char();
231		if (c == '.')
232		    return (tDOTDOT);
233		db_unread_char(c);
234		return (tDOT);
235	    case '*':
236		return (tSTAR);
237	    case '/':
238		return (tSLASH);
239	    case '=':
240		return (tEQ);
241	    case '%':
242		return (tPCT);
243	    case '#':
244		return (tHASH);
245	    case '(':
246		return (tLPAREN);
247	    case ')':
248		return (tRPAREN);
249	    case ',':
250		return (tCOMMA);
251	    case '"':
252		return (tDITTO);
253	    case '$':
254		return (tDOLLAR);
255	    case '!':
256		return (tEXCL);
257	    case '<':
258		c = db_read_char();
259		if (c == '<')
260		    return (tSHIFT_L);
261		db_unread_char(c);
262		break;
263	    case '>':
264		c = db_read_char();
265		if (c == '>')
266		    return (tSHIFT_R);
267		db_unread_char(c);
268		break;
269	    case -1:
270		return (tEOF);
271	}
272	db_printf("Bad character\n");
273	db_flush_lex();
274	return (tEOF);
275}
276