1/*-
2 * SPDX-License-Identifier: MIT-CMU
3 *
4 * Mach Operating System
5 * Copyright (c) 1991,1990 Carnegie Mellon University
6 * All Rights Reserved.
7 *
8 * Permission to use, copy, modify and distribute this software and its
9 * documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS
15 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
16 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21 *  School of Computer Science
22 *  Carnegie Mellon University
23 *  Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28/*
29 *	Author: David B. Golub, Carnegie Mellon University
30 *	Date:	7/90
31 */
32/*
33 * Lexical analyzer.
34 */
35
36#include <sys/cdefs.h>
37__FBSDID("$FreeBSD$");
38
39#include <sys/param.h>
40#include <sys/libkern.h>
41
42#include <ddb/ddb.h>
43#include <ddb/db_lex.h>
44
/* Buffer holding the input line that is currently being scanned. */
static char	db_line[DB_MAXLINE];
/*
 * db_lp is the next unread character in db_line; db_endlp is one past
 * the last character available for reading.
 */
static char *	db_lp, *db_endlp;

/* Forward declarations of the file-local helpers defined below. */
static int	db_lex(void);
static void 	db_flush_line(void);
static int 	db_read_char(void);
static void 	db_unread_char(int);
52
53int
54db_read_line(void)
55{
56	int	i;
57
58	i = db_readline(db_line, sizeof(db_line));
59	if (i == 0)
60	    return (0);	/* EOI */
61	db_lp = db_line;
62	db_endlp = db_lp + i;
63	return (i);
64}
65
66/*
67 * Simulate a line of input into DDB.
68 */
69void
70db_inject_line(const char *command)
71{
72
73	strlcpy(db_line, command, sizeof(db_line));
74	db_lp = db_line;
75	db_endlp = db_lp + strlen(command);
76}
77
78/*
79 * In rare cases, we may want to pull the remainder of the line input
80 * verbatim, rather than lexing it.  For example, when assigning literal
81 * values associated with scripts.  In that case, return a static pointer to
82 * the current location in the input buffer.  The caller must be aware that
83 * the contents are not stable if other lex/input calls are made.
84 */
85char *
86db_get_line(void)
87{
88
89	return (db_lp);
90}
91
92static void
93db_flush_line()
94{
95	db_lp = db_line;
96	db_endlp = db_line;
97}
98
99static int	db_look_char = 0;
100
101static int
102db_read_char(void)
103{
104	int	c;
105
106	if (db_look_char != 0) {
107	    c = db_look_char;
108	    db_look_char = 0;
109	}
110	else if (db_lp >= db_endlp)
111	    c = -1;
112	else
113	    c = *db_lp++;
114	return (c);
115}
116
117static void
118db_unread_char(c)
119	int c;
120{
121	db_look_char = c;
122}
123
/* One token of pushback; 0 means nothing is pending. */
static int	db_look_token = 0;

/*
 * Push a token back so that the next db_read_token() returns it instead
 * of scanning new input.  Only a single token is remembered; a second
 * call overwrites the first.
 */
void
db_unread_token(int t)
{
	db_look_token = t;
}
132
133int
134db_read_token()
135{
136	int	t;
137
138	if (db_look_token) {
139	    t = db_look_token;
140	    db_look_token = 0;
141	}
142	else
143	    t = db_lex();
144	return (t);
145}
146
/* Value of the most recent number scanned by db_lex() (token tNUMBER). */
db_expr_t	db_tok_number;
/* NUL-terminated text of the most recent identifier scanned (token tIDENT). */
char	db_tok_string[TOK_STRING_SIZE];

/* Radix applied to numbers that carry no 0o/0t/0x prefix; starts out as 16. */
db_expr_t	db_radix = 16;
151
152void
153db_flush_lex(void)
154{
155	db_flush_line();
156	db_look_char = 0;
157	db_look_token = 0;
158}
159
160static int
161db_lex(void)
162{
163	int	c;
164
165	c = db_read_char();
166	while (c <= ' ' || c > '~') {
167	    if (c == '\n' || c == -1)
168		return (tEOL);
169	    c = db_read_char();
170	}
171
172	if (c >= '0' && c <= '9') {
173	    /* number */
174	    int	r, digit = 0;
175
176	    if (c > '0')
177		r = db_radix;
178	    else {
179		c = db_read_char();
180		if (c == 'O' || c == 'o')
181		    r = 8;
182		else if (c == 'T' || c == 't')
183		    r = 10;
184		else if (c == 'X' || c == 'x')
185		    r = 16;
186		else {
187		    r = db_radix;
188		    db_unread_char(c);
189		}
190		c = db_read_char();
191	    }
192	    db_tok_number = 0;
193	    for (;;) {
194		if (c >= '0' && c <= ((r == 8) ? '7' : '9'))
195		    digit = c - '0';
196		else if (r == 16 && ((c >= 'A' && c <= 'F') ||
197				     (c >= 'a' && c <= 'f'))) {
198		    if (c >= 'a')
199			digit = c - 'a' + 10;
200		    else if (c >= 'A')
201			digit = c - 'A' + 10;
202		}
203		else
204		    break;
205		db_tok_number = db_tok_number * r + digit;
206		c = db_read_char();
207	    }
208	    if ((c >= '0' && c <= '9') ||
209		(c >= 'A' && c <= 'Z') ||
210		(c >= 'a' && c <= 'z') ||
211		(c == '_'))
212	    {
213		db_error("Bad character in number\n");
214		db_flush_lex();
215		return (tEOF);
216	    }
217	    db_unread_char(c);
218	    return (tNUMBER);
219	}
220	if ((c >= 'A' && c <= 'Z') ||
221	    (c >= 'a' && c <= 'z') ||
222	    c == '_' || c == '\\')
223	{
224	    /* string */
225	    char *cp;
226
227	    cp = db_tok_string;
228	    if (c == '\\') {
229		c = db_read_char();
230		if (c == '\n' || c == -1)
231		    db_error("Bad escape\n");
232	    }
233	    *cp++ = c;
234	    while (1) {
235		c = db_read_char();
236		if ((c >= 'A' && c <= 'Z') ||
237		    (c >= 'a' && c <= 'z') ||
238		    (c >= '0' && c <= '9') ||
239		    c == '_' || c == '\\' || c == ':' || c == '.')
240		{
241		    if (c == '\\') {
242			c = db_read_char();
243			if (c == '\n' || c == -1)
244			    db_error("Bad escape\n");
245		    }
246		    *cp++ = c;
247		    if (cp == db_tok_string+sizeof(db_tok_string)) {
248			db_error("String too long\n");
249			db_flush_lex();
250			return (tEOF);
251		    }
252		    continue;
253		}
254		else {
255		    *cp = '\0';
256		    break;
257		}
258	    }
259	    db_unread_char(c);
260	    return (tIDENT);
261	}
262
263	switch (c) {
264	    case '+':
265		return (tPLUS);
266	    case '-':
267		return (tMINUS);
268	    case '.':
269		c = db_read_char();
270		if (c == '.')
271		    return (tDOTDOT);
272		db_unread_char(c);
273		return (tDOT);
274	    case '*':
275		return (tSTAR);
276	    case '/':
277		return (tSLASH);
278	    case '=':
279		c = db_read_char();
280		if (c == '=')
281		    return (tLOG_EQ);
282		db_unread_char(c);
283		return (tEQ);
284	    case '%':
285		return (tPCT);
286	    case '#':
287		return (tHASH);
288	    case '(':
289		return (tLPAREN);
290	    case ')':
291		return (tRPAREN);
292	    case ',':
293		return (tCOMMA);
294	    case '"':
295		return (tDITTO);
296	    case '$':
297		return (tDOLLAR);
298	    case '!':
299		c = db_read_char();
300		if (c == '='){
301			return (tLOG_NOT_EQ);
302		}
303		db_unread_char(c);
304		return (tEXCL);
305	    case ';':
306		return (tSEMI);
307	    case '&':
308		c = db_read_char();
309		if (c == '&')
310		    return (tLOG_AND);
311		db_unread_char(c);
312		return (tBIT_AND);
313	    case '|':
314		c = db_read_char();
315		if (c == '|')
316		    return (tLOG_OR);
317		db_unread_char(c);
318		return (tBIT_OR);
319	    case '<':
320		c = db_read_char();
321		if (c == '<')
322		    return (tSHIFT_L);
323		if (c == '=')
324		    return (tLESS_EQ);
325		db_unread_char(c);
326		return (tLESS);
327	    case '>':
328		c = db_read_char();
329		if (c == '>')
330		    return (tSHIFT_R);
331		if (c == '=')
332		    return (tGREATER_EQ);
333		db_unread_char(c);
334		return (tGREATER);
335	    case '?':
336		return (tQUESTION);
337	    case '~':
338		return (tBIT_NOT);
339	    case -1:
340		return (tEOF);
341	}
342	db_printf("Bad character\n");
343	db_flush_lex();
344	return (tEOF);
345}
346