1/*-
2 * SPDX-License-Identifier: MIT-CMU
3 *
4 * Mach Operating System
5 * Copyright (c) 1991,1990 Carnegie Mellon University
6 * All Rights Reserved.
7 *
8 * Permission to use, copy, modify and distribute this software and its
9 * documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS
15 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
16 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21 *  School of Computer Science
22 *  Carnegie Mellon University
23 *  Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28/*
29 *	Author: David B. Golub, Carnegie Mellon University
30 *	Date:	7/90
31 */
32/*
33 * Lexical analyzer.
34 */
35
36#include <sys/cdefs.h>
37__FBSDID("$FreeBSD$");
38
39#include <sys/param.h>
40#include <sys/libkern.h>
41#include <sys/lock.h>
42
43#include <ddb/ddb.h>
44#include <ddb/db_lex.h>
45
46static char	db_line[DB_MAXLINE];
47static char *	db_lp, *db_endlp;
48
49static int	db_lex(int);
50static void 	db_flush_line(void);
51static int 	db_read_char(void);
52static void 	db_unread_char(int);
53
54int
55db_read_line(void)
56{
57	int	i;
58
59	i = db_readline(db_line, sizeof(db_line));
60	if (i == 0)
61	    return (0);	/* EOI */
62	db_lp = db_line;
63	db_endlp = db_lp + i;
64	return (i);
65}
66
67/*
68 * Simulate a line of input into DDB.
69 */
70void
71db_inject_line(const char *command)
72{
73
74	strlcpy(db_line, command, sizeof(db_line));
75	db_lp = db_line;
76	db_endlp = db_lp + strlen(command);
77}
78
79/*
80 * In rare cases, we may want to pull the remainder of the line input
81 * verbatim, rather than lexing it.  For example, when assigning literal
82 * values associated with scripts.  In that case, return a static pointer to
83 * the current location in the input buffer.  The caller must be aware that
84 * the contents are not stable if other lex/input calls are made.
85 */
86char *
87db_get_line(void)
88{
89
90	return (db_lp);
91}
92
93static void
94db_flush_line()
95{
96	db_lp = db_line;
97	db_endlp = db_line;
98}
99
100static int	db_look_char = 0;
101
102static int
103db_read_char(void)
104{
105	int	c;
106
107	if (db_look_char != 0) {
108	    c = db_look_char;
109	    db_look_char = 0;
110	}
111	else if (db_lp >= db_endlp)
112	    c = -1;
113	else
114	    c = *db_lp++;
115	return (c);
116}
117
118static void
119db_unread_char(c)
120	int c;
121{
122	db_look_char = c;
123}
124
/*
 * Single-token pushback slot.  It starts out as 0, which means that no token
 * is pending.
 */
static int	db_look_token;

/*
 * Push token t back.  While the slot holds a non-zero value, the next
 * db_read_token_flags() call returns it instead of lexing new input.  Only
 * one token can be pending; a later call replaces the earlier value.
 */
void
db_unread_token(int t)
{

	db_look_token = t;
}
132
133int
134db_read_token_flags(int flags)
135{
136	int	t;
137
138	MPASS((flags & ~(DRT_VALID_FLAGS_MASK)) == 0);
139
140	if (db_look_token) {
141	    t = db_look_token;
142	    db_look_token = 0;
143	}
144	else
145	    t = db_lex(flags);
146	return (t);
147}
148
149db_expr_t	db_tok_number;
150char	db_tok_string[TOK_STRING_SIZE];
151
152db_expr_t	db_radix = 16;
153
154void
155db_flush_lex(void)
156{
157	db_flush_line();
158	db_look_char = 0;
159	db_look_token = 0;
160}
161
162static int
163db_lex(int flags)
164{
165	int	c, n, radix_mode;
166	bool	lex_wspace, lex_hex_numbers;
167
168	switch (flags & DRT_RADIX_MASK) {
169	case DRT_DEFAULT_RADIX:
170		radix_mode = -1;
171		break;
172	case DRT_OCTAL:
173		radix_mode = 8;
174		break;
175	case DRT_DECIMAL:
176		radix_mode = 10;
177		break;
178	case DRT_HEXADECIMAL:
179		radix_mode = 16;
180		break;
181	}
182
183	lex_wspace = ((flags & DRT_WSPACE) != 0);
184	lex_hex_numbers = ((flags & DRT_HEX) != 0);
185
186	c = db_read_char();
187	for (n = 0; c <= ' ' || c > '~'; n++) {
188	    if (c == '\n' || c == -1)
189		return (tEOL);
190	    c = db_read_char();
191	}
192	if (lex_wspace && n != 0) {
193	    db_unread_char(c);
194	    return (tWSPACE);
195	}
196
197	if ((c >= '0' && c <= '9') ||
198	   (lex_hex_numbers &&
199	   ((c >= 'a' && c <= 'f') ||
200	   (c >= 'A' && c <= 'F')))) {
201	    /* number */
202	    int	r, digit = 0;
203
204	    if (radix_mode != -1)
205		r = radix_mode;
206	    else if (c != '0')
207		r = db_radix;
208	    else {
209		c = db_read_char();
210		if (c == 'O' || c == 'o')
211		    r = 8;
212		else if (c == 'T' || c == 't')
213		    r = 10;
214		else if (c == 'X' || c == 'x')
215		    r = 16;
216		else {
217		    r = db_radix;
218		    db_unread_char(c);
219		}
220		c = db_read_char();
221	    }
222	    db_tok_number = 0;
223	    for (;;) {
224		if (c >= '0' && c <= ((r == 8) ? '7' : '9'))
225		    digit = c - '0';
226		else if (r == 16 && ((c >= 'A' && c <= 'F') ||
227				     (c >= 'a' && c <= 'f'))) {
228		    if (c >= 'a')
229			digit = c - 'a' + 10;
230		    else if (c >= 'A')
231			digit = c - 'A' + 10;
232		}
233		else
234		    break;
235		db_tok_number = db_tok_number * r + digit;
236		c = db_read_char();
237	    }
238	    if ((c >= '0' && c <= '9') ||
239		(c >= 'A' && c <= 'Z') ||
240		(c >= 'a' && c <= 'z') ||
241		(c == '_'))
242	    {
243		db_error("Bad character in number\n");
244		db_flush_lex();
245		return (tEOF);
246	    }
247	    db_unread_char(c);
248	    return (tNUMBER);
249	}
250	if ((c >= 'A' && c <= 'Z') ||
251	    (c >= 'a' && c <= 'z') ||
252	    c == '_' || c == '\\')
253	{
254	    /* string */
255	    char *cp;
256
257	    cp = db_tok_string;
258	    if (c == '\\') {
259		c = db_read_char();
260		if (c == '\n' || c == -1)
261		    db_error("Bad escape\n");
262	    }
263	    *cp++ = c;
264	    while (1) {
265		c = db_read_char();
266		if ((c >= 'A' && c <= 'Z') ||
267		    (c >= 'a' && c <= 'z') ||
268		    (c >= '0' && c <= '9') ||
269		    c == '_' || c == '\\' || c == ':' || c == '.')
270		{
271		    if (c == '\\') {
272			c = db_read_char();
273			if (c == '\n' || c == -1)
274			    db_error("Bad escape\n");
275		    }
276		    *cp++ = c;
277		    if (cp == db_tok_string+sizeof(db_tok_string)) {
278			db_error("String too long\n");
279			db_flush_lex();
280			return (tEOF);
281		    }
282		    continue;
283		}
284		else {
285		    *cp = '\0';
286		    break;
287		}
288	    }
289	    db_unread_char(c);
290	    return (tIDENT);
291	}
292
293	switch (c) {
294	    case '+':
295		return (tPLUS);
296	    case '-':
297		return (tMINUS);
298	    case '.':
299		c = db_read_char();
300		if (c == '.')
301		    return (tDOTDOT);
302		db_unread_char(c);
303		return (tDOT);
304	    case '*':
305		return (tSTAR);
306	    case '/':
307		return (tSLASH);
308	    case '=':
309		c = db_read_char();
310		if (c == '=')
311		    return (tLOG_EQ);
312		db_unread_char(c);
313		return (tEQ);
314	    case '%':
315		return (tPCT);
316	    case '#':
317		return (tHASH);
318	    case '(':
319		return (tLPAREN);
320	    case ')':
321		return (tRPAREN);
322	    case ',':
323		return (tCOMMA);
324	    case '"':
325		return (tDITTO);
326	    case '$':
327		return (tDOLLAR);
328	    case '!':
329		c = db_read_char();
330		if (c == '='){
331			return (tLOG_NOT_EQ);
332		}
333		db_unread_char(c);
334		return (tEXCL);
335	    case ':':
336		c = db_read_char();
337		if (c == ':')
338			return (tCOLONCOLON);
339		db_unread_char(c);
340		return (tCOLON);
341	    case ';':
342		return (tSEMI);
343	    case '&':
344		c = db_read_char();
345		if (c == '&')
346		    return (tLOG_AND);
347		db_unread_char(c);
348		return (tBIT_AND);
349	    case '|':
350		c = db_read_char();
351		if (c == '|')
352		    return (tLOG_OR);
353		db_unread_char(c);
354		return (tBIT_OR);
355	    case '<':
356		c = db_read_char();
357		if (c == '<')
358		    return (tSHIFT_L);
359		if (c == '=')
360		    return (tLESS_EQ);
361		db_unread_char(c);
362		return (tLESS);
363	    case '>':
364		c = db_read_char();
365		if (c == '>')
366		    return (tSHIFT_R);
367		if (c == '=')
368		    return (tGREATER_EQ);
369		db_unread_char(c);
370		return (tGREATER);
371	    case '?':
372		return (tQUESTION);
373	    case '~':
374		return (tBIT_NOT);
375	    case -1:
376		return (tEOF);
377	}
378	db_printf("Bad character\n");
379	db_flush_lex();
380	return (tEOF);
381}
382