1/*
2 * *****************************************************************************
3 *
4 * SPDX-License-Identifier: BSD-2-Clause
5 *
6 * Copyright (c) 2018-2021 Gavin D. Howard and contributors.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are met:
10 *
11 * * Redistributions of source code must retain the above copyright notice, this
12 *   list of conditions and the following disclaimer.
13 *
14 * * Redistributions in binary form must reproduce the above copyright notice,
15 *   this list of conditions and the following disclaimer in the documentation
16 *   and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 *
30 * *****************************************************************************
31 *
32 * The lexer for bc.
33 *
34 */
35
36#if BC_ENABLED
37
38#include <assert.h>
39#include <ctype.h>
40#include <string.h>
41
42#include <bc.h>
43#include <vm.h>
44
45static void bc_lex_identifier(BcLex *l) {
46
47	size_t i;
48	const char *buf = l->buf + l->i - 1;
49
50	for (i = 0; i < bc_lex_kws_len; ++i) {
51
52		const BcLexKeyword *kw = bc_lex_kws + i;
53		size_t n = BC_LEX_KW_LEN(kw);
54
55		if (!strncmp(buf, kw->name, n) && !isalnum(buf[n]) && buf[n] != '_') {
56
57			l->t = BC_LEX_KW_AUTO + (BcLexType) i;
58
59			if (!BC_LEX_KW_POSIX(kw))
60				bc_lex_verr(l, BC_ERR_POSIX_KW, kw->name);
61
62			// We minus 1 because the index has already been incremented.
63			l->i += n - 1;
64			return;
65		}
66	}
67
68	bc_lex_name(l);
69
70	if (BC_ERR(l->str.len - 1 > 1))
71		bc_lex_verr(l, BC_ERR_POSIX_NAME_LEN, l->str.v);
72}
73
74static void bc_lex_string(BcLex *l) {
75
76	size_t len, nlines = 0, i = l->i;
77	const char *buf = l->buf;
78	char c;
79
80	l->t = BC_LEX_STR;
81
82	for (; (c = buf[i]) && c != '"'; ++i) nlines += c == '\n';
83
84	if (BC_ERR(c == '\0')) {
85		l->i = i;
86		bc_lex_err(l, BC_ERR_PARSE_STRING);
87	}
88
89	len = i - l->i;
90	bc_vec_string(&l->str, len, l->buf + l->i);
91
92	l->i = i + 1;
93	l->line += nlines;
94}
95
96static void bc_lex_assign(BcLex *l, BcLexType with, BcLexType without) {
97	if (l->buf[l->i] == '=') {
98		l->i += 1;
99		l->t = with;
100	}
101	else l->t = without;
102}
103
104void bc_lex_token(BcLex *l) {
105
106	char c = l->buf[l->i++], c2;
107
108	// This is the workhorse of the lexer.
109	switch (c) {
110
111		case '\0':
112		case '\n':
113		case '\t':
114		case '\v':
115		case '\f':
116		case '\r':
117		case ' ':
118		{
119			bc_lex_commonTokens(l, c);
120			break;
121		}
122
123		case '!':
124		{
125			bc_lex_assign(l, BC_LEX_OP_REL_NE, BC_LEX_OP_BOOL_NOT);
126
127			if (l->t == BC_LEX_OP_BOOL_NOT)
128				bc_lex_verr(l, BC_ERR_POSIX_BOOL, "!");
129
130			break;
131		}
132
133		case '"':
134		{
135			bc_lex_string(l);
136			break;
137		}
138
139		case '#':
140		{
141			bc_lex_err(l, BC_ERR_POSIX_COMMENT);
142			bc_lex_lineComment(l);
143			break;
144		}
145
146		case '%':
147		{
148			bc_lex_assign(l, BC_LEX_OP_ASSIGN_MODULUS, BC_LEX_OP_MODULUS);
149			break;
150		}
151
152		case '&':
153		{
154			c2 = l->buf[l->i];
155			if (BC_NO_ERR(c2 == '&')) {
156
157				bc_lex_verr(l, BC_ERR_POSIX_BOOL, "&&");
158
159				l->i += 1;
160				l->t = BC_LEX_OP_BOOL_AND;
161			}
162			else bc_lex_invalidChar(l, c);
163
164			break;
165		}
166#if BC_ENABLE_EXTRA_MATH
167		case '$':
168		{
169			l->t = BC_LEX_OP_TRUNC;
170			break;
171		}
172
173		case '@':
174		{
175			bc_lex_assign(l, BC_LEX_OP_ASSIGN_PLACES, BC_LEX_OP_PLACES);
176			break;
177		}
178#endif // BC_ENABLE_EXTRA_MATH
179		case '(':
180		case ')':
181		{
182			l->t = (BcLexType) (c - '(' + BC_LEX_LPAREN);
183			break;
184		}
185
186		case '*':
187		{
188			bc_lex_assign(l, BC_LEX_OP_ASSIGN_MULTIPLY, BC_LEX_OP_MULTIPLY);
189			break;
190		}
191
192		case '+':
193		{
194			c2 = l->buf[l->i];
195			if (c2 == '+') {
196				l->i += 1;
197				l->t = BC_LEX_OP_INC;
198			}
199			else bc_lex_assign(l, BC_LEX_OP_ASSIGN_PLUS, BC_LEX_OP_PLUS);
200			break;
201		}
202
203		case ',':
204		{
205			l->t = BC_LEX_COMMA;
206			break;
207		}
208
209		case '-':
210		{
211			c2 = l->buf[l->i];
212			if (c2 == '-') {
213				l->i += 1;
214				l->t = BC_LEX_OP_DEC;
215			}
216			else bc_lex_assign(l, BC_LEX_OP_ASSIGN_MINUS, BC_LEX_OP_MINUS);
217			break;
218		}
219
220		case '.':
221		{
222			c2 = l->buf[l->i];
223			if (BC_LEX_NUM_CHAR(c2, true, false)) bc_lex_number(l, c);
224			else {
225				l->t = BC_LEX_KW_LAST;
226				bc_lex_err(l, BC_ERR_POSIX_DOT);
227			}
228			break;
229		}
230
231		case '/':
232		{
233			c2 = l->buf[l->i];
234			if (c2 =='*') bc_lex_comment(l);
235			else bc_lex_assign(l, BC_LEX_OP_ASSIGN_DIVIDE, BC_LEX_OP_DIVIDE);
236			break;
237		}
238
239		case '0':
240		case '1':
241		case '2':
242		case '3':
243		case '4':
244		case '5':
245		case '6':
246		case '7':
247		case '8':
248		case '9':
249		case 'A':
250		case 'B':
251		case 'C':
252		case 'D':
253		case 'E':
254		case 'F':
255		// Apparently, GNU bc (and maybe others) allows any uppercase letter as
256		// a number. When single digits, they act like the ones above. When
257		// multi-digit, any letter above the input base is automatically set to
258		// the biggest allowable digit in the input base.
259		case 'G':
260		case 'H':
261		case 'I':
262		case 'J':
263		case 'K':
264		case 'L':
265		case 'M':
266		case 'N':
267		case 'O':
268		case 'P':
269		case 'Q':
270		case 'R':
271		case 'S':
272		case 'T':
273		case 'U':
274		case 'V':
275		case 'W':
276		case 'X':
277		case 'Y':
278		case 'Z':
279		{
280			bc_lex_number(l, c);
281			break;
282		}
283
284		case ';':
285		{
286			l->t = BC_LEX_SCOLON;
287			break;
288		}
289
290		case '<':
291		{
292#if BC_ENABLE_EXTRA_MATH
293			c2 = l->buf[l->i];
294
295			if (c2 == '<') {
296				l->i += 1;
297				bc_lex_assign(l, BC_LEX_OP_ASSIGN_LSHIFT, BC_LEX_OP_LSHIFT);
298				break;
299			}
300#endif // BC_ENABLE_EXTRA_MATH
301			bc_lex_assign(l, BC_LEX_OP_REL_LE, BC_LEX_OP_REL_LT);
302			break;
303		}
304
305		case '=':
306		{
307			bc_lex_assign(l, BC_LEX_OP_REL_EQ, BC_LEX_OP_ASSIGN);
308			break;
309		}
310
311		case '>':
312		{
313#if BC_ENABLE_EXTRA_MATH
314			c2 = l->buf[l->i];
315
316			if (c2 == '>') {
317				l->i += 1;
318				bc_lex_assign(l, BC_LEX_OP_ASSIGN_RSHIFT, BC_LEX_OP_RSHIFT);
319				break;
320			}
321#endif // BC_ENABLE_EXTRA_MATH
322			bc_lex_assign(l, BC_LEX_OP_REL_GE, BC_LEX_OP_REL_GT);
323			break;
324		}
325
326		case '[':
327		case ']':
328		{
329			l->t = (BcLexType) (c - '[' + BC_LEX_LBRACKET);
330			break;
331		}
332
333		case '\\':
334		{
335			if (BC_NO_ERR(l->buf[l->i] == '\n')) {
336				l->i += 1;
337				l->t = BC_LEX_WHITESPACE;
338			}
339			else bc_lex_invalidChar(l, c);
340			break;
341		}
342
343		case '^':
344		{
345			bc_lex_assign(l, BC_LEX_OP_ASSIGN_POWER, BC_LEX_OP_POWER);
346			break;
347		}
348
349		case 'a':
350		case 'b':
351		case 'c':
352		case 'd':
353		case 'e':
354		case 'f':
355		case 'g':
356		case 'h':
357		case 'i':
358		case 'j':
359		case 'k':
360		case 'l':
361		case 'm':
362		case 'n':
363		case 'o':
364		case 'p':
365		case 'q':
366		case 'r':
367		case 's':
368		case 't':
369		case 'u':
370		case 'v':
371		case 'w':
372		case 'x':
373		case 'y':
374		case 'z':
375		{
376			bc_lex_identifier(l);
377			break;
378		}
379
380		case '{':
381		case '}':
382		{
383			l->t = (BcLexType) (c - '{' + BC_LEX_LBRACE);
384			break;
385		}
386
387		case '|':
388		{
389			c2 = l->buf[l->i];
390
391			if (BC_NO_ERR(c2 == '|')) {
392
393				bc_lex_verr(l, BC_ERR_POSIX_BOOL, "||");
394
395				l->i += 1;
396				l->t = BC_LEX_OP_BOOL_OR;
397			}
398			else bc_lex_invalidChar(l, c);
399
400			break;
401		}
402
403		default:
404		{
405			bc_lex_invalidChar(l, c);
406		}
407	}
408}
409#endif // BC_ENABLED
410