1/*
2 * *****************************************************************************
3 *
4 * SPDX-License-Identifier: BSD-2-Clause
5 *
6 * Copyright (c) 2018-2021 Gavin D. Howard and contributors.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are met:
10 *
11 * * Redistributions of source code must retain the above copyright notice, this
12 *   list of conditions and the following disclaimer.
13 *
14 * * Redistributions in binary form must reproduce the above copyright notice,
15 *   this list of conditions and the following disclaimer in the documentation
16 *   and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 *
30 * *****************************************************************************
31 *
32 * Common code for the lexers.
33 *
34 */
35
36#include <assert.h>
37#include <ctype.h>
38#include <stdbool.h>
39#include <string.h>
40
41#include <lex.h>
42#include <vm.h>
43#include <bc.h>
44
45void bc_lex_invalidChar(BcLex *l, char c) {
46	l->t = BC_LEX_INVALID;
47	bc_lex_verr(l, BC_ERR_PARSE_CHAR, c);
48}
49
50void bc_lex_lineComment(BcLex *l) {
51	l->t = BC_LEX_WHITESPACE;
52	while (l->i < l->len && l->buf[l->i] != '\n') l->i += 1;
53}
54
55void bc_lex_comment(BcLex *l) {
56
57	size_t i, nlines = 0;
58	const char *buf = l->buf;
59	bool end = false;
60	char c;
61
62	l->i += 1;
63	l->t = BC_LEX_WHITESPACE;
64
65	for (i = l->i; !end; i += !end) {
66
67		for (; (c = buf[i]) && c != '*'; ++i) nlines += (c == '\n');
68
69		if (BC_ERR(!c || buf[i + 1] == '\0')) {
70			l->i = i;
71			bc_lex_err(l, BC_ERR_PARSE_COMMENT);
72		}
73
74		end = buf[i + 1] == '/';
75	}
76
77	l->i = i + 2;
78	l->line += nlines;
79}
80
81void bc_lex_whitespace(BcLex *l) {
82	char c;
83	l->t = BC_LEX_WHITESPACE;
84	for (c = l->buf[l->i]; c != '\n' && isspace(c); c = l->buf[++l->i]);
85}
86
87void bc_lex_commonTokens(BcLex *l, char c) {
88	if (!c) l->t = BC_LEX_EOF;
89	else if (c == '\n') l->t = BC_LEX_NLINE;
90	else bc_lex_whitespace(l);
91}
92
93static size_t bc_lex_num(BcLex *l, char start, bool int_only) {
94
95	const char *buf = l->buf + l->i;
96	size_t i;
97	char c;
98	bool last_pt, pt = (start == '.');
99
100	for (i = 0; (c = buf[i]) && (BC_LEX_NUM_CHAR(c, pt, int_only) ||
101	                             (c == '\\' && buf[i + 1] == '\n')); ++i)
102	{
103		if (c == '\\') {
104
105			if (buf[i + 1] == '\n') {
106
107				i += 2;
108
109				// Make sure to eat whitespace at the beginning of the line.
110				while(isspace(buf[i]) && buf[i] != '\n') i += 1;
111
112				c = buf[i];
113
114				if (!BC_LEX_NUM_CHAR(c, pt, int_only)) break;
115			}
116			else break;
117		}
118
119		last_pt = (c == '.');
120		if (pt && last_pt) break;
121		pt = pt || last_pt;
122
123		bc_vec_push(&l->str, &c);
124	}
125
126	return i;
127}
128
129void bc_lex_number(BcLex *l, char start) {
130
131	l->t = BC_LEX_NUMBER;
132
133	bc_vec_popAll(&l->str);
134	bc_vec_push(&l->str, &start);
135
136	l->i += bc_lex_num(l, start, false);
137
138#if BC_ENABLE_EXTRA_MATH
139	{
140		char c = l->buf[l->i];
141
142		if (c == 'e') {
143
144#if BC_ENABLED
145			if (BC_IS_POSIX) bc_lex_err(l, BC_ERR_POSIX_EXP_NUM);
146#endif // BC_ENABLED
147
148			bc_vec_push(&l->str, &c);
149			l->i += 1;
150			c = l->buf[l->i];
151
152			if (c == BC_LEX_NEG_CHAR) {
153				bc_vec_push(&l->str, &c);
154				l->i += 1;
155				c = l->buf[l->i];
156			}
157
158			if (BC_ERR(!BC_LEX_NUM_CHAR(c, false, true)))
159				bc_lex_verr(l, BC_ERR_PARSE_CHAR, c);
160
161			l->i += bc_lex_num(l, 0, true);
162		}
163	}
164#endif // BC_ENABLE_EXTRA_MATH
165
166	bc_vec_pushByte(&l->str, '\0');
167}
168
169void bc_lex_name(BcLex *l) {
170
171	size_t i = 0;
172	const char *buf = l->buf + l->i - 1;
173	char c = buf[i];
174
175	l->t = BC_LEX_NAME;
176
177	while ((c >= 'a' && c <= 'z') || isdigit(c) || c == '_') c = buf[++i];
178
179	bc_vec_string(&l->str, i, buf);
180
181	// Increment the index. We minus 1 because it has already been incremented.
182	l->i += i - 1;
183}
184
185void bc_lex_init(BcLex *l) {
186	BC_SIG_ASSERT_LOCKED;
187	assert(l != NULL);
188	bc_vec_init(&l->str, sizeof(char), NULL);
189}
190
191void bc_lex_free(BcLex *l) {
192	BC_SIG_ASSERT_LOCKED;
193	assert(l != NULL);
194	bc_vec_free(&l->str);
195}
196
197void bc_lex_file(BcLex *l, const char *file) {
198	assert(l != NULL && file != NULL);
199	l->line = 1;
200	vm.file = file;
201}
202
203void bc_lex_next(BcLex *l) {
204
205	assert(l != NULL);
206
207	l->last = l->t;
208	l->line += (l->i != 0 && l->buf[l->i - 1] == '\n');
209
210	if (BC_ERR(l->last == BC_LEX_EOF)) bc_lex_err(l, BC_ERR_PARSE_EOF);
211
212	l->t = BC_LEX_EOF;
213
214	if (l->i == l->len) return;
215
216	// Loop until failure or we don't have whitespace. This
217	// is so the parser doesn't get inundated with whitespace.
218	do {
219		vm.next(l);
220	} while (l->t == BC_LEX_WHITESPACE);
221}
222
223void bc_lex_text(BcLex *l, const char *text) {
224	assert(l != NULL && text != NULL);
225	l->buf = text;
226	l->i = 0;
227	l->len = strlen(text);
228	l->t = l->last = BC_LEX_INVALID;
229	bc_lex_next(l);
230}
231