1/*
2 * *****************************************************************************
3 *
4 * SPDX-License-Identifier: BSD-2-Clause
5 *
6 * Copyright (c) 2018-2023 Gavin D. Howard and contributors.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are met:
10 *
11 * * Redistributions of source code must retain the above copyright notice, this
12 *   list of conditions and the following disclaimer.
13 *
14 * * Redistributions in binary form must reproduce the above copyright notice,
15 *   this list of conditions and the following disclaimer in the documentation
16 *   and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 *
30 * *****************************************************************************
31 *
32 * Definitions for bc only.
33 *
34 */
35
36#ifndef BC_BC_H
37#define BC_BC_H
38
39#if BC_ENABLED
40
41#include <limits.h>
42#include <stdbool.h>
43
44#include <status.h>
45#include <lex.h>
46#include <parse.h>
47
/**
 * The main function for bc. It just sets variables and passes its arguments
 * through to @a bc_vm_boot().
 * @param argc  The argument count, passed through to @a bc_vm_boot().
 * @param argv  The argument array, passed through to @a bc_vm_boot().
 */
void
bc_main(int argc, char* argv[]);
54
// These are references to the help text, the library text, and the "filename"
// for the library. They are only declared here; the definitions live in
// another translation unit.

/// The help text for bc.
extern const char bc_help[];
/// The text of the bc library.
extern const char bc_lib[];
/// The "filename" used for the bc library.
extern const char* bc_lib_name;

// These are references to the second math library and its "filename." They
// are only declared when BC_ENABLE_EXTRA_MATH is non-zero.
#if BC_ENABLE_EXTRA_MATH
/// The text of the second math library.
extern const char bc_lib2[];
/// The "filename" used for the second math library.
extern const char* bc_lib2_name;
#endif // BC_ENABLE_EXTRA_MATH
66
/**
 * A struct containing information about a bc keyword. Entries are built with
 * @a BC_LEX_KW_ENTRY and queried with @a BC_LEX_KW_POSIX and
 * @a BC_LEX_KW_LEN.
 */
typedef struct BcLexKeyword
{
	/// Holds the length of the keyword along with a bit that, if set, means the
	/// keyword is used in POSIX bc. The POSIX bit is the most significant bit
	/// (see @a BC_LEX_CHAR_MSB); the length occupies the remaining bits.
	uchar data;

	/// The keyword text. The buffer is 14 chars, so the keyword text cannot be
	/// longer than that.
	const char name[14];
} BcLexKeyword;
79
/// Shifts @a bit into the most significant bit position of a char. Used for
/// building and testing the POSIX bit in BcLexKeyword's data field.
#define BC_LEX_CHAR_MSB(bit) ((bit) << (CHAR_BIT - 1))

/// Evaluates to non-zero if the POSIX bit of the keyword @a kw (a pointer to a
/// BcLexKeyword) is set, and to zero otherwise.
#define BC_LEX_KW_POSIX(kw) (BC_LEX_CHAR_MSB(1) & (kw)->data)

/// Evaluates to the length of the keyword @a kw (a pointer to a BcLexKeyword),
/// which is the data field with the POSIX bit masked off, as a size_t.
#define BC_LEX_KW_LEN(kw) ((size_t) (~BC_LEX_CHAR_MSB(1) & (kw)->data))

/// A macro to easily build a keyword entry from the keyword text @a a, its
/// length @a b, and its POSIX bit @a c. See bc_lex_kws in src/data.c.
#define BC_LEX_KW_ENTRY(a, b, c)                                              \
	{                                                                         \
		.data = BC_LEX_CHAR_MSB(c) | (~BC_LEX_CHAR_MSB(1) & (b)), .name = a   \
	}
95
/// A macro for the number of keywords bc has: 37 when BC_ENABLE_EXTRA_MATH is
/// non-zero and 33 otherwise. Both values have to be updated if any keywords
/// are added. This is for the redefined_kws field of the BcVm struct.
#if BC_ENABLE_EXTRA_MATH
#define BC_LEX_NKWS (37)
#else // BC_ENABLE_EXTRA_MATH
#define BC_LEX_NKWS (33)
#endif // BC_ENABLE_EXTRA_MATH
109
/// The array of bc keywords.
extern const BcLexKeyword bc_lex_kws[];
/// The length of @a bc_lex_kws.
extern const size_t bc_lex_kws_len;

/**
 * The @a BcLexNext function for bc. (See include/lex.h for a definition of
 * @a BcLexNext.)
 * @param l  The lexer.
 */
void
bc_lex_token(BcLex* l);
121
122// The following section is for flags needed when parsing bc code. These flags
123// are complicated, but necessary. Why you ask? Because bc's standard is awful.
124//
125// If you don't believe me, go read the bc Parsing section of the Development
126// manual (manuals/development.md). Then come back.
127//
128// In other words, these flags are the sign declaring, "Here be dragons."
129
/**
 * This returns a pointer to the set of flags at the top of the flag stack.
 * The result of bc_vec_top() on the parser's flags vector is cast to
 * uint16_t*, so each flag set is accessed as a uint16_t.
 * @param p  The parser. Expected to be a BcParse pointer.
 * @return   A pointer to the top flag set.
 */
#define BC_PARSE_TOP_FLAG_PTR(p) ((uint16_t*) bc_vec_top(&(p)->flags))

/**
 * This returns the flag set at the top of the flag stack. It dereferences
 * @a BC_PARSE_TOP_FLAG_PTR, so the expansion names the stored flag set itself
 * rather than a copy of it.
 * @param p  The parser. Expected to be a BcParse pointer.
 * @return   The top flag set.
 */
#define BC_PARSE_TOP_FLAG(p) (*(BC_PARSE_TOP_FLAG_PTR(p)))
145
// After this point, every flag comes as a pair of #defines: one for the flag
// bit itself, and one that tests that bit in the flag set at the top of the
// flag stack. All `p` arguments are pointers to a BcParse.

// Tests the flag bit(s) `f` in the flag set at the top of the flag stack of
// the parser `p`. Non-zero if any of them is set, zero otherwise. Every
// per-flag test below is built on this.
#define BC_PARSE_FLAG_ISSET(p, f) (BC_PARSE_TOP_FLAG(p) & (f))

// Set when the parser has seen a left brace.
#define BC_PARSE_FLAG_BRACE (UINTMAX_C(1) << 0)
#define BC_PARSE_BRACE(p) BC_PARSE_FLAG_ISSET(p, BC_PARSE_FLAG_BRACE)

// Set when the parser is inside of the braces of a function body.
#define BC_PARSE_FLAG_FUNC_INNER (UINTMAX_C(1) << 1)
#define BC_PARSE_FUNC_INNER(p) BC_PARSE_FLAG_ISSET(p, BC_PARSE_FLAG_FUNC_INNER)

// Set when the parser is parsing a function. Unlike the flag above, this one
// is set for the function header as well as for the function body.
#define BC_PARSE_FLAG_FUNC (UINTMAX_C(1) << 2)
#define BC_PARSE_FUNC(p) BC_PARSE_FLAG_ISSET(p, BC_PARSE_FLAG_FUNC)

// Set when the parser expects to parse a body next, whether the body of a
// function, an if statement, or a loop.
#define BC_PARSE_FLAG_BODY (UINTMAX_C(1) << 3)
#define BC_PARSE_BODY(p) BC_PARSE_FLAG_ISSET(p, BC_PARSE_FLAG_BODY)

// Set when bc is parsing a loop. This matters because the break and continue
// keywords are only valid inside of a loop.
#define BC_PARSE_FLAG_LOOP (UINTMAX_C(1) << 4)
#define BC_PARSE_LOOP(p) BC_PARSE_FLAG_ISSET(p, BC_PARSE_FLAG_LOOP)

// Set when bc is parsing the body of a loop. It relates to the flag above the
// same way BC_PARSE_FLAG_FUNC_INNER relates to BC_PARSE_FLAG_FUNC.
#define BC_PARSE_FLAG_LOOP_INNER (UINTMAX_C(1) << 5)
#define BC_PARSE_LOOP_INNER(p) BC_PARSE_FLAG_ISSET(p, BC_PARSE_FLAG_LOOP_INNER)

// Set when bc is parsing an if statement.
#define BC_PARSE_FLAG_IF (UINTMAX_C(1) << 6)
#define BC_PARSE_IF(p) BC_PARSE_FLAG_ISSET(p, BC_PARSE_FLAG_IF)

// Set when bc is parsing an else statement. This matters because of "else if"
// constructions, among other things.
#define BC_PARSE_FLAG_ELSE (UINTMAX_C(1) << 7)
#define BC_PARSE_ELSE(p) BC_PARSE_FLAG_ISSET(p, BC_PARSE_FLAG_ELSE)

// Set when bc has just finished parsing an if statement and its body. It tells
// the parser that an else statement can probably be expected next, which makes
// this one of the most subtle flags.
#define BC_PARSE_FLAG_IF_END (UINTMAX_C(1) << 8)
#define BC_PARSE_IF_END(p) BC_PARSE_FLAG_ISSET(p, BC_PARSE_FLAG_IF_END)
195
/**
 * This returns true if bc is in a state where it should not execute any code
 * at all. Execution is only possible when the flag stack holds exactly one
 * flag set and that flag set has no flags set; anything else means no
 * execution.
 * @param p  The parser.
 * @return   True if execution cannot proceed, false otherwise.
 */
#define BC_PARSE_NO_EXEC(p) \
	(!((p)->flags.len == 1 && BC_PARSE_TOP_FLAG(p) == 0))
203
/**
 * This returns true if the token @a t is a statement delimiter, which is a
 * semicolon (BC_LEX_SCOLON), a newline (BC_LEX_NLINE), or BC_LEX_EOF.
 * Note that @a t may be evaluated more than once.
 * @param t  The token to check.
 * @return   True if t is a statement delimiter token; false otherwise.
 */
#define BC_PARSE_DELIMITER(t) \
	((t) == BC_LEX_SCOLON || (t) == BC_LEX_NLINE || (t) == BC_LEX_EOF)
212
/**
 * This is poorly named, but it basically returns whether or not the current
 * state is valid for the end of an else statement. Concretely, it tests
 * whether BC_PARSE_FLAG_ELSE or BC_PARSE_FLAG_LOOP_INNER is set in @a f.
 * @param f  The flag set to be checked.
 * @return   Non-zero if the state is valid for the end of an else statement,
 *           zero otherwise.
 */
#define BC_PARSE_BLOCK_STMT(f) \
	((f) & (BC_PARSE_FLAG_ELSE | BC_PARSE_FLAG_LOOP_INNER))
221
/**
 * This returns the value of the data for an operator with precedence @a p and
 * associativity @a l (true if left associative, false otherwise). The
 * associativity is stored in the most significant bit of a char (see
 * @a BC_LEX_CHAR_MSB) and the precedence in the remaining bits. This is used
 * to construct an array of operators, bc_parse_ops, in src/data.c.
 * @param p  The precedence.
 * @param l  True if the operator is left associative, false otherwise.
 * @return   The data for the operator.
 */
#define BC_PARSE_OP(p, l) (((p) & ~(BC_LEX_CHAR_MSB(1))) | (BC_LEX_CHAR_MSB(l)))

/**
 * Returns the operator data for the lex token @a t. The token is turned into
 * an index into bc_parse_ops by subtracting BC_LEX_OP_INC from it.
 * @param t  The token to return operator data for.
 * @return   The operator data for @a t.
 */
#define BC_PARSE_OP_DATA(t) bc_parse_ops[((t) -BC_LEX_OP_INC)]

/**
 * Returns non-zero if operator @a op is left associative, zero otherwise.
 * This tests the most significant bit of the operator data.
 * @param op  The operator to test for associativity.
 * @return    Non-zero if the operator is left associative, zero otherwise.
 */
#define BC_PARSE_OP_LEFT(op) (BC_PARSE_OP_DATA(op) & BC_LEX_CHAR_MSB(1))

/**
 * Returns the precedence of operator @a op. Lower number means higher
 * precedence. This is the operator data with the associativity bit masked
 * off.
 * @param op  The operator to return the precedence of.
 * @return    The precedence of @a op.
 */
#define BC_PARSE_OP_PREC(op) (BC_PARSE_OP_DATA(op) & ~(BC_LEX_CHAR_MSB(1)))
253
/**
 * A macro to pack eight "is this lex token an expression token?" bits into
 * one value. The first argument lands in the most significant of the eight
 * bits (bit 7) and the last argument in the least significant (bit 0),
 * matching the 8 bits in a uint8_t. You can see this in use for
 * bc_parse_exprs in src/data.c.
 * @param e1  The first bit (bit 7).
 * @param e2  The second bit (bit 6).
 * @param e3  The third bit (bit 5).
 * @param e4  The fourth bit (bit 4).
 * @param e5  The fifth bit (bit 3).
 * @param e6  The sixth bit (bit 2).
 * @param e7  The seventh bit (bit 1).
 * @param e8  The eighth bit (bit 0).
 * @return    An expression entry for bc_parse_exprs[].
 */
#define BC_PARSE_EXPR_ENTRY(e1, e2, e3, e4, e5, e6, e7, e8)               \
	((UINTMAX_C(e8) << 0) | (UINTMAX_C(e7) << 1) | (UINTMAX_C(e6) << 2) | \
	 (UINTMAX_C(e5) << 3) | (UINTMAX_C(e4) << 4) | (UINTMAX_C(e3) << 5) | \
	 (UINTMAX_C(e2) << 6) | (UINTMAX_C(e1) << 7))
272
/**
 * Returns true if token @a i is a token that belongs in an expression.
 *
 * bc_parse_exprs stores one bit per token, eight tokens per element. The
 * element index is the low eight bits of @a i with the bottom three bits
 * dropped, and the bit within the element is selected by the bottom three
 * bits of @a i, counting down from the most significant bit (so a token whose
 * bottom three bits are 0 uses bit 7). The result is the masked bit itself,
 * so it is non-zero rather than exactly 1 when the token matches.
 * @param i  The token to test.
 * @return   Non-zero if i is an expression token, zero otherwise.
 */
#define BC_PARSE_EXPR(i) \
	(bc_parse_exprs[(((i) & (uchar) ~(0x07)) >> 3)] & (1 << (7 - ((i) & 0x07))))
280
/**
 * Returns the operator (by lex token) that is at the top of the operator
 * stack. The result of bc_vec_top() on the parser's ops vector is read as a
 * BcLexType.
 * NOTE(review): presumably the operator stack must not be empty when this is
 * used; confirm against bc_vec_top().
 * @param p  The parser.
 * @return   The operator that is at the top of the operator stack, as a lex
 *           token.
 */
#define BC_PARSE_TOP_OP(p) (*((BcLexType*) bc_vec_top(&(p)->ops)))
289
/**
 * Returns true if bc has a "leaf" token. A "leaf" token is one that can stand
 * alone in an expression. For example, a number by itself can be an expression,
 * but a binary operator, while valid for an expression, cannot be alone in the
 * expression. It must have an expression to the left and right of itself. See
 * the documentation for @a bc_parse_expr_err() in src/bc_parse.c.
 *
 * The result is false whenever @a bin_last is true; otherwise it is true if
 * @a rparen is true or if bc_parse_inst_isLeaf() accepts @a prev.
 * @param prev      The previous token as an instruction.
 * @param bin_last  True if the last operator was a binary operator, false
 *                  otherwise.
 * @param rparen    True if the last operator was a right paren.
 * @return          True if the last token was a leaf token, false otherwise.
 */
#define BC_PARSE_LEAF(prev, bin_last, rparen) \
	(!(bin_last) && ((rparen) || bc_parse_inst_isLeaf(prev)))
304
/// The last instruction (inclusive) in the range that @a BC_PARSE_INST_VAR
/// accepts. It is BC_INST_SEED when BC_ENABLE_EXTRA_MATH is non-zero and
/// BC_INST_SCALE otherwise.
#if BC_ENABLE_EXTRA_MATH
#define BC_PARSE_INST_VAR_LAST BC_INST_SEED
#else // BC_ENABLE_EXTRA_MATH
#define BC_PARSE_INST_VAR_LAST BC_INST_SCALE
#endif // BC_ENABLE_EXTRA_MATH

/**
 * This returns true if the token @a t should be treated as though it's a
 * variable. This goes for actual variables, array elements, and globals. In
 * terms of instructions, it accepts everything from BC_INST_VAR through
 * @a BC_PARSE_INST_VAR_LAST inclusive, except for BC_INST_ARRAY.
 * @param t  The token to test.
 * @return   True if @a t should be treated as though it's a variable, false
 *           otherwise.
 */
#define BC_PARSE_INST_VAR(t)                                \
	((t) >= BC_INST_VAR && (t) <= BC_PARSE_INST_VAR_LAST && \
	 (t) != BC_INST_ARRAY)
319
/**
 * Returns true if the previous token @a p (in the form of a bytecode
 * instruction) is a prefix operator, which means it lies in the range from
 * BC_INST_NEG through BC_INST_BOOL_NOT inclusive. The fact that it is for
 * bytecode instructions is what makes it different from
 * @a BC_PARSE_OP_PREFIX below.
 * @param p  The previous token.
 * @return   True if @a p is a prefix operator.
 */
#define BC_PARSE_PREV_PREFIX(p) ((p) >= BC_INST_NEG && (p) <= BC_INST_BOOL_NOT)

/**
 * Returns true if token @a t is a prefix operator, which means it is either
 * BC_LEX_OP_BOOL_NOT or BC_LEX_NEG.
 * @param t  The token to test.
 * @return   True if @a t is a prefix operator, false otherwise.
 */
#define BC_PARSE_OP_PREFIX(t) ((t) == BC_LEX_OP_BOOL_NOT || (t) == BC_LEX_NEG)

/**
 * We can calculate the conversion between tokens and bytecode instructions by
 * subtracting the position of the first operator in the lex enum (BC_LEX_NEG)
 * and adding the position of the first in the instruction enum (BC_INST_NEG).
 * The result is cast to uchar. Note: This only works for binary operators.
 * @param t  The token to turn into an instruction.
 * @return   The token as an instruction.
 */
#define BC_PARSE_TOKEN_INST(t) ((uchar) ((t) -BC_LEX_NEG + BC_INST_NEG))

/**
 * Returns true if the token is a bc keyword, which means it lies in the range
 * from BC_LEX_KW_AUTO through BC_LEX_KW_ELSE inclusive.
 * @param t  The token to check.
 * @return   True if @a t is a bc keyword, false otherwise.
 */
#define BC_PARSE_IS_KEYWORD(t) ((t) >= BC_LEX_KW_AUTO && (t) <= BC_LEX_KW_ELSE)
352
/// A struct that holds data about what tokens should be expected next. There
/// are a few instances of these, all named because they are used in specific
/// cases. Basically, in certain situations, it's useful to use the same code,
/// but have a list of valid tokens.
///
/// Obviously, @a len is the number of tokens in the @a tokens array. If more
/// than 4 are needed in the future, @a tokens will have to be changed.
/// Instances are built with @a BC_PARSE_NEXT.
typedef struct BcParseNext
{
	/// The number of tokens in the tokens array.
	uchar len;

	/// The tokens that can be expected next. There is room for at most 4.
	uchar tokens[4];

} BcParseNext;
369
/// A macro that turns a parameter list of tokens into the designated
/// initializer for the tokens array of a BcParseNext.
#define BC_PARSE_NEXT_TOKENS(...) .tokens = { __VA_ARGS__ }

/// A macro to generate a BcParseNext literal from the number of tokens @a a
/// followed by the tokens themselves. See src/data.c for examples.
#define BC_PARSE_NEXT(a, ...)                                 \
	{                                                         \
		BC_PARSE_NEXT_TOKENS(__VA_ARGS__), .len = (uchar) (a) \
	}
379
/// The status that @a bc_parse_expr_err() hands back: either success or an
/// error indicating an empty expression.
typedef enum BcParseStatus
{
	/// Success.
	BC_PARSE_STATUS_SUCCESS = 0,

	/// The expression was empty.
	BC_PARSE_STATUS_EMPTY_EXPR = 1,

} BcParseStatus;
388
/**
 * The @a BcParseExpr function for bc. (See include/parse.h for a definition of
 * @a BcParseExpr.) Unlike @a bc_parse_expr_err(), which returns a
 * BcParseStatus, this function returns nothing.
 * @param p      The parser.
 * @param flags  Flags that define the requirements that the parsed code must
 *               meet or an error will result. See @a BcParseExpr for more info.
 */
void
bc_parse_expr(BcParse* p, uint8_t flags);

/**
 * The @a BcParseParse function for bc. (See include/parse.h for a definition of
 * @a BcParseParse.)
 * @param p  The parser.
 */
void
bc_parse_parse(BcParse* p);

/**
 * Ends a series of if statements. This is to ensure that full parses happen
 * when a file finishes or before defining a function. Without this, bc thinks
 * that it cannot parse any further. But if we reach the end of a file or a
 * function definition, we know we can add an empty else clause.
 * @param p  The parser.
 */
void
bc_parse_endif(BcParse* p);
416
/// References to the signal message and its length.
extern const char bc_sig_msg[];
extern const uchar bc_sig_msg_len;

/// A reference to an array of bits that are set if the corresponding lex token
/// is valid in an expression. It is read through @a BC_PARSE_EXPR.
extern const uint8_t bc_parse_exprs[];

/// A reference to an array of bc operators. It is read through
/// @a BC_PARSE_OP_DATA.
extern const uchar bc_parse_ops[];

// References to the various instances of BcParseNext's.

/// A reference to what tokens are valid as next tokens when parsing normal
/// expressions. More accurately, these are the tokens that are valid for
/// *ending* the expression.
extern const BcParseNext bc_parse_next_expr;

/// A reference to what tokens are valid as next tokens when parsing function
/// parameters (well, actually arguments).
extern const BcParseNext bc_parse_next_arg;

/// A reference to what tokens are valid as next tokens when parsing a print
/// statement.
extern const BcParseNext bc_parse_next_print;

/// A reference to what tokens are valid as next tokens when parsing things like
/// loop headers and builtin functions where the only thing expected is a right
/// paren.
///
/// The name is an artifact of history, and is related to @a BC_PARSE_REL (see
/// include/parse.h). It refers to how POSIX only allows some operators as part
/// of the conditional of for loops, while loops, and if statements.
extern const BcParseNext bc_parse_next_rel;

/// A reference to what tokens are valid as next tokens when parsing an array
/// element expression.
extern const BcParseNext bc_parse_next_elem;

/// A reference to what tokens are valid as next tokens when parsing the first
/// two parts of a for loop header.
extern const BcParseNext bc_parse_next_for;

/// A reference to what tokens are valid as next tokens when parsing a read
/// expression.
extern const BcParseNext bc_parse_next_read;

/// A reference to what tokens are valid as next tokens when parsing a builtin
/// function with multiple arguments.
extern const BcParseNext bc_parse_next_builtin;
467
468#else // BC_ENABLED
469
// If bc is not enabled, execution is always possible because dc has strict
// rules that ensure execution can always proceed safely. This version of the
// macro is therefore a constant 0 and does not evaluate its argument.
#define BC_PARSE_NO_EXEC(p) (0)
473
474#endif // BC_ENABLED
475
476#endif // BC_BC_H
477