1/*
2 * *****************************************************************************
3 *
4 * SPDX-License-Identifier: BSD-2-Clause
5 *
6 * Copyright (c) 2018-2023 Gavin D. Howard and contributors.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are met:
10 *
11 * * Redistributions of source code must retain the above copyright notice, this
12 *   list of conditions and the following disclaimer.
13 *
14 * * Redistributions in binary form must reproduce the above copyright notice,
15 *   this list of conditions and the following disclaimer in the documentation
16 *   and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 *
30 * *****************************************************************************
31 *
32 * Definitions for program data.
33 *
34 */
35
36#ifndef BC_LANG_H
37#define BC_LANG_H
38
39#include <stdbool.h>
40
41// These have to come first to silence a warning on BC_C11 below.
42#include <status.h>
43#include <vector.h>
44#include <num.h>
45
46#if BC_C11
47#include <assert.h>
48#endif // BC_C11
49
50/// The instructions for bytecode.
51typedef enum BcInst
52{
53#if BC_ENABLED
54	/// Postfix increment and decrement. Prefix are translated into
55	/// BC_INST_ONE with either BC_INST_ASSIGN_PLUS or BC_INST_ASSIGN_MINUS.
56	BC_INST_INC = 0,
57	BC_INST_DEC,
58#endif // BC_ENABLED
59
60	/// Unary negation.
61	BC_INST_NEG,
62
63	/// Boolean not.
64	BC_INST_BOOL_NOT,
65
66#if BC_ENABLE_EXTRA_MATH
67	/// Truncation operator.
68	BC_INST_TRUNC,
69#endif // BC_ENABLE_EXTRA_MATH
70
71	/// These should be self-explanatory.
72	BC_INST_POWER,
73	BC_INST_MULTIPLY,
74	BC_INST_DIVIDE,
75	BC_INST_MODULUS,
76	BC_INST_PLUS,
77	BC_INST_MINUS,
78
79#if BC_ENABLE_EXTRA_MATH
80	/// Places operator.
81	BC_INST_PLACES,
82
83	/// Shift operators.
84	BC_INST_LSHIFT,
85	BC_INST_RSHIFT,
86#endif // BC_ENABLE_EXTRA_MATH
87
88	/// Comparison operators.
89	BC_INST_REL_EQ,
90	BC_INST_REL_LE,
91	BC_INST_REL_GE,
92	BC_INST_REL_NE,
93	BC_INST_REL_LT,
94	BC_INST_REL_GT,
95
96	/// Boolean or and and.
97	BC_INST_BOOL_OR,
98	BC_INST_BOOL_AND,
99
100#if BC_ENABLED
101	/// Same as the normal operators, but assigment. So ^=, *=, /=, etc.
102	BC_INST_ASSIGN_POWER,
103	BC_INST_ASSIGN_MULTIPLY,
104	BC_INST_ASSIGN_DIVIDE,
105	BC_INST_ASSIGN_MODULUS,
106	BC_INST_ASSIGN_PLUS,
107	BC_INST_ASSIGN_MINUS,
108#if BC_ENABLE_EXTRA_MATH
109	/// Places and shift assignment operators.
110	BC_INST_ASSIGN_PLACES,
111	BC_INST_ASSIGN_LSHIFT,
112	BC_INST_ASSIGN_RSHIFT,
113#endif // BC_ENABLE_EXTRA_MATH
114
115	/// Normal assignment.
116	BC_INST_ASSIGN,
117
118	/// bc and dc detect when the value from an assignment is not necessary.
119	/// For example, a plain assignment statement means the value is never used.
120	/// In those cases, we can get lots of performance back by not even creating
121	/// a copy at all. In fact, it saves a copy, a push onto the results stack,
122	/// a pop from the results stack, and a free. Definitely worth it to detect.
123	BC_INST_ASSIGN_POWER_NO_VAL,
124	BC_INST_ASSIGN_MULTIPLY_NO_VAL,
125	BC_INST_ASSIGN_DIVIDE_NO_VAL,
126	BC_INST_ASSIGN_MODULUS_NO_VAL,
127	BC_INST_ASSIGN_PLUS_NO_VAL,
128	BC_INST_ASSIGN_MINUS_NO_VAL,
129#if BC_ENABLE_EXTRA_MATH
130	/// Same as above.
131	BC_INST_ASSIGN_PLACES_NO_VAL,
132	BC_INST_ASSIGN_LSHIFT_NO_VAL,
133	BC_INST_ASSIGN_RSHIFT_NO_VAL,
134#endif // BC_ENABLE_EXTRA_MATH
135#endif // BC_ENABLED
136
137	/// Normal assignment that pushes no value on the stack.
138	BC_INST_ASSIGN_NO_VAL,
139
140	/// Push a constant onto the results stack.
141	BC_INST_NUM,
142
143	/// Push a variable onto the results stack.
144	BC_INST_VAR,
145
146	/// Push an array element onto the results stack.
147	BC_INST_ARRAY_ELEM,
148
149	/// Push an array onto the results stack. This is different from pushing an
150	/// array *element* onto the results stack; it pushes a reference to the
151	/// whole array. This is needed in bc for function arguments that are
152	/// arrays. It is also needed for returning the length of an array.
153	BC_INST_ARRAY,
154
155	/// Push a zero or a one onto the stack. These are special cased because it
156	/// does help performance, particularly for one since inc/dec operators
157	/// use it.
158	BC_INST_ZERO,
159	BC_INST_ONE,
160
161#if BC_ENABLED
162	/// Push the last printed value onto the stack.
163	BC_INST_LAST,
164#endif // BC_ENABLED
165
166	/// Push the value of any of the globals onto the stack.
167	BC_INST_IBASE,
168	BC_INST_OBASE,
169	BC_INST_SCALE,
170
171#if BC_ENABLE_EXTRA_MATH
172	/// Push the value of the seed global onto the stack.
173	BC_INST_SEED,
174#endif // BC_ENABLE_EXTRA_MATH
175
176	/// These are builtin functions.
177	BC_INST_LENGTH,
178	BC_INST_SCALE_FUNC,
179	BC_INST_SQRT,
180	BC_INST_ABS,
181	BC_INST_IS_NUMBER,
182	BC_INST_IS_STRING,
183
184#if BC_ENABLE_EXTRA_MATH
185	/// Another builtin function.
186	BC_INST_IRAND,
187#endif // BC_ENABLE_EXTRA_MATH
188
189	/// Asciify.
190	BC_INST_ASCIIFY,
191
192	/// Another builtin function.
193	BC_INST_READ,
194
195#if BC_ENABLE_EXTRA_MATH
196	/// Another builtin function.
197	BC_INST_RAND,
198#endif // BC_ENABLE_EXTRA_MATH
199
200	/// Return the max for the various globals.
201	BC_INST_MAXIBASE,
202	BC_INST_MAXOBASE,
203	BC_INST_MAXSCALE,
204#if BC_ENABLE_EXTRA_MATH
205	/// Return the max value returned by rand().
206	BC_INST_MAXRAND,
207#endif // BC_ENABLE_EXTRA_MATH
208
209	/// bc line_length() builtin function.
210	BC_INST_LINE_LENGTH,
211
212#if BC_ENABLED
213
214	/// bc global_stacks() builtin function.
215	BC_INST_GLOBAL_STACKS,
216
217#endif // BC_ENABLED
218
219	/// bc leading_zero() builtin function.
220	BC_INST_LEADING_ZERO,
221
222	/// This is slightly misnamed versus BC_INST_PRINT_POP. Well, it is in bc.
223	/// dc uses this instruction to print, but not pop. That's valid in dc.
224	/// However, in bc, it is *never* valid to print without popping. In bc,
225	/// BC_INST_PRINT_POP is used to indicate when a string should be printed
226	/// because of a print statement or whether it should be printed raw. The
227	/// reason for this is because a print statement handles escaped characters.
228	/// So BC_INST_PRINT_POP is for printing a string from a print statement,
229	/// BC_INST_PRINT_STR is for printing a string by itself.
230	///
231	/// In dc, BC_INST_PRINT_POP prints and pops, and BC_INST_PRINT just prints.
232	///
233	/// Oh, and BC_INST_STR pushes a string onto the results stack.
234	BC_INST_PRINT,
235	BC_INST_PRINT_POP,
236	BC_INST_STR,
237#if BC_ENABLED
238	BC_INST_PRINT_STR,
239
240	/// Jumps unconditionally.
241	BC_INST_JUMP,
242
243	/// Jumps if the top of the results stack is zero (condition failed). It
244	/// turns out that we only want to jump when conditions fail to "skip" code.
245	BC_INST_JUMP_ZERO,
246
247	/// Call a function.
248	BC_INST_CALL,
249
250	/// Return the top of the stack to the caller.
251	BC_INST_RET,
252
253	/// Return 0 to the caller.
254	BC_INST_RET0,
255
256	/// Special return instruction for void functions.
257	BC_INST_RET_VOID,
258
259	/// Special halt instruction.
260	BC_INST_HALT,
261#endif // BC_ENABLED
262
263	/// Pop an item off of the results stack.
264	BC_INST_POP,
265
266	/// Swaps the top two items on the results stack.
267	BC_INST_SWAP,
268
269	/// Modular exponentiation.
270	BC_INST_MODEXP,
271
272	/// Do divide and modulus at the same time.
273	BC_INST_DIVMOD,
274
275	/// Turns a number into a string and prints it.
276	BC_INST_PRINT_STREAM,
277
278#if DC_ENABLED
279
280	/// dc extended registers command.
281	BC_INST_EXTENDED_REGISTERS,
282
283	/// dc's return; it pops an executing string off of the stack.
284	BC_INST_POP_EXEC,
285
286	/// Unconditionally execute a string.
287	BC_INST_EXECUTE,
288
289	/// Conditionally execute a string.
290	BC_INST_EXEC_COND,
291
292	/// Prints each item on the results stack, separated by newlines.
293	BC_INST_PRINT_STACK,
294
295	/// Pops everything off of the results stack.
296	BC_INST_CLEAR_STACK,
297
298	/// Pushes the current length of a register stack onto the results stack.
299	BC_INST_REG_STACK_LEN,
300
301	/// Pushes the current length of the results stack onto the results stack.
302	BC_INST_STACK_LEN,
303
304	/// Pushes a copy of the item on the top of the results stack onto the
305	/// results stack.
306	BC_INST_DUPLICATE,
307
308	/// Copies the value in a register and pushes the copy onto the results
309	/// stack.
310	BC_INST_LOAD,
311
312	/// Pops an item off of a register stack and pushes it onto the results
313	/// stack.
314	BC_INST_PUSH_VAR,
315
316	/// Pops an item off of the results stack and pushes it onto a register's
317	/// stack.
318	BC_INST_PUSH_TO_VAR,
319
320	/// Quit.
321	BC_INST_QUIT,
322
323	/// Quit executing some number of strings.
324	BC_INST_NQUIT,
325
326	/// Push the depth of the execution stack onto the stack.
327	BC_INST_EXEC_STACK_LEN,
328
329#endif // DC_ENABLED
330
331	/// Invalid instruction.
332	BC_INST_INVALID,
333
334} BcInst;
335
336#if BC_C11
337_Static_assert(BC_INST_INVALID <= UCHAR_MAX,
338               "Too many instructions to fit into an unsigned char");
339#endif // BC_C11
340
341/// Used by maps to identify where items are in the array.
342typedef struct BcId
343{
344	/// The name of the item.
345	char* name;
346
347	/// The index into the array where the item is.
348	size_t idx;
349
350} BcId;
351
352/// The location of a var, array, or array element.
353typedef struct BcLoc
354{
355	/// The index of the var or array.
356	size_t loc;
357
358	/// The index of the array or variable in the array stack. This is to
359	/// prevent a bug with getting the wrong array element or variable after a
360	/// function call. See the tests/bc/scripts/array.bc test for the array
361	/// case; the variable case is in various variable tests.
362	size_t stack_idx;
363
364	/// The index of the array element. Only used for array elements.
365	size_t idx;
366
367} BcLoc;
368
369/// An entry for a constant.
370typedef struct BcConst
371{
372	/// The original string as parsed from the source code.
373	char* val;
374
375	/// The last base that the constant was parsed in.
376	BcBigDig base;
377
378	/// The parsed constant.
379	BcNum num;
380
381} BcConst;
382
383/// A function. This is also used in dc, not just bc. The reason is that strings
384/// are executed in dc, and they are converted to functions in order to be
385/// executed.
386typedef struct BcFunc
387{
388	/// The bytecode instructions.
389	BcVec code;
390
391#if BC_ENABLED
392
393	/// The labels. This is a vector of indices. The index is the index into
394	/// the bytecode vector where the label is.
395	BcVec labels;
396
397	/// The autos for the function. The first items are the parameters, and the
398	/// arguments to the parameters must match the types in this vector.
399	BcVec autos;
400
401	/// The number of parameters the function takes.
402	size_t nparams;
403
404#endif // BC_ENABLED
405
406	/// The function's name.
407	const char* name;
408
409#if BC_ENABLED
410	/// True if the function is a void function.
411	bool voidfn;
412#endif // BC_ENABLED
413
414} BcFunc;
415
416/// Types of results that can be pushed onto the results stack.
417typedef enum BcResultType
418{
419	/// Result is a variable.
420	BC_RESULT_VAR,
421
422	/// Result is an array element.
423	BC_RESULT_ARRAY_ELEM,
424
425	/// Result is an array. This is only allowed for function arguments or
426	/// returning the length of the array.
427	BC_RESULT_ARRAY,
428
429	/// Result is a string.
430	BC_RESULT_STR,
431
432	/// Result is a temporary. This is used for the result of almost all
433	/// expressions.
434	BC_RESULT_TEMP,
435
436	/// Special casing the two below gave performance improvements.
437
438	/// Result is a 0.
439	BC_RESULT_ZERO,
440
441	/// Result is a 1. Useful for inc/dec operators.
442	BC_RESULT_ONE,
443
444#if BC_ENABLED
445
446	/// Result is the special "last" variable.
447	BC_RESULT_LAST,
448
449	/// Result is the return value of a void function.
450	BC_RESULT_VOID,
451#endif // BC_ENABLED
452
453	/// Result is the value of ibase.
454	BC_RESULT_IBASE,
455
456	/// Result is the value of obase.
457	BC_RESULT_OBASE,
458
459	/// Result is the value of scale.
460	BC_RESULT_SCALE,
461
462#if BC_ENABLE_EXTRA_MATH
463
464	/// Result is the value of seed.
465	BC_RESULT_SEED,
466
467#endif // BC_ENABLE_EXTRA_MATH
468
469} BcResultType;
470
471/// A union to store data for various result types.
472typedef union BcResultData
473{
474	/// A number. Strings are stored here too; they are numbers with
475	/// cap == 0 && num == NULL. The string's index into the strings vector is
476	/// stored in the scale field. But this is only used for strings stored in
477	/// variables.
478	BcNum n;
479
480	/// A vector.
481	BcVec v;
482
483	/// A variable, array, or array element reference. This could also be a
484	/// string if a string is not stored in a variable (dc only).
485	BcLoc loc;
486
487} BcResultData;
488
489/// A tagged union for results.
490typedef struct BcResult
491{
492	/// The tag. The type of the result.
493	BcResultType t;
494
495	/// The data. The data for the result.
496	BcResultData d;
497
498} BcResult;
499
500/// An instruction pointer. This is how bc knows where in the bytecode vector,
501/// and which function, the current execution is.
502typedef struct BcInstPtr
503{
504	/// The index of the currently executing function in the fns vector.
505	size_t func;
506
507	/// The index into the bytecode vector of the *next* instruction.
508	size_t idx;
509
510	/// The length of the results vector when this function started executing.
511	/// This is mostly used for bc where functions should not affect the results
512	/// of their callers.
513	size_t len;
514
515} BcInstPtr;
516
517/// Types of identifiers.
518typedef enum BcType
519{
520	/// Variable.
521	BC_TYPE_VAR,
522
523	/// Array.
524	BC_TYPE_ARRAY,
525
526#if BC_ENABLED
527
528	/// Array reference.
529	BC_TYPE_REF,
530
531#endif // BC_ENABLED
532
533} BcType;
534
535#if BC_ENABLED
536/// An auto variable in bc.
537typedef struct BcAuto
538{
539	/// The index of the variable in the vars or arrs vectors.
540	size_t idx;
541
542	/// The type of the variable.
543	BcType type;
544
545} BcAuto;
546#endif // BC_ENABLED
547
548/// Forward declaration.
549struct BcProgram;
550
551/**
552 * Initializes a function.
553 * @param f     The function to initialize.
554 * @param name  The name of the function. The string is assumed to be owned by
555 *              some other entity.
556 */
557void
558bc_func_init(BcFunc* f, const char* name);
559
560/**
561 * Inserts an auto into the function.
562 * @param f     The function to insert into.
563 * @param p     The program. This is to search for the variable or array name.
564 * @param name  The name of the auto to insert.
565 * @param type  The type of the auto.
566 * @param line  The line in the source code where the insert happened. This is
567 *              solely for error reporting.
568 */
569void
570bc_func_insert(BcFunc* f, struct BcProgram* p, char* name, BcType type,
571               size_t line);
572
573/**
574 * Resets a function in preparation for it to be reused. This can happen in bc
575 * because it is a dynamic language and functions can be redefined.
576 * @param f  The functio to reset.
577 */
578void
579bc_func_reset(BcFunc* f);
580
581#if BC_DEBUG
582/**
583 * Frees a function. This is a destructor. This is only used in debug builds
584 * because all functions are freed at exit. We free them in debug builds to
585 * check for memory leaks.
586 * @param func  The function to free as a void pointer.
587 */
588void
589bc_func_free(void* func);
590#endif // BC_DEBUG
591
592/**
593 * Initializes an array, which is the array type in bc and dc source code. Since
594 * variables and arrays are both arrays (see the development manual,
595 * manuals/development.md#execution, for more information), the @a nums
596 * parameter tells bc whether to initialize an array of numbers or an array of
597 * arrays of numbers. If the latter, it does a recursive call with nums set to
598 * true.
599 * @param a     The array to initialize.
600 * @param nums  True if the array should be for numbers, false if it should be
601 *              for vectors.
602 */
603void
604bc_array_init(BcVec* a, bool nums);
605
606/**
607 * Copies an array to another array. This is used to do pass arrays to functions
608 * that do not take references to arrays. The arrays are passed entirely by
609 * value, which means that they need to be copied.
610 * @param d  The destination array.
611 * @param s  The source array.
612 */
613void
614bc_array_copy(BcVec* d, const BcVec* s);
615
616/**
617 * Frees a string stored in a function. This is a destructor.
618 * @param string  The string to free as a void pointer.
619 */
620void
621bc_string_free(void* string);
622
623/**
624 * Frees a constant stored in a function. This is a destructor.
625 * @param constant  The constant to free as a void pointer.
626 */
627void
628bc_const_free(void* constant);
629
630/**
631 * Clears a result. It sets the type to BC_RESULT_TEMP and clears the union by
632 * clearing the BcNum in the union. This is to ensure that bc does not use
633 * uninitialized data.
634 * @param r  The result to clear.
635 */
636void
637bc_result_clear(BcResult* r);
638
639/**
640 * Copies a result into another. This is done for things like duplicating the
641 * top of the results stack or copying the result of an assignment to put back
642 * on the results stack.
643 * @param d    The destination result.
644 * @param src  The source result.
645 */
646void
647bc_result_copy(BcResult* d, BcResult* src);
648
649/**
650 * Frees a result. This is a destructor.
651 * @param result  The result to free as a void pointer.
652 */
653void
654bc_result_free(void* result);
655
656/**
657 * Expands an array to @a len. This can happen because in bc, you do not have to
658 * explicitly initialize elements of an array. If you access an element that is
659 * not initialized, the array is expanded to fit it, and all missing elements
660 * are initialized to 0 if they are numbers, or arrays with one element of 0.
661 * This function does that expansion.
662 * @param a    The array to expand.
663 * @param len  The length to expand to.
664 */
665void
666bc_array_expand(BcVec* a, size_t len);
667
668#if BC_ENABLED
669
670/**
671 * Returns non-zero if the bytecode instruction i is an assignment instruction.
672 * @param i  The instruction to test.
673 * @return   Non-zero if i is an assignment instruction, zero otherwise.
674 */
675#define BC_INST_IS_ASSIGN(i) \
676	((i) == BC_INST_ASSIGN || (i) == BC_INST_ASSIGN_NO_VAL)
677
678/**
679 * Returns true if the bytecode instruction @a i requires the value to be
680 * returned for use.
681 * @param i  The instruction to test.
682 * @return   True if @a i requires the value to be returned for use, false
683 *           otherwise.
684 */
685#define BC_INST_USE_VAL(i) ((i) <= BC_INST_ASSIGN)
686
687#else // BC_ENABLED
688
689/**
690 * Returns non-zero if the bytecode instruction i is an assignment instruction.
691 * @param i  The instruction to test.
692 * @return   Non-zero if i is an assignment instruction, zero otherwise.
693 */
694#define BC_INST_IS_ASSIGN(i) ((i) == BC_INST_ASSIGN_NO_VAL)
695
696/**
697 * Returns true if the bytecode instruction @a i requires the value to be
698 * returned for use.
699 * @param i  The instruction to test.
700 * @return   True if @a i requires the value to be returned for use, false
701 *           otherwise.
702 */
703#define BC_INST_USE_VAL(i) (false)
704
705#endif // BC_ENABLED
706
707#if BC_DEBUG_CODE
708/// Reference to string names for all of the instructions. For debugging.
709extern const char* bc_inst_names[];
710#endif // BC_DEBUG_CODE
711
712/// References to the names of the main and read functions.
713extern const char bc_func_main[];
714extern const char bc_func_read[];
715
716#endif // BC_LANG_H
717