1/* 2 * ***************************************************************************** 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 * 6 * Copyright (c) 2018-2023 Gavin D. Howard and contributors. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions are met: 10 * 11 * * Redistributions of source code must retain the above copyright notice, this 12 * list of conditions and the following disclaimer. 13 * 14 * * Redistributions in binary form must reproduce the above copyright notice, 15 * this list of conditions and the following disclaimer in the documentation 16 * and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 * 30 * ***************************************************************************** 31 * 32 * Definitions for bc's lexer. 33 * 34 */ 35 36#ifndef BC_LEX_H 37#define BC_LEX_H 38 39#include <stdbool.h> 40#include <stddef.h> 41 42#include <status.h> 43#include <vector.h> 44#include <lang.h> 45 46/** 47 * A convenience macro for throwing errors in lex code. This takes care of 48 * plumbing like passing in the current line the lexer is on. 49 * @param l The lexer. 50 * @param e The error. 51 */ 52#if BC_DEBUG 53#define bc_lex_err(l, e) (bc_vm_handleError((e), __FILE__, __LINE__, (l)->line)) 54#else // BC_DEBUG 55#define bc_lex_err(l, e) (bc_vm_handleError((e), (l)->line)) 56#endif // BC_DEBUG 57 58/** 59 * A convenience macro for throwing errors in lex code. This takes care of 60 * plumbing like passing in the current line the lexer is on. 61 * @param l The lexer. 62 * @param e The error. 63 */ 64#if BC_DEBUG 65#define bc_lex_verr(l, e, ...) \ 66 (bc_vm_handleError((e), __FILE__, __LINE__, (l)->line, __VA_ARGS__)) 67#else // BC_DEBUG 68#define bc_lex_verr(l, e, ...) (bc_vm_handleError((e), (l)->line, __VA_ARGS__)) 69#endif // BC_DEBUG 70 71// BC_LEX_NEG_CHAR returns the char that corresponds to negative for the 72// current calculator. 73// 74// BC_LEX_LAST_NUM_CHAR returns the char that corresponds to the last valid 75// char for numbers. In bc and dc, capital letters are part of numbers, to a 76// point. (dc only goes up to hex, so its last valid char is 'F'.) 77#if BC_ENABLED 78 79#if DC_ENABLED 80#define BC_LEX_NEG_CHAR (BC_IS_BC ? '-' : '_') 81#define BC_LEX_LAST_NUM_CHAR (BC_IS_BC ? 'Z' : 'F') 82#else // DC_ENABLED 83#define BC_LEX_NEG_CHAR ('-') 84#define BC_LEX_LAST_NUM_CHAR ('Z') 85#endif // DC_ENABLED 86 87#else // BC_ENABLED 88 89#define BC_LEX_NEG_CHAR ('_') 90#define BC_LEX_LAST_NUM_CHAR ('F') 91 92#endif // BC_ENABLED 93 94/** 95 * Returns true if c is a valid number character. 96 * @param c The char to check. 97 * @param pt If a decimal point has already been seen. 98 * @param int_only True if the number is expected to be an int only, false if 99 * non-integers are allowed. 100 * @return True if @a c is a valid number character. 101 */ 102#define BC_LEX_NUM_CHAR(c, pt, int_only) \ 103 (isdigit(c) != 0 || ((c) >= 'A' && (c) <= BC_LEX_LAST_NUM_CHAR) || \ 104 ((c) == '.' && !(pt) && !(int_only))) 105 106/// An enum of lex token types. 107typedef enum BcLexType 108{ 109 /// End of file. 110 BC_LEX_EOF, 111 112 /// Marker for invalid tokens, used by bc and dc for const data. 113 BC_LEX_INVALID, 114 115#if BC_ENABLED 116 117 /// Increment operator. 118 BC_LEX_OP_INC, 119 120 /// Decrement operator. 121 BC_LEX_OP_DEC, 122 123#endif // BC_ENABLED 124 125 /// BC_LEX_NEG is not used in lexing; it is only for parsing. The lexer 126 /// marks all '-' characters as BC_LEX_OP_MINUS, but the parser needs to be 127 /// able to distinguish them. 128 BC_LEX_NEG, 129 130 /// Boolean not. 131 BC_LEX_OP_BOOL_NOT, 132 133#if BC_ENABLE_EXTRA_MATH 134 135 /// Truncation operator. 136 BC_LEX_OP_TRUNC, 137 138#endif // BC_ENABLE_EXTRA_MATH 139 140 /// Power operator. 141 BC_LEX_OP_POWER, 142 143 /// Multiplication operator. 144 BC_LEX_OP_MULTIPLY, 145 146 /// Division operator. 147 BC_LEX_OP_DIVIDE, 148 149 /// Modulus operator. 150 BC_LEX_OP_MODULUS, 151 152 /// Addition operator. 153 BC_LEX_OP_PLUS, 154 155 /// Subtraction operator. 156 BC_LEX_OP_MINUS, 157 158#if BC_ENABLE_EXTRA_MATH 159 160 /// Places (truncate or extend) operator. 161 BC_LEX_OP_PLACES, 162 163 /// Left (decimal) shift operator. 164 BC_LEX_OP_LSHIFT, 165 166 /// Right (decimal) shift operator. 167 BC_LEX_OP_RSHIFT, 168 169#endif // BC_ENABLE_EXTRA_MATH 170 171 /// Equal operator. 172 BC_LEX_OP_REL_EQ, 173 174 /// Less than or equal operator. 175 BC_LEX_OP_REL_LE, 176 177 /// Greater than or equal operator. 178 BC_LEX_OP_REL_GE, 179 180 /// Not equal operator. 181 BC_LEX_OP_REL_NE, 182 183 /// Less than operator. 184 BC_LEX_OP_REL_LT, 185 186 /// Greater than operator. 187 BC_LEX_OP_REL_GT, 188 189 /// Boolean or operator. 190 BC_LEX_OP_BOOL_OR, 191 192 /// Boolean and operator. 193 BC_LEX_OP_BOOL_AND, 194 195#if BC_ENABLED 196 197 /// Power assignment operator. 198 BC_LEX_OP_ASSIGN_POWER, 199 200 /// Multiplication assignment operator. 201 BC_LEX_OP_ASSIGN_MULTIPLY, 202 203 /// Division assignment operator. 204 BC_LEX_OP_ASSIGN_DIVIDE, 205 206 /// Modulus assignment operator. 207 BC_LEX_OP_ASSIGN_MODULUS, 208 209 /// Addition assignment operator. 210 BC_LEX_OP_ASSIGN_PLUS, 211 212 /// Subtraction assignment operator. 213 BC_LEX_OP_ASSIGN_MINUS, 214 215#if BC_ENABLE_EXTRA_MATH 216 217 /// Places (truncate or extend) assignment operator. 218 BC_LEX_OP_ASSIGN_PLACES, 219 220 /// Left (decimal) shift assignment operator. 221 BC_LEX_OP_ASSIGN_LSHIFT, 222 223 /// Right (decimal) shift assignment operator. 224 BC_LEX_OP_ASSIGN_RSHIFT, 225 226#endif // BC_ENABLE_EXTRA_MATH 227#endif // BC_ENABLED 228 229 /// Assignment operator. 230 BC_LEX_OP_ASSIGN, 231 232 /// Newline. 233 BC_LEX_NLINE, 234 235 /// Whitespace. 236 BC_LEX_WHITESPACE, 237 238 /// Left parenthesis. 239 BC_LEX_LPAREN, 240 241 /// Right parenthesis. 242 BC_LEX_RPAREN, 243 244 /// Left bracket. 245 BC_LEX_LBRACKET, 246 247 /// Comma. 248 BC_LEX_COMMA, 249 250 /// Right bracket. 251 BC_LEX_RBRACKET, 252 253 /// Left brace. 254 BC_LEX_LBRACE, 255 256 /// Semicolon. 257 BC_LEX_SCOLON, 258 259 /// Right brace. 260 BC_LEX_RBRACE, 261 262 /// String. 263 BC_LEX_STR, 264 265 /// Identifier/name. 266 BC_LEX_NAME, 267 268 /// Constant number. 269 BC_LEX_NUMBER, 270 271 // These keywords are in the order they are in for a reason. Don't change 272 // the order unless you want a bunch of weird failures in the test suite. 273 // In fact, almost all of these tokens are in a specific order for a reason. 274 275#if BC_ENABLED 276 277 /// bc auto keyword. 278 BC_LEX_KW_AUTO, 279 280 /// bc break keyword. 281 BC_LEX_KW_BREAK, 282 283 /// bc continue keyword. 284 BC_LEX_KW_CONTINUE, 285 286 /// bc define keyword. 287 BC_LEX_KW_DEFINE, 288 289 /// bc for keyword. 290 BC_LEX_KW_FOR, 291 292 /// bc if keyword. 293 BC_LEX_KW_IF, 294 295 /// bc limits keyword. 296 BC_LEX_KW_LIMITS, 297 298 /// bc return keyword. 299 BC_LEX_KW_RETURN, 300 301 /// bc while keyword. 302 BC_LEX_KW_WHILE, 303 304 /// bc halt keyword. 305 BC_LEX_KW_HALT, 306 307 /// bc last keyword. 308 BC_LEX_KW_LAST, 309 310#endif // BC_ENABLED 311 312 /// bc ibase keyword. 313 BC_LEX_KW_IBASE, 314 315 /// bc obase keyword. 316 BC_LEX_KW_OBASE, 317 318 /// bc scale keyword. 319 BC_LEX_KW_SCALE, 320 321#if BC_ENABLE_EXTRA_MATH 322 323 /// bc seed keyword. 324 BC_LEX_KW_SEED, 325 326#endif // BC_ENABLE_EXTRA_MATH 327 328 /// bc length keyword. 329 BC_LEX_KW_LENGTH, 330 331 /// bc print keyword. 332 BC_LEX_KW_PRINT, 333 334 /// bc sqrt keyword. 335 BC_LEX_KW_SQRT, 336 337 /// bc abs keyword. 338 BC_LEX_KW_ABS, 339 340 /// bc is_number keyword. 341 BC_LEX_KW_IS_NUMBER, 342 343 /// bc is_string keyword. 344 BC_LEX_KW_IS_STRING, 345 346#if BC_ENABLE_EXTRA_MATH 347 348 /// bc irand keyword. 349 BC_LEX_KW_IRAND, 350 351#endif // BC_ENABLE_EXTRA_MATH 352 353 /// bc asciffy keyword. 354 BC_LEX_KW_ASCIIFY, 355 356 /// bc modexp keyword. 357 BC_LEX_KW_MODEXP, 358 359 /// bc divmod keyword. 360 BC_LEX_KW_DIVMOD, 361 362 /// bc quit keyword. 363 BC_LEX_KW_QUIT, 364 365 /// bc read keyword. 366 BC_LEX_KW_READ, 367 368#if BC_ENABLE_EXTRA_MATH 369 370 /// bc rand keyword. 371 BC_LEX_KW_RAND, 372 373#endif // BC_ENABLE_EXTRA_MATH 374 375 /// bc maxibase keyword. 376 BC_LEX_KW_MAXIBASE, 377 378 /// bc maxobase keyword. 379 BC_LEX_KW_MAXOBASE, 380 381 /// bc maxscale keyword. 382 BC_LEX_KW_MAXSCALE, 383 384#if BC_ENABLE_EXTRA_MATH 385 386 /// bc maxrand keyword. 387 BC_LEX_KW_MAXRAND, 388 389#endif // BC_ENABLE_EXTRA_MATH 390 391 /// bc line_length keyword. 392 BC_LEX_KW_LINE_LENGTH, 393 394#if BC_ENABLED 395 396 /// bc global_stacks keyword. 397 BC_LEX_KW_GLOBAL_STACKS, 398 399#endif // BC_ENABLED 400 401 /// bc leading_zero keyword. 402 BC_LEX_KW_LEADING_ZERO, 403 404 /// bc stream keyword. 405 BC_LEX_KW_STREAM, 406 407 /// bc else keyword. 408 BC_LEX_KW_ELSE, 409 410#if DC_ENABLED 411 412 /// dc extended registers keyword. 413 BC_LEX_EXTENDED_REGISTERS, 414 415 /// A special token for dc to calculate equal without a register. 416 BC_LEX_EQ_NO_REG, 417 418 /// Colon (array) operator. 419 BC_LEX_COLON, 420 421 /// Execute command. 422 BC_LEX_EXECUTE, 423 424 /// Print stack command. 425 BC_LEX_PRINT_STACK, 426 427 /// Clear stack command. 428 BC_LEX_CLEAR_STACK, 429 430 /// Register stack level command. 431 BC_LEX_REG_STACK_LEVEL, 432 433 /// Main stack level command. 434 BC_LEX_STACK_LEVEL, 435 436 /// Duplicate command. 437 BC_LEX_DUPLICATE, 438 439 /// Swap (reverse) command. 440 BC_LEX_SWAP, 441 442 /// Pop (remove) command. 443 BC_LEX_POP, 444 445 /// Store ibase command. 446 BC_LEX_STORE_IBASE, 447 448 /// Store obase command. 449 BC_LEX_STORE_OBASE, 450 451 /// Store scale command. 452 BC_LEX_STORE_SCALE, 453 454#if BC_ENABLE_EXTRA_MATH 455 456 /// Store seed command. 457 BC_LEX_STORE_SEED, 458 459#endif // BC_ENABLE_EXTRA_MATH 460 461 /// Load variable onto stack command. 462 BC_LEX_LOAD, 463 464 /// Pop off of variable stack onto results stack command. 465 BC_LEX_LOAD_POP, 466 467 /// Push onto variable stack command. 468 BC_LEX_STORE_PUSH, 469 470 /// Print with pop command. 471 BC_LEX_PRINT_POP, 472 473 /// Parameterized quit command. 474 BC_LEX_NQUIT, 475 476 /// Execution stack depth command. 477 BC_LEX_EXEC_STACK_LENGTH, 478 479 /// Scale of number command. This is needed specifically for dc because bc 480 /// parses the scale function in parts. 481 BC_LEX_SCALE_FACTOR, 482 483 /// Array length command. This is needed specifically for dc because bc 484 /// just reuses its length keyword. 485 BC_LEX_ARRAY_LENGTH, 486 487#endif // DC_ENABLED 488 489} BcLexType; 490 491struct BcLex; 492 493/** 494 * A function pointer to call when another token is needed. Mostly called by the 495 * parser. 496 * @param l The lexer. 497 */ 498typedef void (*BcLexNext)(struct BcLex* l); 499 500/// The lexer. 501typedef struct BcLex 502{ 503 /// A pointer to the text to lex. 504 const char* buf; 505 506 /// The current index into buf. 507 size_t i; 508 509 /// The current line. 510 size_t line; 511 512 /// The length of buf. 513 size_t len; 514 515 /// The current token. 516 BcLexType t; 517 518 /// The previous token. 519 BcLexType last; 520 521 /// A string to store extra data for tokens. For example, the @a BC_LEX_STR 522 /// token really needs to store the actual string, and numbers also need the 523 /// string. 524 BcVec str; 525 526 /// The mode the lexer is in. 527 BcMode mode; 528 529} BcLex; 530 531/** 532 * Initializes a lexer. 533 * @param l The lexer to initialize. 534 */ 535void 536bc_lex_init(BcLex* l); 537 538/** 539 * Frees a lexer. This is not guarded by #if BC_DEBUG because a separate 540 * parser is created at runtime to parse read() expressions and dc strings, and 541 * that parser needs a lexer. 542 * @param l The lexer to free. 543 */ 544void 545bc_lex_free(BcLex* l); 546 547/** 548 * Sets the filename that the lexer will be lexing. 549 * @param l The lexer. 550 * @param file The filename that the lexer will lex. 551 */ 552void 553bc_lex_file(BcLex* l, const char* file); 554 555/** 556 * Sets the text the lexer will lex. 557 * @param l The lexer. 558 * @param text The text to lex. 559 * @param mode The mode to lex in. 560 */ 561void 562bc_lex_text(BcLex* l, const char* text, BcMode mode); 563 564/** 565 * Generic next function for the parser to call. It takes care of calling the 566 * correct @a BcLexNext function and consuming whitespace. 567 * @param l The lexer. 568 */ 569void 570bc_lex_next(BcLex* l); 571 572/** 573 * Lexes a line comment (one beginning with '#' and going to a newline). 574 * @param l The lexer. 575 */ 576void 577bc_lex_lineComment(BcLex* l); 578 579/** 580 * Lexes a general comment (C-style comment). 581 * @param l The lexer. 582 */ 583void 584bc_lex_comment(BcLex* l); 585 586/** 587 * Lexes whitespace, finding as much as possible. 588 * @param l The lexer. 589 */ 590void 591bc_lex_whitespace(BcLex* l); 592 593/** 594 * Lexes a number that begins with char @a start. This takes care of parsing 595 * numbers in scientific and engineering notations. 596 * @param l The lexer. 597 * @param start The starting char of the number. To detect a number and call 598 * this function, the lexer had to eat the first char. It fixes 599 * that by passing it in. 600 */ 601void 602bc_lex_number(BcLex* l, char start); 603 604/** 605 * Lexes a name/identifier. 606 * @param l The lexer. 607 */ 608void 609bc_lex_name(BcLex* l); 610 611/** 612 * Lexes common whitespace characters. 613 * @param l The lexer. 614 * @param c The character to lex. 615 */ 616void 617bc_lex_commonTokens(BcLex* l, char c); 618 619/** 620 * Throws a parse error because char @a c was invalid. 621 * @param l The lexer. 622 * @param c The problem character. 623 */ 624void 625bc_lex_invalidChar(BcLex* l, char c); 626 627/** 628 * Reads a line from stdin and puts it into the lexer's buffer. 629 * @param l The lexer. 630 */ 631bool 632bc_lex_readLine(BcLex* l); 633 634#endif // BC_LEX_H 635