1/* xgettext Scheme backend. 2 Copyright (C) 2004-2007 Free Software Foundation, Inc. 3 4 This file was written by Bruno Haible <bruno@clisp.org>, 2004-2005. 5 6 This program is free software: you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 3 of the License, or 9 (at your option) any later version. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 18 19#ifdef HAVE_CONFIG_H 20# include "config.h" 21#endif 22 23/* Specification. */ 24#include "x-scheme.h" 25 26#include <errno.h> 27#include <stdbool.h> 28#include <stdio.h> 29#include <stdlib.h> 30#include <string.h> 31 32#include "message.h" 33#include "xgettext.h" 34#include "x-scheme.h" 35#include "error.h" 36#include "xalloc.h" 37#include "hash.h" 38#include "gettext.h" 39 40#define _(s) gettext(s) 41 42 43/* The Scheme syntax is described in R5RS. It is implemented in 44 guile-1.6.4/libguile/read.c. 45 Since we are interested only in strings and in forms similar to 46 (gettext msgid ...) 47 or (ngettext msgid msgid_plural ...) 48 we make the following simplifications: 49 50 - Assume the keywords and strings are in an ASCII compatible encoding. 51 This means we can read the input file one byte at a time, instead of 52 one character at a time. No need to worry about multibyte characters: 53 If they occur as part of identifiers, they most probably act as 54 constituent characters, and the byte based approach will do the same. 55 56 - Assume the read-hash-procedures is in the default state. 57 Non-standard reader extensions are mostly used to read data, not programs. 58 59 The remaining syntax rules are: 60 61 - The syntax code assigned to each character, and how tokens are built 62 up from characters (single escape, multiple escape etc.). 63 64 - Comment syntax: ';' and '#! ... \n!#\n'. 65 66 - String syntax: "..." with single escapes. 67 68 - Read macros and dispatch macro character '#'. Needed to be able to 69 tell which is the n-th argument of a function call. 70 71 */ 72 73 74/* ====================== Keyword set customization. ====================== */ 75 76/* If true extract all strings. */ 77static bool extract_all = false; 78 79static hash_table keywords; 80static bool default_keywords = true; 81 82 83void 84x_scheme_extract_all () 85{ 86 extract_all = true; 87} 88 89 90void 91x_scheme_keyword (const char *name) 92{ 93 if (name == NULL) 94 default_keywords = false; 95 else 96 { 97 const char *end; 98 struct callshape shape; 99 const char *colon; 100 101 if (keywords.table == NULL) 102 hash_init (&keywords, 100); 103 104 split_keywordspec (name, &end, &shape); 105 106 /* The characters between name and end should form a valid Lisp symbol. 107 Extract the symbol name part. */ 108 colon = strchr (name, ':'); 109 if (colon != NULL && colon < end) 110 { 111 name = colon + 1; 112 if (name < end && *name == ':') 113 name++; 114 colon = strchr (name, ':'); 115 if (colon != NULL && colon < end) 116 return; 117 } 118 119 insert_keyword_callshape (&keywords, name, end - name, &shape); 120 } 121} 122 123/* Finish initializing the keywords hash table. 124 Called after argument processing, before each file is processed. */ 125static void 126init_keywords () 127{ 128 if (default_keywords) 129 { 130 /* When adding new keywords here, also update the documentation in 131 xgettext.texi! */ 132 x_scheme_keyword ("gettext"); /* libguile/i18n.c */ 133 x_scheme_keyword ("ngettext:1,2"); /* libguile/i18n.c */ 134 x_scheme_keyword ("gettext-noop"); 135 default_keywords = false; 136 } 137} 138 139void 140init_flag_table_scheme () 141{ 142 xgettext_record_flag ("gettext:1:pass-scheme-format"); 143 xgettext_record_flag ("ngettext:1:pass-scheme-format"); 144 xgettext_record_flag ("ngettext:2:pass-scheme-format"); 145 xgettext_record_flag ("gettext-noop:1:pass-scheme-format"); 146 xgettext_record_flag ("format:2:scheme-format"); 147} 148 149 150/* ======================== Reading of characters. ======================== */ 151 152/* Real filename, used in error messages about the input file. */ 153static const char *real_file_name; 154 155/* Logical filename and line number, used to label the extracted messages. */ 156static char *logical_file_name; 157static int line_number; 158 159/* The input file stream. */ 160static FILE *fp; 161 162 163/* Fetch the next character from the input file. */ 164static int 165do_getc () 166{ 167 int c = getc (fp); 168 169 if (c == EOF) 170 { 171 if (ferror (fp)) 172 error (EXIT_FAILURE, errno, _("\ 173error while reading \"%s\""), real_file_name); 174 } 175 else if (c == '\n') 176 line_number++; 177 178 return c; 179} 180 181/* Put back the last fetched character, not EOF. */ 182static void 183do_ungetc (int c) 184{ 185 if (c == '\n') 186 line_number--; 187 ungetc (c, fp); 188} 189 190 191/* ========================== Reading of tokens. ========================== */ 192 193 194/* A token consists of a sequence of characters. */ 195struct token 196{ 197 int allocated; /* number of allocated 'token_char's */ 198 int charcount; /* number of used 'token_char's */ 199 char *chars; /* the token's constituents */ 200}; 201 202/* Initialize a 'struct token'. */ 203static inline void 204init_token (struct token *tp) 205{ 206 tp->allocated = 10; 207 tp->chars = XNMALLOC (tp->allocated, char); 208 tp->charcount = 0; 209} 210 211/* Free the memory pointed to by a 'struct token'. */ 212static inline void 213free_token (struct token *tp) 214{ 215 free (tp->chars); 216} 217 218/* Ensure there is enough room in the token for one more character. */ 219static inline void 220grow_token (struct token *tp) 221{ 222 if (tp->charcount == tp->allocated) 223 { 224 tp->allocated *= 2; 225 tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char)); 226 } 227} 228 229/* Read the next token. 'first' is the first character, which has already 230 been read. */ 231static void 232read_token (struct token *tp, int first) 233{ 234 init_token (tp); 235 236 grow_token (tp); 237 tp->chars[tp->charcount++] = first; 238 239 for (;;) 240 { 241 int c = do_getc (); 242 243 if (c == EOF) 244 break; 245 if (c == ' ' || c == '\r' || c == '\f' || c == '\t' || c == '\n' 246 || c == '"' || c == '(' || c == ')' || c == ';') 247 { 248 do_ungetc (c); 249 break; 250 } 251 grow_token (tp); 252 tp->chars[tp->charcount++] = c; 253 } 254} 255 256/* Tests if a token represents an integer. 257 Taken from guile-1.6.4/libguile/numbers.c:scm_istr2int(). */ 258static inline bool 259is_integer_syntax (const char *str, int len, int radix) 260{ 261 const char *p = str; 262 const char *p_end = str + len; 263 264 /* The accepted syntax is 265 ['+'|'-'] DIGIT+ 266 where DIGIT is a hexadecimal digit whose value is below radix. */ 267 268 if (p == p_end) 269 return false; 270 if (*p == '+' || *p == '-') 271 { 272 p++; 273 if (p == p_end) 274 return false; 275 } 276 do 277 { 278 int c = *p++; 279 280 if (c >= '0' && c <= '9') 281 c = c - '0'; 282 else if (c >= 'A' && c <= 'F') 283 c = c - 'A' + 10; 284 else if (c >= 'a' && c <= 'f') 285 c = c - 'a' + 10; 286 else 287 return false; 288 if (c >= radix) 289 return false; 290 } 291 while (p < p_end); 292 return true; 293} 294 295/* Tests if a token represents a rational, floating-point or complex number. 296 If unconstrained is false, only real numbers are accepted; otherwise, 297 complex numbers are accepted as well. 298 Taken from guile-1.6.4/libguile/numbers.c:scm_istr2flo(). */ 299static inline bool 300is_other_number_syntax (const char *str, int len, int radix, bool unconstrained) 301{ 302 const char *p = str; 303 const char *p_end = str + len; 304 bool seen_sign; 305 bool seen_digits; 306 307 /* The accepted syntaxes are: 308 for a floating-point number: 309 ['+'|'-'] DIGIT+ [EXPONENT] 310 ['+'|'-'] DIGIT* '.' DIGIT+ [EXPONENT] 311 where EXPONENT ::= ['d'|'e'|'f'|'l'|'s'] DIGIT+ 312 (Dot and exponent are allowed only if radix is 10.) 313 for a rational number: 314 ['+'|'-'] DIGIT+ '/' DIGIT+ 315 for a complex number: 316 REAL-NUMBER {'+'|'-'} REAL-NUMBER-WITHOUT-SIGN 'i' 317 REAL-NUMBER {'+'|'-'} 'i' 318 {'+'|'-'} REAL-NUMBER-WITHOUT-SIGN 'i' 319 {'+'|'-'} 'i' 320 REAL-NUMBER '@' REAL-NUMBER 321 */ 322 if (p == p_end) 323 return false; 324 /* Parse leading sign. */ 325 seen_sign = false; 326 if (*p == '+' || *p == '-') 327 { 328 p++; 329 if (p == p_end) 330 return false; 331 seen_sign = true; 332 /* Recognize complex number syntax: {'+'|'-'} 'i' */ 333 if (unconstrained && (*p == 'I' || *p == 'i') && p + 1 == p_end) 334 return true; 335 } 336 /* Parse digits before dot or exponent or slash. */ 337 seen_digits = false; 338 do 339 { 340 int c = *p; 341 342 if (c >= '0' && c <= '9') 343 c = c - '0'; 344 else if (c >= 'A' && c <= 'F') 345 { 346 if (c >= 'D' && radix == 10) /* exponent? */ 347 break; 348 c = c - 'A' + 10; 349 } 350 else if (c >= 'a' && c <= 'f') 351 { 352 if (c >= 'd' && radix == 10) /* exponent? */ 353 break; 354 c = c - 'a' + 10; 355 } 356 else 357 break; 358 if (c >= radix) 359 return false; 360 seen_digits = true; 361 p++; 362 } 363 while (p < p_end); 364 /* If p == p_end, we know that seen_digits = true, and the number is an 365 integer without exponent. */ 366 if (p < p_end) 367 { 368 /* If we have no digits so far, we need a decimal point later. */ 369 if (!seen_digits && !(*p == '.' && radix == 10)) 370 return false; 371 /* Trailing '#' signs are equivalent to zeroes. */ 372 while (p < p_end && *p == '#') 373 p++; 374 if (p < p_end) 375 { 376 if (*p == '/') 377 { 378 /* Parse digits after the slash. */ 379 bool all_zeroes = true; 380 p++; 381 for (; p < p_end; p++) 382 { 383 int c = *p; 384 385 if (c >= '0' && c <= '9') 386 c = c - '0'; 387 else if (c >= 'A' && c <= 'F') 388 c = c - 'A' + 10; 389 else if (c >= 'a' && c <= 'f') 390 c = c - 'a' + 10; 391 else 392 break; 393 if (c >= radix) 394 return false; 395 if (c != 0) 396 all_zeroes = false; 397 } 398 /* A zero denominator is not allowed. */ 399 if (all_zeroes) 400 return false; 401 /* Trailing '#' signs are equivalent to zeroes. */ 402 while (p < p_end && *p == '#') 403 p++; 404 } 405 else 406 { 407 if (*p == '.') 408 { 409 /* Decimal point notation. */ 410 if (radix != 10) 411 return false; 412 /* Parse digits after the decimal point. */ 413 p++; 414 for (; p < p_end; p++) 415 { 416 int c = *p; 417 418 if (c >= '0' && c <= '9') 419 seen_digits = true; 420 else 421 break; 422 } 423 /* Digits are required before or after the decimal point. */ 424 if (!seen_digits) 425 return false; 426 /* Trailing '#' signs are equivalent to zeroes. */ 427 while (p < p_end && *p == '#') 428 p++; 429 } 430 if (p < p_end) 431 { 432 /* Parse exponent. */ 433 switch (*p) 434 { 435 case 'D': case 'd': 436 case 'E': case 'e': 437 case 'F': case 'f': 438 case 'L': case 'l': 439 case 'S': case 's': 440 if (radix != 10) 441 return false; 442 p++; 443 if (p == p_end) 444 return false; 445 if (*p == '+' || *p == '-') 446 { 447 p++; 448 if (p == p_end) 449 return false; 450 } 451 if (!(*p >= '0' && *p <= '9')) 452 return false; 453 for (;;) 454 { 455 p++; 456 if (p == p_end) 457 break; 458 if (!(*p >= '0' && *p <= '9')) 459 break; 460 } 461 break; 462 default: 463 break; 464 } 465 } 466 } 467 } 468 } 469 if (p == p_end) 470 return true; 471 /* Recognize complex number syntax. */ 472 if (unconstrained) 473 { 474 /* Recognize the syntax {'+'|'-'} REAL-NUMBER-WITHOUT-SIGN 'i' */ 475 if (seen_sign && (*p == 'I' || *p == 'i') && p + 1 == p_end) 476 return true; 477 /* Recognize the syntaxes 478 REAL-NUMBER {'+'|'-'} REAL-NUMBER-WITHOUT-SIGN 'i' 479 REAL-NUMBER {'+'|'-'} 'i' 480 */ 481 if (*p == '+' || *p == '-') 482 return (p_end[-1] == 'I' || p_end[-1] == 'i') 483 && (p + 1 == p_end - 1 484 || is_other_number_syntax (p, p_end - 1 - p, radix, false)); 485 /* Recognize the syntax REAL-NUMBER '@' REAL-NUMBER */ 486 if (*p == '@') 487 { 488 p++; 489 return is_other_number_syntax (p, p_end - p, radix, false); 490 } 491 } 492 return false; 493} 494 495/* Tests if a token represents a number. 496 Taken from guile-1.6.4/libguile/numbers.c:scm_istring2number(). */ 497static bool 498is_number (const struct token *tp) 499{ 500 const char *str = tp->chars; 501 int len = tp->charcount; 502 int radix = 10; 503 enum { unknown, exact, inexact } exactness = unknown; 504 bool seen_radix_prefix = false; 505 bool seen_exactness_prefix = false; 506 507 if (len == 1) 508 if (*str == '+' || *str == '-') 509 return false; 510 while (len >= 2 && *str == '#') 511 { 512 switch (str[1]) 513 { 514 case 'B': case 'b': 515 if (seen_radix_prefix) 516 return false; 517 radix = 2; 518 seen_radix_prefix = true; 519 break; 520 case 'O': case 'o': 521 if (seen_radix_prefix) 522 return false; 523 radix = 8; 524 seen_radix_prefix = true; 525 break; 526 case 'D': case 'd': 527 if (seen_radix_prefix) 528 return false; 529 radix = 10; 530 seen_radix_prefix = true; 531 break; 532 case 'X': case 'x': 533 if (seen_radix_prefix) 534 return false; 535 radix = 16; 536 seen_radix_prefix = true; 537 break; 538 case 'E': case 'e': 539 if (seen_exactness_prefix) 540 return false; 541 exactness = exact; 542 seen_exactness_prefix = true; 543 break; 544 case 'I': case 'i': 545 if (seen_exactness_prefix) 546 return false; 547 exactness = inexact; 548 seen_exactness_prefix = true; 549 break; 550 default: 551 return false; 552 } 553 str += 2; 554 len -= 2; 555 } 556 if (exactness != inexact) 557 { 558 /* Try to parse an integer. */ 559 if (is_integer_syntax (str, len, 10)) 560 return true; 561 /* FIXME: Other Scheme implementations support exact rational numbers 562 or exact complex numbers. */ 563 } 564 if (exactness != exact) 565 { 566 /* Try to parse a rational, floating-point or complex number. */ 567 if (is_other_number_syntax (str, len, 10, true)) 568 return true; 569 } 570 return false; 571} 572 573 574/* ========================= Accumulating comments ========================= */ 575 576 577static char *buffer; 578static size_t bufmax; 579static size_t buflen; 580 581static inline void 582comment_start () 583{ 584 buflen = 0; 585} 586 587static inline void 588comment_add (int c) 589{ 590 if (buflen >= bufmax) 591 { 592 bufmax = 2 * bufmax + 10; 593 buffer = xrealloc (buffer, bufmax); 594 } 595 buffer[buflen++] = c; 596} 597 598static inline void 599comment_line_end (size_t chars_to_remove) 600{ 601 buflen -= chars_to_remove; 602 while (buflen >= 1 603 && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t')) 604 --buflen; 605 if (chars_to_remove == 0 && buflen >= bufmax) 606 { 607 bufmax = 2 * bufmax + 10; 608 buffer = xrealloc (buffer, bufmax); 609 } 610 buffer[buflen] = '\0'; 611 savable_comment_add (buffer); 612} 613 614 615/* These are for tracking whether comments count as immediately before 616 keyword. */ 617static int last_comment_line; 618static int last_non_comment_line; 619 620 621/* ========================= Accumulating messages ========================= */ 622 623 624static message_list_ty *mlp; 625 626 627/* ========================== Reading of objects. ========================= */ 628 629 630/* We are only interested in symbols (e.g. gettext or ngettext) and strings. 631 Other objects need not to be represented precisely. */ 632enum object_type 633{ 634 t_symbol, /* symbol */ 635 t_string, /* string */ 636 t_other, /* other kind of real object */ 637 t_dot, /* '.' pseudo object */ 638 t_close, /* ')' pseudo object */ 639 t_eof /* EOF marker */ 640}; 641 642struct object 643{ 644 enum object_type type; 645 struct token *token; /* for t_symbol and t_string */ 646 int line_number_at_start; /* for t_string */ 647}; 648 649/* Free the memory pointed to by a 'struct object'. */ 650static inline void 651free_object (struct object *op) 652{ 653 if (op->type == t_symbol || op->type == t_string) 654 { 655 free_token (op->token); 656 free (op->token); 657 } 658} 659 660/* Convert a t_symbol/t_string token to a char*. */ 661static char * 662string_of_object (const struct object *op) 663{ 664 char *str; 665 int n; 666 667 if (!(op->type == t_symbol || op->type == t_string)) 668 abort (); 669 n = op->token->charcount; 670 str = XNMALLOC (n + 1, char); 671 memcpy (str, op->token->chars, n); 672 str[n] = '\0'; 673 return str; 674} 675 676/* Context lookup table. */ 677static flag_context_list_table_ty *flag_context_list_table; 678 679/* Read the next object. */ 680static void 681read_object (struct object *op, flag_context_ty outer_context) 682{ 683 for (;;) 684 { 685 int c = do_getc (); 686 687 switch (c) 688 { 689 case EOF: 690 op->type = t_eof; 691 return; 692 693 case ' ': case '\r': case '\f': case '\t': 694 continue; 695 696 case '\n': 697 /* Comments assumed to be grouped with a message must immediately 698 precede it, with no non-whitespace token on a line between 699 both. */ 700 if (last_non_comment_line > last_comment_line) 701 savable_comment_reset (); 702 continue; 703 704 case ';': 705 { 706 bool all_semicolons = true; 707 708 last_comment_line = line_number; 709 comment_start (); 710 for (;;) 711 { 712 c = do_getc (); 713 if (c == EOF || c == '\n') 714 break; 715 if (c != ';') 716 all_semicolons = false; 717 if (!all_semicolons) 718 { 719 /* We skip all leading white space, but not EOLs. */ 720 if (!(buflen == 0 && (c == ' ' || c == '\t'))) 721 comment_add (c); 722 } 723 } 724 comment_line_end (0); 725 continue; 726 } 727 728 case '(': 729 { 730 int arg = 0; /* Current argument number. */ 731 flag_context_list_iterator_ty context_iter; 732 const struct callshapes *shapes = NULL; 733 struct arglist_parser *argparser = NULL; 734 735 for (;; arg++) 736 { 737 struct object inner; 738 flag_context_ty inner_context; 739 740 if (arg == 0) 741 inner_context = null_context; 742 else 743 inner_context = 744 inherited_context (outer_context, 745 flag_context_list_iterator_advance ( 746 &context_iter)); 747 748 read_object (&inner, inner_context); 749 750 /* Recognize end of list. */ 751 if (inner.type == t_close) 752 { 753 op->type = t_other; 754 last_non_comment_line = line_number; 755 if (argparser != NULL) 756 arglist_parser_done (argparser, arg); 757 return; 758 } 759 760 /* Dots are not allowed in every position. 761 But be tolerant. */ 762 763 /* EOF inside list is illegal. 764 But be tolerant. */ 765 if (inner.type == t_eof) 766 break; 767 768 if (arg == 0) 769 { 770 /* This is the function position. */ 771 if (inner.type == t_symbol) 772 { 773 char *symbol_name = string_of_object (&inner); 774 void *keyword_value; 775 776 if (hash_find_entry (&keywords, 777 symbol_name, strlen (symbol_name), 778 &keyword_value) 779 == 0) 780 shapes = (const struct callshapes *) keyword_value; 781 782 argparser = arglist_parser_alloc (mlp, shapes); 783 784 context_iter = 785 flag_context_list_iterator ( 786 flag_context_list_table_lookup ( 787 flag_context_list_table, 788 symbol_name, strlen (symbol_name))); 789 790 free (symbol_name); 791 } 792 else 793 context_iter = null_context_list_iterator; 794 } 795 else 796 { 797 /* These are the argument positions. */ 798 if (argparser != NULL && inner.type == t_string) 799 arglist_parser_remember (argparser, arg, 800 string_of_object (&inner), 801 inner_context, 802 logical_file_name, 803 inner.line_number_at_start, 804 savable_comment); 805 } 806 807 free_object (&inner); 808 } 809 if (argparser != NULL) 810 arglist_parser_done (argparser, arg); 811 } 812 op->type = t_other; 813 last_non_comment_line = line_number; 814 return; 815 816 case ')': 817 /* Tell the caller about the end of list. 818 Unmatched closing parenthesis is illegal. 819 But be tolerant. */ 820 op->type = t_close; 821 last_non_comment_line = line_number; 822 return; 823 824 case ',': 825 { 826 int c = do_getc (); 827 /* The ,@ handling inside lists is wrong anyway, because 828 ,@form expands to an unknown number of elements. */ 829 if (c != EOF && c != '@') 830 do_ungetc (c); 831 } 832 /*FALLTHROUGH*/ 833 case '\'': 834 case '`': 835 { 836 struct object inner; 837 838 read_object (&inner, null_context); 839 840 /* Dots and EOF are not allowed here. But be tolerant. */ 841 842 free_object (&inner); 843 844 op->type = t_other; 845 last_non_comment_line = line_number; 846 return; 847 } 848 849 case '#': 850 /* Dispatch macro handling. */ 851 { 852 c = do_getc (); 853 if (c == EOF) 854 /* Invalid input. Be tolerant, no error message. */ 855 { 856 op->type = t_other; 857 return; 858 } 859 860 switch (c) 861 { 862 case '(': /* Vector */ 863 do_ungetc (c); 864 { 865 struct object inner; 866 read_object (&inner, null_context); 867 /* Dots and EOF are not allowed here. 868 But be tolerant. */ 869 free_object (&inner); 870 op->type = t_other; 871 last_non_comment_line = line_number; 872 return; 873 } 874 875 case 'T': case 't': /* Boolean true */ 876 case 'F': case 'f': /* Boolean false */ 877 op->type = t_other; 878 last_non_comment_line = line_number; 879 return; 880 881 case 'B': case 'b': 882 case 'O': case 'o': 883 case 'D': case 'd': 884 case 'X': case 'x': 885 case 'E': case 'e': 886 case 'I': case 'i': 887 { 888 struct token token; 889 do_ungetc (c); 890 read_token (&token, '#'); 891 if (is_number (&token)) 892 { 893 /* A number. */ 894 free_token (&token); 895 op->type = t_other; 896 last_non_comment_line = line_number; 897 return; 898 } 899 else 900 { 901 if (token.charcount == 2 902 && (token.chars[1] == 'e' || token.chars[1] == 'i')) 903 { 904 c = do_getc (); 905 if (c != EOF) 906 do_ungetc (c); 907 if (c == '(') 908 /* Homogenous vector syntax, see arrays.scm. */ 909 case 'a': /* Vectors of char */ 910 case 'c': /* Vectors of complex */ 911 /*case 'e':*/ /* Vectors of long */ 912 case 'h': /* Vectors of short */ 913 /*case 'i':*/ /* Vectors of double-float */ 914 case 'l': /* Vectors of long long */ 915 case 's': /* Vectors of single-float */ 916 case 'u': /* Vectors of unsigned long */ 917 case 'y': /* Vectors of byte */ 918 { 919 struct object inner; 920 read_object (&inner, null_context); 921 /* Dots and EOF are not allowed here. 922 But be tolerant. */ 923 free_token (&token); 924 free_object (&inner); 925 op->type = t_other; 926 last_non_comment_line = line_number; 927 return; 928 } 929 } 930 /* Unknown # object. But be tolerant. */ 931 free_token (&token); 932 op->type = t_other; 933 last_non_comment_line = line_number; 934 return; 935 } 936 } 937 938 case '!': 939 /* Block comment '#! ... \n!#\n'. We don't extract it 940 because it's only used to introduce scripts on Unix. */ 941 { 942 int last1 = 0; 943 int last2 = 0; 944 int last3 = 0; 945 946 for (;;) 947 { 948 c = do_getc (); 949 if (c == EOF) 950 /* EOF is not allowed here. But be tolerant. */ 951 break; 952 if (last3 == '\n' && last2 == '!' && last1 == '#' 953 && c == '\n') 954 break; 955 last3 = last2; 956 last2 = last1; 957 last1 = c; 958 } 959 continue; 960 } 961 962 case '*': 963 /* Bit vector. */ 964 { 965 struct token token; 966 read_token (&token, c); 967 /* The token should consists only of '0' and '1', except 968 for the initial '*'. But be tolerant. */ 969 free_token (&token); 970 op->type = t_other; 971 last_non_comment_line = line_number; 972 return; 973 } 974 975 case '{': 976 /* Symbol with multiple escapes: #{...}# */ 977 { 978 op->token = XMALLOC (struct token); 979 980 init_token (op->token); 981 982 for (;;) 983 { 984 c = do_getc (); 985 986 if (c == EOF) 987 break; 988 if (c == '\\') 989 { 990 c = do_getc (); 991 if (c == EOF) 992 break; 993 } 994 else if (c == '}') 995 { 996 c = do_getc (); 997 if (c == '#') 998 break; 999 if (c != EOF) 1000 do_ungetc (c); 1001 c = '}'; 1002 } 1003 grow_token (op->token); 1004 op->token->chars[op->token->charcount++] = c; 1005 } 1006 1007 op->type = t_symbol; 1008 last_non_comment_line = line_number; 1009 return; 1010 } 1011 1012 case '\\': 1013 /* Character. */ 1014 { 1015 struct token token; 1016 c = do_getc (); 1017 if (c != EOF) 1018 { 1019 read_token (&token, c); 1020 free_token (&token); 1021 } 1022 op->type = t_other; 1023 last_non_comment_line = line_number; 1024 return; 1025 } 1026 1027 case ':': /* Keyword. */ 1028 case '&': /* Deprecated keyword, installed in optargs.scm. */ 1029 { 1030 struct token token; 1031 read_token (&token, '-'); 1032 free_token (&token); 1033 op->type = t_other; 1034 last_non_comment_line = line_number; 1035 return; 1036 } 1037 1038 /* The following are installed through read-hash-extend. */ 1039 1040 /* arrays.scm */ 1041 case '0': case '1': case '2': case '3': case '4': 1042 case '5': case '6': case '7': case '8': case '9': 1043 /* Multidimensional array syntax: #nx(...) where 1044 n ::= DIGIT+ 1045 x ::= {'a'|'b'|'c'|'e'|'i'|'s'|'u'} 1046 */ 1047 do 1048 c = do_getc (); 1049 while (c >= '0' && c <= '9'); 1050 /* c should be one of {'a'|'b'|'c'|'e'|'i'|'s'|'u'}. 1051 But be tolerant. */ 1052 /*FALLTHROUGH*/ 1053 case '\'': /* boot-9.scm */ 1054 case '.': /* boot-9.scm */ 1055 case ',': /* srfi-10.scm */ 1056 { 1057 struct object inner; 1058 read_object (&inner, null_context); 1059 /* Dots and EOF are not allowed here. 1060 But be tolerant. */ 1061 free_object (&inner); 1062 op->type = t_other; 1063 last_non_comment_line = line_number; 1064 return; 1065 } 1066 1067 default: 1068 /* Unknown. */ 1069 op->type = t_other; 1070 last_non_comment_line = line_number; 1071 return; 1072 } 1073 /*NOTREACHED*/ 1074 abort (); 1075 } 1076 1077 case '"': 1078 { 1079 op->token = XMALLOC (struct token); 1080 init_token (op->token); 1081 op->line_number_at_start = line_number; 1082 for (;;) 1083 { 1084 int c = do_getc (); 1085 if (c == EOF) 1086 /* Invalid input. Be tolerant, no error message. */ 1087 break; 1088 if (c == '"') 1089 break; 1090 if (c == '\\') 1091 { 1092 c = do_getc (); 1093 if (c == EOF) 1094 /* Invalid input. Be tolerant, no error message. */ 1095 break; 1096 switch (c) 1097 { 1098 case '\n': 1099 continue; 1100 case '0': 1101 c = '\0'; 1102 break; 1103 case 'a': 1104 c = '\a'; 1105 break; 1106 case 'f': 1107 c = '\f'; 1108 break; 1109 case 'n': 1110 c = '\n'; 1111 break; 1112 case 'r': 1113 c = '\r'; 1114 break; 1115 case 't': 1116 c = '\t'; 1117 break; 1118 case 'v': 1119 c = '\v'; 1120 break; 1121 default: 1122 break; 1123 } 1124 } 1125 grow_token (op->token); 1126 op->token->chars[op->token->charcount++] = c; 1127 } 1128 op->type = t_string; 1129 1130 if (extract_all) 1131 { 1132 lex_pos_ty pos; 1133 1134 pos.file_name = logical_file_name; 1135 pos.line_number = op->line_number_at_start; 1136 remember_a_message (mlp, NULL, string_of_object (op), 1137 null_context, &pos, savable_comment); 1138 } 1139 last_non_comment_line = line_number; 1140 return; 1141 } 1142 1143 case '0': case '1': case '2': case '3': case '4': 1144 case '5': case '6': case '7': case '8': case '9': 1145 case '+': case '-': case '.': 1146 /* Read a number or symbol token. */ 1147 op->token = XMALLOC (struct token); 1148 read_token (op->token, c); 1149 if (op->token->charcount == 1 && op->token->chars[0] == '.') 1150 { 1151 free_token (op->token); 1152 free (op->token); 1153 op->type = t_dot; 1154 } 1155 else if (is_number (op->token)) 1156 { 1157 /* A number. */ 1158 free_token (op->token); 1159 free (op->token); 1160 op->type = t_other; 1161 } 1162 else 1163 { 1164 /* A symbol. */ 1165 op->type = t_symbol; 1166 } 1167 last_non_comment_line = line_number; 1168 return; 1169 1170 case ':': 1171 default: 1172 /* Read a symbol token. */ 1173 op->token = XMALLOC (struct token); 1174 read_token (op->token, c); 1175 op->type = t_symbol; 1176 last_non_comment_line = line_number; 1177 return; 1178 } 1179 } 1180} 1181 1182 1183void 1184extract_scheme (FILE *f, 1185 const char *real_filename, const char *logical_filename, 1186 flag_context_list_table_ty *flag_table, 1187 msgdomain_list_ty *mdlp) 1188{ 1189 mlp = mdlp->item[0]->messages; 1190 1191 fp = f; 1192 real_file_name = real_filename; 1193 logical_file_name = xstrdup (logical_filename); 1194 line_number = 1; 1195 1196 last_comment_line = -1; 1197 last_non_comment_line = -1; 1198 1199 flag_context_list_table = flag_table; 1200 1201 init_keywords (); 1202 1203 /* Eat tokens until eof is seen. When read_object returns 1204 due to an unbalanced closing parenthesis, just restart it. */ 1205 do 1206 { 1207 struct object toplevel_object; 1208 1209 read_object (&toplevel_object, null_context); 1210 1211 if (toplevel_object.type == t_eof) 1212 break; 1213 1214 free_object (&toplevel_object); 1215 } 1216 while (!feof (fp)); 1217 1218 /* Close scanner. */ 1219 fp = NULL; 1220 real_file_name = NULL; 1221 logical_file_name = NULL; 1222 line_number = 0; 1223} 1224