1/* xgettext Scheme backend. 2 Copyright (C) 2004-2006 Free Software Foundation, Inc. 3 4 This file was written by Bruno Haible <bruno@clisp.org>, 2004-2005. 5 6 This program is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 2, or (at your option) 9 any later version. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program; if not, write to the Free Software Foundation, 18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 19 20#ifdef HAVE_CONFIG_H 21# include "config.h" 22#endif 23 24#include <errno.h> 25#include <stdbool.h> 26#include <stdio.h> 27#include <stdlib.h> 28#include <string.h> 29 30#include "message.h" 31#include "xgettext.h" 32#include "x-scheme.h" 33#include "error.h" 34#include "xalloc.h" 35#include "exit.h" 36#include "hash.h" 37#include "gettext.h" 38 39#define _(s) gettext(s) 40 41 42/* The Scheme syntax is described in R5RS. It is implemented in 43 guile-1.6.4/libguile/read.c. 44 Since we are interested only in strings and in forms similar to 45 (gettext msgid ...) 46 or (ngettext msgid msgid_plural ...) 47 we make the following simplifications: 48 49 - Assume the keywords and strings are in an ASCII compatible encoding. 50 This means we can read the input file one byte at a time, instead of 51 one character at a time. No need to worry about multibyte characters: 52 If they occur as part of identifiers, they most probably act as 53 constituent characters, and the byte based approach will do the same. 54 55 - Assume the read-hash-procedures is in the default state. 56 Non-standard reader extensions are mostly used to read data, not programs. 57 58 The remaining syntax rules are: 59 60 - The syntax code assigned to each character, and how tokens are built 61 up from characters (single escape, multiple escape etc.). 62 63 - Comment syntax: ';' and '#! ... \n!#\n'. 64 65 - String syntax: "..." with single escapes. 66 67 - Read macros and dispatch macro character '#'. Needed to be able to 68 tell which is the n-th argument of a function call. 69 70 */ 71 72 73/* ====================== Keyword set customization. ====================== */ 74 75/* If true extract all strings. */ 76static bool extract_all = false; 77 78static hash_table keywords; 79static bool default_keywords = true; 80 81 82void 83x_scheme_extract_all () 84{ 85 extract_all = true; 86} 87 88 89void 90x_scheme_keyword (const char *name) 91{ 92 if (name == NULL) 93 default_keywords = false; 94 else 95 { 96 const char *end; 97 struct callshape shape; 98 const char *colon; 99 100 if (keywords.table == NULL) 101 hash_init (&keywords, 100); 102 103 split_keywordspec (name, &end, &shape); 104 105 /* The characters between name and end should form a valid Lisp symbol. 106 Extract the symbol name part. */ 107 colon = strchr (name, ':'); 108 if (colon != NULL && colon < end) 109 { 110 name = colon + 1; 111 if (name < end && *name == ':') 112 name++; 113 colon = strchr (name, ':'); 114 if (colon != NULL && colon < end) 115 return; 116 } 117 118 insert_keyword_callshape (&keywords, name, end - name, &shape); 119 } 120} 121 122/* Finish initializing the keywords hash table. 123 Called after argument processing, before each file is processed. */ 124static void 125init_keywords () 126{ 127 if (default_keywords) 128 { 129 /* When adding new keywords here, also update the documentation in 130 xgettext.texi! */ 131 x_scheme_keyword ("gettext"); /* libguile/i18n.c */ 132 x_scheme_keyword ("ngettext:1,2"); /* libguile/i18n.c */ 133 x_scheme_keyword ("gettext-noop"); 134 default_keywords = false; 135 } 136} 137 138void 139init_flag_table_scheme () 140{ 141 xgettext_record_flag ("gettext:1:pass-scheme-format"); 142 xgettext_record_flag ("ngettext:1:pass-scheme-format"); 143 xgettext_record_flag ("ngettext:2:pass-scheme-format"); 144 xgettext_record_flag ("gettext-noop:1:pass-scheme-format"); 145 xgettext_record_flag ("format:2:scheme-format"); 146} 147 148 149/* ======================== Reading of characters. ======================== */ 150 151/* Real filename, used in error messages about the input file. */ 152static const char *real_file_name; 153 154/* Logical filename and line number, used to label the extracted messages. */ 155static char *logical_file_name; 156static int line_number; 157 158/* The input file stream. */ 159static FILE *fp; 160 161 162/* Fetch the next character from the input file. */ 163static int 164do_getc () 165{ 166 int c = getc (fp); 167 168 if (c == EOF) 169 { 170 if (ferror (fp)) 171 error (EXIT_FAILURE, errno, _("\ 172error while reading \"%s\""), real_file_name); 173 } 174 else if (c == '\n') 175 line_number++; 176 177 return c; 178} 179 180/* Put back the last fetched character, not EOF. */ 181static void 182do_ungetc (int c) 183{ 184 if (c == '\n') 185 line_number--; 186 ungetc (c, fp); 187} 188 189 190/* ========================== Reading of tokens. ========================== */ 191 192 193/* A token consists of a sequence of characters. */ 194struct token 195{ 196 int allocated; /* number of allocated 'token_char's */ 197 int charcount; /* number of used 'token_char's */ 198 char *chars; /* the token's constituents */ 199}; 200 201/* Initialize a 'struct token'. */ 202static inline void 203init_token (struct token *tp) 204{ 205 tp->allocated = 10; 206 tp->chars = (char *) xmalloc (tp->allocated * sizeof (char)); 207 tp->charcount = 0; 208} 209 210/* Free the memory pointed to by a 'struct token'. */ 211static inline void 212free_token (struct token *tp) 213{ 214 free (tp->chars); 215} 216 217/* Ensure there is enough room in the token for one more character. */ 218static inline void 219grow_token (struct token *tp) 220{ 221 if (tp->charcount == tp->allocated) 222 { 223 tp->allocated *= 2; 224 tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char)); 225 } 226} 227 228/* Read the next token. 'first' is the first character, which has already 229 been read. */ 230static void 231read_token (struct token *tp, int first) 232{ 233 init_token (tp); 234 235 grow_token (tp); 236 tp->chars[tp->charcount++] = first; 237 238 for (;;) 239 { 240 int c = do_getc (); 241 242 if (c == EOF) 243 break; 244 if (c == ' ' || c == '\r' || c == '\f' || c == '\t' || c == '\n' 245 || c == '"' || c == '(' || c == ')' || c == ';') 246 { 247 do_ungetc (c); 248 break; 249 } 250 grow_token (tp); 251 tp->chars[tp->charcount++] = c; 252 } 253} 254 255/* Tests if a token represents an integer. 256 Taken from guile-1.6.4/libguile/numbers.c:scm_istr2int(). */ 257static inline bool 258is_integer_syntax (const char *str, int len, int radix) 259{ 260 const char *p = str; 261 const char *p_end = str + len; 262 263 /* The accepted syntax is 264 ['+'|'-'] DIGIT+ 265 where DIGIT is a hexadecimal digit whose value is below radix. */ 266 267 if (p == p_end) 268 return false; 269 if (*p == '+' || *p == '-') 270 { 271 p++; 272 if (p == p_end) 273 return false; 274 } 275 do 276 { 277 int c = *p++; 278 279 if (c >= '0' && c <= '9') 280 c = c - '0'; 281 else if (c >= 'A' && c <= 'F') 282 c = c - 'A' + 10; 283 else if (c >= 'a' && c <= 'f') 284 c = c - 'a' + 10; 285 else 286 return false; 287 if (c >= radix) 288 return false; 289 } 290 while (p < p_end); 291 return true; 292} 293 294/* Tests if a token represents a rational, floating-point or complex number. 295 If unconstrained is false, only real numbers are accepted; otherwise, 296 complex numbers are accepted as well. 297 Taken from guile-1.6.4/libguile/numbers.c:scm_istr2flo(). */ 298static inline bool 299is_other_number_syntax (const char *str, int len, int radix, bool unconstrained) 300{ 301 const char *p = str; 302 const char *p_end = str + len; 303 bool seen_sign; 304 bool seen_digits; 305 306 /* The accepted syntaxes are: 307 for a floating-point number: 308 ['+'|'-'] DIGIT+ [EXPONENT] 309 ['+'|'-'] DIGIT* '.' DIGIT+ [EXPONENT] 310 where EXPONENT ::= ['d'|'e'|'f'|'l'|'s'] DIGIT+ 311 (Dot and exponent are allowed only if radix is 10.) 312 for a rational number: 313 ['+'|'-'] DIGIT+ '/' DIGIT+ 314 for a complex number: 315 REAL-NUMBER {'+'|'-'} REAL-NUMBER-WITHOUT-SIGN 'i' 316 REAL-NUMBER {'+'|'-'} 'i' 317 {'+'|'-'} REAL-NUMBER-WITHOUT-SIGN 'i' 318 {'+'|'-'} 'i' 319 REAL-NUMBER '@' REAL-NUMBER 320 */ 321 if (p == p_end) 322 return false; 323 /* Parse leading sign. */ 324 seen_sign = false; 325 if (*p == '+' || *p == '-') 326 { 327 p++; 328 if (p == p_end) 329 return false; 330 seen_sign = true; 331 /* Recognize complex number syntax: {'+'|'-'} 'i' */ 332 if (unconstrained && (*p == 'I' || *p == 'i') && p + 1 == p_end) 333 return true; 334 } 335 /* Parse digits before dot or exponent or slash. */ 336 seen_digits = false; 337 do 338 { 339 int c = *p; 340 341 if (c >= '0' && c <= '9') 342 c = c - '0'; 343 else if (c >= 'A' && c <= 'F') 344 { 345 if (c >= 'D' && radix == 10) /* exponent? */ 346 break; 347 c = c - 'A' + 10; 348 } 349 else if (c >= 'a' && c <= 'f') 350 { 351 if (c >= 'd' && radix == 10) /* exponent? */ 352 break; 353 c = c - 'a' + 10; 354 } 355 else 356 break; 357 if (c >= radix) 358 return false; 359 seen_digits = true; 360 p++; 361 } 362 while (p < p_end); 363 /* If p == p_end, we know that seen_digits = true, and the number is an 364 integer without exponent. */ 365 if (p < p_end) 366 { 367 /* If we have no digits so far, we need a decimal point later. */ 368 if (!seen_digits && !(*p == '.' && radix == 10)) 369 return false; 370 /* Trailing '#' signs are equivalent to zeroes. */ 371 while (p < p_end && *p == '#') 372 p++; 373 if (p < p_end) 374 { 375 if (*p == '/') 376 { 377 /* Parse digits after the slash. */ 378 bool all_zeroes = true; 379 p++; 380 for (; p < p_end; p++) 381 { 382 int c = *p; 383 384 if (c >= '0' && c <= '9') 385 c = c - '0'; 386 else if (c >= 'A' && c <= 'F') 387 c = c - 'A' + 10; 388 else if (c >= 'a' && c <= 'f') 389 c = c - 'a' + 10; 390 else 391 break; 392 if (c >= radix) 393 return false; 394 if (c != 0) 395 all_zeroes = false; 396 } 397 /* A zero denominator is not allowed. */ 398 if (all_zeroes) 399 return false; 400 /* Trailing '#' signs are equivalent to zeroes. */ 401 while (p < p_end && *p == '#') 402 p++; 403 } 404 else 405 { 406 if (*p == '.') 407 { 408 /* Decimal point notation. */ 409 if (radix != 10) 410 return false; 411 /* Parse digits after the decimal point. */ 412 p++; 413 for (; p < p_end; p++) 414 { 415 int c = *p; 416 417 if (c >= '0' && c <= '9') 418 seen_digits = true; 419 else 420 break; 421 } 422 /* Digits are required before or after the decimal point. */ 423 if (!seen_digits) 424 return false; 425 /* Trailing '#' signs are equivalent to zeroes. */ 426 while (p < p_end && *p == '#') 427 p++; 428 } 429 if (p < p_end) 430 { 431 /* Parse exponent. */ 432 switch (*p) 433 { 434 case 'D': case 'd': 435 case 'E': case 'e': 436 case 'F': case 'f': 437 case 'L': case 'l': 438 case 'S': case 's': 439 if (radix != 10) 440 return false; 441 p++; 442 if (p == p_end) 443 return false; 444 if (*p == '+' || *p == '-') 445 { 446 p++; 447 if (p == p_end) 448 return false; 449 } 450 if (!(*p >= '0' && *p <= '9')) 451 return false; 452 for (;;) 453 { 454 p++; 455 if (p == p_end) 456 break; 457 if (!(*p >= '0' && *p <= '9')) 458 break; 459 } 460 break; 461 default: 462 break; 463 } 464 } 465 } 466 } 467 } 468 if (p == p_end) 469 return true; 470 /* Recognize complex number syntax. */ 471 if (unconstrained) 472 { 473 /* Recognize the syntax {'+'|'-'} REAL-NUMBER-WITHOUT-SIGN 'i' */ 474 if (seen_sign && (*p == 'I' || *p == 'i') && p + 1 == p_end) 475 return true; 476 /* Recognize the syntaxes 477 REAL-NUMBER {'+'|'-'} REAL-NUMBER-WITHOUT-SIGN 'i' 478 REAL-NUMBER {'+'|'-'} 'i' 479 */ 480 if (*p == '+' || *p == '-') 481 return (p_end[-1] == 'I' || p_end[-1] == 'i') 482 && (p + 1 == p_end - 1 483 || is_other_number_syntax (p, p_end - 1 - p, radix, false)); 484 /* Recognize the syntax REAL-NUMBER '@' REAL-NUMBER */ 485 if (*p == '@') 486 { 487 p++; 488 return is_other_number_syntax (p, p_end - p, radix, false); 489 } 490 } 491 return false; 492} 493 494/* Tests if a token represents a number. 495 Taken from guile-1.6.4/libguile/numbers.c:scm_istring2number(). */ 496static bool 497is_number (const struct token *tp) 498{ 499 const char *str = tp->chars; 500 int len = tp->charcount; 501 int radix = 10; 502 enum { unknown, exact, inexact } exactness = unknown; 503 bool seen_radix_prefix = false; 504 bool seen_exactness_prefix = false; 505 506 if (len == 1) 507 if (*str == '+' || *str == '-') 508 return false; 509 while (len >= 2 && *str == '#') 510 { 511 switch (str[1]) 512 { 513 case 'B': case 'b': 514 if (seen_radix_prefix) 515 return false; 516 radix = 2; 517 seen_radix_prefix = true; 518 break; 519 case 'O': case 'o': 520 if (seen_radix_prefix) 521 return false; 522 radix = 8; 523 seen_radix_prefix = true; 524 break; 525 case 'D': case 'd': 526 if (seen_radix_prefix) 527 return false; 528 radix = 10; 529 seen_radix_prefix = true; 530 break; 531 case 'X': case 'x': 532 if (seen_radix_prefix) 533 return false; 534 radix = 16; 535 seen_radix_prefix = true; 536 break; 537 case 'E': case 'e': 538 if (seen_exactness_prefix) 539 return false; 540 exactness = exact; 541 seen_exactness_prefix = true; 542 break; 543 case 'I': case 'i': 544 if (seen_exactness_prefix) 545 return false; 546 exactness = inexact; 547 seen_exactness_prefix = true; 548 break; 549 default: 550 return false; 551 } 552 str += 2; 553 len -= 2; 554 } 555 if (exactness != inexact) 556 { 557 /* Try to parse an integer. */ 558 if (is_integer_syntax (str, len, 10)) 559 return true; 560 /* FIXME: Other Scheme implementations support exact rational numbers 561 or exact complex numbers. */ 562 } 563 if (exactness != exact) 564 { 565 /* Try to parse a rational, floating-point or complex number. */ 566 if (is_other_number_syntax (str, len, 10, true)) 567 return true; 568 } 569 return false; 570} 571 572 573/* ========================= Accumulating comments ========================= */ 574 575 576static char *buffer; 577static size_t bufmax; 578static size_t buflen; 579 580static inline void 581comment_start () 582{ 583 buflen = 0; 584} 585 586static inline void 587comment_add (int c) 588{ 589 if (buflen >= bufmax) 590 { 591 bufmax = 2 * bufmax + 10; 592 buffer = xrealloc (buffer, bufmax); 593 } 594 buffer[buflen++] = c; 595} 596 597static inline void 598comment_line_end (size_t chars_to_remove) 599{ 600 buflen -= chars_to_remove; 601 while (buflen >= 1 602 && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t')) 603 --buflen; 604 if (chars_to_remove == 0 && buflen >= bufmax) 605 { 606 bufmax = 2 * bufmax + 10; 607 buffer = xrealloc (buffer, bufmax); 608 } 609 buffer[buflen] = '\0'; 610 savable_comment_add (buffer); 611} 612 613 614/* These are for tracking whether comments count as immediately before 615 keyword. */ 616static int last_comment_line; 617static int last_non_comment_line; 618 619 620/* ========================= Accumulating messages ========================= */ 621 622 623static message_list_ty *mlp; 624 625 626/* ========================== Reading of objects. ========================= */ 627 628 629/* We are only interested in symbols (e.g. gettext or ngettext) and strings. 630 Other objects need not to be represented precisely. */ 631enum object_type 632{ 633 t_symbol, /* symbol */ 634 t_string, /* string */ 635 t_other, /* other kind of real object */ 636 t_dot, /* '.' pseudo object */ 637 t_close, /* ')' pseudo object */ 638 t_eof /* EOF marker */ 639}; 640 641struct object 642{ 643 enum object_type type; 644 struct token *token; /* for t_symbol and t_string */ 645 int line_number_at_start; /* for t_string */ 646}; 647 648/* Free the memory pointed to by a 'struct object'. */ 649static inline void 650free_object (struct object *op) 651{ 652 if (op->type == t_symbol || op->type == t_string) 653 { 654 free_token (op->token); 655 free (op->token); 656 } 657} 658 659/* Convert a t_symbol/t_string token to a char*. */ 660static char * 661string_of_object (const struct object *op) 662{ 663 char *str; 664 int n; 665 666 if (!(op->type == t_symbol || op->type == t_string)) 667 abort (); 668 n = op->token->charcount; 669 str = (char *) xmalloc (n + 1); 670 memcpy (str, op->token->chars, n); 671 str[n] = '\0'; 672 return str; 673} 674 675/* Context lookup table. */ 676static flag_context_list_table_ty *flag_context_list_table; 677 678/* Read the next object. */ 679static void 680read_object (struct object *op, flag_context_ty outer_context) 681{ 682 for (;;) 683 { 684 int c = do_getc (); 685 686 switch (c) 687 { 688 case EOF: 689 op->type = t_eof; 690 return; 691 692 case ' ': case '\r': case '\f': case '\t': 693 continue; 694 695 case '\n': 696 /* Comments assumed to be grouped with a message must immediately 697 precede it, with no non-whitespace token on a line between 698 both. */ 699 if (last_non_comment_line > last_comment_line) 700 savable_comment_reset (); 701 continue; 702 703 case ';': 704 { 705 bool all_semicolons = true; 706 707 last_comment_line = line_number; 708 comment_start (); 709 for (;;) 710 { 711 c = do_getc (); 712 if (c == EOF || c == '\n') 713 break; 714 if (c != ';') 715 all_semicolons = false; 716 if (!all_semicolons) 717 { 718 /* We skip all leading white space, but not EOLs. */ 719 if (!(buflen == 0 && (c == ' ' || c == '\t'))) 720 comment_add (c); 721 } 722 } 723 comment_line_end (0); 724 continue; 725 } 726 727 case '(': 728 { 729 int arg = 0; /* Current argument number. */ 730 flag_context_list_iterator_ty context_iter; 731 const struct callshapes *shapes = NULL; 732 struct arglist_parser *argparser = NULL; 733 734 for (;; arg++) 735 { 736 struct object inner; 737 flag_context_ty inner_context; 738 739 if (arg == 0) 740 inner_context = null_context; 741 else 742 inner_context = 743 inherited_context (outer_context, 744 flag_context_list_iterator_advance ( 745 &context_iter)); 746 747 read_object (&inner, inner_context); 748 749 /* Recognize end of list. */ 750 if (inner.type == t_close) 751 { 752 op->type = t_other; 753 last_non_comment_line = line_number; 754 if (argparser != NULL) 755 arglist_parser_done (argparser, arg); 756 return; 757 } 758 759 /* Dots are not allowed in every position. 760 But be tolerant. */ 761 762 /* EOF inside list is illegal. 763 But be tolerant. */ 764 if (inner.type == t_eof) 765 break; 766 767 if (arg == 0) 768 { 769 /* This is the function position. */ 770 if (inner.type == t_symbol) 771 { 772 char *symbol_name = string_of_object (&inner); 773 void *keyword_value; 774 775 if (hash_find_entry (&keywords, 776 symbol_name, strlen (symbol_name), 777 &keyword_value) 778 == 0) 779 shapes = (const struct callshapes *) keyword_value; 780 781 argparser = arglist_parser_alloc (mlp, shapes); 782 783 context_iter = 784 flag_context_list_iterator ( 785 flag_context_list_table_lookup ( 786 flag_context_list_table, 787 symbol_name, strlen (symbol_name))); 788 789 free (symbol_name); 790 } 791 else 792 context_iter = null_context_list_iterator; 793 } 794 else 795 { 796 /* These are the argument positions. */ 797 if (argparser != NULL && inner.type == t_string) 798 arglist_parser_remember (argparser, arg, 799 string_of_object (&inner), 800 inner_context, 801 logical_file_name, 802 inner.line_number_at_start, 803 savable_comment); 804 } 805 806 free_object (&inner); 807 } 808 if (argparser != NULL) 809 arglist_parser_done (argparser, arg); 810 } 811 op->type = t_other; 812 last_non_comment_line = line_number; 813 return; 814 815 case ')': 816 /* Tell the caller about the end of list. 817 Unmatched closing parenthesis is illegal. 818 But be tolerant. */ 819 op->type = t_close; 820 last_non_comment_line = line_number; 821 return; 822 823 case ',': 824 { 825 int c = do_getc (); 826 /* The ,@ handling inside lists is wrong anyway, because 827 ,@form expands to an unknown number of elements. */ 828 if (c != EOF && c != '@') 829 do_ungetc (c); 830 } 831 /*FALLTHROUGH*/ 832 case '\'': 833 case '`': 834 { 835 struct object inner; 836 837 read_object (&inner, null_context); 838 839 /* Dots and EOF are not allowed here. But be tolerant. */ 840 841 free_object (&inner); 842 843 op->type = t_other; 844 last_non_comment_line = line_number; 845 return; 846 } 847 848 case '#': 849 /* Dispatch macro handling. */ 850 { 851 c = do_getc (); 852 if (c == EOF) 853 /* Invalid input. Be tolerant, no error message. */ 854 { 855 op->type = t_other; 856 return; 857 } 858 859 switch (c) 860 { 861 case '(': /* Vector */ 862 do_ungetc (c); 863 { 864 struct object inner; 865 read_object (&inner, null_context); 866 /* Dots and EOF are not allowed here. 867 But be tolerant. */ 868 free_object (&inner); 869 op->type = t_other; 870 last_non_comment_line = line_number; 871 return; 872 } 873 874 case 'T': case 't': /* Boolean true */ 875 case 'F': case 'f': /* Boolean false */ 876 op->type = t_other; 877 last_non_comment_line = line_number; 878 return; 879 880 case 'B': case 'b': 881 case 'O': case 'o': 882 case 'D': case 'd': 883 case 'X': case 'x': 884 case 'E': case 'e': 885 case 'I': case 'i': 886 { 887 struct token token; 888 do_ungetc (c); 889 read_token (&token, '#'); 890 if (is_number (&token)) 891 { 892 /* A number. */ 893 free_token (&token); 894 op->type = t_other; 895 last_non_comment_line = line_number; 896 return; 897 } 898 else 899 { 900 if (token.charcount == 2 901 && (token.chars[1] == 'e' || token.chars[1] == 'i')) 902 { 903 c = do_getc (); 904 if (c != EOF) 905 do_ungetc (c); 906 if (c == '(') 907 /* Homogenous vector syntax, see arrays.scm. */ 908 case 'a': /* Vectors of char */ 909 case 'c': /* Vectors of complex */ 910 /*case 'e':*/ /* Vectors of long */ 911 case 'h': /* Vectors of short */ 912 /*case 'i':*/ /* Vectors of double-float */ 913 case 'l': /* Vectors of long long */ 914 case 's': /* Vectors of single-float */ 915 case 'u': /* Vectors of unsigned long */ 916 case 'y': /* Vectors of byte */ 917 { 918 struct object inner; 919 read_object (&inner, null_context); 920 /* Dots and EOF are not allowed here. 921 But be tolerant. */ 922 free_token (&token); 923 free_object (&inner); 924 op->type = t_other; 925 last_non_comment_line = line_number; 926 return; 927 } 928 } 929 /* Unknown # object. But be tolerant. */ 930 free_token (&token); 931 op->type = t_other; 932 last_non_comment_line = line_number; 933 return; 934 } 935 } 936 937 case '!': 938 /* Block comment '#! ... \n!#\n'. We don't extract it 939 because it's only used to introduce scripts on Unix. */ 940 { 941 int last1 = 0; 942 int last2 = 0; 943 int last3 = 0; 944 945 for (;;) 946 { 947 c = do_getc (); 948 if (c == EOF) 949 /* EOF is not allowed here. But be tolerant. */ 950 break; 951 if (last3 == '\n' && last2 == '!' && last1 == '#' 952 && c == '\n') 953 break; 954 last3 = last2; 955 last2 = last1; 956 last1 = c; 957 } 958 continue; 959 } 960 961 case '*': 962 /* Bit vector. */ 963 { 964 struct token token; 965 read_token (&token, c); 966 /* The token should consists only of '0' and '1', except 967 for the initial '*'. But be tolerant. */ 968 free_token (&token); 969 op->type = t_other; 970 last_non_comment_line = line_number; 971 return; 972 } 973 974 case '{': 975 /* Symbol with multiple escapes: #{...}# */ 976 { 977 op->token = (struct token *) xmalloc (sizeof (struct token)); 978 979 init_token (op->token); 980 981 for (;;) 982 { 983 c = do_getc (); 984 985 if (c == EOF) 986 break; 987 if (c == '\\') 988 { 989 c = do_getc (); 990 if (c == EOF) 991 break; 992 } 993 else if (c == '}') 994 { 995 c = do_getc (); 996 if (c == '#') 997 break; 998 if (c != EOF) 999 do_ungetc (c); 1000 c = '}'; 1001 } 1002 grow_token (op->token); 1003 op->token->chars[op->token->charcount++] = c; 1004 } 1005 1006 op->type = t_symbol; 1007 last_non_comment_line = line_number; 1008 return; 1009 } 1010 1011 case '\\': 1012 /* Character. */ 1013 { 1014 struct token token; 1015 c = do_getc (); 1016 if (c != EOF) 1017 { 1018 read_token (&token, c); 1019 free_token (&token); 1020 } 1021 op->type = t_other; 1022 last_non_comment_line = line_number; 1023 return; 1024 } 1025 1026 case ':': /* Keyword. */ 1027 case '&': /* Deprecated keyword, installed in optargs.scm. */ 1028 { 1029 struct token token; 1030 read_token (&token, '-'); 1031 free_token (&token); 1032 op->type = t_other; 1033 last_non_comment_line = line_number; 1034 return; 1035 } 1036 1037 /* The following are installed through read-hash-extend. */ 1038 1039 /* arrays.scm */ 1040 case '0': case '1': case '2': case '3': case '4': 1041 case '5': case '6': case '7': case '8': case '9': 1042 /* Multidimensional array syntax: #nx(...) where 1043 n ::= DIGIT+ 1044 x ::= {'a'|'b'|'c'|'e'|'i'|'s'|'u'} 1045 */ 1046 do 1047 c = do_getc (); 1048 while (c >= '0' && c <= '9'); 1049 /* c should be one of {'a'|'b'|'c'|'e'|'i'|'s'|'u'}. 1050 But be tolerant. */ 1051 /*FALLTHROUGH*/ 1052 case '\'': /* boot-9.scm */ 1053 case '.': /* boot-9.scm */ 1054 case ',': /* srfi-10.scm */ 1055 { 1056 struct object inner; 1057 read_object (&inner, null_context); 1058 /* Dots and EOF are not allowed here. 1059 But be tolerant. */ 1060 free_object (&inner); 1061 op->type = t_other; 1062 last_non_comment_line = line_number; 1063 return; 1064 } 1065 1066 default: 1067 /* Unknown. */ 1068 op->type = t_other; 1069 last_non_comment_line = line_number; 1070 return; 1071 } 1072 /*NOTREACHED*/ 1073 abort (); 1074 } 1075 1076 case '"': 1077 { 1078 op->token = (struct token *) xmalloc (sizeof (struct token)); 1079 init_token (op->token); 1080 op->line_number_at_start = line_number; 1081 for (;;) 1082 { 1083 int c = do_getc (); 1084 if (c == EOF) 1085 /* Invalid input. Be tolerant, no error message. */ 1086 break; 1087 if (c == '"') 1088 break; 1089 if (c == '\\') 1090 { 1091 c = do_getc (); 1092 if (c == EOF) 1093 /* Invalid input. Be tolerant, no error message. */ 1094 break; 1095 switch (c) 1096 { 1097 case '\n': 1098 continue; 1099 case '0': 1100 c = '\0'; 1101 break; 1102 case 'a': 1103 c = '\a'; 1104 break; 1105 case 'f': 1106 c = '\f'; 1107 break; 1108 case 'n': 1109 c = '\n'; 1110 break; 1111 case 'r': 1112 c = '\r'; 1113 break; 1114 case 't': 1115 c = '\t'; 1116 break; 1117 case 'v': 1118 c = '\v'; 1119 break; 1120 default: 1121 break; 1122 } 1123 } 1124 grow_token (op->token); 1125 op->token->chars[op->token->charcount++] = c; 1126 } 1127 op->type = t_string; 1128 1129 if (extract_all) 1130 { 1131 lex_pos_ty pos; 1132 1133 pos.file_name = logical_file_name; 1134 pos.line_number = op->line_number_at_start; 1135 remember_a_message (mlp, NULL, string_of_object (op), 1136 null_context, &pos, savable_comment); 1137 } 1138 last_non_comment_line = line_number; 1139 return; 1140 } 1141 1142 case '0': case '1': case '2': case '3': case '4': 1143 case '5': case '6': case '7': case '8': case '9': 1144 case '+': case '-': case '.': 1145 /* Read a number or symbol token. */ 1146 op->token = (struct token *) xmalloc (sizeof (struct token)); 1147 read_token (op->token, c); 1148 if (op->token->charcount == 1 && op->token->chars[0] == '.') 1149 { 1150 free_token (op->token); 1151 free (op->token); 1152 op->type = t_dot; 1153 } 1154 else if (is_number (op->token)) 1155 { 1156 /* A number. */ 1157 free_token (op->token); 1158 free (op->token); 1159 op->type = t_other; 1160 } 1161 else 1162 { 1163 /* A symbol. */ 1164 op->type = t_symbol; 1165 } 1166 last_non_comment_line = line_number; 1167 return; 1168 1169 case ':': 1170 default: 1171 /* Read a symbol token. */ 1172 op->token = (struct token *) xmalloc (sizeof (struct token)); 1173 read_token (op->token, c); 1174 op->type = t_symbol; 1175 last_non_comment_line = line_number; 1176 return; 1177 } 1178 } 1179} 1180 1181 1182void 1183extract_scheme (FILE *f, 1184 const char *real_filename, const char *logical_filename, 1185 flag_context_list_table_ty *flag_table, 1186 msgdomain_list_ty *mdlp) 1187{ 1188 mlp = mdlp->item[0]->messages; 1189 1190 fp = f; 1191 real_file_name = real_filename; 1192 logical_file_name = xstrdup (logical_filename); 1193 line_number = 1; 1194 1195 last_comment_line = -1; 1196 last_non_comment_line = -1; 1197 1198 flag_context_list_table = flag_table; 1199 1200 init_keywords (); 1201 1202 /* Eat tokens until eof is seen. When read_object returns 1203 due to an unbalanced closing parenthesis, just restart it. */ 1204 do 1205 { 1206 struct object toplevel_object; 1207 1208 read_object (&toplevel_object, null_context); 1209 1210 if (toplevel_object.type == t_eof) 1211 break; 1212 1213 free_object (&toplevel_object); 1214 } 1215 while (!feof (fp)); 1216 1217 /* Close scanner. */ 1218 fp = NULL; 1219 real_file_name = NULL; 1220 logical_file_name = NULL; 1221 line_number = 0; 1222} 1223