1/* xgettext Emacs Lisp backend. 2 Copyright (C) 2001-2003, 2005-2007 Free Software Foundation, Inc. 3 4 This file was written by Bruno Haible <haible@clisp.cons.org>, 2001-2002. 5 6 This program is free software: you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 3 of the License, or 9 (at your option) any later version. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 18 19#ifdef HAVE_CONFIG_H 20# include "config.h" 21#endif 22 23/* Specification. */ 24#include "x-elisp.h" 25 26#include <errno.h> 27#include <stdbool.h> 28#include <stdio.h> 29#include <stdlib.h> 30#include <string.h> 31 32#include "message.h" 33#include "xgettext.h" 34#include "x-elisp.h" 35#include "error.h" 36#include "xalloc.h" 37#include "hash.h" 38#include "c-ctype.h" 39#include "gettext.h" 40 41#define _(s) gettext(s) 42 43 44/* Summary of Emacs Lisp syntax: 45 - ';' starts a comment until end of line. 46 - '#@nn' starts a comment of nn bytes. 47 - Integers are constituted of an optional prefix (#b, #B for binary, 48 #o, #O for octal, #x, #X for hexadecimal, #nnr, #nnR for any radix), 49 an optional sign (+ or -), the digits, and an optional trailing dot. 50 - Characters are written as '?' followed by the character, possibly 51 with an escape sequence, for examples '?a', '?\n', '?\177'. 52 - Strings are delimited by double quotes. Backslash introduces an escape 53 sequence. The following are understood: '\n', '\r', '\f', '\t', '\a', 54 '\\', '\^C', '\012' (octal), '\x12' (hexadecimal). 55 - Symbols: can contain meta-characters if preceded by backslash. 56 - Uninterned symbols: written as #:SYMBOL. 57 - () delimit lists. 58 - [] delimit vectors. 59 The reader is implemented in emacs-21.1/src/lread.c. */ 60 61 62/* ====================== Keyword set customization. ====================== */ 63 64/* If true extract all strings. */ 65static bool extract_all = false; 66 67static hash_table keywords; 68static bool default_keywords = true; 69 70 71void 72x_elisp_extract_all () 73{ 74 extract_all = true; 75} 76 77 78void 79x_elisp_keyword (const char *name) 80{ 81 if (name == NULL) 82 default_keywords = false; 83 else 84 { 85 const char *end; 86 struct callshape shape; 87 const char *colon; 88 89 if (keywords.table == NULL) 90 hash_init (&keywords, 100); 91 92 split_keywordspec (name, &end, &shape); 93 94 /* The characters between name and end should form a valid Lisp 95 symbol. */ 96 colon = strchr (name, ':'); 97 if (colon == NULL || colon >= end) 98 insert_keyword_callshape (&keywords, name, end - name, &shape); 99 } 100} 101 102/* Finish initializing the keywords hash table. 103 Called after argument processing, before each file is processed. */ 104static void 105init_keywords () 106{ 107 if (default_keywords) 108 { 109 /* When adding new keywords here, also update the documentation in 110 xgettext.texi! */ 111 x_elisp_keyword ("_"); 112 default_keywords = false; 113 } 114} 115 116void 117init_flag_table_elisp () 118{ 119 xgettext_record_flag ("_:1:pass-elisp-format"); 120 xgettext_record_flag ("format:1:elisp-format"); 121} 122 123 124/* ======================== Reading of characters. ======================== */ 125 126/* Real filename, used in error messages about the input file. */ 127static const char *real_file_name; 128 129/* Logical filename and line number, used to label the extracted messages. */ 130static char *logical_file_name; 131static int line_number; 132 133/* The input file stream. */ 134static FILE *fp; 135 136 137/* Fetch the next character from the input file. */ 138static int 139do_getc () 140{ 141 int c = getc (fp); 142 143 if (c == EOF) 144 { 145 if (ferror (fp)) 146 error (EXIT_FAILURE, errno, _("\ 147error while reading \"%s\""), real_file_name); 148 } 149 else if (c == '\n') 150 line_number++; 151 152 return c; 153} 154 155/* Put back the last fetched character, not EOF. */ 156static void 157do_ungetc (int c) 158{ 159 if (c == '\n') 160 line_number--; 161 ungetc (c, fp); 162} 163 164 165/* ========================== Reading of tokens. ========================== */ 166 167 168/* A token consists of a sequence of characters. */ 169struct token 170{ 171 int allocated; /* number of allocated 'token_char's */ 172 int charcount; /* number of used 'token_char's */ 173 char *chars; /* the token's constituents */ 174}; 175 176/* Initialize a 'struct token'. */ 177static inline void 178init_token (struct token *tp) 179{ 180 tp->allocated = 10; 181 tp->chars = XNMALLOC (tp->allocated, char); 182 tp->charcount = 0; 183} 184 185/* Free the memory pointed to by a 'struct token'. */ 186static inline void 187free_token (struct token *tp) 188{ 189 free (tp->chars); 190} 191 192/* Ensure there is enough room in the token for one more character. */ 193static inline void 194grow_token (struct token *tp) 195{ 196 if (tp->charcount == tp->allocated) 197 { 198 tp->allocated *= 2; 199 tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char)); 200 } 201} 202 203/* Test whether a token has integer syntax. */ 204static inline bool 205is_integer (const char *p) 206{ 207 /* NB: Yes, '+.' and '-.' both designate the integer 0. */ 208 const char *p_start = p; 209 210 if (*p == '+' || *p == '-') 211 p++; 212 if (*p == '\0') 213 return false; 214 while (*p >= '0' && *p <= '9') 215 p++; 216 if (p > p_start && *p == '.') 217 p++; 218 return (*p == '\0'); 219} 220 221/* Test whether a token has float syntax. */ 222static inline bool 223is_float (const char *p) 224{ 225 enum { LEAD_INT = 1, DOT_CHAR = 2, TRAIL_INT = 4, E_CHAR = 8, EXP_INT = 16 }; 226 int state; 227 228 state = 0; 229 if (*p == '+' || *p == '-') 230 p++; 231 if (*p >= '0' && *p <= '9') 232 { 233 state |= LEAD_INT; 234 do 235 p++; 236 while (*p >= '0' && *p <= '9'); 237 } 238 if (*p == '.') 239 { 240 state |= DOT_CHAR; 241 p++; 242 } 243 if (*p >= '0' && *p <= '9') 244 { 245 state |= TRAIL_INT; 246 do 247 p++; 248 while (*p >= '0' && *p <= '9'); 249 } 250 if (*p == 'e' || *p == 'E') 251 { 252 state |= E_CHAR; 253 p++; 254 if (*p == '+' || *p == '-') 255 p++; 256 if (*p >= '0' && *p <= '9') 257 { 258 state |= EXP_INT; 259 do 260 p++; 261 while (*p >= '0' && *p <= '9'); 262 } 263 else if (p[-1] == '+' 264 && ((p[0] == 'I' && p[1] == 'N' && p[2] == 'F') 265 || (p[0] == 'N' && p[1] == 'a' && p[2] == 'N'))) 266 { 267 state |= EXP_INT; 268 p += 3; 269 } 270 } 271 return (*p == '\0') 272 && (state == (LEAD_INT | DOT_CHAR | TRAIL_INT) 273 || state == (DOT_CHAR | TRAIL_INT) 274 || state == (LEAD_INT | E_CHAR | EXP_INT) 275 || state == (LEAD_INT | DOT_CHAR | TRAIL_INT | E_CHAR | EXP_INT) 276 || state == (DOT_CHAR | TRAIL_INT | E_CHAR | EXP_INT)); 277} 278 279/* Read the next token. 'first' is the first character, which has already 280 been read. Returns true for a symbol, false for a number. */ 281static bool 282read_token (struct token *tp, int first) 283{ 284 int c; 285 bool quoted = false; 286 287 init_token (tp); 288 289 c = first; 290 291 for (;; c = do_getc ()) 292 { 293 if (c == EOF) 294 break; 295 if (c <= ' ') /* FIXME: Assumes ASCII compatible encoding */ 296 break; 297 if (c == '\"' || c == '\'' || c == ';' || c == '(' || c == ')' 298 || c == '[' || c == ']' || c == '#') 299 break; 300 if (c == '\\') 301 { 302 quoted = true; 303 c = do_getc (); 304 if (c == EOF) 305 /* Invalid, but be tolerant. */ 306 break; 307 } 308 grow_token (tp); 309 tp->chars[tp->charcount++] = c; 310 } 311 if (c != EOF) 312 do_ungetc (c); 313 314 if (quoted) 315 return true; /* symbol */ 316 317 /* Add a NUL byte at the end, for is_integer and is_float. */ 318 grow_token (tp); 319 tp->chars[tp->charcount] = '\0'; 320 321 if (is_integer (tp->chars) || is_float (tp->chars)) 322 return false; /* number */ 323 else 324 return true; /* symbol */ 325} 326 327 328/* ========================= Accumulating comments ========================= */ 329 330 331static char *buffer; 332static size_t bufmax; 333static size_t buflen; 334 335static inline void 336comment_start () 337{ 338 buflen = 0; 339} 340 341static inline void 342comment_add (int c) 343{ 344 if (buflen >= bufmax) 345 { 346 bufmax = 2 * bufmax + 10; 347 buffer = xrealloc (buffer, bufmax); 348 } 349 buffer[buflen++] = c; 350} 351 352static inline void 353comment_line_end (size_t chars_to_remove) 354{ 355 buflen -= chars_to_remove; 356 while (buflen >= 1 357 && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t')) 358 --buflen; 359 if (chars_to_remove == 0 && buflen >= bufmax) 360 { 361 bufmax = 2 * bufmax + 10; 362 buffer = xrealloc (buffer, bufmax); 363 } 364 buffer[buflen] = '\0'; 365 savable_comment_add (buffer); 366} 367 368 369/* These are for tracking whether comments count as immediately before 370 keyword. */ 371static int last_comment_line; 372static int last_non_comment_line; 373 374 375/* ========================= Accumulating messages ========================= */ 376 377 378static message_list_ty *mlp; 379 380 381/* ============== Reading of objects. See CLHS 2 "Syntax". ============== */ 382 383 384/* We are only interested in symbols (e.g. GETTEXT or NGETTEXT) and strings. 385 Other objects need not to be represented precisely. */ 386enum object_type 387{ 388 t_symbol, /* symbol */ 389 t_string, /* string */ 390 t_other, /* other kind of real object */ 391 t_dot, /* '.' pseudo object */ 392 t_listclose, /* ')' pseudo object */ 393 t_vectorclose,/* ']' pseudo object */ 394 t_eof /* EOF marker */ 395}; 396 397struct object 398{ 399 enum object_type type; 400 struct token *token; /* for t_symbol and t_string */ 401 int line_number_at_start; /* for t_string */ 402}; 403 404/* Free the memory pointed to by a 'struct object'. */ 405static inline void 406free_object (struct object *op) 407{ 408 if (op->type == t_symbol || op->type == t_string) 409 { 410 free_token (op->token); 411 free (op->token); 412 } 413} 414 415/* Convert a t_symbol/t_string token to a char*. */ 416static char * 417string_of_object (const struct object *op) 418{ 419 char *str; 420 int n; 421 422 if (!(op->type == t_symbol || op->type == t_string)) 423 abort (); 424 n = op->token->charcount; 425 str = XNMALLOC (n + 1, char); 426 memcpy (str, op->token->chars, n); 427 str[n] = '\0'; 428 return str; 429} 430 431/* Context lookup table. */ 432static flag_context_list_table_ty *flag_context_list_table; 433 434/* Returns the character represented by an escape sequence. */ 435#define IGNORABLE_ESCAPE (EOF - 1) 436static int 437do_getc_escaped (int c, bool in_string) 438{ 439 switch (c) 440 { 441 case 'a': 442 return '\a'; 443 case 'b': 444 return '\b'; 445 case 'd': 446 return 0x7F; 447 case 'e': 448 return 0x1B; 449 case 'f': 450 return '\f'; 451 case 'n': 452 return '\n'; 453 case 'r': 454 return '\r'; 455 case 't': 456 return '\t'; 457 case 'v': 458 return '\v'; 459 460 case '\n': 461 return IGNORABLE_ESCAPE; 462 463 case ' ': 464 return (in_string ? IGNORABLE_ESCAPE : ' '); 465 466 case 'M': /* meta */ 467 c = do_getc (); 468 if (c == EOF) 469 return EOF; 470 if (c != '-') 471 /* Invalid input. But be tolerant. */ 472 return c; 473 c = do_getc (); 474 if (c == EOF) 475 return EOF; 476 if (c == '\\') 477 { 478 c = do_getc (); 479 if (c == EOF) 480 return EOF; 481 c = do_getc_escaped (c, false); 482 } 483 return c | 0x80; 484 485 case 'S': /* shift */ 486 c = do_getc (); 487 if (c == EOF) 488 return EOF; 489 if (c != '-') 490 /* Invalid input. But be tolerant. */ 491 return c; 492 c = do_getc (); 493 if (c == EOF) 494 return EOF; 495 if (c == '\\') 496 { 497 c = do_getc (); 498 if (c == EOF) 499 return EOF; 500 c = do_getc_escaped (c, false); 501 } 502 return (c >= 'a' && c <= 'z' ? c - 'a' + 'A' : c); 503 504 case 'H': /* hyper */ 505 case 'A': /* alt */ 506 case 's': /* super */ 507 c = do_getc (); 508 if (c == EOF) 509 return EOF; 510 if (c != '-') 511 /* Invalid input. But be tolerant. */ 512 return c; 513 c = do_getc (); 514 if (c == EOF) 515 return EOF; 516 if (c == '\\') 517 { 518 c = do_getc (); 519 if (c == EOF) 520 return EOF; 521 c = do_getc_escaped (c, false); 522 } 523 return c; 524 525 case 'C': /* ctrl */ 526 c = do_getc (); 527 if (c == EOF) 528 return EOF; 529 if (c != '-') 530 /* Invalid input. But be tolerant. */ 531 return c; 532 /*FALLTHROUGH*/ 533 case '^': 534 c = do_getc (); 535 if (c == EOF) 536 return EOF; 537 if (c == '\\') 538 { 539 c = do_getc (); 540 if (c == EOF) 541 return EOF; 542 c = do_getc_escaped (c, false); 543 } 544 if (c == '?') 545 return 0x7F; 546 if ((c & 0x5F) >= 0x41 && (c & 0x5F) <= 0x5A) 547 return c & 0x9F; 548 if ((c & 0x7F) >= 0x40 && (c & 0x7F) <= 0x5F) 549 return c & 0x9F; 550#if 0 /* We cannot handle NUL bytes in strings. */ 551 if (c == ' ') 552 return 0x00; 553#endif 554 return c; 555 556 case '0': case '1': case '2': case '3': case '4': 557 case '5': case '6': case '7': 558 /* An octal escape, as in ANSI C. */ 559 { 560 int n = c - '0'; 561 562 c = do_getc (); 563 if (c != EOF) 564 { 565 if (c >= '0' && c <= '7') 566 { 567 n = (n << 3) + (c - '0'); 568 c = do_getc (); 569 if (c != EOF) 570 { 571 if (c >= '0' && c <= '7') 572 n = (n << 3) + (c - '0'); 573 else 574 do_ungetc (c); 575 } 576 } 577 else 578 do_ungetc (c); 579 } 580 return (unsigned char) n; 581 } 582 583 case 'x': 584 /* A hexadecimal escape, as in ANSI C. */ 585 { 586 int n = 0; 587 588 for (;;) 589 { 590 c = do_getc (); 591 if (c == EOF) 592 break; 593 else if (c >= '0' && c <= '9') 594 n = (n << 4) + (c - '0'); 595 else if (c >= 'A' && c <= 'F') 596 n = (n << 4) + (c - 'A' + 10); 597 else if (c >= 'a' && c <= 'f') 598 n = (n << 4) + (c - 'a' + 10); 599 else 600 { 601 do_ungetc (c); 602 break; 603 } 604 } 605 return (unsigned char) n; 606 } 607 608 default: 609 /* Ignore Emacs multibyte character stuff. All the strings we are 610 interested in are ASCII strings. */ 611 return c; 612 } 613} 614 615/* Read the next object. 616 'first_in_list' and 'new_backquote_flag' are used for reading old 617 backquote syntax and new backquote syntax. */ 618static void 619read_object (struct object *op, bool first_in_list, bool new_backquote_flag, 620 flag_context_ty outer_context) 621{ 622 for (;;) 623 { 624 int c; 625 626 c = do_getc (); 627 628 switch (c) 629 { 630 case EOF: 631 op->type = t_eof; 632 return; 633 634 case '\n': 635 /* Comments assumed to be grouped with a message must immediately 636 precede it, with no non-whitespace token on a line between 637 both. */ 638 if (last_non_comment_line > last_comment_line) 639 savable_comment_reset (); 640 continue; 641 642 case '(': 643 { 644 int arg = 0; /* Current argument number. */ 645 flag_context_list_iterator_ty context_iter; 646 const struct callshapes *shapes = NULL; 647 struct arglist_parser *argparser = NULL; 648 649 for (;; arg++) 650 { 651 struct object inner; 652 flag_context_ty inner_context; 653 654 if (arg == 0) 655 inner_context = null_context; 656 else 657 inner_context = 658 inherited_context (outer_context, 659 flag_context_list_iterator_advance ( 660 &context_iter)); 661 662 read_object (&inner, arg == 0, new_backquote_flag, 663 inner_context); 664 665 /* Recognize end of list. */ 666 if (inner.type == t_listclose) 667 { 668 op->type = t_other; 669 /* Don't bother converting "()" to "NIL". */ 670 last_non_comment_line = line_number; 671 if (argparser != NULL) 672 arglist_parser_done (argparser, arg); 673 return; 674 } 675 676 /* Dots are not allowed in every position. ']' is not allowed. 677 But be tolerant. */ 678 679 /* EOF inside list is illegal. But be tolerant. */ 680 if (inner.type == t_eof) 681 break; 682 683 if (arg == 0) 684 { 685 /* This is the function position. */ 686 if (inner.type == t_symbol) 687 { 688 char *symbol_name = string_of_object (&inner); 689 void *keyword_value; 690 691 if (hash_find_entry (&keywords, 692 symbol_name, strlen (symbol_name), 693 &keyword_value) 694 == 0) 695 shapes = (const struct callshapes *) keyword_value; 696 697 argparser = arglist_parser_alloc (mlp, shapes); 698 699 context_iter = 700 flag_context_list_iterator ( 701 flag_context_list_table_lookup ( 702 flag_context_list_table, 703 symbol_name, strlen (symbol_name))); 704 705 free (symbol_name); 706 } 707 else 708 context_iter = null_context_list_iterator; 709 } 710 else 711 { 712 /* These are the argument positions. */ 713 if (argparser != NULL && inner.type == t_string) 714 arglist_parser_remember (argparser, arg, 715 string_of_object (&inner), 716 inner_context, 717 logical_file_name, 718 inner.line_number_at_start, 719 savable_comment); 720 } 721 722 free_object (&inner); 723 } 724 725 if (argparser != NULL) 726 arglist_parser_done (argparser, arg); 727 } 728 op->type = t_other; 729 last_non_comment_line = line_number; 730 return; 731 732 case ')': 733 /* Tell the caller about the end of list. 734 Unmatched closing parenthesis is illegal. But be tolerant. */ 735 op->type = t_listclose; 736 last_non_comment_line = line_number; 737 return; 738 739 case '[': 740 { 741 for (;;) 742 { 743 struct object inner; 744 745 read_object (&inner, false, new_backquote_flag, null_context); 746 747 /* Recognize end of vector. */ 748 if (inner.type == t_vectorclose) 749 { 750 op->type = t_other; 751 last_non_comment_line = line_number; 752 return; 753 } 754 755 /* Dots and ')' are not allowed. But be tolerant. */ 756 757 /* EOF inside vector is illegal. But be tolerant. */ 758 if (inner.type == t_eof) 759 break; 760 761 free_object (&inner); 762 } 763 } 764 op->type = t_other; 765 last_non_comment_line = line_number; 766 return; 767 768 case ']': 769 /* Tell the caller about the end of vector. 770 Unmatched closing bracket is illegal. But be tolerant. */ 771 op->type = t_vectorclose; 772 last_non_comment_line = line_number; 773 return; 774 775 case '\'': 776 { 777 struct object inner; 778 779 read_object (&inner, false, new_backquote_flag, null_context); 780 781 /* Dots and EOF are not allowed here. But be tolerant. */ 782 783 free_object (&inner); 784 785 op->type = t_other; 786 last_non_comment_line = line_number; 787 return; 788 } 789 790 case '`': 791 if (first_in_list) 792 goto default_label; 793 { 794 struct object inner; 795 796 read_object (&inner, false, true, null_context); 797 798 /* Dots and EOF are not allowed here. But be tolerant. */ 799 800 free_object (&inner); 801 802 op->type = t_other; 803 last_non_comment_line = line_number; 804 return; 805 } 806 807 case ',': 808 if (!new_backquote_flag) 809 goto default_label; 810 { 811 int c = do_getc (); 812 /* The ,@ handling inside lists is wrong anyway, because 813 ,@form expands to an unknown number of elements. */ 814 if (c != EOF && c != '@' && c != '.') 815 do_ungetc (c); 816 } 817 { 818 struct object inner; 819 820 read_object (&inner, false, false, null_context); 821 822 /* Dots and EOF are not allowed here. But be tolerant. */ 823 824 free_object (&inner); 825 826 op->type = t_other; 827 last_non_comment_line = line_number; 828 return; 829 } 830 831 case ';': 832 { 833 bool all_semicolons = true; 834 835 last_comment_line = line_number; 836 comment_start (); 837 for (;;) 838 { 839 int c = do_getc (); 840 if (c == EOF || c == '\n') 841 break; 842 if (c != ';') 843 all_semicolons = false; 844 if (!all_semicolons) 845 { 846 /* We skip all leading white space, but not EOLs. */ 847 if (!(buflen == 0 && (c == ' ' || c == '\t'))) 848 comment_add (c); 849 } 850 } 851 comment_line_end (0); 852 continue; 853 } 854 855 case '"': 856 { 857 op->token = XMALLOC (struct token); 858 init_token (op->token); 859 op->line_number_at_start = line_number; 860 for (;;) 861 { 862 int c = do_getc (); 863 if (c == EOF) 864 /* Invalid input. Be tolerant, no error message. */ 865 break; 866 if (c == '"') 867 break; 868 if (c == '\\') 869 { 870 c = do_getc (); 871 if (c == EOF) 872 /* Invalid input. Be tolerant, no error message. */ 873 break; 874 c = do_getc_escaped (c, true); 875 if (c == EOF) 876 /* Invalid input. Be tolerant, no error message. */ 877 break; 878 if (c == IGNORABLE_ESCAPE) 879 /* Ignore escaped newline and escaped space. */ 880 ; 881 else 882 { 883 grow_token (op->token); 884 op->token->chars[op->token->charcount++] = c; 885 } 886 } 887 else 888 { 889 grow_token (op->token); 890 op->token->chars[op->token->charcount++] = c; 891 } 892 } 893 op->type = t_string; 894 895 if (extract_all) 896 { 897 lex_pos_ty pos; 898 899 pos.file_name = logical_file_name; 900 pos.line_number = op->line_number_at_start; 901 remember_a_message (mlp, NULL, string_of_object (op), 902 null_context, &pos, savable_comment); 903 } 904 last_non_comment_line = line_number; 905 return; 906 } 907 908 case '?': 909 c = do_getc (); 910 if (c == EOF) 911 /* Invalid input. Be tolerant, no error message. */ 912 ; 913 else if (c == '\\') 914 { 915 c = do_getc (); 916 if (c == EOF) 917 /* Invalid input. Be tolerant, no error message. */ 918 ; 919 else 920 { 921 c = do_getc_escaped (c, false); 922 if (c == EOF) 923 /* Invalid input. Be tolerant, no error message. */ 924 ; 925 } 926 } 927 /* Impossible to deal with Emacs multibyte character stuff here. */ 928 op->type = t_other; 929 last_non_comment_line = line_number; 930 return; 931 932 case '#': 933 /* Dispatch macro handling. */ 934 c = do_getc (); 935 if (c == EOF) 936 /* Invalid input. Be tolerant, no error message. */ 937 { 938 op->type = t_other; 939 return; 940 } 941 942 switch (c) 943 { 944 case '^': 945 c = do_getc (); 946 if (c == '^') 947 c = do_getc (); 948 if (c == '[') 949 { 950 /* Read a char table, same syntax as a vector. */ 951 for (;;) 952 { 953 struct object inner; 954 955 read_object (&inner, false, new_backquote_flag, 956 null_context); 957 958 /* Recognize end of vector. */ 959 if (inner.type == t_vectorclose) 960 { 961 op->type = t_other; 962 last_non_comment_line = line_number; 963 return; 964 } 965 966 /* Dots and ')' are not allowed. But be tolerant. */ 967 968 /* EOF inside vector is illegal. But be tolerant. */ 969 if (inner.type == t_eof) 970 break; 971 972 free_object (&inner); 973 } 974 op->type = t_other; 975 last_non_comment_line = line_number; 976 return; 977 } 978 else 979 /* Invalid input. Be tolerant, no error message. */ 980 { 981 op->type = t_other; 982 if (c != EOF) 983 last_non_comment_line = line_number; 984 return; 985 } 986 987 case '&': 988 /* Read a bit vector. */ 989 { 990 struct object length; 991 read_object (&length, first_in_list, new_backquote_flag, 992 null_context); 993 /* Dots and EOF are not allowed here. 994 But be tolerant. */ 995 free_object (&length); 996 } 997 c = do_getc (); 998 if (c == '"') 999 { 1000 struct object string; 1001 read_object (&string, first_in_list, new_backquote_flag, 1002 null_context); 1003 free_object (&string); 1004 } 1005 else 1006 /* Invalid input. Be tolerant, no error message. */ 1007 do_ungetc (c); 1008 op->type = t_other; 1009 last_non_comment_line = line_number; 1010 return; 1011 1012 case '[': 1013 /* Read a compiled function, same syntax as a vector. */ 1014 case '(': 1015 /* Read a string with properties, same syntax as a list. */ 1016 { 1017 struct object inner; 1018 do_ungetc (c); 1019 read_object (&inner, false, new_backquote_flag, null_context); 1020 /* Dots and EOF are not allowed here. 1021 But be tolerant. */ 1022 free_object (&inner); 1023 op->type = t_other; 1024 last_non_comment_line = line_number; 1025 return; 1026 } 1027 1028 case '@': 1029 /* Read a comment consisting of a given number of bytes. */ 1030 { 1031 unsigned int nskip = 0; 1032 1033 for (;;) 1034 { 1035 c = do_getc (); 1036 if (!(c >= '0' && c <= '9')) 1037 break; 1038 nskip = 10 * nskip + (c - '0'); 1039 } 1040 if (c != EOF) 1041 { 1042 do_ungetc (c); 1043 for (; nskip > 0; nskip--) 1044 if (do_getc () == EOF) 1045 break; 1046 } 1047 continue; 1048 } 1049 1050 case '$': 1051 op->type = t_other; 1052 last_non_comment_line = line_number; 1053 return; 1054 1055 case '\'': 1056 case ':': 1057 case 'S': case 's': /* XEmacs only */ 1058 { 1059 struct object inner; 1060 read_object (&inner, false, new_backquote_flag, null_context); 1061 /* Dots and EOF are not allowed here. 1062 But be tolerant. */ 1063 free_object (&inner); 1064 op->type = t_other; 1065 last_non_comment_line = line_number; 1066 return; 1067 } 1068 1069 case '0': case '1': case '2': case '3': case '4': 1070 case '5': case '6': case '7': case '8': case '9': 1071 /* Read Common Lisp style #n# or #n=. */ 1072 for (;;) 1073 { 1074 c = do_getc (); 1075 if (!(c >= '0' && c <= '9')) 1076 break; 1077 } 1078 if (c == EOF) 1079 /* Invalid input. Be tolerant, no error message. */ 1080 { 1081 op->type = t_other; 1082 return; 1083 } 1084 if (c == '=') 1085 { 1086 read_object (op, false, new_backquote_flag, outer_context); 1087 last_non_comment_line = line_number; 1088 return; 1089 } 1090 if (c == '#') 1091 { 1092 op->type = t_other; 1093 last_non_comment_line = line_number; 1094 return; 1095 } 1096 if (c == 'R' || c == 'r') 1097 { 1098 /* Read an integer. */ 1099 c = do_getc (); 1100 if (c == '+' || c == '-') 1101 c = do_getc (); 1102 for (; c != EOF; c = do_getc ()) 1103 if (!c_isalnum (c)) 1104 { 1105 do_ungetc (c); 1106 break; 1107 } 1108 op->type = t_other; 1109 last_non_comment_line = line_number; 1110 return; 1111 } 1112 /* Invalid input. Be tolerant, no error message. */ 1113 op->type = t_other; 1114 last_non_comment_line = line_number; 1115 return; 1116 1117 case 'X': case 'x': 1118 case 'O': case 'o': 1119 case 'B': case 'b': 1120 { 1121 /* Read an integer. */ 1122 c = do_getc (); 1123 if (c == '+' || c == '-') 1124 c = do_getc (); 1125 for (; c != EOF; c = do_getc ()) 1126 if (!c_isalnum (c)) 1127 { 1128 do_ungetc (c); 1129 break; 1130 } 1131 op->type = t_other; 1132 last_non_comment_line = line_number; 1133 return; 1134 } 1135 1136 case '*': /* XEmacs only */ 1137 { 1138 /* Read a bit-vector. */ 1139 do 1140 c = do_getc (); 1141 while (c == '0' || c == '1'); 1142 if (c != EOF) 1143 do_ungetc (c); 1144 op->type = t_other; 1145 last_non_comment_line = line_number; 1146 return; 1147 } 1148 1149 case '+': /* XEmacs only */ 1150 case '-': /* XEmacs only */ 1151 /* Simply assume every feature expression is true. */ 1152 { 1153 struct object inner; 1154 read_object (&inner, false, new_backquote_flag, null_context); 1155 /* Dots and EOF are not allowed here. 1156 But be tolerant. */ 1157 free_object (&inner); 1158 continue; 1159 } 1160 1161 default: 1162 /* Invalid input. Be tolerant, no error message. */ 1163 op->type = t_other; 1164 last_non_comment_line = line_number; 1165 return; 1166 } 1167 1168 /*NOTREACHED*/ 1169 abort (); 1170 1171 case '.': 1172 c = do_getc (); 1173 if (c != EOF) 1174 { 1175 do_ungetc (c); 1176 if (c <= ' ' /* FIXME: Assumes ASCII compatible encoding */ 1177 || strchr ("\"'`,(", c) != NULL) 1178 { 1179 op->type = t_dot; 1180 last_non_comment_line = line_number; 1181 return; 1182 } 1183 } 1184 c = '.'; 1185 /*FALLTHROUGH*/ 1186 default: 1187 default_label: 1188 if (c <= ' ') /* FIXME: Assumes ASCII compatible encoding */ 1189 continue; 1190 /* Read a token. */ 1191 { 1192 bool symbol; 1193 1194 op->token = XMALLOC (struct token); 1195 symbol = read_token (op->token, c); 1196 if (symbol) 1197 { 1198 op->type = t_symbol; 1199 last_non_comment_line = line_number; 1200 return; 1201 } 1202 else 1203 { 1204 free_token (op->token); 1205 free (op->token); 1206 op->type = t_other; 1207 last_non_comment_line = line_number; 1208 return; 1209 } 1210 } 1211 } 1212 } 1213} 1214 1215 1216void 1217extract_elisp (FILE *f, 1218 const char *real_filename, const char *logical_filename, 1219 flag_context_list_table_ty *flag_table, 1220 msgdomain_list_ty *mdlp) 1221{ 1222 mlp = mdlp->item[0]->messages; 1223 1224 fp = f; 1225 real_file_name = real_filename; 1226 logical_file_name = xstrdup (logical_filename); 1227 line_number = 1; 1228 1229 last_comment_line = -1; 1230 last_non_comment_line = -1; 1231 1232 flag_context_list_table = flag_table; 1233 1234 init_keywords (); 1235 1236 /* Eat tokens until eof is seen. When read_object returns 1237 due to an unbalanced closing parenthesis, just restart it. */ 1238 do 1239 { 1240 struct object toplevel_object; 1241 1242 read_object (&toplevel_object, false, false, null_context); 1243 1244 if (toplevel_object.type == t_eof) 1245 break; 1246 1247 free_object (&toplevel_object); 1248 } 1249 while (!feof (fp)); 1250 1251 /* Close scanner. */ 1252 fp = NULL; 1253 real_file_name = NULL; 1254 logical_file_name = NULL; 1255 line_number = 0; 1256} 1257