1/* xgettext Emacs Lisp backend. 2 Copyright (C) 2001-2003, 2005-2006 Free Software Foundation, Inc. 3 4 This file was written by Bruno Haible <haible@clisp.cons.org>, 2001-2002. 5 6 This program is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 2, or (at your option) 9 any later version. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program; if not, write to the Free Software Foundation, 18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 19 20#ifdef HAVE_CONFIG_H 21# include "config.h" 22#endif 23 24#include <errno.h> 25#include <stdbool.h> 26#include <stdio.h> 27#include <stdlib.h> 28#include <string.h> 29 30#include "message.h" 31#include "xgettext.h" 32#include "x-elisp.h" 33#include "error.h" 34#include "xalloc.h" 35#include "exit.h" 36#include "hash.h" 37#include "c-ctype.h" 38#include "gettext.h" 39 40#define _(s) gettext(s) 41 42 43/* Summary of Emacs Lisp syntax: 44 - ';' starts a comment until end of line. 45 - '#@nn' starts a comment of nn bytes. 46 - Integers are constituted of an optional prefix (#b, #B for binary, 47 #o, #O for octal, #x, #X for hexadecimal, #nnr, #nnR for any radix), 48 an optional sign (+ or -), the digits, and an optional trailing dot. 49 - Characters are written as '?' followed by the character, possibly 50 with an escape sequence, for examples '?a', '?\n', '?\177'. 51 - Strings are delimited by double quotes. Backslash introduces an escape 52 sequence. The following are understood: '\n', '\r', '\f', '\t', '\a', 53 '\\', '\^C', '\012' (octal), '\x12' (hexadecimal). 54 - Symbols: can contain meta-characters if preceded by backslash. 55 - Uninterned symbols: written as #:SYMBOL. 56 - () delimit lists. 57 - [] delimit vectors. 58 The reader is implemented in emacs-21.1/src/lread.c. */ 59 60 61/* ====================== Keyword set customization. ====================== */ 62 63/* If true extract all strings. */ 64static bool extract_all = false; 65 66static hash_table keywords; 67static bool default_keywords = true; 68 69 70void 71x_elisp_extract_all () 72{ 73 extract_all = true; 74} 75 76 77void 78x_elisp_keyword (const char *name) 79{ 80 if (name == NULL) 81 default_keywords = false; 82 else 83 { 84 const char *end; 85 struct callshape shape; 86 const char *colon; 87 88 if (keywords.table == NULL) 89 hash_init (&keywords, 100); 90 91 split_keywordspec (name, &end, &shape); 92 93 /* The characters between name and end should form a valid Lisp 94 symbol. */ 95 colon = strchr (name, ':'); 96 if (colon == NULL || colon >= end) 97 insert_keyword_callshape (&keywords, name, end - name, &shape); 98 } 99} 100 101/* Finish initializing the keywords hash table. 102 Called after argument processing, before each file is processed. */ 103static void 104init_keywords () 105{ 106 if (default_keywords) 107 { 108 /* When adding new keywords here, also update the documentation in 109 xgettext.texi! */ 110 x_elisp_keyword ("_"); 111 default_keywords = false; 112 } 113} 114 115void 116init_flag_table_elisp () 117{ 118 xgettext_record_flag ("_:1:pass-elisp-format"); 119 xgettext_record_flag ("format:1:elisp-format"); 120} 121 122 123/* ======================== Reading of characters. ======================== */ 124 125/* Real filename, used in error messages about the input file. */ 126static const char *real_file_name; 127 128/* Logical filename and line number, used to label the extracted messages. */ 129static char *logical_file_name; 130static int line_number; 131 132/* The input file stream. */ 133static FILE *fp; 134 135 136/* Fetch the next character from the input file. */ 137static int 138do_getc () 139{ 140 int c = getc (fp); 141 142 if (c == EOF) 143 { 144 if (ferror (fp)) 145 error (EXIT_FAILURE, errno, _("\ 146error while reading \"%s\""), real_file_name); 147 } 148 else if (c == '\n') 149 line_number++; 150 151 return c; 152} 153 154/* Put back the last fetched character, not EOF. */ 155static void 156do_ungetc (int c) 157{ 158 if (c == '\n') 159 line_number--; 160 ungetc (c, fp); 161} 162 163 164/* ========================== Reading of tokens. ========================== */ 165 166 167/* A token consists of a sequence of characters. */ 168struct token 169{ 170 int allocated; /* number of allocated 'token_char's */ 171 int charcount; /* number of used 'token_char's */ 172 char *chars; /* the token's constituents */ 173}; 174 175/* Initialize a 'struct token'. */ 176static inline void 177init_token (struct token *tp) 178{ 179 tp->allocated = 10; 180 tp->chars = (char *) xmalloc (tp->allocated * sizeof (char)); 181 tp->charcount = 0; 182} 183 184/* Free the memory pointed to by a 'struct token'. */ 185static inline void 186free_token (struct token *tp) 187{ 188 free (tp->chars); 189} 190 191/* Ensure there is enough room in the token for one more character. */ 192static inline void 193grow_token (struct token *tp) 194{ 195 if (tp->charcount == tp->allocated) 196 { 197 tp->allocated *= 2; 198 tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char)); 199 } 200} 201 202/* Test whether a token has integer syntax. */ 203static inline bool 204is_integer (const char *p) 205{ 206 /* NB: Yes, '+.' and '-.' both designate the integer 0. */ 207 const char *p_start = p; 208 209 if (*p == '+' || *p == '-') 210 p++; 211 if (*p == '\0') 212 return false; 213 while (*p >= '0' && *p <= '9') 214 p++; 215 if (p > p_start && *p == '.') 216 p++; 217 return (*p == '\0'); 218} 219 220/* Test whether a token has float syntax. */ 221static inline bool 222is_float (const char *p) 223{ 224 enum { LEAD_INT = 1, DOT_CHAR = 2, TRAIL_INT = 4, E_CHAR = 8, EXP_INT = 16 }; 225 int state; 226 227 state = 0; 228 if (*p == '+' || *p == '-') 229 p++; 230 if (*p >= '0' && *p <= '9') 231 { 232 state |= LEAD_INT; 233 do 234 p++; 235 while (*p >= '0' && *p <= '9'); 236 } 237 if (*p == '.') 238 { 239 state |= DOT_CHAR; 240 p++; 241 } 242 if (*p >= '0' && *p <= '9') 243 { 244 state |= TRAIL_INT; 245 do 246 p++; 247 while (*p >= '0' && *p <= '9'); 248 } 249 if (*p == 'e' || *p == 'E') 250 { 251 state |= E_CHAR; 252 p++; 253 if (*p == '+' || *p == '-') 254 p++; 255 if (*p >= '0' && *p <= '9') 256 { 257 state |= EXP_INT; 258 do 259 p++; 260 while (*p >= '0' && *p <= '9'); 261 } 262 else if (p[-1] == '+' 263 && ((p[0] == 'I' && p[1] == 'N' && p[2] == 'F') 264 || (p[0] == 'N' && p[1] == 'a' && p[2] == 'N'))) 265 { 266 state |= EXP_INT; 267 p += 3; 268 } 269 } 270 return (*p == '\0') 271 && (state == (LEAD_INT | DOT_CHAR | TRAIL_INT) 272 || state == (DOT_CHAR | TRAIL_INT) 273 || state == (LEAD_INT | E_CHAR | EXP_INT) 274 || state == (LEAD_INT | DOT_CHAR | TRAIL_INT | E_CHAR | EXP_INT) 275 || state == (DOT_CHAR | TRAIL_INT | E_CHAR | EXP_INT)); 276} 277 278/* Read the next token. 'first' is the first character, which has already 279 been read. Returns true for a symbol, false for a number. */ 280static bool 281read_token (struct token *tp, int first) 282{ 283 int c; 284 bool quoted = false; 285 286 init_token (tp); 287 288 c = first; 289 290 for (;; c = do_getc ()) 291 { 292 if (c == EOF) 293 break; 294 if (c <= ' ') /* FIXME: Assumes ASCII compatible encoding */ 295 break; 296 if (c == '\"' || c == '\'' || c == ';' || c == '(' || c == ')' 297 || c == '[' || c == ']' || c == '#') 298 break; 299 if (c == '\\') 300 { 301 quoted = true; 302 c = do_getc (); 303 if (c == EOF) 304 /* Invalid, but be tolerant. */ 305 break; 306 } 307 grow_token (tp); 308 tp->chars[tp->charcount++] = c; 309 } 310 if (c != EOF) 311 do_ungetc (c); 312 313 if (quoted) 314 return true; /* symbol */ 315 316 /* Add a NUL byte at the end, for is_integer and is_float. */ 317 grow_token (tp); 318 tp->chars[tp->charcount] = '\0'; 319 320 if (is_integer (tp->chars) || is_float (tp->chars)) 321 return false; /* number */ 322 else 323 return true; /* symbol */ 324} 325 326 327/* ========================= Accumulating comments ========================= */ 328 329 330static char *buffer; 331static size_t bufmax; 332static size_t buflen; 333 334static inline void 335comment_start () 336{ 337 buflen = 0; 338} 339 340static inline void 341comment_add (int c) 342{ 343 if (buflen >= bufmax) 344 { 345 bufmax = 2 * bufmax + 10; 346 buffer = xrealloc (buffer, bufmax); 347 } 348 buffer[buflen++] = c; 349} 350 351static inline void 352comment_line_end (size_t chars_to_remove) 353{ 354 buflen -= chars_to_remove; 355 while (buflen >= 1 356 && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t')) 357 --buflen; 358 if (chars_to_remove == 0 && buflen >= bufmax) 359 { 360 bufmax = 2 * bufmax + 10; 361 buffer = xrealloc (buffer, bufmax); 362 } 363 buffer[buflen] = '\0'; 364 savable_comment_add (buffer); 365} 366 367 368/* These are for tracking whether comments count as immediately before 369 keyword. */ 370static int last_comment_line; 371static int last_non_comment_line; 372 373 374/* ========================= Accumulating messages ========================= */ 375 376 377static message_list_ty *mlp; 378 379 380/* ============== Reading of objects. See CLHS 2 "Syntax". ============== */ 381 382 383/* We are only interested in symbols (e.g. GETTEXT or NGETTEXT) and strings. 384 Other objects need not to be represented precisely. */ 385enum object_type 386{ 387 t_symbol, /* symbol */ 388 t_string, /* string */ 389 t_other, /* other kind of real object */ 390 t_dot, /* '.' pseudo object */ 391 t_listclose, /* ')' pseudo object */ 392 t_vectorclose,/* ']' pseudo object */ 393 t_eof /* EOF marker */ 394}; 395 396struct object 397{ 398 enum object_type type; 399 struct token *token; /* for t_symbol and t_string */ 400 int line_number_at_start; /* for t_string */ 401}; 402 403/* Free the memory pointed to by a 'struct object'. */ 404static inline void 405free_object (struct object *op) 406{ 407 if (op->type == t_symbol || op->type == t_string) 408 { 409 free_token (op->token); 410 free (op->token); 411 } 412} 413 414/* Convert a t_symbol/t_string token to a char*. */ 415static char * 416string_of_object (const struct object *op) 417{ 418 char *str; 419 int n; 420 421 if (!(op->type == t_symbol || op->type == t_string)) 422 abort (); 423 n = op->token->charcount; 424 str = (char *) xmalloc (n + 1); 425 memcpy (str, op->token->chars, n); 426 str[n] = '\0'; 427 return str; 428} 429 430/* Context lookup table. */ 431static flag_context_list_table_ty *flag_context_list_table; 432 433/* Returns the character represented by an escape sequence. */ 434#define IGNORABLE_ESCAPE (EOF - 1) 435static int 436do_getc_escaped (int c, bool in_string) 437{ 438 switch (c) 439 { 440 case 'a': 441 return '\a'; 442 case 'b': 443 return '\b'; 444 case 'd': 445 return 0x7F; 446 case 'e': 447 return 0x1B; 448 case 'f': 449 return '\f'; 450 case 'n': 451 return '\n'; 452 case 'r': 453 return '\r'; 454 case 't': 455 return '\t'; 456 case 'v': 457 return '\v'; 458 459 case '\n': 460 return IGNORABLE_ESCAPE; 461 462 case ' ': 463 return (in_string ? IGNORABLE_ESCAPE : ' '); 464 465 case 'M': /* meta */ 466 c = do_getc (); 467 if (c == EOF) 468 return EOF; 469 if (c != '-') 470 /* Invalid input. But be tolerant. */ 471 return c; 472 c = do_getc (); 473 if (c == EOF) 474 return EOF; 475 if (c == '\\') 476 { 477 c = do_getc (); 478 if (c == EOF) 479 return EOF; 480 c = do_getc_escaped (c, false); 481 } 482 return c | 0x80; 483 484 case 'S': /* shift */ 485 c = do_getc (); 486 if (c == EOF) 487 return EOF; 488 if (c != '-') 489 /* Invalid input. But be tolerant. */ 490 return c; 491 c = do_getc (); 492 if (c == EOF) 493 return EOF; 494 if (c == '\\') 495 { 496 c = do_getc (); 497 if (c == EOF) 498 return EOF; 499 c = do_getc_escaped (c, false); 500 } 501 return (c >= 'a' && c <= 'z' ? c - 'a' + 'A' : c); 502 503 case 'H': /* hyper */ 504 case 'A': /* alt */ 505 case 's': /* super */ 506 c = do_getc (); 507 if (c == EOF) 508 return EOF; 509 if (c != '-') 510 /* Invalid input. But be tolerant. */ 511 return c; 512 c = do_getc (); 513 if (c == EOF) 514 return EOF; 515 if (c == '\\') 516 { 517 c = do_getc (); 518 if (c == EOF) 519 return EOF; 520 c = do_getc_escaped (c, false); 521 } 522 return c; 523 524 case 'C': /* ctrl */ 525 c = do_getc (); 526 if (c == EOF) 527 return EOF; 528 if (c != '-') 529 /* Invalid input. But be tolerant. */ 530 return c; 531 /*FALLTHROUGH*/ 532 case '^': 533 c = do_getc (); 534 if (c == EOF) 535 return EOF; 536 if (c == '\\') 537 { 538 c = do_getc (); 539 if (c == EOF) 540 return EOF; 541 c = do_getc_escaped (c, false); 542 } 543 if (c == '?') 544 return 0x7F; 545 if ((c & 0x5F) >= 0x41 && (c & 0x5F) <= 0x5A) 546 return c & 0x9F; 547 if ((c & 0x7F) >= 0x40 && (c & 0x7F) <= 0x5F) 548 return c & 0x9F; 549#if 0 /* We cannot handle NUL bytes in strings. */ 550 if (c == ' ') 551 return 0x00; 552#endif 553 return c; 554 555 case '0': case '1': case '2': case '3': case '4': 556 case '5': case '6': case '7': 557 /* An octal escape, as in ANSI C. */ 558 { 559 int n = c - '0'; 560 561 c = do_getc (); 562 if (c != EOF) 563 { 564 if (c >= '0' && c <= '7') 565 { 566 n = (n << 3) + (c - '0'); 567 c = do_getc (); 568 if (c != EOF) 569 { 570 if (c >= '0' && c <= '7') 571 n = (n << 3) + (c - '0'); 572 else 573 do_ungetc (c); 574 } 575 } 576 else 577 do_ungetc (c); 578 } 579 return (unsigned char) n; 580 } 581 582 case 'x': 583 /* A hexadecimal escape, as in ANSI C. */ 584 { 585 int n = 0; 586 587 for (;;) 588 { 589 c = do_getc (); 590 if (c == EOF) 591 break; 592 else if (c >= '0' && c <= '9') 593 n = (n << 4) + (c - '0'); 594 else if (c >= 'A' && c <= 'F') 595 n = (n << 4) + (c - 'A' + 10); 596 else if (c >= 'a' && c <= 'f') 597 n = (n << 4) + (c - 'a' + 10); 598 else 599 { 600 do_ungetc (c); 601 break; 602 } 603 } 604 return (unsigned char) n; 605 } 606 607 default: 608 /* Ignore Emacs multibyte character stuff. All the strings we are 609 interested in are ASCII strings. */ 610 return c; 611 } 612} 613 614/* Read the next object. 615 'first_in_list' and 'new_backquote_flag' are used for reading old 616 backquote syntax and new backquote syntax. */ 617static void 618read_object (struct object *op, bool first_in_list, bool new_backquote_flag, 619 flag_context_ty outer_context) 620{ 621 for (;;) 622 { 623 int c; 624 625 c = do_getc (); 626 627 switch (c) 628 { 629 case EOF: 630 op->type = t_eof; 631 return; 632 633 case '\n': 634 /* Comments assumed to be grouped with a message must immediately 635 precede it, with no non-whitespace token on a line between 636 both. */ 637 if (last_non_comment_line > last_comment_line) 638 savable_comment_reset (); 639 continue; 640 641 case '(': 642 { 643 int arg = 0; /* Current argument number. */ 644 flag_context_list_iterator_ty context_iter; 645 const struct callshapes *shapes = NULL; 646 struct arglist_parser *argparser = NULL; 647 648 for (;; arg++) 649 { 650 struct object inner; 651 flag_context_ty inner_context; 652 653 if (arg == 0) 654 inner_context = null_context; 655 else 656 inner_context = 657 inherited_context (outer_context, 658 flag_context_list_iterator_advance ( 659 &context_iter)); 660 661 read_object (&inner, arg == 0, new_backquote_flag, 662 inner_context); 663 664 /* Recognize end of list. */ 665 if (inner.type == t_listclose) 666 { 667 op->type = t_other; 668 /* Don't bother converting "()" to "NIL". */ 669 last_non_comment_line = line_number; 670 if (argparser != NULL) 671 arglist_parser_done (argparser, arg); 672 return; 673 } 674 675 /* Dots are not allowed in every position. ']' is not allowed. 676 But be tolerant. */ 677 678 /* EOF inside list is illegal. But be tolerant. */ 679 if (inner.type == t_eof) 680 break; 681 682 if (arg == 0) 683 { 684 /* This is the function position. */ 685 if (inner.type == t_symbol) 686 { 687 char *symbol_name = string_of_object (&inner); 688 void *keyword_value; 689 690 if (hash_find_entry (&keywords, 691 symbol_name, strlen (symbol_name), 692 &keyword_value) 693 == 0) 694 shapes = (const struct callshapes *) keyword_value; 695 696 argparser = arglist_parser_alloc (mlp, shapes); 697 698 context_iter = 699 flag_context_list_iterator ( 700 flag_context_list_table_lookup ( 701 flag_context_list_table, 702 symbol_name, strlen (symbol_name))); 703 704 free (symbol_name); 705 } 706 else 707 context_iter = null_context_list_iterator; 708 } 709 else 710 { 711 /* These are the argument positions. */ 712 if (argparser != NULL && inner.type == t_string) 713 arglist_parser_remember (argparser, arg, 714 string_of_object (&inner), 715 inner_context, 716 logical_file_name, 717 inner.line_number_at_start, 718 savable_comment); 719 } 720 721 free_object (&inner); 722 } 723 724 if (argparser != NULL) 725 arglist_parser_done (argparser, arg); 726 } 727 op->type = t_other; 728 last_non_comment_line = line_number; 729 return; 730 731 case ')': 732 /* Tell the caller about the end of list. 733 Unmatched closing parenthesis is illegal. But be tolerant. */ 734 op->type = t_listclose; 735 last_non_comment_line = line_number; 736 return; 737 738 case '[': 739 { 740 for (;;) 741 { 742 struct object inner; 743 744 read_object (&inner, false, new_backquote_flag, null_context); 745 746 /* Recognize end of vector. */ 747 if (inner.type == t_vectorclose) 748 { 749 op->type = t_other; 750 last_non_comment_line = line_number; 751 return; 752 } 753 754 /* Dots and ')' are not allowed. But be tolerant. */ 755 756 /* EOF inside vector is illegal. But be tolerant. */ 757 if (inner.type == t_eof) 758 break; 759 760 free_object (&inner); 761 } 762 } 763 op->type = t_other; 764 last_non_comment_line = line_number; 765 return; 766 767 case ']': 768 /* Tell the caller about the end of vector. 769 Unmatched closing bracket is illegal. But be tolerant. */ 770 op->type = t_vectorclose; 771 last_non_comment_line = line_number; 772 return; 773 774 case '\'': 775 { 776 struct object inner; 777 778 read_object (&inner, false, new_backquote_flag, null_context); 779 780 /* Dots and EOF are not allowed here. But be tolerant. */ 781 782 free_object (&inner); 783 784 op->type = t_other; 785 last_non_comment_line = line_number; 786 return; 787 } 788 789 case '`': 790 if (first_in_list) 791 goto default_label; 792 { 793 struct object inner; 794 795 read_object (&inner, false, true, null_context); 796 797 /* Dots and EOF are not allowed here. But be tolerant. */ 798 799 free_object (&inner); 800 801 op->type = t_other; 802 last_non_comment_line = line_number; 803 return; 804 } 805 806 case ',': 807 if (!new_backquote_flag) 808 goto default_label; 809 { 810 int c = do_getc (); 811 /* The ,@ handling inside lists is wrong anyway, because 812 ,@form expands to an unknown number of elements. */ 813 if (c != EOF && c != '@' && c != '.') 814 do_ungetc (c); 815 } 816 { 817 struct object inner; 818 819 read_object (&inner, false, false, null_context); 820 821 /* Dots and EOF are not allowed here. But be tolerant. */ 822 823 free_object (&inner); 824 825 op->type = t_other; 826 last_non_comment_line = line_number; 827 return; 828 } 829 830 case ';': 831 { 832 bool all_semicolons = true; 833 834 last_comment_line = line_number; 835 comment_start (); 836 for (;;) 837 { 838 int c = do_getc (); 839 if (c == EOF || c == '\n') 840 break; 841 if (c != ';') 842 all_semicolons = false; 843 if (!all_semicolons) 844 { 845 /* We skip all leading white space, but not EOLs. */ 846 if (!(buflen == 0 && (c == ' ' || c == '\t'))) 847 comment_add (c); 848 } 849 } 850 comment_line_end (0); 851 continue; 852 } 853 854 case '"': 855 { 856 op->token = (struct token *) xmalloc (sizeof (struct token)); 857 init_token (op->token); 858 op->line_number_at_start = line_number; 859 for (;;) 860 { 861 int c = do_getc (); 862 if (c == EOF) 863 /* Invalid input. Be tolerant, no error message. */ 864 break; 865 if (c == '"') 866 break; 867 if (c == '\\') 868 { 869 c = do_getc (); 870 if (c == EOF) 871 /* Invalid input. Be tolerant, no error message. */ 872 break; 873 c = do_getc_escaped (c, true); 874 if (c == EOF) 875 /* Invalid input. Be tolerant, no error message. */ 876 break; 877 if (c == IGNORABLE_ESCAPE) 878 /* Ignore escaped newline and escaped space. */ 879 ; 880 else 881 { 882 grow_token (op->token); 883 op->token->chars[op->token->charcount++] = c; 884 } 885 } 886 else 887 { 888 grow_token (op->token); 889 op->token->chars[op->token->charcount++] = c; 890 } 891 } 892 op->type = t_string; 893 894 if (extract_all) 895 { 896 lex_pos_ty pos; 897 898 pos.file_name = logical_file_name; 899 pos.line_number = op->line_number_at_start; 900 remember_a_message (mlp, NULL, string_of_object (op), 901 null_context, &pos, savable_comment); 902 } 903 last_non_comment_line = line_number; 904 return; 905 } 906 907 case '?': 908 c = do_getc (); 909 if (c == EOF) 910 /* Invalid input. Be tolerant, no error message. */ 911 ; 912 else if (c == '\\') 913 { 914 c = do_getc (); 915 if (c == EOF) 916 /* Invalid input. Be tolerant, no error message. */ 917 ; 918 else 919 { 920 c = do_getc_escaped (c, false); 921 if (c == EOF) 922 /* Invalid input. Be tolerant, no error message. */ 923 ; 924 } 925 } 926 /* Impossible to deal with Emacs multibyte character stuff here. */ 927 op->type = t_other; 928 last_non_comment_line = line_number; 929 return; 930 931 case '#': 932 /* Dispatch macro handling. */ 933 c = do_getc (); 934 if (c == EOF) 935 /* Invalid input. Be tolerant, no error message. */ 936 { 937 op->type = t_other; 938 return; 939 } 940 941 switch (c) 942 { 943 case '^': 944 c = do_getc (); 945 if (c == '^') 946 c = do_getc (); 947 if (c == '[') 948 { 949 /* Read a char table, same syntax as a vector. */ 950 for (;;) 951 { 952 struct object inner; 953 954 read_object (&inner, false, new_backquote_flag, 955 null_context); 956 957 /* Recognize end of vector. */ 958 if (inner.type == t_vectorclose) 959 { 960 op->type = t_other; 961 last_non_comment_line = line_number; 962 return; 963 } 964 965 /* Dots and ')' are not allowed. But be tolerant. */ 966 967 /* EOF inside vector is illegal. But be tolerant. */ 968 if (inner.type == t_eof) 969 break; 970 971 free_object (&inner); 972 } 973 op->type = t_other; 974 last_non_comment_line = line_number; 975 return; 976 } 977 else 978 /* Invalid input. Be tolerant, no error message. */ 979 { 980 op->type = t_other; 981 if (c != EOF) 982 last_non_comment_line = line_number; 983 return; 984 } 985 986 case '&': 987 /* Read a bit vector. */ 988 { 989 struct object length; 990 read_object (&length, first_in_list, new_backquote_flag, 991 null_context); 992 /* Dots and EOF are not allowed here. 993 But be tolerant. */ 994 free_object (&length); 995 } 996 c = do_getc (); 997 if (c == '"') 998 { 999 struct object string; 1000 read_object (&string, first_in_list, new_backquote_flag, 1001 null_context); 1002 free_object (&string); 1003 } 1004 else 1005 /* Invalid input. Be tolerant, no error message. */ 1006 do_ungetc (c); 1007 op->type = t_other; 1008 last_non_comment_line = line_number; 1009 return; 1010 1011 case '[': 1012 /* Read a compiled function, same syntax as a vector. */ 1013 case '(': 1014 /* Read a string with properties, same syntax as a list. */ 1015 { 1016 struct object inner; 1017 do_ungetc (c); 1018 read_object (&inner, false, new_backquote_flag, null_context); 1019 /* Dots and EOF are not allowed here. 1020 But be tolerant. */ 1021 free_object (&inner); 1022 op->type = t_other; 1023 last_non_comment_line = line_number; 1024 return; 1025 } 1026 1027 case '@': 1028 /* Read a comment consisting of a given number of bytes. */ 1029 { 1030 unsigned int nskip = 0; 1031 1032 for (;;) 1033 { 1034 c = do_getc (); 1035 if (!(c >= '0' && c <= '9')) 1036 break; 1037 nskip = 10 * nskip + (c - '0'); 1038 } 1039 if (c != EOF) 1040 { 1041 do_ungetc (c); 1042 for (; nskip > 0; nskip--) 1043 if (do_getc () == EOF) 1044 break; 1045 } 1046 continue; 1047 } 1048 1049 case '$': 1050 op->type = t_other; 1051 last_non_comment_line = line_number; 1052 return; 1053 1054 case '\'': 1055 case ':': 1056 case 'S': case 's': /* XEmacs only */ 1057 { 1058 struct object inner; 1059 read_object (&inner, false, new_backquote_flag, null_context); 1060 /* Dots and EOF are not allowed here. 1061 But be tolerant. */ 1062 free_object (&inner); 1063 op->type = t_other; 1064 last_non_comment_line = line_number; 1065 return; 1066 } 1067 1068 case '0': case '1': case '2': case '3': case '4': 1069 case '5': case '6': case '7': case '8': case '9': 1070 /* Read Common Lisp style #n# or #n=. */ 1071 for (;;) 1072 { 1073 c = do_getc (); 1074 if (!(c >= '0' && c <= '9')) 1075 break; 1076 } 1077 if (c == EOF) 1078 /* Invalid input. Be tolerant, no error message. */ 1079 { 1080 op->type = t_other; 1081 return; 1082 } 1083 if (c == '=') 1084 { 1085 read_object (op, false, new_backquote_flag, outer_context); 1086 last_non_comment_line = line_number; 1087 return; 1088 } 1089 if (c == '#') 1090 { 1091 op->type = t_other; 1092 last_non_comment_line = line_number; 1093 return; 1094 } 1095 if (c == 'R' || c == 'r') 1096 { 1097 /* Read an integer. */ 1098 c = do_getc (); 1099 if (c == '+' || c == '-') 1100 c = do_getc (); 1101 for (; c != EOF; c = do_getc ()) 1102 if (!c_isalnum (c)) 1103 { 1104 do_ungetc (c); 1105 break; 1106 } 1107 op->type = t_other; 1108 last_non_comment_line = line_number; 1109 return; 1110 } 1111 /* Invalid input. Be tolerant, no error message. */ 1112 op->type = t_other; 1113 last_non_comment_line = line_number; 1114 return; 1115 1116 case 'X': case 'x': 1117 case 'O': case 'o': 1118 case 'B': case 'b': 1119 { 1120 /* Read an integer. */ 1121 c = do_getc (); 1122 if (c == '+' || c == '-') 1123 c = do_getc (); 1124 for (; c != EOF; c = do_getc ()) 1125 if (!c_isalnum (c)) 1126 { 1127 do_ungetc (c); 1128 break; 1129 } 1130 op->type = t_other; 1131 last_non_comment_line = line_number; 1132 return; 1133 } 1134 1135 case '*': /* XEmacs only */ 1136 { 1137 /* Read a bit-vector. */ 1138 do 1139 c = do_getc (); 1140 while (c == '0' || c == '1'); 1141 if (c != EOF) 1142 do_ungetc (c); 1143 op->type = t_other; 1144 last_non_comment_line = line_number; 1145 return; 1146 } 1147 1148 case '+': /* XEmacs only */ 1149 case '-': /* XEmacs only */ 1150 /* Simply assume every feature expression is true. */ 1151 { 1152 struct object inner; 1153 read_object (&inner, false, new_backquote_flag, null_context); 1154 /* Dots and EOF are not allowed here. 1155 But be tolerant. */ 1156 free_object (&inner); 1157 continue; 1158 } 1159 1160 default: 1161 /* Invalid input. Be tolerant, no error message. */ 1162 op->type = t_other; 1163 last_non_comment_line = line_number; 1164 return; 1165 } 1166 1167 /*NOTREACHED*/ 1168 abort (); 1169 1170 case '.': 1171 c = do_getc (); 1172 if (c != EOF) 1173 { 1174 do_ungetc (c); 1175 if (c <= ' ' /* FIXME: Assumes ASCII compatible encoding */ 1176 || strchr ("\"'`,(", c) != NULL) 1177 { 1178 op->type = t_dot; 1179 last_non_comment_line = line_number; 1180 return; 1181 } 1182 } 1183 c = '.'; 1184 /*FALLTHROUGH*/ 1185 default: 1186 default_label: 1187 if (c <= ' ') /* FIXME: Assumes ASCII compatible encoding */ 1188 continue; 1189 /* Read a token. */ 1190 { 1191 bool symbol; 1192 1193 op->token = (struct token *) xmalloc (sizeof (struct token)); 1194 symbol = read_token (op->token, c); 1195 if (symbol) 1196 { 1197 op->type = t_symbol; 1198 last_non_comment_line = line_number; 1199 return; 1200 } 1201 else 1202 { 1203 free_token (op->token); 1204 free (op->token); 1205 op->type = t_other; 1206 last_non_comment_line = line_number; 1207 return; 1208 } 1209 } 1210 } 1211 } 1212} 1213 1214 1215void 1216extract_elisp (FILE *f, 1217 const char *real_filename, const char *logical_filename, 1218 flag_context_list_table_ty *flag_table, 1219 msgdomain_list_ty *mdlp) 1220{ 1221 mlp = mdlp->item[0]->messages; 1222 1223 fp = f; 1224 real_file_name = real_filename; 1225 logical_file_name = xstrdup (logical_filename); 1226 line_number = 1; 1227 1228 last_comment_line = -1; 1229 last_non_comment_line = -1; 1230 1231 flag_context_list_table = flag_table; 1232 1233 init_keywords (); 1234 1235 /* Eat tokens until eof is seen. When read_object returns 1236 due to an unbalanced closing parenthesis, just restart it. */ 1237 do 1238 { 1239 struct object toplevel_object; 1240 1241 read_object (&toplevel_object, false, false, null_context); 1242 1243 if (toplevel_object.type == t_eof) 1244 break; 1245 1246 free_object (&toplevel_object); 1247 } 1248 while (!feof (fp)); 1249 1250 /* Close scanner. */ 1251 fp = NULL; 1252 real_file_name = NULL; 1253 logical_file_name = NULL; 1254 line_number = 0; 1255} 1256