/* xgettext librep backend.
   Copyright (C) 2001-2003, 2005-2007 Free Software Foundation, Inc.

   This file was written by Bruno Haible <haible@clisp.cons.org>, 2001.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#ifdef HAVE_CONFIG_H
# include "config.h"
#endif

/* Specification.  */
#include "x-librep.h"

#include <errno.h>
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "c-ctype.h"
#include "message.h"
#include "xgettext.h"
#include "x-librep.h"
#include "error.h"
#include "xalloc.h"
#include "hash.h"
#include "gettext.h"

#define _(s) gettext(s)


/* Summary of librep syntax:
   - ';' starts a comment until end of line.
   - Block comments start with '#|' and end with '|#'.
   - Numbers are constituted of an optional prefix (#b, #B for binary,
     #o, #O for octal, #d, #D for decimal, #x, #X for hexadecimal,
     #e, #E for exact, #i, #I for inexact), an optional sign (+ or -), and
     the digits.
   - Characters are written as '?' followed by the character, possibly
     with an escape sequence, for examples '?a', '?\n', '?\177'.
   - Strings are delimited by double quotes.  Backslash introduces an escape
     sequence.  The following are understood: '\n', '\r', '\f', '\t', '\a',
     '\\', '\^C', '\012' (octal), '\x12' (hexadecimal).
56 - Symbols: can contain meta-characters - whitespace or any from ()[]'";|\' - 57 if preceded by backslash or enclosed in |...|. 58 - Keywords: written as #:SYMBOL. 59 - () delimit lists. 60 - [] delimit vectors. 61 The reader is implemented in librep-0.14/src/lisp.c. */ 62 63 64/* ====================== Keyword set customization. ====================== */ 65 66/* If true extract all strings. */ 67static bool extract_all = false; 68 69static hash_table keywords; 70static bool default_keywords = true; 71 72 73void 74x_librep_extract_all () 75{ 76 extract_all = true; 77} 78 79 80void 81x_librep_keyword (const char *name) 82{ 83 if (name == NULL) 84 default_keywords = false; 85 else 86 { 87 const char *end; 88 struct callshape shape; 89 const char *colon; 90 91 if (keywords.table == NULL) 92 hash_init (&keywords, 100); 93 94 split_keywordspec (name, &end, &shape); 95 96 /* The characters between name and end should form a valid Lisp 97 symbol. */ 98 colon = strchr (name, ':'); 99 if (colon == NULL || colon >= end) 100 insert_keyword_callshape (&keywords, name, end - name, &shape); 101 } 102} 103 104/* Finish initializing the keywords hash table. 105 Called after argument processing, before each file is processed. */ 106static void 107init_keywords () 108{ 109 if (default_keywords) 110 { 111 /* When adding new keywords here, also update the documentation in 112 xgettext.texi! */ 113 x_librep_keyword ("_"); 114 default_keywords = false; 115 } 116} 117 118void 119init_flag_table_librep () 120{ 121 xgettext_record_flag ("_:1:pass-librep-format"); 122 xgettext_record_flag ("format:2:librep-format"); 123} 124 125 126/* ======================== Reading of characters. ======================== */ 127 128/* Real filename, used in error messages about the input file. */ 129static const char *real_file_name; 130 131/* Logical filename and line number, used to label the extracted messages. 
*/ 132static char *logical_file_name; 133static int line_number; 134 135/* The input file stream. */ 136static FILE *fp; 137 138 139/* Fetch the next character from the input file. */ 140static int 141do_getc () 142{ 143 int c = getc (fp); 144 145 if (c == EOF) 146 { 147 if (ferror (fp)) 148 error (EXIT_FAILURE, errno, _("\ 149error while reading \"%s\""), real_file_name); 150 } 151 else if (c == '\n') 152 line_number++; 153 154 return c; 155} 156 157/* Put back the last fetched character, not EOF. */ 158static void 159do_ungetc (int c) 160{ 161 if (c == '\n') 162 line_number--; 163 ungetc (c, fp); 164} 165 166 167/* ========================== Reading of tokens. ========================== */ 168 169 170/* A token consists of a sequence of characters. */ 171struct token 172{ 173 int allocated; /* number of allocated 'token_char's */ 174 int charcount; /* number of used 'token_char's */ 175 char *chars; /* the token's constituents */ 176}; 177 178/* Initialize a 'struct token'. */ 179static inline void 180init_token (struct token *tp) 181{ 182 tp->allocated = 10; 183 tp->chars = XNMALLOC (tp->allocated, char); 184 tp->charcount = 0; 185} 186 187/* Free the memory pointed to by a 'struct token'. */ 188static inline void 189free_token (struct token *tp) 190{ 191 free (tp->chars); 192} 193 194/* Ensure there is enough room in the token for one more character. */ 195static inline void 196grow_token (struct token *tp) 197{ 198 if (tp->charcount == tp->allocated) 199 { 200 tp->allocated *= 2; 201 tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char)); 202 } 203} 204 205/* Read the next token. If 'first' is given, it points to the first 206 character, which has already been read. Returns true for a symbol, 207 false for a number. 
*/ 208static bool 209read_token (struct token *tp, const int *first) 210{ 211 int c; 212 /* Variables for speculative number parsing: */ 213 int radix = -1; 214 int nfirst = 0; 215 bool exact = true; 216 bool rational = false; 217 bool exponent = false; 218 bool had_sign = false; 219 bool expecting_prefix = false; 220 221 init_token (tp); 222 223 if (first) 224 c = *first; 225 else 226 c = do_getc (); 227 228 for (;; c = do_getc ()) 229 { 230 switch (c) 231 { 232 case EOF: 233 goto done; 234 235 case ' ': case '\t': case '\n': case '\f': case '\r': 236 case '(': case ')': case '[': case ']': 237 case '\'': case '"': case ';': case ',': case '`': 238 goto done; 239 240 case '\\': 241 radix = 0; 242 c = do_getc (); 243 if (c == EOF) 244 /* Invalid, but be tolerant. */ 245 break; 246 grow_token (tp); 247 tp->chars[tp->charcount++] = c; 248 break; 249 250 case '|': 251 radix = 0; 252 for (;;) 253 { 254 c = do_getc (); 255 if (c == EOF || c == '|') 256 break; 257 grow_token (tp); 258 tp->chars[tp->charcount++] = c; 259 } 260 break; 261 262 default: 263 if (radix != 0) 264 { 265 if (expecting_prefix) 266 { 267 switch (c) 268 { 269 case 'B': case 'b': 270 radix = 2; 271 break; 272 case 'O': case 'o': 273 radix = 8; 274 break; 275 case 'D': case 'd': 276 radix = 10; 277 break; 278 case 'X': case 'x': 279 radix = 16; 280 break; 281 case 'E': case 'e': 282 case 'I': case 'i': 283 break; 284 default: 285 radix = 0; 286 break; 287 } 288 expecting_prefix = false; 289 nfirst = tp->charcount + 1; 290 } 291 else if (tp->charcount == nfirst 292 && (c == '+' || c == '-' || c == '#')) 293 { 294 if (c == '#') 295 { 296 if (had_sign) 297 radix = 0; 298 else 299 expecting_prefix = true; 300 } 301 else 302 had_sign = true; 303 nfirst = tp->charcount + 1; 304 } 305 else 306 { 307 switch (radix) 308 { 309 case -1: 310 if (c == '.') 311 { 312 radix = 10; 313 exact = false; 314 } 315 else if (!(c >= '0' && c <= '9')) 316 radix = 0; 317 else if (c == '0') 318 radix = 1; 319 else 320 radix = 
10; 321 break; 322 323 case 1: 324 switch (c) 325 { 326 case 'X': case 'x': 327 radix = 16; 328 nfirst = tp->charcount + 1; 329 break; 330 case '0': case '1': case '2': case '3': case '4': 331 case '5': case '6': case '7': 332 radix = 8; 333 nfirst = tp->charcount; 334 break; 335 case '.': case 'E': case 'e': 336 radix = 10; 337 exact = false; 338 break; 339 case '/': 340 radix = 10; 341 rational = true; 342 break; 343 default: 344 radix = 0; 345 break; 346 } 347 break; 348 349 default: 350 switch (c) 351 { 352 case '.': 353 if (exact && radix == 10 && !rational) 354 exact = false; 355 else 356 radix = 0; 357 break; 358 case '/': 359 if (exact && !rational) 360 rational = true; 361 else 362 radix = 0; 363 break; 364 case 'E': case 'e': 365 if (radix == 10) 366 { 367 if (!rational && !exponent) 368 { 369 exponent = true; 370 exact = false; 371 } 372 else 373 radix = 0; 374 break; 375 } 376 /*FALLTHROUGH*/ 377 default: 378 if (exponent && (c == '+' || c == '-')) 379 break; 380 if ((radix <= 10 381 && !(c >= '0' && c <= '0' + radix - 1)) 382 || (radix == 16 && !c_isxdigit (c))) 383 radix = 0; 384 break; 385 } 386 break; 387 } 388 } 389 } 390 else 391 { 392 if (c == '#') 393 goto done; 394 } 395 grow_token (tp); 396 tp->chars[tp->charcount++] = c; 397 } 398 } 399 done: 400 if (c != EOF) 401 do_ungetc (c); 402 if (radix > 0 && nfirst < tp->charcount) 403 return false; /* number */ 404 else 405 return true; /* symbol */ 406} 407 408 409/* ========================= Accumulating comments ========================= */ 410 411 412static char *buffer; 413static size_t bufmax; 414static size_t buflen; 415 416static inline void 417comment_start () 418{ 419 buflen = 0; 420} 421 422static inline void 423comment_add (int c) 424{ 425 if (buflen >= bufmax) 426 { 427 bufmax = 2 * bufmax + 10; 428 buffer = xrealloc (buffer, bufmax); 429 } 430 buffer[buflen++] = c; 431} 432 433static inline void 434comment_line_end (size_t chars_to_remove) 435{ 436 buflen -= chars_to_remove; 437 while 
(buflen >= 1 438 && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t')) 439 --buflen; 440 if (chars_to_remove == 0 && buflen >= bufmax) 441 { 442 bufmax = 2 * bufmax + 10; 443 buffer = xrealloc (buffer, bufmax); 444 } 445 buffer[buflen] = '\0'; 446 savable_comment_add (buffer); 447} 448 449 450/* These are for tracking whether comments count as immediately before 451 keyword. */ 452static int last_comment_line; 453static int last_non_comment_line; 454 455 456/* ========================= Accumulating messages ========================= */ 457 458 459static message_list_ty *mlp; 460 461 462/* ============== Reading of objects. See CLHS 2 "Syntax". ============== */ 463 464 465/* We are only interested in symbols (e.g. GETTEXT or NGETTEXT) and strings. 466 Other objects need not to be represented precisely. */ 467enum object_type 468{ 469 t_symbol, /* symbol */ 470 t_string, /* string */ 471 t_other, /* other kind of real object */ 472 t_dot, /* '.' pseudo object */ 473 t_close, /* ')' or ']' pseudo object */ 474 t_eof /* EOF marker */ 475}; 476 477struct object 478{ 479 enum object_type type; 480 struct token *token; /* for t_symbol and t_string */ 481 int line_number_at_start; /* for t_string */ 482}; 483 484/* Free the memory pointed to by a 'struct object'. */ 485static inline void 486free_object (struct object *op) 487{ 488 if (op->type == t_symbol || op->type == t_string) 489 { 490 free_token (op->token); 491 free (op->token); 492 } 493} 494 495/* Convert a t_symbol/t_string token to a char*. */ 496static char * 497string_of_object (const struct object *op) 498{ 499 char *str; 500 int n; 501 502 if (!(op->type == t_symbol || op->type == t_string)) 503 abort (); 504 n = op->token->charcount; 505 str = XNMALLOC (n + 1, char); 506 memcpy (str, op->token->chars, n); 507 str[n] = '\0'; 508 return str; 509} 510 511/* Context lookup table. 
*/ 512static flag_context_list_table_ty *flag_context_list_table; 513 514/* Returns the character represented by an escape sequence. */ 515static int 516do_getc_escaped (int c) 517{ 518 switch (c) 519 { 520 case 'n': 521 return '\n'; 522 case 'r': 523 return '\r'; 524 case 'f': 525 return '\f'; 526 case 't': 527 return '\t'; 528 case 'v': 529 return '\v'; 530 case 'a': 531 return '\a'; 532 case '^': 533 c = do_getc (); 534 if (c == EOF) 535 return EOF; 536 return c & 0x1f; 537 case '0': case '1': case '2': case '3': case '4': 538 case '5': case '6': case '7': 539 { 540 int n = c - '0'; 541 542 c = do_getc (); 543 if (c != EOF) 544 { 545 if (c >= '0' && c <= '7') 546 { 547 n = (n << 3) + (c - '0'); 548 c = do_getc (); 549 if (c != EOF) 550 { 551 if (c >= '0' && c <= '7') 552 n = (n << 3) + (c - '0'); 553 else 554 do_ungetc (c); 555 } 556 } 557 else 558 do_ungetc (c); 559 } 560 return (unsigned char) n; 561 } 562 case 'x': 563 { 564 int n = 0; 565 566 for (;;) 567 { 568 c = do_getc (); 569 if (c == EOF) 570 break; 571 else if (c >= '0' && c <= '9') 572 n = (n << 4) + (c - '0'); 573 else if (c >= 'A' && c <= 'F') 574 n = (n << 4) + (c - 'A' + 10); 575 else if (c >= 'a' && c <= 'f') 576 n = (n << 4) + (c - 'a' + 10); 577 else 578 { 579 do_ungetc (c); 580 break; 581 } 582 } 583 return (unsigned char) n; 584 } 585 default: 586 return c; 587 } 588} 589 590/* Read the next object. */ 591static void 592read_object (struct object *op, flag_context_ty outer_context) 593{ 594 for (;;) 595 { 596 int c; 597 598 c = do_getc (); 599 600 switch (c) 601 { 602 case EOF: 603 op->type = t_eof; 604 return; 605 606 case '\n': 607 /* Comments assumed to be grouped with a message must immediately 608 precede it, with no non-whitespace token on a line between 609 both. 
*/ 610 if (last_non_comment_line > last_comment_line) 611 savable_comment_reset (); 612 continue; 613 614 case ' ': case '\t': case '\f': case '\r': 615 continue; 616 617 case '(': 618 { 619 int arg = 0; /* Current argument number. */ 620 flag_context_list_iterator_ty context_iter; 621 const struct callshapes *shapes = NULL; 622 struct arglist_parser *argparser = NULL; 623 624 for (;; arg++) 625 { 626 struct object inner; 627 flag_context_ty inner_context; 628 629 if (arg == 0) 630 inner_context = null_context; 631 else 632 inner_context = 633 inherited_context (outer_context, 634 flag_context_list_iterator_advance ( 635 &context_iter)); 636 637 read_object (&inner, inner_context); 638 639 /* Recognize end of list. */ 640 if (inner.type == t_close) 641 { 642 op->type = t_other; 643 /* Don't bother converting "()" to "NIL". */ 644 last_non_comment_line = line_number; 645 if (argparser != NULL) 646 arglist_parser_done (argparser, arg); 647 return; 648 } 649 650 /* Dots are not allowed in every position. 651 But be tolerant. */ 652 653 /* EOF inside list is illegal. But be tolerant. */ 654 if (inner.type == t_eof) 655 break; 656 657 if (arg == 0) 658 { 659 /* This is the function position. */ 660 if (inner.type == t_symbol) 661 { 662 char *symbol_name = string_of_object (&inner); 663 void *keyword_value; 664 665 if (hash_find_entry (&keywords, 666 symbol_name, strlen (symbol_name), 667 &keyword_value) 668 == 0) 669 shapes = (const struct callshapes *) keyword_value; 670 671 argparser = arglist_parser_alloc (mlp, shapes); 672 673 context_iter = 674 flag_context_list_iterator ( 675 flag_context_list_table_lookup ( 676 flag_context_list_table, 677 symbol_name, strlen (symbol_name))); 678 679 free (symbol_name); 680 } 681 else 682 context_iter = null_context_list_iterator; 683 } 684 else 685 { 686 /* These are the argument positions. 
*/ 687 if (argparser != NULL && inner.type == t_string) 688 arglist_parser_remember (argparser, arg, 689 string_of_object (&inner), 690 inner_context, 691 logical_file_name, 692 inner.line_number_at_start, 693 savable_comment); 694 } 695 696 free_object (&inner); 697 } 698 699 if (argparser != NULL) 700 arglist_parser_done (argparser, arg); 701 } 702 op->type = t_other; 703 last_non_comment_line = line_number; 704 return; 705 706 case '[': 707 { 708 for (;;) 709 { 710 struct object inner; 711 712 read_object (&inner, null_context); 713 714 /* Recognize end of vector. */ 715 if (inner.type == t_close) 716 { 717 op->type = t_other; 718 last_non_comment_line = line_number; 719 return; 720 } 721 722 /* Dots are not allowed. But be tolerant. */ 723 724 /* EOF inside vector is illegal. But be tolerant. */ 725 if (inner.type == t_eof) 726 break; 727 728 free_object (&inner); 729 } 730 } 731 op->type = t_other; 732 last_non_comment_line = line_number; 733 return; 734 735 case ')': case ']': 736 /* Tell the caller about the end of list or vector. 737 Unmatched closing parenthesis is illegal. But be tolerant. */ 738 op->type = t_close; 739 last_non_comment_line = line_number; 740 return; 741 742 case ',': 743 { 744 int c = do_getc (); 745 /* The ,@ handling inside lists is wrong anyway, because 746 ,@form expands to an unknown number of elements. */ 747 if (c != EOF && c != '@') 748 do_ungetc (c); 749 } 750 /*FALLTHROUGH*/ 751 case '\'': 752 case '`': 753 { 754 struct object inner; 755 756 read_object (&inner, null_context); 757 758 /* Dots and EOF are not allowed here. But be tolerant. 
*/ 759 760 free_object (&inner); 761 762 op->type = t_other; 763 last_non_comment_line = line_number; 764 return; 765 } 766 767 case ';': 768 { 769 bool all_semicolons = true; 770 771 last_comment_line = line_number; 772 comment_start (); 773 for (;;) 774 { 775 int c = do_getc (); 776 if (c == EOF || c == '\n' || c == '\f' || c == '\r') 777 break; 778 if (c != ';') 779 all_semicolons = false; 780 if (!all_semicolons) 781 { 782 /* We skip all leading white space, but not EOLs. */ 783 if (!(buflen == 0 && (c == ' ' || c == '\t'))) 784 comment_add (c); 785 } 786 } 787 comment_line_end (0); 788 continue; 789 } 790 791 case '"': 792 { 793 op->token = XMALLOC (struct token); 794 init_token (op->token); 795 op->line_number_at_start = line_number; 796 for (;;) 797 { 798 int c = do_getc (); 799 if (c == EOF) 800 /* Invalid input. Be tolerant, no error message. */ 801 break; 802 if (c == '"') 803 break; 804 if (c == '\\') 805 { 806 c = do_getc (); 807 if (c == EOF) 808 /* Invalid input. Be tolerant, no error message. */ 809 break; 810 if (c == '\n') 811 /* Ignore escaped newline. */ 812 ; 813 else 814 { 815 c = do_getc_escaped (c); 816 if (c == EOF) 817 /* Invalid input. Be tolerant, no error message. */ 818 break; 819 grow_token (op->token); 820 op->token->chars[op->token->charcount++] = c; 821 } 822 } 823 else 824 { 825 grow_token (op->token); 826 op->token->chars[op->token->charcount++] = c; 827 } 828 } 829 op->type = t_string; 830 831 if (extract_all) 832 { 833 lex_pos_ty pos; 834 835 pos.file_name = logical_file_name; 836 pos.line_number = op->line_number_at_start; 837 remember_a_message (mlp, NULL, string_of_object (op), 838 null_context, &pos, savable_comment); 839 } 840 last_non_comment_line = line_number; 841 return; 842 } 843 844 case '?': 845 c = do_getc (); 846 if (c == EOF) 847 /* Invalid input. Be tolerant, no error message. */ 848 ; 849 else if (c == '\\') 850 { 851 c = do_getc (); 852 if (c == EOF) 853 /* Invalid input. Be tolerant, no error message. 
*/ 854 ; 855 else 856 { 857 c = do_getc_escaped (c); 858 if (c == EOF) 859 /* Invalid input. Be tolerant, no error message. */ 860 ; 861 } 862 } 863 op->type = t_other; 864 last_non_comment_line = line_number; 865 return; 866 867 case '#': 868 /* Dispatch macro handling. */ 869 c = do_getc (); 870 if (c == EOF) 871 /* Invalid input. Be tolerant, no error message. */ 872 { 873 op->type = t_other; 874 return; 875 } 876 877 switch (c) 878 { 879 case '!': 880 if (ftell (fp) == 2) 881 /* Skip comment until !# */ 882 { 883 c = do_getc (); 884 for (;;) 885 { 886 if (c == EOF) 887 break; 888 if (c == '!') 889 { 890 c = do_getc (); 891 if (c == EOF || c == '#') 892 break; 893 } 894 else 895 c = do_getc (); 896 } 897 if (c == EOF) 898 { 899 /* EOF not allowed here. But be tolerant. */ 900 op->type = t_eof; 901 return; 902 } 903 continue; 904 } 905 /*FALLTHROUGH*/ 906 case '\'': 907 case ':': 908 { 909 struct object inner; 910 read_object (&inner, null_context); 911 /* Dots and EOF are not allowed here. 912 But be tolerant. */ 913 free_object (&inner); 914 op->type = t_other; 915 last_non_comment_line = line_number; 916 return; 917 } 918 919 case '[': 920 case '(': 921 { 922 struct object inner; 923 do_ungetc (c); 924 read_object (&inner, null_context); 925 /* Dots and EOF are not allowed here. 926 But be tolerant. 
*/ 927 free_object (&inner); 928 op->type = t_other; 929 last_non_comment_line = line_number; 930 return; 931 } 932 933 case '|': 934 { 935 int depth = 0; 936 937 comment_start (); 938 c = do_getc (); 939 for (;;) 940 { 941 if (c == EOF) 942 break; 943 if (c == '|') 944 { 945 c = do_getc (); 946 if (c == EOF) 947 break; 948 if (c == '#') 949 { 950 if (depth == 0) 951 { 952 comment_line_end (0); 953 break; 954 } 955 depth--; 956 comment_add ('|'); 957 comment_add ('#'); 958 c = do_getc (); 959 } 960 else 961 comment_add ('|'); 962 } 963 else if (c == '#') 964 { 965 c = do_getc (); 966 if (c == EOF) 967 break; 968 comment_add ('#'); 969 if (c == '|') 970 { 971 depth++; 972 comment_add ('|'); 973 c = do_getc (); 974 } 975 } 976 else 977 { 978 /* We skip all leading white space. */ 979 if (!(buflen == 0 && (c == ' ' || c == '\t'))) 980 comment_add (c); 981 if (c == '\n') 982 { 983 comment_line_end (1); 984 comment_start (); 985 } 986 c = do_getc (); 987 } 988 } 989 if (c == EOF) 990 { 991 /* EOF not allowed here. But be tolerant. */ 992 op->type = t_eof; 993 return; 994 } 995 last_comment_line = line_number; 996 continue; 997 } 998 999 case '\\': 1000 { 1001 struct token token; 1002 int first = '\\'; 1003 read_token (&token, &first); 1004 free_token (&token); 1005 op->type = t_other; 1006 last_non_comment_line = line_number; 1007 return; 1008 } 1009 1010 case 'T': case 't': 1011 case 'F': case 'f': 1012 op->type = t_other; 1013 last_non_comment_line = line_number; 1014 return; 1015 1016 case 'B': case 'b': 1017 case 'O': case 'o': 1018 case 'D': case 'd': 1019 case 'X': case 'x': 1020 case 'E': case 'e': 1021 case 'I': case 'i': 1022 { 1023 struct token token; 1024 do_ungetc (c); 1025 c = '#'; 1026 read_token (&token, &c); 1027 free_token (&token); 1028 op->type = t_other; 1029 last_non_comment_line = line_number; 1030 return; 1031 } 1032 1033 default: 1034 /* Invalid input. Be tolerant, no error message. 
*/ 1035 op->type = t_other; 1036 last_non_comment_line = line_number; 1037 return; 1038 } 1039 1040 /*NOTREACHED*/ 1041 abort (); 1042 1043 default: 1044 /* Read a token. */ 1045 { 1046 bool symbol; 1047 1048 op->token = XMALLOC (struct token); 1049 symbol = read_token (op->token, &c); 1050 if (op->token->charcount == 1 && op->token->chars[0] == '.') 1051 { 1052 free_token (op->token); 1053 free (op->token); 1054 op->type = t_dot; 1055 last_non_comment_line = line_number; 1056 return; 1057 } 1058 if (!symbol) 1059 { 1060 free_token (op->token); 1061 free (op->token); 1062 op->type = t_other; 1063 last_non_comment_line = line_number; 1064 return; 1065 } 1066 /* Distinguish between "foo" and "foo#bar". */ 1067 c = do_getc (); 1068 if (c == '#') 1069 { 1070 struct token second_token; 1071 1072 free_token (op->token); 1073 free (op->token); 1074 read_token (&second_token, NULL); 1075 free_token (&second_token); 1076 op->type = t_other; 1077 last_non_comment_line = line_number; 1078 return; 1079 } 1080 else 1081 { 1082 if (c != EOF) 1083 do_ungetc (c); 1084 op->type = t_symbol; 1085 last_non_comment_line = line_number; 1086 return; 1087 } 1088 } 1089 } 1090 } 1091} 1092 1093 1094void 1095extract_librep (FILE *f, 1096 const char *real_filename, const char *logical_filename, 1097 flag_context_list_table_ty *flag_table, 1098 msgdomain_list_ty *mdlp) 1099{ 1100 mlp = mdlp->item[0]->messages; 1101 1102 fp = f; 1103 real_file_name = real_filename; 1104 logical_file_name = xstrdup (logical_filename); 1105 line_number = 1; 1106 1107 last_comment_line = -1; 1108 last_non_comment_line = -1; 1109 1110 flag_context_list_table = flag_table; 1111 1112 init_keywords (); 1113 1114 /* Eat tokens until eof is seen. When read_object returns 1115 due to an unbalanced closing parenthesis, just restart it. 
*/ 1116 do 1117 { 1118 struct object toplevel_object; 1119 1120 read_object (&toplevel_object, null_context); 1121 1122 if (toplevel_object.type == t_eof) 1123 break; 1124 1125 free_object (&toplevel_object); 1126 } 1127 while (!feof (fp)); 1128 1129 /* Close scanner. */ 1130 fp = NULL; 1131 real_file_name = NULL; 1132 logical_file_name = NULL; 1133 line_number = 0; 1134} 1135