1/* xgettext librep backend. 2 Copyright (C) 2001-2003, 2005-2006 Free Software Foundation, Inc. 3 4 This file was written by Bruno Haible <haible@clisp.cons.org>, 2001. 5 6 This program is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 2, or (at your option) 9 any later version. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program; if not, write to the Free Software Foundation, 18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 19 20#ifdef HAVE_CONFIG_H 21# include "config.h" 22#endif 23 24#include <errno.h> 25#include <stdbool.h> 26#include <stdio.h> 27#include <stdlib.h> 28#include <string.h> 29 30#include "c-ctype.h" 31#include "message.h" 32#include "xgettext.h" 33#include "x-librep.h" 34#include "error.h" 35#include "xalloc.h" 36#include "exit.h" 37#include "hash.h" 38#include "gettext.h" 39 40#define _(s) gettext(s) 41 42 43/* Summary of librep syntax: 44 - ';' starts a comment until end of line. 45 - Block comments start with '#|' and end with '|#'. 46 - Numbers are constituted of an optional prefix (#b, #B for binary, 47 #o, #O for octal, #d, #D for decimal, #x, #X for hexadecimal, 48 #e, #E for exact, #i, #I for inexact), an optional sign (+ or -), and 49 the digits. 50 - Characters are written as '?' followed by the character, possibly 51 with an escape sequence, for examples '?a', '?\n', '?\177'. 52 - Strings are delimited by double quotes. Backslash introduces an escape 53 sequence. The following are understood: '\n', '\r', '\f', '\t', '\a', 54 '\\', '\^C', '\012' (octal), '\x12' (hexadecimal). 55 - Symbols: can contain meta-characters - whitespace or any from ()[]'";|\' - 56 if preceded by backslash or enclosed in |...|. 57 - Keywords: written as #:SYMBOL. 58 - () delimit lists. 59 - [] delimit vectors. 60 The reader is implemented in librep-0.14/src/lisp.c. */ 61 62 63/* ====================== Keyword set customization. ====================== */ 64 65/* If true extract all strings. */ 66static bool extract_all = false; 67 68static hash_table keywords; 69static bool default_keywords = true; 70 71 72void 73x_librep_extract_all () 74{ 75 extract_all = true; 76} 77 78 79void 80x_librep_keyword (const char *name) 81{ 82 if (name == NULL) 83 default_keywords = false; 84 else 85 { 86 const char *end; 87 struct callshape shape; 88 const char *colon; 89 90 if (keywords.table == NULL) 91 hash_init (&keywords, 100); 92 93 split_keywordspec (name, &end, &shape); 94 95 /* The characters between name and end should form a valid Lisp 96 symbol. */ 97 colon = strchr (name, ':'); 98 if (colon == NULL || colon >= end) 99 insert_keyword_callshape (&keywords, name, end - name, &shape); 100 } 101} 102 103/* Finish initializing the keywords hash table. 104 Called after argument processing, before each file is processed. */ 105static void 106init_keywords () 107{ 108 if (default_keywords) 109 { 110 /* When adding new keywords here, also update the documentation in 111 xgettext.texi! */ 112 x_librep_keyword ("_"); 113 default_keywords = false; 114 } 115} 116 117void 118init_flag_table_librep () 119{ 120 xgettext_record_flag ("_:1:pass-librep-format"); 121 xgettext_record_flag ("format:2:librep-format"); 122} 123 124 125/* ======================== Reading of characters. ======================== */ 126 127/* Real filename, used in error messages about the input file. */ 128static const char *real_file_name; 129 130/* Logical filename and line number, used to label the extracted messages. */ 131static char *logical_file_name; 132static int line_number; 133 134/* The input file stream. */ 135static FILE *fp; 136 137 138/* Fetch the next character from the input file. */ 139static int 140do_getc () 141{ 142 int c = getc (fp); 143 144 if (c == EOF) 145 { 146 if (ferror (fp)) 147 error (EXIT_FAILURE, errno, _("\ 148error while reading \"%s\""), real_file_name); 149 } 150 else if (c == '\n') 151 line_number++; 152 153 return c; 154} 155 156/* Put back the last fetched character, not EOF. */ 157static void 158do_ungetc (int c) 159{ 160 if (c == '\n') 161 line_number--; 162 ungetc (c, fp); 163} 164 165 166/* ========================== Reading of tokens. ========================== */ 167 168 169/* A token consists of a sequence of characters. */ 170struct token 171{ 172 int allocated; /* number of allocated 'token_char's */ 173 int charcount; /* number of used 'token_char's */ 174 char *chars; /* the token's constituents */ 175}; 176 177/* Initialize a 'struct token'. */ 178static inline void 179init_token (struct token *tp) 180{ 181 tp->allocated = 10; 182 tp->chars = (char *) xmalloc (tp->allocated * sizeof (char)); 183 tp->charcount = 0; 184} 185 186/* Free the memory pointed to by a 'struct token'. */ 187static inline void 188free_token (struct token *tp) 189{ 190 free (tp->chars); 191} 192 193/* Ensure there is enough room in the token for one more character. */ 194static inline void 195grow_token (struct token *tp) 196{ 197 if (tp->charcount == tp->allocated) 198 { 199 tp->allocated *= 2; 200 tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char)); 201 } 202} 203 204/* Read the next token. If 'first' is given, it points to the first 205 character, which has already been read. Returns true for a symbol, 206 false for a number. */ 207static bool 208read_token (struct token *tp, const int *first) 209{ 210 int c; 211 /* Variables for speculative number parsing: */ 212 int radix = -1; 213 int nfirst = 0; 214 bool exact = true; 215 bool rational = false; 216 bool exponent = false; 217 bool had_sign = false; 218 bool expecting_prefix = false; 219 220 init_token (tp); 221 222 if (first) 223 c = *first; 224 else 225 c = do_getc (); 226 227 for (;; c = do_getc ()) 228 { 229 switch (c) 230 { 231 case EOF: 232 goto done; 233 234 case ' ': case '\t': case '\n': case '\f': case '\r': 235 case '(': case ')': case '[': case ']': 236 case '\'': case '"': case ';': case ',': case '`': 237 goto done; 238 239 case '\\': 240 radix = 0; 241 c = do_getc (); 242 if (c == EOF) 243 /* Invalid, but be tolerant. */ 244 break; 245 grow_token (tp); 246 tp->chars[tp->charcount++] = c; 247 break; 248 249 case '|': 250 radix = 0; 251 for (;;) 252 { 253 c = do_getc (); 254 if (c == EOF || c == '|') 255 break; 256 grow_token (tp); 257 tp->chars[tp->charcount++] = c; 258 } 259 break; 260 261 default: 262 if (radix != 0) 263 { 264 if (expecting_prefix) 265 { 266 switch (c) 267 { 268 case 'B': case 'b': 269 radix = 2; 270 break; 271 case 'O': case 'o': 272 radix = 8; 273 break; 274 case 'D': case 'd': 275 radix = 10; 276 break; 277 case 'X': case 'x': 278 radix = 16; 279 break; 280 case 'E': case 'e': 281 case 'I': case 'i': 282 break; 283 default: 284 radix = 0; 285 break; 286 } 287 expecting_prefix = false; 288 nfirst = tp->charcount + 1; 289 } 290 else if (tp->charcount == nfirst 291 && (c == '+' || c == '-' || c == '#')) 292 { 293 if (c == '#') 294 { 295 if (had_sign) 296 radix = 0; 297 else 298 expecting_prefix = true; 299 } 300 else 301 had_sign = true; 302 nfirst = tp->charcount + 1; 303 } 304 else 305 { 306 switch (radix) 307 { 308 case -1: 309 if (c == '.') 310 { 311 radix = 10; 312 exact = false; 313 } 314 else if (!(c >= '0' && c <= '9')) 315 radix = 0; 316 else if (c == '0') 317 radix = 1; 318 else 319 radix = 10; 320 break; 321 322 case 1: 323 switch (c) 324 { 325 case 'X': case 'x': 326 radix = 16; 327 nfirst = tp->charcount + 1; 328 break; 329 case '0': case '1': case '2': case '3': case '4': 330 case '5': case '6': case '7': 331 radix = 8; 332 nfirst = tp->charcount; 333 break; 334 case '.': case 'E': case 'e': 335 radix = 10; 336 exact = false; 337 break; 338 case '/': 339 radix = 10; 340 rational = true; 341 break; 342 default: 343 radix = 0; 344 break; 345 } 346 break; 347 348 default: 349 switch (c) 350 { 351 case '.': 352 if (exact && radix == 10 && !rational) 353 exact = false; 354 else 355 radix = 0; 356 break; 357 case '/': 358 if (exact && !rational) 359 rational = true; 360 else 361 radix = 0; 362 break; 363 case 'E': case 'e': 364 if (radix == 10) 365 { 366 if (!rational && !exponent) 367 { 368 exponent = true; 369 exact = false; 370 } 371 else 372 radix = 0; 373 break; 374 } 375 /*FALLTHROUGH*/ 376 default: 377 if (exponent && (c == '+' || c == '-')) 378 break; 379 if ((radix <= 10 380 && !(c >= '0' && c <= '0' + radix - 1)) 381 || (radix == 16 && !c_isxdigit (c))) 382 radix = 0; 383 break; 384 } 385 break; 386 } 387 } 388 } 389 else 390 { 391 if (c == '#') 392 goto done; 393 } 394 grow_token (tp); 395 tp->chars[tp->charcount++] = c; 396 } 397 } 398 done: 399 if (c != EOF) 400 do_ungetc (c); 401 if (radix > 0 && nfirst < tp->charcount) 402 return false; /* number */ 403 else 404 return true; /* symbol */ 405} 406 407 408/* ========================= Accumulating comments ========================= */ 409 410 411static char *buffer; 412static size_t bufmax; 413static size_t buflen; 414 415static inline void 416comment_start () 417{ 418 buflen = 0; 419} 420 421static inline void 422comment_add (int c) 423{ 424 if (buflen >= bufmax) 425 { 426 bufmax = 2 * bufmax + 10; 427 buffer = xrealloc (buffer, bufmax); 428 } 429 buffer[buflen++] = c; 430} 431 432static inline void 433comment_line_end (size_t chars_to_remove) 434{ 435 buflen -= chars_to_remove; 436 while (buflen >= 1 437 && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t')) 438 --buflen; 439 if (chars_to_remove == 0 && buflen >= bufmax) 440 { 441 bufmax = 2 * bufmax + 10; 442 buffer = xrealloc (buffer, bufmax); 443 } 444 buffer[buflen] = '\0'; 445 savable_comment_add (buffer); 446} 447 448 449/* These are for tracking whether comments count as immediately before 450 keyword. */ 451static int last_comment_line; 452static int last_non_comment_line; 453 454 455/* ========================= Accumulating messages ========================= */ 456 457 458static message_list_ty *mlp; 459 460 461/* ============== Reading of objects. See CLHS 2 "Syntax". ============== */ 462 463 464/* We are only interested in symbols (e.g. GETTEXT or NGETTEXT) and strings. 465 Other objects need not to be represented precisely. */ 466enum object_type 467{ 468 t_symbol, /* symbol */ 469 t_string, /* string */ 470 t_other, /* other kind of real object */ 471 t_dot, /* '.' pseudo object */ 472 t_close, /* ')' or ']' pseudo object */ 473 t_eof /* EOF marker */ 474}; 475 476struct object 477{ 478 enum object_type type; 479 struct token *token; /* for t_symbol and t_string */ 480 int line_number_at_start; /* for t_string */ 481}; 482 483/* Free the memory pointed to by a 'struct object'. */ 484static inline void 485free_object (struct object *op) 486{ 487 if (op->type == t_symbol || op->type == t_string) 488 { 489 free_token (op->token); 490 free (op->token); 491 } 492} 493 494/* Convert a t_symbol/t_string token to a char*. */ 495static char * 496string_of_object (const struct object *op) 497{ 498 char *str; 499 int n; 500 501 if (!(op->type == t_symbol || op->type == t_string)) 502 abort (); 503 n = op->token->charcount; 504 str = (char *) xmalloc (n + 1); 505 memcpy (str, op->token->chars, n); 506 str[n] = '\0'; 507 return str; 508} 509 510/* Context lookup table. */ 511static flag_context_list_table_ty *flag_context_list_table; 512 513/* Returns the character represented by an escape sequence. */ 514static int 515do_getc_escaped (int c) 516{ 517 switch (c) 518 { 519 case 'n': 520 return '\n'; 521 case 'r': 522 return '\r'; 523 case 'f': 524 return '\f'; 525 case 't': 526 return '\t'; 527 case 'v': 528 return '\v'; 529 case 'a': 530 return '\a'; 531 case '^': 532 c = do_getc (); 533 if (c == EOF) 534 return EOF; 535 return c & 0x1f; 536 case '0': case '1': case '2': case '3': case '4': 537 case '5': case '6': case '7': 538 { 539 int n = c - '0'; 540 541 c = do_getc (); 542 if (c != EOF) 543 { 544 if (c >= '0' && c <= '7') 545 { 546 n = (n << 3) + (c - '0'); 547 c = do_getc (); 548 if (c != EOF) 549 { 550 if (c >= '0' && c <= '7') 551 n = (n << 3) + (c - '0'); 552 else 553 do_ungetc (c); 554 } 555 } 556 else 557 do_ungetc (c); 558 } 559 return (unsigned char) n; 560 } 561 case 'x': 562 { 563 int n = 0; 564 565 for (;;) 566 { 567 c = do_getc (); 568 if (c == EOF) 569 break; 570 else if (c >= '0' && c <= '9') 571 n = (n << 4) + (c - '0'); 572 else if (c >= 'A' && c <= 'F') 573 n = (n << 4) + (c - 'A' + 10); 574 else if (c >= 'a' && c <= 'f') 575 n = (n << 4) + (c - 'a' + 10); 576 else 577 { 578 do_ungetc (c); 579 break; 580 } 581 } 582 return (unsigned char) n; 583 } 584 default: 585 return c; 586 } 587} 588 589/* Read the next object. */ 590static void 591read_object (struct object *op, flag_context_ty outer_context) 592{ 593 for (;;) 594 { 595 int c; 596 597 c = do_getc (); 598 599 switch (c) 600 { 601 case EOF: 602 op->type = t_eof; 603 return; 604 605 case '\n': 606 /* Comments assumed to be grouped with a message must immediately 607 precede it, with no non-whitespace token on a line between 608 both. */ 609 if (last_non_comment_line > last_comment_line) 610 savable_comment_reset (); 611 continue; 612 613 case ' ': case '\t': case '\f': case '\r': 614 continue; 615 616 case '(': 617 { 618 int arg = 0; /* Current argument number. */ 619 flag_context_list_iterator_ty context_iter; 620 const struct callshapes *shapes = NULL; 621 struct arglist_parser *argparser = NULL; 622 623 for (;; arg++) 624 { 625 struct object inner; 626 flag_context_ty inner_context; 627 628 if (arg == 0) 629 inner_context = null_context; 630 else 631 inner_context = 632 inherited_context (outer_context, 633 flag_context_list_iterator_advance ( 634 &context_iter)); 635 636 read_object (&inner, inner_context); 637 638 /* Recognize end of list. */ 639 if (inner.type == t_close) 640 { 641 op->type = t_other; 642 /* Don't bother converting "()" to "NIL". */ 643 last_non_comment_line = line_number; 644 if (argparser != NULL) 645 arglist_parser_done (argparser, arg); 646 return; 647 } 648 649 /* Dots are not allowed in every position. 650 But be tolerant. */ 651 652 /* EOF inside list is illegal. But be tolerant. */ 653 if (inner.type == t_eof) 654 break; 655 656 if (arg == 0) 657 { 658 /* This is the function position. */ 659 if (inner.type == t_symbol) 660 { 661 char *symbol_name = string_of_object (&inner); 662 void *keyword_value; 663 664 if (hash_find_entry (&keywords, 665 symbol_name, strlen (symbol_name), 666 &keyword_value) 667 == 0) 668 shapes = (const struct callshapes *) keyword_value; 669 670 argparser = arglist_parser_alloc (mlp, shapes); 671 672 context_iter = 673 flag_context_list_iterator ( 674 flag_context_list_table_lookup ( 675 flag_context_list_table, 676 symbol_name, strlen (symbol_name))); 677 678 free (symbol_name); 679 } 680 else 681 context_iter = null_context_list_iterator; 682 } 683 else 684 { 685 /* These are the argument positions. */ 686 if (argparser != NULL && inner.type == t_string) 687 arglist_parser_remember (argparser, arg, 688 string_of_object (&inner), 689 inner_context, 690 logical_file_name, 691 inner.line_number_at_start, 692 savable_comment); 693 } 694 695 free_object (&inner); 696 } 697 698 if (argparser != NULL) 699 arglist_parser_done (argparser, arg); 700 } 701 op->type = t_other; 702 last_non_comment_line = line_number; 703 return; 704 705 case '[': 706 { 707 for (;;) 708 { 709 struct object inner; 710 711 read_object (&inner, null_context); 712 713 /* Recognize end of vector. */ 714 if (inner.type == t_close) 715 { 716 op->type = t_other; 717 last_non_comment_line = line_number; 718 return; 719 } 720 721 /* Dots are not allowed. But be tolerant. */ 722 723 /* EOF inside vector is illegal. But be tolerant. */ 724 if (inner.type == t_eof) 725 break; 726 727 free_object (&inner); 728 } 729 } 730 op->type = t_other; 731 last_non_comment_line = line_number; 732 return; 733 734 case ')': case ']': 735 /* Tell the caller about the end of list or vector. 736 Unmatched closing parenthesis is illegal. But be tolerant. */ 737 op->type = t_close; 738 last_non_comment_line = line_number; 739 return; 740 741 case ',': 742 { 743 int c = do_getc (); 744 /* The ,@ handling inside lists is wrong anyway, because 745 ,@form expands to an unknown number of elements. */ 746 if (c != EOF && c != '@') 747 do_ungetc (c); 748 } 749 /*FALLTHROUGH*/ 750 case '\'': 751 case '`': 752 { 753 struct object inner; 754 755 read_object (&inner, null_context); 756 757 /* Dots and EOF are not allowed here. But be tolerant. */ 758 759 free_object (&inner); 760 761 op->type = t_other; 762 last_non_comment_line = line_number; 763 return; 764 } 765 766 case ';': 767 { 768 bool all_semicolons = true; 769 770 last_comment_line = line_number; 771 comment_start (); 772 for (;;) 773 { 774 int c = do_getc (); 775 if (c == EOF || c == '\n' || c == '\f' || c == '\r') 776 break; 777 if (c != ';') 778 all_semicolons = false; 779 if (!all_semicolons) 780 { 781 /* We skip all leading white space, but not EOLs. */ 782 if (!(buflen == 0 && (c == ' ' || c == '\t'))) 783 comment_add (c); 784 } 785 } 786 comment_line_end (0); 787 continue; 788 } 789 790 case '"': 791 { 792 op->token = (struct token *) xmalloc (sizeof (struct token)); 793 init_token (op->token); 794 op->line_number_at_start = line_number; 795 for (;;) 796 { 797 int c = do_getc (); 798 if (c == EOF) 799 /* Invalid input. Be tolerant, no error message. */ 800 break; 801 if (c == '"') 802 break; 803 if (c == '\\') 804 { 805 c = do_getc (); 806 if (c == EOF) 807 /* Invalid input. Be tolerant, no error message. */ 808 break; 809 if (c == '\n') 810 /* Ignore escaped newline. */ 811 ; 812 else 813 { 814 c = do_getc_escaped (c); 815 if (c == EOF) 816 /* Invalid input. Be tolerant, no error message. */ 817 break; 818 grow_token (op->token); 819 op->token->chars[op->token->charcount++] = c; 820 } 821 } 822 else 823 { 824 grow_token (op->token); 825 op->token->chars[op->token->charcount++] = c; 826 } 827 } 828 op->type = t_string; 829 830 if (extract_all) 831 { 832 lex_pos_ty pos; 833 834 pos.file_name = logical_file_name; 835 pos.line_number = op->line_number_at_start; 836 remember_a_message (mlp, NULL, string_of_object (op), 837 null_context, &pos, savable_comment); 838 } 839 last_non_comment_line = line_number; 840 return; 841 } 842 843 case '?': 844 c = do_getc (); 845 if (c == EOF) 846 /* Invalid input. Be tolerant, no error message. */ 847 ; 848 else if (c == '\\') 849 { 850 c = do_getc (); 851 if (c == EOF) 852 /* Invalid input. Be tolerant, no error message. */ 853 ; 854 else 855 { 856 c = do_getc_escaped (c); 857 if (c == EOF) 858 /* Invalid input. Be tolerant, no error message. */ 859 ; 860 } 861 } 862 op->type = t_other; 863 last_non_comment_line = line_number; 864 return; 865 866 case '#': 867 /* Dispatch macro handling. */ 868 c = do_getc (); 869 if (c == EOF) 870 /* Invalid input. Be tolerant, no error message. */ 871 { 872 op->type = t_other; 873 return; 874 } 875 876 switch (c) 877 { 878 case '!': 879 if (ftell (fp) == 2) 880 /* Skip comment until !# */ 881 { 882 c = do_getc (); 883 for (;;) 884 { 885 if (c == EOF) 886 break; 887 if (c == '!') 888 { 889 c = do_getc (); 890 if (c == EOF || c == '#') 891 break; 892 } 893 else 894 c = do_getc (); 895 } 896 if (c == EOF) 897 { 898 /* EOF not allowed here. But be tolerant. */ 899 op->type = t_eof; 900 return; 901 } 902 continue; 903 } 904 /*FALLTHROUGH*/ 905 case '\'': 906 case ':': 907 { 908 struct object inner; 909 read_object (&inner, null_context); 910 /* Dots and EOF are not allowed here. 911 But be tolerant. */ 912 free_object (&inner); 913 op->type = t_other; 914 last_non_comment_line = line_number; 915 return; 916 } 917 918 case '[': 919 case '(': 920 { 921 struct object inner; 922 do_ungetc (c); 923 read_object (&inner, null_context); 924 /* Dots and EOF are not allowed here. 925 But be tolerant. */ 926 free_object (&inner); 927 op->type = t_other; 928 last_non_comment_line = line_number; 929 return; 930 } 931 932 case '|': 933 { 934 int depth = 0; 935 936 comment_start (); 937 c = do_getc (); 938 for (;;) 939 { 940 if (c == EOF) 941 break; 942 if (c == '|') 943 { 944 c = do_getc (); 945 if (c == EOF) 946 break; 947 if (c == '#') 948 { 949 if (depth == 0) 950 { 951 comment_line_end (0); 952 break; 953 } 954 depth--; 955 comment_add ('|'); 956 comment_add ('#'); 957 c = do_getc (); 958 } 959 else 960 comment_add ('|'); 961 } 962 else if (c == '#') 963 { 964 c = do_getc (); 965 if (c == EOF) 966 break; 967 comment_add ('#'); 968 if (c == '|') 969 { 970 depth++; 971 comment_add ('|'); 972 c = do_getc (); 973 } 974 } 975 else 976 { 977 /* We skip all leading white space. */ 978 if (!(buflen == 0 && (c == ' ' || c == '\t'))) 979 comment_add (c); 980 if (c == '\n') 981 { 982 comment_line_end (1); 983 comment_start (); 984 } 985 c = do_getc (); 986 } 987 } 988 if (c == EOF) 989 { 990 /* EOF not allowed here. But be tolerant. */ 991 op->type = t_eof; 992 return; 993 } 994 last_comment_line = line_number; 995 continue; 996 } 997 998 case '\\': 999 { 1000 struct token token; 1001 int first = '\\'; 1002 read_token (&token, &first); 1003 free_token (&token); 1004 op->type = t_other; 1005 last_non_comment_line = line_number; 1006 return; 1007 } 1008 1009 case 'T': case 't': 1010 case 'F': case 'f': 1011 op->type = t_other; 1012 last_non_comment_line = line_number; 1013 return; 1014 1015 case 'B': case 'b': 1016 case 'O': case 'o': 1017 case 'D': case 'd': 1018 case 'X': case 'x': 1019 case 'E': case 'e': 1020 case 'I': case 'i': 1021 { 1022 struct token token; 1023 do_ungetc (c); 1024 c = '#'; 1025 read_token (&token, &c); 1026 free_token (&token); 1027 op->type = t_other; 1028 last_non_comment_line = line_number; 1029 return; 1030 } 1031 1032 default: 1033 /* Invalid input. Be tolerant, no error message. */ 1034 op->type = t_other; 1035 last_non_comment_line = line_number; 1036 return; 1037 } 1038 1039 /*NOTREACHED*/ 1040 abort (); 1041 1042 default: 1043 /* Read a token. */ 1044 { 1045 bool symbol; 1046 1047 op->token = (struct token *) xmalloc (sizeof (struct token)); 1048 symbol = read_token (op->token, &c); 1049 if (op->token->charcount == 1 && op->token->chars[0] == '.') 1050 { 1051 free_token (op->token); 1052 free (op->token); 1053 op->type = t_dot; 1054 last_non_comment_line = line_number; 1055 return; 1056 } 1057 if (!symbol) 1058 { 1059 free_token (op->token); 1060 free (op->token); 1061 op->type = t_other; 1062 last_non_comment_line = line_number; 1063 return; 1064 } 1065 /* Distinguish between "foo" and "foo#bar". */ 1066 c = do_getc (); 1067 if (c == '#') 1068 { 1069 struct token second_token; 1070 1071 free_token (op->token); 1072 free (op->token); 1073 read_token (&second_token, NULL); 1074 free_token (&second_token); 1075 op->type = t_other; 1076 last_non_comment_line = line_number; 1077 return; 1078 } 1079 else 1080 { 1081 if (c != EOF) 1082 do_ungetc (c); 1083 op->type = t_symbol; 1084 last_non_comment_line = line_number; 1085 return; 1086 } 1087 } 1088 } 1089 } 1090} 1091 1092 1093void 1094extract_librep (FILE *f, 1095 const char *real_filename, const char *logical_filename, 1096 flag_context_list_table_ty *flag_table, 1097 msgdomain_list_ty *mdlp) 1098{ 1099 mlp = mdlp->item[0]->messages; 1100 1101 fp = f; 1102 real_file_name = real_filename; 1103 logical_file_name = xstrdup (logical_filename); 1104 line_number = 1; 1105 1106 last_comment_line = -1; 1107 last_non_comment_line = -1; 1108 1109 flag_context_list_table = flag_table; 1110 1111 init_keywords (); 1112 1113 /* Eat tokens until eof is seen. When read_object returns 1114 due to an unbalanced closing parenthesis, just restart it. */ 1115 do 1116 { 1117 struct object toplevel_object; 1118 1119 read_object (&toplevel_object, null_context); 1120 1121 if (toplevel_object.type == t_eof) 1122 break; 1123 1124 free_object (&toplevel_object); 1125 } 1126 while (!feof (fp)); 1127 1128 /* Close scanner. */ 1129 fp = NULL; 1130 real_file_name = NULL; 1131 logical_file_name = NULL; 1132 line_number = 0; 1133} 1134