1/* xgettext awk backend. 2 Copyright (C) 2002-2003 Free Software Foundation, Inc. 3 4 This file was written by Bruno Haible <haible@clisp.cons.org>, 2002. 5 6 This program is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 2, or (at your option) 9 any later version. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program; if not, write to the Free Software Foundation, 18 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ 19 20#ifdef HAVE_CONFIG_H 21# include "config.h" 22#endif 23 24#include <errno.h> 25#include <stdbool.h> 26#include <stdio.h> 27#include <stdlib.h> 28#include <string.h> 29 30#include "message.h" 31#include "xgettext.h" 32#include "x-awk.h" 33#include "error.h" 34#include "error-progname.h" 35#include "xalloc.h" 36#include "exit.h" 37#include "gettext.h" 38 39#define _(s) gettext(s) 40 41 42/* The awk syntax is defined in the gawk manual page and documentation. 43 See also gawk/awkgram.y. */ 44 45 46/* ====================== Keyword set customization. ====================== */ 47 48/* If true extract all strings. */ 49static bool extract_all = false; 50 51static hash_table keywords; 52static bool default_keywords = true; 53 54 55void 56x_awk_extract_all () 57{ 58 extract_all = true; 59} 60 61 62void 63x_awk_keyword (const char *name) 64{ 65 if (name == NULL) 66 default_keywords = false; 67 else 68 { 69 const char *end; 70 int argnum1; 71 int argnum2; 72 const char *colon; 73 74 if (keywords.table == NULL) 75 init_hash (&keywords, 100); 76 77 split_keywordspec (name, &end, &argnum1, &argnum2); 78 79 /* The characters between name and end should form a valid C identifier. 80 A colon means an invalid parse in split_keywordspec(). */ 81 colon = strchr (name, ':'); 82 if (colon == NULL || colon >= end) 83 { 84 if (argnum1 == 0) 85 argnum1 = 1; 86 insert_entry (&keywords, name, end - name, 87 (void *) (long) (argnum1 + (argnum2 << 10))); 88 } 89 } 90} 91 92/* Finish initializing the keywords hash table. 93 Called after argument processing, before each file is processed. */ 94static void 95init_keywords () 96{ 97 if (default_keywords) 98 { 99 x_awk_keyword ("dcgettext"); 100 x_awk_keyword ("dcngettext:1,2"); 101 default_keywords = false; 102 } 103} 104 105void 106init_flag_table_awk () 107{ 108 xgettext_record_flag ("dcgettext:1:pass-awk-format"); 109 xgettext_record_flag ("dcngettext:1:pass-awk-format"); 110 xgettext_record_flag ("dcngettext:2:pass-awk-format"); 111 xgettext_record_flag ("printf:1:awk-format"); 112} 113 114 115/* ======================== Reading of characters. ======================== */ 116 117/* Real filename, used in error messages about the input file. */ 118static const char *real_file_name; 119 120/* Logical filename and line number, used to label the extracted messages. */ 121static char *logical_file_name; 122static int line_number; 123 124/* The input file stream. */ 125static FILE *fp; 126 127/* These are for tracking whether comments count as immediately before 128 keyword. */ 129static int last_comment_line; 130static int last_non_comment_line; 131 132 133/* 1. line_number handling. */ 134 135static int 136phase1_getc () 137{ 138 int c = getc (fp); 139 140 if (c == EOF) 141 { 142 if (ferror (fp)) 143 error (EXIT_FAILURE, errno, _("error while reading \"%s\""), 144 real_file_name); 145 return EOF; 146 } 147 148 if (c == '\n') 149 line_number++; 150 151 return c; 152} 153 154/* Supports only one pushback character. */ 155static void 156phase1_ungetc (int c) 157{ 158 if (c != EOF) 159 { 160 if (c == '\n') 161 --line_number; 162 163 ungetc (c, fp); 164 } 165} 166 167 168/* 2. Replace each comment that is not inside a string literal or regular 169 expression with a newline character. We need to remember the comment 170 for later, because it may be attached to a keyword string. */ 171 172static int 173phase2_getc () 174{ 175 static char *buffer; 176 static size_t bufmax; 177 size_t buflen; 178 int lineno; 179 int c; 180 181 c = phase1_getc (); 182 if (c == '#') 183 { 184 buflen = 0; 185 lineno = line_number; 186 for (;;) 187 { 188 c = phase1_getc (); 189 if (c == '\n' || c == EOF) 190 break; 191 /* We skip all leading white space, but not EOLs. */ 192 if (!(buflen == 0 && (c == ' ' || c == '\t'))) 193 { 194 if (buflen >= bufmax) 195 { 196 bufmax = 2 * bufmax + 10; 197 buffer = xrealloc (buffer, bufmax); 198 } 199 buffer[buflen++] = c; 200 } 201 } 202 if (buflen >= bufmax) 203 { 204 bufmax = 2 * bufmax + 10; 205 buffer = xrealloc (buffer, bufmax); 206 } 207 buffer[buflen] = '\0'; 208 xgettext_comment_add (buffer); 209 last_comment_line = lineno; 210 } 211 return c; 212} 213 214/* Supports only one pushback character. */ 215static void 216phase2_ungetc (int c) 217{ 218 if (c != EOF) 219 phase1_ungetc (c); 220} 221 222 223/* ========================== Reading of tokens. ========================== */ 224 225 226enum token_type_ty 227{ 228 token_type_eof, 229 token_type_lparen, /* ( */ 230 token_type_rparen, /* ) */ 231 token_type_comma, /* , */ 232 token_type_string, /* "abc" */ 233 token_type_i18nstring, /* _"abc" */ 234 token_type_symbol, /* symbol, number */ 235 token_type_semicolon, /* ; */ 236 token_type_other /* regexp, misc. operator */ 237}; 238typedef enum token_type_ty token_type_ty; 239 240typedef struct token_ty token_ty; 241struct token_ty 242{ 243 token_type_ty type; 244 char *string; /* for token_type_{symbol,string,i18nstring} */ 245 int line_number; 246}; 247 248 249/* 7. Replace escape sequences within character strings with their 250 single character equivalents. */ 251 252#define P7_QUOTES (1000 + '"') 253 254static int 255phase7_getc () 256{ 257 int c; 258 259 for (;;) 260 { 261 /* Use phase 1, because phase 2 elides comments. */ 262 c = phase1_getc (); 263 264 if (c == EOF || c == '\n') 265 break; 266 if (c == '"') 267 return P7_QUOTES; 268 if (c != '\\') 269 return c; 270 c = phase1_getc (); 271 if (c == EOF) 272 break; 273 if (c != '\n') 274 switch (c) 275 { 276 case 'a': 277 return '\a'; 278 case 'b': 279 return '\b'; 280 case 'f': 281 return '\f'; 282 case 'n': 283 return '\n'; 284 case 'r': 285 return '\r'; 286 case 't': 287 return '\t'; 288 case 'v': 289 return '\v'; 290 case '0': case '1': case '2': case '3': case '4': 291 case '5': case '6': case '7': 292 { 293 int n = c - '0'; 294 295 c = phase1_getc (); 296 if (c != EOF) 297 { 298 if (c >= '0' && c <= '7') 299 { 300 n = (n << 3) + (c - '0'); 301 c = phase1_getc (); 302 if (c != EOF) 303 { 304 if (c >= '0' && c <= '7') 305 n = (n << 3) + (c - '0'); 306 else 307 phase1_ungetc (c); 308 } 309 } 310 else 311 phase1_ungetc (c); 312 } 313 return (unsigned char) n; 314 } 315 case 'x': 316 { 317 int n = 0; 318 319 for (;;) 320 { 321 c = phase1_getc (); 322 if (c == EOF) 323 break; 324 else if (c >= '0' && c <= '9') 325 n = (n << 4) + (c - '0'); 326 else if (c >= 'A' && c <= 'F') 327 n = (n << 4) + (c - 'A' + 10); 328 else if (c >= 'a' && c <= 'f') 329 n = (n << 4) + (c - 'a' + 10); 330 else 331 { 332 phase1_ungetc (c); 333 break; 334 } 335 } 336 return (unsigned char) n; 337 } 338 default: 339 return c; 340 } 341 } 342 343 phase1_ungetc (c); 344 error_with_progname = false; 345 error (0, 0, _("%s:%d: warning: unterminated string"), logical_file_name, 346 line_number); 347 error_with_progname = true; 348 return P7_QUOTES; 349} 350 351 352/* Free the memory pointed to by a 'struct token_ty'. */ 353static inline void 354free_token (token_ty *tp) 355{ 356 switch (tp->type) 357 { 358 case token_type_string: 359 case token_type_i18nstring: 360 case token_type_symbol: 361 free (tp->string); 362 break; 363 default: 364 break; 365 } 366} 367 368 369/* Combine characters into tokens. Discard whitespace. */ 370 371/* There is an ambiguity about '/': It can start a division operator ('/' or 372 '/=') or it can start a regular expression. The distinction is important 373 because inside regular expressions, '#' and '"' lose its special meanings. 374 If you look at the awk grammar, you see that the operator is only allowed 375 right after a 'variable' or 'simp_exp' nonterminal, and these nonterminals 376 can only end in the NAME, LENGTH, YSTRING, YNUMBER, ')', ']' terminals. 377 So we prefer the division operator interpretation only right after 378 symbol, string, number, ')', ']', with whitespace but no newline allowed 379 in between. */ 380static bool prefer_division_over_regexp; 381 382static void 383x_awk_lex (token_ty *tp) 384{ 385 static char *buffer; 386 static int bufmax; 387 int bufpos; 388 int c; 389 390 for (;;) 391 { 392 tp->line_number = line_number; 393 c = phase2_getc (); 394 395 switch (c) 396 { 397 case EOF: 398 tp->type = token_type_eof; 399 return; 400 401 case '\n': 402 if (last_non_comment_line > last_comment_line) 403 xgettext_comment_reset (); 404 /* Newline is not allowed inside expressions. It usually 405 introduces a fresh statement. 406 FIXME: Newlines after any of ',' '{' '?' ':' '||' '&&' 'do' 'else' 407 does *not* introduce a fresh statement. */ 408 prefer_division_over_regexp = false; 409 /* FALLTHROUGH */ 410 case '\t': 411 case ' ': 412 /* Ignore whitespace and comments. */ 413 continue; 414 415 case '\\': 416 /* Backslash ought to be immediately followed by a newline. */ 417 continue; 418 } 419 420 last_non_comment_line = tp->line_number; 421 422 switch (c) 423 { 424 case '.': 425 { 426 int c2 = phase2_getc (); 427 phase2_ungetc (c2); 428 if (!(c2 >= '0' && c2 <= '9')) 429 { 430 431 tp->type = token_type_other; 432 prefer_division_over_regexp = false; 433 return; 434 } 435 } 436 /* FALLTHROUGH */ 437 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 438 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': 439 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': 440 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': 441 case 'Y': case 'Z': 442 case '_': 443 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 444 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': 445 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': 446 case 's': case 't': case 'u': case 'v': case 'w': case 'x': 447 case 'y': case 'z': 448 case '0': case '1': case '2': case '3': case '4': 449 case '5': case '6': case '7': case '8': case '9': 450 /* Symbol, or part of a number. */ 451 bufpos = 0; 452 for (;;) 453 { 454 if (bufpos >= bufmax) 455 { 456 bufmax = 2 * bufmax + 10; 457 buffer = xrealloc (buffer, bufmax); 458 } 459 buffer[bufpos++] = c; 460 c = phase2_getc (); 461 switch (c) 462 { 463 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 464 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': 465 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': 466 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': 467 case 'Y': case 'Z': 468 case '_': 469 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 470 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': 471 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': 472 case 's': case 't': case 'u': case 'v': case 'w': case 'x': 473 case 'y': case 'z': 474 case '0': case '1': case '2': case '3': case '4': 475 case '5': case '6': case '7': case '8': case '9': 476 continue; 477 default: 478 if (bufpos == 1 && buffer[0] == '_' && c == '"') 479 { 480 tp->type = token_type_i18nstring; 481 goto case_string; 482 } 483 phase2_ungetc (c); 484 break; 485 } 486 break; 487 } 488 if (bufpos >= bufmax) 489 { 490 bufmax = 2 * bufmax + 10; 491 buffer = xrealloc (buffer, bufmax); 492 } 493 buffer[bufpos] = '\0'; 494 tp->string = xstrdup (buffer); 495 tp->type = token_type_symbol; 496 /* Most identifiers can be variable names; after them we must 497 interpret '/' as division operator. But for awk's builtin 498 keywords we have three cases: 499 (a) Must interpret '/' as division operator. "length". 500 (b) Must interpret '/' as start of a regular expression. 501 "do", "exit", "print", "printf", "return". 502 (c) '/' after this keyword in invalid anyway. All others. 503 I used the following script for the distinction. 504 for k in $awk_keywords; do 505 echo; echo $k; awk "function foo () { $k / 10 }" < /dev/null 506 done 507 */ 508 if (strcmp (buffer, "do") == 0 509 || strcmp (buffer, "exit") == 0 510 || strcmp (buffer, "print") == 0 511 || strcmp (buffer, "printf") == 0 512 || strcmp (buffer, "return") == 0) 513 prefer_division_over_regexp = false; 514 else 515 prefer_division_over_regexp = true; 516 return; 517 518 case '"': 519 tp->type = token_type_string; 520 case_string: 521 bufpos = 0; 522 for (;;) 523 { 524 c = phase7_getc (); 525 if (c == EOF || c == P7_QUOTES) 526 break; 527 if (bufpos >= bufmax) 528 { 529 bufmax = 2 * bufmax + 10; 530 buffer = xrealloc (buffer, bufmax); 531 } 532 buffer[bufpos++] = c; 533 } 534 if (bufpos >= bufmax) 535 { 536 bufmax = 2 * bufmax + 10; 537 buffer = xrealloc (buffer, bufmax); 538 } 539 buffer[bufpos] = '\0'; 540 tp->string = xstrdup (buffer); 541 prefer_division_over_regexp = true; 542 return; 543 544 case '(': 545 tp->type = token_type_lparen; 546 prefer_division_over_regexp = false; 547 return; 548 549 case ')': 550 tp->type = token_type_rparen; 551 prefer_division_over_regexp = true; 552 return; 553 554 case ',': 555 tp->type = token_type_comma; 556 prefer_division_over_regexp = false; 557 return; 558 559 case ';': 560 tp->type = token_type_semicolon; 561 prefer_division_over_regexp = false; 562 return; 563 564 case ']': 565 tp->type = token_type_other; 566 prefer_division_over_regexp = true; 567 return; 568 569 case '/': 570 if (!prefer_division_over_regexp) 571 { 572 /* Regular expression. 573 Counting brackets is non-trivial. [[] is balanced, and so is 574 [\]]. Also, /[/]/ is balanced and ends at the third slash. 575 Do not count [ or ] if either one is preceded by a \. 576 A '[' should be counted if 577 a) it is the first one so far (brackets == 0), or 578 b) it is the '[' in '[:'. 579 A ']' should be counted if not preceded by a \. 580 According to POSIX, []] is how you put a ] into a set. 581 Try to handle that too. 582 */ 583 int brackets = 0; 584 bool pos0 = true; /* true at start of regexp */ 585 bool pos1_open = false; /* true after [ at start of regexp */ 586 bool pos2_open_not = false; /* true after [^ at start of regexp */ 587 588 for (;;) 589 { 590 c = phase1_getc (); 591 592 if (c == EOF || c == '\n') 593 { 594 phase1_ungetc (c); 595 error_with_progname = false; 596 error (0, 0, _("%s:%d: warning: unterminated regular expression"), 597 logical_file_name, line_number); 598 error_with_progname = true; 599 break; 600 } 601 else if (c == '[') 602 { 603 if (brackets == 0) 604 brackets++; 605 else 606 { 607 c = phase1_getc (); 608 if (c == ':') 609 brackets++; 610 phase1_ungetc (c); 611 } 612 if (pos0) 613 { 614 pos0 = false; 615 pos1_open = true; 616 continue; 617 } 618 } 619 else if (c == ']') 620 { 621 if (!(pos1_open || pos2_open_not)) 622 brackets--; 623 } 624 else if (c == '^') 625 { 626 if (pos1_open) 627 { 628 pos1_open = false; 629 pos2_open_not = true; 630 continue; 631 } 632 } 633 else if (c == '\\') 634 { 635 c = phase1_getc (); 636 /* Backslash-newline is valid and ignored. */ 637 } 638 else if (c == '/') 639 { 640 if (brackets <= 0) 641 break; 642 } 643 644 pos0 = false; 645 pos1_open = false; 646 pos2_open_not = false; 647 } 648 649 tp->type = token_type_other; 650 prefer_division_over_regexp = false; 651 return; 652 } 653 /* FALLTHROUGH */ 654 655 default: 656 /* We could carefully recognize each of the 2 and 3 character 657 operators, but it is not necessary, as we only need to recognize 658 gettext invocations. Don't bother. */ 659 tp->type = token_type_other; 660 prefer_division_over_regexp = false; 661 return; 662 } 663 } 664} 665 666 667/* ========================= Extracting strings. ========================== */ 668 669 670/* Context lookup table. */ 671static flag_context_list_table_ty *flag_context_list_table; 672 673 674/* The file is broken into tokens. Scan the token stream, looking for 675 a keyword, followed by a left paren, followed by a string. When we 676 see this sequence, we have something to remember. We assume we are 677 looking at a valid C or C++ program, and leave the complaints about 678 the grammar to the compiler. 679 680 Normal handling: Look for 681 keyword ( ... msgid ... ) 682 Plural handling: Look for 683 keyword ( ... msgid ... msgid_plural ... ) 684 685 We use recursion because the arguments before msgid or between msgid 686 and msgid_plural can contain subexpressions of the same form. */ 687 688 689/* Extract messages until the next balanced closing parenthesis. 690 Extracted messages are added to MLP. 691 When a specific argument shall be extracted, COMMAS_TO_SKIP >= 0 and, 692 if also a plural argument shall be extracted, PLURAL_COMMAS > 0, 693 otherwise PLURAL_COMMAS = 0. 694 When no specific argument shall be extracted, COMMAS_TO_SKIP < 0. 695 Return true upon eof, false upon closing parenthesis. */ 696static bool 697extract_parenthesized (message_list_ty *mlp, 698 flag_context_ty outer_context, 699 flag_context_list_iterator_ty context_iter, 700 int commas_to_skip, int plural_commas) 701{ 702 /* Remember the message containing the msgid, for msgid_plural. */ 703 message_ty *plural_mp = NULL; 704 705 /* 0 when no keyword has been seen. 1 right after a keyword is seen. */ 706 int state; 707 /* Parameters of the keyword just seen. Defined only in state 1. */ 708 int next_commas_to_skip = -1; 709 int next_plural_commas = 0; 710 /* Whether to implicitly assume the next tokens are arguments even without 711 a '('. */ 712 bool next_is_argument = false; 713 /* Context iterator that will be used if the next token is a '('. */ 714 flag_context_list_iterator_ty next_context_iter = 715 passthrough_context_list_iterator; 716 /* Current context. */ 717 flag_context_ty inner_context = 718 inherited_context (outer_context, 719 flag_context_list_iterator_advance (&context_iter)); 720 721 /* Start state is 0. */ 722 state = 0; 723 724 for (;;) 725 { 726 token_ty token; 727 728 x_awk_lex (&token); 729 730 if (next_is_argument && token.type != token_type_lparen) 731 { 732 /* An argument list starts, even though there is no '('. */ 733 context_iter = next_context_iter; 734 outer_context = inner_context; 735 inner_context = 736 inherited_context (outer_context, 737 flag_context_list_iterator_advance ( 738 &context_iter)); 739 } 740 741 switch (token.type) 742 { 743 case token_type_symbol: 744 { 745 void *keyword_value; 746 747 if (find_entry (&keywords, token.string, strlen (token.string), 748 &keyword_value) 749 == 0) 750 { 751 int argnum1 = (int) (long) keyword_value & ((1 << 10) - 1); 752 int argnum2 = (int) (long) keyword_value >> 10; 753 754 next_commas_to_skip = argnum1 - 1; 755 next_plural_commas = (argnum2 > argnum1 ? argnum2 - argnum1 : 0); 756 state = 1; 757 } 758 else 759 state = 0; 760 } 761 next_is_argument = 762 (strcmp (token.string, "print") == 0 763 || strcmp (token.string, "printf") == 0); 764 next_context_iter = 765 flag_context_list_iterator ( 766 flag_context_list_table_lookup ( 767 flag_context_list_table, 768 token.string, strlen (token.string))); 769 free (token.string); 770 continue; 771 772 case token_type_lparen: 773 if (extract_parenthesized (mlp, inner_context, next_context_iter, 774 state ? next_commas_to_skip : -1, 775 state ? next_plural_commas : 0)) 776 return true; 777 next_is_argument = false; 778 next_context_iter = null_context_list_iterator; 779 state = 0; 780 continue; 781 782 case token_type_rparen: 783 return false; 784 785 case token_type_comma: 786 if (commas_to_skip >= 0) 787 { 788 if (commas_to_skip > 0) 789 commas_to_skip--; 790 else 791 if (plural_mp != NULL && plural_commas > 0) 792 { 793 commas_to_skip = plural_commas - 1; 794 plural_commas = 0; 795 } 796 else 797 commas_to_skip = -1; 798 } 799 inner_context = 800 inherited_context (outer_context, 801 flag_context_list_iterator_advance ( 802 &context_iter)); 803 next_is_argument = false; 804 next_context_iter = passthrough_context_list_iterator; 805 state = 0; 806 continue; 807 808 case token_type_string: 809 { 810 lex_pos_ty pos; 811 pos.file_name = logical_file_name; 812 pos.line_number = token.line_number; 813 814 if (extract_all) 815 remember_a_message (mlp, token.string, inner_context, &pos); 816 else 817 { 818 if (commas_to_skip == 0) 819 { 820 if (plural_mp == NULL) 821 { 822 /* Seen an msgid. */ 823 message_ty *mp = 824 remember_a_message (mlp, token.string, 825 inner_context, &pos); 826 if (plural_commas > 0) 827 plural_mp = mp; 828 } 829 else 830 { 831 /* Seen an msgid_plural. */ 832 remember_a_message_plural (plural_mp, token.string, 833 inner_context, &pos); 834 plural_mp = NULL; 835 } 836 } 837 else 838 free (token.string); 839 } 840 } 841 next_is_argument = false; 842 next_context_iter = null_context_list_iterator; 843 state = 0; 844 continue; 845 846 case token_type_i18nstring: 847 { 848 lex_pos_ty pos; 849 pos.file_name = logical_file_name; 850 pos.line_number = token.line_number; 851 852 remember_a_message (mlp, token.string, inner_context, &pos); 853 } 854 next_is_argument = false; 855 next_context_iter = null_context_list_iterator; 856 state = 0; 857 continue; 858 859 case token_type_semicolon: 860 /* An argument list ends, and a new statement begins. */ 861 /* FIXME: Should handle newline that acts as statement separator 862 in the same way. */ 863 /* FIXME: Instead of resetting outer_context here, it may be better 864 to recurse in the next_is_argument handling above, waiting for 865 the next semicolon or other statement terminator. */ 866 outer_context = null_context; 867 context_iter = null_context_list_iterator; 868 next_is_argument = false; 869 next_context_iter = passthrough_context_list_iterator; 870 inner_context = 871 inherited_context (outer_context, 872 flag_context_list_iterator_advance ( 873 &context_iter)); 874 state = 0; 875 continue; 876 877 case token_type_eof: 878 return true; 879 880 case token_type_other: 881 next_is_argument = false; 882 next_context_iter = null_context_list_iterator; 883 state = 0; 884 continue; 885 886 default: 887 abort (); 888 } 889 } 890} 891 892 893void 894extract_awk (FILE *f, 895 const char *real_filename, const char *logical_filename, 896 flag_context_list_table_ty *flag_table, 897 msgdomain_list_ty *mdlp) 898{ 899 message_list_ty *mlp = mdlp->item[0]->messages; 900 901 fp = f; 902 real_file_name = real_filename; 903 logical_file_name = xstrdup (logical_filename); 904 line_number = 1; 905 906 last_comment_line = -1; 907 last_non_comment_line = -1; 908 909 prefer_division_over_regexp = false; 910 911 flag_context_list_table = flag_table; 912 913 init_keywords (); 914 915 /* Eat tokens until eof is seen. When extract_parenthesized returns 916 due to an unbalanced closing parenthesis, just restart it. */ 917 while (!extract_parenthesized (mlp, null_context, null_context_list_iterator, 918 -1, 0)) 919 ; 920 921 fp = NULL; 922 real_file_name = NULL; 923 logical_file_name = NULL; 924 line_number = 0; 925} 926