1/* xgettext awk backend.
2   Copyright (C) 2002-2003, 2005-2006 Free Software Foundation, Inc.
3
4   This file was written by Bruno Haible <haible@clisp.cons.org>, 2002.
5
6   This program is free software; you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 2, or (at your option)
9   any later version.
10
11   This program is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14   GNU General Public License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with this program; if not, write to the Free Software Foundation,
18   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
19
20#ifdef HAVE_CONFIG_H
21# include "config.h"
22#endif
23
24#include <errno.h>
25#include <stdbool.h>
26#include <stdio.h>
27#include <stdlib.h>
28#include <string.h>
29
30#include "message.h"
31#include "xgettext.h"
32#include "x-awk.h"
33#include "error.h"
34#include "error-progname.h"
35#include "xalloc.h"
36#include "exit.h"
37#include "gettext.h"
38
39#define _(s) gettext(s)
40
41
42/* The awk syntax is defined in the gawk manual page and documentation.
43   See also gawk/awkgram.y.  */
44
45
46/* ====================== Keyword set customization.  ====================== */
47
/* If true extract all strings.  Set by x_awk_extract_all().  */
static bool extract_all = false;

/* Keyword table.  It is created on first use by x_awk_keyword(); the value
   stored for a keyword is used as a 'struct callshapes' by
   extract_parenthesized().  */
static hash_table keywords;
/* True as long as the built-in default keywords still have to be added.
   Cleared by x_awk_keyword (NULL) and by init_keywords().  */
static bool default_keywords = true;
53
54
55void
56x_awk_extract_all ()
57{
58  extract_all = true;
59}
60
61
62void
63x_awk_keyword (const char *name)
64{
65  if (name == NULL)
66    default_keywords = false;
67  else
68    {
69      const char *end;
70      struct callshape shape;
71      const char *colon;
72
73      if (keywords.table == NULL)
74	hash_init (&keywords, 100);
75
76      split_keywordspec (name, &end, &shape);
77
78      /* The characters between name and end should form a valid C identifier.
79	 A colon means an invalid parse in split_keywordspec().  */
80      colon = strchr (name, ':');
81      if (colon == NULL || colon >= end)
82	insert_keyword_callshape (&keywords, name, end - name, &shape);
83    }
84}
85
86/* Finish initializing the keywords hash table.
87   Called after argument processing, before each file is processed.  */
88static void
89init_keywords ()
90{
91  if (default_keywords)
92    {
93      /* When adding new keywords here, also update the documentation in
94	 xgettext.texi!  */
95      x_awk_keyword ("dcgettext");
96      x_awk_keyword ("dcngettext:1,2");
97      default_keywords = false;
98    }
99}
100
/* Record the format string flags of the awk functions handled by this
   backend.  Each specification has the form "function:argnum:flag".  */
void
init_flag_table_awk (void)
{
  xgettext_record_flag ("dcgettext:1:pass-awk-format");
  xgettext_record_flag ("dcngettext:1:pass-awk-format");
  xgettext_record_flag ("dcngettext:2:pass-awk-format");
  xgettext_record_flag ("printf:1:awk-format");
}
109
110
111/* ======================== Reading of characters.  ======================== */
112
/* Real filename, used in error messages about the input file.  */
static const char *real_file_name;

/* Logical filename and line number, used to label the extracted messages.
   line_number is kept up to date by phase1_getc() and phase1_ungetc().  */
static char *logical_file_name;
static int line_number;

/* The input file stream.  */
static FILE *fp;

/* These are for tracking whether comments count as immediately before
   keyword.  last_comment_line is the line of the most recent '#' comment,
   last_non_comment_line is the line of the most recent token.  At a newline
   the lexer discards the saved comments if a token came after the last
   comment.  */
static int last_comment_line;
static int last_non_comment_line;
127
128
129/* 1. line_number handling.  */
130
131static int
132phase1_getc ()
133{
134  int c = getc (fp);
135
136  if (c == EOF)
137    {
138      if (ferror (fp))
139	error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
140	       real_file_name);
141      return EOF;
142    }
143
144  if (c == '\n')
145    line_number++;
146
147  return c;
148}
149
150/* Supports only one pushback character.  */
151static void
152phase1_ungetc (int c)
153{
154  if (c != EOF)
155    {
156      if (c == '\n')
157	--line_number;
158
159      ungetc (c, fp);
160    }
161}
162
163
164/* 2. Replace each comment that is not inside a string literal or regular
165   expression with a newline character.  We need to remember the comment
166   for later, because it may be attached to a keyword string.  */
167
168static int
169phase2_getc ()
170{
171  static char *buffer;
172  static size_t bufmax;
173  size_t buflen;
174  int lineno;
175  int c;
176
177  c = phase1_getc ();
178  if (c == '#')
179    {
180      buflen = 0;
181      lineno = line_number;
182      for (;;)
183	{
184	  c = phase1_getc ();
185	  if (c == '\n' || c == EOF)
186	    break;
187	  /* We skip all leading white space, but not EOLs.  */
188	  if (!(buflen == 0 && (c == ' ' || c == '\t')))
189	    {
190	      if (buflen >= bufmax)
191		{
192		  bufmax = 2 * bufmax + 10;
193		  buffer = xrealloc (buffer, bufmax);
194		}
195	      buffer[buflen++] = c;
196	    }
197	}
198      if (buflen >= bufmax)
199	{
200	  bufmax = 2 * bufmax + 10;
201	  buffer = xrealloc (buffer, bufmax);
202	}
203      buffer[buflen] = '\0';
204      savable_comment_add (buffer);
205      last_comment_line = lineno;
206    }
207  return c;
208}
209
/* Push C back for the next phase2_getc() call.  EOF is never pushed back.
   Supports only one pushback character.  */
static void
phase2_ungetc (int c)
{
  if (c == EOF)
    return;

  phase1_ungetc (c);
}
217
218
219/* ========================== Reading of tokens.  ========================== */
220
221
/* The kinds of tokens produced by the lexer.  */
typedef enum token_type_ty
{
  token_type_eof,
  token_type_lparen,		/* ( */
  token_type_rparen,		/* ) */
  token_type_comma,		/* , */
  token_type_string,		/* "abc" */
  token_type_i18nstring,	/* _"abc" */
  token_type_symbol,		/* symbol, number */
  token_type_semicolon,		/* ; */
  token_type_other		/* regexp, misc. operator */
} token_type_ty;

/* A token, together with the line on which it was read.  */
typedef struct token_ty
{
  token_type_ty type;
  char *string;		/* for token_type_{symbol,string,i18nstring} */
  int line_number;
} token_ty;
243
244
245/* 7. Replace escape sequences within character strings with their
246   single character equivalents.  */
247
248#define P7_QUOTES (1000 + '"')
249
250static int
251phase7_getc ()
252{
253  int c;
254
255  for (;;)
256    {
257      /* Use phase 1, because phase 2 elides comments.  */
258      c = phase1_getc ();
259
260      if (c == EOF || c == '\n')
261	break;
262      if (c == '"')
263	return P7_QUOTES;
264      if (c != '\\')
265	return c;
266      c = phase1_getc ();
267      if (c == EOF)
268	break;
269      if (c != '\n')
270	switch (c)
271	  {
272	  case 'a':
273	    return '\a';
274	  case 'b':
275	    return '\b';
276	  case 'f':
277	    return '\f';
278	  case 'n':
279	    return '\n';
280	  case 'r':
281	    return '\r';
282	  case 't':
283	    return '\t';
284	  case 'v':
285	    return '\v';
286	  case '0': case '1': case '2': case '3': case '4':
287	  case '5': case '6': case '7':
288	    {
289	      int n = c - '0';
290
291	      c = phase1_getc ();
292	      if (c != EOF)
293		{
294		  if (c >= '0' && c <= '7')
295		    {
296		      n = (n << 3) + (c - '0');
297		      c = phase1_getc ();
298		      if (c != EOF)
299			{
300			  if (c >= '0' && c <= '7')
301			    n = (n << 3) + (c - '0');
302			  else
303			    phase1_ungetc (c);
304			}
305		    }
306		  else
307		    phase1_ungetc (c);
308		}
309	      return (unsigned char) n;
310	    }
311	  case 'x':
312	    {
313	      int n = 0;
314
315	      for (;;)
316		{
317		  c = phase1_getc ();
318		  if (c == EOF)
319		    break;
320		  else if (c >= '0' && c <= '9')
321		    n = (n << 4) + (c - '0');
322		  else if (c >= 'A' && c <= 'F')
323		    n = (n << 4) + (c - 'A' + 10);
324		  else if (c >= 'a' && c <= 'f')
325		    n = (n << 4) + (c - 'a' + 10);
326		  else
327		    {
328		      phase1_ungetc (c);
329		      break;
330		    }
331		}
332	      return (unsigned char) n;
333	    }
334	  default:
335	    return c;
336	  }
337    }
338
339  phase1_ungetc (c);
340  error_with_progname = false;
341  error (0, 0, _("%s:%d: warning: unterminated string"), logical_file_name,
342	 line_number);
343  error_with_progname = true;
344  return P7_QUOTES;
345}
346
347
348/* Free the memory pointed to by a 'struct token_ty'.  */
349static inline void
350free_token (token_ty *tp)
351{
352  switch (tp->type)
353    {
354    case token_type_string:
355    case token_type_i18nstring:
356    case token_type_symbol:
357      free (tp->string);
358      break;
359    default:
360      break;
361    }
362}
363
364
/* Combine characters into tokens.  Discard whitespace.  */

/* There is an ambiguity about '/': It can start a division operator ('/' or
   '/=') or it can start a regular expression.  The distinction is important
   because inside regular expressions, '#' and '"' lose their special
   meanings.
   If you look at the awk grammar, you see that the operator is only allowed
   right after a 'variable' or 'simp_exp' nonterminal, and these nonterminals
   can only end in the NAME, LENGTH, YSTRING, YNUMBER, ')', ']' terminals.
   So we prefer the division operator interpretation only right after
   symbol, string, number, ')', ']', with whitespace but no newline allowed
   in between.
   The flag is updated by x_awk_lex() after every token and at every
   newline.  */
static bool prefer_division_over_regexp;
377
378static void
379x_awk_lex (token_ty *tp)
380{
381  static char *buffer;
382  static int bufmax;
383  int bufpos;
384  int c;
385
386  for (;;)
387    {
388      tp->line_number = line_number;
389      c = phase2_getc ();
390
391      switch (c)
392	{
393	case EOF:
394	  tp->type = token_type_eof;
395	  return;
396
397	case '\n':
398	  if (last_non_comment_line > last_comment_line)
399	    savable_comment_reset ();
400	  /* Newline is not allowed inside expressions.  It usually
401	     introduces a fresh statement.
402	     FIXME: Newlines after any of ',' '{' '?' ':' '||' '&&' 'do' 'else'
403	     does *not* introduce a fresh statement.  */
404	  prefer_division_over_regexp = false;
405	  /* FALLTHROUGH */
406	case '\t':
407	case ' ':
408	  /* Ignore whitespace and comments.  */
409	  continue;
410
411	case '\\':
412	  /* Backslash ought to be immediately followed by a newline.  */
413	  continue;
414	}
415
416      last_non_comment_line = tp->line_number;
417
418      switch (c)
419	{
420	case '.':
421	  {
422	    int c2 = phase2_getc ();
423	    phase2_ungetc (c2);
424	    if (!(c2 >= '0' && c2 <= '9'))
425	      {
426
427		tp->type = token_type_other;
428		prefer_division_over_regexp = false;
429		return;
430	      }
431	  }
432	  /* FALLTHROUGH */
433	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
434	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
435	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
436	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
437	case 'Y': case 'Z':
438	case '_':
439	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
440	case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
441	case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
442	case 's': case 't': case 'u': case 'v': case 'w': case 'x':
443	case 'y': case 'z':
444	case '0': case '1': case '2': case '3': case '4':
445	case '5': case '6': case '7': case '8': case '9':
446	  /* Symbol, or part of a number.  */
447	  bufpos = 0;
448	  for (;;)
449	    {
450	      if (bufpos >= bufmax)
451		{
452		  bufmax = 2 * bufmax + 10;
453		  buffer = xrealloc (buffer, bufmax);
454		}
455	      buffer[bufpos++] = c;
456	      c = phase2_getc ();
457	      switch (c)
458		{
459		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
460		case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
461		case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
462		case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
463		case 'Y': case 'Z':
464		case '_':
465		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
466		case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
467		case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
468		case 's': case 't': case 'u': case 'v': case 'w': case 'x':
469		case 'y': case 'z':
470		case '0': case '1': case '2': case '3': case '4':
471		case '5': case '6': case '7': case '8': case '9':
472		  continue;
473		default:
474		  if (bufpos == 1 && buffer[0] == '_' && c == '"')
475		    {
476		      tp->type = token_type_i18nstring;
477		      goto case_string;
478		    }
479		  phase2_ungetc (c);
480		  break;
481		}
482	      break;
483	    }
484	  if (bufpos >= bufmax)
485	    {
486	      bufmax = 2 * bufmax + 10;
487	      buffer = xrealloc (buffer, bufmax);
488	    }
489	  buffer[bufpos] = '\0';
490	  tp->string = xstrdup (buffer);
491	  tp->type = token_type_symbol;
492	  /* Most identifiers can be variable names; after them we must
493	     interpret '/' as division operator.  But for awk's builtin
494	     keywords we have three cases:
495	     (a) Must interpret '/' as division operator. "length".
496	     (b) Must interpret '/' as start of a regular expression.
497		 "do", "exit", "print", "printf", "return".
498	     (c) '/' after this keyword in invalid anyway. All others.
499	     I used the following script for the distinction.
500		for k in $awk_keywords; do
501		  echo; echo $k; awk "function foo () { $k / 10 }" < /dev/null
502		done
503	   */
504	  if (strcmp (buffer, "do") == 0
505	      || strcmp (buffer, "exit") == 0
506	      || strcmp (buffer, "print") == 0
507	      || strcmp (buffer, "printf") == 0
508	      || strcmp (buffer, "return") == 0)
509	    prefer_division_over_regexp = false;
510	  else
511	    prefer_division_over_regexp = true;
512	  return;
513
514	case '"':
515	  tp->type = token_type_string;
516	case_string:
517	  bufpos = 0;
518	  for (;;)
519	    {
520	      c = phase7_getc ();
521	      if (c == EOF || c == P7_QUOTES)
522		break;
523	      if (bufpos >= bufmax)
524		{
525		  bufmax = 2 * bufmax + 10;
526		  buffer = xrealloc (buffer, bufmax);
527		}
528	      buffer[bufpos++] = c;
529	    }
530	  if (bufpos >= bufmax)
531	    {
532	      bufmax = 2 * bufmax + 10;
533	      buffer = xrealloc (buffer, bufmax);
534	    }
535	  buffer[bufpos] = '\0';
536	  tp->string = xstrdup (buffer);
537	  prefer_division_over_regexp = true;
538	  return;
539
540	case '(':
541	  tp->type = token_type_lparen;
542	  prefer_division_over_regexp = false;
543	  return;
544
545	case ')':
546	  tp->type = token_type_rparen;
547	  prefer_division_over_regexp = true;
548	  return;
549
550	case ',':
551	  tp->type = token_type_comma;
552	  prefer_division_over_regexp = false;
553	  return;
554
555	case ';':
556	  tp->type = token_type_semicolon;
557	  prefer_division_over_regexp = false;
558	  return;
559
560	case ']':
561	  tp->type = token_type_other;
562	  prefer_division_over_regexp = true;
563	  return;
564
565	case '/':
566	  if (!prefer_division_over_regexp)
567	    {
568	      /* Regular expression.
569	         Counting brackets is non-trivial. [[] is balanced, and so is
570	         [\]]. Also, /[/]/ is balanced and ends at the third slash.
571	         Do not count [ or ] if either one is preceded by a \.
572	         A '[' should be counted if
573	          a) it is the first one so far (brackets == 0), or
574	          b) it is the '[' in '[:'.
575	         A ']' should be counted if not preceded by a \.
576	         According to POSIX, []] is how you put a ] into a set.
577	         Try to handle that too.
578	       */
579	      int brackets = 0;
580	      bool pos0 = true;		/* true at start of regexp */
581	      bool pos1_open = false;	/* true after [ at start of regexp */
582	      bool pos2_open_not = false; /* true after [^ at start of regexp */
583
584	      for (;;)
585		{
586		  c = phase1_getc ();
587
588		  if (c == EOF || c == '\n')
589		    {
590		      phase1_ungetc (c);
591		      error_with_progname = false;
592		      error (0, 0, _("%s:%d: warning: unterminated regular expression"),
593			     logical_file_name, line_number);
594		      error_with_progname = true;
595		      break;
596		    }
597		  else if (c == '[')
598		    {
599		      if (brackets == 0)
600			brackets++;
601		      else
602			{
603			  c = phase1_getc ();
604			  if (c == ':')
605			    brackets++;
606			  phase1_ungetc (c);
607			}
608		      if (pos0)
609			{
610			  pos0 = false;
611			  pos1_open = true;
612			  continue;
613			}
614		    }
615		  else if (c == ']')
616		    {
617		      if (!(pos1_open || pos2_open_not))
618			brackets--;
619		    }
620		  else if (c == '^')
621		    {
622		      if (pos1_open)
623			{
624			  pos1_open = false;
625			  pos2_open_not = true;
626			  continue;
627			}
628		    }
629		  else if (c == '\\')
630		    {
631		      c = phase1_getc ();
632		      /* Backslash-newline is valid and ignored.  */
633		    }
634		  else if (c == '/')
635		    {
636		      if (brackets <= 0)
637			break;
638		    }
639
640		  pos0 = false;
641		  pos1_open = false;
642		  pos2_open_not = false;
643		}
644
645	      tp->type = token_type_other;
646	      prefer_division_over_regexp = false;
647	      return;
648	    }
649	  /* FALLTHROUGH */
650
651	default:
652	  /* We could carefully recognize each of the 2 and 3 character
653	     operators, but it is not necessary, as we only need to recognize
654	     gettext invocations.  Don't bother.  */
655	  tp->type = token_type_other;
656	  prefer_division_over_regexp = false;
657	  return;
658	}
659    }
660}
661
662
663/* ========================= Extracting strings.  ========================== */
664
665
/* Context lookup table.  Set by extract_awk() for the file being processed,
   and consulted by extract_parenthesized() for every symbol token.  */
static flag_context_list_table_ty *flag_context_list_table;
668
669
/* The file is broken into tokens.  Scan the token stream, looking for
   a keyword, followed by a left paren, followed by a string.  When we
   see this sequence, we have something to remember.  We assume we are
   looking at a valid awk program, and leave the complaints about
   the grammar to the awk interpreter.

     Normal handling: Look for
       keyword ( ... msgid ... )
     Plural handling: Look for
       keyword ( ... msgid ... msgid_plural ... )

   We use recursion because the arguments before msgid or between msgid
   and msgid_plural can contain subexpressions of the same form.  */
683
684
685/* Extract messages until the next balanced closing parenthesis.
686   Extracted messages are added to MLP.
687   Return true upon eof, false upon closing parenthesis.  */
688static bool
689extract_parenthesized (message_list_ty *mlp,
690		       flag_context_ty outer_context,
691		       flag_context_list_iterator_ty context_iter,
692		       struct arglist_parser *argparser)
693{
694  /* Current argument number.  */
695  int arg = 1;
696  /* 0 when no keyword has been seen.  1 right after a keyword is seen.  */
697  int state;
698  /* Parameters of the keyword just seen.  Defined only in state 1.  */
699  const struct callshapes *next_shapes = NULL;
700  /* Whether to implicitly assume the next tokens are arguments even without
701     a '('.  */
702  bool next_is_argument = false;
703  /* Context iterator that will be used if the next token is a '('.  */
704  flag_context_list_iterator_ty next_context_iter =
705    passthrough_context_list_iterator;
706  /* Current context.  */
707  flag_context_ty inner_context =
708    inherited_context (outer_context,
709		       flag_context_list_iterator_advance (&context_iter));
710
711  /* Start state is 0.  */
712  state = 0;
713
714  for (;;)
715    {
716      token_ty token;
717
718      x_awk_lex (&token);
719
720      if (next_is_argument && token.type != token_type_lparen)
721	{
722	  /* An argument list starts, even though there is no '('.  */
723	  context_iter = next_context_iter;
724	  outer_context = inner_context;
725	  inner_context =
726	    inherited_context (outer_context,
727			       flag_context_list_iterator_advance (
728				 &context_iter));
729	}
730
731      switch (token.type)
732	{
733	case token_type_symbol:
734	  {
735	    void *keyword_value;
736
737	    if (hash_find_entry (&keywords, token.string, strlen (token.string),
738				 &keyword_value)
739		== 0)
740	      {
741		next_shapes = (const struct callshapes *) keyword_value;
742		state = 1;
743	      }
744	    else
745	      state = 0;
746	  }
747	  next_is_argument =
748	    (strcmp (token.string, "print") == 0
749	     || strcmp (token.string, "printf") == 0);
750	  next_context_iter =
751	    flag_context_list_iterator (
752	      flag_context_list_table_lookup (
753		flag_context_list_table,
754		token.string, strlen (token.string)));
755	  free (token.string);
756	  continue;
757
758	case token_type_lparen:
759	  if (extract_parenthesized (mlp, inner_context, next_context_iter,
760				     arglist_parser_alloc (mlp,
761							   state ? next_shapes : NULL)))
762	    {
763	      arglist_parser_done (argparser, arg);
764	      return true;
765	    }
766	  next_is_argument = false;
767	  next_context_iter = null_context_list_iterator;
768	  state = 0;
769	  continue;
770
771	case token_type_rparen:
772	  arglist_parser_done (argparser, arg);
773	  return false;
774
775	case token_type_comma:
776	  arg++;
777	  inner_context =
778	    inherited_context (outer_context,
779			       flag_context_list_iterator_advance (
780				 &context_iter));
781	  next_is_argument = false;
782	  next_context_iter = passthrough_context_list_iterator;
783	  state = 0;
784	  continue;
785
786	case token_type_string:
787	  {
788	    lex_pos_ty pos;
789	    pos.file_name = logical_file_name;
790	    pos.line_number = token.line_number;
791
792	    if (extract_all)
793	      remember_a_message (mlp, NULL, token.string, inner_context, &pos,
794				  savable_comment);
795	    else
796	      arglist_parser_remember (argparser, arg, token.string,
797				       inner_context,
798				       pos.file_name, pos.line_number,
799				       savable_comment);
800	  }
801	  next_is_argument = false;
802	  next_context_iter = null_context_list_iterator;
803	  state = 0;
804	  continue;
805
806	case token_type_i18nstring:
807	  {
808	    lex_pos_ty pos;
809	    pos.file_name = logical_file_name;
810	    pos.line_number = token.line_number;
811
812	    remember_a_message (mlp, NULL, token.string, inner_context, &pos,
813				savable_comment);
814	  }
815	  next_is_argument = false;
816	  next_context_iter = null_context_list_iterator;
817	  state = 0;
818	  continue;
819
820	case token_type_semicolon:
821	  /* An argument list ends, and a new statement begins.  */
822	  /* FIXME: Should handle newline that acts as statement separator
823	     in the same way.  */
824	  /* FIXME: Instead of resetting outer_context here, it may be better
825	     to recurse in the next_is_argument handling above, waiting for
826	     the next semicolon or other statement terminator.  */
827	  outer_context = null_context;
828	  context_iter = null_context_list_iterator;
829	  next_is_argument = false;
830	  next_context_iter = passthrough_context_list_iterator;
831	  inner_context =
832	    inherited_context (outer_context,
833			       flag_context_list_iterator_advance (
834				 &context_iter));
835	  state = 0;
836	  continue;
837
838	case token_type_eof:
839	  arglist_parser_done (argparser, arg);
840	  return true;
841
842	case token_type_other:
843	  next_is_argument = false;
844	  next_context_iter = null_context_list_iterator;
845	  state = 0;
846	  continue;
847
848	default:
849	  abort ();
850	}
851    }
852}
853
854
855void
856extract_awk (FILE *f,
857	     const char *real_filename, const char *logical_filename,
858	     flag_context_list_table_ty *flag_table,
859	     msgdomain_list_ty *mdlp)
860{
861  message_list_ty *mlp = mdlp->item[0]->messages;
862
863  fp = f;
864  real_file_name = real_filename;
865  logical_file_name = xstrdup (logical_filename);
866  line_number = 1;
867
868  last_comment_line = -1;
869  last_non_comment_line = -1;
870
871  prefer_division_over_regexp = false;
872
873  flag_context_list_table = flag_table;
874
875  init_keywords ();
876
877  /* Eat tokens until eof is seen.  When extract_parenthesized returns
878     due to an unbalanced closing parenthesis, just restart it.  */
879  while (!extract_parenthesized (mlp, null_context, null_context_list_iterator,
880				 arglist_parser_alloc (mlp, NULL)))
881    ;
882
883  fp = NULL;
884  real_file_name = NULL;
885  logical_file_name = NULL;
886  line_number = 0;
887}
888