1/* xgettext awk backend.
2   Copyright (C) 2002-2003, 2005-2007 Free Software Foundation, Inc.
3
4   This file was written by Bruno Haible <haible@clisp.cons.org>, 2002.
5
6   This program is free software: you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 3 of the License, or
9   (at your option) any later version.
10
11   This program is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14   GNU General Public License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
18
19#ifdef HAVE_CONFIG_H
20# include "config.h"
21#endif
22
23/* Specification.  */
24#include "x-awk.h"
25
26#include <errno.h>
27#include <stdbool.h>
28#include <stdio.h>
29#include <stdlib.h>
30#include <string.h>
31
32#include "message.h"
33#include "xgettext.h"
34#include "x-awk.h"
35#include "error.h"
36#include "error-progname.h"
37#include "xalloc.h"
38#include "gettext.h"
39
40#define _(s) gettext(s)
41
42
43/* The awk syntax is defined in the gawk manual page and documentation.
44   See also gawk/awkgram.y.  */
45
46
47/* ====================== Keyword set customization.  ====================== */
48
49/* If true extract all strings.  */
50static bool extract_all = false;
51
52static hash_table keywords;
53static bool default_keywords = true;
54
55
56void
57x_awk_extract_all ()
58{
59  extract_all = true;
60}
61
62
63void
64x_awk_keyword (const char *name)
65{
66  if (name == NULL)
67    default_keywords = false;
68  else
69    {
70      const char *end;
71      struct callshape shape;
72      const char *colon;
73
74      if (keywords.table == NULL)
75	hash_init (&keywords, 100);
76
77      split_keywordspec (name, &end, &shape);
78
79      /* The characters between name and end should form a valid C identifier.
80	 A colon means an invalid parse in split_keywordspec().  */
81      colon = strchr (name, ':');
82      if (colon == NULL || colon >= end)
83	insert_keyword_callshape (&keywords, name, end - name, &shape);
84    }
85}
86
87/* Finish initializing the keywords hash table.
88   Called after argument processing, before each file is processed.  */
89static void
90init_keywords ()
91{
92  if (default_keywords)
93    {
94      /* When adding new keywords here, also update the documentation in
95	 xgettext.texi!  */
96      x_awk_keyword ("dcgettext");
97      x_awk_keyword ("dcngettext:1,2");
98      default_keywords = false;
99    }
100}
101
/* Record the format-string flags of the awk functions this backend knows
   about: the message arguments of dcgettext/dcngettext are marked
   pass-awk-format, and the first argument of printf is marked awk-format.
   Takes no arguments.  */
void
init_flag_table_awk (void)
{
  xgettext_record_flag ("dcgettext:1:pass-awk-format");
  xgettext_record_flag ("dcngettext:1:pass-awk-format");
  xgettext_record_flag ("dcngettext:2:pass-awk-format");
  xgettext_record_flag ("printf:1:awk-format");
}
110
111
112/* ======================== Reading of characters.  ======================== */
113
/* Name of the file actually being read; appears in read-error messages.  */
static const char *real_file_name;

/* Name and current line used to label extracted messages and warnings.
   line_number is advanced by phase1_getc() on every newline.  */
static char *logical_file_name;
static int line_number;

/* Stream the lexer reads from.  */
static FILE *fp;

/* Line of the most recent comment and line of the most recent token.
   They are compared at each newline to decide whether the saved comments
   still count as immediately preceding a keyword.  */
static int last_comment_line, last_non_comment_line;
128
129
130/* 1. line_number handling.  */
131
132static int
133phase1_getc ()
134{
135  int c = getc (fp);
136
137  if (c == EOF)
138    {
139      if (ferror (fp))
140	error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
141	       real_file_name);
142      return EOF;
143    }
144
145  if (c == '\n')
146    line_number++;
147
148  return c;
149}
150
151/* Supports only one pushback character.  */
152static void
153phase1_ungetc (int c)
154{
155  if (c != EOF)
156    {
157      if (c == '\n')
158	--line_number;
159
160      ungetc (c, fp);
161    }
162}
163
164
165/* 2. Replace each comment that is not inside a string literal or regular
166   expression with a newline character.  We need to remember the comment
167   for later, because it may be attached to a keyword string.  */
168
169static int
170phase2_getc ()
171{
172  static char *buffer;
173  static size_t bufmax;
174  size_t buflen;
175  int lineno;
176  int c;
177
178  c = phase1_getc ();
179  if (c == '#')
180    {
181      buflen = 0;
182      lineno = line_number;
183      for (;;)
184	{
185	  c = phase1_getc ();
186	  if (c == '\n' || c == EOF)
187	    break;
188	  /* We skip all leading white space, but not EOLs.  */
189	  if (!(buflen == 0 && (c == ' ' || c == '\t')))
190	    {
191	      if (buflen >= bufmax)
192		{
193		  bufmax = 2 * bufmax + 10;
194		  buffer = xrealloc (buffer, bufmax);
195		}
196	      buffer[buflen++] = c;
197	    }
198	}
199      if (buflen >= bufmax)
200	{
201	  bufmax = 2 * bufmax + 10;
202	  buffer = xrealloc (buffer, bufmax);
203	}
204      buffer[buflen] = '\0';
205      savable_comment_add (buffer);
206      last_comment_line = lineno;
207    }
208  return c;
209}
210
/* Push C back for a later phase2_getc(); EOF is ignored.
   Supports only one pushback character.  */
static void
phase2_ungetc (int c)
{
  if (c == EOF)
    return;

  phase1_ungetc (c);
}
218
219
220/* ========================== Reading of tokens.  ========================== */
221
222
/* Kinds of token produced by the lexer.  */
typedef enum token_type_ty
{
  token_type_eof,
  token_type_lparen,            /* ( */
  token_type_rparen,            /* ) */
  token_type_comma,             /* , */
  token_type_string,            /* "abc" */
  token_type_i18nstring,        /* _"abc" */
  token_type_symbol,            /* symbol, number */
  token_type_semicolon,         /* ; */
  token_type_other              /* regexp, misc. operator */
} token_type_ty;

/* One token together with the line on which it starts.  */
typedef struct token_ty
{
  token_type_ty type;
  char *string;         /* for token_type_{symbol,string,i18nstring} */
  int line_number;
} token_ty;
244
245
246/* 7. Replace escape sequences within character strings with their
247   single character equivalents.  */
248
249#define P7_QUOTES (1000 + '"')
250
251static int
252phase7_getc ()
253{
254  int c;
255
256  for (;;)
257    {
258      /* Use phase 1, because phase 2 elides comments.  */
259      c = phase1_getc ();
260
261      if (c == EOF || c == '\n')
262	break;
263      if (c == '"')
264	return P7_QUOTES;
265      if (c != '\\')
266	return c;
267      c = phase1_getc ();
268      if (c == EOF)
269	break;
270      if (c != '\n')
271	switch (c)
272	  {
273	  case 'a':
274	    return '\a';
275	  case 'b':
276	    return '\b';
277	  case 'f':
278	    return '\f';
279	  case 'n':
280	    return '\n';
281	  case 'r':
282	    return '\r';
283	  case 't':
284	    return '\t';
285	  case 'v':
286	    return '\v';
287	  case '0': case '1': case '2': case '3': case '4':
288	  case '5': case '6': case '7':
289	    {
290	      int n = c - '0';
291
292	      c = phase1_getc ();
293	      if (c != EOF)
294		{
295		  if (c >= '0' && c <= '7')
296		    {
297		      n = (n << 3) + (c - '0');
298		      c = phase1_getc ();
299		      if (c != EOF)
300			{
301			  if (c >= '0' && c <= '7')
302			    n = (n << 3) + (c - '0');
303			  else
304			    phase1_ungetc (c);
305			}
306		    }
307		  else
308		    phase1_ungetc (c);
309		}
310	      return (unsigned char) n;
311	    }
312	  case 'x':
313	    {
314	      int n = 0;
315
316	      for (;;)
317		{
318		  c = phase1_getc ();
319		  if (c == EOF)
320		    break;
321		  else if (c >= '0' && c <= '9')
322		    n = (n << 4) + (c - '0');
323		  else if (c >= 'A' && c <= 'F')
324		    n = (n << 4) + (c - 'A' + 10);
325		  else if (c >= 'a' && c <= 'f')
326		    n = (n << 4) + (c - 'a' + 10);
327		  else
328		    {
329		      phase1_ungetc (c);
330		      break;
331		    }
332		}
333	      return (unsigned char) n;
334	    }
335	  default:
336	    return c;
337	  }
338    }
339
340  phase1_ungetc (c);
341  error_with_progname = false;
342  error (0, 0, _("%s:%d: warning: unterminated string"), logical_file_name,
343	 line_number);
344  error_with_progname = true;
345  return P7_QUOTES;
346}
347
348
349/* Free the memory pointed to by a 'struct token_ty'.  */
350static inline void
351free_token (token_ty *tp)
352{
353  switch (tp->type)
354    {
355    case token_type_string:
356    case token_type_i18nstring:
357    case token_type_symbol:
358      free (tp->string);
359      break;
360    default:
361      break;
362    }
363}
364
365
366/* Combine characters into tokens.  Discard whitespace.  */
367
368/* There is an ambiguity about '/': It can start a division operator ('/' or
369   '/=') or it can start a regular expression.  The distinction is important
370   because inside regular expressions, '#' and '"' lose its special meanings.
371   If you look at the awk grammar, you see that the operator is only allowed
372   right after a 'variable' or 'simp_exp' nonterminal, and these nonterminals
373   can only end in the NAME, LENGTH, YSTRING, YNUMBER, ')', ']' terminals.
374   So we prefer the division operator interpretation only right after
375   symbol, string, number, ')', ']', with whitespace but no newline allowed
376   in between.  */
377static bool prefer_division_over_regexp;
378
379static void
380x_awk_lex (token_ty *tp)
381{
382  static char *buffer;
383  static int bufmax;
384  int bufpos;
385  int c;
386
387  for (;;)
388    {
389      tp->line_number = line_number;
390      c = phase2_getc ();
391
392      switch (c)
393	{
394	case EOF:
395	  tp->type = token_type_eof;
396	  return;
397
398	case '\n':
399	  if (last_non_comment_line > last_comment_line)
400	    savable_comment_reset ();
401	  /* Newline is not allowed inside expressions.  It usually
402	     introduces a fresh statement.
403	     FIXME: Newlines after any of ',' '{' '?' ':' '||' '&&' 'do' 'else'
404	     does *not* introduce a fresh statement.  */
405	  prefer_division_over_regexp = false;
406	  /* FALLTHROUGH */
407	case '\t':
408	case ' ':
409	  /* Ignore whitespace and comments.  */
410	  continue;
411
412	case '\\':
413	  /* Backslash ought to be immediately followed by a newline.  */
414	  continue;
415	}
416
417      last_non_comment_line = tp->line_number;
418
419      switch (c)
420	{
421	case '.':
422	  {
423	    int c2 = phase2_getc ();
424	    phase2_ungetc (c2);
425	    if (!(c2 >= '0' && c2 <= '9'))
426	      {
427
428		tp->type = token_type_other;
429		prefer_division_over_regexp = false;
430		return;
431	      }
432	  }
433	  /* FALLTHROUGH */
434	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
435	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
436	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
437	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
438	case 'Y': case 'Z':
439	case '_':
440	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
441	case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
442	case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
443	case 's': case 't': case 'u': case 'v': case 'w': case 'x':
444	case 'y': case 'z':
445	case '0': case '1': case '2': case '3': case '4':
446	case '5': case '6': case '7': case '8': case '9':
447	  /* Symbol, or part of a number.  */
448	  bufpos = 0;
449	  for (;;)
450	    {
451	      if (bufpos >= bufmax)
452		{
453		  bufmax = 2 * bufmax + 10;
454		  buffer = xrealloc (buffer, bufmax);
455		}
456	      buffer[bufpos++] = c;
457	      c = phase2_getc ();
458	      switch (c)
459		{
460		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
461		case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
462		case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
463		case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
464		case 'Y': case 'Z':
465		case '_':
466		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
467		case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
468		case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
469		case 's': case 't': case 'u': case 'v': case 'w': case 'x':
470		case 'y': case 'z':
471		case '0': case '1': case '2': case '3': case '4':
472		case '5': case '6': case '7': case '8': case '9':
473		  continue;
474		default:
475		  if (bufpos == 1 && buffer[0] == '_' && c == '"')
476		    {
477		      tp->type = token_type_i18nstring;
478		      goto case_string;
479		    }
480		  phase2_ungetc (c);
481		  break;
482		}
483	      break;
484	    }
485	  if (bufpos >= bufmax)
486	    {
487	      bufmax = 2 * bufmax + 10;
488	      buffer = xrealloc (buffer, bufmax);
489	    }
490	  buffer[bufpos] = '\0';
491	  tp->string = xstrdup (buffer);
492	  tp->type = token_type_symbol;
493	  /* Most identifiers can be variable names; after them we must
494	     interpret '/' as division operator.  But for awk's builtin
495	     keywords we have three cases:
496	     (a) Must interpret '/' as division operator. "length".
497	     (b) Must interpret '/' as start of a regular expression.
498		 "do", "exit", "print", "printf", "return".
499	     (c) '/' after this keyword in invalid anyway. All others.
500	     I used the following script for the distinction.
501		for k in $awk_keywords; do
502		  echo; echo $k; awk "function foo () { $k / 10 }" < /dev/null
503		done
504	   */
505	  if (strcmp (buffer, "do") == 0
506	      || strcmp (buffer, "exit") == 0
507	      || strcmp (buffer, "print") == 0
508	      || strcmp (buffer, "printf") == 0
509	      || strcmp (buffer, "return") == 0)
510	    prefer_division_over_regexp = false;
511	  else
512	    prefer_division_over_regexp = true;
513	  return;
514
515	case '"':
516	  tp->type = token_type_string;
517	case_string:
518	  bufpos = 0;
519	  for (;;)
520	    {
521	      c = phase7_getc ();
522	      if (c == EOF || c == P7_QUOTES)
523		break;
524	      if (bufpos >= bufmax)
525		{
526		  bufmax = 2 * bufmax + 10;
527		  buffer = xrealloc (buffer, bufmax);
528		}
529	      buffer[bufpos++] = c;
530	    }
531	  if (bufpos >= bufmax)
532	    {
533	      bufmax = 2 * bufmax + 10;
534	      buffer = xrealloc (buffer, bufmax);
535	    }
536	  buffer[bufpos] = '\0';
537	  tp->string = xstrdup (buffer);
538	  prefer_division_over_regexp = true;
539	  return;
540
541	case '(':
542	  tp->type = token_type_lparen;
543	  prefer_division_over_regexp = false;
544	  return;
545
546	case ')':
547	  tp->type = token_type_rparen;
548	  prefer_division_over_regexp = true;
549	  return;
550
551	case ',':
552	  tp->type = token_type_comma;
553	  prefer_division_over_regexp = false;
554	  return;
555
556	case ';':
557	  tp->type = token_type_semicolon;
558	  prefer_division_over_regexp = false;
559	  return;
560
561	case ']':
562	  tp->type = token_type_other;
563	  prefer_division_over_regexp = true;
564	  return;
565
566	case '/':
567	  if (!prefer_division_over_regexp)
568	    {
569	      /* Regular expression.
570	         Counting brackets is non-trivial. [[] is balanced, and so is
571	         [\]]. Also, /[/]/ is balanced and ends at the third slash.
572	         Do not count [ or ] if either one is preceded by a \.
573	         A '[' should be counted if
574	          a) it is the first one so far (brackets == 0), or
575	          b) it is the '[' in '[:'.
576	         A ']' should be counted if not preceded by a \.
577	         According to POSIX, []] is how you put a ] into a set.
578	         Try to handle that too.
579	       */
580	      int brackets = 0;
581	      bool pos0 = true;		/* true at start of regexp */
582	      bool pos1_open = false;	/* true after [ at start of regexp */
583	      bool pos2_open_not = false; /* true after [^ at start of regexp */
584
585	      for (;;)
586		{
587		  c = phase1_getc ();
588
589		  if (c == EOF || c == '\n')
590		    {
591		      phase1_ungetc (c);
592		      error_with_progname = false;
593		      error (0, 0, _("%s:%d: warning: unterminated regular expression"),
594			     logical_file_name, line_number);
595		      error_with_progname = true;
596		      break;
597		    }
598		  else if (c == '[')
599		    {
600		      if (brackets == 0)
601			brackets++;
602		      else
603			{
604			  c = phase1_getc ();
605			  if (c == ':')
606			    brackets++;
607			  phase1_ungetc (c);
608			}
609		      if (pos0)
610			{
611			  pos0 = false;
612			  pos1_open = true;
613			  continue;
614			}
615		    }
616		  else if (c == ']')
617		    {
618		      if (!(pos1_open || pos2_open_not))
619			brackets--;
620		    }
621		  else if (c == '^')
622		    {
623		      if (pos1_open)
624			{
625			  pos1_open = false;
626			  pos2_open_not = true;
627			  continue;
628			}
629		    }
630		  else if (c == '\\')
631		    {
632		      c = phase1_getc ();
633		      /* Backslash-newline is valid and ignored.  */
634		    }
635		  else if (c == '/')
636		    {
637		      if (brackets <= 0)
638			break;
639		    }
640
641		  pos0 = false;
642		  pos1_open = false;
643		  pos2_open_not = false;
644		}
645
646	      tp->type = token_type_other;
647	      prefer_division_over_regexp = false;
648	      return;
649	    }
650	  /* FALLTHROUGH */
651
652	default:
653	  /* We could carefully recognize each of the 2 and 3 character
654	     operators, but it is not necessary, as we only need to recognize
655	     gettext invocations.  Don't bother.  */
656	  tp->type = token_type_other;
657	  prefer_division_over_regexp = false;
658	  return;
659	}
660    }
661}
662
663
664/* ========================= Extracting strings.  ========================== */
665
666
667/* Context lookup table.  */
668static flag_context_list_table_ty *flag_context_list_table;
669
670
/* The file is broken into tokens.  Scan the token stream, looking for
   a keyword, followed by a left paren, followed by a string.  When we
   see this sequence, we have something to remember.  We assume we are
   looking at a valid awk program, and leave the complaints about
   the grammar to the awk interpreter.

     Normal handling: Look for
       keyword ( ... msgid ... )
     Plural handling: Look for
       keyword ( ... msgid ... msgid_plural ... )

   We use recursion because the arguments before msgid or between msgid
   and msgid_plural can contain subexpressions of the same form.  */
684
685
686/* Extract messages until the next balanced closing parenthesis.
687   Extracted messages are added to MLP.
688   Return true upon eof, false upon closing parenthesis.  */
689static bool
690extract_parenthesized (message_list_ty *mlp,
691		       flag_context_ty outer_context,
692		       flag_context_list_iterator_ty context_iter,
693		       struct arglist_parser *argparser)
694{
695  /* Current argument number.  */
696  int arg = 1;
697  /* 0 when no keyword has been seen.  1 right after a keyword is seen.  */
698  int state;
699  /* Parameters of the keyword just seen.  Defined only in state 1.  */
700  const struct callshapes *next_shapes = NULL;
701  /* Whether to implicitly assume the next tokens are arguments even without
702     a '('.  */
703  bool next_is_argument = false;
704  /* Context iterator that will be used if the next token is a '('.  */
705  flag_context_list_iterator_ty next_context_iter =
706    passthrough_context_list_iterator;
707  /* Current context.  */
708  flag_context_ty inner_context =
709    inherited_context (outer_context,
710		       flag_context_list_iterator_advance (&context_iter));
711
712  /* Start state is 0.  */
713  state = 0;
714
715  for (;;)
716    {
717      token_ty token;
718
719      x_awk_lex (&token);
720
721      if (next_is_argument && token.type != token_type_lparen)
722	{
723	  /* An argument list starts, even though there is no '('.  */
724	  context_iter = next_context_iter;
725	  outer_context = inner_context;
726	  inner_context =
727	    inherited_context (outer_context,
728			       flag_context_list_iterator_advance (
729				 &context_iter));
730	}
731
732      switch (token.type)
733	{
734	case token_type_symbol:
735	  {
736	    void *keyword_value;
737
738	    if (hash_find_entry (&keywords, token.string, strlen (token.string),
739				 &keyword_value)
740		== 0)
741	      {
742		next_shapes = (const struct callshapes *) keyword_value;
743		state = 1;
744	      }
745	    else
746	      state = 0;
747	  }
748	  next_is_argument =
749	    (strcmp (token.string, "print") == 0
750	     || strcmp (token.string, "printf") == 0);
751	  next_context_iter =
752	    flag_context_list_iterator (
753	      flag_context_list_table_lookup (
754		flag_context_list_table,
755		token.string, strlen (token.string)));
756	  free (token.string);
757	  continue;
758
759	case token_type_lparen:
760	  if (extract_parenthesized (mlp, inner_context, next_context_iter,
761				     arglist_parser_alloc (mlp,
762							   state ? next_shapes : NULL)))
763	    {
764	      arglist_parser_done (argparser, arg);
765	      return true;
766	    }
767	  next_is_argument = false;
768	  next_context_iter = null_context_list_iterator;
769	  state = 0;
770	  continue;
771
772	case token_type_rparen:
773	  arglist_parser_done (argparser, arg);
774	  return false;
775
776	case token_type_comma:
777	  arg++;
778	  inner_context =
779	    inherited_context (outer_context,
780			       flag_context_list_iterator_advance (
781				 &context_iter));
782	  next_is_argument = false;
783	  next_context_iter = passthrough_context_list_iterator;
784	  state = 0;
785	  continue;
786
787	case token_type_string:
788	  {
789	    lex_pos_ty pos;
790	    pos.file_name = logical_file_name;
791	    pos.line_number = token.line_number;
792
793	    if (extract_all)
794	      remember_a_message (mlp, NULL, token.string, inner_context, &pos,
795				  savable_comment);
796	    else
797	      arglist_parser_remember (argparser, arg, token.string,
798				       inner_context,
799				       pos.file_name, pos.line_number,
800				       savable_comment);
801	  }
802	  next_is_argument = false;
803	  next_context_iter = null_context_list_iterator;
804	  state = 0;
805	  continue;
806
807	case token_type_i18nstring:
808	  {
809	    lex_pos_ty pos;
810	    pos.file_name = logical_file_name;
811	    pos.line_number = token.line_number;
812
813	    remember_a_message (mlp, NULL, token.string, inner_context, &pos,
814				savable_comment);
815	  }
816	  next_is_argument = false;
817	  next_context_iter = null_context_list_iterator;
818	  state = 0;
819	  continue;
820
821	case token_type_semicolon:
822	  /* An argument list ends, and a new statement begins.  */
823	  /* FIXME: Should handle newline that acts as statement separator
824	     in the same way.  */
825	  /* FIXME: Instead of resetting outer_context here, it may be better
826	     to recurse in the next_is_argument handling above, waiting for
827	     the next semicolon or other statement terminator.  */
828	  outer_context = null_context;
829	  context_iter = null_context_list_iterator;
830	  next_is_argument = false;
831	  next_context_iter = passthrough_context_list_iterator;
832	  inner_context =
833	    inherited_context (outer_context,
834			       flag_context_list_iterator_advance (
835				 &context_iter));
836	  state = 0;
837	  continue;
838
839	case token_type_eof:
840	  arglist_parser_done (argparser, arg);
841	  return true;
842
843	case token_type_other:
844	  next_is_argument = false;
845	  next_context_iter = null_context_list_iterator;
846	  state = 0;
847	  continue;
848
849	default:
850	  abort ();
851	}
852    }
853}
854
855
856void
857extract_awk (FILE *f,
858	     const char *real_filename, const char *logical_filename,
859	     flag_context_list_table_ty *flag_table,
860	     msgdomain_list_ty *mdlp)
861{
862  message_list_ty *mlp = mdlp->item[0]->messages;
863
864  fp = f;
865  real_file_name = real_filename;
866  logical_file_name = xstrdup (logical_filename);
867  line_number = 1;
868
869  last_comment_line = -1;
870  last_non_comment_line = -1;
871
872  prefer_division_over_regexp = false;
873
874  flag_context_list_table = flag_table;
875
876  init_keywords ();
877
878  /* Eat tokens until eof is seen.  When extract_parenthesized returns
879     due to an unbalanced closing parenthesis, just restart it.  */
880  while (!extract_parenthesized (mlp, null_context, null_context_list_iterator,
881				 arglist_parser_alloc (mlp, NULL)))
882    ;
883
884  fp = NULL;
885  real_file_name = NULL;
886  logical_file_name = NULL;
887  line_number = 0;
888}
889