1/* xgettext librep backend.
2   Copyright (C) 2001-2003, 2005-2006 Free Software Foundation, Inc.
3
4   This file was written by Bruno Haible <haible@clisp.cons.org>, 2001.
5
6   This program is free software; you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 2, or (at your option)
9   any later version.
10
11   This program is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14   GNU General Public License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with this program; if not, write to the Free Software Foundation,
18   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
19
20#ifdef HAVE_CONFIG_H
21# include "config.h"
22#endif
23
24#include <errno.h>
25#include <stdbool.h>
26#include <stdio.h>
27#include <stdlib.h>
28#include <string.h>
29
30#include "c-ctype.h"
31#include "message.h"
32#include "xgettext.h"
33#include "x-librep.h"
34#include "error.h"
35#include "xalloc.h"
36#include "exit.h"
37#include "hash.h"
38#include "gettext.h"
39
40#define _(s) gettext(s)
41
42
43/* Summary of librep syntax:
44   - ';' starts a comment until end of line.
45   - Block comments start with '#|' and end with '|#'.
46   - Numbers are constituted of an optional prefix (#b, #B for binary,
47     #o, #O for octal, #d, #D for decimal, #x, #X for hexadecimal,
48     #e, #E for exact, #i, #I for inexact), an optional sign (+ or -), and
49     the digits.
   - Characters are written as '?' followed by the character, possibly
     with an escape sequence, for example '?a', '?\n', '?\177'.
52   - Strings are delimited by double quotes. Backslash introduces an escape
53     sequence. The following are understood: '\n', '\r', '\f', '\t', '\a',
54     '\\', '\^C', '\012' (octal), '\x12' (hexadecimal).
55   - Symbols: can contain meta-characters - whitespace or any from ()[]'";|\' -
56     if preceded by backslash or enclosed in |...|.
57   - Keywords: written as #:SYMBOL.
58   - () delimit lists.
59   - [] delimit vectors.
60   The reader is implemented in librep-0.14/src/lisp.c.  */
61
62
63/* ====================== Keyword set customization.  ====================== */
64
/* If true extract all strings.  Set by x_librep_extract_all and consulted
   by read_object each time a string literal has been read.  */
static bool extract_all = false;

/* Keyword table.  x_librep_keyword creates it on first use and fills it
   through insert_keyword_callshape; read_object looks up the symbol found
   in the function position of a list in it.  */
static hash_table keywords;
/* True as long as the built-in default keyword still has to be installed
   by init_keywords.  Cleared by x_librep_keyword (NULL) and by
   init_keywords itself.  */
static bool default_keywords = true;
70
71
72void
73x_librep_extract_all ()
74{
75  extract_all = true;
76}
77
78
79void
80x_librep_keyword (const char *name)
81{
82  if (name == NULL)
83    default_keywords = false;
84  else
85    {
86      const char *end;
87      struct callshape shape;
88      const char *colon;
89
90      if (keywords.table == NULL)
91	hash_init (&keywords, 100);
92
93      split_keywordspec (name, &end, &shape);
94
95      /* The characters between name and end should form a valid Lisp
96	 symbol.  */
97      colon = strchr (name, ':');
98      if (colon == NULL || colon >= end)
99	insert_keyword_callshape (&keywords, name, end - name, &shape);
100    }
101}
102
103/* Finish initializing the keywords hash table.
104   Called after argument processing, before each file is processed.  */
105static void
106init_keywords ()
107{
108  if (default_keywords)
109    {
110      /* When adding new keywords here, also update the documentation in
111	 xgettext.texi!  */
112      x_librep_keyword ("_");
113      default_keywords = false;
114    }
115}
116
/* Record the format-string flags that apply to librep sources: argument 1
   of "_" is flagged pass-librep-format and argument 2 of "format" is
   flagged librep-format.  */
void
init_flag_table_librep ()
{
  xgettext_record_flag ("_:1:pass-librep-format");
  xgettext_record_flag ("format:2:librep-format");
}
123
124
125/* ======================== Reading of characters.  ======================== */
126
/* Real filename, used in error messages about the input file.  */
static const char *real_file_name;

/* Logical filename and line number, used to label the extracted messages.
   extract_librep sets line_number to 1 before scanning; do_getc increments
   it for every '\n' it returns and do_ungetc decrements it again when a
   '\n' is pushed back.  */
static char *logical_file_name;
static int line_number;

/* The input file stream.  Set by extract_librep for the duration of one
   scan and reset to NULL afterwards.  */
static FILE *fp;
136
137
138/* Fetch the next character from the input file.  */
139static int
140do_getc ()
141{
142  int c = getc (fp);
143
144  if (c == EOF)
145    {
146      if (ferror (fp))
147	error (EXIT_FAILURE, errno, _("\
148error while reading \"%s\""), real_file_name);
149    }
150  else if (c == '\n')
151   line_number++;
152
153  return c;
154}
155
156/* Put back the last fetched character, not EOF.  */
157static void
158do_ungetc (int c)
159{
160  if (c == '\n')
161    line_number--;
162  ungetc (c, fp);
163}
164
165
166/* ========================== Reading of tokens.  ========================== */
167
168
/* A token consists of a sequence of characters.  The characters live in a
   heap buffer that init_token allocates, grow_token enlarges and
   free_token releases.  The buffer is not NUL-terminated; charcount gives
   its used length.  */
struct token
{
  int allocated;		/* number of chars the buffer can hold */
  int charcount;		/* number of chars currently in use */
  char *chars;			/* the token's constituents */
};
176
177/* Initialize a 'struct token'.  */
178static inline void
179init_token (struct token *tp)
180{
181  tp->allocated = 10;
182  tp->chars = (char *) xmalloc (tp->allocated * sizeof (char));
183  tp->charcount = 0;
184}
185
186/* Free the memory pointed to by a 'struct token'.  */
187static inline void
188free_token (struct token *tp)
189{
190  free (tp->chars);
191}
192
193/* Ensure there is enough room in the token for one more character.  */
194static inline void
195grow_token (struct token *tp)
196{
197  if (tp->charcount == tp->allocated)
198    {
199      tp->allocated *= 2;
200      tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char));
201    }
202}
203
204/* Read the next token.  If 'first' is given, it points to the first
205   character, which has already been read.  Returns true for a symbol,
206   false for a number.  */
207static bool
208read_token (struct token *tp, const int *first)
209{
210  int c;
211  /* Variables for speculative number parsing:  */
212  int radix = -1;
213  int nfirst = 0;
214  bool exact = true;
215  bool rational = false;
216  bool exponent = false;
217  bool had_sign = false;
218  bool expecting_prefix = false;
219
220  init_token (tp);
221
222  if (first)
223    c = *first;
224  else
225    c = do_getc ();
226
227  for (;; c = do_getc ())
228    {
229      switch (c)
230	{
231	case EOF:
232	  goto done;
233
234	case ' ': case '\t': case '\n': case '\f': case '\r':
235	case '(': case ')': case '[': case ']':
236	case '\'': case '"': case ';': case ',': case '`':
237	  goto done;
238
239	case '\\':
240	  radix = 0;
241	  c = do_getc ();
242	  if (c == EOF)
243	    /* Invalid, but be tolerant.  */
244	    break;
245	  grow_token (tp);
246	  tp->chars[tp->charcount++] = c;
247	  break;
248
249	case '|':
250	  radix = 0;
251	  for (;;)
252	    {
253	      c = do_getc ();
254	      if (c == EOF || c == '|')
255		break;
256	      grow_token (tp);
257	      tp->chars[tp->charcount++] = c;
258	    }
259	  break;
260
261	default:
262	  if (radix != 0)
263	    {
264	      if (expecting_prefix)
265		{
266		  switch (c)
267		    {
268		    case 'B': case 'b':
269		      radix = 2;
270		      break;
271		    case 'O': case 'o':
272		      radix = 8;
273		      break;
274		    case 'D': case 'd':
275		      radix = 10;
276		      break;
277		    case 'X': case 'x':
278		      radix = 16;
279		      break;
280		    case 'E': case 'e':
281		    case 'I': case 'i':
282		      break;
283		    default:
284		      radix = 0;
285		      break;
286		    }
287		  expecting_prefix = false;
288		  nfirst = tp->charcount + 1;
289		}
290	      else if (tp->charcount == nfirst
291		       && (c == '+' || c == '-' || c == '#'))
292		{
293		  if (c == '#')
294		    {
295		      if (had_sign)
296			radix = 0;
297		      else
298			expecting_prefix = true;
299		    }
300		  else
301		    had_sign = true;
302		  nfirst = tp->charcount + 1;
303		}
304	      else
305		{
306		  switch (radix)
307		    {
308		    case -1:
309		      if (c == '.')
310			{
311			  radix = 10;
312			  exact = false;
313			}
314		      else if (!(c >= '0' && c <= '9'))
315			radix = 0;
316		      else if (c == '0')
317			radix = 1;
318		      else
319			radix = 10;
320		      break;
321
322		    case 1:
323		      switch (c)
324			{
325			case 'X': case 'x':
326			  radix = 16;
327			  nfirst = tp->charcount + 1;
328			  break;
329			case '0': case '1': case '2': case '3': case '4':
330			case '5': case '6': case '7':
331			  radix = 8;
332			  nfirst = tp->charcount;
333			  break;
334			case '.': case 'E': case 'e':
335			  radix = 10;
336			  exact = false;
337			  break;
338			case '/':
339			  radix = 10;
340			  rational = true;
341			  break;
342			default:
343			  radix = 0;
344			  break;
345			}
346		      break;
347
348		    default:
349		      switch (c)
350			{
351			case '.':
352			  if (exact && radix == 10 && !rational)
353			    exact = false;
354			  else
355			    radix = 0;
356			  break;
357			case '/':
358			  if (exact && !rational)
359			    rational = true;
360			  else
361			    radix = 0;
362			  break;
363			case 'E': case 'e':
364			  if (radix == 10)
365			    {
366			      if (!rational && !exponent)
367				{
368				  exponent = true;
369				  exact = false;
370				}
371			      else
372				radix = 0;
373			      break;
374			    }
375			  /*FALLTHROUGH*/
376			default:
377			  if (exponent && (c == '+' || c == '-'))
378			    break;
379			  if ((radix <= 10
380			       && !(c >= '0' && c <= '0' + radix - 1))
381			      || (radix == 16 && !c_isxdigit (c)))
382			    radix = 0;
383			  break;
384			}
385		      break;
386		    }
387		}
388	    }
389	  else
390	    {
391	      if (c == '#')
392		goto done;
393	    }
394	  grow_token (tp);
395	  tp->chars[tp->charcount++] = c;
396	}
397    }
398 done:
399  if (c != EOF)
400    do_ungetc (c);
401  if (radix > 0 && nfirst < tp->charcount)
402    return false; /* number */
403  else
404    return true; /* symbol */
405}
406
407
408/* ========================= Accumulating comments ========================= */
409
410
/* Buffer in which the text of the comment line being read is collected.
   'buffer' has room for 'bufmax' bytes, of which the first 'buflen' are in
   use.  comment_add and comment_line_end enlarge it with xrealloc; it is
   reused from one comment line to the next.  */
static char *buffer;
static size_t bufmax;
static size_t buflen;
414
415static inline void
416comment_start ()
417{
418  buflen = 0;
419}
420
421static inline void
422comment_add (int c)
423{
424  if (buflen >= bufmax)
425    {
426      bufmax = 2 * bufmax + 10;
427      buffer = xrealloc (buffer, bufmax);
428    }
429  buffer[buflen++] = c;
430}
431
432static inline void
433comment_line_end (size_t chars_to_remove)
434{
435  buflen -= chars_to_remove;
436  while (buflen >= 1
437	 && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
438    --buflen;
439  if (chars_to_remove == 0 && buflen >= bufmax)
440    {
441      bufmax = 2 * bufmax + 10;
442      buffer = xrealloc (buffer, bufmax);
443    }
444  buffer[buflen] = '\0';
445  savable_comment_add (buffer);
446}
447
448
/* These are for tracking whether comments count as immediately before
   keyword.  Both are set to -1 by extract_librep before scanning and hold
   the value of line_number at the most recent comment and at the most
   recent non-comment object, respectively.  read_object compares them on
   every newline and calls savable_comment_reset when the non-comment line
   is the later one.  */
static int last_comment_line;
static int last_non_comment_line;
453
454
455/* ========================= Accumulating messages ========================= */
456
457
/* The message list that receives the extracted messages.  extract_librep
   sets it to mdlp->item[0]->messages.  */
static message_list_ty *mlp;
459
460
461/* ============== Reading of objects.  See CLHS 2 "Syntax".  ============== */
462
463
/* We are only interested in symbols (e.g. the default keyword "_") and
   strings.  Other objects need not be represented precisely.  */
enum object_type
{
  t_symbol,	/* symbol */
  t_string,	/* string */
  t_other,	/* other kind of real object */
  t_dot,	/* '.' pseudo object */
  t_close,	/* ')' or ']' pseudo object */
  t_eof		/* EOF marker */
};
475
/* Result of read_object.  'token' is heap-allocated and owned by the
   object only when the type is t_symbol or t_string; free_object releases
   it in exactly those cases.  */
struct object
{
  enum object_type type;
  struct token *token;		/* for t_symbol and t_string */
  int line_number_at_start;	/* for t_string: the value of line_number
				   when the opening quote was read */
};
482
483/* Free the memory pointed to by a 'struct object'.  */
484static inline void
485free_object (struct object *op)
486{
487  if (op->type == t_symbol || op->type == t_string)
488    {
489      free_token (op->token);
490      free (op->token);
491    }
492}
493
494/* Convert a t_symbol/t_string token to a char*.  */
495static char *
496string_of_object (const struct object *op)
497{
498  char *str;
499  int n;
500
501  if (!(op->type == t_symbol || op->type == t_string))
502    abort ();
503  n = op->token->charcount;
504  str = (char *) xmalloc (n + 1);
505  memcpy (str, op->token->chars, n);
506  str[n] = '\0';
507  return str;
508}
509
/* Context lookup table.  Set by extract_librep from its flag_table
   argument; read_object looks up in it the symbol found in the function
   position of a list.  */
static flag_context_list_table_ty *flag_context_list_table;
512
/* Returns the character represented by an escape sequence.
   C is the character that followed the backslash.  Further characters are
   fetched from the input where the escape needs them: one for '^', up to
   two more octal digits for an octal escape, and all following hexadecimal
   digits for 'x'.  A character that does not belong to the escape is
   pushed back.  Returns EOF only when the input ends right after '^'.
   Numeric escapes are reduced to the range of 'unsigned char'.  */
static int
do_getc_escaped (int c)
{
  switch (c)
    {
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 'f':
      return '\f';
    case 't':
      return '\t';
    case 'v':
      return '\v';
    case 'a':
      return '\a';
    case '^':
      /* Control character: keep the low five bits of the next one.  */
      c = do_getc ();
      if (c == EOF)
        return EOF;
      return c & 0x1f;
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7':
      {
        unsigned int n = c - '0';

        c = do_getc ();
        if (c != EOF)
          {
            if (c >= '0' && c <= '7')
              {
                n = (n << 3) + (c - '0');
                c = do_getc ();
                if (c != EOF)
                  {
                    if (c >= '0' && c <= '7')
                      n = (n << 3) + (c - '0');
                    else
                      do_ungetc (c);
                  }
              }
            else
              do_ungetc (c);
          }
        return (unsigned char) n;
      }
    case 'x':
      {
        /* Unsigned on purpose: the number of hexadecimal digits is not
           bounded, and shifting a signed int past its range would be
           undefined behaviour.  Only the low 8 bits are returned, so
           wrapping around is harmless.  */
        unsigned int n = 0;

        for (;;)
          {
            c = do_getc ();
            if (c == EOF)
              break;
            else if (c >= '0' && c <= '9')
              n = (n << 4) + (c - '0');
            else if (c >= 'A' && c <= 'F')
              n = (n << 4) + (c - 'A' + 10);
            else if (c >= 'a' && c <= 'f')
              n = (n << 4) + (c - 'a' + 10);
            else
              {
                do_ungetc (c);
                break;
              }
          }
        return (unsigned char) n;
      }
    default:
      /* Any other character stands for itself.  */
      return c;
    }
}
588
589/* Read the next object.  */
590static void
591read_object (struct object *op, flag_context_ty outer_context)
592{
593  for (;;)
594    {
595      int c;
596
597      c = do_getc ();
598
599      switch (c)
600	{
601	case EOF:
602	  op->type = t_eof;
603	  return;
604
605	case '\n':
606	  /* Comments assumed to be grouped with a message must immediately
607	     precede it, with no non-whitespace token on a line between
608	     both.  */
609	  if (last_non_comment_line > last_comment_line)
610	    savable_comment_reset ();
611	  continue;
612
613	case ' ': case '\t': case '\f': case '\r':
614	  continue;
615
616	case '(':
617	  {
618	    int arg = 0;		/* Current argument number.  */
619	    flag_context_list_iterator_ty context_iter;
620	    const struct callshapes *shapes = NULL;
621	    struct arglist_parser *argparser = NULL;
622
623	    for (;; arg++)
624	      {
625		struct object inner;
626		flag_context_ty inner_context;
627
628		if (arg == 0)
629		  inner_context = null_context;
630		else
631		  inner_context =
632		    inherited_context (outer_context,
633				       flag_context_list_iterator_advance (
634					 &context_iter));
635
636		read_object (&inner, inner_context);
637
638		/* Recognize end of list.  */
639		if (inner.type == t_close)
640		  {
641		    op->type = t_other;
642		    /* Don't bother converting "()" to "NIL".  */
643		    last_non_comment_line = line_number;
644		    if (argparser != NULL)
645		      arglist_parser_done (argparser, arg);
646		    return;
647		  }
648
649		/* Dots are not allowed in every position.
650		   But be tolerant.  */
651
652		/* EOF inside list is illegal.  But be tolerant.  */
653		if (inner.type == t_eof)
654		  break;
655
656		if (arg == 0)
657		  {
658		    /* This is the function position.  */
659		    if (inner.type == t_symbol)
660		      {
661			char *symbol_name = string_of_object (&inner);
662			void *keyword_value;
663
664			if (hash_find_entry (&keywords,
665					     symbol_name, strlen (symbol_name),
666					     &keyword_value)
667			    == 0)
668			  shapes = (const struct callshapes *) keyword_value;
669
670			argparser = arglist_parser_alloc (mlp, shapes);
671
672			context_iter =
673			  flag_context_list_iterator (
674			    flag_context_list_table_lookup (
675			      flag_context_list_table,
676			      symbol_name, strlen (symbol_name)));
677
678			free (symbol_name);
679		      }
680		    else
681		      context_iter = null_context_list_iterator;
682		  }
683		else
684		  {
685		    /* These are the argument positions.  */
686		    if (argparser != NULL && inner.type == t_string)
687		      arglist_parser_remember (argparser, arg,
688					       string_of_object (&inner),
689					       inner_context,
690					       logical_file_name,
691					       inner.line_number_at_start,
692					       savable_comment);
693		  }
694
695		free_object (&inner);
696	      }
697
698	    if (argparser != NULL)
699	      arglist_parser_done (argparser, arg);
700	  }
701	  op->type = t_other;
702	  last_non_comment_line = line_number;
703	  return;
704
705	case '[':
706	  {
707	    for (;;)
708	      {
709		struct object inner;
710
711		read_object (&inner, null_context);
712
713		/* Recognize end of vector.  */
714		if (inner.type == t_close)
715		  {
716		    op->type = t_other;
717		    last_non_comment_line = line_number;
718		    return;
719		  }
720
721		/* Dots are not allowed.  But be tolerant.  */
722
723		/* EOF inside vector is illegal.  But be tolerant.  */
724		if (inner.type == t_eof)
725		  break;
726
727		free_object (&inner);
728	      }
729	  }
730	  op->type = t_other;
731	  last_non_comment_line = line_number;
732	  return;
733
734	case ')': case ']':
735	  /* Tell the caller about the end of list or vector.
736	     Unmatched closing parenthesis is illegal.  But be tolerant.  */
737	  op->type = t_close;
738	  last_non_comment_line = line_number;
739	  return;
740
741	case ',':
742	  {
743	    int c = do_getc ();
744	    /* The ,@ handling inside lists is wrong anyway, because
745	       ,@form expands to an unknown number of elements.  */
746	    if (c != EOF && c != '@')
747	      do_ungetc (c);
748	  }
749	  /*FALLTHROUGH*/
750	case '\'':
751	case '`':
752	  {
753	    struct object inner;
754
755	    read_object (&inner, null_context);
756
757	    /* Dots and EOF are not allowed here.  But be tolerant.  */
758
759	    free_object (&inner);
760
761	    op->type = t_other;
762	    last_non_comment_line = line_number;
763	    return;
764	  }
765
766	case ';':
767	  {
768	    bool all_semicolons = true;
769
770	    last_comment_line = line_number;
771	    comment_start ();
772	    for (;;)
773	      {
774		int c = do_getc ();
775		if (c == EOF || c == '\n' || c == '\f' || c == '\r')
776		  break;
777		if (c != ';')
778		  all_semicolons = false;
779		if (!all_semicolons)
780		  {
781		    /* We skip all leading white space, but not EOLs.  */
782		    if (!(buflen == 0 && (c == ' ' || c == '\t')))
783		      comment_add (c);
784		  }
785	      }
786	    comment_line_end (0);
787	    continue;
788	  }
789
790	case '"':
791	  {
792	    op->token = (struct token *) xmalloc (sizeof (struct token));
793	    init_token (op->token);
794	    op->line_number_at_start = line_number;
795	    for (;;)
796	      {
797		int c = do_getc ();
798		if (c == EOF)
799		  /* Invalid input.  Be tolerant, no error message.  */
800		  break;
801		if (c == '"')
802		  break;
803		if (c == '\\')
804		  {
805		    c = do_getc ();
806		    if (c == EOF)
807		      /* Invalid input.  Be tolerant, no error message.  */
808		      break;
809		    if (c == '\n')
810		      /* Ignore escaped newline.  */
811		      ;
812		    else
813		      {
814			c = do_getc_escaped (c);
815			if (c == EOF)
816			  /* Invalid input.  Be tolerant, no error message.  */
817			  break;
818			grow_token (op->token);
819			op->token->chars[op->token->charcount++] = c;
820		      }
821		  }
822		else
823		  {
824		    grow_token (op->token);
825		    op->token->chars[op->token->charcount++] = c;
826		  }
827	      }
828	    op->type = t_string;
829
830	    if (extract_all)
831	      {
832		lex_pos_ty pos;
833
834		pos.file_name = logical_file_name;
835		pos.line_number = op->line_number_at_start;
836		remember_a_message (mlp, NULL, string_of_object (op),
837				    null_context, &pos, savable_comment);
838	      }
839	    last_non_comment_line = line_number;
840	    return;
841	  }
842
843	case '?':
844	  c = do_getc ();
845	  if (c == EOF)
846	    /* Invalid input.  Be tolerant, no error message.  */
847	    ;
848	  else if (c == '\\')
849	    {
850	      c = do_getc ();
851	      if (c == EOF)
852		/* Invalid input.  Be tolerant, no error message.  */
853		;
854	      else
855		{
856		  c = do_getc_escaped (c);
857		  if (c == EOF)
858		    /* Invalid input.  Be tolerant, no error message.  */
859		    ;
860		}
861	    }
862	  op->type = t_other;
863	  last_non_comment_line = line_number;
864	  return;
865
866	case '#':
867	  /* Dispatch macro handling.  */
868	  c = do_getc ();
869	  if (c == EOF)
870	    /* Invalid input.  Be tolerant, no error message.  */
871	    {
872	      op->type = t_other;
873	      return;
874	    }
875
876	  switch (c)
877	    {
878	    case '!':
879	      if (ftell (fp) == 2)
880		/* Skip comment until !# */
881		{
882		  c = do_getc ();
883		  for (;;)
884		    {
885		      if (c == EOF)
886			break;
887		      if (c == '!')
888			{
889			  c = do_getc ();
890			  if (c == EOF || c == '#')
891			    break;
892			}
893		      else
894			c = do_getc ();
895		    }
896		  if (c == EOF)
897		    {
898		      /* EOF not allowed here.  But be tolerant.  */
899		      op->type = t_eof;
900		      return;
901		    }
902		  continue;
903		}
904	      /*FALLTHROUGH*/
905	    case '\'':
906	    case ':':
907	      {
908		struct object inner;
909		read_object (&inner, null_context);
910		/* Dots and EOF are not allowed here.
911		   But be tolerant.  */
912		free_object (&inner);
913		op->type = t_other;
914		last_non_comment_line = line_number;
915		return;
916	      }
917
918	    case '[':
919	    case '(':
920	      {
921		struct object inner;
922		do_ungetc (c);
923		read_object (&inner, null_context);
924		/* Dots and EOF are not allowed here.
925		   But be tolerant.  */
926		free_object (&inner);
927		op->type = t_other;
928		last_non_comment_line = line_number;
929		return;
930	      }
931
932	    case '|':
933	      {
934		int depth = 0;
935
936		comment_start ();
937		c = do_getc ();
938		for (;;)
939		  {
940		    if (c == EOF)
941		      break;
942		    if (c == '|')
943		      {
944			c = do_getc ();
945			if (c == EOF)
946			  break;
947			if (c == '#')
948			  {
949			    if (depth == 0)
950			      {
951				comment_line_end (0);
952				break;
953			      }
954			    depth--;
955			    comment_add ('|');
956			    comment_add ('#');
957			    c = do_getc ();
958			  }
959			else
960			  comment_add ('|');
961		      }
962		    else if (c == '#')
963		      {
964			c = do_getc ();
965			if (c == EOF)
966			  break;
967			comment_add ('#');
968			if (c == '|')
969			  {
970			    depth++;
971			    comment_add ('|');
972			    c = do_getc ();
973			  }
974		      }
975		    else
976		      {
977			/* We skip all leading white space.  */
978			if (!(buflen == 0 && (c == ' ' || c == '\t')))
979			  comment_add (c);
980			if (c == '\n')
981			  {
982			    comment_line_end (1);
983			    comment_start ();
984			  }
985			c = do_getc ();
986		      }
987		  }
988		if (c == EOF)
989		  {
990		    /* EOF not allowed here.  But be tolerant.  */
991		    op->type = t_eof;
992		    return;
993		  }
994		last_comment_line = line_number;
995		continue;
996	      }
997
998	    case '\\':
999	      {
1000		struct token token;
1001		int first = '\\';
1002		read_token (&token, &first);
1003		free_token (&token);
1004		op->type = t_other;
1005		last_non_comment_line = line_number;
1006		return;
1007	      }
1008
1009	    case 'T': case 't':
1010	    case 'F': case 'f':
1011	      op->type = t_other;
1012	      last_non_comment_line = line_number;
1013	      return;
1014
1015	    case 'B': case 'b':
1016	    case 'O': case 'o':
1017	    case 'D': case 'd':
1018	    case 'X': case 'x':
1019	    case 'E': case 'e':
1020	    case 'I': case 'i':
1021	      {
1022		struct token token;
1023		do_ungetc (c);
1024		c = '#';
1025		read_token (&token, &c);
1026		free_token (&token);
1027		op->type = t_other;
1028		last_non_comment_line = line_number;
1029		return;
1030	      }
1031
1032	    default:
1033	      /* Invalid input.  Be tolerant, no error message.  */
1034	      op->type = t_other;
1035	      last_non_comment_line = line_number;
1036	      return;
1037	    }
1038
1039	  /*NOTREACHED*/
1040	  abort ();
1041
1042	default:
1043	  /* Read a token.  */
1044	  {
1045	    bool symbol;
1046
1047	    op->token = (struct token *) xmalloc (sizeof (struct token));
1048	    symbol = read_token (op->token, &c);
1049	    if (op->token->charcount == 1 && op->token->chars[0] == '.')
1050	      {
1051		free_token (op->token);
1052		free (op->token);
1053		op->type = t_dot;
1054		last_non_comment_line = line_number;
1055		return;
1056	      }
1057	    if (!symbol)
1058	      {
1059		free_token (op->token);
1060		free (op->token);
1061		op->type = t_other;
1062		last_non_comment_line = line_number;
1063		return;
1064	      }
1065	    /* Distinguish between "foo" and "foo#bar".  */
1066	    c = do_getc ();
1067	    if (c == '#')
1068	      {
1069		struct token second_token;
1070
1071		free_token (op->token);
1072		free (op->token);
1073		read_token (&second_token, NULL);
1074		free_token (&second_token);
1075		op->type = t_other;
1076		last_non_comment_line = line_number;
1077		return;
1078	      }
1079	    else
1080	      {
1081		if (c != EOF)
1082		  do_ungetc (c);
1083		op->type = t_symbol;
1084		last_non_comment_line = line_number;
1085		return;
1086	      }
1087	  }
1088	}
1089    }
1090}
1091
1092
1093void
1094extract_librep (FILE *f,
1095		const char *real_filename, const char *logical_filename,
1096		flag_context_list_table_ty *flag_table,
1097		msgdomain_list_ty *mdlp)
1098{
1099  mlp = mdlp->item[0]->messages;
1100
1101  fp = f;
1102  real_file_name = real_filename;
1103  logical_file_name = xstrdup (logical_filename);
1104  line_number = 1;
1105
1106  last_comment_line = -1;
1107  last_non_comment_line = -1;
1108
1109  flag_context_list_table = flag_table;
1110
1111  init_keywords ();
1112
1113  /* Eat tokens until eof is seen.  When read_object returns
1114     due to an unbalanced closing parenthesis, just restart it.  */
1115  do
1116    {
1117      struct object toplevel_object;
1118
1119      read_object (&toplevel_object, null_context);
1120
1121      if (toplevel_object.type == t_eof)
1122	break;
1123
1124      free_object (&toplevel_object);
1125    }
1126  while (!feof (fp));
1127
1128  /* Close scanner.  */
1129  fp = NULL;
1130  real_file_name = NULL;
1131  logical_file_name = NULL;
1132  line_number = 0;
1133}
1134