• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /netgear-WNDR4500v2-V1.0.0.60_1.0.38/ap/gpl/timemachine/gettext-0.17/gettext-tools/src/
1/* xgettext Emacs Lisp backend.
2   Copyright (C) 2001-2003, 2005-2007 Free Software Foundation, Inc.
3
4   This file was written by Bruno Haible <haible@clisp.cons.org>, 2001-2002.
5
6   This program is free software: you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 3 of the License, or
9   (at your option) any later version.
10
11   This program is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14   GNU General Public License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
18
19#ifdef HAVE_CONFIG_H
20# include "config.h"
21#endif
22
23/* Specification.  */
24#include "x-elisp.h"
25
26#include <errno.h>
27#include <stdbool.h>
28#include <stdio.h>
29#include <stdlib.h>
30#include <string.h>
31
32#include "message.h"
33#include "xgettext.h"
34#include "x-elisp.h"
35#include "error.h"
36#include "xalloc.h"
37#include "hash.h"
38#include "c-ctype.h"
39#include "gettext.h"
40
41#define _(s) gettext(s)
42
43
44/* Summary of Emacs Lisp syntax:
45   - ';' starts a comment until end of line.
46   - '#@nn' starts a comment of nn bytes.
47   - Integers are constituted of an optional prefix (#b, #B for binary,
48     #o, #O for octal, #x, #X for hexadecimal, #nnr, #nnR for any radix),
49     an optional sign (+ or -), the digits, and an optional trailing dot.
50   - Characters are written as '?' followed by the character, possibly
51     with an escape sequence, for examples '?a', '?\n', '?\177'.
52   - Strings are delimited by double quotes. Backslash introduces an escape
53     sequence. The following are understood: '\n', '\r', '\f', '\t', '\a',
54     '\\', '\^C', '\012' (octal), '\x12' (hexadecimal).
55   - Symbols: can contain meta-characters if preceded by backslash.
56   - Uninterned symbols: written as #:SYMBOL.
57   - () delimit lists.
58   - [] delimit vectors.
59   The reader is implemented in emacs-21.1/src/lread.c.  */
60
61
62/* ====================== Keyword set customization.  ====================== */
63
64/* If true extract all strings.  */
65static bool extract_all = false;
66
67static hash_table keywords;
68static bool default_keywords = true;
69
70
71void
72x_elisp_extract_all ()
73{
74  extract_all = true;
75}
76
77
78void
79x_elisp_keyword (const char *name)
80{
81  if (name == NULL)
82    default_keywords = false;
83  else
84    {
85      const char *end;
86      struct callshape shape;
87      const char *colon;
88
89      if (keywords.table == NULL)
90	hash_init (&keywords, 100);
91
92      split_keywordspec (name, &end, &shape);
93
94      /* The characters between name and end should form a valid Lisp
95	 symbol.  */
96      colon = strchr (name, ':');
97      if (colon == NULL || colon >= end)
98	insert_keyword_callshape (&keywords, name, end - name, &shape);
99    }
100}
101
102/* Finish initializing the keywords hash table.
103   Called after argument processing, before each file is processed.  */
104static void
105init_keywords ()
106{
107  if (default_keywords)
108    {
109      /* When adding new keywords here, also update the documentation in
110	 xgettext.texi!  */
111      x_elisp_keyword ("_");
112      default_keywords = false;
113    }
114}
115
116void
117init_flag_table_elisp ()
118{
119  xgettext_record_flag ("_:1:pass-elisp-format");
120  xgettext_record_flag ("format:1:elisp-format");
121}
122
123
124/* ======================== Reading of characters.  ======================== */
125
126/* Real filename, used in error messages about the input file.  */
127static const char *real_file_name;
128
129/* Logical filename and line number, used to label the extracted messages.  */
130static char *logical_file_name;
131static int line_number;
132
133/* The input file stream.  */
134static FILE *fp;
135
136
137/* Fetch the next character from the input file.  */
138static int
139do_getc ()
140{
141  int c = getc (fp);
142
143  if (c == EOF)
144    {
145      if (ferror (fp))
146	error (EXIT_FAILURE, errno, _("\
147error while reading \"%s\""), real_file_name);
148    }
149  else if (c == '\n')
150   line_number++;
151
152  return c;
153}
154
155/* Put back the last fetched character, not EOF.  */
156static void
157do_ungetc (int c)
158{
159  if (c == '\n')
160    line_number--;
161  ungetc (c, fp);
162}
163
164
165/* ========================== Reading of tokens.  ========================== */
166
167
168/* A token consists of a sequence of characters.  */
169struct token
170{
171  int allocated;		/* number of allocated 'token_char's */
172  int charcount;		/* number of used 'token_char's */
173  char *chars;			/* the token's constituents */
174};
175
176/* Initialize a 'struct token'.  */
177static inline void
178init_token (struct token *tp)
179{
180  tp->allocated = 10;
181  tp->chars = XNMALLOC (tp->allocated, char);
182  tp->charcount = 0;
183}
184
185/* Free the memory pointed to by a 'struct token'.  */
186static inline void
187free_token (struct token *tp)
188{
189  free (tp->chars);
190}
191
192/* Ensure there is enough room in the token for one more character.  */
193static inline void
194grow_token (struct token *tp)
195{
196  if (tp->charcount == tp->allocated)
197    {
198      tp->allocated *= 2;
199      tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char));
200    }
201}
202
203/* Test whether a token has integer syntax.  */
204static inline bool
205is_integer (const char *p)
206{
207  /* NB: Yes, '+.' and '-.' both designate the integer 0.  */
208  const char *p_start = p;
209
210  if (*p == '+' || *p == '-')
211    p++;
212  if (*p == '\0')
213    return false;
214  while (*p >= '0' && *p <= '9')
215    p++;
216  if (p > p_start && *p == '.')
217    p++;
218  return (*p == '\0');
219}
220
221/* Test whether a token has float syntax.  */
222static inline bool
223is_float (const char *p)
224{
225  enum { LEAD_INT = 1, DOT_CHAR = 2, TRAIL_INT = 4, E_CHAR = 8, EXP_INT = 16 };
226  int state;
227
228  state = 0;
229  if (*p == '+' || *p == '-')
230    p++;
231  if (*p >= '0' && *p <= '9')
232    {
233      state |= LEAD_INT;
234      do
235	p++;
236      while (*p >= '0' && *p <= '9');
237    }
238  if (*p == '.')
239    {
240      state |= DOT_CHAR;
241      p++;
242    }
243  if (*p >= '0' && *p <= '9')
244    {
245      state |= TRAIL_INT;
246      do
247	p++;
248      while (*p >= '0' && *p <= '9');
249    }
250  if (*p == 'e' || *p == 'E')
251    {
252      state |= E_CHAR;
253      p++;
254      if (*p == '+' || *p == '-')
255	p++;
256      if (*p >= '0' && *p <= '9')
257	{
258	  state |= EXP_INT;
259	  do
260	    p++;
261	  while (*p >= '0' && *p <= '9');
262	}
263      else if (p[-1] == '+'
264	       && ((p[0] == 'I' && p[1] == 'N' && p[2] == 'F')
265		   || (p[0] == 'N' && p[1] == 'a' && p[2] == 'N')))
266	{
267	  state |= EXP_INT;
268	  p += 3;
269	}
270    }
271  return (*p == '\0')
272	 && (state == (LEAD_INT | DOT_CHAR | TRAIL_INT)
273	     || state == (DOT_CHAR | TRAIL_INT)
274	     || state == (LEAD_INT | E_CHAR | EXP_INT)
275	     || state == (LEAD_INT | DOT_CHAR | TRAIL_INT | E_CHAR | EXP_INT)
276	     || state == (DOT_CHAR | TRAIL_INT | E_CHAR | EXP_INT));
277}
278
279/* Read the next token.  'first' is the first character, which has already
280   been read.  Returns true for a symbol, false for a number.  */
281static bool
282read_token (struct token *tp, int first)
283{
284  int c;
285  bool quoted = false;
286
287  init_token (tp);
288
289  c = first;
290
291  for (;; c = do_getc ())
292    {
293      if (c == EOF)
294	break;
295      if (c <= ' ') /* FIXME: Assumes ASCII compatible encoding */
296	break;
297      if (c == '\"' || c == '\'' || c == ';' || c == '(' || c == ')'
298	  || c == '[' || c == ']' || c == '#')
299	break;
300      if (c == '\\')
301	{
302	  quoted = true;
303	  c = do_getc ();
304	  if (c == EOF)
305	    /* Invalid, but be tolerant.  */
306	    break;
307	}
308      grow_token (tp);
309      tp->chars[tp->charcount++] = c;
310    }
311  if (c != EOF)
312    do_ungetc (c);
313
314  if (quoted)
315    return true; /* symbol */
316
317  /* Add a NUL byte at the end, for is_integer and is_float.  */
318  grow_token (tp);
319  tp->chars[tp->charcount] = '\0';
320
321  if (is_integer (tp->chars) || is_float (tp->chars))
322    return false; /* number */
323  else
324    return true; /* symbol */
325}
326
327
328/* ========================= Accumulating comments ========================= */
329
330
331static char *buffer;
332static size_t bufmax;
333static size_t buflen;
334
335static inline void
336comment_start ()
337{
338  buflen = 0;
339}
340
341static inline void
342comment_add (int c)
343{
344  if (buflen >= bufmax)
345    {
346      bufmax = 2 * bufmax + 10;
347      buffer = xrealloc (buffer, bufmax);
348    }
349  buffer[buflen++] = c;
350}
351
352static inline void
353comment_line_end (size_t chars_to_remove)
354{
355  buflen -= chars_to_remove;
356  while (buflen >= 1
357	 && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
358    --buflen;
359  if (chars_to_remove == 0 && buflen >= bufmax)
360    {
361      bufmax = 2 * bufmax + 10;
362      buffer = xrealloc (buffer, bufmax);
363    }
364  buffer[buflen] = '\0';
365  savable_comment_add (buffer);
366}
367
368
369/* These are for tracking whether comments count as immediately before
370   keyword.  */
371static int last_comment_line;
372static int last_non_comment_line;
373
374
375/* ========================= Accumulating messages ========================= */
376
377
378static message_list_ty *mlp;
379
380
381/* ============== Reading of objects.  See CLHS 2 "Syntax".  ============== */
382
383
384/* We are only interested in symbols (e.g. GETTEXT or NGETTEXT) and strings.
385   Other objects need not to be represented precisely.  */
386enum object_type
387{
388  t_symbol,	/* symbol */
389  t_string,	/* string */
390  t_other,	/* other kind of real object */
391  t_dot,	/* '.' pseudo object */
392  t_listclose,	/* ')' pseudo object */
393  t_vectorclose,/* ']' pseudo object */
394  t_eof		/* EOF marker */
395};
396
397struct object
398{
399  enum object_type type;
400  struct token *token;		/* for t_symbol and t_string */
401  int line_number_at_start;	/* for t_string */
402};
403
404/* Free the memory pointed to by a 'struct object'.  */
405static inline void
406free_object (struct object *op)
407{
408  if (op->type == t_symbol || op->type == t_string)
409    {
410      free_token (op->token);
411      free (op->token);
412    }
413}
414
415/* Convert a t_symbol/t_string token to a char*.  */
416static char *
417string_of_object (const struct object *op)
418{
419  char *str;
420  int n;
421
422  if (!(op->type == t_symbol || op->type == t_string))
423    abort ();
424  n = op->token->charcount;
425  str = XNMALLOC (n + 1, char);
426  memcpy (str, op->token->chars, n);
427  str[n] = '\0';
428  return str;
429}
430
431/* Context lookup table.  */
432static flag_context_list_table_ty *flag_context_list_table;
433
434/* Returns the character represented by an escape sequence.  */
435#define IGNORABLE_ESCAPE (EOF - 1)
436static int
437do_getc_escaped (int c, bool in_string)
438{
439  switch (c)
440    {
441    case 'a':
442      return '\a';
443    case 'b':
444      return '\b';
445    case 'd':
446      return 0x7F;
447    case 'e':
448      return 0x1B;
449    case 'f':
450      return '\f';
451    case 'n':
452      return '\n';
453    case 'r':
454      return '\r';
455    case 't':
456      return '\t';
457    case 'v':
458      return '\v';
459
460    case '\n':
461      return IGNORABLE_ESCAPE;
462
463    case ' ':
464      return (in_string ? IGNORABLE_ESCAPE : ' ');
465
466    case 'M': /* meta */
467      c = do_getc ();
468      if (c == EOF)
469	return EOF;
470      if (c != '-')
471	/* Invalid input.  But be tolerant.  */
472	return c;
473      c = do_getc ();
474      if (c == EOF)
475	return EOF;
476      if (c == '\\')
477	{
478	  c = do_getc ();
479	  if (c == EOF)
480	    return EOF;
481	  c = do_getc_escaped (c, false);
482	}
483      return c | 0x80;
484
485    case 'S': /* shift */
486      c = do_getc ();
487      if (c == EOF)
488	return EOF;
489      if (c != '-')
490	/* Invalid input.  But be tolerant.  */
491	return c;
492      c = do_getc ();
493      if (c == EOF)
494	return EOF;
495      if (c == '\\')
496	{
497	  c = do_getc ();
498	  if (c == EOF)
499	    return EOF;
500	  c = do_getc_escaped (c, false);
501	}
502      return (c >= 'a' && c <= 'z' ? c - 'a' + 'A' : c);
503
504    case 'H': /* hyper */
505    case 'A': /* alt */
506    case 's': /* super */
507      c = do_getc ();
508      if (c == EOF)
509	return EOF;
510      if (c != '-')
511	/* Invalid input.  But be tolerant.  */
512	return c;
513      c = do_getc ();
514      if (c == EOF)
515	return EOF;
516      if (c == '\\')
517	{
518	  c = do_getc ();
519	  if (c == EOF)
520	    return EOF;
521	  c = do_getc_escaped (c, false);
522	}
523      return c;
524
525    case 'C': /* ctrl */
526      c = do_getc ();
527      if (c == EOF)
528	return EOF;
529      if (c != '-')
530	/* Invalid input.  But be tolerant.  */
531	return c;
532      /*FALLTHROUGH*/
533    case '^':
534      c = do_getc ();
535      if (c == EOF)
536	return EOF;
537      if (c == '\\')
538	{
539	  c = do_getc ();
540	  if (c == EOF)
541	    return EOF;
542	  c = do_getc_escaped (c, false);
543	}
544      if (c == '?')
545	return 0x7F;
546      if ((c & 0x5F) >= 0x41 && (c & 0x5F) <= 0x5A)
547	return c & 0x9F;
548      if ((c & 0x7F) >= 0x40 && (c & 0x7F) <= 0x5F)
549	return c & 0x9F;
550#if 0 /* We cannot handle NUL bytes in strings.  */
551      if (c == ' ')
552	return 0x00;
553#endif
554      return c;
555
556    case '0': case '1': case '2': case '3': case '4':
557    case '5': case '6': case '7':
558      /* An octal escape, as in ANSI C.  */
559      {
560	int n = c - '0';
561
562	c = do_getc ();
563	if (c != EOF)
564	  {
565	    if (c >= '0' && c <= '7')
566	      {
567		n = (n << 3) + (c - '0');
568		c = do_getc ();
569		if (c != EOF)
570		  {
571		    if (c >= '0' && c <= '7')
572		      n = (n << 3) + (c - '0');
573		    else
574		      do_ungetc (c);
575		  }
576	      }
577	    else
578	      do_ungetc (c);
579	  }
580	return (unsigned char) n;
581      }
582
583    case 'x':
584      /* A hexadecimal escape, as in ANSI C.  */
585      {
586	int n = 0;
587
588	for (;;)
589	  {
590	    c = do_getc ();
591	    if (c == EOF)
592	      break;
593	    else if (c >= '0' && c <= '9')
594	      n = (n << 4) + (c - '0');
595	    else if (c >= 'A' && c <= 'F')
596	      n = (n << 4) + (c - 'A' + 10);
597	    else if (c >= 'a' && c <= 'f')
598	      n = (n << 4) + (c - 'a' + 10);
599	    else
600	      {
601		do_ungetc (c);
602		break;
603	      }
604	  }
605	return (unsigned char) n;
606      }
607
608    default:
609      /* Ignore Emacs multibyte character stuff.  All the strings we are
610	 interested in are ASCII strings.  */
611      return c;
612    }
613}
614
615/* Read the next object.
616   'first_in_list' and 'new_backquote_flag' are used for reading old
617   backquote syntax and new backquote syntax.  */
618static void
619read_object (struct object *op, bool first_in_list, bool new_backquote_flag,
620	     flag_context_ty outer_context)
621{
622  for (;;)
623    {
624      int c;
625
626      c = do_getc ();
627
628      switch (c)
629	{
630	case EOF:
631	  op->type = t_eof;
632	  return;
633
634	case '\n':
635	  /* Comments assumed to be grouped with a message must immediately
636	     precede it, with no non-whitespace token on a line between
637	     both.  */
638	  if (last_non_comment_line > last_comment_line)
639	    savable_comment_reset ();
640	  continue;
641
642	case '(':
643	  {
644	    int arg = 0;		/* Current argument number.  */
645	    flag_context_list_iterator_ty context_iter;
646	    const struct callshapes *shapes = NULL;
647	    struct arglist_parser *argparser = NULL;
648
649	    for (;; arg++)
650	      {
651		struct object inner;
652		flag_context_ty inner_context;
653
654		if (arg == 0)
655		  inner_context = null_context;
656		else
657		  inner_context =
658		    inherited_context (outer_context,
659				       flag_context_list_iterator_advance (
660					 &context_iter));
661
662		read_object (&inner, arg == 0, new_backquote_flag,
663			     inner_context);
664
665		/* Recognize end of list.  */
666		if (inner.type == t_listclose)
667		  {
668		    op->type = t_other;
669		    /* Don't bother converting "()" to "NIL".  */
670		    last_non_comment_line = line_number;
671		    if (argparser != NULL)
672		      arglist_parser_done (argparser, arg);
673		    return;
674		  }
675
676		/* Dots are not allowed in every position. ']' is not allowed.
677		   But be tolerant.  */
678
679		/* EOF inside list is illegal.  But be tolerant.  */
680		if (inner.type == t_eof)
681		  break;
682
683		if (arg == 0)
684		  {
685		    /* This is the function position.  */
686		    if (inner.type == t_symbol)
687		      {
688			char *symbol_name = string_of_object (&inner);
689			void *keyword_value;
690
691			if (hash_find_entry (&keywords,
692					     symbol_name, strlen (symbol_name),
693					     &keyword_value)
694			    == 0)
695			  shapes = (const struct callshapes *) keyword_value;
696
697			argparser = arglist_parser_alloc (mlp, shapes);
698
699			context_iter =
700			  flag_context_list_iterator (
701			    flag_context_list_table_lookup (
702			      flag_context_list_table,
703			      symbol_name, strlen (symbol_name)));
704
705			free (symbol_name);
706		      }
707		    else
708		      context_iter = null_context_list_iterator;
709		  }
710		else
711		  {
712		    /* These are the argument positions.  */
713		    if (argparser != NULL && inner.type == t_string)
714		      arglist_parser_remember (argparser, arg,
715					       string_of_object (&inner),
716					       inner_context,
717					       logical_file_name,
718					       inner.line_number_at_start,
719					       savable_comment);
720		  }
721
722		free_object (&inner);
723	      }
724
725	    if (argparser != NULL)
726	      arglist_parser_done (argparser, arg);
727	  }
728	  op->type = t_other;
729	  last_non_comment_line = line_number;
730	  return;
731
732	case ')':
733	  /* Tell the caller about the end of list.
734	     Unmatched closing parenthesis is illegal.  But be tolerant.  */
735	  op->type = t_listclose;
736	  last_non_comment_line = line_number;
737	  return;
738
739	case '[':
740	  {
741	    for (;;)
742	      {
743		struct object inner;
744
745		read_object (&inner, false, new_backquote_flag, null_context);
746
747		/* Recognize end of vector.  */
748		if (inner.type == t_vectorclose)
749		  {
750		    op->type = t_other;
751		    last_non_comment_line = line_number;
752		    return;
753		  }
754
755		/* Dots and ')' are not allowed.  But be tolerant.  */
756
757		/* EOF inside vector is illegal.  But be tolerant.  */
758		if (inner.type == t_eof)
759		  break;
760
761		free_object (&inner);
762	      }
763	  }
764	  op->type = t_other;
765	  last_non_comment_line = line_number;
766	  return;
767
768	case ']':
769	  /* Tell the caller about the end of vector.
770	     Unmatched closing bracket is illegal.  But be tolerant.  */
771	  op->type = t_vectorclose;
772	  last_non_comment_line = line_number;
773	  return;
774
775	case '\'':
776	  {
777	    struct object inner;
778
779	    read_object (&inner, false, new_backquote_flag, null_context);
780
781	    /* Dots and EOF are not allowed here.  But be tolerant.  */
782
783	    free_object (&inner);
784
785	    op->type = t_other;
786	    last_non_comment_line = line_number;
787	    return;
788	  }
789
790	case '`':
791	  if (first_in_list)
792	    goto default_label;
793	  {
794	    struct object inner;
795
796	    read_object (&inner, false, true, null_context);
797
798	    /* Dots and EOF are not allowed here.  But be tolerant.  */
799
800	    free_object (&inner);
801
802	    op->type = t_other;
803	    last_non_comment_line = line_number;
804	    return;
805	  }
806
807	case ',':
808	  if (!new_backquote_flag)
809	    goto default_label;
810	  {
811	    int c = do_getc ();
812	    /* The ,@ handling inside lists is wrong anyway, because
813	       ,@form expands to an unknown number of elements.  */
814	    if (c != EOF && c != '@' && c != '.')
815	      do_ungetc (c);
816	  }
817	  {
818	    struct object inner;
819
820	    read_object (&inner, false, false, null_context);
821
822	    /* Dots and EOF are not allowed here.  But be tolerant.  */
823
824	    free_object (&inner);
825
826	    op->type = t_other;
827	    last_non_comment_line = line_number;
828	    return;
829	  }
830
831	case ';':
832	  {
833	    bool all_semicolons = true;
834
835	    last_comment_line = line_number;
836	    comment_start ();
837	    for (;;)
838	      {
839		int c = do_getc ();
840		if (c == EOF || c == '\n')
841		  break;
842		if (c != ';')
843		  all_semicolons = false;
844		if (!all_semicolons)
845		  {
846		    /* We skip all leading white space, but not EOLs.  */
847		    if (!(buflen == 0 && (c == ' ' || c == '\t')))
848		      comment_add (c);
849		  }
850	      }
851	    comment_line_end (0);
852	    continue;
853	  }
854
855	case '"':
856	  {
857	    op->token = XMALLOC (struct token);
858	    init_token (op->token);
859	    op->line_number_at_start = line_number;
860	    for (;;)
861	      {
862		int c = do_getc ();
863		if (c == EOF)
864		  /* Invalid input.  Be tolerant, no error message.  */
865		  break;
866		if (c == '"')
867		  break;
868		if (c == '\\')
869		  {
870		    c = do_getc ();
871		    if (c == EOF)
872		      /* Invalid input.  Be tolerant, no error message.  */
873		      break;
874		    c = do_getc_escaped (c, true);
875		    if (c == EOF)
876		      /* Invalid input.  Be tolerant, no error message.  */
877		      break;
878		    if (c == IGNORABLE_ESCAPE)
879		      /* Ignore escaped newline and escaped space.  */
880		      ;
881		    else
882		      {
883			grow_token (op->token);
884			op->token->chars[op->token->charcount++] = c;
885		      }
886		  }
887		else
888		  {
889		    grow_token (op->token);
890		    op->token->chars[op->token->charcount++] = c;
891		  }
892	      }
893	    op->type = t_string;
894
895	    if (extract_all)
896	      {
897		lex_pos_ty pos;
898
899		pos.file_name = logical_file_name;
900		pos.line_number = op->line_number_at_start;
901		remember_a_message (mlp, NULL, string_of_object (op),
902				    null_context, &pos, savable_comment);
903	      }
904	    last_non_comment_line = line_number;
905	    return;
906	  }
907
908	case '?':
909	  c = do_getc ();
910	  if (c == EOF)
911	    /* Invalid input.  Be tolerant, no error message.  */
912	    ;
913	  else if (c == '\\')
914	    {
915	      c = do_getc ();
916	      if (c == EOF)
917		/* Invalid input.  Be tolerant, no error message.  */
918		;
919	      else
920		{
921		  c = do_getc_escaped (c, false);
922		  if (c == EOF)
923		    /* Invalid input.  Be tolerant, no error message.  */
924		    ;
925		}
926	    }
927	  /* Impossible to deal with Emacs multibyte character stuff here.  */
928	  op->type = t_other;
929	  last_non_comment_line = line_number;
930	  return;
931
932	case '#':
933	  /* Dispatch macro handling.  */
934	  c = do_getc ();
935	  if (c == EOF)
936	    /* Invalid input.  Be tolerant, no error message.  */
937	    {
938	      op->type = t_other;
939	      return;
940	    }
941
942	  switch (c)
943	    {
944	    case '^':
945	      c = do_getc ();
946	      if (c == '^')
947		c = do_getc ();
948	      if (c == '[')
949		{
950		  /* Read a char table, same syntax as a vector.  */
951		  for (;;)
952		    {
953		      struct object inner;
954
955		      read_object (&inner, false, new_backquote_flag,
956				   null_context);
957
958		      /* Recognize end of vector.  */
959		      if (inner.type == t_vectorclose)
960			{
961			  op->type = t_other;
962			  last_non_comment_line = line_number;
963			  return;
964			}
965
966		      /* Dots and ')' are not allowed.  But be tolerant.  */
967
968		      /* EOF inside vector is illegal.  But be tolerant.  */
969		      if (inner.type == t_eof)
970			break;
971
972		      free_object (&inner);
973		    }
974		  op->type = t_other;
975		  last_non_comment_line = line_number;
976		  return;
977		}
978	      else
979		/* Invalid input.  Be tolerant, no error message.  */
980		{
981		  op->type = t_other;
982		  if (c != EOF)
983		    last_non_comment_line = line_number;
984		  return;
985		}
986
987	    case '&':
988	      /* Read a bit vector.  */
989	      {
990		struct object length;
991		read_object (&length, first_in_list, new_backquote_flag,
992			     null_context);
993		/* Dots and EOF are not allowed here.
994		   But be tolerant.  */
995		free_object (&length);
996	      }
997	      c = do_getc ();
998	      if (c == '"')
999		{
1000		  struct object string;
1001		  read_object (&string, first_in_list, new_backquote_flag,
1002			       null_context);
1003		  free_object (&string);
1004		}
1005	      else
1006		/* Invalid input.  Be tolerant, no error message.  */
1007		do_ungetc (c);
1008	      op->type = t_other;
1009	      last_non_comment_line = line_number;
1010	      return;
1011
1012	    case '[':
1013	      /* Read a compiled function, same syntax as a vector.  */
1014	    case '(':
1015	      /* Read a string with properties, same syntax as a list.  */
1016	      {
1017		struct object inner;
1018		do_ungetc (c);
1019		read_object (&inner, false, new_backquote_flag, null_context);
1020		/* Dots and EOF are not allowed here.
1021		   But be tolerant.  */
1022		free_object (&inner);
1023		op->type = t_other;
1024		last_non_comment_line = line_number;
1025		return;
1026	      }
1027
1028	    case '@':
1029	      /* Read a comment consisting of a given number of bytes.  */
1030	      {
1031		unsigned int nskip = 0;
1032
1033		for (;;)
1034		  {
1035		    c = do_getc ();
1036		    if (!(c >= '0' && c <= '9'))
1037		      break;
1038		    nskip = 10 * nskip + (c - '0');
1039		  }
1040		if (c != EOF)
1041		  {
1042		    do_ungetc (c);
1043		    for (; nskip > 0; nskip--)
1044		      if (do_getc () == EOF)
1045			break;
1046		  }
1047		continue;
1048	      }
1049
1050	    case '$':
1051	      op->type = t_other;
1052	      last_non_comment_line = line_number;
1053	      return;
1054
1055	    case '\'':
1056	    case ':':
1057	    case 'S': case 's': /* XEmacs only */
1058	      {
1059		struct object inner;
1060		read_object (&inner, false, new_backquote_flag, null_context);
1061		/* Dots and EOF are not allowed here.
1062		   But be tolerant.  */
1063		free_object (&inner);
1064		op->type = t_other;
1065		last_non_comment_line = line_number;
1066		return;
1067	      }
1068
1069	    case '0': case '1': case '2': case '3': case '4':
1070	    case '5': case '6': case '7': case '8': case '9':
1071	      /* Read Common Lisp style #n# or #n=.  */
1072	      for (;;)
1073		{
1074		  c = do_getc ();
1075		  if (!(c >= '0' && c <= '9'))
1076		    break;
1077		}
1078	      if (c == EOF)
1079		/* Invalid input.  Be tolerant, no error message.  */
1080		{
1081		  op->type = t_other;
1082		  return;
1083		}
1084	      if (c == '=')
1085		{
1086		  read_object (op, false, new_backquote_flag, outer_context);
1087		  last_non_comment_line = line_number;
1088		  return;
1089		}
1090	      if (c == '#')
1091		{
1092		  op->type = t_other;
1093		  last_non_comment_line = line_number;
1094		  return;
1095		}
1096	      if (c == 'R' || c == 'r')
1097		{
1098		  /* Read an integer.  */
1099		  c = do_getc ();
1100		  if (c == '+' || c == '-')
1101		    c = do_getc ();
1102		  for (; c != EOF; c = do_getc ())
1103		    if (!c_isalnum (c))
1104		      {
1105			do_ungetc (c);
1106			break;
1107		      }
1108		  op->type = t_other;
1109		  last_non_comment_line = line_number;
1110		  return;
1111		}
1112	      /* Invalid input.  Be tolerant, no error message.  */
1113	      op->type = t_other;
1114	      last_non_comment_line = line_number;
1115	      return;
1116
1117	    case 'X': case 'x':
1118	    case 'O': case 'o':
1119	    case 'B': case 'b':
1120	      {
1121		/* Read an integer.  */
1122		c = do_getc ();
1123		if (c == '+' || c == '-')
1124		  c = do_getc ();
1125		for (; c != EOF; c = do_getc ())
1126		  if (!c_isalnum (c))
1127		    {
1128		      do_ungetc (c);
1129		      break;
1130		    }
1131		op->type = t_other;
1132		last_non_comment_line = line_number;
1133		return;
1134	      }
1135
1136	    case '*': /* XEmacs only */
1137	      {
1138		/* Read a bit-vector.  */
1139		do
1140		  c = do_getc ();
1141		while (c == '0' || c == '1');
1142		if (c != EOF)
1143		  do_ungetc (c);
1144		op->type = t_other;
1145		last_non_comment_line = line_number;
1146		return;
1147	      }
1148
1149	    case '+': /* XEmacs only */
1150	    case '-': /* XEmacs only */
1151	      /* Simply assume every feature expression is true.  */
1152	      {
1153		struct object inner;
1154		read_object (&inner, false, new_backquote_flag, null_context);
1155		/* Dots and EOF are not allowed here.
1156		   But be tolerant.  */
1157		free_object (&inner);
1158		continue;
1159	      }
1160
1161	    default:
1162	      /* Invalid input.  Be tolerant, no error message.  */
1163	      op->type = t_other;
1164	      last_non_comment_line = line_number;
1165	      return;
1166	    }
1167
1168	  /*NOTREACHED*/
1169	  abort ();
1170
1171	case '.':
1172	  c = do_getc ();
1173	  if (c != EOF)
1174	    {
1175	      do_ungetc (c);
1176	      if (c <= ' ' /* FIXME: Assumes ASCII compatible encoding */
1177		  || strchr ("\"'`,(", c) != NULL)
1178		{
1179		  op->type = t_dot;
1180		  last_non_comment_line = line_number;
1181		  return;
1182		}
1183	    }
1184	  c = '.';
1185	  /*FALLTHROUGH*/
1186	default:
1187	default_label:
1188	  if (c <= ' ') /* FIXME: Assumes ASCII compatible encoding */
1189	    continue;
1190	  /* Read a token.  */
1191	  {
1192	    bool symbol;
1193
1194	    op->token = XMALLOC (struct token);
1195	    symbol = read_token (op->token, c);
1196	    if (symbol)
1197	      {
1198		op->type = t_symbol;
1199		last_non_comment_line = line_number;
1200		return;
1201	      }
1202	    else
1203	      {
1204		free_token (op->token);
1205		free (op->token);
1206		op->type = t_other;
1207		last_non_comment_line = line_number;
1208		return;
1209	      }
1210	  }
1211	}
1212    }
1213}
1214
1215
1216void
1217extract_elisp (FILE *f,
1218	       const char *real_filename, const char *logical_filename,
1219	       flag_context_list_table_ty *flag_table,
1220	       msgdomain_list_ty *mdlp)
1221{
1222  mlp = mdlp->item[0]->messages;
1223
1224  fp = f;
1225  real_file_name = real_filename;
1226  logical_file_name = xstrdup (logical_filename);
1227  line_number = 1;
1228
1229  last_comment_line = -1;
1230  last_non_comment_line = -1;
1231
1232  flag_context_list_table = flag_table;
1233
1234  init_keywords ();
1235
1236  /* Eat tokens until eof is seen.  When read_object returns
1237     due to an unbalanced closing parenthesis, just restart it.  */
1238  do
1239    {
1240      struct object toplevel_object;
1241
1242      read_object (&toplevel_object, false, false, null_context);
1243
1244      if (toplevel_object.type == t_eof)
1245	break;
1246
1247      free_object (&toplevel_object);
1248    }
1249  while (!feof (fp));
1250
1251  /* Close scanner.  */
1252  fp = NULL;
1253  real_file_name = NULL;
1254  logical_file_name = NULL;
1255  line_number = 0;
1256}
1257