• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /netgear-WNDR4500v2-V1.0.0.60_1.0.38/ap/gpl/timemachine/gettext-0.17/gettext-tools/src/
1/* xgettext Scheme backend.
2   Copyright (C) 2004-2007 Free Software Foundation, Inc.
3
4   This file was written by Bruno Haible <bruno@clisp.org>, 2004-2005.
5
6   This program is free software: you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 3 of the License, or
9   (at your option) any later version.
10
11   This program is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14   GNU General Public License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
18
19#ifdef HAVE_CONFIG_H
20# include "config.h"
21#endif
22
23/* Specification.  */
24#include "x-scheme.h"
25
26#include <errno.h>
27#include <stdbool.h>
28#include <stdio.h>
29#include <stdlib.h>
30#include <string.h>
31
32#include "message.h"
33#include "xgettext.h"
34#include "x-scheme.h"
35#include "error.h"
36#include "xalloc.h"
37#include "hash.h"
38#include "gettext.h"
39
40#define _(s) gettext(s)
41
42
43/* The Scheme syntax is described in R5RS.  It is implemented in
44   guile-1.6.4/libguile/read.c.
45   Since we are interested only in strings and in forms similar to
46        (gettext msgid ...)
47   or   (ngettext msgid msgid_plural ...)
48   we make the following simplifications:
49
50   - Assume the keywords and strings are in an ASCII compatible encoding.
51     This means we can read the input file one byte at a time, instead of
52     one character at a time.  No need to worry about multibyte characters:
53     If they occur as part of identifiers, they most probably act as
54     constituent characters, and the byte based approach will do the same.
55
56   - Assume the read-hash-procedures is in the default state.
57     Non-standard reader extensions are mostly used to read data, not programs.
58
59   The remaining syntax rules are:
60
61   - The syntax code assigned to each character, and how tokens are built
62     up from characters (single escape, multiple escape etc.).
63
64   - Comment syntax: ';' and '#! ... \n!#\n'.
65
66   - String syntax: "..." with single escapes.
67
68   - Read macros and dispatch macro character '#'.  Needed to be able to
69     tell which is the n-th argument of a function call.
70
71 */
72
73
74/* ====================== Keyword set customization.  ====================== */
75
76/* If true extract all strings.  */
77static bool extract_all = false;
78
79static hash_table keywords;
80static bool default_keywords = true;
81
82
83void
84x_scheme_extract_all ()
85{
86  extract_all = true;
87}
88
89
90void
91x_scheme_keyword (const char *name)
92{
93  if (name == NULL)
94    default_keywords = false;
95  else
96    {
97      const char *end;
98      struct callshape shape;
99      const char *colon;
100
101      if (keywords.table == NULL)
102	hash_init (&keywords, 100);
103
104      split_keywordspec (name, &end, &shape);
105
106      /* The characters between name and end should form a valid Lisp symbol.
107	 Extract the symbol name part.  */
108      colon = strchr (name, ':');
109      if (colon != NULL && colon < end)
110	{
111	  name = colon + 1;
112	  if (name < end && *name == ':')
113	    name++;
114	  colon = strchr (name, ':');
115	  if (colon != NULL && colon < end)
116	    return;
117	}
118
119      insert_keyword_callshape (&keywords, name, end - name, &shape);
120    }
121}
122
123/* Finish initializing the keywords hash table.
124   Called after argument processing, before each file is processed.  */
125static void
126init_keywords ()
127{
128  if (default_keywords)
129    {
130      /* When adding new keywords here, also update the documentation in
131	 xgettext.texi!  */
132      x_scheme_keyword ("gettext");		/* libguile/i18n.c */
133      x_scheme_keyword ("ngettext:1,2");	/* libguile/i18n.c */
134      x_scheme_keyword ("gettext-noop");
135      default_keywords = false;
136    }
137}
138
139void
140init_flag_table_scheme ()
141{
142  xgettext_record_flag ("gettext:1:pass-scheme-format");
143  xgettext_record_flag ("ngettext:1:pass-scheme-format");
144  xgettext_record_flag ("ngettext:2:pass-scheme-format");
145  xgettext_record_flag ("gettext-noop:1:pass-scheme-format");
146  xgettext_record_flag ("format:2:scheme-format");
147}
148
149
150/* ======================== Reading of characters.  ======================== */
151
152/* Real filename, used in error messages about the input file.  */
153static const char *real_file_name;
154
155/* Logical filename and line number, used to label the extracted messages.  */
156static char *logical_file_name;
157static int line_number;
158
159/* The input file stream.  */
160static FILE *fp;
161
162
163/* Fetch the next character from the input file.  */
164static int
165do_getc ()
166{
167  int c = getc (fp);
168
169  if (c == EOF)
170    {
171      if (ferror (fp))
172	error (EXIT_FAILURE, errno, _("\
173error while reading \"%s\""), real_file_name);
174    }
175  else if (c == '\n')
176   line_number++;
177
178  return c;
179}
180
181/* Put back the last fetched character, not EOF.  */
182static void
183do_ungetc (int c)
184{
185  if (c == '\n')
186    line_number--;
187  ungetc (c, fp);
188}
189
190
191/* ========================== Reading of tokens.  ========================== */
192
193
194/* A token consists of a sequence of characters.  */
195struct token
196{
197  int allocated;		/* number of allocated 'token_char's */
198  int charcount;		/* number of used 'token_char's */
199  char *chars;			/* the token's constituents */
200};
201
202/* Initialize a 'struct token'.  */
203static inline void
204init_token (struct token *tp)
205{
206  tp->allocated = 10;
207  tp->chars = XNMALLOC (tp->allocated, char);
208  tp->charcount = 0;
209}
210
211/* Free the memory pointed to by a 'struct token'.  */
212static inline void
213free_token (struct token *tp)
214{
215  free (tp->chars);
216}
217
218/* Ensure there is enough room in the token for one more character.  */
219static inline void
220grow_token (struct token *tp)
221{
222  if (tp->charcount == tp->allocated)
223    {
224      tp->allocated *= 2;
225      tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char));
226    }
227}
228
229/* Read the next token.  'first' is the first character, which has already
230   been read.  */
231static void
232read_token (struct token *tp, int first)
233{
234  init_token (tp);
235
236  grow_token (tp);
237  tp->chars[tp->charcount++] = first;
238
239  for (;;)
240    {
241      int c = do_getc ();
242
243      if (c == EOF)
244	break;
245      if (c == ' ' || c == '\r' || c == '\f' || c == '\t' || c == '\n'
246	  || c == '"' || c == '(' || c == ')' || c == ';')
247	{
248	  do_ungetc (c);
249	  break;
250	}
251      grow_token (tp);
252      tp->chars[tp->charcount++] = c;
253    }
254}
255
256/* Tests if a token represents an integer.
257   Taken from guile-1.6.4/libguile/numbers.c:scm_istr2int().  */
258static inline bool
259is_integer_syntax (const char *str, int len, int radix)
260{
261  const char *p = str;
262  const char *p_end = str + len;
263
264  /* The accepted syntax is
265       ['+'|'-'] DIGIT+
266     where DIGIT is a hexadecimal digit whose value is below radix.  */
267
268  if (p == p_end)
269    return false;
270  if (*p == '+' || *p == '-')
271    {
272      p++;
273      if (p == p_end)
274	return false;
275    }
276  do
277    {
278      int c = *p++;
279
280      if (c >= '0' && c <= '9')
281	c = c - '0';
282      else if (c >= 'A' && c <= 'F')
283	c = c - 'A' + 10;
284      else if (c >= 'a' && c <= 'f')
285	c = c - 'a' + 10;
286      else
287	return false;
288      if (c >= radix)
289	return false;
290    }
291  while (p < p_end);
292  return true;
293}
294
295/* Tests if a token represents a rational, floating-point or complex number.
296   If unconstrained is false, only real numbers are accepted; otherwise,
297   complex numbers are accepted as well.
298   Taken from guile-1.6.4/libguile/numbers.c:scm_istr2flo().  */
299static inline bool
300is_other_number_syntax (const char *str, int len, int radix, bool unconstrained)
301{
302  const char *p = str;
303  const char *p_end = str + len;
304  bool seen_sign;
305  bool seen_digits;
306
307  /* The accepted syntaxes are:
308     for a floating-point number:
309       ['+'|'-'] DIGIT+ [EXPONENT]
310       ['+'|'-'] DIGIT* '.' DIGIT+ [EXPONENT]
311       where EXPONENT ::= ['d'|'e'|'f'|'l'|'s'] DIGIT+
312       (Dot and exponent are allowed only if radix is 10.)
313     for a rational number:
314       ['+'|'-'] DIGIT+ '/' DIGIT+
315     for a complex number:
316       REAL-NUMBER {'+'|'-'} REAL-NUMBER-WITHOUT-SIGN 'i'
317       REAL-NUMBER {'+'|'-'} 'i'
318       {'+'|'-'} REAL-NUMBER-WITHOUT-SIGN 'i'
319       {'+'|'-'} 'i'
320       REAL-NUMBER '@' REAL-NUMBER
321   */
322  if (p == p_end)
323    return false;
324  /* Parse leading sign.  */
325  seen_sign = false;
326  if (*p == '+' || *p == '-')
327    {
328      p++;
329      if (p == p_end)
330	return false;
331      seen_sign = true;
332      /* Recognize complex number syntax: {'+'|'-'} 'i'  */
333      if (unconstrained && (*p == 'I' || *p == 'i') && p + 1 == p_end)
334	return true;
335    }
336  /* Parse digits before dot or exponent or slash.  */
337  seen_digits = false;
338  do
339    {
340      int c = *p;
341
342      if (c >= '0' && c <= '9')
343	c = c - '0';
344      else if (c >= 'A' && c <= 'F')
345	{
346	  if (c >= 'D' && radix == 10) /* exponent? */
347	    break;
348	  c = c - 'A' + 10;
349	}
350      else if (c >= 'a' && c <= 'f')
351	{
352	  if (c >= 'd' && radix == 10) /* exponent? */
353	    break;
354	  c = c - 'a' + 10;
355	}
356      else
357	break;
358      if (c >= radix)
359	return false;
360      seen_digits = true;
361      p++;
362    }
363  while (p < p_end);
364  /* If p == p_end, we know that seen_digits = true, and the number is an
365     integer without exponent.  */
366  if (p < p_end)
367    {
368      /* If we have no digits so far, we need a decimal point later.  */
369      if (!seen_digits && !(*p == '.' && radix == 10))
370	return false;
371      /* Trailing '#' signs are equivalent to zeroes.  */
372      while (p < p_end && *p == '#')
373	p++;
374      if (p < p_end)
375	{
376	  if (*p == '/')
377	    {
378	      /* Parse digits after the slash.  */
379	      bool all_zeroes = true;
380	      p++;
381	      for (; p < p_end; p++)
382		{
383		  int c = *p;
384
385		  if (c >= '0' && c <= '9')
386		    c = c - '0';
387		  else if (c >= 'A' && c <= 'F')
388		    c = c - 'A' + 10;
389		  else if (c >= 'a' && c <= 'f')
390		    c = c - 'a' + 10;
391		  else
392		    break;
393		  if (c >= radix)
394		    return false;
395		  if (c != 0)
396		    all_zeroes = false;
397		}
398	      /* A zero denominator is not allowed.  */
399	      if (all_zeroes)
400		return false;
401	      /* Trailing '#' signs are equivalent to zeroes.  */
402	      while (p < p_end && *p == '#')
403		p++;
404	    }
405	  else
406	    {
407	      if (*p == '.')
408		{
409		  /* Decimal point notation.  */
410		  if (radix != 10)
411		    return false;
412		  /* Parse digits after the decimal point.  */
413		  p++;
414		  for (; p < p_end; p++)
415		    {
416		      int c = *p;
417
418		      if (c >= '0' && c <= '9')
419			seen_digits = true;
420		      else
421			break;
422		    }
423		  /* Digits are required before or after the decimal point.  */
424		  if (!seen_digits)
425		    return false;
426		  /* Trailing '#' signs are equivalent to zeroes.  */
427		  while (p < p_end && *p == '#')
428		    p++;
429		}
430	      if (p < p_end)
431		{
432		  /* Parse exponent.  */
433		  switch (*p)
434		    {
435		    case 'D': case 'd':
436		    case 'E': case 'e':
437		    case 'F': case 'f':
438		    case 'L': case 'l':
439		    case 'S': case 's':
440		      if (radix != 10)
441			return false;
442		      p++;
443		      if (p == p_end)
444			return false;
445		      if (*p == '+' || *p == '-')
446			{
447			  p++;
448			  if (p == p_end)
449			    return false;
450			}
451		      if (!(*p >= '0' && *p <= '9'))
452			return false;
453		      for (;;)
454			{
455			  p++;
456			  if (p == p_end)
457			    break;
458			  if (!(*p >= '0' && *p <= '9'))
459			    break;
460			}
461		      break;
462		    default:
463		      break;
464		    }
465		}
466	    }
467	}
468    }
469  if (p == p_end)
470    return true;
471  /* Recognize complex number syntax.  */
472  if (unconstrained)
473    {
474      /* Recognize the syntax  {'+'|'-'} REAL-NUMBER-WITHOUT-SIGN 'i'  */
475      if (seen_sign && (*p == 'I' || *p == 'i') && p + 1 == p_end)
476	return true;
477      /* Recognize the syntaxes
478	   REAL-NUMBER {'+'|'-'} REAL-NUMBER-WITHOUT-SIGN 'i'
479	   REAL-NUMBER {'+'|'-'} 'i'
480       */
481      if (*p == '+' || *p == '-')
482	return (p_end[-1] == 'I' || p_end[-1] == 'i')
483		&& (p + 1 == p_end - 1
484		    || is_other_number_syntax (p, p_end - 1 - p, radix, false));
485      /* Recognize the syntax  REAL-NUMBER '@' REAL-NUMBER  */
486      if (*p == '@')
487	{
488	  p++;
489	  return is_other_number_syntax (p, p_end - p, radix, false);
490	}
491    }
492  return false;
493}
494
495/* Tests if a token represents a number.
496   Taken from guile-1.6.4/libguile/numbers.c:scm_istring2number().  */
497static bool
498is_number (const struct token *tp)
499{
500  const char *str = tp->chars;
501  int len = tp->charcount;
502  int radix = 10;
503  enum { unknown, exact, inexact } exactness = unknown;
504  bool seen_radix_prefix = false;
505  bool seen_exactness_prefix = false;
506
507  if (len == 1)
508    if (*str == '+' || *str == '-')
509      return false;
510  while (len >= 2 && *str == '#')
511    {
512      switch (str[1])
513	{
514	case 'B': case 'b':
515	  if (seen_radix_prefix)
516	    return false;
517	  radix = 2;
518	  seen_radix_prefix = true;
519	  break;
520	case 'O': case 'o':
521	  if (seen_radix_prefix)
522	    return false;
523	  radix = 8;
524	  seen_radix_prefix = true;
525	  break;
526	case 'D': case 'd':
527	  if (seen_radix_prefix)
528	    return false;
529	  radix = 10;
530	  seen_radix_prefix = true;
531	  break;
532	case 'X': case 'x':
533	  if (seen_radix_prefix)
534	    return false;
535	  radix = 16;
536	  seen_radix_prefix = true;
537	  break;
538	case 'E': case 'e':
539	  if (seen_exactness_prefix)
540	    return false;
541	  exactness = exact;
542	  seen_exactness_prefix = true;
543	  break;
544	case 'I': case 'i':
545	  if (seen_exactness_prefix)
546	    return false;
547	  exactness = inexact;
548	  seen_exactness_prefix = true;
549	  break;
550	default:
551	  return false;
552	}
553      str += 2;
554      len -= 2;
555    }
556  if (exactness != inexact)
557    {
558      /* Try to parse an integer.  */
559      if (is_integer_syntax (str, len, 10))
560	return true;
561      /* FIXME: Other Scheme implementations support exact rational numbers
562	 or exact complex numbers.  */
563    }
564  if (exactness != exact)
565    {
566      /* Try to parse a rational, floating-point or complex number.  */
567      if (is_other_number_syntax (str, len, 10, true))
568	return true;
569    }
570  return false;
571}
572
573
574/* ========================= Accumulating comments ========================= */
575
576
577static char *buffer;
578static size_t bufmax;
579static size_t buflen;
580
581static inline void
582comment_start ()
583{
584  buflen = 0;
585}
586
587static inline void
588comment_add (int c)
589{
590  if (buflen >= bufmax)
591    {
592      bufmax = 2 * bufmax + 10;
593      buffer = xrealloc (buffer, bufmax);
594    }
595  buffer[buflen++] = c;
596}
597
598static inline void
599comment_line_end (size_t chars_to_remove)
600{
601  buflen -= chars_to_remove;
602  while (buflen >= 1
603	 && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
604    --buflen;
605  if (chars_to_remove == 0 && buflen >= bufmax)
606    {
607      bufmax = 2 * bufmax + 10;
608      buffer = xrealloc (buffer, bufmax);
609    }
610  buffer[buflen] = '\0';
611  savable_comment_add (buffer);
612}
613
614
615/* These are for tracking whether comments count as immediately before
616   keyword.  */
617static int last_comment_line;
618static int last_non_comment_line;
619
620
621/* ========================= Accumulating messages ========================= */
622
623
624static message_list_ty *mlp;
625
626
627/* ========================== Reading of objects.  ========================= */
628
629
630/* We are only interested in symbols (e.g. gettext or ngettext) and strings.
631   Other objects need not to be represented precisely.  */
632enum object_type
633{
634  t_symbol,	/* symbol */
635  t_string,	/* string */
636  t_other,	/* other kind of real object */
637  t_dot,	/* '.' pseudo object */
638  t_close,	/* ')' pseudo object */
639  t_eof		/* EOF marker */
640};
641
642struct object
643{
644  enum object_type type;
645  struct token *token;		/* for t_symbol and t_string */
646  int line_number_at_start;	/* for t_string */
647};
648
649/* Free the memory pointed to by a 'struct object'.  */
650static inline void
651free_object (struct object *op)
652{
653  if (op->type == t_symbol || op->type == t_string)
654    {
655      free_token (op->token);
656      free (op->token);
657    }
658}
659
660/* Convert a t_symbol/t_string token to a char*.  */
661static char *
662string_of_object (const struct object *op)
663{
664  char *str;
665  int n;
666
667  if (!(op->type == t_symbol || op->type == t_string))
668    abort ();
669  n = op->token->charcount;
670  str = XNMALLOC (n + 1, char);
671  memcpy (str, op->token->chars, n);
672  str[n] = '\0';
673  return str;
674}
675
676/* Context lookup table.  */
677static flag_context_list_table_ty *flag_context_list_table;
678
679/* Read the next object.  */
680static void
681read_object (struct object *op, flag_context_ty outer_context)
682{
683  for (;;)
684    {
685      int c = do_getc ();
686
687      switch (c)
688	{
689	case EOF:
690	  op->type = t_eof;
691	  return;
692
693	case ' ': case '\r': case '\f': case '\t':
694	  continue;
695
696	case '\n':
697	  /* Comments assumed to be grouped with a message must immediately
698	     precede it, with no non-whitespace token on a line between
699	     both.  */
700	  if (last_non_comment_line > last_comment_line)
701	    savable_comment_reset ();
702	  continue;
703
704	case ';':
705	  {
706	    bool all_semicolons = true;
707
708	    last_comment_line = line_number;
709	    comment_start ();
710	    for (;;)
711	      {
712		c = do_getc ();
713		if (c == EOF || c == '\n')
714		  break;
715		if (c != ';')
716		  all_semicolons = false;
717		if (!all_semicolons)
718		  {
719		    /* We skip all leading white space, but not EOLs.  */
720		    if (!(buflen == 0 && (c == ' ' || c == '\t')))
721		      comment_add (c);
722		  }
723	      }
724	    comment_line_end (0);
725	    continue;
726	  }
727
728	case '(':
729	  {
730	     int arg = 0;		/* Current argument number.  */
731	     flag_context_list_iterator_ty context_iter;
732	    const struct callshapes *shapes = NULL;
733	    struct arglist_parser *argparser = NULL;
734
735	     for (;; arg++)
736	       {
737		struct object inner;
738		flag_context_ty inner_context;
739
740		if (arg == 0)
741		  inner_context = null_context;
742		else
743		  inner_context =
744		    inherited_context (outer_context,
745				       flag_context_list_iterator_advance (
746					 &context_iter));
747
748		read_object (&inner, inner_context);
749
750		/* Recognize end of list.  */
751		if (inner.type == t_close)
752		  {
753		    op->type = t_other;
754		    last_non_comment_line = line_number;
755		    if (argparser != NULL)
756		      arglist_parser_done (argparser, arg);
757		    return;
758		  }
759
760		/* Dots are not allowed in every position.
761		   But be tolerant.  */
762
763		/* EOF inside list is illegal.
764		   But be tolerant.  */
765		if (inner.type == t_eof)
766		  break;
767
768		if (arg == 0)
769		  {
770		    /* This is the function position.  */
771		    if (inner.type == t_symbol)
772		      {
773			char *symbol_name = string_of_object (&inner);
774			void *keyword_value;
775
776			if (hash_find_entry (&keywords,
777					     symbol_name, strlen (symbol_name),
778					     &keyword_value)
779			    == 0)
780			  shapes = (const struct callshapes *) keyword_value;
781
782			argparser = arglist_parser_alloc (mlp, shapes);
783
784			context_iter =
785			  flag_context_list_iterator (
786			    flag_context_list_table_lookup (
787			      flag_context_list_table,
788			      symbol_name, strlen (symbol_name)));
789
790			free (symbol_name);
791		      }
792		    else
793		      context_iter = null_context_list_iterator;
794		  }
795		else
796		  {
797		    /* These are the argument positions.  */
798		    if (argparser != NULL && inner.type == t_string)
799		      arglist_parser_remember (argparser, arg,
800					       string_of_object (&inner),
801					       inner_context,
802					       logical_file_name,
803					       inner.line_number_at_start,
804					       savable_comment);
805		  }
806
807		free_object (&inner);
808	      }
809	    if (argparser != NULL)
810	      arglist_parser_done (argparser, arg);
811	  }
812	  op->type = t_other;
813	  last_non_comment_line = line_number;
814	  return;
815
816	case ')':
817	  /* Tell the caller about the end of list.
818	     Unmatched closing parenthesis is illegal.
819	     But be tolerant.  */
820	  op->type = t_close;
821	  last_non_comment_line = line_number;
822	  return;
823
824	case ',':
825	  {
826	    int c = do_getc ();
827	    /* The ,@ handling inside lists is wrong anyway, because
828	       ,@form expands to an unknown number of elements.  */
829	    if (c != EOF && c != '@')
830	      do_ungetc (c);
831	  }
832	  /*FALLTHROUGH*/
833	case '\'':
834	case '`':
835	  {
836	    struct object inner;
837
838	    read_object (&inner, null_context);
839
840	    /* Dots and EOF are not allowed here.  But be tolerant.  */
841
842	    free_object (&inner);
843
844	    op->type = t_other;
845	    last_non_comment_line = line_number;
846	    return;
847	  }
848
849	case '#':
850	  /* Dispatch macro handling.  */
851	  {
852	    c = do_getc ();
853	    if (c == EOF)
854	      /* Invalid input.  Be tolerant, no error message.  */
855	      {
856		op->type = t_other;
857		return;
858	      }
859
860	    switch (c)
861	      {
862	      case '(': /* Vector */
863		do_ungetc (c);
864		{
865		  struct object inner;
866		  read_object (&inner, null_context);
867		  /* Dots and EOF are not allowed here.
868		     But be tolerant.  */
869		  free_object (&inner);
870		  op->type = t_other;
871		  last_non_comment_line = line_number;
872		  return;
873		}
874
875	      case 'T': case 't': /* Boolean true */
876	      case 'F': case 'f': /* Boolean false */
877		op->type = t_other;
878		last_non_comment_line = line_number;
879		return;
880
881	      case 'B': case 'b':
882	      case 'O': case 'o':
883	      case 'D': case 'd':
884	      case 'X': case 'x':
885	      case 'E': case 'e':
886	      case 'I': case 'i':
887		{
888		  struct token token;
889		  do_ungetc (c);
890		  read_token (&token, '#');
891		  if (is_number (&token))
892		    {
893		      /* A number.  */
894		      free_token (&token);
895		      op->type = t_other;
896		      last_non_comment_line = line_number;
897		      return;
898		    }
899		  else
900		    {
901		      if (token.charcount == 2
902			  && (token.chars[1] == 'e' || token.chars[1] == 'i'))
903			{
904			  c = do_getc ();
905			  if (c != EOF)
906			    do_ungetc (c);
907			  if (c == '(')
908			    /* Homogenous vector syntax, see arrays.scm.  */
909			    case 'a':	/* Vectors of char */
910			    case 'c':	/* Vectors of complex */
911			  /*case 'e':*/	/* Vectors of long */
912			    case 'h':	/* Vectors of short */
913			  /*case 'i':*/	/* Vectors of double-float */
914			    case 'l':	/* Vectors of long long */
915			    case 's':	/* Vectors of single-float */
916			    case 'u':	/* Vectors of unsigned long */
917			    case 'y':	/* Vectors of byte */
918			      {
919				struct object inner;
920				read_object (&inner, null_context);
921				/* Dots and EOF are not allowed here.
922				   But be tolerant.  */
923				free_token (&token);
924				free_object (&inner);
925				op->type = t_other;
926				last_non_comment_line = line_number;
927				return;
928			      }
929			}
930		      /* Unknown # object.  But be tolerant.  */
931		      free_token (&token);
932		      op->type = t_other;
933		      last_non_comment_line = line_number;
934		      return;
935		    }
936		}
937
938	      case '!':
939		/* Block comment '#! ... \n!#\n'.  We don't extract it
940		   because it's only used to introduce scripts on Unix.  */
941		{
942		  int last1 = 0;
943		  int last2 = 0;
944		  int last3 = 0;
945
946		  for (;;)
947		    {
948		      c = do_getc ();
949		      if (c == EOF)
950			/* EOF is not allowed here.  But be tolerant.  */
951			break;
952		      if (last3 == '\n' && last2 == '!' && last1 == '#'
953			  && c == '\n')
954			break;
955		      last3 = last2;
956		      last2 = last1;
957		      last1 = c;
958		    }
959		  continue;
960		}
961
962	      case '*':
963		/* Bit vector.  */
964		{
965		  struct token token;
966		  read_token (&token, c);
967		  /* The token should consists only of '0' and '1', except
968		     for the initial '*'.  But be tolerant.  */
969		  free_token (&token);
970		  op->type = t_other;
971		  last_non_comment_line = line_number;
972		  return;
973		}
974
975	      case '{':
976		/* Symbol with multiple escapes: #{...}#  */
977		{
978		  op->token = XMALLOC (struct token);
979
980		  init_token (op->token);
981
982		  for (;;)
983		    {
984		      c = do_getc ();
985
986		      if (c == EOF)
987			break;
988		      if (c == '\\')
989			{
990			  c = do_getc ();
991			  if (c == EOF)
992			    break;
993			}
994		      else if (c == '}')
995			{
996			  c = do_getc ();
997			  if (c == '#')
998			    break;
999			  if (c != EOF)
1000			    do_ungetc (c);
1001			  c = '}';
1002			}
1003		      grow_token (op->token);
1004		      op->token->chars[op->token->charcount++] = c;
1005		    }
1006
1007		  op->type = t_symbol;
1008		  last_non_comment_line = line_number;
1009		  return;
1010		}
1011
1012	      case '\\':
1013		/* Character.  */
1014		{
1015		  struct token token;
1016		  c = do_getc ();
1017		  if (c != EOF)
1018		    {
1019		      read_token (&token, c);
1020		      free_token (&token);
1021		    }
1022		  op->type = t_other;
1023		  last_non_comment_line = line_number;
1024		  return;
1025		}
1026
1027	      case ':': /* Keyword.  */
1028	      case '&': /* Deprecated keyword, installed in optargs.scm.  */
1029		{
1030		  struct token token;
1031		  read_token (&token, '-');
1032		  free_token (&token);
1033		  op->type = t_other;
1034		  last_non_comment_line = line_number;
1035		  return;
1036		}
1037
1038	      /* The following are installed through read-hash-extend.  */
1039
1040	      /* arrays.scm */
1041	      case '0': case '1': case '2': case '3': case '4':
1042	      case '5': case '6': case '7': case '8': case '9':
1043		/* Multidimensional array syntax: #nx(...) where
1044		     n ::= DIGIT+
1045		     x ::= {'a'|'b'|'c'|'e'|'i'|'s'|'u'}
1046		 */
1047		do
1048		  c = do_getc ();
1049		while (c >= '0' && c <= '9');
1050		/* c should be one of {'a'|'b'|'c'|'e'|'i'|'s'|'u'}.
1051		   But be tolerant.  */
1052		/*FALLTHROUGH*/
1053	      case '\'': /* boot-9.scm */
1054	      case '.': /* boot-9.scm */
1055	      case ',': /* srfi-10.scm */
1056		{
1057		  struct object inner;
1058		  read_object (&inner, null_context);
1059		  /* Dots and EOF are not allowed here.
1060		     But be tolerant.  */
1061		  free_object (&inner);
1062		  op->type = t_other;
1063		  last_non_comment_line = line_number;
1064		  return;
1065		}
1066
1067	      default:
1068		/* Unknown.  */
1069		op->type = t_other;
1070		last_non_comment_line = line_number;
1071		return;
1072	      }
1073	    /*NOTREACHED*/
1074	    abort ();
1075	  }
1076
1077	case '"':
1078	  {
1079	    op->token = XMALLOC (struct token);
1080	    init_token (op->token);
1081	    op->line_number_at_start = line_number;
1082	    for (;;)
1083	      {
1084		int c = do_getc ();
1085		if (c == EOF)
1086		  /* Invalid input.  Be tolerant, no error message.  */
1087		  break;
1088		if (c == '"')
1089		  break;
1090		if (c == '\\')
1091		  {
1092		    c = do_getc ();
1093		    if (c == EOF)
1094		      /* Invalid input.  Be tolerant, no error message.  */
1095		      break;
1096		    switch (c)
1097		      {
1098		      case '\n':
1099			continue;
1100		      case '0':
1101			c = '\0';
1102			break;
1103		      case 'a':
1104			c = '\a';
1105			break;
1106		      case 'f':
1107			c = '\f';
1108			break;
1109		      case 'n':
1110			c = '\n';
1111			break;
1112		      case 'r':
1113			c = '\r';
1114			break;
1115		      case 't':
1116			c = '\t';
1117			break;
1118		      case 'v':
1119			c = '\v';
1120			break;
1121		      default:
1122			break;
1123		      }
1124		  }
1125		grow_token (op->token);
1126		op->token->chars[op->token->charcount++] = c;
1127	      }
1128	    op->type = t_string;
1129
1130	    if (extract_all)
1131	      {
1132		lex_pos_ty pos;
1133
1134		pos.file_name = logical_file_name;
1135		pos.line_number = op->line_number_at_start;
1136		remember_a_message (mlp, NULL, string_of_object (op),
1137				    null_context, &pos, savable_comment);
1138	      }
1139	    last_non_comment_line = line_number;
1140	    return;
1141	  }
1142
1143	case '0': case '1': case '2': case '3': case '4':
1144	case '5': case '6': case '7': case '8': case '9':
1145	case '+': case '-': case '.':
1146	  /* Read a number or symbol token.  */
1147	  op->token = XMALLOC (struct token);
1148	  read_token (op->token, c);
1149	  if (op->token->charcount == 1 && op->token->chars[0] == '.')
1150	    {
1151	      free_token (op->token);
1152	      free (op->token);
1153	      op->type = t_dot;
1154	    }
1155	  else if (is_number (op->token))
1156	    {
1157	      /* A number.  */
1158	      free_token (op->token);
1159	      free (op->token);
1160	      op->type = t_other;
1161	    }
1162	  else
1163	    {
1164	      /* A symbol.  */
1165	      op->type = t_symbol;
1166	    }
1167	  last_non_comment_line = line_number;
1168	  return;
1169
1170	case ':':
1171	default:
1172	  /* Read a symbol token.  */
1173	  op->token = XMALLOC (struct token);
1174	  read_token (op->token, c);
1175	  op->type = t_symbol;
1176	  last_non_comment_line = line_number;
1177	  return;
1178	}
1179    }
1180}
1181
1182
1183void
1184extract_scheme (FILE *f,
1185		const char *real_filename, const char *logical_filename,
1186		flag_context_list_table_ty *flag_table,
1187		msgdomain_list_ty *mdlp)
1188{
1189  mlp = mdlp->item[0]->messages;
1190
1191  fp = f;
1192  real_file_name = real_filename;
1193  logical_file_name = xstrdup (logical_filename);
1194  line_number = 1;
1195
1196  last_comment_line = -1;
1197  last_non_comment_line = -1;
1198
1199  flag_context_list_table = flag_table;
1200
1201  init_keywords ();
1202
1203  /* Eat tokens until eof is seen.  When read_object returns
1204     due to an unbalanced closing parenthesis, just restart it.  */
1205  do
1206    {
1207      struct object toplevel_object;
1208
1209      read_object (&toplevel_object, null_context);
1210
1211      if (toplevel_object.type == t_eof)
1212	break;
1213
1214      free_object (&toplevel_object);
1215    }
1216  while (!feof (fp));
1217
1218  /* Close scanner.  */
1219  fp = NULL;
1220  real_file_name = NULL;
1221  logical_file_name = NULL;
1222  line_number = 0;
1223}
1224